In [1]:
! pip install peft transformers datasets accelerate bitsandbytes



In [3]:
import os
import json
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset

In [4]:
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
train_path = "datasets/pubmedqa/train_prompted.jsonl"
val_path = "datasets/pubmedqa/val_prompted.jsonl"
output_dir = "models/lora_pubmedqa_cpu"
max_length = 512

In [5]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Set padding token

# Load model on CPU
model = AutoModelForCausalLM.from_pretrained(model_name)

# Configure LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=8,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

The 8-bit optimizer is not available on your device, only available on CUDA for now.


In [6]:
def load_jsonl_dataset(path):
    with open(path, "r") as f:
        return [json.loads(line) for line in f]

def tokenize(sample):
    input_text = sample["prompt"]
    target_text = sample["response"]
    return tokenizer(
        input_text,
        text_target=target_text,
        truncation=True,
        padding="max_length",
        max_length=max_length
    )

In [7]:
train_raw = load_jsonl_dataset(train_path)
val_raw = load_jsonl_dataset(val_path)

# Tokenize and convert to Hugging Face datasets
train_dataset = Dataset.from_list([tokenize(sample) for sample in train_raw])
val_dataset = Dataset.from_list([tokenize(sample) for sample in val_raw])

In [8]:
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    eval_steps=100,
    save_steps=200,
    logging_steps=50,
    save_total_limit=1,
    learning_rate=2e-4,
    report_to="none",
    no_cuda=True  # ✅ This is the key fix
)



In [9]:
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
torch.device("cpu")
model = AutoModelForCausalLM.from_pretrained(model_name).cpu()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
)

  trainer = Trainer(


In [11]:
trainer.train()

: 

In [None]:
model.save_pretrained(f"{output_dir}/adapter")
print(f"LoRA adapter saved to {output_dir}/adapter")