In [None]:
!pip install transformers datasets peft accelerate bitsandbytes -q

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

In [None]:
!pip install --upgrade datasets fsspec

In [None]:
from datasets import load_dataset, DownloadConfig

# Fetch the training split of the MediQA medical question-answering set.
# Downloads are cached under ./cache; token=True asks the hub client to use
# the locally stored Hugging Face credentials.
dataset = load_dataset(
    "medalpaca/medical_meadow_mediqa",
    token=True,
    cache_dir="./cache",
    split="train",
)

# Hub id of the base checkpoint; reused by every cell that loads the model.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


# Reuse the end-of-sequence token for padding so that padding="max_length"
# can be used during tokenization.
# NOTE(review): presumably the checkpoint's tokenizer ships without a pad
# token - confirm.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(example):
    """Tokenize instruction/response pairs into fixed-length model inputs.

    Handles both a single example (string fields) and the batched form that
    ``dataset.map(..., batched=True)`` passes, where every field is a list
    holding one entry per row.

    Args:
        example: Mapping with ``"instruction"`` and ``"output"`` entries, each
            either a string or two equal-length lists of strings.

    Returns:
        The tokenizer's output for the prompt(s), padded or truncated to 512
        tokens each. No tensor conversion is requested here, so a batched
        call yields one entry per input row.
    """
    instructions, outputs = example["instruction"], example["output"]

    if isinstance(instructions, str):
        text = f"Instruction: {instructions}\nResponse: {outputs}"
    else:
        # Batched call: build one prompt per row. Formatting the lists
        # directly would squash the whole batch into a single prompt.
        text = [
            f"Instruction: {instruction}\nResponse: {output}"
            for instruction, output in zip(instructions, outputs)
        ]

    # return_tensors is deliberately omitted: it adds a leading batch
    # dimension that does not belong in per-row dataset columns.
    return tokenizer(text, padding="max_length", truncation=True, max_length=512)

# Tokenize in batches, dropping every original column so that only the
# tokenizer's outputs remain in the dataset.
tokenized_dataset = dataset.map(
    tokenize_function,
    remove_columns=dataset.column_names,
    batched=True,
)

# Return torch tensors when rows are read from the dataset.
tokenized_dataset.set_format("torch")

In [None]:
# Show the maximum sequence length the tokenizer reports.
print(f"{tokenizer.model_max_length}")

In [None]:
import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Load the base model with 4-bit weights. The quantization settings are passed
# through `quantization_config`; the bare `load_in_4bit=True` keyword on
# from_pretrained is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)

# LoRA adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05, bias="none",
    target_modules=["q_proj", "v_proj"],
    # Declares the adapter as a causal-LM adapter so PEFT wraps the model
    # with its causal-LM class and records the task in the saved config.
    task_type="CAUSAL_LM",
)

# Preprocess the quantized base model for training before adapters are
# attached, as the PEFT quantization guide prescribes for k-bit models.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

model.print_trainable_parameters()


In [None]:
training_args = TrainingArguments(
    # Optimisation schedule: 2 samples per device, accumulated over 8 steps,
    # for a single pass over the data in fp16 mixed precision.
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    fp16=True,
    # Outputs: no intermediate checkpoints, no external experiment tracker.
    output_dir="./fine_tuned_model",
    save_strategy="no",
    report_to="none",
    # Logging: every 10 steps into ./logs.
    logging_dir="./logs",
    logging_steps=10,
)


In [None]:
class CustomTrainer(Trainer):
    """Trainer that computes the next-token prediction loss itself."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """
        Next-token cross-entropy computed from ``input_ids``, skipping padding.

        Labels are the input ids shifted left by one. A position contributes
        to the loss only when the token the prediction is conditioned on is a
        real token (attention mask 1). Padding positions are therefore
        ignored instead of dominating the average, while the first padding
        token after a right-padded sequence is still kept as a target.

        Args:
            model: The model being trained.
            inputs: Batch mapping holding ``input_ids`` and ``attention_mask``.
            return_outputs: When true, return ``(loss, outputs)``.
            num_items_in_batch: Accepted for Trainer compatibility; unused.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs``.
        """

        input_ids = inputs.get("input_ids").to(model.device)
        attention_mask = inputs.get("attention_mask").to(model.device)

        # Guarantee one mask value per token even if the stored mask carries
        # extra singleton dimensions.
        attention_mask = attention_mask.reshape(input_ids.shape)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Position t predicts token t + 1.
        shift_logits = logits[..., :-1, :].contiguous()
        # Logits may sit on a different device than the inputs when the model
        # is sharded, so move the targets to wherever the logits are.
        shift_labels = input_ids[..., 1:].to(shift_logits.device)
        conditioned_on_real_token = attention_mask[..., :-1].to(shift_logits.device) != 0

        # -100 is the index CrossEntropyLoss is told to ignore below.
        shift_labels = shift_labels.masked_fill(~conditioned_on_real_token, -100)

        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=-100)
        loss = loss_fn(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.reshape(-1))

        return (loss, outputs) if return_outputs else loss

# Wire the LoRA-wrapped model and the tokenized data into the custom trainer.
trainer = CustomTrainer(
    args=training_args,
    model=model,
    train_dataset=tokenized_dataset,
)

In [None]:
# Run the fine-tuning loop configured by `training_args`; as the last
# expression of the cell, the return value is displayed by the notebook.
trainer.train()

In [None]:
# Persist the trained model object and the tokenizer side by side so that
# both can be reloaded from the same folder later.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./fine_tuned_lora")

print("LoRA adapters saved successfully!")


In [None]:
from peft import PeftModel

# Reload the 4-bit base model on the first GPU. The quantization settings are
# passed through `quantization_config`; the bare `load_in_4bit=True` keyword
# on from_pretrained is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="cuda:0",
)

# Attach the saved adapters, then fold them into the base weights so that
# inference runs on a plain model object.
model = PeftModel.from_pretrained(model, "./fine_tuned_lora", device_map="cuda:0") # specify device_map here as well
model = model.merge_and_unload()

# Use the tokenizer that was saved next to the adapters.
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_lora")

print("Fine-tuned model with LoRA adapters loaded successfully!")

In [None]:
def generate_response(prompt, max_length=300):
    """Generate a text completion for ``prompt`` with the loaded model.

    Args:
        prompt: Text to feed to the model.
        max_length: Upper bound passed to ``model.generate`` as
            ``max_length``. Defaults to 300.

    Returns:
        The decoded first generated sequence, with special tokens stripped.
    """
    # Place the inputs on the model's own device rather than assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():  # Reduce memory usage
        output = model.generate(
            **inputs,
            max_length=max_length,
            # Pass the padding id explicitly instead of leaving generate()
            # to fall back on a default.
            pad_token_id=tokenizer.pad_token_id,
        )
    return tokenizer.decode(output[0].cpu(), skip_special_tokens=True)

# Test response generation: run one sample prompt through generate_response
# and print the decoded text.
print(generate_response("Symptoms of headache ? and solution"))
