In [None]:
!pip install transformers datasets peft accelerate bitsandbytes -q

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from transformers import BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

In [None]:
# Base checkpoint to fine-tune; point this at another causal-LM checkpoint if needed.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

# Training split of the dataset, fetched from the Hugging Face Hub.
dataset = load_dataset("medalpaca/medical_meadow_mediqa", split="train")

# Tokenizer that belongs to the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Use the end-of-sequence token as the padding token so padded batches can be built.
tokenizer.pad_token = tokenizer.eos_token

# Define a safe tokenization function
def tokenize_function(example, tok=None):
    """Turn instruction/output records into fixed-length token encodings.

    Works both for a single record (string fields) and for the column-wise
    batches that ``Dataset.map(..., batched=True)`` passes in (list fields).
    In the batched case one prompt is built per row, so the mapped dataset
    keeps one encoded example per original example.

    Args:
        example: Mapping with ``"instruction"`` and ``"output"`` entries,
            each either a string or a list of strings of equal length.
        tok: Optional tokenizer callable. Defaults to the notebook-level
            ``tokenizer``.

    Returns:
        Whatever the tokenizer returns for the prompt(s), padded/truncated
        to 512 tokens. No ``return_tensors`` is requested: ``input_ids`` and
        ``attention_mask`` stay plain lists with matching shapes, and the
        dataset is converted to tensors later via ``set_format("torch")``.
    """
    tok = tokenizer if tok is None else tok

    instructions = example["instruction"]
    outputs = example["output"]

    if isinstance(instructions, str):
        # Un-batched call: a single record.
        text = f"Instruction: {instructions}\nResponse: {outputs}"
    else:
        # Batched call: fields are parallel lists, so build one prompt per row
        # instead of formatting the whole list into a single string.
        text = [
            f"Instruction: {instruction}\nResponse: {output}"
            for instruction, output in zip(instructions, outputs)
        ]

    return tok(text, padding="max_length", truncation=True, max_length=512)

# Tokenize the dataset batch by batch and drop the original text columns.
tokenized_dataset = dataset.map(
    function=tokenize_function,
    batched=True,
    remove_columns=dataset.column_names,
)

# Have the dataset hand back PyTorch tensors when rows are read.
tokenized_dataset.set_format(type="torch")

In [None]:
# Show the maximum sequence length reported by the loaded tokenizer.
print(tokenizer.model_max_length)


In [None]:
import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Load base model in 4-bit for low memory usage.
# The quantization settings are passed through `quantization_config`; the bare
# `load_in_4bit=` keyword on `from_pretrained` is deprecated in transformers.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)

# Run PEFT's preparation step for k-bit (quantized) models before attaching
# adapters; this is the documented prerequisite for training on 4-bit weights.
model = prepare_model_for_kbit_training(model)

# Configure LoRA: rank-16 adapters on the attention query/value projections.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",  # record the task so PEFT wraps the model as a causal LM
)

# Apply LoRA
model = get_peft_model(model, lora_config)

# Print trainable parameters
model.print_trainable_parameters()


In [None]:

# Hyper-parameters and bookkeeping options for the fine-tuning run.
training_args = TrainingArguments(
    # --- optimisation schedule ---
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # 2 x 8 = 16 examples per optimizer step per device
    fp16=True,
    # --- outputs and logging ---
    output_dir="./fine_tuned_model",
    save_strategy="no",  # no intermediate checkpoints, to avoid crashes from saving the full model
    logging_dir="./logs",
    logging_steps=10,
    report_to="none",
)


In [None]:
class CustomTrainer(Trainer):
    """Trainer that derives next-token labels from ``input_ids``.

    The dataset carries no ``labels`` column, so the causal language-modeling
    loss is computed here by shifting the inputs by one position.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the next-token cross-entropy loss for one batch.

        Args:
            model: The model being trained; called with ``input_ids`` and
                ``attention_mask`` and expected to return an object with ``.logits``.
            inputs: Batch mapping containing ``"input_ids"`` and, optionally,
                ``"attention_mask"``.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just the loss.
            num_items_in_batch: Accepted for compatibility with the Trainer's
                call signature; not used by this implementation.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs`` is true.
        """
        # Move inputs to the model's device (e.g., GPU)
        input_ids = inputs["input_ids"].to(model.device)
        attention_mask = inputs.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to(model.device)

        # Forward pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Shift so that the logits at position t are scored against token t+1.
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = input_ids[..., 1:]
        if attention_mask is not None:
            # Padding positions must not contribute to the loss. The pad token
            # may be the same id as a real token (e.g. EOS), so mask by the
            # attention mask rather than by token id. -100 is the default
            # ignore_index of CrossEntropyLoss.
            shift_labels = shift_labels.masked_fill(attention_mask[..., 1:] == 0, -100)
        # When the model is spread over several devices the logits can live on
        # a different device than the inputs; align the labels with the logits.
        shift_labels = shift_labels.to(shift_logits.device).contiguous()

        # Calculate loss (mean over the non-ignored positions)
        loss_fn = torch.nn.CrossEntropyLoss()
        loss = loss_fn(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        return (loss, outputs) if return_outputs else loss

# Wire the LoRA-wrapped model, the tokenized data and the run configuration together.
trainer = CustomTrainer(args=training_args, model=model, train_dataset=tokenized_dataset)

In [None]:
# Run the fine-tuning loop configured by `training_args` on `tokenized_dataset`.
trainer.train()

In [None]:
# Persist the tokenizer files next to the adapters so both load from one directory.
tokenizer.save_pretrained("./fine_tuned_lora")

# Write the trained LoRA adapter weights.
model.save_pretrained("./fine_tuned_lora")

print("LoRA adapters saved successfully!")


In [None]:
from peft import PeftModel

# NOTE(review): if the training cells ran in this kernel, the previous `model`
# (also referenced by `trainer`) stays on the GPU until the assignment below
# completes, so two copies of the base model coexist briefly. Consider
# freeing it first on memory-constrained GPUs.

# Load base model in 4-bit, pinned to a single GPU.
# The quantization settings go through `quantization_config`; the bare
# `load_in_4bit=` keyword on `from_pretrained` is deprecated in transformers.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="cuda:0",
)

# Load LoRA adapters & merge them into the base weights.
# NOTE(review): the base weights are 4-bit quantized, so the merged result may
# differ slightly from running base + adapters separately. Confirm acceptable.
model = PeftModel.from_pretrained(model, "./fine_tuned_lora", device_map="cuda:0")
model = model.merge_and_unload()

# Load the tokenizer that was saved alongside the adapters.
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_lora")

print("Fine-tuned model with LoRA adapters loaded successfully!")

In [None]:
def generate_response(prompt, max_length=200):
    """Generate text for ``prompt`` with the notebook-level model and tokenizer.

    Args:
        prompt: Text to feed to the model.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
            Defaults to 200, the previously hard-coded value.

    Returns:
        The first returned sequence decoded to a string, with special tokens
        removed.
    """
    # Place the inputs on the model's own device instead of assuming "cuda",
    # so the call keeps working if the model was loaded elsewhere.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=max_length,
            # Pass the pad id explicitly rather than relying on generate's fallback.
            pad_token_id=tokenizer.pad_token_id,
        )
    return tokenizer.decode(output[0].cpu(), skip_special_tokens=True)

# Smoke-test generation with a sample medical question.
# NOTE(review): tokenize_function formats training text as
# "Instruction: ...\nResponse: ..."; this prompt does not use that template.
# Confirm whether the mismatch is intended.
print(generate_response("What are the symptoms of diabetes?"))
