# Libraries

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, DataCollatorForLanguageModeling

In [None]:
# Report whether a CUDA device is visible before any heavy work starts
print("🔍 Checking GPU availability...")
cuda_ready = torch.cuda.is_available()
print(f"CUDA Available: {cuda_ready}")
if not cuda_ready:
    # Nothing to name; just warn that everything will run on CPU
    print("⚠️ No GPU detected. Training will be very slow!")
else:
    # Device index 0 is the one reported
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Pretrained LLM (falcon-7b-instruct)

In [None]:
# First, check if GPU is available
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    print(f"Current GPU: {torch.cuda.get_device_name()}")

# Define model name
MODEL_NAME = "tiiuae/falcon-7b-instruct"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Add a dedicated padding token if the tokenizer ships without one, and keep
# the number of tokens that were actually added: the embedding matrix only
# needs resizing when the vocabulary really grew.
num_added_tokens = 0
if tokenizer.pad_token is None:
    num_added_tokens = tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Quantization config: 4-bit weights, float16 compute, double quantization
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load the quantized model and let the library place it on the available
# device(s)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=quant_config,
)

# Resize token embeddings only when special tokens were added above.
# An unconditional resize to len(tokenizer) is unnecessary when nothing was
# added and could change the size of an embedding matrix that does not match
# the tokenizer length exactly.
if num_added_tokens > 0:
    model.resize_token_embeddings(len(tokenizer))

# Announce success only once the model is fully set up
print("✅ Model Loaded Successfully!")

# Healthcare Dataset (MedQA - USMLE)

In [None]:
from datasets import load_dataset

# Fetch the training split of the 4-option MedQA (USMLE) dataset
dataset = load_dataset(
    path="GBaker/MedQA-USMLE-4-options",
    split="train",
)

# Quick sanity check: number of records and the first raw record
print(f"Dataset size: {len(dataset)} examples")
print(dataset[0])

# Tokenization and Preprocessing

In [None]:
# Define max sequence length - use a consistent value
MAX_LENGTH = 512


# Process examples one at a time to avoid batching issues
def preprocess_function(example):
    """Tokenize one record as a fixed-length "Question/Answer" training text.

    Args:
        example: A dataset row providing ``"question"`` and ``"answer"``.
            Either value may be a string or a list of strings; lists are
            joined with single spaces.

    Returns:
        The tokenizer output for the formatted text, padded or truncated to
        ``MAX_LENGTH`` tokens and returned as plain Python lists.
    """

    def _as_text(value):
        # Some fields may arrive as a list of fragments; flatten to one string
        return " ".join(value) if isinstance(value, list) else value

    question = _as_text(example["question"])
    answer = _as_text(example["answer"])

    # Terminate the answer with the tokenizer's end-of-sequence marker (when
    # it defines one) so the training text marks where an answer ends;
    # without it every example simply runs into padding.
    # NOTE(review): assumes the tokenizer does not already append EOS on its
    # own -- confirm for the tokenizer in use.
    eos = tokenizer.eos_token or ""
    full_text = f"Question: {question}\nAnswer: {answer}{eos}"

    # Tokenize with fixed length
    encoded = tokenizer(
        full_text,
        max_length=MAX_LENGTH,
        padding="max_length",
        truncation=True,
        return_tensors=None,  # Return Python lists
    )

    return encoded

# Tokenize every record; the original columns are removed so that only the
# tokenizer outputs remain in the result
processed_dataset = dataset.map(
    function=preprocess_function,
    desc="Processing dataset",
    remove_columns=dataset.column_names,
)

# Add labels for causal language modeling (input_ids with padding masked out)
def add_labels(example):
    """Attach a ``"labels"`` list derived from ``"input_ids"``.

    Positions whose ``"attention_mask"`` entry is 0 (padding) are set to
    -100, the conventional "ignore" label, so padding never becomes a
    training target. When the example has no ``"attention_mask"`` the labels
    are a plain copy of ``"input_ids"``.

    Args:
        example: Mapping with ``"input_ids"`` and, optionally,
            ``"attention_mask"`` of the same length.

    Returns:
        The same mapping, with ``"labels"`` added as a new list.
    """
    input_ids = example["input_ids"]
    attention_mask = example.get("attention_mask")

    if attention_mask is None:
        # No mask available: fall back to an independent copy of the ids
        example["labels"] = list(input_ids)
    else:
        example["labels"] = [
            token_id if keep else -100
            for token_id, keep in zip(input_ids, attention_mask)
        ]
    return example

# Attach the labels column to every tokenized record
tokenized_datasets = processed_dataset.map(
    function=add_labels,
    desc="Adding labels",
)


In [None]:
# Collator for batching: mlm=False selects causal (next-token) language
# modeling instead of masked language modeling
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

# Split the dataset 80/20. The seed pins the partition so the evaluation set
# (and therefore early stopping / best-checkpoint selection) is comparable
# from one run to the next.
train_test_split = tokenized_datasets.train_test_split(test_size=0.2, seed=42)

# Work on a subset of each split, capped at the split's real size so that
# `select` is never asked for indices that do not exist.
train_size = min(2000, len(train_test_split["train"]))
eval_size = min(1000, len(train_test_split["test"]))
train_dataset = train_test_split["train"].select(range(train_size))
eval_dataset = train_test_split["test"].select(range(eval_size))

In [None]:
# Bare expression: as the last statement of a notebook cell this displays the
# representation of the selected training subset
train_dataset

# Implement LoRA for Efficient Fine-Tuning

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# The base model was loaded 4-bit quantized, so run PEFT's k-bit preparation
# step before attaching adapters. Skipping it is a common cause of unstable
# or failing training on quantized models.
# NOTE(review): by default this helper also turns on gradient checkpointing,
# which trades speed for memory -- confirm that is acceptable here.
model = prepare_model_for_kbit_training(model)

# Define LoRA config: rank-16 adapters on the "query_key_value" modules
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Apply LoRA and report how many parameters will actually be trained
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Training

In [None]:
import time
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

# Training configuration, grouped by concern
training_args = TrainingArguments(
    # --- Output, logging and reporting ---
    output_dir="./results",
    logging_dir="./logs",
    logging_steps=10,  # Log often enough to follow progress
    report_to="tensorboard",
    # --- Schedule: evaluate and checkpoint once per epoch ---
    # NOTE(review): newer transformers releases call this `eval_strategy`;
    # confirm against the installed version.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    num_train_epochs=10,
    # --- Batch sizing: small per-device batches with gradient accumulation ---
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    # --- Precision ---
    fp16=True,
    # --- Best-checkpoint selection (also used by early stopping) ---
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    label_names=["labels"],  # Explicitly name the label column
)

# Early stopping with a patience of 3
early_stopping = EarlyStoppingCallback(early_stopping_patience=3)

# Wire model, data, collator and callback into the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    callbacks=[early_stopping],
)


In [None]:
# Run fine-tuning and measure how long it takes (wall-clock seconds)
print("\n🚀 Starting Training...")
start_time = time.time()
trainer.train()
end_time = time.time()

# Elapsed time between the two timestamps
total_time = end_time - start_time
print(f"\n✅ Training Completed in {total_time:.2f} seconds")

# Saving the Model

In [None]:
# Persist the results of the training run
print("\n📦 Saving the fine-tuned model...")

# Save through the Trainer.
# NOTE(review): `model` is PEFT-wrapped at this point, so this presumably
# writes the adapter weights rather than a merged full model -- confirm.
trainer.save_model("./final_model")

# Save the PEFT adapter into its own directory
model.save_pretrained("./peft_adapter")

# Store the tokenizer next to both sets of saved weights
for save_dir in ("./final_model", "./peft_adapter"):
    tokenizer.save_pretrained(save_dir)

In [None]:
import gc

import torch

# Collect unreachable Python objects first: the CUDA cache can only give back
# memory that no tensor is holding any more, so emptying the cache before
# garbage collection frees less than it could.
gc.collect()
torch.cuda.empty_cache()

In [None]:
import gc
# Run a full garbage-collection pass; as the last expression of a notebook
# cell, the returned count of unreachable objects is shown as the output
gc.collect()