In [None]:
# 2. Imports and Environment Setup
import os
import torch
import wandb
from huggingface_hub import login, create_repo, upload_folder

# GPU Check
# Abort the notebook right away when torch reports no CUDA device;
# otherwise just confirm that one was found.
if torch.cuda.is_available():
    print("GPU is available.")
else:
    raise SystemError("GPU not available! Enable GPU in Kaggle settings.")

# Weights & Biases Login
# The API key is read from the WANDB_API_KEY environment variable, i.e. the
# same name the fallback message below tells the user to set. (The lookup
# previously used a placeholder variable name, so the key was never found and
# login was always skipped.)
# NOTE(review): this only sees the key if it has been exported into
# os.environ — confirm how the Kaggle secret is surfaced to the kernel.
wandb_key = os.environ.get("WANDB_API_KEY")
if wandb_key:
    wandb.login(key=wandb_key)
else:
    print("WANDB_API_KEY not set. Set it as Kaggle secret.")

# Hugging Face Login
import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment first, then look up the
# Hugging Face token and authenticate only when one is actually present.
load_dotenv()
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    print("HF_TOKEN not set. Set it as Kaggle secret.")
else:
    login(token=hf_token)

In [None]:
# 3. Load & Format Dataset
from datasets import load_dataset

# Fetch the dataset from the Hugging Face Hub.
# NOTE(review): the rest of this notebook assumes a "train" split whose rows
# carry "think" and "response" fields — confirm against the dataset card.
dataset = load_dataset(path="FreedomIntelligence/Medical-CoT")

def format_example(ex):
    """Render one dataset record as a single tagged training string.

    Args:
        ex: Mapping for one example. Its "think" and "response" entries are
            read; a missing key or an explicit ``None`` value is treated as
            empty text so the literal word "None" never ends up in the
            training data.

    Returns:
        str: ``"<think>{think}</think> <response>{response}</response>"``.
    """
    think = ex.get("think")
    response = ex.get("response")
    # dict.get's default only covers *missing* keys; null values need their
    # own check.
    if think is None:
        think = ""
    if response is None:
        response = ""
    return f"<think>{think}</think> <response>{response}</response>"

# Add a "text" column holding the tagged string built by format_example.
formatted_ds = dataset.map(lambda record: {"text": format_example(record)})

# Hold out the first 100 rows of the "train" split for validation and keep
# every remaining row for training.
full_train = formatted_ds["train"]
val_data = full_train.select(range(0, 100))
train_data = full_train.select(range(100, len(full_train)))

In [None]:
# 4. Load LLaMA 3.2 3B Model with LoRA from Unsloth
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

# Load the 4-bit quantized base model together with its tokenizer.
# The 3B size belongs to the Llama 3.2 family (Llama 3 has no 3B variant), so
# the checkpoint id must name Llama 3.2 explicitly; the earlier
# "llama-3-3b" id does not correspond to a published checkpoint.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-bnb-4bit",
    max_seq_length=4096,
    dtype=torch.float16,
    load_in_4bit=True,
)

# LoRA hyper-parameters, gathered in one place so they are easy to tune.
lora_settings = {
    "r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}

# Wrap the loaded base model with LoRA adapters; `model` is rebound to the
# wrapped model that the later cells train and save.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

In [None]:
# 5. Tokenize and Preprocess Data
from datasets import Dataset
from transformers import DataCollatorForLanguageModeling

# `train_data` and `val_data` come from `Dataset.select`, so they are already
# `datasets.Dataset` objects. Use them directly instead of rebuilding each one
# row by row with `Dataset.from_list`, which is meant for plain lists of dicts
# and would pull the whole corpus into Python memory for no benefit.
train_ds = train_data
val_ds = val_data

def tokenize_fn(examples):
    """Tokenize the "text" field of a batch into fixed-length sequences.

    Args:
        examples: Batch mapping that contains a "text" entry.

    Returns:
        Whatever the module-level ``tokenizer`` returns when called with
        truncation on and every sequence padded to 512 tokens.
    """
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(examples["text"], **encoding_options)

# Tokenize both splits in batched mode (training split first, then
# validation), rebinding each name to its tokenized dataset.
train_ds, val_ds = (
    split.map(tokenize_fn, batched=True) for split in (train_ds, val_ds)
)

# Causal-LM collation: masked-language-model masking is switched off.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)


In [None]:
# 6. Training the Model with wandb Logging
from transformers import TrainingArguments, Trainer
from evaluate import load

# ROUGE scorer from the `evaluate` library; reused by the metric callback and
# by the comparison cell at the end of the notebook.
rouge = load("rouge")

# Hyper-parameters grouped by concern, then handed to TrainingArguments.
# NOTE(review): recent transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
schedule_options = {
    "evaluation_strategy": "epoch",
    "save_strategy": "epoch",
    "num_train_epochs": 3,
}
optimizer_options = {
    "learning_rate": 2e-5,
    "weight_decay": 0.01,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "gradient_accumulation_steps": 4,
}
logging_options = {
    "logging_dir": "./logs",
    "report_to": "wandb",
    "logging_steps": 10,
}

training_args = TrainingArguments(
    output_dir="./results",
    **schedule_options,
    **optimizer_options,
    **logging_options,
)

def compute_metrics(eval_pred):
    """Compute ROUGE between the model's predicted tokens and the labels.

    Args:
        eval_pred: ``(predictions, labels)`` pair supplied by the Trainer.
            ``predictions`` may be raw logits of shape (batch, seq, vocab),
            a tuple whose first element is those logits, or already-reduced
            token ids of shape (batch, seq). ``labels`` holds token ids with
            -100 at positions the loss ignores.

    Returns:
        dict: The result of ``rouge.compute`` (stemming enabled).
    """
    import numpy as np  # local import: numpy is not imported at notebook level

    predictions, labels = eval_pred
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)

    # Raw logits cannot be decoded; take the most likely token per position.
    if predictions.ndim == 3:
        predictions = predictions.argmax(axis=-1)

    # A causal LM's output at position i is its guess for token i + 1, so
    # drop the last prediction and the first label to line the two up.
    predictions = predictions[:, :-1]
    labels = labels[:, 1:]

    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id
    if pad_id is None:
        pad_id = 0

    # -100 marks ignored positions and is not a valid token id: replace it
    # (in both arrays, so padding is not scored) before decoding.
    ignored = labels < 0
    predictions = np.where(ignored | (predictions < 0), pad_id, predictions)
    labels = np.where(ignored, pad_id, labels)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    return rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

def _logits_to_token_ids(logits, labels):
    """Reduce one evaluation step's logits to predicted token ids.

    The Trainer caches whatever this returns for the whole evaluation set, so
    keeping only the argmax avoids holding a full-vocabulary score vector for
    every evaluated token in memory.

    Args:
        logits: Model output for the step (a tensor, or a tuple whose first
            element is the logits tensor).
        labels: Labels for the step (unused; required by the callback
            signature).

    Returns:
        Tensor of token ids with the vocabulary axis removed.
    """
    if isinstance(logits, tuple):
        logits = logits[0]
    return logits.argmax(dim=-1)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # compute_metrics receives token ids (batch, seq) instead of raw logits.
    preprocess_logits_for_metrics=_logits_to_token_ids,
)

trainer.train()


In [None]:
# 7. Save Model Adapter and Tokenizer
# Write the adapter weights first, then the tokenizer files, into the same
# local folder so they can be uploaded together.
adapter_dir = "lora_medical_model"
for artifact in (model, tokenizer):
    artifact.save_pretrained(adapter_dir)

In [None]:
# 8. Upload to Hugging Face Hub
# Target repository on the Hugging Face Hub; `exist_ok=True` makes re-running
# this cell safe when the repository has already been created.
repo_id = "ArshiaJ05/lora-medical-llama3-3b"
create_repo(repo_id=repo_id, exist_ok=True)

# Push the contents of the locally saved folder to that model repository.
upload_folder(repo_id=repo_id, repo_type="model", folder_path="lora_medical_model")

print(f"Model and tokenizer uploaded to https://huggingface.co/{repo_id}")

In [None]:
# 9. Load Model & Adapter for Inference
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Rebuild the fine-tuned model from the Hub: base checkpoint first, then the
# LoRA adapter and tokenizer that were uploaded to `repo_id`.
# The 3B size belongs to the Llama 3.2 family (Llama 3 has no 3B variant), so
# the base id must name Llama 3.2; it has to be the same checkpoint the
# adapter was trained on.
base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B-bnb-4bit")
peft_model = PeftModel.from_pretrained(base_model, repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# Prompt with the reasoning part only, in the same tag format used for
# training, and let the model continue from there.
input_text = "<think>Patient reports nausea and vomiting...</think>"
inputs = tokenizer(input_text, return_tensors="pt").to(peft_model.device)

outputs = peft_model.generate(**inputs, max_new_tokens=100)
print("Generated response:")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# 10. ROUGE-L Score Comparison
# Placeholder for before/after comparison
# Hard-coded sample strings stand in for real model outputs here; both
# candidates are scored against the same single reference.
refs = ["<response>Ground truth reference</response>"]
preds_before = ["<response>Sample prediction before</response>"]
preds_after = ["<response>Sample prediction after</response>"]

scoring_options = {"references": refs, "use_stemmer": True}
score_before = rouge.compute(predictions=preds_before, **scoring_options)
score_after = rouge.compute(predictions=preds_after, **scoring_options)

for stage, score in (("before", score_before), ("after", score_after)):
    print(f"ROUGE-L {stage} fine-tuning: {score['rougeL']}")

# End of Notebook
print("Notebook complete. Deliverables include Kaggle notebook, wandb logs, HF repo, and evaluation results.")

# This notebook can also be run in Google Colab.