# Step 1: Environment Setup

In [None]:
import os

# Set both opt-out switches in one step, before any model library is imported
# in later cells.
# NOTE(review): presumably these disable FlashAttention / Triton code paths —
# confirm the installed libraries actually honour these variable names.
os.environ.update(
    {
        "FLASH_ATTENTION_FORCE_DISABLED": "1",
        "DISABLE_TRITON": "1",
    }
)


In [None]:
%%capture

# Install Unsloth into the session; the %%capture cell magic on the first line
# silences pip's output. The package is not version-pinned here.
!pip install unsloth # install unsloth

In [None]:
# Pin transformers and trl to exact versions (bitsandbytes and accelerate are
# left unpinned). --no-deps stops pip from installing their dependencies and
# --quiet reduces the log output.
!pip install transformers==4.51.3 trl==0.8.6 bitsandbytes accelerate --no-deps --quiet

### Verify GPU

In [None]:
# Print the NVIDIA GPU status table to confirm an accelerator is attached.
!nvidia-smi # verify GPU

## Import Relevant Packages

In [None]:
from kaggle_secrets import UserSecretsClient
# Modules for fine-tuning
from unsloth import FastLanguageModel
import torch # Import PyTorch
from trl import SFTTrainer # Trainer for supervised fine-tuning (SFT)
from unsloth import is_bfloat16_supported # Checks if the hardware supports bfloat16 precision
# Hugging Face modules
from huggingface_hub import login # Lets you login to API
from transformers import TrainingArguments # Defines training hyperparameters
from datasets import load_dataset # Lets you load fine-tuning datasets
# Import weights and biases
import wandb
# Import kaggle secrets
from kaggle_secrets import UserSecretsClient

# Step 2: Dataset Preparation

In [None]:
import pandas as pd
from datasets import Dataset

# Download the English ("en") configuration of the medical reasoning SFT
# dataset from the Hugging Face Hub.
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en")

# Export the train split with the dataset's own pandas conversion instead of
# pushing the Dataset object through the generic DataFrame constructor.
df = dataset["train"].to_pandas()

# Sanity check: column names and the first two records.
print("Columns:", df.columns)
print(df.head(2))



## Combine columns into a formatted prompt-response format

In [None]:
# Combine columns into a formatted prompt-response format
def format_example(row):
    """Render one dataset record as a single training string.

    Args:
        row: Mapping-like record (dict or pandas row) exposing the keys
            'Question', 'Complex_CoT' and 'Response'.

    Returns:
        A dict with one key, "text", holding the three sections in the order
        question, reasoning, answer, separated by blank lines.
    """
    question_block = f"### Question:\n{row['Question']}"
    reasoning_block = f"### Reasoning:\n{row['Complex_CoT']}"
    answer_block = f"### Answer:\n{row['Response']}"
    return {"text": "\n\n".join((question_block, reasoning_block, answer_block))}

# Format every row (one dict per row), then expand those dicts into a
# DataFrame whose only column is "text".
formatted_data = df.apply(format_example, axis=1)
formatted_df = pd.DataFrame.from_records(formatted_data.to_list())


## Split dataset

In [None]:
# Hold out a fixed, seeded sample of 100 rows for validation; every remaining
# row is used for training.
val_df = formatted_df.sample(n=100, random_state=42)
train_df = formatted_df.drop(val_df.index)

# Convert to Hugging Face datasets. After sample()/drop() the pandas index is
# no longer a plain range, and from_pandas would otherwise store it as an
# extra "__index_level_0__" column; preserve_index=False keeps only "text".
train_dataset = Dataset.from_pandas(train_df, preserve_index=False)
val_dataset = Dataset.from_pandas(val_df, preserve_index=False)

# Display one training example.
print(train_dataset[0])

# Step 3: Load LLaMA 3.2 (3B) & Set Fine-Tuning Strategy Using Unsloth

## 1. Load the Model (4-bit, with LoRA)

In [None]:
# NOTE(review): UserSecretsClient, FastLanguageModel and wandb are already
# imported in the import cell above; AutoTokenizer is imported here but is not
# used in any of the visible cells.
from kaggle_secrets import UserSecretsClient
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

# Read both credentials from Kaggle secrets so no token is hard-coded in the
# notebook. "HF_Tokens" and "wnb" are the secret labels this notebook expects.
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_Tokens")
wandb_token = user_secrets.get_secret("wnb")


# Authenticate with Weights & Biases using the secret read above.
import wandb
wandb.login(key=wandb_token)

# Load the base model together with its tokenizer. The same 2048 sequence
# length is used again by the tokenization and generation cells below.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = 2048,
    dtype = None,     # Let Unsloth choose the best dtype (float16, etc.)
    load_in_4bit = True,
    token = hf_token,
)

# Switch the model to training mode with gradient checkpointing enabled.
# NOTE(review): this runs before the LoRA adapters are attached in the next
# section — confirm the ordering is intentional.
FastLanguageModel.for_training(model,
    use_gradient_checkpointing = True,
)




```
# This is formatted as code
```

## 2. Prepare the Model for Training with LoRA

In [None]:
# Wrap the loaded model with LoRA adapters; the PEFT-wrapped result replaces
# `model` for every later cell.
model = FastLanguageModel.get_peft_model(
    model,
    bias = "none",         # bias terms are not tuned
    lora_dropout = 0.0,    # no dropout on the adapters
    lora_alpha = 32,       # LoRA scaling factor
    r = 16,                # LoRA rank
)

## 3. Tokenize the Dataset

In [None]:
def tokenize(example):
    """Tokenize ``example["text"]`` to a fixed length and build causal-LM labels.

    The text is truncated or padded to exactly 2048 tokens. Labels mirror
    ``input_ids`` except at padding positions (``attention_mask == 0``), which
    are set to -100 so the loss ignores them instead of training the model to
    predict padding.

    Args:
        example: One record exposing a ``"text"`` field, or a batch of records
            when used with ``Dataset.map(batched=True)``.

    Returns:
        The tokenizer output with an additional ``"labels"`` entry.
    """
    tokenized = tokenizer(
        example["text"],
        truncation = True,
        padding = "max_length",
        max_length = 2048
    )

    def _mask_padding(ids, mask):
        # -100 is the ignore index of the cross-entropy loss used for labels.
        return [token if keep else -100 for token, keep in zip(ids, mask)]

    input_ids = tokenized["input_ids"]
    attention_mask = tokenized["attention_mask"]
    if input_ids and isinstance(input_ids[0], (list, tuple)):
        # Batched call: one list of ids per example.
        tokenized["labels"] = [
            _mask_padding(ids, mask) for ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        tokenized["labels"] = _mask_padding(input_ids, attention_mask)
    return tokenized

In [None]:
# Tokenize both splits one example at a time (non-batched map); each dataset
# variable is rebound to its tokenized version.
train_dataset = train_dataset.map(function=tokenize)
val_dataset = val_dataset.map(function=tokenize)

## 4. Set Training Arguments

In [None]:
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# The model dtype is chosen automatically at load time (dtype=None), so the
# mixed-precision mode is derived from the hardware as well instead of being
# hard-coded to fp16: bf16 where the GPU supports it, fp16 otherwise.
training_args = TrainingArguments(
    output_dir = "llama3-medical-finetuning",  # Where the model checkpoints will be saved
    per_device_train_batch_size = 2,  # Sequences per device per forward/backward pass
    gradient_accumulation_steps = 2,  # Effective batch per device = 2 * 2 = 4 sequences per optimizer step
    max_steps = 60,  # Small number for a quick test run; takes precedence over num_train_epochs
    logging_steps = 1,  # Logs every step for debugging
    save_steps = 10,  # Saves a checkpoint every 10 steps
    learning_rate = 2e-4,  # A good starting point for PEFT
    num_train_epochs = 1,  # Only takes effect if max_steps is removed or set to -1
    fp16 = not is_bfloat16_supported(),  # fp16 mixed precision on GPUs without bf16 support
    bf16 = is_bfloat16_supported(),  # bf16 mixed precision where the hardware supports it
    optim = "adamw_torch",  # Preferable over "paged_adamw_32bit" if that caused issues
    lr_scheduler_type = "cosine",  # Smooth learning rate curve
    warmup_steps = 5,  # Start with low LR for stability
    report_to = "wandb",  # Send training metrics to Weights & Biases
)

## 5. formatting_func for Your Dataset

In [None]:
# Show which columns the validation frame currently has.
print(list(val_df.columns))


In [None]:
# Print the formatted text of the first validation row.
print(val_df.iloc[0]["text"])


## Extract Question, Reasoning, and Answer with Regex

In [None]:
import re

def extract_fields(text):
    """Split a formatted example back into its three sections.

    Args:
        text: A string laid out as "### Question:", "### Reasoning:" and
            "### Answer:" sections, in that order.

    Returns:
        A dict with the keys "Question", "Complex_CoT" and "Response" (in that
        order). Each value is the stripped section body, or None when that
        section's pattern does not match.
    """
    section_patterns = {
        "Question": r"### Question:\n(.+?)\n### Reasoning:",
        "Complex_CoT": r"### Reasoning:\n(.+?)\n### Answer:",
        "Response": r"### Answer:\n(.+)",
    }

    fields = {}
    for column, pattern in section_patterns.items():
        # DOTALL lets a section body span several lines.
        found = re.search(pattern, text, re.DOTALL)
        fields[column] = None if found is None else found.group(1).strip()
    return fields

# Parse every validation text into its Question / Complex_CoT / Response
# parts, keeping the validation index so the rows line up on concat.
parsed_df = pd.DataFrame(
    val_df["text"].map(extract_fields).tolist(),
    index=val_df.index,
)

# Attach the parsed columns. Columns left over from a previous run of this
# cell are dropped first, so re-running does not create duplicate
# "Question"/"Complex_CoT"/"Response" columns.
val_df = pd.concat(
    [val_df.drop(columns=parsed_df.columns, errors="ignore"), parsed_df],
    axis=1,
)


In [None]:
def formatting_func(example):
    """Return the full prompt/response training string for one record.

    Records that carry the separate 'Question', 'Complex_CoT' and 'Response'
    fields (such as the parsed validation rows) are formatted from those
    fields. Records that only carry the pre-formatted "text" field (such as
    the rows of the training dataset) return that text unchanged instead of
    raising KeyError.

    Args:
        example: Mapping-like record (dict, dataset row or pandas row).

    Returns:
        The formatted string with question, reasoning and answer sections.

    Raises:
        KeyError: If the record has neither the three fields nor "text".
    """
    field_names = ("Question", "Complex_CoT", "Response")
    if not all(name in example for name in field_names):
        # Only the already-formatted text is available for this record.
        return example["text"]

    question = example["Question"]
    reasoning = example["Complex_CoT"]
    response = example["Response"]

    return f"### Question:\n{question}\n\n### Reasoning:\n{reasoning}\n\n### Answer:\n{response}"


## SFTTrainer Setup

In [None]:
from trl import SFTTrainer

# Assemble the supervised fine-tuning trainer from the LoRA-wrapped model, the
# tokenized training split and the training arguments defined above.
# NOTE(review): the training split is already tokenized while formatting_func
# and packing are also supplied — confirm which of these the pinned trl
# version actually applies.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    packing=True,
    formatting_func=formatting_func,
)

## ROUGE-L Score Before Training (Baseline)

In [None]:
# Confirm the parsed Question / Complex_CoT / Response columns are present.
print(val_df.columns.to_list())


In [None]:
# Install the metric packages (quietly; versions are not pinned here).
!pip install -q evaluate rouge_score

# Imported only after the install line above has run.
import evaluate
# Load the ROUGE metric once; `rouge` is reused for both the baseline score
# and the post-fine-tuning score.
rouge = evaluate.load("rouge")

# Get baseline predictions
def generate_response_baseline(example, max_new_tokens=200):
    """Generate the model's continuation for one validation question.

    The prompt contains only the question followed by the opening of the
    reasoning section. The reference reasoning and answer are deliberately
    left out: putting them in the prompt would hand the model the very text
    it is scored against. Only the newly generated tokens are returned, so
    the prompt is not counted as part of the prediction.

    Args:
        example: Record exposing a 'Question' field (e.g. a val_df row).
        max_new_tokens: Generation budget; defaults to 200.

    Returns:
        The decoded continuation as a string, without special tokens.
    """
    prompt = f"### Question:\n{example['Question']}\n\n### Reasoning:\n"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # generate() returns prompt tokens followed by the continuation; slice the
    # prompt off before decoding.
    prompt_length = inputs["input_ids"].shape[1]
    # NOTE(review): with a small budget the continuation may stop inside the
    # reasoning section, before any answer is produced — raise max_new_tokens
    # if answers should be scored.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# Generate one baseline prediction per validation row.
val_df["baseline_pred"] = [
    generate_response_baseline(row) for _, row in val_df.iterrows()
]

# Score the predictions against the reference responses and report ROUGE-L.
baseline_scores = rouge.compute(
    references=list(val_df["Response"]),
    predictions=list(val_df["baseline_pred"]),
    use_stemmer=True,
)
print("ROUGE-L Before Fine-Tuning:", baseline_scores["rougeL"])


## Training

This will:

- Start supervised fine-tuning on the medical dataset.
- Log metrics (e.g., loss) to the console and to Weights & Biases (since we're using `report_to="wandb"`).

In [None]:
import wandb

# Start the W&B run before training, with init_timeout raised to 120
# (presumably seconds — confirm against the wandb settings reference).
wandb.init(
    settings=wandb.Settings(init_timeout=120),
)


In [None]:
# Run fine-tuning with the trainer configured above; the call's return value
# is displayed as this cell's output.
trainer.train()

## After Training (Post Fine-Tuning Score)

In [None]:
# Run predictions again, now with the fine-tuned weights held in `model`.
def generate_response_finetuned(example, max_new_tokens=200):
    """Generate the fine-tuned model's continuation for one validation question.

    The prompt contains only the question followed by the opening of the
    reasoning section. The reference reasoning and answer are deliberately
    left out: putting them in the prompt would hand the model the very text
    it is scored against. Only the newly generated tokens are returned, so
    the prompt is not counted as part of the prediction.

    Args:
        example: Record exposing a 'Question' field (e.g. a val_df row).
        max_new_tokens: Generation budget; defaults to 200.

    Returns:
        The decoded continuation as a string, without special tokens.
    """
    prompt = f"### Question:\n{example['Question']}\n\n### Reasoning:\n"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # generate() returns prompt tokens followed by the continuation; slice the
    # prompt off before decoding.
    prompt_length = inputs["input_ids"].shape[1]
    # NOTE(review): with a small budget the continuation may stop inside the
    # reasoning section, before any answer is produced — raise max_new_tokens
    # if answers should be scored.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# Generate one post-training prediction per validation row.
val_df["finetuned_pred"] = [
    generate_response_finetuned(row) for _, row in val_df.iterrows()
]

# Score against the same references as the baseline and report ROUGE-L.
finetuned_scores = rouge.compute(
    references=list(val_df["Response"]),
    predictions=list(val_df["finetuned_pred"]),
    use_stemmer=True,
)
print("ROUGE-L After Fine-Tuning:", finetuned_scores["rougeL"])


## Save the Model

In [None]:
# Local directory that receives the fine-tuned artefacts.
save_path = "llama3-medical-finetuned"

# Write the model and the tokenizer into the same directory.
# NOTE(review): `model` is the PEFT-wrapped model, so presumably only the
# LoRA adapter weights are written — confirm before relying on this folder
# as a standalone checkpoint.
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)



In [None]:
from huggingface_hub import login

# Authenticate with the token already read from Kaggle secrets instead of the
# interactive notebook_login() widget, so the notebook can run top to bottom
# without manual input. The token needs write access for the uploads below.
login(token=hf_token)


In [None]:
# Upload the model to the "SaadKabeer/llama3-medical-finetuned" repository on
# the Hugging Face Hub (requires the login performed above).
model.push_to_hub("SaadKabeer/llama3-medical-finetuned")



In [None]:
# Upload the tokenizer files to the same Hub repository as the model.
tokenizer.push_to_hub("SaadKabeer/llama3-medical-finetuned")