<a href="https://colab.research.google.com/github/sap156/Supervised-Fine-Tuning-UnSloth/blob/main/SFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install unsloth

In [None]:
from unsloth import FastLanguageModel
import torch

In [None]:
# Settings forwarded to FastLanguageModel.from_pretrained and, for the sequence
# length, also to the trainer further down.
load_in_4bit = True      # request 4-bit loading of the base weights
dtype = None             # no explicit dtype is requested from the loader
max_seq_length = 2048    # maximum sequence length, in tokens

In [None]:
from huggingface_hub import login
from google.colab import userdata

# Read the Hugging Face access token from the Colab secret named "HuggingFace"
# so that it never appears in the notebook source, then authenticate with it.
hf_token = userdata.get("HuggingFace")
login(token=hf_token)

In [None]:
# Base checkpoint that is fine-tuned in the rest of the notebook.
base_checkpoint = "unsloth/DeepSeek-R1-Distill-Llama-8B"

# Load the model together with its tokenizer, using the settings chosen above
# and the Hugging Face token obtained during login.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_checkpoint,
    token=hf_token,
    max_seq_length=max_seq_length,
    load_in_4bit=load_in_4bit,
    dtype=dtype,
)


In [None]:
# Inference prompt template with two positional "{}" slots: the first receives
# the medical question; the second sits directly after the opening "<think>"
# tag and is filled with "" at generation time, so the model continues the
# text from that point.
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.

Write a response that appropriately completes the request.

Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:

You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.

Please answer the following medical question.

### Question:

{}

### Response:

<think>{}"""

In [None]:
# Baseline check: ask the not-yet-fine-tuned model one clinical question so the
# answer can be compared with the one produced after training.
question = "A 28-year-old woman presents with a 3-week history of an intensely itchy, red, and scaly rash on the flexor surfaces of her elbows and behind her knees. She reports a personal history of asthma and seasonal allergies. Physical examination reveals lichenification and excoriations in the affected areas. What is the most likely diagnosis, and what is the primary immune pathway involved in this condition?"

# Put the model into inference mode before generating.
FastLanguageModel.for_inference(model)

# The slot after "<think>" is left empty so the model writes its own reasoning.
prompt = prompt_style.format(question, "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=1200,
    use_cache=True,
)

# Decode the batch and print what follows the "### Response:" header of the
# first (and only) sequence.
response = tokenizer.batch_decode(outputs)
first_decoded = response[0]
print(first_decoded.split("### Response:")[1])

In [None]:
# Names of the projection layers that receive LoRA adapters, grouped by the
# part of the transformer block they belong to.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# Wrap the loaded model with LoRA adapters; `model` is rebound to the wrapped
# version that the trainer receives later.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=attention_projections + mlp_projections,
    r=16,                # LoRA rank
    lora_alpha=16,       # LoRA scaling factor
    lora_dropout=0,      # no dropout on the adapter path
    bias="none",         # bias terms are not trained
    use_rslora=False,
    loftq_config=None,
    use_gradient_checkpointing="unsloth",
    random_state=9001,   # seed for adapter initialisation
)


In [None]:
# Training prompt template with three positional "{}" slots, filled in order by
# formatting_prompts_func: the question, the chain of thought placed between
# <think> and </think>, and the final response after the closing tag.
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.

Write a response that appropriately completes the request.

Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:

You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.

Please answer the following medical question.

### Question:

{}

### Response:

<think>

{}

</think>

{}"""


In [None]:
# End-of-sequence string taken from the loaded tokenizer. It is appended to
# every formatted training example so each example carries an explicit end marker.
EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN to signal the end of each example

def formatting_prompts_func(examples):
    """Render a batch of dataset rows into complete training prompts.

    Written for ``Dataset.map(..., batched=True)``, where ``examples`` maps
    each column name to a list holding one value per row.

    Args:
        examples: Mapping with the keys ``"Question"``, ``"Complex_CoT"`` and
            ``"Response"``, each a list of strings. Rows are paired by
            position; if the lists differ in length, the extra entries of the
            longer ones are ignored.

    Returns:
        A dict ``{"text": [...]}`` with one string per row: the row's values
        substituted into ``train_prompt_style`` followed by ``EOS_TOKEN``.

    Raises:
        KeyError: If one of the three expected columns is missing.
    """
    rows = zip(
        examples["Question"],      # the medical question
        examples["Complex_CoT"],   # the reasoning (chain of thought)
        examples["Response"],      # the final answer
    )
    # Loop variables are named after their content; the previous name `input`
    # shadowed the Python builtin of the same name.
    return {
        "text": [
            train_prompt_style.format(question, cot, answer) + EOS_TOKEN
            for question, cot, answer in rows
        ]
    }


In [None]:
from datasets import load_dataset

# Load the first 500 records of the English ("en") configuration.
# `trust_remote_code=True` is deliberately not passed: it opts in to executing
# Python code shipped in the dataset repository, which loading the first
# 500 rows of plain data does not need.
dataset = load_dataset(
    "FreedomIntelligence/medical-o1-reasoning-SFT",
    "en",
    split="train[0:500]",
)

# Add a "text" column holding the fully formatted training prompt of each row.
dataset = dataset.map(formatting_prompts_func, batched=True)

# Display the second formatted example as a sanity check. Indexing the row
# first avoids materialising the whole "text" column just to read one entry.
dataset[1]["text"]


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Decide the mixed-precision mode once: bf16 when the GPU supports it,
# fp16 otherwise.
bf16_available = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="outputs",  # Where checkpoints and logs are stored
    report_to="none",      # Disables logging to external services like WandB
    seed=3407,             # Fixed seed for reproducibility

    # 2 samples per device x 4 accumulation steps = 8 samples per optimizer step.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,

    # Short demo schedule. Use num_train_epochs and warmup_ratio for longer runs.
    max_steps=60,
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",

    optim="adamw_8bit",    # 8-bit AdamW optimizer to reduce memory use
    weight_decay=0.01,

    bf16=bf16_available,
    fp16=not bf16_available,

    logging_steps=10,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",      # column produced by formatting_prompts_func
    max_seq_length=max_seq_length,
    dataset_num_proc=2,             # Number of processes to load/format the dataset
    args=training_args,
)


In [None]:
# Run the fine-tuning loop with the trainer configured above; the value
# returned by train() is kept in trainer_stats for later inspection.
trainer_stats = trainer.train()

In [None]:
# Ask the fine-tuned model the clinical question that was used for the
# baseline check before training.
question = """A 28-year-old woman presents with a 3-week history of an intensely itchy, red, and scaly rash on the flexor surfaces of her elbows and behind her knees.
She reports a personal history of asthma and seasonal allergies. Physical examination reveals lichenification and excoriations in the affected areas.
What is the most likely diagnosis, and what is the primary immune pathway involved in this condition?"""

# Training left the model in training mode; switch back to inference mode.
FastLanguageModel.for_inference(model)

# Build the prompt from the inference template (not the training one), leaving
# the slot after "<think>" empty so the model writes its own reasoning.
prompt = prompt_style.format(question, "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Generate with the fine-tuned model.
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=1200,
    use_cache=True,
)

# Decode the batch and print what follows the "### Response:" header of the
# first (and only) sequence.
response = tokenizer.batch_decode(outputs)
first_decoded = response[0]
print(first_decoded.split("### Response:")[1])

In [None]:
# Destination names for the fine-tuned artifacts. new_model_online is only
# used by the later push-to-hub cell; new_model_local is used just below.
new_model_online = "sap156/DeepSeek-R1-Medical-INSTAGRAM"  # Hugging Face repo name
new_model_local = "DeepSeek-R1-Medical-INSTAGRAM"           # Local folder name

# Save model and tokenizer locally, both into the same folder.
# NOTE(review): `model` was wrapped by get_peft_model earlier, so this
# presumably writes only the LoRA adapter weights rather than a merged
# checkpoint -- confirm before relying on the folder as a standalone model.
model.save_pretrained(new_model_local)
tokenizer.save_pretrained(new_model_local)

In [None]:
# Push the model and tokenizer to the Hugging Face Hub repo named by
# new_model_online. No token is passed here, so the upload relies on the
# login(hf_token) call made near the top of the notebook; that token must
# have write access.
model.push_to_hub(new_model_online)
tokenizer.push_to_hub(new_model_online)