# Unsloth

In [None]:
# Mount Google Drive at /content/drive; the CSV files read later in this
# notebook live under '/content/drive/My Drive/...'.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Step 1: Environment Setup
# Quiet (-q) install of the packages used for loading, quantising and
# fine-tuning the model in this notebook.
!pip -q install transformers datasets accelerate bitsandbytes peft

In [None]:
import pandas as pd
import torch
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
import requests
import os
import json

In [None]:
import numpy as np
import time
from tqdm import trange

In [None]:
%%capture
# Install Unsloth from PyPI first, then force-reinstall the package itself
# from the GitHub repository without touching its dependencies (--no-deps).
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
# Load the full order table plus its train and test splits from Google Drive.
context, df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/My Drive/BT5153_2025/Group Project/context.csv',
        '/content/drive/My Drive/BT5153_2025/Group Project/context_train.csv',
        '/content/drive/My Drive/BT5153_2025/Group Project/context_test.csv',
    )
)

In [None]:
# Preview the first rows of the order table.
context.head()

Unnamed: 0,order_id,patient_med_info,open_fda_api,train_test,medication,ground_truth_label,ground_truth_reason,ground_truth_monograph_chunk
0,1,"Patient is a 71 years old female, height 1.56m...",https://api.fda.gov/drug/label.json?search=ope...,train,METOCLOPRAMIDE,unsafe,This order is not safe because symptoms of Par...,Patients with preexisting Parkinson's disease ...
1,2,"Patient is a 65 years old male, height 1.8m an...",https://api.fda.gov/drug/label.json?search=ope...,test,METOCLOPRAMIDE,unsafe,This order is not safe because symptoms of Par...,Patients with preexisting Parkinson's disease ...
2,3,"Patient is a 31 years old male, height 1.68m a...",https://api.fda.gov/drug/label.json?search=ope...,train,METOCLOPRAMIDE,safe,This order is safe because it aligns with the ...,Administer from 10 mg to 15 mg metoclopramide ...
3,4,"Patient is a 30 years old female, height 1.64m...",https://api.fda.gov/drug/label.json?search=ope...,test,METOCLOPRAMIDE,safe,This order is safe because it aligns with the ...,Administer from 10 mg to 15 mg metoclopramide ...
4,5,"Patient is a 70 years old male, height 1.62m a...",https://api.fda.gov/drug/label.json?search=ope...,train,IBUPROFEN,unsafe,This order is not safe because use of nonstero...,"Heart attack and stroke warning: NSAIDs, excep..."


In [None]:
# Build two lookups keyed by medication name:
#   med_url_dict        -> openFDA drug-label query URL for that medication
#   med_monography_dict -> monograph text used in the prompts, or an error message

# Label sections concatenated into the monograph text, in prompt order.
# The openFDA label field is spelled "contraindications" (plural); looking up the
# singular form never matches, which silently drops that section.
MONOGRAPH_SECTIONS = (
    'do_not_use',
    'dosage_and_administration',
    'boxed_warning',
    'pregnancy',
    'warnings',
    'contraindications',
)

# Fail instead of hanging the notebook forever on a stalled HTTP request.
REQUEST_TIMEOUT_SECONDS = 30


def build_monograph_text(label):
    """Join the available monograph sections of one openFDA label record.

    Args:
        label: dict for a single label record (one element of the response's
            "results" list). Each section key maps to a list of strings.

    Returns:
        The first entry of every section in MONOGRAPH_SECTIONS that is present
        and non-empty, joined with single spaces. Empty string if none is present.
    """
    texts = []
    for section in MONOGRAPH_SECTIONS:
        entries = label.get(section)
        # Missing sections are skipped rather than raising KeyError.
        if entries and entries[0]:
            texts.append(entries[0])
    return " ".join(texts)


# One URL per medication; if a medication appears on several rows the last row wins,
# exactly as with repeated dictionary assignment.
med_url_dict = dict(zip(context['medication'], context['open_fda_api']))
med_monography_dict = {}

for med, url in med_url_dict.items():
    # Send a GET request
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

    if response.status_code != 200:
        med_monography_dict[med] = f"Error: {response.status_code}, drug monography not available"
        continue

    # Parse the JSON response; only the first matching label record is used.
    results = response.json().get('results') or []
    combined_text = build_monograph_text(results[0]) if results else ""

    if combined_text:
        med_monography_dict[med] = f"Drug Monography Information:\n{combined_text}"
    else:
        # A 200 response without any usable section is treated like a failed lookup
        # so the prompt never contains an empty monograph.
        med_monography_dict[med] = "Error: empty label, drug monography not available"

In [None]:
# # Convert dict to DataFrame
# df_monograph = pd.DataFrame(list(med_monography_dict.items()), columns=['medication', 'monograph'])

# # Save to CSV
# df_monograph.to_csv('med_monograph_v2.csv', index=False)

In [None]:
from unsloth import FastLanguageModel

max_seq_length = 8192 # or 25000
dtype = None
load_in_4bit = True

# Never hard-code a Hugging Face token in the notebook. Read it from the
# HF_TOKEN environment variable instead; when the variable is unset this is
# None and no explicit token is passed.
hf_token = os.environ.get("HF_TOKEN")

# Load the 4-bit quantised base model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hf_token,
)


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# Baseline prompt (without prompt engineering).
# Two positional placeholders: the first receives the question text, the second
# is the text placed right after the opening <think> tag (the inference loop
# passes an empty string so the model writes its own reasoning).
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question. Is the medication order safe for the patient?

### Question:
{}

### Response:
<think>{}"""

### Sample Response without SFT

In [None]:
from tqdm import trange

In [None]:
# Baseline run: generate one response per order with the model before fine-tuning.
output = []

# Putting the model into inference mode does not depend on the loop variable,
# so do it once instead of on every iteration.
FastLanguageModel.for_inference(model)

# First 50 orders, or fewer if the table is shorter (avoids an IndexError).
n_orders = min(50, len(context))

for i in trange(n_orders):
  sample_patient_info = context.iloc[i]['patient_med_info']
  sample_med_name = context.iloc[i]['medication']
  monography_info = f"{med_monography_dict[sample_med_name]}."
  # NOTE(review): med_monography_dict values already start with
  # "Drug Monography Information:", so this header appears twice in the prompt.
  # Left unchanged to keep the baseline prompt identical - confirm whether intended.
  full_prompt = f"{sample_patient_info}\n\n Drug Monography Information:\n{monography_info}"
  question = full_prompt # try sample_patient_info without monograph, then full_prompt with monograph, then with prompt engineer + SFT (below)

  inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

  outputs = model.generate(
      input_ids=inputs.input_ids,
      attention_mask=inputs.attention_mask,
      max_new_tokens=1500,
      use_cache=True,
  )
  response = tokenizer.batch_decode(outputs)
  # The decoded text contains the prompt too; keep what follows the response header.
  answer = response[0].split("### Response:")[1]
  # order_id is derived from the row position (assumes rows are ordered 1..n).
  output.append({'order_id': i + 1, 'response': answer})

100%|██████████| 50/50 [1:01:16<00:00, 73.54s/it]


In [None]:
# Collect the baseline responses (one dict per order) into a DataFrame and
# check its shape.
output_untrain = pd.DataFrame(output)
output_untrain.shape

(50, 2)

In [None]:
# Show the first response's final answer: the text after the closing </think>
# tag, cut at the first "<｜end" marker (presumably the end-of-sequence token).
print(output_untrain['response'][0].split("</think>")[1].split("<｜end")[0])



The medication order for metoclopramide is considered safe for the patient at this time, given the current duration of treatment (three weeks) and the fact that the recommended maximum duration is 12 weeks. However, it is crucial to closely monitor the patient for any signs of tardive dyskinesia (TD), such as involuntary movements, and for potential QT prolongation on an electrocardiogram. Additionally, the patient's renal function should be monitored due to her slightly decreased eGFR. If the treatment needs to be continued beyond three weeks, a careful assessment of the risks versus benefits should be performed, including regular monitoring for TD and QT changes.


In [None]:
def _answer_after_think(response):
    """Return the final answer that follows the model's reasoning block.

    Args:
        response: decoded model output; expected to be a string.

    Returns:
        The text between the first "</think>\\n\\n" and the next "<｜end" marker.
        An empty string when `response` is not a string or when the closing
        think marker is missing (e.g. generation stopped at the token limit
        before the reasoning finished), instead of raising IndexError.
    """
    if not isinstance(response, str):
        return ''
    segments = response.split("</think>\n\n")
    if len(segments) < 2:
        return ''
    return segments[1].split("<｜end")[0]


output_untrain['response_without_think'] = output_untrain['response'].apply(_answer_after_think)
output_untrain.head()

Unnamed: 0,order_id,response,response_without_think
0,1,"\n<think>\nOkay, so I need to determine if the...",The medication order for metoclopramide is not...
1,2,"\n<think>\nOkay, so I'm trying to figure out i...",The medication order for oral metoclopramide 1...
2,3,"\n<think>\nOkay, so I need to determine if the...",The medication order for metoclopramide is con...
3,4,"\n<think>\nOkay, I'm trying to figure out if t...",The medication order for oral metoclopramide 1...
4,5,"\n<think>\nOkay, I need to determine if the me...",The medication order for ibuprofen is not safe...


In [None]:
# Persist the baseline responses without the DataFrame index.
# NOTE(review): the file name says "without_monograph", but the generation loop
# currently sets question = full_prompt (which includes the monograph) - confirm
# the name matches the run being saved.
output_untrain.to_csv("output_untrain_without_monograph.csv", index=False)

### Supervised Fine-Tuning (SFT) with Unsloth

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

In [None]:
# Training prompt template with three positional placeholders, filled in this
# order by formatting_prompts_func: the question, the chain of thought (placed
# between <think> and </think>), and the final response.
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning and treatment planning.
You are provided with a medication order for a patient, with patient and drug monography information.
Please answer if the medication order is safe for the patient? If drug monography is not available, the order is considered unsafe due to insufficient information.

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

In [None]:
# Create a "text" column in the dataset, which consists of the train prompt style
# Fill the placeholders with questions, chains of thought, and answers

# End-of-sequence token string taken from the tokenizer; it is appended to every
# training example by formatting_prompts_func.
EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    """Render a batch of examples into full training strings.

    Args:
        examples: batch mapping with the parallel lists "Question",
            "Complex_CoT" and "Response".

    Returns:
        A dict with a single key "text" holding one string per example:
        train_prompt_style filled with (question, chain of thought, response)
        and followed by EOS_TOKEN.
    """
    rendered = [
        train_prompt_style.format(question, reasoning, answer) + EOS_TOKEN
        for question, reasoning, answer in zip(
            examples["Question"], examples["Complex_CoT"], examples["Response"]
        )
    ]
    return {"text": rendered}

In [None]:
# Read the chain-of-thought examples and add the rendered "text" column
# (formatting_prompts_func is applied batch-wise).
df = pd.read_csv('/content/drive/My Drive/BT5153_2025/Group Project/cot.csv')
dataset = Dataset.from_pandas(df).map(
    formatting_prompts_func,
    batched=True,
)

Map:   0%|          | 0/25 [00:00<?, ? examples/s]

In [None]:
# Display the dataset summary (columns and row count).
dataset

Dataset({
    features: ['Question', 'Complex_CoT', 'Response', 'text'],
    num_rows: 25
})

In [None]:
# LoRA adapter configuration, collected in one place and unpacked into the call.
lora_settings = dict(
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    # Projection layers that receive LoRA adapters.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# Wrap the loaded base model with the LoRA adapters (rebinds `model`).
model = FastLanguageModel.get_peft_model(model, **lora_settings)

Unsloth 2025.3.19 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
from transformers import DataCollatorForLanguageModeling

# Collator handed to the SFTTrainer below; built on the same tokenizer as the model.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # IMPORTANT: Turn off masked language modeling!
)

In [None]:
# Probe bf16 support once; fp16 is used exactly when bf16 is not available.
use_bf16 = is_bfloat16_supported()

# Optimisation hyper-parameters for the fine-tuning run.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs = 3, # stay as 3, > 5 response is bad
    # warmup_steps=5,
    # max_steps=64, # max steps or epochs
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=5,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to = 'none'
)

# Supervised fine-tuning on the rendered "text" column of the dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field ="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    data_collator=data_collator,
    args=training_args,
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/25 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning and report its wall-clock duration.
start = time.time()
trainer_stats = trainer.train()
end = time.time()

difference = end - start
print(f"Training time: {difference} seconds")

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 25 | Num Epochs = 3 | Total steps = 9
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,1.4868


Training time: 468.4738087654114 seconds


In [None]:
# # Save the model
# # new_model_local = "DeepSeek-R1-Med-Verif-COT_v1"
# model.save_pretrained("base_model")
# tokenizer.save_pretrained("base_model")

# model.save_pretrained_merged(new_model_local, tokenizer, save_method = "merged_16bit",)

In [None]:
# UPDATED PROMPT
# Rebinds prompt_style (replacing the baseline prompt defined earlier) with
# instructions that restrict the model to the supplied drug monography.
# Same two positional placeholders: question text, then text after <think>.
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
You are provided with a medication order for a patient. Patient information and drug monography is also provided.
You can only use information from the drug monography. If drug monography information is not available, the order is considered as unsafe due to insufficient information.
Is the medication order safe for the patient?

### Question:
{}

### Response:
<think>{}"""

In [None]:
# Fine-tuned run: generate one response per order and record how long each
# generation takes.
FastLanguageModel.for_inference(model)

output = []
time_data = []

# First 50 orders, or fewer if the table is shorter (avoids an IndexError).
n_orders = min(50, len(context))

for i in trange(n_orders):
  sample_patient_info = context.iloc[i]['patient_med_info']
  sample_med_name = context.iloc[i]['medication']
  monography_info = f"{med_monography_dict[sample_med_name]}. \n\n Is the medication order safe for this patient?"
  full_prompt = f"{sample_patient_info}\n\n{monography_info}"
  question = full_prompt

  inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")
  # Only the generate() call is timed; tokenisation and decoding are excluded.
  start = time.time()
  outputs = model.generate(
      input_ids=inputs.input_ids,
      attention_mask=inputs.attention_mask,
      max_new_tokens=2500,
      # temperature only affects sampling, so request sampling explicitly
      # instead of relying on the model's default generation config.
      do_sample=True,
      temperature = 0.5,
      use_cache=True)
  end = time.time()
  difference = end - start
  time_data.append(difference)

  response = tokenizer.batch_decode(outputs)
  # The decoded text contains the prompt too; keep what follows the response header.
  answer = response[0].split("### Response:")[1]
  # order_id is derived from the row position (assumes rows are ordered 1..n).
  output.append({'order_id': i + 1, 'response': answer})

100%|██████████| 50/50 [43:44<00:00, 52.49s/it]


In [None]:
# Collect the fine-tuned model's responses (one dict per order) into a
# DataFrame and preview it.
output_trained_temp = pd.DataFrame(output)
output_trained_temp.head()

Unnamed: 0,order_id,response
0,1,"\n<think>\nAlright, I'm trying to figure out i..."
1,2,"\n<think>\nAlright, I need to determine if the..."
2,3,"\n<think>\nOkay, so I need to figure out if th..."
3,4,"\n<think>\nAlright, I'm trying to determine if..."
4,5,"\n<think>\nAlright, let's tackle this question..."


In [None]:
# Inspect the raw response stored under index label 9.
output_trained_temp['response'][9]

"\n<think>\nThe doctor ordered oral metformin 500mg two times a day (after meals). According to the drug monography, metformin hydrochloride extended-release tablets are contraindicated in patients with an estimated glomerular filtration rate (eGFR) below 30 mL/minute/1.73 m². The patient's eGFR is 40.0 mL/min/1.73 m², which is above the threshold for contraindication. However, the patient has a history of recurrent cellulitis and newly diagnosed type 2 diabetes mellitus, which may increase the risk of developing lactic acidosis. The doctor should monitor the patient's renal function and discontinue metformin hydrochloride extended-release tablets if the eGFR falls below 45 mL/minute/1.73 m². Additionally, the patient should be closely monitored for signs and symptoms of lactic acidosis, especially in the presence of other risk factors such as age greater than 65 years, concomitant use of certain medications, and excessive alcohol intake. The doctor should also inform the patient about

In [None]:
def _final_answer(response):
    """Return the answer text that follows the model's reasoning block.

    Args:
        response: decoded model output; expected to be a string.

    Returns:
        The text between the first "</think>\\n" and the next "<｜end" marker,
        or NaN when `response` is not a string or has no closing think marker
        (e.g. generation stopped before the reasoning finished).
    """
    if not isinstance(response, str) or "</think>\n" not in response:
        return np.nan
    return response.split("</think>\n")[1].split("<｜end")[0]


# Build the whole column in one pass with an object dtype. Writing strings one
# by one into a float64 NaN column triggers pandas' incompatible-dtype warning,
# and a fixed range(50) breaks if the frame has a different number of rows.
output_trained_temp['response_without_think'] = (
    output_trained_temp['response'].apply(_final_answer).astype(object)
)

The medication order for metoclopramide is considered safe for this patient given the short-term use and existing monitoring. However, caution is advised due to the patient's medical history, particularly her Parkinson's disease, which increases the risk of tardive dyskinesia and extrapyramidal symptoms. The patient should be closely monitored for any signs of these conditions.

**Final Answer:**
The medication order is generally safe for this patient, but caution is advised due to her medical history.' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  output_trained_temp.loc[i, 'response_without_think'] = response.split("</think>\n")[1].split("<｜end")[0]


In [None]:
# Attach the per-order generation time in seconds (time_data holds one
# time.time() difference per generated response) and preview the frame.
output_trained_temp['inference_time'] = time_data
output_trained_temp.head()

Unnamed: 0,order_id,response,response_without_think,inference_time
0,1,"\n<think>\nAlright, I'm trying to figure out i...",\nThe medication order for metoclopramide is c...,42.055704
1,2,"\n<think>\nAlright, I need to determine if the...",\nThe medication order for metoclopramide is n...,35.927823
2,3,"\n<think>\nOkay, so I need to figure out if th...",\nThe medication order for metoclopramide is n...,61.085153
3,4,"\n<think>\nAlright, I'm trying to determine if...",\nThe medication order for metoclopramide is s...,65.526168
4,5,"\n<think>\nAlright, let's tackle this question...",\nThe medication order for ibuprofen 200mg thr...,32.083213


In [None]:
# Persist the fine-tuned model's responses and timings without the DataFrame index.
output_trained_temp.to_csv("output_trained_v6_temp.csv", index=False)