In [1]:
import json
import torch
from datasets import Dataset, load_dataset
from unsloth import FastLanguageModel
from transformers import TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!


In [2]:
'''
path_to_model = 'openhermes-2.5-mistral-7b.Q2_K.gguf'
llm = Llama(model_path = path_to_model, 
            n_ctx = 9999, 
            max_new_tokens = 2048, 
            temperature = 0, 
            cache = False, 
            verbose = True,
            chat_format = 'chatml')
'''

"\npath_to_model = 'openhermes-2.5-mistral-7b.Q2_K.gguf'\nllm = Llama(model_path = path_to_model, \n            n_ctx = 9999, \n            max_new_tokens = 2048, \n            temperature = 0, \n            cache = False, \n            verbose = True,\n            chat_format = 'chatml')\n"

In [3]:
path_to_model = 'teknium/OpenHermes-2.5-Mistral-7B'

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = path_to_model,
    max_seq_length = 4096,
    load_in_4bit = True,
    device_map = "auto"
)

==((====))==  Unsloth 2025.8.1: Fast Mistral patching. Transformers: 4.55.0.
   \\   /|    NVIDIA A10G. Num GPUs = 1. Max memory: 22.069 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 8.6. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [4]:
model = FastLanguageModel.get_peft_model(
    model,
    r=8,
    target_modules=["q_proj", "v_proj"],
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    use_gradient_checkpointing="unsloth",
)

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.8.1 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [5]:
with open('manual_notes_200.json') as json_file:
    labelled_notes = json.load(json_file)

In [6]:
system_prompt = """
You are a strict JSON generator. Only output JSON that matches the schema below.
Do not include any extra text, commentary, or explanations.

Schema:
- imatinib_mentioned: true if the drug imatinib (also known as Gleevec) is mentioned in the note, otherwise false
- related_drugs_mentioned: true if drugs related to imatinib (e.g., dasatinib, nilotinib, bosutinib) are mentioned, otherwise false
- cml_diagnosed: true if chronic myeloid leukemia (CML) is diagnosed, otherwise false
- cml_in_regression: true if chronic myeloid leukemia is mentioned as being in regression, otherwise false
- aml_diagnosed: true if acute myeloid leukemia (AML) is diagnosed, otherwise false
- blast_phase_cml: true if blast phase CML is explicitly mentioned, otherwise false
- bmt_history: true if history of bone marrow transplant (BMT) is mentioned, otherwise false
- acute_phase_cml: true if acute phase CML is explicitly mentioned, otherwise false

Rules:
1. Only mark a field as true if the note clearly indicates it.
2. If the note does not explicitly mention a field, mark it false.
3. The output must always be valid JSON with all eight fields present.
"""


In [7]:
df = pd.read_parquet('latest_notes.parquet')
#first_100 = df['note_text'].iloc[0:200]
first_200 = df['note_text'].iloc[0:200].values

In [8]:
schema_fields = [
    "imatinib_mentioned",
    "related_drugs_mentioned",
    "cml_diagnosed",
    "cml_in_regression",
    "aml_diagnosed",
    "blast_phase_cml",
    "bmt_history",
    "acute_phase_cml"
]

input_json = [
    {
        "system": system_prompt,
        "input": note_text,
        "output": {field: note.get(field, False) for field in schema_fields}
    }
    for note_text, note in zip(first_200, labelled_notes)
]

with open('finetuning_input.json', 'w') as json_file:
    json.dump(input_json, json_file, indent=4)


In [9]:
dataset = load_dataset("json", data_files= "finetuning_input.json")["train"] #"inputs/finetuning_input.json")["train"]

def format_chat(example):
    return {
        "text": f"""<|im_start|>system
{example['system']}<|im_end|>
<|im_start|>user
{example['input']}<|im_end|>
<|im_start|>assistant
{json.dumps(example['output'])}<|im_end|>"""
    }

formatted_dataset = dataset.map(format_chat)


Generating train split: 200 examples [00:00, 3582.23 examples/s]
Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 200/200 [00:00<00:00, 2061.67 examples/s]


In [10]:
# dataset = Dataset.from_list(train_dataset)

# Tokenize
def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length= 4096) #4096)

tokenized_dataset = formatted_dataset.map(tokenize, batched=True)

Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 200/200 [00:00<00:00, 498.46 examples/s]


In [11]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_dataset,
    dataset_text_field="text",
    max_seq_length= 4096, #4096,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=5,
        gradient_accumulation_steps=1,
        num_train_epochs=1,
        learning_rate=2e-4,
        fp16=False,
        bf16=True,
        logging_steps=20,
        save_steps=100,
        optim="adamw_8bit",
        output_dir="outputs",
        save_total_limit=2,
        report_to="none",
    )
)

In [12]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 200 | Num Epochs = 1 | Total steps = 40
O^O/ \_/ \    Batch size per device = 5 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (5 x 1 x 1) = 5
 "-____-"     Trainable parameters = 3,407,872 of 7,245,156,352 (0.05% trained)


Step,Training Loss
20,6.0515
40,4.1642


Unsloth: Will smartly offload gradients to save VRAM!


TrainOutput(global_step=40, training_loss=5.107891273498535, metrics={'train_runtime': 686.1624, 'train_samples_per_second': 0.291, 'train_steps_per_second': 0.058, 'total_flos': 3.4967107141632e+16, 'train_loss': 5.107891273498535})

In [18]:
model = model.merge_and_unload()
model.save_pretrained("merged_model")

AttributeError: 'MistralForCausalLM' object has no attribute 'merge_and_unload'

In [38]:
base_model = "teknium/OpenHermes-2.5-Mistral-7B" 

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(base_model)

# Load your merged fine-tuned weights
model = AutoModelForCausalLM.from_pretrained(
    "merged_model",         
    device_map="auto",
    load_in_4bit=True,
    torch_dtype="auto"       
).eval()

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [39]:
df = pd.read_parquet('latest_notes.parquet')
evaluation_df = df['note_text'].iloc[201:301].values

In [40]:
input_json = [
    {
        "system": system_prompt,
        "input": evaluation_df[ind][:11000],
    }
    for ind in range(len(evaluation_df)) # This now iterates through all rows
]

with open('test_input.json', 'w') as json_file:
    json.dump(input_json, json_file, indent=4)

In [41]:
dataset = load_dataset("json", data_files= "test_input.json")['train']

def format_chat(example):
    return {
        "text": f"""<|im_start|>system
{example['system']}<|im_end|>
<|im_start|>user
{example['input']}<|im_end|>
"""
    }

formatted_dataset = dataset.map(format_chat)

Generating train split: 100 examples [00:00, 2141.40 examples/s]
Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 100/100 [00:00<00:00, 1278.22 examples/s]


In [42]:
def tokenize(example):
    return tokenizer(example["text"], truncation=True, max_length= 4096) #4096)

tokenized_dataset = formatted_dataset.map(tokenize, batched = True)

Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 100/100 [00:00<00:00, 754.09 examples/s]


In [43]:
from tqdm import tqdm
test_outputs = []
for i, example in tqdm(enumerate(tokenized_dataset), total=len(tokenized_dataset), desc="Generating outputs"):
    inputs = tokenizer(example["text"], return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,   
        temperature=0.2,      
        do_sample=False
    )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    test_outputs.append({
        "note": example["text"],
        "output": decoded
    })

Generating outputs:   0%|          | 0/100 [00:00<?, ?it/s]


AttributeError: 'MistralAttention' object has no attribute 'apply_qkv'

In [12]:
with open("test_outputs.json", "w", encoding="utf-8") as f:
    json.dump(test_outputs, f, indent=2, ensure_ascii=False)

In [16]:
evaluation_json = [
    {
        "system": system_prompt,
        "input": note,
        "output": {
            "imatinib_mentioned": False,
            "related_drugs_mentioned": False,
            "cml_diagnosed": False,
            "cml_in_regression": False
        }
    }
    for note in evaluation_df
]


In [None]:
evaluation_json

In [17]:
with open("evaluation_json.json", "w", encoding="utf-8") as f:
    json.dump(evaluation_json, f, indent=2, ensure_ascii=False)