In [None]:
# Import libraries
import numpy as np
import os
from PIL import Image
import random

# Seed Python's built-in RNG so any later use of `random` is repeatable
random.seed(42)

# Dataset locations on the mounted Drive (the base path keeps its trailing slash,
# so the file/folder names are appended directly)
main_data_path = "/content/drive/MyDrive/ENGSCI/4TH YEAR/fall 4th year/ESC499/Code Test/Finetuning/Data/"
training_data_indices = f"{main_data_path}INDICES.npz"
training_data_path = f"{main_data_path}training"
test_data_path = f"{main_data_path}test"

In [None]:
# 42s 44s
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git


In [None]:
!pip install --upgrade torch torchvision torchaudio # Upgrading PyTorch to the latest version
import torchvision
from unsloth import FastVisionModel # FastLanguageModel for LLMs
import torch
from unsloth import is_bf16_supported
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig
from transformers import TextStreamer
from transformers import TrainerCallback, TrainingArguments

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# Mount Google Drive at /content/drive (Colab-only); the dataset and results
# paths used in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Functions (TEST VERSION)

def load_scan_from_npz(file_path):
    """Load one scan archive and return its five arrays.

    Parameters
    ----------
    file_path : str or path-like
        Location of an ``.npz`` archive holding the keys ``voxel``, ``ax``,
        ``sag``, ``cor`` and ``label``.

    Returns
    -------
    tuple
        ``(voxel, ax, sag, cor, label)`` as NumPy arrays, in that order.

    Raises
    ------
    KeyError
        If one of the expected keys is missing from the archive.
    """
    # np.load on an .npz returns a lazily-read NpzFile that keeps the file
    # open; the context manager closes it once every array has been read.
    with np.load(file_path) as data:
        return data['voxel'], data['ax'], data['sag'], data['cor'], data['label']

def create_image_dataset(file_path, margin=20):
    """Convert one scan archive into a list of labelled slice images.

    The archive is read with ``load_scan_from_npz``. Its ``ax`` entry is
    unpacked as a ``(begin, end)`` pair of indices into the first axis of
    ``voxel``. Slices ``begin + margin`` through ``end - margin`` (inclusive)
    are each min-max rescaled to 0-255, cast to ``uint8`` and wrapped in a PIL
    image. Every slice is paired with the archive's single ``label`` value.

    Parameters
    ----------
    file_path : str or path-like
        Path of the ``.npz`` archive for one patient.
    margin : int, optional
        Number of slices skipped at each end of the ``begin..end`` range.
        Defaults to 20, the value that used to be hard-coded.

    Returns
    -------
    list of dict
        One ``{"image": PIL.Image, "label": label}`` entry per kept slice;
        empty when the trimmed range contains no slices.
    """
    voxels, axial_range, _sag, _cor, label = load_scan_from_npz(file_path)
    begin, end = axial_range
    image_label_pairs = []
    for index in range(begin + margin, end + 1 - margin):
        # Work in float64 so the 255x scaling cannot wrap around if the stored
        # voxels use a narrow integer dtype.
        slice_values = np.asarray(voxels[index], dtype=np.float64)
        lowest = slice_values.min()
        value_range = slice_values.max() - lowest
        if value_range > 0:
            scaled = 255 * (slice_values - lowest) / value_range
        else:
            # Constant (or all-NaN) slice: the rescale would divide by zero,
            # so emit an all-black image instead of undefined pixel values.
            scaled = np.zeros_like(slice_values)
        image = Image.fromarray(np.uint8(scaled))
        image_label_pairs.append({"image": image, "label": label})
    return image_label_pairs

def counts_zeros_and_ones(full_list):
    """Split labelled entries into the label-0 and label-1 groups and report their sizes.

    Parameters
    ----------
    full_list : list of dict
        Entries that each expose a ``"label"`` key.

    Returns
    -------
    tuple
        ``(num_zeros, num_ones, zeros, ones)`` where ``zeros`` / ``ones`` are
        the entries whose label equals 0 / 1, in their original order.
        Entries with any other label are ignored.
    """
    def _entries_labelled(value):
        # One full pass over the input per requested label value.
        return [item for item in full_list if item["label"] == value]

    zeros = _entries_labelled(0)
    ones = _entries_labelled(1)
    print(f"Number of ZEROS: {len(zeros)} || Number of ONES: {len(ones)}")
    return len(zeros), len(ones), zeros, ones

def convert_to_conversation(sample):
    """Wrap one image/label sample in a two-turn chat record.

    Parameters
    ----------
    sample : dict
        Must provide ``"image"`` (passed through untouched) and ``"label"``
        (interpolated into the assistant reply).

    Returns
    -------
    dict
        ``{"messages": [user_turn, assistant_turn]}``. The user turn holds the
        classification instruction followed by the image; the assistant turn
        holds ``"Choice: <label> Reasoning:..."`` with a literal ``...``
        placeholder for the reasoning.
    """
    instruction = "Classify the brain scan as Low Grade Glioma (0), High Grade Glioma (1), or No Glioma (2). Respond only in the following format: Choice: <0, 1, or 2> Reasoning: <Provide concise reasoning using 10 keywords based on the scan's visual features>."

    # Prompt side: text instruction first, then the scan image.
    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": instruction},
            {"type": "image", "image": sample['image']},
        ],
    }

    # Target side: the ground-truth choice in the requested answer format.
    assistant_turn = {
        "role": "assistant",
        "content": [
            {"type": "text", "text": f"Choice: {sample['label']} Reasoning:..."},
        ],
    }

    return {"messages": [user_turn, assistant_turn]}

def create_conversation_dataset(training_data_path):
    """Build the per-patient image datasets and their chat-formatted counterparts.

    Every ``.npz`` file directly inside ``training_data_path`` is treated as
    one patient. Files are ordered by the integer between the last underscore
    and the first following dot of the file name (e.g. ``..._12.npz`` -> 12).

    Parameters
    ----------
    training_data_path : str
        Directory containing the patient ``.npz`` archives.

    Returns
    -------
    tuple of (list, list)
        ``(patients, llm_patients)``. ``patients[i]`` is the list produced by
        ``create_image_dataset`` for the i-th file; ``llm_patients[i]`` holds
        the same samples passed through ``convert_to_conversation``. Both are
        empty lists when the directory contains no ``.npz`` files.

    Raises
    ------
    ValueError
        If a file name does not contain a parseable integer in that position.
    """
    def _patient_number(path):
        # Parse the file name only, so a '_' or '.' in a parent directory
        # cannot disturb the ordering key.
        return int(os.path.basename(path).split('_')[-1].split('.')[0])

    # Extract all the patient archives, in numeric order
    filenames = sorted(
        (os.path.join(training_data_path, name)
         for name in os.listdir(training_data_path)
         if name.endswith(".npz")),
        key=_patient_number,
    )

    # Now we build the dataset
    patients = [create_image_dataset(path) for path in filenames]

    # Now convert the dataset into input for LLM
    llm_patients = [
        [convert_to_conversation(sample) for sample in patient]
        for patient in patients
    ]
    print(f"Number of patients: {len(llm_patients)}")

    # Preview the first sample only when it exists: an empty directory or a
    # first patient without any kept slice must not raise IndexError.
    if llm_patients and llm_patients[0]:
        print(llm_patients[0][0])

    return patients, llm_patients

# Build both dataset views (raw image/label pairs and chat-formatted samples)
# from the archives in `test_data_path`; this reads every archive in that folder.
data_patients, llm_patients = create_conversation_dataset(test_data_path)

Number of patients: 55
{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "Classify the brain scan as Low Grade Glioma (0), High Grade Glioma (1), or No Glioma (2). Respond only in the following format: Choice: <0, 1, or 2> Reasoning: <Provide concise reasoning using 10 keywords based on the scan's visual features>."}, {'type': 'image', 'image': <PIL.Image.Image image mode=L size=128x128 at 0x7EA7A9CF4E10>}]}, {'role': 'assistant', 'content': [{'type': 'text', 'text': 'Choice: 0 Reasoning:...'}]}]}


In [None]:
# Checkpoints this notebook can evaluate
FINETUNED_MODEL_NAME = "liufelic/newlora_100step_model"
BASE_MODEL_NAME = "unsloth/Llama-3.2-11B-Vision-Instruct"

# Select exactly one checkpoint per run. False loads the base model, which is
# the value this cell resolved to as written.
# NOTE(review): the saved cell output shows an adapter download, which suggests
# the fine-tuned checkpoint was used in the recorded run -- confirm which model
# the stored results belong to before comparing them.
USE_FINETUNED_MODEL = False
lora_model_name = FINETUNED_MODEL_NAME if USE_FINETUNED_MODEL else BASE_MODEL_NAME

# Load the selected checkpoint
from unsloth import FastVisionModel
model, tokenizer = FastVisionModel.from_pretrained(
    model_name = lora_model_name,
    load_in_4bit = True, # Set to False for 16bit LoRA
)
# Enable for inference! The trailing ';' keeps the very long model repr out of
# the cell output.
FastVisionModel.for_inference(model);

==((====))==  Unsloth 2025.2.4: Fast Mllama vision patching. Transformers: 4.48.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MllamaForConditionalGeneration(
      (vision_model): MllamaVisionModel(
        (patch_embedding): Conv2d(3, 1280, kernel_size=(14, 14), stride=(14, 14), padding=valid, bias=False)
        (gated_positional_embedding): MllamaPrecomputedPositionEmbedding(
          (tile_embedding): Embedding(9, 8197120)
        )
        (pre_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
          (embedding): Embedding(9, 5120)
        )
        (post_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
          (embedding): Embedding(9, 5120)
        )
        (layernorm_pre): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (layernorm_post): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (transformer): MllamaVisionEncoder(
          (layers): ModuleList(
            (0-12): 13 x MllamaVisionEncoderLayer(
              (self_attn): MllamaVisionSdpaAttention(
               

In [None]:
import re

def extract_choice(output_text):
    """Pull the ``Choice: <digit>`` answer out of decoded model text.

    Parameters
    ----------
    output_text : str
        Decoded text to scan.

    Returns
    -------
    str
        The first substring matching ``Choice:`` + optional whitespace + one of
        ``0``/``1``/``2`` (not followed by another word character). When no
        such substring exists, the whole input is returned with each newline
        replaced by ``" ||| "`` so it still fits on a single line.
    """
    found = re.search(r"Choice:\s*[012]\b", output_text)
    if found is None:
        # No well-formed answer: flatten the raw text onto one line instead.
        return " ||| ".join(output_text.split("\n"))
    return found.group(0)

def write_file_for_one_patient(patient, path):
    """Classify every slice of one patient and save one result line per slice.

    Uses the notebook-level ``model`` and ``tokenizer`` objects and the
    ``extract_choice`` helper.

    Parameters
    ----------
    patient : list of dict
        Samples that each provide an ``"image"`` and a ``"label"`` entry.
    path : str
        Text file to write. It is opened in ``'w'`` mode, so an existing file
        is overwritten and re-running a patient never duplicates lines.

    Returns
    -------
    None
        Each written line reads
        ``Ground truth: <label> || Model output: <extracted answer or raw text>``.
    """
    # Instruction without reasoning
    instruction = "Classify the brain scan as Low Grade Glioma (0), High Grade Glioma (1), or No Glioma (2) based on the scan's visual features. Respond only in the following format: Choice: <0, 1, or 2>."
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": instruction}
        ]}
    ]
    # The prompt is identical for every slice, so render the chat template once
    # instead of once per iteration.
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    # Write mode: the file is created if missing and truncated otherwise
    with open(path, 'w') as file:
        for sample in patient:
            image = sample["image"]
            ground_truth = sample["label"]

            inputs = tokenizer(
                image,
                input_text,
                add_special_tokens=False,
                return_tensors="pt",
            ).to("cuda")

            # Generate the tokens
            # NOTE(review): temperature/min_p only take effect if sampling is
            # enabled for this model; for a classification benchmark greedy
            # decoding may be preferable -- confirm intent.
            output_tokens = model.generate(**inputs, max_new_tokens=20,
                                           use_cache=True, temperature=1.5, min_p=0.1)

            # generate() returns the prompt tokens followed by the continuation.
            # Decode only the continuation so that neither the regex search nor
            # the raw-text fallback can pick up the echoed prompt.
            prompt_length = inputs["input_ids"].shape[1]
            new_tokens = output_tokens[0][prompt_length:]
            generated_text = extract_choice(tokenizer.decode(new_tokens, skip_special_tokens=True))

            # One line per slice
            result_message = f"Ground truth: {ground_truth} || Model output: {generated_text}"
            file.write(result_message + '\n')


In [None]:
# Folder on Drive that receives one results text file per patient
main_results_folder = "/content/drive/MyDrive/ENGSCI/4TH YEAR/fall 4th year/ESC499/Code Test/Finetuning/Results/"
os.makedirs(main_results_folder, exist_ok=True)  # open(..., 'w') would fail if the folder were missing

# Index of the first patient to evaluate. 31 presumably resumes a run that
# stopped part-way -- confirm; set to 0 to evaluate every patient.
first_patient_id = 31

# Stop at the actual number of loaded patients instead of a hard-coded count,
# so the loop stays valid if the test set changes size.
for patient_id in range(first_patient_id, len(data_patients)):

    print(f"Patient ID: {patient_id}")
    patient = data_patients[patient_id]

    # path for patient data storage
    patient_results_text = main_results_folder + f"patient_{patient_id}.txt"
    write_file_for_one_patient(patient, patient_results_text)



Patient ID: 31
Patient ID: 32
Patient ID: 33
Patient ID: 34
Patient ID: 35
Patient ID: 36
Patient ID: 37
Patient ID: 38
Patient ID: 39
Patient ID: 40
Patient ID: 41
Patient ID: 42
Patient ID: 43
Patient ID: 44
Patient ID: 45
Patient ID: 46
Patient ID: 47
Patient ID: 48
Patient ID: 49
Patient ID: 50
Patient ID: 51
Patient ID: 52
Patient ID: 53
Patient ID: 54
