In [1]:
# Import libraries
import numpy as np
import os
from PIL import Image
import random

# Set random seed
random.seed(42)

# Unload all data such that it's easily accessible
main_data_path = "/content/drive/MyDrive/ENGSCI/4TH YEAR/fall 4th year/ESC499/Code Test/Finetuning/Data/"
training_data_path = main_data_path+"training"
test_data_path = main_data_path+"test"
training_seg_path = main_data_path+"train_seg_bbounds.npz"
test_seg_path = main_data_path+"test_seg_bbounds.npz"

In [2]:
# 42s 44s
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git


In [3]:
!pip install --upgrade torch torchvision torchaudio # Upgrading PyTorch to the latest version
import torchvision
from unsloth import FastVisionModel # FastLanguageModel for LLMs
import torch
from unsloth import is_bf16_supported
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig
from transformers import TextStreamer
from transformers import TrainerCallback, TrainingArguments

Collecting torchaudio
  Downloading torchaudio-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.6 kB)
Downloading torchaudio-2.6.0-cp311-cp311-manylinux1_x86_64.whl (3.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchaudio
  Attempting uninstall: torchaudio
    Found existing installation: torchaudio 2.5.1+cu124
    Uninstalling torchaudio-2.5.1+cu124:
      Successfully uninstalled torchaudio-2.5.1+cu124
Successfully installed torchaudio-2.6.0
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
# Mount the Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Functions (TEST VERSION)

def load_scan_from_npz(file_path):
    data = np.load(file_path)
    return data['voxel'], data['ax'], data['sag'], data['cor'], data['label']

def pair_segs(filename, patient_specific_bb):
    v, a, s, c, l = load_scan_from_npz(filename)
    begin, end = a
    image_seg_pairs = []
    for i in range(begin+20, end+1-20, 1):
        row_min, col_min, row_max, col_max = patient_specific_bb[i]
        np_array, label = v[i], l
        np_array = np.uint8(255 * (np_array - np.min(np_array)) / (np.max(np_array) - np.min(np_array)))
        image = Image.fromarray(np_array)
        image_seg_pairs.append({"image":image, "bb":[(row_min, col_min), (row_min, col_max), (row_max, col_max), (row_max, col_min)] })
    return image_seg_pairs

def load_normal_npz(file_path):
    loaded_data = np.load(file_path)
    return loaded_data['array']


def convert_to_conversation(sample):
    instruction = '''
You are an expert medical AI assistant specializing in glioma segmentation on FLAIR-mode brain scans.
Given a 128x128 grayscale brain scan, output the bounding box around the tumor using the four corner vertices.
The tumor region is the brightest, high-intensity abnormality distinct from normal brain structures.
Ensure the bounding box tightly encloses the entire tumor without extending into non-tumor regions.
The bounding box output must be formatted strictly as:[(row_min, col_min), (row_min, col_max), (row_max, col_max), (row_max, col_min)] where (row, col) are integers between 0 and 127, with (0,0) at the top-left and row increasing downward, and col increasing rightward.
Do not output any other text or explanation, only the coordinate list in the exact format above.
    '''

    conversation = [
        { "role": "user",
          "content" : [
            {"type" : "text",  "text"  : instruction},
            {"type" : "image", "image" : sample['image']} ]
        },
        { "role" : "assistant",
          "content" : [
            {"type": "text", "text": f"{sample['bb']}"} ]
        },
    ]
    return { "messages" : conversation }


def create_conversation_dataset(data_path, segs_path):

    segs_bb = load_normal_npz(segs_path)

    # Extract all the patients and the corresponding filenames
    filenames = []
    for filename in os.listdir(data_path):
        if filename.endswith(".npz"):
            file_path = os.path.join(data_path, filename)
            filenames.append(file_path)
    filenames = sorted(filenames, key=lambda x: int(x.split('_')[-1].split('.')[0]))

    # Now we build the dataset
    patients = []
    for index in range(len(filenames)):
        filename = filenames[index]
        patient_specific_bb = segs_bb[index]
        image_seg_pairs = pair_segs(filename, patient_specific_bb)
        patients.append(image_seg_pairs)
        # patients += image_seg_pairs

    # Now convert the dataset into input for LLM
    llm_patients = []
    for patient in patients:
        llm_patient = [convert_to_conversation(sample) for sample in patient]
        llm_patients.append(llm_patient)
    print(f"Number of patients: {len(llm_patients)}")
    print(llm_patients[0][0])

    return patients, llm_patients


In [6]:
data_patients, llm_patients = create_conversation_dataset(test_data_path, test_seg_path)

Number of patients: 55
{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': '\nYou are an expert medical AI assistant specializing in glioma segmentation on FLAIR-mode brain scans.\nGiven a 128x128 grayscale brain scan, output the bounding box around the tumor using the four corner vertices.\nThe tumor region is the brightest, high-intensity abnormality distinct from normal brain structures.\nEnsure the bounding box tightly encloses the entire tumor without extending into non-tumor regions.\nThe bounding box output must be formatted strictly as:[(row_min, col_min), (row_min, col_max), (row_max, col_max), (row_max, col_min)] where (row, col) are integers between 0 and 127, with (0,0) at the top-left and row increasing downward, and col increasing rightward.\nDo not output any other text or explanation, only the coordinate list in the exact format above.\n    '}, {'type': 'image', 'image': <PIL.Image.Image image mode=L size=128x128 at 0x7B24BF51A290>}]}, {'role': 'assistan

In [7]:
# Add the name of the model we want to import
# lora_model_name = "liufelic/seg_100step_model"
lora_model_name = "liufelic/seg_200step_model"

# Load the model we previously trained
from unsloth import FastVisionModel
model, tokenizer = FastVisionModel.from_pretrained(
    model_name = lora_model_name, # YOUR MODEL YOU USED FOR TRAINING
    load_in_4bit = True, # Set to False for 16bit LoRA
)
FastVisionModel.for_inference(model) # Enable for inference!

==((====))==  Unsloth 2025.2.4: Fast Mllama vision patching. Transformers: 4.48.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/375k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/477 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/5.15k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MllamaForConditionalGeneration(
      (vision_model): MllamaVisionModel(
        (patch_embedding): Conv2d(3, 1280, kernel_size=(14, 14), stride=(14, 14), padding=valid, bias=False)
        (gated_positional_embedding): MllamaPrecomputedPositionEmbedding(
          (tile_embedding): Embedding(9, 8197120)
        )
        (pre_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
          (embedding): Embedding(9, 5120)
        )
        (post_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
          (embedding): Embedding(9, 5120)
        )
        (layernorm_pre): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (layernorm_post): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (transformer): MllamaVisionEncoder(
          (layers): ModuleList(
            (0-12): 13 x MllamaVisionEncoderLayer(
              (self_attn): MllamaVisionSdpaAttention(
               

In [23]:
import re

def extract_choice(output_text):
    bbox_pattern = r"\[\((\d{1,3}, \d{1,3})\), \((\d{1,3}, \d{1,3})\), \((\d{1,3}, \d{1,3})\), \((\d{1,3}, \d{1,3})\)\]"
    match = re.search(bbox_pattern, output_text)
    if match: return match.group(0)
    else: return output_text.split("assistant")[-1].strip()

def write_file_for_one_patient(patient, path):
    # Open the file in append mode (if it doesn't exist, it will be created)
    with open(path, 'w') as file:
      for sample_id in range(len(patient)):

          # Extract the info from the message
          sample = patient[sample_id]
          image = sample["image"]
          ground_truth = sample["bb"]

          # Instruction
          instruction = '''
          You are an expert medical AI assistant specializing in glioma segmentation on FLAIR-mode brain scans.
          Given a 128x128 grayscale brain scan, output the bounding box around the tumor using the four corner vertices.
          The tumor region is the brightest, high-intensity abnormality distinct from normal brain structures.
          Ensure the bounding box tightly encloses the entire tumor without extending into non-tumor regions.
          The bounding box output must be formatted strictly as:[(row_min, col_min), (row_min, col_max), (row_max, col_max), (row_max, col_min)] where (row, col) are integers between 0 and 127, with (0,0) at the top-left and row increasing downward, and col increasing rightward.
          Do not output any other text or explanation, only the coordinate list in the exact format above.
          '''
          messages = [
              {"role": "user", "content": [
                  {"type": "image"},
                  {"type": "text", "text": instruction}
              ]}
          ]

          input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
          inputs = tokenizer(
              image,
              input_text,
              add_special_tokens=False,
              return_tensors="pt",
          ).to("cuda")

          # Generate the tokens
          output_tokens = model.generate(**inputs, max_new_tokens=40,
                                          use_cache=True, temperature=1.5, min_p=0.1)
          generated_text = extract_choice(tokenizer.decode(output_tokens[0], skip_special_tokens=True))

          # Prepare the output message
          result_message = f"Ground truth: {ground_truth} || Model output: {generated_text}"

          # # Print to the terminal
          # print(result_message)

          # Write to the file
          file.write(result_message + '\n')  # Add a newline after each entry

    return


In [24]:
# Unload all data such that it's easily accessible
main_results_folder = "/content/drive/MyDrive/ENGSCI/4TH YEAR/fall 4th year/ESC499/Code Test/Finetuning/Results/"

# for patient_id in range(len(data_patients)):
for patient_id in range(55):

    print(f"Patient ID: {patient_id}")
    patient = data_patients[patient_id]

    # path for patient data storage
    patient_results_text = main_results_folder + f"patient_{patient_id}.txt"
    write_file_for_one_patient(patient, patient_results_text)

Patient ID: 0
Patient ID: 1
Patient ID: 2
Patient ID: 3
Patient ID: 4
Patient ID: 5
Patient ID: 6
Patient ID: 7
Patient ID: 8
Patient ID: 9
Patient ID: 10
Patient ID: 11
Patient ID: 12
Patient ID: 13
Patient ID: 14
Patient ID: 15
Patient ID: 16
Patient ID: 17
Patient ID: 18
Patient ID: 19
Patient ID: 20
Patient ID: 21
Patient ID: 22
Patient ID: 23
Patient ID: 24
Patient ID: 25
Patient ID: 26
Patient ID: 27
Patient ID: 28
Patient ID: 29
Patient ID: 30
Patient ID: 31
Patient ID: 32
Patient ID: 33
Patient ID: 34
Patient ID: 35
Patient ID: 36
Patient ID: 37
Patient ID: 38
Patient ID: 39
Patient ID: 40
Patient ID: 41
Patient ID: 42
Patient ID: 43
Patient ID: 44
Patient ID: 45
Patient ID: 46
Patient ID: 47
Patient ID: 48
Patient ID: 49
Patient ID: 50
Patient ID: 51
Patient ID: 52
Patient ID: 53
Patient ID: 54
