In [1]:
import json
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from tqdm.notebook import tqdm
import pickle
import random

from unsloth import FastVisionModel
from transformers import TextStreamer

pd.options.display.max_rows = 5

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [20]:
file_path = 'jl_fs/input/df_test.pkl'

with open(file_path, 'rb') as file:
    df_test = pickle.load(file)

In [21]:
df_test

Unnamed: 0,class,image
0,Normal,<PIL.Image.Image image mode=L size=300x200 at ...
1,Normal,<PIL.Image.Image image mode=L size=300x200 at ...
...,...,...
21890,Paced-Normal Fusion,<PIL.Image.Image image mode=L size=300x200 at ...
21891,Paced-Normal Fusion,<PIL.Image.Image image mode=L size=300x200 at ...


In [22]:
import pandas as pd
import numpy as np

df_test = df_test.sample(frac=1, random_state=42).reset_index(drop=True)
midpoint = len(df_test) // 2

data_test    = df_test.iloc[:midpoint].copy()  
data_holdout = df_test.iloc[midpoint:].copy()

data_test.shape,  data_holdout.shape

((10946, 2), (10946, 2))

In [32]:
data_test

Unnamed: 0,class,image
0,Normal,<PIL.Image.Image image mode=L size=300x200 at ...
1,Normal,<PIL.Image.Image image mode=L size=300x200 at ...
...,...,...
10944,Normal,<PIL.Image.Image image mode=L size=300x200 at ...
10945,Normal,<PIL.Image.Image image mode=L size=300x200 at ...


# Inference on Test Data

In [12]:
model_name = "Aidan777/Llama-3.2-11B-Vision-Instruct_finetuned_ecg"

model, tokenizer = FastVisionModel.from_pretrained(
                                        model_name=model_name, 
                                        load_in_4bit=True)

==((====))==  Unsloth 2025.2.15: Fast Mllama vision patching. Transformers: 4.49.0.
   \\   /|    GPU: NVIDIA RTX 6000 Ada Generation. Max memory: 47.507 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 8.9. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

In [19]:
instruction_user = """
You are a specialist Cardiologist specializing in the analysis of ECG (Electrocardiogram) signals.
Your goal is to analyze ECG signals and classify them accurately.
You are provided with a ECG image and a class that corresponds to the image.
You only classify ECG signals into one of the following five categories: 
"Normal", "Supraventricular Arrhythmia", "Ventricular Arrhythmia", "Ventricular-Normal Fusion", "Paced-Normal Fusion".
Each ECG signal must be classified into exactly one of these categories.
No additional text should be included in the output.
You are provided with an ECG signal image to look into. It is Time (x-axis) vs Voltage (y-axis). 
Classify the ECG signal accurately by analyzing the provided data.
"""

In [33]:
FastVisionModel.for_inference(model)

outputs = {}
for i in tqdm(range(len(data_test))):

    image = data_test.iloc[i]['image']
    idx   = data_test.index[i]
    assert isinstance(image, Image.Image)  
    
    messages = [{"role": "user",
                 "content": [{"type": "image"},
                             {"type": "text", "text": instruction_user}]}]

    input_text    = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    inputs        = tokenizer(image, input_text, add_special_tokens=False, return_tensors="pt").to("cuda")
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    
    output = model.generate(**inputs,
                            streamer=text_streamer,
                            max_new_tokens=128, # of the output
                            use_cache=True,
                            temperature=1.5,
                            min_p=0.1)


    decoded_text  = tokenizer.decode(output[0], skip_special_tokens=True)
    required_text = decoded_text.split('assistant\n\n', 1)[-1]
    del output
    
    outputs[idx] = required_text

  0%|          | 0/10946 [00:00<?, ?it/s]

Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Supraventricular Arrhythmia<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Supraventricular Arrhythmia<|eot_id|>
Supraventricular Arrhythmia<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Ventricular-Normal Fusion<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Ventricular Arrhythmia<|eot_id|>
Normal<|eot_id|>
Paced-Normal Fusion<|eot_id|>
Paced-Normal Fusion<|eot_id|>
Paced-Normal Fusion<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Paced-Normal Fusion<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Ventricular Arrhythmia<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Paced-Normal Fusion<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Supraventricular Arrhythmia<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Normal<|eot_id|>
Ventricular Arrhythmia<|eot_id|>
Ventricular Arrhythmia<|eot_id|>
Normal<|eot_id|>
Nor

In [34]:
file_name = "jl_fs/Llama-3.2-11B-Vision-Instruct_finetuned_ecg_TEST_OUTPUT.json"

with open(file_name, "w") as file:
    json.dump(outputs, file, indent=4)  