# LLM Inference via Ollama API 
Running Model inference using Ollama API for few shot prompting

In [1]:
# imports and functions required to convert images into base64 for the model inference
import base64
from io import BytesIO

from IPython.display import HTML, display
from PIL import Image
import pydicom
import numpy as np
import os
import requests

### Converting dicom scans to png images for the Vision Models 
This is merely an intermediate before conversion to base64 but all images are saved as .png hence this only needs to be run once for each study

### Converting .png images to base64 for Ollama
Ollama Models require images to be sent through 

In [2]:
def convert_to_base64(pil_image):
    """
    Convert PIL images to Base64 encoded strings

    :param pil_image: PIL image
    :return: Re-sized Base64 string
    """

    buffered = BytesIO()
    pil_image.save(buffered, format="PNG")  # You can change the format if needed
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str

In [3]:
models = ['gemma3:4b-it-qat', 'qwen2.5vl:3b']

prompt = "You are a radiologist reviewing this imaging study. Based on the image provided, generate only the *Findings* section of a radiology report. Use structured, concise, and formal language consistent with professional radiology reporting. Do not include the Impression, Conclusion, or Recommendations."

In [4]:
img = Image.open("./dataset/s11/1b1.png")
base64_img = convert_to_base64(img)

## Zero Shot Prompting
Getting the model to generate findings for Radiology Scans without seeing any prior images and ground truth
Currently just testing out 2-4 images to test API

In [10]:
data = {
    "model" : "gemma3:4b-it-qat",
    "prompt" : prompt,
    "stream" : False, 
    "images": [base64_img]
}

ollama_url = "http://localhost:11434/api/generate"

response = requests.post(ollama_url, json=data)
print(response.json())

{'model': 'gemma3:4b-it-qat', 'created_at': '2025-07-19T20:13:30.0138587Z', 'response': 'Findings:\n*   Mild increased interstitial markings are noted in both lungs.\n*   Mild cardiomediastinal contour.\n*   Right pneumothorax with moderate air density noted in the right hemithorax. A pleural effusion is not directly visualized.\n*   No acute lung consolidations identified.\n*   Right chest tube present.', 'done': True, 'done_reason': 'stop', 'context': [105, 2364, 107, 236840, 3024, 236772, 236771, 236842, 107, 107, 3048, 659, 496, 4574, 16097, 35329, 672, 20502, 2748, 236761, 18186, 580, 506, 2471, 3847, 236764, 8729, 1186, 506, 808, 65362, 236829, 3336, 529, 496, 131230, 2072, 236761, 6890, 31044, 236764, 63510, 236764, 532, 10781, 5192, 9958, 607, 5707, 131230, 13761, 236761, 3574, 711, 3204, 506, 118340, 236764, 63916, 236764, 653, 91330, 236761, 106, 107, 105, 4368, 107, 107, 65362, 236787, 107, 236829, 138, 84432, 4869, 57610, 68189, 659, 8601, 528, 1800, 38464, 236761, 107, 236

## Few Shot Prompting
Giving the model prior example to see various radiology scans and then invoke inference based off of given examples!

In [5]:
import re

def extract_findings(report_text):
    match = re.search(r'FINDINGS:(.*?)(IMPRESSION:|$)', report_text, re.DOTALL | re.IGNORECASE)
    if match:
        return match.group(1).strip()
    return None

In [6]:
human_message = {
      "role": "user",
      "content": prompt,
      "images": []
}

model_message = {
    "role": "assistant",
    "content": "",
    "images": None
}

data = {
  "model": "gemma3:4b-it-qat",
  "messages": [],
  "stream": False,
}

In [7]:
data_dir = './dataset'
examples = []
few_shot_studies = ['s0', 's1', 's2']

for folder in few_shot_studies:
    s_folder = os.path.join(data_dir, folder)
    # gives files within each study
    all_files = os.listdir(s_folder)
    image_files = [f for f in all_files if f.lower().endswith('.png')]

    # this contains the single ground truth file
    text_files = [f for f in all_files if f.lower().endswith('.txt')]
    
    for each_file in image_files:
       
        image_path = os.path.join(s_folder, each_file)
        img = Image.open(image_path)
        # convert to base64 version of image
        base64_str = convert_to_base64(img)
        f = open(os.path.join(s_folder, text_files[0]))
        report_truth = extract_findings(f.read())

        h_message = human_message.copy()
        h_message["images"].append(base64_str)
        ai_message = model_message.copy()
        ai_message["content"] = report_truth
        data["messages"].append(h_message)
        data["messages"].append(ai_message)
      

In [8]:
test_study = "./dataset/s12/57c.png"
img = Image.open(test_study)
base64_img = convert_to_base64(img)

h_message = human_message.copy()
h_message["images"].append(base64_img)

data["messages"].append(h_message)

In [9]:
ollama_url = "http://localhost:11434/api/chat"

response = requests.post(ollama_url, json=data)
print(response.json())

{'error': 'model runner has unexpectedly stopped, this may be due to resource limitations or an internal error, check ollama server logs for details'}


In [None]:
print(data["messages"])