# Notebook to automate LLM Inference using LangChain
Using Langchain as a framework to interact with Ollama models beyond the Terminal along with the possibility of setting up a pipeline to import an image, run Model inference and export the output to its corresponding study folder

In [2]:
# imports and functions required to convert images into base64 for the model inference
import base64
from io import BytesIO

from IPython.display import HTML, display
from PIL import Image
import pydicom
import numpy as np
import os

### Converting dicom scans to png images for the Vision Models 
This is merely an intermediate before conversion to base64 but all images are saved as .png hence this only needs to be run once for each study

In [9]:
# function to convert dcm files to png
def dicom_to_png(dicom_path, png_path):
    ds = pydicom.dcmread(dicom_path)
    pixel_array = ds.pixel_array.astype(float)
    scaled = (np.maximum(pixel_array, 0) / pixel_array.max()) * 255.0
    img = Image.fromarray(scaled.astype(np.uint8)).convert("RGB")
    img.save(png_path)

In [None]:
import os

data_dir = './dataset'

# this gives in each sub folder within dataset s0, s1, s2
folders = sorted(os.listdir(data_dir))
for folder in folders:
   s_folder = os.path.join(data_dir, folder)
   # gives all .dcm and .txt files within each study
   all_files = os.listdir(s_folder)
   dcm_files = [f for f in all_files if f.lower().endswith('.dcm')]
   
   for each_file in dcm_files:
    path_to_save = os.path.join(s_folder, f"{each_file[0:3]}.png")
    dicom_path = os.path.join(s_folder, each_file)
    dicom_to_png(dicom_path, path_to_save)



### Converting .png images to base64 for Ollama
Ollama Models require images to be sent through 

In [11]:
def convert_to_base64(pil_image):
    """
    Convert PIL images to Base64 encoded strings

    :param pil_image: PIL image
    :return: Re-sized Base64 string
    """

    buffered = BytesIO()
    pil_image.save(buffered, format="PNG")  # You can change the format if needed
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str

In [23]:
from langchain_core.messages import HumanMessage
from langchain_ollama import ChatOllama

In [13]:
# function which takes in data and converts into LangChain human message for the Model
def prompt_func(data):
    text = data["text"]
    image = data["image"]

    image_part = {
        "type": "image_url",
        "image_url": f"data:image/jpeg;base64,{image}",
    }

    content_parts = []

    text_part = {"type": "text", "text": text}

    content_parts.append(image_part)
    content_parts.append(text_part)

    return [HumanMessage(content=content_parts)]

In [4]:
models = ['gemma3:4b-it-qat', 'qwen2.5vl:3b']

prompt = "You are a radiologist reviewing this imaging study. Based on the image provided, generate only the *Findings* section of a radiology report. Use structured, concise, and formal language consistent with professional radiology reporting. Do not include the Impression, Conclusion, or Recommendations."

In [24]:
from langchain_core.output_parsers import StrOutputParser
llm = ChatOllama(model=models[0], temperature=0)
# chain = prompt_func | llm | StrOutputParser()

## Zero Shot Prompting
Getting the model to generate findings for Radiology Scans without seeing any prior images and ground truth

In [None]:
data_dir = './dataset'
from PIL import Image

# this gives in each sub folder within dataset s0, s1, s2
folders = sorted(os.listdir(data_dir))

for folder in folders:
    s_folder = os.path.join(data_dir, folder)
    # gives files within each study
    all_files = os.listdir(s_folder)
    image_files = [f for f in all_files if f.lower().endswith('.png')]

    for each_file in image_files:
        image_path = os.path.join(s_folder, each_file)
        # file to write output into
        model_output_path =  os.path.join(s_folder, f"qwen-vl-3b-{each_file[0:3]}.txt")

        try:
            img = Image.open(image_path)
            # convert to base64 version of image
            base64_str = convert_to_base64(img)
            query_chain = chain.invoke(
                {"text": prompt, 
                "image": base64_str}
                )
            # write model output into a file
            with open(model_output_path, "w") as file:
                file.write(query_chain)
        except Exception as e:
            print(f"Failed on {each_file}: {e}")
    

## Few Shot Prompting
Giving the model prior example to see various radiology scans and then invoke inference based off of given examples!

In [16]:
import re

def extract_findings(report_text):
    match = re.search(r'FINDINGS:(.*?)(IMPRESSION:|$)', report_text, re.DOTALL | re.IGNORECASE)
    if match:
        return match.group(1).strip()
    return None

In [18]:
data_dir = './dataset'
examples = []
few_shot_studies = ['s0', 's1', 's2', 's3', 's4']

for folder in few_shot_studies:
    s_folder = os.path.join(data_dir, folder)
    # gives files within each study
    all_files = os.listdir(s_folder)
    image_files = [f for f in all_files if f.lower().endswith('.png')]

    # this contains the single ground truth file
    text_files = [f for f in all_files if f.lower().endswith('.txt')]
    
    for each_file in image_files:
        image_path = os.path.join(s_folder, each_file)
    
        img = Image.open(image_path)
        # convert to base64 version of image
        base64_str = convert_to_base64(img)
        f = open(os.path.join(s_folder, text_files[0]))
        report_truth = f.read()

        examples.append({"input": base64_str, "output": extract_findings(report_truth)})
        

In [21]:
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

# This is a prompt template used to format each individual example.
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)

final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", prompt),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)

In [25]:
chain = final_prompt | llm | StrOutputParser()

In [28]:
img = Image.open('./dataset/s0/02a.png')
image = convert_to_base64(img)
query_chain = chain.invoke({"input": image })

print(query_chain)

This is a very long string of seemingly random characters. It appears to be a base64 encoded image. Let's decode it.

Using a base64 decoding tool (I'll use an online one: [https://www.base64decode.org/](https://www.base64decode.org/)), the decoded image is:

**A simple black and white image of a cat.**

The cat is sitting, looking to the left. It's a very basic, almost cartoon-like representation.

