In [1]:
# magic/dataset.py

import json
import logging
import os

from PIL import Image
from torch.utils.data import Dataset


class MAGICDataset(Dataset):
    """Image + question/answer samples for the ImageCLEF 2024 MAGIC challenge.

    The dataset reads ``<file_path>/<split>_downloaded.json`` and looks for the
    first image of every record in ``<file_path>/images/<split>/`` (``.jpg``
    first, then ``.png``). Records whose image cannot be found are skipped.

    :param str file_path: root directory of the data (with or without a
        trailing path separator)
    :param str split: name of the split to load, e.g. ``"train"`` or ``"valid"``
    """

    def __init__(self, file_path: str = "data/", split: str = "train"):
        """
        :param file_path: main path with data
        :param split: which dataset should be chosen
        """
        # os.path.join gives the same result as the previous string
        # concatenation when file_path ends with a separator, and also works
        # when it does not (e.g. "data" instead of "data/").
        self.json_file = os.path.join(file_path, split + "_downloaded.json")
        self.folder_path = os.path.join(file_path, "images", split)
        self.data = self._get_preprocessed_data()

    def _get_preprocessed_data(self):
        """Load the split's JSON file and keep the records that have an image on disk.

        :return: list of dicts with the keys ``image`` (path to the image
            file), ``description``, ``answer`` and ``encounter_id``
        """
        with open(self.json_file, encoding="utf8") as f:
            json_data = json.load(f)

        temp_data = []
        for sample in json_data:
            image_ids = sample["image_ids"]
            if len(image_ids) != 1:
                logging.warning(f'Different number of images ({len(image_ids)}) for question than 1')
            if not image_ids:
                # No image at all: there is nothing to load, so skip the
                # record instead of failing with an IndexError below.
                continue

            # Only the first image of a record is used.
            image_stem = os.path.join(self.folder_path, image_ids[0])
            image_path = image_stem + ".jpg"
            if not os.path.exists(image_path):
                image_path = image_stem + ".png"
                if not os.path.exists(image_path):
                    logging.warning(f"Couldn't find path {image_path}")
                    continue

            temp_data.append({
                "image": image_path,
                "description": sample["query_title_en"],
                # Only the first response is kept as the reference answer.
                "answer": sample["responses"][0]["content_en"],
                "encounter_id": sample["encounter_id"],
            })
        return temp_data

    def __len__(self):
        """Return the number of usable samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` with its image opened and its prompt built.

        :param idx: index into the list of usable samples
        :return: dict with ``image`` (PIL image), ``qa`` (a one-element list
            holding the ``question``/``answer`` pair) and ``encounter_id``
        """
        sample = self.data[idx]
        prompt = (
            "This is additional information about the dermatology issue on the image: "
            + sample["description"]
            + " What dermatological disease is on the image and how can it be treated?"
        )

        image = Image.open(sample["image"])
        # Image.open is lazy and keeps the file open until the pixel data is
        # read. Loading eagerly lets Pillow release the file handle, so
        # iterating over the whole dataset does not accumulate open files.
        image.load()

        return {
            "image": image,  # Should be a PIL image
            # NOTE(review): "qa" is a list with a single pair; consumers in
            # this notebook read qa[0]. The original author questioned why it
            # is a list, so the shape is kept unchanged here.
            "qa": [
                {
                    "question": prompt,
                    "answer": sample["answer"],
                }
            ],
            "encounter_id": sample["encounter_id"],
        }

In [None]:
!pip install einops

In [None]:
# moondream2/inference.py

In [None]:
def print_sample_info(encounter_id, question, answer):
    """Print one sample's id, question and answer between two separator lines.

    Each value is shown in ``name=repr(value)`` form.

    :param encounter_id: identifier of the sample
    :param question: prompt that was sent to the model
    :param answer: answer produced for the prompt
    """
    separator = "-------------"
    report_lines = (
        separator,
        f"{encounter_id=}",
        f"{question=}",
        f"{answer=}",
        separator,
    )
    print("\n".join(report_lines))

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the moondream2 model and its tokenizer from the same repository and
# the same pinned revision, so both always come from one snapshot.
# NOTE(review): trust_remote_code=True lets transformers run the modeling code
# shipped in the model repository; the pinned `revision` keeps that code fixed.
# Confirm the pinned revision is the one that was reviewed.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

In [None]:
# Run moondream2 over the validation split and store one prediction per sample.
print("START")
dataset = MAGICDataset("/content/drive/MyDrive/reddit/", "valid")

# NOTE(review): the date in the file name (2025-05-02) is kept as-is; confirm
# it is the intended run date.
output_path = '/content/drive/MyDrive/CLEF/predictions/moondream2/2025-05-02-valid-prediction.json'
# Create the target directory up front, so a missing folder cannot make the
# first checkpoint write fail after inference has already been run.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

response = []
for sample in dataset:
    encounter_id = sample["encounter_id"]
    question = sample['qa'][0]['question']
    encoded_image = model.encode_image(sample['image'])
    answer = model.answer_question(encoded_image, question, tokenizer)
    result = {
        "encounter_id": encounter_id,
        "responses": [{
            "content_en": answer
        }]
    }
    response.append(result)
    # The whole list is rewritten after every sample on purpose: it acts as a
    # checkpoint, so finished predictions survive an interrupted session.
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(response, f)
    print_sample_info(encounter_id, question, answer)