# VQA Model Result Evaluation

## 0. Set-Up Environment

### 0.1. Import Necessary Libraries

In [16]:
import json
from pathlib import Path
from typing import Optional

from datasets import Dataset, disable_progress_bars, load_dataset

from utils.enums import VQAStrategyType
from utils.notebook_helpers import display_base64_image, display_formatted_section
from utils.string_formatting_helpers import format_vqa_strategy_name
from visual_qa_model import VisualQAModel
from visual_qa_strategies.rag_q_vqa_strategy import RagQVQAStrategy
from visual_qa_strategies.zero_shot_vqa_strategy import ZeroShotVQAStrategy

### 0.2. Detect Google Colab Form Annotation Automatically

In [2]:
# Load the ipyform IPython extension and switch on its `--auto-detect` option.
# NOTE(review): per the section title this is what makes the `# @param` / `# @title`
# annotations used later in the notebook render as a form — confirm against ipyform docs.
%load_ext ipyform
%form_config --auto-detect 1

### 0.3. Define Helper Functions

In [3]:
def visualize_qa_pair_by_id(
    model: VisualQAModel,
    dataset: Dataset,
    id: int,
    image_width: Optional[int] = None,
    image_height: Optional[int] = None,
    possible_options: Optional[list] = None
) -> None:
    """
    Display one dataset row (id, question, context image and answer options)
    together with the answer predicted by ``model``.

    The gold option (``row['correct_option']``) is rendered in green and the
    predicted option in red. When the prediction equals the gold option only
    the green styling is applied, because the gold check runs first.

    Args:
        model: Model whose ``generate_answer_from_row`` is called on the row.
        dataset: Dataset searched for a row whose ``'index'`` equals ``id``.
        id: Value of the ``'index'`` column identifying the row to show.
        image_width: Optional width forwarded to ``display_base64_image``.
        image_height: Optional height forwarded to ``display_base64_image``.
        possible_options: Option labels that are both offered to the model and
            listed in the output. Each label must be a column of the row.
            Defaults to ``['A', 'B', 'C', 'D']``.

    Raises:
        ValueError: If no row of ``dataset`` has ``'index'`` equal to ``id``.
    """
    # Single source of truth for the option labels: the same labels are sent
    # to the model and rendered below, so the two can never drift apart.
    if possible_options is None:
        possible_options = ['A', 'B', 'C', 'D']
    else:
        possible_options = list(possible_options)

    # Obtain row by index
    filtered_dataset = dataset.filter(lambda row: row['index'] == id)
    if len(filtered_dataset) == 0:
        raise ValueError(f"No row found with index {id}")
    row = filtered_dataset[0]

    # Display row id
    display_formatted_section(
        section_name="ID",
        section_style="margin: 20px 0;",
        section_content=row['index']
    )

    # Display question
    display_formatted_section(
        section_name="Question",
        section_style="margin-bottom: 20px;",
        section_content=row['question']
    )

    # Display context image (the section header is emitted with empty content,
    # the image itself is rendered by the dedicated helper right after it)
    display_formatted_section(
        section_name="Context Image",
        section_style="margin-bottom: 20px;",
        section_content=""
    )
    display_base64_image(
        base64_image=row['image'],
        width=image_width,
        height=image_height
    )

    # Predict answer; a copy of the labels is handed over so that the list
    # used for rendering below is independent of whatever the model does with it
    predicted_option = model.generate_answer_from_row(
        row=row,
        possible_options=list(possible_options)
    )

    # Display possible answers marking both the gold and the predicted option
    # NOTE(review): option texts are interpolated into HTML unescaped — confirm
    # the dataset never contains raw '<' / '&' characters that should be shown literally.
    formatted_options = []
    for option in possible_options:
        if option == row['correct_option']:
            formatted_options.append(f"<p style='color: rgb(0, 255, 0);'><b>{option}) {row[option]}</b>")
        elif option == predicted_option:
            formatted_options.append(f"<p style='color: rgb(255, 0, 0);'><b>{option}) {row[option]}</b>")
        else:
            formatted_options.append(f"<p>{option}) {row[option]}")
    answer = "<br><br>" + "<br>".join(formatted_options)

    display_formatted_section(
        section_name="Possible Answers",
        section_style="margin-top: 30px;",
        section_content=answer
    )

In [17]:
def fetch_result_from_json(
    evaluation_results_folder: Path,
    question_id: int,
    vqa_strategy_name: str,
    country: str = "spain",
    file_type: str = "english"
) -> str:
    """
    Read the stored prediction for one question from an evaluation results file.

    The file is expected at
    ``<evaluation_results_folder>/<country>_<file_type>_<vqa_strategy_name>_evaluation.json``
    and must contain a top-level ``'predictions'`` entry.

    Args:
        evaluation_results_folder: Folder that contains the evaluation JSON files.
        question_id: Identifier of the question whose prediction is wanted.
        vqa_strategy_name: Strategy name as it appears in the file name.
        country: Country part of the file name (defaults to ``"spain"``).
        file_type: Language part of the file name (defaults to ``"english"``).

    Returns:
        The entry stored under ``'predictions'`` for ``question_id``.

    Raises:
        FileNotFoundError: If the evaluation file does not exist.
        KeyError: If ``'predictions'`` is missing, or is a JSON object without
            an entry for ``question_id``.
        IndexError: If ``'predictions'`` is a JSON array and ``question_id``
            is out of range.
    """
    evaluation_results_filename = f'{country}_{file_type}_{vqa_strategy_name}_evaluation.json'
    evaluation_results_path = Path(evaluation_results_folder) / evaluation_results_filename
    with open(evaluation_results_path, mode='r', encoding='utf-8') as evaluation_file:
        evaluation_data = json.load(evaluation_file)

    predictions = evaluation_data['predictions']
    if isinstance(predictions, dict):
        # JSON object keys are always strings after json.load, so an integer
        # id has to be converted before it can match a stored key.
        prediction_key = str(question_id)
        if prediction_key not in predictions:
            raise KeyError(
                f"No prediction stored for question id {question_id} in {evaluation_results_path}"
            )
        return predictions[prediction_key]

    # JSON array: keep the plain positional lookup
    return predictions[question_id]

## 1. Evaluate VQA Approaches

Define Constants

In [4]:
# Relative folder holding the processed WorldMedQA-V TSV files
DATASET_DIR = Path("..") / "data" / "WorldMedQA-V"
# Folder handed to `evaluate` as the save location
RESULTS_DIR = Path("..") / "evaluation_results"

# Name of the model handed to VisualQAModel
MODEL_NAME = "llava"

# Dataset subset selectors; together they form the dataset file name prefix
COUNTRY = "spain"
FILE_TYPE = "english"

Load Dataset

In [5]:
# Resolve the TSV file for the selected country / language subset
dataset_filename = "_".join([COUNTRY, FILE_TYPE, "processed.tsv"])
data_filepath = str(DATASET_DIR.joinpath(dataset_filename))

# Read the tab-separated file through the generic "csv" loader and keep its "train" split
world_med_qa_v_dataset = load_dataset("csv", data_files=[data_filepath], sep="\t")["train"]

# Show the dataset summary as the cell output
world_med_qa_v_dataset

Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

### 1.1. Zero-Shot

Load Model

In [6]:
# Instantiate the VQA model for the selected subset, starting with the zero-shot strategy
llava_model = VisualQAModel(
    model_name=MODEL_NAME,
    country=COUNTRY,
    file_type=FILE_TYPE,
    visual_qa_strategy=ZeroShotVQAStrategy(),
)

- Loading Zero-Shot Strategy ...
+ Zero-Shot Strategy loaded.
- Loading Llava Model ...
+ Llava Model Loaded.


Evaluate Model

In [7]:
# Evaluate on a 5-row subset (`take(5)`) only, passing RESULTS_DIR as the save location
llava_model.evaluate(save_path=RESULTS_DIR, dataset=world_med_qa_v_dataset.take(5))

Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:24<00:00, 76.97s/it]


### 1.2. RAG

Define Model Specific Constants

In [7]:
# Folder and name of the index handed to the RAG strategy
INDEX_DIR = Path("..") / "data" / "WikiMed" / "indexed_db"
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"

# Embedding model name handed to the RAG strategy
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"

# Value passed as `relevant_docs_count` to the RAG strategy
RELEVANT_DOCS_COUNT = 1

#### 1.2.1. RAG Q

Load Model

In [8]:
# Replace the strategy of the existing model with RAG Q, configured from the RAG constants
llava_model.visual_qa_strategy = RagQVQAStrategy(
    relevant_docs_count=RELEVANT_DOCS_COUNT,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    index_name=INDEX_NAME,
    index_dir=INDEX_DIR,
)

- Loading RAG Q Strategy ...
	- Loading Embeddings ...
	+ Embeddings Loaded.
	- Loading Index ...
	+ Index Loaded.
	- Loading Retriever ...
	+ Retriever Loaded.
+ RAG Q Strategy loaded.
- Loading Llava Model ...
+ Llava Model Loaded.


Evaluate Model

In [9]:
# Same 5-row subset (`take(5)`) as in the zero-shot run, now with the RAG Q strategy set on the model
llava_model.evaluate(save_path=RESULTS_DIR, dataset=world_med_qa_v_dataset.take(5))

Evaluating model (spain_english subset) ...:   0%|          | 0/5 [00:00<?, ?it/s]

Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [18:23<00:00, 220.77s/it]


#### 1.2.2. RAG Q+As

**TODO:** evaluation for this strategy has not been added yet.

#### 1.2.3. RAG IMG

**TODO:** evaluation for this strategy has not been added yet.

#### 1.2.4. RAG DB, RERANKER, ...

**TODO:** evaluation for these strategies has not been added yet.

## 2. VQA Approaches Playground

Define Model Specific Constants

In [6]:
# Relative folder holding the processed WorldMedQA-V TSV files
DATASET_DIR = Path("..") / "data" / "WorldMedQA-V"
# Folder the stored evaluation results are read from in the playground
RESULTS_DIR = Path("..") / "evaluation_results"

# Name of the model handed to VisualQAModel
MODEL_NAME = "llava"

# Dataset subset selectors; together they form the dataset file name prefix
COUNTRY = "spain"
FILE_TYPE = "english"

Define RAG Q Specific Constants

In [7]:
# Folder and name of the index handed to the RAG Q strategy
INDEX_DIR = Path("..") / "data" / "WikiMed" / "indexed_db"
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"

# Embedding model name handed to the RAG Q strategy
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"

# Value passed as `relevant_docs_count` to the RAG Q strategy
RELEVANT_DOCS_COUNT = 1

Define Possible VQA Strategies

In [8]:
# Registry of the strategies selectable in the playground form, keyed by strategy type.
# The strategy objects are constructed right here, in the order they are listed.
vqa_strategies = {
    VQAStrategyType.ZERO_SHOT: ZeroShotVQAStrategy(),
    VQAStrategyType.RAG_Q: RagQVQAStrategy(
        index_dir=INDEX_DIR,
        index_name=INDEX_NAME,
        embedding_model_name=EMBEDDING_MODEL_NAME,
        relevant_docs_count=RELEVANT_DOCS_COUNT
    ),
    # Placeholders: no strategy instance is configured for these types
    VQAStrategyType.RAG_Q_AS: None,
    VQAStrategyType.RAG_IMG: None,
    VQAStrategyType.RAG_DB_RERANKER: None
}

- Loading Zero-Shot Strategy ...
+ Zero-Shot Strategy loaded.
- Loading RAG Q Strategy ...
	- Loading Embeddings ...
	+ Embeddings Loaded.
	- Loading Index ...
	+ Index Loaded.
	- Loading Retriever ...
	+ Retriever Loaded.
+ RAG Q Strategy loaded.


Load Dataset

In [9]:
# Resolve the TSV file for the selected country / language subset
dataset_filename = "_".join([COUNTRY, FILE_TYPE, "processed.tsv"])
data_filepath = str(DATASET_DIR.joinpath(dataset_filename))

# Read the tab-separated file through the generic "csv" loader and keep its "train" split
world_med_qa_v_dataset = load_dataset("csv", data_files=[data_filepath], sep="\t")["train"]

# Show the dataset summary as the cell output
world_med_qa_v_dataset

Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

Experiment with the Models

In [10]:
# Turn off the `datasets` progress bars so that the `dataset.filter` call made by
# `visualize_qa_pair_by_id` does not print a progress bar for every lookup
disable_progress_bars()

In [18]:
# @title Interactive VQA Model Exploration Form
vqa_strategy_type = 'Zero-Shot' # @param ["Zero-Shot", "RAG Q", "RAG Q+As", "RAG IMG", "RAG DB-Reranker"]
question_id = 1 # @param {"type":"integer"}
image_width = 600 # @param {"type":"integer"}
action = 'Fetch from JSON' # @param ["Execute Model", "Fetch from JSON"]


if action == "Execute Model":
    # Run the model live on the selected question.
    # NOTE(review): `vqa_strategies` is keyed by VQAStrategyType members while the form
    # yields a plain string — this lookup only works if the enum members compare and
    # hash equal to these display strings; confirm against utils.enums.
    selected_strategy = vqa_strategies[vqa_strategy_type]
    if selected_strategy is None:
        # Fail early with a readable message instead of handing None to the model
        raise NotImplementedError(
            f"No strategy instance is configured in `vqa_strategies` for '{vqa_strategy_type}'."
        )
    visualize_qa_pair_by_id(
        model=VisualQAModel(
            visual_qa_strategy=selected_strategy,
            model_name=MODEL_NAME,
            country=COUNTRY,
            file_type=FILE_TYPE
        ),
        dataset=world_med_qa_v_dataset,
        id=question_id,
        image_width=image_width
    )
elif action == "Fetch from JSON":
    # Look up the prediction stored by a previous evaluation run instead of running the model
    result = fetch_result_from_json(
        evaluation_results_folder=RESULTS_DIR,
        question_id=question_id,
        vqa_strategy_name=format_vqa_strategy_name(strategy_name=vqa_strategy_type)
    )
    print(result)
else:
    # The form only offers the two actions above; anything else is a typo in the cell
    raise ValueError(f"Unknown action: {action!r}")

FormWidget(children=(VBox(children=(HTML(value=''), HTML(value='<h2>Interactive VQA Model Exploration Form</h2…