# VQA Model Result Evaluation

## 0. Environment Setup

### 0.1. Import Necessary Libraries

In [1]:
from pathlib import Path

from datasets import disable_progress_bars

import src.utils.dataset_helpers.world_med_qa_v.dataset_management as world_med_qa_v_dataset_management
import src.utils.dataset_helpers.world_med_qa_v.plot_helpers as world_med_qa_v_plot_helpers
from src.utils.data_definitions import DocSplitOptions
from src.utils.enums import RagQPromptType, VQAStrategyType, ZeroShotPromptType
from src.utils.string_formatting_helpers import to_snake_case_strategy_name
from src.visual_qa_model import VisualQAModel
from src.visual_qa_strategies.base_vqa_strategy import BaseVQAStrategy
from src.visual_qa_strategies.rag_q_vqa_strategy import RagQVQAStrategy
from src.visual_qa_strategies.zero_shot_vqa_strategy import ZeroShotVQAStrategy

### 0.2. Configure Environment Settings

Automatically Detect Google Colab Form Annotations

In [2]:
# Load the ipyform extension; the %form_config magic below is presumably provided by it.
%load_ext ipyform
# Turn on auto-detection so cells carrying `# @param` / `# @title` annotations are
# treated as forms (see the interactive exploration cell at the end of the notebook).
%form_config --auto-detect 1

Enable Automatic Module Reloading

In [3]:
# Load IPython's autoreload extension.
%load_ext autoreload
# Mode 2: re-import modules before each cell execution, so edits to the
# project's `src` modules are picked up without restarting the kernel.
%autoreload 2

Disable Progress Bar for Dataset Filtering

In [4]:
# Silence the progress bars of the `datasets` library (imported above) so that
# dataset operations do not clutter the cell outputs.
disable_progress_bars()

## 1. Evaluation of VQA Approaches

Define Constants

In [5]:
# Dataset subset under evaluation (selects the `<country>_<file_type>` file).
COUNTRY = "spain"
FILE_TYPE = "english"

# Name of the model passed to VisualQAModel.
MODEL_NAME = "llava"

# Filesystem locations: dataset input folder and evaluation output folder.
DATASET_DIR = Path("data") / "WorldMedQA-V"
RESULTS_DIR = Path("evaluation_results")

Load Dataset

In [6]:
# Load the WorldMedQA-V subset selected by COUNTRY and FILE_TYPE.
world_med_qa_v_dataset = world_med_qa_v_dataset_management.load_vqa_dataset(
    data_path=DATASET_DIR, country=COUNTRY, file_type=FILE_TYPE
)

# Bare trailing expression so the notebook renders the dataset summary.
world_med_qa_v_dataset

- Loading WorldMedQA-V dataset (filename: spain_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: spain_english_processed.tsv) loaded.


Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

### 1.1. Zero-Shot Evaluation

Load Model

In [7]:
# Build the model wrapper once, starting with the zero-shot strategy and its
# first prompt template; later cells swap the prompt type / strategy in place.
llava_model = VisualQAModel(
    visual_qa_strategy=ZeroShotVQAStrategy(prompt_type=ZeroShotPromptType.V1),
    model_name=MODEL_NAME, country=COUNTRY, file_type=FILE_TYPE,
)

- Loading Zero-Shot strategy ...
+ Zero-Shot strategy loaded.
- Loading Llava model (prompt template: zs_v1) ...
+ Llava model (prompt template: zs_v1) loaded.


Evaluate Model (Prompt Template: `zs_v1`)

In [None]:
# Evaluate on a 5-row subset of the dataset, passing RESULTS_DIR as the save location.
llava_model.evaluate(dataset=world_med_qa_v_dataset.take(5), save_path=RESULTS_DIR)

Evaluate Model (Prompt Template: `zs_v2`)

In [9]:
# Switch the already-loaded zero-shot strategy to its second prompt template.
llava_model.visual_qa_strategy.prompt_type = ZeroShotPromptType.V2

# Evaluate on a 5-row subset of the dataset, passing RESULTS_DIR as the save location.
llava_model.evaluate(dataset=world_med_qa_v_dataset.take(5), save_path=RESULTS_DIR)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:20<00:00, 76.08s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `zs_v3`)

In [10]:
# Switch the already-loaded zero-shot strategy to its third prompt template.
llava_model.visual_qa_strategy.prompt_type = ZeroShotPromptType.V3

# Evaluate on a 5-row subset of the dataset, passing RESULTS_DIR as the save location.
llava_model.evaluate(dataset=world_med_qa_v_dataset.take(5), save_path=RESULTS_DIR)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:24<00:00, 76.90s/it]

+ Model evaluation (spain_english subset) completed.





### 1.2. Retrieval-Augmented Generation (RAG) Evaluation

Define RAG-Specific Constants

In [8]:
# Embedding model handed to the RAG strategy, and the index that goes with it.
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"
INDEX_DIR = Path("data", "WikiMed", "indexed_db")

# Value passed as `relevant_docs_count` to the RAG strategy.
RELEVANT_DOCS_COUNT = 1

#### 1.2.1. RAG Q (Question Only)

Load Model

In [None]:
# Replace the strategy on the already-loaded model with the question-only RAG
# strategy, starting from its first prompt template.
llava_model.visual_qa_strategy = RagQVQAStrategy(
    prompt_type=RagQPromptType.V1,
    index_dir=INDEX_DIR, index_name=INDEX_NAME,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    relevant_docs_count=RELEVANT_DOCS_COUNT,
)

- Loading RAG Q strategy ...


ValueError: Missing required arguments: 
	- relevant_docs_count: int 


Evaluate Model (Prompt Template: `rq_v1`)

In [15]:
# Evaluate on a 5-row subset of the dataset, passing RESULTS_DIR as the save location.
llava_model.evaluate(dataset=world_med_qa_v_dataset.take(5), save_path=RESULTS_DIR)

- Evaluating model (spain_english subset) ...:   0%|          | 0/5 [00:00<?, ?it/s]


ValueError: Unexpected keyword arguments: patata

Evaluate Model (Prompt Template: `rq_v2`)

In [14]:
# Switch the RAG Q strategy to its second prompt template.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V2

# Evaluate on a 5-row subset; document splitting settings travel in a single
# DocSplitOptions object.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(5),
    save_path=RESULTS_DIR,
    doc_split_options=DocSplitOptions(chunk_size=500, chunk_overlap=0, short_docs_count=1),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:52<00:00, 82.42s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `rq_v3`)

In [1]:
# Switch the RAG Q strategy to its third prompt template.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V3

# Evaluate on a 5-row subset. The splitting settings are bundled in a
# DocSplitOptions object, matching the `rq_v2` cell: `evaluate` rejects keyword
# arguments it does not know, and the `doc_split_options=` form is the one that
# ran successfully in this notebook.
# NOTE(review): confirm that `evaluate` no longer accepts the loose
# chunk_size / chunk_overlap / short_docs_count keywords.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(5),
    save_path=RESULTS_DIR,
    doc_split_options=DocSplitOptions(
        chunk_size=500,
        chunk_overlap=0,
        short_docs_count=1
    )
)

NameError: name 'RagQPromptType' is not defined

Evaluate Model (Prompt Template: `rq_v4`)

In [16]:
# Switch the RAG Q strategy to its fourth prompt template.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V4

# Evaluate on a 5-row subset of the dataset, passing RESULTS_DIR as the save location.
llava_model.evaluate(dataset=world_med_qa_v_dataset.take(5), save_path=RESULTS_DIR)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [16:48<00:00, 201.77s/it]

+ Model evaluation (spain_english subset) completed.





#### 1.2.2. RAG Q+As (Question + Answers)

#### 1.2.3. RAG IMG (Image-Based)

#### 1.2.4. RAG DB-Reranker (Database with Reranker)

## 2. VQA Approaches Exploration

Define Model Specific Constants

In [9]:
# Same values as the evaluation section; repeated here so the exploration
# section can be run on its own after the setup cells.

# Dataset subset to explore (selects the `<country>_<file_type>` file).
COUNTRY = "spain"
FILE_TYPE = "english"

# Name of the model passed to VisualQAModel.
MODEL_NAME = "llava"

# Filesystem locations: dataset input folder and stored evaluation results.
DATASET_DIR = Path("data") / "WorldMedQA-V"
RESULTS_DIR = Path("evaluation_results")

Define RAG Q Specific Constants

In [10]:
# Embedding model handed to the RAG Q strategy, and the index that goes with it.
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"
INDEX_DIR = Path("data", "WikiMed", "indexed_db")

# Value passed as `relevant_docs_count` to the RAG Q strategy.
RELEVANT_DOCS_COUNT = 1

Define Possible VQA Strategies

In [11]:
# Registry of strategy objects keyed by strategy type, filled in the order the
# strategies should be loaded.
vqa_strategies: dict[VQAStrategyType, BaseVQAStrategy] = {}

vqa_strategies[VQAStrategyType.ZERO_SHOT] = ZeroShotVQAStrategy(
    prompt_type=ZeroShotPromptType.V1
)
vqa_strategies[VQAStrategyType.RAG_Q] = RagQVQAStrategy(
    prompt_type=RagQPromptType.V1,
    index_dir=INDEX_DIR, index_name=INDEX_NAME,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    relevant_docs_count=RELEVANT_DOCS_COUNT,
)

# The remaining strategy types have no strategy object yet and map to None.
vqa_strategies.update(dict.fromkeys((
    VQAStrategyType.RAG_Q_AS,
    VQAStrategyType.RAG_IMG,
    VQAStrategyType.RAG_DB_RERANKER,
)))

- Loading Zero-Shot strategy ...
+ Zero-Shot strategy loaded.
- Loading RAG Q strategy ...
	- Loading Embeddings ...
	+ Embeddings Loaded.
	- Loading Index ...
	+ Index Loaded.
	- Loading Retriever ...
	+ Retriever Loaded.
+ RAG Q strategy loaded.


Load Dataset

In [12]:
# Load the WorldMedQA-V subset selected by COUNTRY and FILE_TYPE.
world_med_qa_v_dataset = world_med_qa_v_dataset_management.load_vqa_dataset(
    data_path=DATASET_DIR, country=COUNTRY, file_type=FILE_TYPE
)

# Bare trailing expression so the notebook renders the dataset summary.
world_med_qa_v_dataset

- Loading WorldMedQA-V dataset (filename: spain_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: spain_english_processed.tsv) loaded.


Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

Experiment with the Models

In [13]:
# @title Interactive VQA Model Exploration Form
vqa_strategy_type = 'Zero-Shot' # @param ["Zero-Shot", "RAG Q", "RAG Q+As", "RAG IMG", "RAG DB-Reranker"]
prompt_type = "zs_v1" # @param ["zs_v1", "zs_v2", "zs_v3", "rq_v1", "rq_v2", "rq_v3", "rq_v4"]
question_id = 1 # @param {"type":"integer"}
image_width = 600 # @param {"type":"integer"}
action = 'Fetch from JSON' # @param ["Execute Model", "Fetch from JSON"]

# Shows one question/answer pair together with the model's answer. The answer is
# either generated on the spot ("Execute Model") or read back from previously
# saved evaluation results ("Fetch from JSON").

row = world_med_qa_v_dataset_management.get_dataset_row_by_id(
    dataset=world_med_qa_v_dataset,
    question_id=question_id
)
formatted_vqa_strategy_type = to_snake_case_strategy_name(strategy_name=vqa_strategy_type)

if action == "Execute Model":
    chosen_vqa_strategy_type = VQAStrategyType(formatted_vqa_strategy_type)
    chosen_vqa_strategy = vqa_strategies[chosen_vqa_strategy_type]
    if chosen_vqa_strategy is None:
        # Some strategy types are registered as None; fail with a clear message
        # instead of an AttributeError on None further down.
        raise NotImplementedError(
            f"The '{vqa_strategy_type}' strategy is not available yet."
        )

    # Each strategy has its own prompt-type enum; coercing every choice with
    # ZeroShotPromptType made the "RAG Q" strategy unusable from this form.
    # NOTE(review): assumes RagQPromptType values are the "rq_v*" strings offered
    # above, mirroring the "zs_v*" values of ZeroShotPromptType — confirm.
    prompt_type_enum_by_strategy = {
        VQAStrategyType.ZERO_SHOT: ZeroShotPromptType,
        VQAStrategyType.RAG_Q: RagQPromptType,
    }
    chosen_vqa_strategy.prompt_type = prompt_type_enum_by_strategy[chosen_vqa_strategy_type](prompt_type)

    model = VisualQAModel(
        visual_qa_strategy=chosen_vqa_strategy,
        model_name=MODEL_NAME,
        country=COUNTRY,
        file_type=FILE_TYPE
    )
    model_answer = model.generate_answer_from_row(
        row=row,
        possible_options=['A', 'B', 'C', 'D'],
        verbose=True
    )
elif action == "Fetch from JSON":
    model_answer = world_med_qa_v_dataset_management.fetch_model_answer_from_json(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_name=formatted_vqa_strategy_type,
        country=COUNTRY,
        file_type=FILE_TYPE,
        prompt_type_name=prompt_type,
        question_id=question_id,
    )
else:
    # Previously an unrecognised action silently did nothing.
    raise ValueError(f"Unsupported action: {action!r}")

# Single rendering path shared by both actions.
world_med_qa_v_plot_helpers.visualize_qa_pair_row(
    row=row,
    image_width=image_width,
    model_answer=model_answer
)

FormWidget(children=(VBox(children=(HTML(value=''), HTML(value='<h2>Interactive VQA Model Exploration Form</h2…