# VQA Model Result Evaluation

## 0. Environment Setup

### 0.1. Import Necessary Libraries

In [1]:
from pathlib import Path

from datasets import disable_progress_bars

import src.utils.dataset_helpers.world_med_qa_v.dataset_management as world_med_qa_v_dataset_management
import src.utils.dataset_helpers.world_med_qa_v.plot_helpers as world_med_qa_v_plot_helpers
from src.utils.data_definitions import DocSplitOptions
from src.utils.enums import RagQPromptType, VQAStrategyType, ZeroShotPromptType
from src.utils.string_formatting_helpers import to_snake_case_strategy_name
from src.utils.text_splitters.paragraph_splitter import ParagraphSplitter
from src.visual_qa_model import VisualQAModel
from src.visual_qa_strategies.base_vqa_strategy import BaseVQAStrategy
from src.visual_qa_strategies.rag_q_vqa_strategy import RagQVQAStrategy
from src.visual_qa_strategies.zero_shot_vqa_strategy import ZeroShotVQAStrategy

### 0.2. Configure Environment Settings

Detect Google Colab Form Annotations Automatically

In [2]:
# Load the ipyform extension so Colab-style `# @param` form annotations in
# later cells are recognised.
# NOTE(review): the exact effect of `--auto-detect 1` is defined by ipyform — confirm against its docs.
%load_ext ipyform
%form_config --auto-detect 1

Enable Automatic Module Reloading

In [3]:
# Reload imported modules automatically before each cell executes, so edits to
# the local `src` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

Disable Progress Bar for Dataset Filtering

In [4]:
# Turn off the `datasets` library progress bars to keep cell output compact.
disable_progress_bars()

## 1. Evaluation of VQA Approaches

Define Constants

In [5]:
# Dataset location, model and subset selection used by the evaluation cells below.
DATASET_DIR = Path("data") / "WorldMedQA-V"
MODEL_NAME = "llava"
COUNTRY = "spain"      # country subset of the dataset
FILE_TYPE = "english"  # language variant of the subset
RESULTS_DIR = Path("evaluation_results")  # passed as `save_path` to evaluate()

Load Dataset

In [6]:
# Load the WorldMedQA-V subset selected by COUNTRY / FILE_TYPE.
world_med_qa_v_dataset = world_med_qa_v_dataset_management.load_vqa_dataset(
    file_type=FILE_TYPE,
    country=COUNTRY,
    data_path=DATASET_DIR,
)
world_med_qa_v_dataset  # bare last expression: displays the Dataset summary

- Loading WorldMedQA-V dataset (filename: spain_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: spain_english_processed.tsv) loaded.


Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

### 1.1. Zero-Shot Evaluation

Load Model

In [7]:
# Build the model wrapper with the zero-shot strategy, starting on prompt template V1.
llava_model = VisualQAModel(
    model_name=MODEL_NAME,
    country=COUNTRY,
    file_type=FILE_TYPE,
    visual_qa_strategy=ZeroShotVQAStrategy(prompt_type=ZeroShotPromptType.V1),
)

- Loading Zero-Shot strategy ...
+ Zero-Shot strategy loaded.
- Loading Llava model (prompt template: zs_v1) ...
+ Llava model (prompt template: zs_v1) loaded.


Evaluate Model (Prompt Template: `zs_v1`)

In [None]:
# Pin the prompt template explicitly so this cell really evaluates `zs_v1`,
# even when it is re-run after the `zs_v2` / `zs_v3` cells have mutated the
# shared strategy object (those cells set their template the same way).
llava_model.visual_qa_strategy.prompt_type = ZeroShotPromptType.V1
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(5),  # only the first 5 questions
    save_path=RESULTS_DIR
)

Evaluate Model (Prompt Template: `zs_v2`)

In [9]:
# Switch the shared zero-shot strategy to prompt template V2, then evaluate the
# first 5 questions; `save_path` tells evaluate() where to store results.
llava_model.visual_qa_strategy.prompt_type = ZeroShotPromptType.V2
llava_model.evaluate(
    save_path=RESULTS_DIR,
    dataset=world_med_qa_v_dataset.take(5),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:20<00:00, 76.08s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `zs_v3`)

In [10]:
# Switch the shared zero-shot strategy to prompt template V3, then evaluate the
# first 5 questions; `save_path` tells evaluate() where to store results.
llava_model.visual_qa_strategy.prompt_type = ZeroShotPromptType.V3
llava_model.evaluate(
    save_path=RESULTS_DIR,
    dataset=world_med_qa_v_dataset.take(5),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:24<00:00, 76.90s/it]

+ Model evaluation (spain_english subset) completed.





### 1.2. Retrieval-Augmented Generation (RAG) Evaluation

Define RAG-Specific Constants

In [8]:
# Retrieval settings handed to the RAG strategy: index location and name,
# embedding model, and the `relevant_docs_count` value.
INDEX_DIR = Path("data") / "WikiMed" / "indexed_db"
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"
RELEVANT_DOCS_COUNT = 2

#### 1.2.1. RAG Q (Question Only)

Load Model

In [9]:
# Swap the existing model over to the question-only RAG strategy (prompt V1)
# instead of constructing a second model.
llava_model.visual_qa_strategy = RagQVQAStrategy(
    prompt_type=RagQPromptType.V1,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    index_dir=INDEX_DIR,
    index_name=INDEX_NAME,
    relevant_docs_count=RELEVANT_DOCS_COUNT,
)

- Loading RAG Q strategy ...
	- Loading Embeddings ...
	+ Embeddings Loaded.
	- Loading Index ...
	+ Index Loaded.
	- Loading Retriever ...
	+ Retriever Loaded.
+ RAG Q strategy loaded.
- Loading Llava model (prompt template: rq_v1) ...
+ Llava model (prompt template: rq_v1) loaded.


Evaluate Model (Prompt Template: `rq_v1`)

In [10]:
# Pin the prompt template explicitly so this cell really evaluates `rq_v1`,
# even when it is re-run after the `rq_v2`..`rq_v4` cells have mutated the
# shared strategy object (those cells set their template the same way).
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V1
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),  # only the first 2 questions
    save_path=RESULTS_DIR
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [07:56<00:00, 238.45s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `rq_v2`)

In [11]:
# Prompt template V2 is evaluated with a paragraph splitter applied to the
# retrieved documents (token_count=2, title included).
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V2
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter=ParagraphSplitter(token_count=2, add_title=True),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:01<00:00, 120.58s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `rq_v3`)

In [1]:
# NOTE(review): the recorded run of this cell (execution count 1) ended in a
# NameError, presumably because it was executed on a fresh kernel before the
# import cell; re-run the notebook from the top before trusting this section.
# NOTE(review): this call passes chunk_size / chunk_overlap / short_docs_count,
# whereas the rq_v2 cell passes a `doc_splitter` object — confirm which keyword
# arguments `evaluate` currently accepts.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V3
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(5),
    save_path=RESULTS_DIR,
    chunk_size=500,
    chunk_overlap=0,
    short_docs_count=1
)

NameError: name 'RagQPromptType' is not defined

Evaluate Model (Prompt Template: `rq_v4`)

In [16]:
# Switch the shared RAG Q strategy to prompt template V4, then evaluate the
# first 5 questions; `save_path` tells evaluate() where to store results.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V4
llava_model.evaluate(
    save_path=RESULTS_DIR,
    dataset=world_med_qa_v_dataset.take(5),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [16:48<00:00, 201.77s/it]

+ Model evaluation (spain_english subset) completed.





#### 1.2.2. RAG Q+As (Question + Answers)

#### 1.2.3. RAG IMG (Image-Based)

#### 1.2.4. RAG DB-Reranker (Database with Reranker)

## 2. VQA Approaches Exploration

Define Model-Specific Constants

In [9]:
# Same values as the constants defined in section 1; repeated here, presumably
# so the exploration section can be run without the evaluation section.
DATASET_DIR = Path("data") / "WorldMedQA-V"
MODEL_NAME = "llava"
COUNTRY = "spain"      # country subset of the dataset
FILE_TYPE = "english"  # language variant of the subset
RESULTS_DIR = Path("evaluation_results")  # folder the stored answers are fetched from

Define RAG Q-Specific Constants

In [10]:
# Retrieval settings for the exploration section. Note that this rebinds
# RELEVANT_DOCS_COUNT to 1, whereas section 1.2 uses 2.
INDEX_DIR = Path("data") / "WikiMed" / "indexed_db"
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"
RELEVANT_DOCS_COUNT = 1

Define Possible VQA Strategies

In [11]:
# Registry mapping each strategy type to a strategy instance for the form below.
# The dict is built eagerly, so running this cell constructs both the zero-shot
# and the RAG Q strategy (the latter loads embeddings, index and retriever, as
# the log output shows).
# NOTE(review): the three `None` values look like placeholders for strategies
# that are not implemented yet; the declared value type does not allow None, and
# callers must check for it before using an entry.
vqa_strategies: dict[VQAStrategyType, BaseVQAStrategy] = {
    VQAStrategyType.ZERO_SHOT: ZeroShotVQAStrategy(prompt_type=ZeroShotPromptType.V1),
    VQAStrategyType.RAG_Q: RagQVQAStrategy(
        prompt_type=RagQPromptType.V1,
        index_dir=INDEX_DIR,
        index_name=INDEX_NAME,
        embedding_model_name=EMBEDDING_MODEL_NAME,
        relevant_docs_count=RELEVANT_DOCS_COUNT
    ),
    VQAStrategyType.RAG_Q_AS: None,
    VQAStrategyType.RAG_IMG: None,
    VQAStrategyType.RAG_DB_RERANKER: None
}

- Loading Zero-Shot strategy ...
+ Zero-Shot strategy loaded.
- Loading RAG Q strategy ...
	- Loading Embeddings ...
	+ Embeddings Loaded.
	- Loading Index ...
	+ Index Loaded.
	- Loading Retriever ...
	+ Retriever Loaded.
+ RAG Q strategy loaded.


Load Dataset

In [12]:
# Load the WorldMedQA-V subset again for the exploration section (same call as in section 1).
world_med_qa_v_dataset = world_med_qa_v_dataset_management.load_vqa_dataset(
    file_type=FILE_TYPE,
    country=COUNTRY,
    data_path=DATASET_DIR,
)
world_med_qa_v_dataset  # bare last expression: displays the Dataset summary

- Loading WorldMedQA-V dataset (filename: spain_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: spain_english_processed.tsv) loaded.


Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

Experiment with the Models

In [13]:
# @title Interactive VQA Model Exploration Form
vqa_strategy_type = 'Zero-Shot' # @param ["Zero-Shot", "RAG Q", "RAG Q+As", "RAG IMG", "RAG DB-Reranker"]
prompt_type = "zs_v1" # @param ["zs_v1", "zs_v2", "zs_v3", "rq_v1", "rq_v2", "rq_v3", "rq_v4"]
question_id = 1 # @param {"type":"integer"}
image_width = 600 # @param {"type":"integer"}
action = 'Fetch from JSON' # @param ["Execute Model", "Fetch from JSON"]


# Snake-case form of the selected strategy name; both branches below need it.
formatted_vqa_strategy_type = to_snake_case_strategy_name(strategy_name=vqa_strategy_type)

row = world_med_qa_v_dataset_management.get_dataset_row_by_id(
    dataset=world_med_qa_v_dataset,
    question_id=question_id
)

if action == "Execute Model":
    # Run the model live on the selected question.
    selected_strategy_type = VQAStrategyType(formatted_vqa_strategy_type)
    chosen_vqa_strategy = vqa_strategies[selected_strategy_type]
    if chosen_vqa_strategy is None:
        # The registry holds None for strategies that have no implementation yet;
        # fail with a clear message instead of an AttributeError further down.
        raise NotImplementedError(
            f"The '{vqa_strategy_type}' strategy is not available yet."
        )

    # Each strategy has its own prompt-type enum. Coercing every choice with
    # ZeroShotPromptType made all "rq_*" templates fail for the RAG Q strategy.
    prompt_type_enum_by_strategy = {
        VQAStrategyType.ZERO_SHOT: ZeroShotPromptType,
        VQAStrategyType.RAG_Q: RagQPromptType,
    }
    prompt_type_enum = prompt_type_enum_by_strategy[selected_strategy_type]
    # Raises ValueError when the template does not belong to the chosen strategy
    # (e.g. "rq_v1" together with "Zero-Shot").
    chosen_vqa_strategy.prompt_type = prompt_type_enum(prompt_type)

    model=VisualQAModel(
        visual_qa_strategy=chosen_vqa_strategy,
        model_name=MODEL_NAME,
        country=COUNTRY,
        file_type=FILE_TYPE
    )
    world_med_qa_v_plot_helpers.visualize_qa_pair_row(
        row=row,
        image_width=image_width,
        model_answer=model.generate_answer_from_row(
            row=row,
            possible_options=['A', 'B', 'C', 'D'],
            verbose=True
        )
    )
elif action == "Fetch from JSON":
    # Show the answer stored by an earlier evaluation run instead of running the model.
    model_answer = world_med_qa_v_dataset_management.fetch_model_answer_from_json(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_name=formatted_vqa_strategy_type,
        country=COUNTRY,
        file_type=FILE_TYPE,
        prompt_type_name=prompt_type,
        question_id=question_id,
    )
    world_med_qa_v_plot_helpers.visualize_qa_pair_row(
        row=row,
        image_width=image_width,
        model_answer=model_answer
    )

FormWidget(children=(VBox(children=(HTML(value=''), HTML(value='<h2>Interactive VQA Model Exploration Form</h2…

---

In [10]:
import src.utils.dataset_helpers.wikimed.dataset_management as wikimed_dataset_management

In [11]:
# Location of the WikiMed JSON dump and its per-document metadata table.
WIKIMED_DATASET_PATH = Path("data") / "WikiMed" / "WikiMed.json"
wikimed_dataset_metadata_df = wikimed_dataset_management.load_wikimed_dataset_metadata(
    data_path=WIKIMED_DATASET_PATH,
)
wikimed_dataset_metadata_df  # bare last expression: displays the metadata frame

- Loading WikiMed dataset metadata ...: 100%|██████████| 1.74G/1.74G [00:06<00:00, 250MB/s]


+ WikiMed dataset metadata loaded.


Unnamed: 0,id,title,word_count,sentence_count,word_quartile_interval,sentence_quartile_interval
0,1842616,Polypterus,6834,93,"[Q3, Max.)",Outlier
1,1842710,Etorphine,4126,35,"[Q3, Max.)","[Q3, Max.)"
2,1842870,Leeza Gibbons,6771,55,"[Q3, Max.)","[Q3, Max.)"
3,1842882,USS Niagara (1813),14787,124,Outlier,Outlier
4,1843050,Meramec State Park,2685,25,"[Q2, Q3)","[Q2, Q3)"
...,...,...,...,...,...,...
393613,53047425,Dom Orejudos,4190,32,"[Q3, Max.)","[Q3, Max.)"
393614,53047642,List of birds of the Prince Edward Islands,2599,3,"[Q2, Q3)","[Q1, Q2)"
393615,53047754,"Oakview, Queensland",2508,24,"[Q2, Q3)","[Q2, Q3)"
393616,53047783,Lilah Denton Lindsey,6434,52,"[Q3, Max.)","[Q3, Max.)"


In [12]:
# Look up the WikiMed article titled 'Bariatric surgery'.
row = wikimed_dataset_management.get_dataset_row_by_doc_title(
    doc_title='Bariatric surgery',
    dataset_metadata=wikimed_dataset_metadata_df,
    dataset_path=WIKIMED_DATASET_PATH,
)

In [48]:
# Look up the WikiMed article titled 'Esophagogastroduodenoscopy'.
row2 = wikimed_dataset_management.get_dataset_row_by_doc_title(
    doc_title='Esophagogastroduodenoscopy',
    dataset_metadata=wikimed_dataset_metadata_df,
    dataset_path=WIKIMED_DATASET_PATH,
)

In [49]:
# Look up the WikiMed article titled 'Mouth assessment'.
row3 = wikimed_dataset_management.get_dataset_row_by_doc_title(
    doc_title='Mouth assessment',
    dataset_metadata=wikimed_dataset_metadata_df,
    dataset_path=WIKIMED_DATASET_PATH,
)

In [10]:
from src.utils.text_splitters.paragraph_splitter import ParagraphSplitter
from langchain_core.documents import Document


# Run the three fetched WikiMed articles through the paragraph splitter, using
# the same settings as the rq_v2 evaluation (token_count=2, add_title=True).
doc_splitter = ParagraphSplitter(token_count=2, add_title=True)
shortened_docs = doc_splitter.split_documents(
    documents=[
        Document(page_content=wikimed_row['text'])
        for wikimed_row in (row, row2, row3)
    ],
)
def format_docs(docs: "list[Document | str]") -> str:
    """Join documents into one string, separated by blank lines.

    Args:
        docs: Plain strings and/or objects exposing a ``page_content``
            attribute (such as ``Document``). Previously only strings worked,
            despite the ``Document`` annotation, because ``str.join`` rejects
            non-string items.

    Returns:
        The texts joined with a blank line between them; an empty string
        for an empty list.
    """
    return "\n\n".join(
        doc if isinstance(doc, str) else doc.page_content
        for doc in docs
    )

# Print the shortened documents as one text block, separated by blank lines.
print(format_docs(shortened_docs))

NameError: name 'row' is not defined

---

In [12]:
# Fetch question 1 from the WorldMedQA-V dataset. This rebinds `row`, which
# earlier held a WikiMed article.
row = world_med_qa_v_dataset_management.get_dataset_row_by_id(
    question_id=1,
    dataset=world_med_qa_v_dataset,
)

In [19]:
from typing import Union
from langchain_core.documents import Document

from src.utils.text_splitters.base_splitter import BaseSplitter


class HamiltonSplitter(BaseSplitter):
    """Splitter that keeps only the leading paragraphs of each document.

    A document's text is cut on blank lines, and the first
    ``self._token_count`` pieces are joined back into a single string.
    ``_token_count`` and ``_add_title_if_needed`` are not defined here and
    are expected to come from ``BaseSplitter``.
    """

    def split_documents(self, documents: list[Document]) -> list[str]:
        """Shorten every document to its leading paragraphs.

        Args:
            documents: Documents whose ``page_content`` is split on blank lines.

        Returns:
            One shortened text per input document, in input order.
        """
        shortened_documents = []

        for document in documents:
            # Paragraphs are delimited by blank lines; strip stray whitespace.
            split_paragraphs = [
                par.strip() for par in document.page_content.split('\n\n')
            ]
            split_paragraphs = self._add_title_if_needed(split_document=split_paragraphs)

            # Keep only the first `_token_count` pieces of the document.
            shortened_paragraphs = split_paragraphs[:self._token_count]
            shortened_documents.append(self._join_paragraphs(shortened_paragraphs))

        return shortened_documents

    def _join_paragraphs(
        self,
        paragraphs: Union[list[str], list[Document]]
    ) -> str:
        """Join paragraphs with blank lines between them.

        Accepts plain strings as well as ``Document`` objects. Documents are
        reduced to their ``page_content``, since ``str.join`` would raise
        ``TypeError`` on them.
        """
        return "\n\n".join(
            paragraph if isinstance(paragraph, str) else paragraph.page_content
            for paragraph in paragraphs
        )


In [14]:
from src.utils.text_splitters.paragraph_splitter import ParagraphSplitter


# Answer the selected question once, shortening the retrieved documents with
# the paragraph splitter; verbose=True requests the intermediate output.
model_answer = llava_model.generate_answer_from_row(
    row=row,
    possible_options=['A', 'B', 'C', 'D'],
    doc_splitter=ParagraphSplitter(token_count=2, add_title=True),
    verbose=True,
)
print(f"model_answer: '{model_answer}'")

- Generating Answer for Question (ID: 1) ...
2
["Bariatric surgery\n\nBariatric surgery (or weight loss surgery) includes a variety of procedures performed on people who are obese. Long term weight loss through Standard of Care procedures (Roux en Y Bypass, Sleeve Gastrectomy, and Biliopancreatic Diversion with Duodenal Switch) is largely achieved by altering gut hormone levels that are responsible for hunger and satiety, leading to a new hormonal weight set point . Bariatric surgery is a hormonal surgery in these procedures, for which the alteration in gut hormones develops as a result of the procedure's restriction and malabsorption.\n\nLong-term studies show the procedures cause significant long-term loss of weight, recovery from diabetes, improvement in cardiovascular risk factors, and a mortality reduction from 40% to 23%. The U.S. National Institutes of Health recommends bariatric surgery for obese people with a body mass index (BMI) of at least 40, and for people with BMI of at 