# VQA Model Result Evaluation

## 0. Set-Up Environment

### 0.1. Import Necessary Libraries

In [None]:
from pathlib import Path

from datasets import load_dataset

from visual_qa_model import VisualQAModel
from visual_qa_strategies.rag_q_vqa_strategy import RagQVQAStrategy
from visual_qa_strategies.zero_shot_vqa_strategy import ZeroShotVQAStrategy

## 1. Evaluate VQA Approaches

Define Constants

In [2]:
# Experiment selectors: the model to run and the dataset slice (country + language file)
MODEL_NAME = "llava"
COUNTRY = "spain"
FILE_TYPE = "english"

# Locations, relative to the notebook: input dataset folder and evaluation output folder
DATASET_DIR = Path("../data/WorldMedQA-V")
RESULTS_DIR = Path("../evaluation_results")

Load Dataset

In [3]:
# Locate the pre-processed TSV for the selected country / language pair
dataset_filename = f"{COUNTRY}_{FILE_TYPE}_processed.tsv"
data_filepath = str(DATASET_DIR.joinpath(dataset_filename))

# Read the tab-separated file through the generic "csv" builder; the result is
# indexed by split name and the rows are taken from its "train" entry
loaded_splits = load_dataset("csv", data_files=[data_filepath], sep="\t")
world_med_qa_v_dataset = loaded_splits["train"]

# Display the dataset summary (column names and row count) as the cell output
world_med_qa_v_dataset

Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

### 1.1. Zero-Shot

Load Model

In [None]:
# Build the answering strategy first, then hand it to the model wrapper together
# with the model name and the dataset slice identifiers
zero_shot_strategy = ZeroShotVQAStrategy()
llava_model = VisualQAModel(
    strategy=zero_shot_strategy,
    model_name=MODEL_NAME,
    country=COUNTRY,
    file_type=FILE_TYPE,
)

ZERO SHOT MODEL LOADED !!


Answer Example Question

In [5]:
# Sanity check on a single question: answer the first dataset row, restricting the
# model to the four option letters
example_row = world_med_qa_v_dataset[0]
answer_letters = ["A", "B", "C", "D"]

llava_model.generate_answer_from_row(row=example_row, possible_options=answer_letters)

'A'

Evaluate Model

In [5]:
# Evaluate on a 5-row subset only (presumably the first 5 rows — see Dataset.take)
# and write the results under RESULTS_DIR
evaluation_subset = world_med_qa_v_dataset.take(5)

llava_model.evaluate(dataset=evaluation_subset, save_path=RESULTS_DIR)

Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [05:44<00:00, 68.85s/it]


### 1.2. RAG

Define RAG Constants

In [6]:
# Embedding model used by the RAG strategy
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"

# Location and name of the index handed to the RAG strategy
INDEX_DIR = Path("../data/WikiMed/indexed_db")
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"

# Passed as `relevant_docs_count`; presumably the number of retrieved documents
# per question — TODO confirm against the strategy implementation
RELEVANT_DOCS_COUNT = 1

#### 1.2.1. RAG Q

Switch the Model to the RAG Q Strategy

In [7]:
# Build the RAG Q strategy from the index / embedding settings
rag_q_strategy = RagQVQAStrategy(
    index_dir=INDEX_DIR,
    index_name=INDEX_NAME,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    relevant_docs_count=RELEVANT_DOCS_COUNT,
)

# NOTE: this replaces the strategy on the existing `llava_model` object in place,
# so every later call on `llava_model` uses RAG Q until the strategy is changed again
llava_model.strategy = rag_q_strategy

Embeddings loaded!
Index loaded!
Retriever loaded!
RAG Q MODEL LOADED !!


Answer Example Question

In [8]:
# Answer the first dataset row again with the current strategy on `llava_model`,
# restricting the model to the four option letters
example_row = world_med_qa_v_dataset[0]
answer_letters = ["A", "B", "C", "D"]

llava_model.generate_answer_from_row(row=example_row, possible_options=answer_letters)

RAG Q ANSWER!!


'*'

Evaluate Model

In [None]:
# Evaluate the current strategy on a 5-row subset only (presumably the first 5 rows —
# see Dataset.take) and write the results under RESULTS_DIR
evaluation_subset = world_med_qa_v_dataset.take(5)

llava_model.evaluate(dataset=evaluation_subset, save_path=RESULTS_DIR)

Evaluating model (spain_english subset) ...:   0%|          | 0/5 [00:00<?, ?it/s]

Evaluating model (spain_english subset) ...:  20%|██        | 1/5 [04:07<16:29, 247.34s/it]

RAG Q ANSWER!!


Evaluating model (spain_english subset) ...:  40%|████      | 2/5 [08:03<12:01, 240.65s/it]

RAG Q ANSWER!!


Evaluating model (spain_english subset) ...:  60%|██████    | 3/5 [11:59<07:56, 238.40s/it]

RAG Q ANSWER!!


Evaluating model (spain_english subset) ...:  80%|████████  | 4/5 [15:22<03:44, 224.54s/it]

RAG Q ANSWER!!


Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [19:18<00:00, 231.65s/it]

RAG Q ANSWER!!





#### 1.2.2. RAG Q+As

#### 1.2.3. RAG IMG

#### 1.2.4. RAG DB, RERANKER, ...