# VQA Model Result Evaluation

## 0. Set-Up Environment

### 0.1. Import Necessary Libraries

In [1]:
from pathlib import Path

from datasets import load_dataset

from src.zero_shot_vqa_model import ZeroShotVQAModel
from src.rag_q_vqa_model import RAGQVQAModel

## 1. Evaluate VQA Approaches

Define Constants

In [2]:
# Dataset subset selectors; together they form the processed TSV file name
COUNTRY = "spain"
FILE_TYPE = "english"

# Relative locations: input dataset folder and the directory passed as
# save_path when the models are evaluated
DATASET_DIR = Path("data") / "WorldMedQA-V"
RESULTS_DIR = Path("evaluation_results")

Load Dataset

In [3]:
# Resolve the processed TSV for the configured country / file type
dataset_filename = f"{COUNTRY}_{FILE_TYPE}_processed.tsv"
data_filepath = str(DATASET_DIR.joinpath(dataset_filename))

# Read the tab-separated file through the generic "csv" builder, then pick the
# "train" split out of the returned mapping of splits
loaded_splits = load_dataset("csv", data_files=[data_filepath], sep="\t")
world_med_qa_v_dataset = loaded_splits["train"]

# Bare last expression so the notebook renders the dataset summary
world_med_qa_v_dataset

Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

### 1.1. Zero-Shot

Load Model

In [None]:
# Zero-shot baseline: LLaVA configured for the selected country / file type
llava_model = ZeroShotVQAModel(model_name="llava", country=COUNTRY, file_type=FILE_TYPE)

Evaluate Model

In [None]:
# Evaluate on a 5-row subset only (each sample took over a minute in the
# recorded run); results are written under RESULTS_DIR via save_path
zero_shot_eval_subset = world_med_qa_v_dataset.take(5)
llava_model.evaluate(dataset=zero_shot_eval_subset, save_path=RESULTS_DIR)

Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:33<00:00, 78.73s/it]


### 1.2. RAG

Define Constants

In [4]:
# Directory and name handed to index_wikimed_data as index_dir / index_name
INDEX_DIR = Path("data") / "WikiMed" / "indexed_db"
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"

# Embedding model identifier used when setting up the retriever
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"

# Passed as relevant_docs_count when the retriever is built
RELEVANT_DOCS_COUNT = 1

#### 1.2.1. RAG Q

Load Model

In [5]:
# RAG-Q variant: same LLaVA backbone and dataset subset as the zero-shot run
llava_rag_model = RAGQVQAModel(model_name="llava", country=COUNTRY, file_type=FILE_TYPE)

Load Retriever

In [7]:
# Collect the retriever settings in one place, then hand them to the model's
# WikiMed indexing helper, which returns the retriever used during evaluation
retriever_settings = {
    "index_dir": INDEX_DIR,
    "index_name": INDEX_NAME,
    "embedding_model_name": EMBEDDING_MODEL_NAME,
    "relevant_docs_count": RELEVANT_DOCS_COUNT,
}
wikimed_retriever = llava_rag_model.index_wikimed_data(**retriever_settings)

Embeddings loaded!
Index loaded!
Retriever loaded!


Evaluate Model

In [8]:
# Evaluate the RAG-Q model on the same 5-row subset size as the zero-shot run,
# supplying the WikiMed retriever; results are written under RESULTS_DIR
rag_q_eval_subset = world_med_qa_v_dataset.take(5)
llava_rag_model.evaluate(
    dataset=rag_q_eval_subset,
    retriever=wikimed_retriever,
    save_path=RESULTS_DIR,
)

Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [16:37<00:00, 199.47s/it]


#### 1.2.2. RAG Q+As

#### 1.2.3. RAG IMG

#### 1.2.4. RAG DB, RERANKER, ...