# VQA Model Result Evaluation

## 0. Environment Setup

### 0.1. Import Necessary Libraries

In [1]:
from itertools import product
from pathlib import Path
from typing import Optional

import pandas as pd
from datasets import disable_progress_bars

import src.utils.dataset_helpers.world_med_qa_v.dataset_management as world_med_qa_v_dataset_management
import src.utils.dataset_helpers.world_med_qa_v.plot_helpers as world_med_qa_v_plot_helpers
from src.ui.vqa_approaches_exploration_form import VQAApproachesExplorationForm
from src.utils.data_definitions import (
    DocSplitterOptions,
    LoggerConfig,
    GeneralDocSplitterOptions,
    GeneralVQAStrategiesDetails,
    VQAStrategyDetail
)
from src.utils.enums import DocumentSplitterType, RagQPromptType, VQAStrategyType, ZeroShotPromptType
from src.utils.logger import LoggerManager
from src.utils.text_splitters.paragraph_splitter import ParagraphSplitter
from src.utils.text_splitters.recursive_character_splitter import RecursiveCharacterSplitter
from src.utils.text_splitters.spacy_sentence_splitter import SpacySentenceSplitter
from src.visual_qa_model import VisualQAModel
from src.visual_qa_strategies.base_vqa_strategy import BaseVQAStrategy
from src.visual_qa_strategies.rag_q_as_vqa_strategy import RagQAsVQAStrategy
from src.visual_qa_strategies.rag_q_vqa_strategy import RagQVQAStrategy
from src.visual_qa_strategies.zero_shot_vqa_strategy import ZeroShotVQAStrategy

### 0.2. Configure Environment Settings

Enable Automatic Module Reloading

In [2]:
# Load IPython's autoreload extension and use mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

Disable Progress Bar for Dataset Filtering

In [3]:
# Turn off the progress bars of the `datasets` library for this session.
disable_progress_bars()

## 1. Evaluation of VQA Approaches

Define Constants

In [5]:
# Directory passed as `data_path` when loading the WorldMedQA-V dataset.
DATASET_DIR = Path("data/WorldMedQA-V")
# Model identifier handed to VisualQAModel as `model_name`.
MODEL_NAME = "llava"
# Country / file type of the evaluated subset (the recorded load output shows
# the file spain_english_processed.tsv for these values).
COUNTRY = "spain"
FILE_TYPE = "english"
# Passed as `results_path` to the evaluate() calls of section 1.
RESULTS_DIR = Path('evaluation_results')

Load Dataset

In [5]:
# Load the subset selected by COUNTRY / FILE_TYPE and display its summary.
world_med_qa_v_dataset = world_med_qa_v_dataset_management.load_vqa_dataset(
    data_path=DATASET_DIR, country=COUNTRY, file_type=FILE_TYPE
)
world_med_qa_v_dataset

- Loading WorldMedQA-V dataset (filename: spain_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: spain_english_processed.tsv) loaded.


Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

### 1.1. Zero-Shot Evaluation

Load Model

In [6]:
# Build the model wrapper with the zero-shot strategy and the zs_v1 prompt;
# later cells reuse this object and only swap its strategy / prompt type.
llava_model = VisualQAModel(
    visual_qa_strategy=ZeroShotVQAStrategy(prompt_type=ZeroShotPromptType.V1),
    model_name=MODEL_NAME, country=COUNTRY, file_type=FILE_TYPE,
)

- Loading Zero-Shot strategy ...
+ Zero-Shot strategy loaded.
- Loading Llava model (prompt template: zs_v1) ...
+ Llava model (prompt template: zs_v1) loaded.


Evaluate Model (Prompt Template: `zs_v1`)

In [7]:
# zs_v1: evaluate on a two-row sample (`take(2)`), with the image supplied.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2), results_path=RESULTS_DIR, use_image=True,
)

- Evaluating model (spain_english subset) ...:   0%|          | 0/2 [00:00<?, ?it/s]

- Generating Answer for Question (ID: 1) ...


- Evaluating model (spain_english subset) ...:  50%|█████     | 1/2 [01:12<01:12, 72.71s/it]

+ Answer for Question (ID: 1) generated.
- Generating Answer for Question (ID: 2) ...


- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:26<00:00, 73.32s/it]

+ Answer for Question (ID: 2) generated.
+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `zs_v2`)

In [None]:
# Switch the current (zero-shot) strategy to the zs_v2 prompt in place, then
# evaluate on a two-row sample. The change persists on `llava_model`.
llava_model.visual_qa_strategy.prompt_type = ZeroShotPromptType.V2
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2), results_path=RESULTS_DIR, use_image=True,
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:16<00:00, 68.04s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `zs_v3`)

In [None]:
# Switch the current (zero-shot) strategy to the zs_v3 prompt in place, then
# evaluate.
# NOTE(review): this cell samples 5 rows while the other evaluation cells in
# this section use take(2) — confirm the difference is intentional.
llava_model.visual_qa_strategy.prompt_type = ZeroShotPromptType.V3
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(5), results_path=RESULTS_DIR, use_image=True,
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:24<00:00, 76.90s/it]

+ Model evaluation (spain_english subset) completed.





### 1.2. Retrieval-Augmented Generation (RAG) Evaluation

Define Model Specific Constants

In [8]:
# Settings forwarded to RagQVQAStrategy as `index_dir`, `index_name`,
# `embedding_model_name` and `relevant_docs_count` respectively.
INDEX_DIR = Path('data/WikiMed/indexed_db')
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"
RELEVANT_DOCS_COUNT = 2

#### 1.2.1. RAG Q (Question Only)

Load Model

In [9]:
# Replace the strategy on the existing model with RAG Q (prompt rq_v1). The
# recorded output shows the retriever and the model being loaded at this point.
llava_model.visual_qa_strategy = RagQVQAStrategy(
    prompt_type=RagQPromptType.V1,
    index_dir=INDEX_DIR, index_name=INDEX_NAME,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    relevant_docs_count=RELEVANT_DOCS_COUNT,
)

- Loading RAG Q strategy ...
	- Loading Document Retriever ...
		- Loading Embeddings ...
		+ Embeddings Loaded.
		- Loading Index ...
		+ Index Loaded.
		- Loading Retriever ...
		+ Retriever Loaded.
	+ Document Retriever Loaded.
+ RAG Q strategy loaded.
- Loading Llava model (prompt template: rq_v1) ...
+ Llava model (prompt template: rq_v1) loaded.


Evaluate Model (Prompt Template: `rq_v1`)

- Document Splitter Type: `No Document Splitter`

In [None]:
# rq_v1 · no document splitter: evaluate on a two-row sample.
# Uses the prompt type the RAG Q strategy was constructed with (rq_v1).
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2), results_path=RESULTS_DIR, use_image=True,
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [07:47<00:00, 234.00s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Recursive Character Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v1 · Recursive Character Splitter · with title: two-row sample.
# Uses the prompt type the RAG Q strategy was constructed with (rq_v1).
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=RecursiveCharacterSplitter(
        token_count=2, chunk_size=200, chunk_overlap=0, add_title=True
    ),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:53<00:00, 86.67s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v1 · Recursive Character Splitter · no title: two-row sample.
# Uses the prompt type the RAG Q strategy was constructed with (rq_v1).
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=RecursiveCharacterSplitter(
        token_count=2, chunk_size=200, chunk_overlap=0, add_title=False
    ),
)

- Evaluating model (spain_english subset) ...:   0%|          | 0/2 [00:00<?, ?it/s]

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:54<00:00, 87.13s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `spaCy Sentence Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [10]:
# rq_v1 · spaCy Sentence Splitter · with title: two-row sample.
# Uses the prompt type the RAG Q strategy was constructed with (rq_v1).
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=SpacySentenceSplitter(token_count=2, add_title=True),
)

- Evaluating model (spain_english subset) ...:   0%|          | 0/2 [00:00<?, ?it/s]

- Generating Answer for Question (ID: 1) ...


- Evaluating model (spain_english subset) ...:  50%|█████     | 1/2 [01:45<01:45, 105.28s/it]

+ Answer for Question (ID: 1) generated.
- Generating Answer for Question (ID: 2) ...


- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [03:11<00:00, 95.66s/it] 

+ Answer for Question (ID: 2) generated.
+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v1 · spaCy Sentence Splitter · no title: two-row sample.
# Uses the prompt type the RAG Q strategy was constructed with (rq_v1).
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=SpacySentenceSplitter(token_count=2, add_title=False),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:41<00:00, 80.58s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Paragraph Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v1 · Paragraph Splitter · with title: two-row sample.
# Uses the prompt type the RAG Q strategy was constructed with (rq_v1).
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=ParagraphSplitter(token_count=2, add_title=True),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [03:59<00:00, 119.96s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v1 · Paragraph Splitter · no title: two-row sample.
# Uses the prompt type the RAG Q strategy was constructed with (rq_v1).
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=ParagraphSplitter(token_count=2, add_title=False),
)

- Evaluating model (spain_english subset) ...:   0%|          | 0/2 [00:00<?, ?it/s]

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:47<00:00, 143.91s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `rq_v2`)

- Document Splitter Type: `No Document Splitter`

In [None]:
# Switch the RAG Q strategy to the rq_v2 prompt in place; the remaining rq_v2
# cells rely on this assignment having run. Then evaluate without a splitter.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V2
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2), results_path=RESULTS_DIR, use_image=True,
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [07:47<00:00, 233.96s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Recursive Character Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v2 · Recursive Character Splitter · with title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V2 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=RecursiveCharacterSplitter(
        token_count=2, chunk_size=200, chunk_overlap=0, add_title=True
    ),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:38<00:00, 79.16s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v2 · Recursive Character Splitter · no title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V2 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=RecursiveCharacterSplitter(
        token_count=2, chunk_size=200, chunk_overlap=0, add_title=False
    ),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:34<00:00, 77.47s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `spaCy Sentence Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v2 · spaCy Sentence Splitter · with title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V2 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=SpacySentenceSplitter(token_count=2, add_title=True),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:40<00:00, 80.45s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v2 · spaCy Sentence Splitter · no title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V2 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=SpacySentenceSplitter(token_count=2, add_title=False),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:35<00:00, 77.74s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Paragraph Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v2 · Paragraph Splitter · with title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V2 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=ParagraphSplitter(token_count=2, add_title=True),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:17<00:00, 128.63s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v2 · Paragraph Splitter · no title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V2 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=ParagraphSplitter(token_count=2, add_title=False),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [03:36<00:00, 108.21s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `rq_v3`)

- Document Splitter Type: `No Document Splitter`

In [None]:
# Switch the RAG Q strategy to the rq_v3 prompt in place; the remaining rq_v3
# cells rely on this assignment having run. Then evaluate without a splitter.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V3
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2), results_path=RESULTS_DIR, use_image=True,
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [07:07<00:00, 213.70s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Recursive Character Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v3 · Recursive Character Splitter · with title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V3 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=RecursiveCharacterSplitter(
        token_count=2, chunk_size=200, chunk_overlap=0, add_title=True
    ),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:41<00:00, 80.54s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v3 · Recursive Character Splitter · no title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V3 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=RecursiveCharacterSplitter(
        token_count=2, chunk_size=200, chunk_overlap=0, add_title=False
    ),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:32<00:00, 76.36s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `spaCy Sentence Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v3 · spaCy Sentence Splitter · with title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V3 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=SpacySentenceSplitter(token_count=2, add_title=True),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:38<00:00, 79.02s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v3 · spaCy Sentence Splitter · no title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V3 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=SpacySentenceSplitter(token_count=2, add_title=False),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:49<00:00, 84.81s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Paragraph Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v3 · Paragraph Splitter · with title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V3 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=ParagraphSplitter(token_count=2, add_title=True),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:17<00:00, 128.51s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v3 · Paragraph Splitter · no title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V3 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=ParagraphSplitter(token_count=2, add_title=False),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [03:46<00:00, 113.07s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `rq_v4`)

- Document Splitter Type: `No Document Splitter`

In [None]:
# Switch the RAG Q strategy to the rq_v4 prompt in place; the remaining rq_v4
# cells rely on this assignment having run. Then evaluate without a splitter.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V4
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2), results_path=RESULTS_DIR, use_image=True,
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [06:52<00:00, 206.30s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Recursive Character Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v4 · Recursive Character Splitter · with title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V4 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=RecursiveCharacterSplitter(
        token_count=2, chunk_size=200, chunk_overlap=0, add_title=True
    ),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [03:41<00:00, 110.63s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v4 · Recursive Character Splitter · no title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V4 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=RecursiveCharacterSplitter(
        token_count=2, chunk_size=200, chunk_overlap=0, add_title=False
    ),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:49<00:00, 84.97s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `spaCy Sentence Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v4 · spaCy Sentence Splitter · with title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V4 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=SpacySentenceSplitter(token_count=2, add_title=True),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:53<00:00, 86.73s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v4 · spaCy Sentence Splitter · no title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V4 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=SpacySentenceSplitter(token_count=2, add_title=False),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:56<00:00, 88.44s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Paragraph Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [None]:
# rq_v4 · Paragraph Splitter · with title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V4 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=ParagraphSplitter(token_count=2, add_title=True),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:47<00:00, 143.97s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [None]:
# rq_v4 · Paragraph Splitter · no title: two-row sample.
# Relies on prompt_type having been set to RagQPromptType.V4 by the
# "No Document Splitter" cell of this section.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    results_path=RESULTS_DIR,
    use_image=True,
    doc_splitter=ParagraphSplitter(token_count=2, add_title=False),
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:10<00:00, 125.05s/it]

+ Model evaluation (spain_english subset) completed.





#### 1.2.2. RAG Q+As (Question + Answers)

#### 1.2.3. RAG IMG (Image-Based)

#### 1.2.4. RAG DB-Reranker (Database with Reranker)

## 2. VQA Approaches Exploration

Define Model Specific Constants

In [4]:
# NOTE(review): DATASET_DIR, MODEL_NAME and RESULTS_DIR repeat the values
# declared in section 1 — presumably so this section can run on a fresh
# kernel (its execution counts restart); confirm.
DATASET_DIR = Path("data/WorldMedQA-V")
MODEL_NAME = "llava"
RESULTS_DIR = Path('evaluation_results')
# Passed to LoggerManager as `log_save_directory`.
LOGS_DIR = Path('logs')

Define RAG Q Specific Constants

In [5]:
# Retrieval settings forwarded to the RAG strategies built in this section.
INDEX_DIR = Path('data/WikiMed/indexed_db')
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"
# NOTE(review): 1 here, whereas section 1.2 sets RELEVANT_DOCS_COUNT = 2 —
# confirm the difference is intentional.
RELEVANT_DOCS_COUNT = 1

Define Possible VQA Strategies

In [6]:
# Registry of the strategies handed to the exploration form, keyed by strategy
# type. RAG_IMG and RAG_DB_RERANKER are mapped to None in this notebook, so the
# value type is Optional[BaseVQAStrategy] (the previous annotation claimed every
# value was a BaseVQAStrategy).
# The recorded output shows the RAG Q+As strategy reusing the DocumentRetriever
# instance created for RAG Q.
# NOTE(review): RAG Q+As is configured with a RagQPromptType value — confirm
# this is the intended prompt enum for that strategy.
vqa_strategies: dict[VQAStrategyType, Optional[BaseVQAStrategy]] = {
    VQAStrategyType.ZERO_SHOT: ZeroShotVQAStrategy(prompt_type=ZeroShotPromptType.V1),
    VQAStrategyType.RAG_Q: RagQVQAStrategy(
        prompt_type=RagQPromptType.V1,
        index_dir=INDEX_DIR,
        index_name=INDEX_NAME,
        embedding_model_name=EMBEDDING_MODEL_NAME,
        relevant_docs_count=RELEVANT_DOCS_COUNT
    ),
    VQAStrategyType.RAG_Q_AS: RagQAsVQAStrategy(
        prompt_type=RagQPromptType.V1,
        index_dir=INDEX_DIR,
        index_name=INDEX_NAME,
        embedding_model_name=EMBEDDING_MODEL_NAME,
        relevant_docs_count=RELEVANT_DOCS_COUNT
    ),
    VQAStrategyType.RAG_IMG: None,
    VQAStrategyType.RAG_DB_RERANKER: None
}

- Loading Zero-Shot strategy ...
+ Zero-Shot strategy loaded.
- Loading RAG Q strategy ...
	- Loading Document Retriever ...
		- Loading Embeddings ...
		+ Embeddings Loaded.
		- Loading Index ...
		+ Index Loaded.
		- Loading Retriever ...
		+ Retriever Loaded.
	+ Document Retriever Loaded.
+ RAG Q strategy loaded.
- Loading RAG Q+As strategy ...
** Instance of DocumentRetriever already exists, returning the existing instance. **
+ RAG Q+As strategy loaded.


Load Dataset

In [7]:
countries = ['spain', 'brazil', 'israel', 'japan']
file_types = ['english', 'local']

# One loaded dataset per (country, file type) pair, keyed "<country>_<file_type>".
# Note that in this section `world_med_qa_v_dataset` is a dict of datasets.
world_med_qa_v_dataset = {
    f"{country}_{file_type}": world_med_qa_v_dataset_management.load_vqa_dataset(
        data_path=DATASET_DIR, country=country, file_type=file_type
    )
    for country, file_type in product(countries, file_types)
}
world_med_qa_v_dataset

- Loading WorldMedQA-V dataset (filename: spain_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: spain_english_processed.tsv) loaded.
- Loading WorldMedQA-V dataset (filename: spain_local_processed.tsv) ...
+ WorldMedQA-V dataset (filename: spain_local_processed.tsv) loaded.
- Loading WorldMedQA-V dataset (filename: brazil_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: brazil_english_processed.tsv) loaded.
- Loading WorldMedQA-V dataset (filename: brazil_local_processed.tsv) ...
+ WorldMedQA-V dataset (filename: brazil_local_processed.tsv) loaded.
- Loading WorldMedQA-V dataset (filename: israel_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: israel_english_processed.tsv) loaded.
- Loading WorldMedQA-V dataset (filename: israel_local_processed.tsv) ...
+ WorldMedQA-V dataset (filename: israel_local_processed.tsv) loaded.
- Loading WorldMedQA-V dataset (filename: japan_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: japan_english_proce

{'spain_english': Dataset({
     features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
     num_rows: 125
 }),
 'spain_local': Dataset({
     features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
     num_rows: 125
 }),
 'brazil_english': Dataset({
     features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
     num_rows: 89
 }),
 'brazil_local': Dataset({
     features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
     num_rows: 89
 }),
 'israel_english': Dataset({
     features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
     num_rows: 186
 }),
 'israel_local': Dataset({
     features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
     num_rows: 186
 }),
 'japan_english': Dataset({
     features: ['index', 'image', 'quest

Prepare Logger Manager

In [8]:
# Logger that writes to files under LOGS_DIR only; the console handler is off.
logger_manager = LoggerManager(
    log_save_directory=LOGS_DIR,
    logger_config=LoggerConfig(console_handler_enabled=False, file_handler_enabled=True),
)

Logger 'VisualQALogger' created!
	- Root Log Directory: logs
	- Full Log Filepath: None
	- Handlers:
		* Console Handler: Disabled
		* File Handler: Enabled



Experiment with the `VQA Approaches Exploration Form`

In [None]:
# Build the interactive exploration form from the datasets, strategies and
# logger prepared above, then render it.
vqa_approaches_exploration_form = VQAApproachesExplorationForm(
    dataset=world_med_qa_v_dataset, model_name=MODEL_NAME,
    vqa_strategies=vqa_strategies,
    evaluation_results_folder=RESULTS_DIR,
    logger_manager=logger_manager,
)
vqa_approaches_exploration_form.display_form()

VBox(children=(HTML(value="<h1 style='text-align: center; margin-bottom: 20px;'>VQA Approaches Exploration For…

## 3. Result Analysis

Define Constants

In [4]:
# Folder the evaluation results are loaded from in this section
# (passed as `evaluation_results_folder`).
RESULTS_DIR = Path('evaluation_results')

### 3.1. Development Subset Results

#### 3.1.1. Get Best Zero-Shot Configuration

In [5]:
# Load the stored zero-shot results (spain / english, with image) for every
# ZeroShotPromptType, and show "-" wherever a column has no value.
zero_shot_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"], file_types=["english"], use_images=[True],
        vqa_strategy_types=[VQAStrategyType.ZERO_SHOT],
        prompt_types=list(ZeroShotPromptType),
        relevant_docs_count=[None], doc_splitter_options=[None],
        should_apply_rag_to_questions=[None],
    ).get_possible_vqa_strategy_details(),
).fillna("-")
zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,zero_shot,zs_v1,-,-,-,-,-,-,-,0.28,1.0
1,spain,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.304,1.0
2,spain,english,zero_shot,zs_v3,-,-,-,-,-,-,-,0.304,1.0


In [6]:
# Bar chart of accuracy per zero-shot prompt type.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=zero_shot_evaluation_results,
    title="[Zero-Shot] Analysis of LLaVA Model Accuracy",
    x_axis_title="Prompt Type", y_axis_title="Accuracy",
    x_dataframe_column_name="prompt_type", y_dataframe_column_name="accuracy",
)

In [7]:
# Select the zero-shot rows via get_max_accuracy_rows; the recorded output
# shows two rows tied at accuracy 0.304 (zs_v2 and zs_v3).
best_zero_shot_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=zero_shot_evaluation_results
)
best_zero_shot_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
1,spain,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.304,1.0
2,spain,english,zero_shot,zs_v3,-,-,-,-,-,-,-,0.304,1.0


\* When several configurations tie for the best accuracy, only the first one is carried forward to the subsequent experiments.

In [8]:
# Keep only the first of the tied best rows (zs_v2 in the recorded output).
best_zero_shot_result = best_zero_shot_results.head(1)
best_zero_shot_result

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
1,spain,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.304,1.0


#### 3.1.2. Get Best Document Splitter

Fixed Values:

**- VQA Strategy: RAG (Question Only)**<br>
**- Prompt Type: rq_v1**<br>
**- Relevant Docs. Count: 1**<br>

Variables:

**- Title: {Yes, No}**<br>
**- Chunk Count (`token_count`):**<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**+ No Splitter: -**<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**+ Paragraph Splitter: {1, 2, 3, 4}**<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**+ Recursive Character Splitter: {1, 2}**<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**+ spaCy Sentence Splitter: {1, 2, 3, 4, 5}**<br>
**- Chunk Size (only for Recursive Character Splitter): {300, 600, 900}**<br>
<br>


Aim: identify the best-performing **Document Splitter**.<br>
<br>

##### 3.1.2.1. No Splitter

In [9]:
# RAG Q / rq_v1 / one relevant document, no document splitter, with and
# without title. Kept as a list of detail collections and flattened below.
no_splitter_vqa_strategy_details = [
    GeneralVQAStrategiesDetails(
        countries=["spain"], file_types=["english"], use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q],
        prompt_types=[RagQPromptType.V1],
        relevant_docs_count=[1],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.NO_SPLITTER],
                add_titles=[False, True],
                token_counts=[1],
                chunk_sizes=[None], chunk_overlaps=[None],
            )
        ],
        should_apply_rag_to_questions=[None],
    ).get_possible_vqa_strategy_details()
]

# Load the stored results for every configuration; show "-" where a column
# has no value.
no_splitter_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        strategy_detail
        for detail_group in no_splitter_vqa_strategy_details
        for strategy_detail in detail_group
    ],
).fillna("-")
no_splitter_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v1,1,no_splitter,False,1,-,-,-,0.088,0.328
1,spain,english,rag_q,rq_v1,1,no_splitter,True,1,-,-,-,0.088,0.32


In [10]:
# Bar chart of the no-splitter results (no splitter type is passed: None).
world_med_qa_v_plot_helpers.display_bar_chart_on_splitter_evaluation_results(
    splitter_evaluation_results=no_splitter_evaluation_results, document_splitter_type=None,
    title="[No Splitter] Analysis of LLaVA Model Accuracy",
)

In [11]:
# Mean accuracy over the no-splitter configurations, printed to 4 decimals.
no_splitter_mean_accuracy = world_med_qa_v_dataset_management.get_mean_accuracy(no_splitter_evaluation_results)
print(f"Mean Accuracy (No Splitter): {no_splitter_mean_accuracy:.4f}")

Mean Accuracy (No Splitter): 0.0880


##### 3.1.2.2. Paragraph Splitter

In [12]:
# RAG Q / rq_v1 / one relevant document, Paragraph Splitter with
# token_count 1-4, with and without title. Kept as a list of detail
# collections and flattened below.
paragraph_splitter_vqa_strategy_details = [
    GeneralVQAStrategiesDetails(
        countries=["spain"], file_types=["english"], use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q],
        prompt_types=[RagQPromptType.V1],
        relevant_docs_count=[1],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.PARAGRAPH_SPLITTER],
                add_titles=[False, True],
                token_counts=[1, 2, 3, 4],
                chunk_sizes=[None], chunk_overlaps=[None],
            )
        ],
        should_apply_rag_to_questions=[None],
    ).get_possible_vqa_strategy_details()
]

# Load the stored results for every configuration; show "-" where a column
# has no value.
paragraph_splitter_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        strategy_detail
        for detail_group in paragraph_splitter_vqa_strategy_details
        for strategy_detail in detail_group
    ],
).fillna("-")
paragraph_splitter_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v1,1,paragraph_splitter,False,1,-,-,-,0.272,1.0
1,spain,english,rag_q,rq_v1,1,paragraph_splitter,True,1,-,-,-,0.264,1.0
2,spain,english,rag_q,rq_v1,1,paragraph_splitter,False,2,-,-,-,0.28,0.984
3,spain,english,rag_q,rq_v1,1,paragraph_splitter,True,2,-,-,-,0.296,0.976
4,spain,english,rag_q,rq_v1,1,paragraph_splitter,False,3,-,-,-,0.216,0.976
5,spain,english,rag_q,rq_v1,1,paragraph_splitter,True,3,-,-,-,0.248,0.976
6,spain,english,rag_q,rq_v1,1,paragraph_splitter,False,4,-,-,-,0.208,0.936
7,spain,english,rag_q,rq_v1,1,paragraph_splitter,True,4,-,-,-,0.208,0.936


In [13]:
# Bar chart of the Paragraph Splitter results.
world_med_qa_v_plot_helpers.display_bar_chart_on_splitter_evaluation_results(
    splitter_evaluation_results=paragraph_splitter_evaluation_results,
    document_splitter_type=DocumentSplitterType.PARAGRAPH_SPLITTER,
    title="[Paragraph Splitter] Analysis of LLaVA Model Accuracy",
)

In [14]:
# Mean accuracy over the paragraph-splitter configurations (compared across splitters later).
paragraph_splitter_mean_accuracy = world_med_qa_v_dataset_management.get_mean_accuracy(
    paragraph_splitter_evaluation_results
)
print(f"Mean Accuracy (Paragraph Splitter): {paragraph_splitter_mean_accuracy:.4f}")

Mean Accuracy (Paragraph Splitter): 0.2490


##### 3.1.2.3. Recursive Character Splitter

In [15]:
# Recursive-character-splitter configurations: RAG-Q, prompt V1, one relevant document,
# zero chunk overlap, sweeping add_title over {False, True}, token_count over {1, 2}
# and chunk_size over {300, 600, 900}.
recursive_character_splitter_vqa_strategy_details = [
    GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q],
        prompt_types=[RagQPromptType.V1],
        relevant_docs_count=[1],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[False, True],
                token_counts=[1, 2],
                chunk_sizes=[300, 600, 900],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[None]
    ).get_possible_vqa_strategy_details()
]

# Flatten the nested details, load their results, and show missing values as "-".
recursive_character_splitter_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=[
        strategy_detail
        for detail_group in recursive_character_splitter_vqa_strategy_details
        for strategy_detail in detail_group
    ],
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
recursive_character_splitter_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,1,300,0,-,0.28,1.0
1,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,1,600,0,-,0.296,1.0
2,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,1,900,0,-,0.272,1.0
3,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,1,300,0,-,0.256,1.0
4,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,1,600,0,-,0.288,1.0
5,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,1,900,0,-,0.264,1.0
6,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,2,300,0,-,0.256,1.0
7,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,2,600,0,-,0.248,1.0
8,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,2,900,0,-,0.272,1.0
9,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,2,300,0,-,0.28,1.0


In [16]:
# Chart the recursive-character-splitter results loaded in the previous cell.
# The title names the Recursive Character Splitter so it matches the plotted data
# and the document_splitter_type argument.
world_med_qa_v_plot_helpers.display_bar_chart_on_splitter_evaluation_results(
    splitter_evaluation_results=recursive_character_splitter_evaluation_results,
    document_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
    title="[Recursive Character Splitter] Analysis of LLaVA Model Accuracy"
)

In [17]:
# Mean accuracy over the recursive-character-splitter configurations.
recursive_character_splitter_mean_accuracy = world_med_qa_v_dataset_management.get_mean_accuracy(recursive_character_splitter_evaluation_results)
print(f"Mean Accuracy (Recursive Character Splitter): {recursive_character_splitter_mean_accuracy:.4f}")

Mean Accuracy (Recursive Character Splitter): 0.2713


##### 3.1.2.4. spaCy Sentence Splitter

In [18]:
# spaCy-sentence-splitter configurations: RAG-Q, prompt V1, one relevant document,
# sweeping add_title over {False, True} and token_count over {1, 2, 3, 4, 5}.
spacy_sentence_splitter_vqa_strategy_details = [
    GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q],
        prompt_types=[RagQPromptType.V1],
        relevant_docs_count=[1],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.SPACY_SENTENCE_SPLITTER],
                add_titles=[False, True],
                token_counts=[1, 2, 3, 4, 5],
                chunk_sizes=[None],
                chunk_overlaps=[None]
            )
        ],
        should_apply_rag_to_questions=[None]
    ).get_possible_vqa_strategy_details()
]

# Flatten the nested details, load their results, and show missing values as "-".
spacy_sentence_splitter_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=[
        strategy_detail
        for detail_group in spacy_sentence_splitter_vqa_strategy_details
        for strategy_detail in detail_group
    ],
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
spacy_sentence_splitter_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,1,-,-,-,0.296,1.0
1,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,True,1,-,-,-,0.28,1.0
2,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,2,-,-,-,0.272,1.0
3,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,True,2,-,-,-,0.248,1.0
4,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,3,-,-,-,0.272,1.0
5,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,True,3,-,-,-,0.272,1.0
6,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,4,-,-,-,0.24,1.0
7,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,True,4,-,-,-,0.256,1.0
8,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,5,-,-,-,0.272,1.0
9,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,True,5,-,-,-,0.248,1.0


In [19]:
# Chart the spaCy-sentence-splitter results loaded in the previous cell.
world_med_qa_v_plot_helpers.display_bar_chart_on_splitter_evaluation_results(
    title="[spaCy Sentence Splitter] Analysis of LLaVA Model Accuracy",
    document_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
    splitter_evaluation_results=spacy_sentence_splitter_evaluation_results
)

In [20]:
# Mean accuracy over the spaCy-sentence-splitter configurations.
spacy_sentence_splitter_mean_accuracy = world_med_qa_v_dataset_management.get_mean_accuracy(spacy_sentence_splitter_evaluation_results)
print(f"Mean Accuracy (spaCy Sentence Splitter): {spacy_sentence_splitter_mean_accuracy:.4f}")

Mean Accuracy (spaCy Sentence Splitter): 0.2656


##### 3.1.2.5. Best Splitter

In [21]:
# Display label -> mean accuracy for every splitter evaluated above.
# Insertion order is kept so charts and tie-breaking see the splitters in this order.
mean_accuracy_values = dict([
    ("No Splitter", no_splitter_mean_accuracy),
    ("Paragraph Splitter", paragraph_splitter_mean_accuracy),
    ("Recursive Character Splitter", recursive_character_splitter_mean_accuracy),
    ("spaCy Sentence Splitter", spacy_sentence_splitter_mean_accuracy),
])

In [22]:
# Chart the per-splitter mean accuracies side by side.
world_med_qa_v_plot_helpers.display_bar_chart_on_best_mean_accuracy_results(
    title="Mean Accuracy Values for each Document Splitter",
    mean_accuracy_values=mean_accuracy_values
)

In [23]:
# Pick the splitter label with the highest mean accuracy (first one wins on ties).
best_splitter = max(
    mean_accuracy_values,
    key=lambda splitter_name: mean_accuracy_values[splitter_name]
)
print(f"BEST SPLITTER:\n\n{best_splitter} (ACC = {mean_accuracy_values[best_splitter]:.4f})")

BEST SPLITTER:

Recursive Character Splitter (ACC = 0.2713)


#### 3.1.3. Get Best Configuration for each VQA Strategy 

Fixed Values:

**- Document Splitter: Recursive Character Splitter**<br>
**- Relevant Docs. Count = 1**<br>

Variables:

**- VQA Strategy: {RAG (Question Only), RAG (Answers Only), RAG (Question and Answers)}**<br>
**- Title: {Yes, No}**<br>
**- Chunk Count (`token_count`): {1, 2}**<br>
**- Chunk Size: {300, 600, 900}**<br>
**- Prompt Type: {rq_v1, rq_v2, rq_v3, rq_v4, rq_v5, rq_v6}**<br>
<br>


Aim --> Get the best **(Title, Chunk Count, Chunk Size, Prompt Type) combination** for each VQA Strategy<br>
<br>

##### 3.1.3.1. RAG (Question Only)

###### 3.1.3.1.1. No Title

**- Relevant Documents Count = 1**

In [24]:
# RAG-Q without titles, one relevant document: every prompt type crossed with
# token_count {1, 2} and chunk_size {300, 600, 900}; missing values are shown as "-".
# NOTE: `evaluation_results` is rebound by later sections of this notebook, so the
# cells that consume it must run right after this one.
evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q],
        prompt_types=list(RagQPromptType),
        relevant_docs_count=[1],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[False],
                token_counts=[1, 2],
                chunk_sizes=[300, 600, 900],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[None]
    ).get_possible_vqa_strategy_details(),
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,1,300,0,-,0.28,1.0
1,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,1,600,0,-,0.296,1.0
2,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,1,900,0,-,0.272,1.0
3,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,2,300,0,-,0.256,1.0
4,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,2,600,0,-,0.248,1.0
5,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,2,900,0,-,0.272,1.0
6,spain,english,rag_q,rq_v2,1,recursive_character_splitter,False,1,300,0,-,0.248,1.0
7,spain,english,rag_q,rq_v2,1,recursive_character_splitter,False,1,600,0,-,0.256,1.0
8,spain,english,rag_q,rq_v2,1,recursive_character_splitter,False,1,900,0,-,0.288,1.0
9,spain,english,rag_q,rq_v2,1,recursive_character_splitter,False,2,300,0,-,0.24,1.0


In [25]:
# Grouped view of the no-title RAG-Q results: token_count as the row variable,
# chunk_size as the column variable, prompt_type as the bar variable.
world_med_qa_v_plot_helpers.plot_rag_q_evaluation_results_by_groups(
    evaluation_results=evaluation_results,
    title="[RAG (Question Only) - No Title - Recursive Character Splitter - RDC = 1] Analysis of LLaVA Model Accuracy",
    bar_graph_variable='prompt_type',
    column_variable='chunk_size',
    row_variable='token_count'
)

In [26]:
# Keep the highest-accuracy row(s) among the no-title RAG-Q configurations.
no_title_rag_q_rdc1_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(evaluation_results=evaluation_results)
no_title_rag_q_rdc1_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
33,spain,english,rag_q,rq_v6,1,recursive_character_splitter,False,2,300,0,-,0.304,1.0


**- Token Count = 2**<br>
**- Prompt Type = rq_v6**<br>
**- Chunk Size = 300**

In [27]:
# Best no-title RAG-Q setting (prompt V6, token_count 2, chunk_size 300), now sweeping
# the relevant documents count from 1 to 5; missing values are shown as "-".
no_title_rag_q_tc_2_rq_v6_cs300_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q],
        prompt_types=[RagQPromptType.V6],
        relevant_docs_count=[1, 2, 3, 4, 5],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[False],
                token_counts=[2],
                chunk_sizes=[300],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[None]
    ).get_possible_vqa_strategy_details(),
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
no_title_rag_q_tc_2_rq_v6_cs300_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v6,1,recursive_character_splitter,False,2,300,0,-,0.304,1.0
1,spain,english,rag_q,rq_v6,2,recursive_character_splitter,False,2,300,0,-,0.304,1.0
2,spain,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.312,1.0
3,spain,english,rag_q,rq_v6,4,recursive_character_splitter,False,2,300,0,-,0.304,1.0
4,spain,english,rag_q,rq_v6,5,recursive_character_splitter,False,2,300,0,-,0.304,1.0


In [28]:
# Accuracy against relevant documents count for the best no-title RAG-Q setting.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    title="[RAG-Q] LLaVA Model Accuracy (No Title - Token Count = 2; Prompt Type = rq_v6 - Chunk Size = 300)",
    evaluation_results=no_title_rag_q_tc_2_rq_v6_cs300_results,
    x_dataframe_column_name="relevant_docs_count",
    x_axis_title="Relevant Documents Count",
    y_dataframe_column_name="accuracy",
    y_axis_title="Accuracy"
)

In [29]:
# Keep the highest-accuracy row(s) of the relevant-documents sweep (no title, RAG-Q).
rc_splitter_best_rag_q_no_title_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(evaluation_results=no_title_rag_q_tc_2_rq_v6_cs300_results)
rc_splitter_best_rag_q_no_title_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
2,spain,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.312,1.0


###### 3.1.3.1.2. With Title

**- Relevant Documents Count = 1**

In [30]:
# RAG-Q with titles, one relevant document: every prompt type crossed with
# token_count {1, 2} and chunk_size {300, 600, 900}; missing values are shown as "-".
# NOTE: this rebinds `evaluation_results`, replacing the frame loaded by the
# previous section.
evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q],
        prompt_types=list(RagQPromptType),
        relevant_docs_count=[1],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[True],
                token_counts=[1, 2],
                chunk_sizes=[300, 600, 900],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[None]
    ).get_possible_vqa_strategy_details(),
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,1,300,0,-,0.256,1.0
1,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,1,600,0,-,0.288,1.0
2,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,1,900,0,-,0.264,1.0
3,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,2,300,0,-,0.28,1.0
4,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,2,600,0,-,0.272,1.0
5,spain,english,rag_q,rq_v1,1,recursive_character_splitter,True,2,900,0,-,0.272,1.0
6,spain,english,rag_q,rq_v2,1,recursive_character_splitter,True,1,300,0,-,0.248,1.0
7,spain,english,rag_q,rq_v2,1,recursive_character_splitter,True,1,600,0,-,0.272,1.0
8,spain,english,rag_q,rq_v2,1,recursive_character_splitter,True,1,900,0,-,0.28,1.0
9,spain,english,rag_q,rq_v2,1,recursive_character_splitter,True,2,300,0,-,0.264,1.0


In [31]:
# Grouped view of the with-title RAG-Q results: token_count as the row variable,
# chunk_size as the column variable, prompt_type as the bar variable.
world_med_qa_v_plot_helpers.plot_rag_q_evaluation_results_by_groups(
    evaluation_results=evaluation_results,
    title="[RAG (Question Only) - With Title - Recursive Character Splitter - RDC = 1] Analysis of LLaVA Model Accuracy",
    bar_graph_variable='prompt_type',
    column_variable='chunk_size',
    row_variable='token_count'
)

In [32]:
# Keep the highest-accuracy row(s) among the with-title RAG-Q configurations.
with_title_rag_q_rdc1_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(evaluation_results=evaluation_results)
with_title_rag_q_rdc1_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
14,spain,english,rag_q,rq_v3,1,recursive_character_splitter,True,1,900,0,-,0.304,1.0


**- Token Count = 1**<br>
**- Prompt Type = rq_v3**<br>
**- Chunk Size = 900**

In [33]:
# Best with-title RAG-Q setting (prompt V3, token_count 1, chunk_size 900), now sweeping
# the relevant documents count from 1 to 5; missing values are shown as "-".
with_title_rag_q_tc_1_rq_v3_cs900_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q],
        prompt_types=[RagQPromptType.V3],
        relevant_docs_count=[1, 2, 3, 4, 5],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[True],
                token_counts=[1],
                chunk_sizes=[900],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[None]
    ).get_possible_vqa_strategy_details(),
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
with_title_rag_q_tc_1_rq_v3_cs900_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v3,1,recursive_character_splitter,True,1,900,0,-,0.304,1.0
1,spain,english,rag_q,rq_v3,2,recursive_character_splitter,True,1,900,0,-,0.264,1.0
2,spain,english,rag_q,rq_v3,3,recursive_character_splitter,True,1,900,0,-,0.272,1.0
3,spain,english,rag_q,rq_v3,4,recursive_character_splitter,True,1,900,0,-,0.256,1.0
4,spain,english,rag_q,rq_v3,5,recursive_character_splitter,True,1,900,0,-,0.256,1.0


In [34]:
# Accuracy against relevant documents count for the best with-title RAG-Q setting.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    title="[RAG-Q] LLaVA Model Accuracy (With Title - Token Count = 1; Prompt Type = rq_v3 - Chunk Size = 900)",
    evaluation_results=with_title_rag_q_tc_1_rq_v3_cs900_results,
    x_dataframe_column_name="relevant_docs_count",
    x_axis_title="Relevant Documents Count",
    y_dataframe_column_name="accuracy",
    y_axis_title="Accuracy"
)

In [35]:
# Keep the highest-accuracy row(s) of the relevant-documents sweep (with title, RAG-Q).
rc_splitter_best_rag_q_with_title_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(evaluation_results=with_title_rag_q_tc_1_rq_v3_cs900_results)
rc_splitter_best_rag_q_with_title_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v3,1,recursive_character_splitter,True,1,900,0,-,0.304,1.0


###### 3.1.3.1.3. Best Configuration

\* Of the best RAG (Question Only) results, only the single best experiment is carried forward to further experiments.

In [36]:
# Stack the best no-title and with-title RAG-Q rows into one frame with a fresh 0..n-1 index.
rc_splitter_best_rag_q_results = pd.concat(
    objs=[rc_splitter_best_rag_q_no_title_results, rc_splitter_best_rag_q_with_title_results],
    ignore_index=True
)
rc_splitter_best_rag_q_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.312,1.0
1,spain,english,rag_q,rq_v3,1,recursive_character_splitter,True,1,900,0,-,0.304,1.0


In [37]:
# Select the row(s) whose accuracy equals the maximum across both title settings.
rc_splitter_best_rag_q_result = rc_splitter_best_rag_q_results.loc[
    rc_splitter_best_rag_q_results['accuracy'] == rc_splitter_best_rag_q_results['accuracy'].max()
]
rc_splitter_best_rag_q_result

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.312,1.0


##### 3.1.3.2. RAG (Answers Only)

###### 3.1.3.2.1. No Title

**- Relevant Documents Count = 1**

In [38]:
# RAG_Q_AS with should_apply_rag_to_question=False (the "Answers Only" variant), no titles,
# one relevant document: every prompt type crossed with token_count {1, 2} and
# chunk_size {300, 600, 900}; missing values are shown as "-".
# NOTE: this rebinds `evaluation_results`, replacing the frame loaded by the
# previous section.
evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
        prompt_types=list(RagQPromptType),
        relevant_docs_count=[1],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[False],
                token_counts=[1, 2],
                chunk_sizes=[300, 600, 900],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[False]
    ).get_possible_vqa_strategy_details(),
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,1,300,0,False,0.264,1.0
1,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,1,600,0,False,0.288,1.0
2,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,1,900,0,False,0.264,0.992
3,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,2,300,0,False,0.272,1.0
4,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,2,600,0,False,0.24,0.848
5,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,2,900,0,False,0.04,0.2
6,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,False,1,300,0,False,0.296,1.0
7,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,False,1,600,0,False,0.288,1.0
8,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,False,1,900,0,False,0.28,0.992
9,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,False,2,300,0,False,0.256,1.0


In [39]:
# Grouped view of the no-title RAG (Answers Only) results: token_count as the row
# variable, chunk_size as the column variable, prompt_type as the bar variable.
world_med_qa_v_plot_helpers.plot_rag_q_evaluation_results_by_groups(
    evaluation_results=evaluation_results,
    title="[RAG (Answers Only) - No Title - Recursive Character Splitter - RDC = 1] Analysis of LLaVA Model Accuracy",
    bar_graph_variable='prompt_type',
    column_variable='chunk_size',
    row_variable='token_count'
)

In [40]:
# Keep the highest-accuracy row(s) among the no-title RAG (Answers Only) configurations.
no_title_rag_not_q_as_rdc1_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(evaluation_results=evaluation_results)
no_title_rag_not_q_as_rdc1_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
28,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.32,1.0


**- Token Count = 2**<br>
**- Prompt Type = rq_v5**<br>
**- Chunk Size = 600**

In [41]:
# Best no-title RAG (Answers Only) setting (prompt V5, token_count 2, chunk_size 600),
# now sweeping the relevant documents count from 1 to 5; missing values are shown as "-".
no_title_rag_not_q_as_tc_2_rq_v5_cs600_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
        prompt_types=[RagQPromptType.V5],
        relevant_docs_count=[1, 2, 3, 4, 5],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[False],
                token_counts=[2],
                chunk_sizes=[600],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[False]
    ).get_possible_vqa_strategy_details(),
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
no_title_rag_not_q_as_tc_2_rq_v5_cs600_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.32,1.0
1,spain,english,rag_q_as,rq_v5,2,recursive_character_splitter,False,2,600,0,False,0.312,1.0
2,spain,english,rag_q_as,rq_v5,3,recursive_character_splitter,False,2,600,0,False,0.312,1.0
3,spain,english,rag_q_as,rq_v5,4,recursive_character_splitter,False,2,600,0,False,0.304,1.0
4,spain,english,rag_q_as,rq_v5,5,recursive_character_splitter,False,2,600,0,False,0.288,1.0


In [42]:
# Accuracy against relevant documents count for the best no-title RAG (Answers Only) setting.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    title="[RAG-As] LLaVA Model Accuracy (No Title - Token Count = 2; Prompt Type = rq_v5 - Chunk Size = 600)",
    evaluation_results=no_title_rag_not_q_as_tc_2_rq_v5_cs600_results,
    x_dataframe_column_name="relevant_docs_count",
    x_axis_title="Relevant Documents Count",
    y_dataframe_column_name="accuracy",
    y_axis_title="Accuracy"
)

In [43]:
# Keep the highest-accuracy row(s) of the relevant-documents sweep (no title, Answers Only).
rc_splitter_best_rag_not_q_as_no_title_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(evaluation_results=no_title_rag_not_q_as_tc_2_rq_v5_cs600_results)
rc_splitter_best_rag_not_q_as_no_title_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.32,1.0


###### 3.1.3.2.2. With Title

**- Relevant Documents Count = 1**

In [44]:
# RAG_Q_AS with should_apply_rag_to_question=False (the "Answers Only" variant), with titles,
# one relevant document: every prompt type crossed with token_count {1, 2} and
# chunk_size {300, 600, 900}; missing values are shown as "-".
# NOTE: this rebinds `evaluation_results`, replacing the frame loaded by the
# previous section.
evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
        prompt_types=list(RagQPromptType),
        relevant_docs_count=[1],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[True],
                token_counts=[1, 2],
                chunk_sizes=[300, 600, 900],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[False]
    ).get_possible_vqa_strategy_details(),
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,1,300,0,False,0.264,1.0
1,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,1,600,0,False,0.272,1.0
2,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,1,900,0,False,0.28,0.992
3,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,2,300,0,False,0.272,1.0
4,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,2,600,0,False,0.192,0.816
5,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,2,900,0,False,0.048,0.168
6,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,True,1,300,0,False,0.256,1.0
7,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,True,1,600,0,False,0.28,1.0
8,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,True,1,900,0,False,0.264,0.992
9,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,True,2,300,0,False,0.256,1.0


In [45]:
# Grouped view of the with-title RAG (Answers Only) results: token_count as the row
# variable, chunk_size as the column variable, prompt_type as the bar variable.
world_med_qa_v_plot_helpers.plot_rag_q_evaluation_results_by_groups(
    evaluation_results=evaluation_results,
    title="[RAG (Answers Only) - With Title - Recursive Character Splitter - RDC = 1] Analysis of LLaVA Model Accuracy",
    bar_graph_variable='prompt_type',
    column_variable='chunk_size',
    row_variable='token_count'
)

In [46]:
# Keep the highest-accuracy row(s) among the with-title RAG (Answers Only) configurations;
# ties are all retained, so more than one row can come back.
with_title_rag_not_q_as_rdc1_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(evaluation_results=evaluation_results)
with_title_rag_not_q_as_rdc1_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
28,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0
34,spain,english,rag_q_as,rq_v6,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0


**- Token Count = 2**<br>
**- Prompt Type = rq_v5**<br>
**- Chunk Size = 600**

In [47]:
# First tied with-title RAG (Answers Only) setting (prompt V5, token_count 2, chunk_size 600),
# now sweeping the relevant documents count from 1 to 5; missing values are shown as "-".
with_title_rag_not_q_as_tc_2_rq_v5_cs600_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
        prompt_types=[RagQPromptType.V5],
        relevant_docs_count=[1, 2, 3, 4, 5],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[True],
                token_counts=[2],
                chunk_sizes=[600],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[False]
    ).get_possible_vqa_strategy_details(),
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
with_title_rag_not_q_as_tc_2_rq_v5_cs600_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0
1,spain,english,rag_q_as,rq_v5,2,recursive_character_splitter,True,2,600,0,False,0.288,1.0
2,spain,english,rag_q_as,rq_v5,3,recursive_character_splitter,True,2,600,0,False,0.304,1.0
3,spain,english,rag_q_as,rq_v5,4,recursive_character_splitter,True,2,600,0,False,0.304,1.0
4,spain,english,rag_q_as,rq_v5,5,recursive_character_splitter,True,2,600,0,False,0.304,1.0


In [48]:
# Accuracy against relevant documents count for the with-title RAG (Answers Only) rq_v5 setting.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    title="[RAG-As] LLaVA Model Accuracy (With Title - Token Count = 2; Prompt Type = rq_v5 - Chunk Size = 600)",
    evaluation_results=with_title_rag_not_q_as_tc_2_rq_v5_cs600_results,
    x_dataframe_column_name="relevant_docs_count",
    x_axis_title="Relevant Documents Count",
    y_dataframe_column_name="accuracy",
    y_axis_title="Accuracy"
)

In [49]:
# Keep the highest-accuracy row(s) of the rq_v5 relevant-documents sweep (with title, Answers Only).
rc_splitter_best_rag_not_q_as_with_title_results_v1 = world_med_qa_v_dataset_management.get_max_accuracy_rows(evaluation_results=with_title_rag_not_q_as_tc_2_rq_v5_cs600_results)
rc_splitter_best_rag_not_q_as_with_title_results_v1

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0


**- Token Count = 2**<br>
**- Prompt Type = rq_v6**<br>
**- Chunk Size = 600**

In [50]:
# Second tied with-title RAG (Answers Only) setting (prompt V6, token_count 2, chunk_size 600),
# now sweeping the relevant documents count from 1 to 5; missing values are shown as "-".
with_title_rag_not_q_as_tc_2_rq_v6_cs600_results = world_med_qa_v_dataset_management.load_evaluation_results(
    vqa_strategy_details=GeneralVQAStrategiesDetails(
        countries=["spain"],
        file_types=["english"],
        use_images=[True],
        vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
        prompt_types=[RagQPromptType.V6],
        relevant_docs_count=[1, 2, 3, 4, 5],
        doc_splitter_options=[
            GeneralDocSplitterOptions(
                doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                add_titles=[True],
                token_counts=[2],
                chunk_sizes=[600],
                chunk_overlaps=[0]
            )
        ],
        should_apply_rag_to_questions=[False]
    ).get_possible_vqa_strategy_details(),
    evaluation_results_folder=RESULTS_DIR
).fillna("-")
with_title_rag_not_q_as_tc_2_rq_v6_cs600_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v6,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0
1,spain,english,rag_q_as,rq_v6,2,recursive_character_splitter,True,2,600,0,False,0.296,1.0
2,spain,english,rag_q_as,rq_v6,3,recursive_character_splitter,True,2,600,0,False,0.288,1.0
3,spain,english,rag_q_as,rq_v6,4,recursive_character_splitter,True,2,600,0,False,0.296,1.0
4,spain,english,rag_q_as,rq_v6,5,recursive_character_splitter,True,2,600,0,False,0.288,1.0


In [51]:
# Bar chart of the `accuracy` column against `relevant_docs_count` for the
# with-title, token_count=2, rq_v6, chunk_size=600 sweep loaded above.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=with_title_rag_not_q_as_tc_2_rq_v6_cs600_results,
    title="[RAG-As] LLaVA Model Accuracy (With Title - Token Count = 2; Prompt Type = rq_v6 - Chunk Size = 600)",
    x_axis_title="Relevant Documents Count",
    y_axis_title="Accuracy",
    x_dataframe_column_name="relevant_docs_count",
    y_dataframe_column_name="accuracy"
)

In [52]:
# Keep the top-accuracy row(s) of the rq_v6 sweep; this frame is the second
# candidate pooled in the "Best Combination" cell below.
rc_splitter_best_rag_not_q_as_with_title_results_v2 = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=with_title_rag_not_q_as_tc_2_rq_v6_cs600_results
)
rc_splitter_best_rag_not_q_as_with_title_results_v2

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v6,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0


**- Best Combination**

In [53]:
# Pool the per-prompt winners (rq_v5 and rq_v6) and keep the top-accuracy row(s).
# ignore_index=True relabels the pooled rows 0..n-1: each input frame carries a
# row labelled 0, so concatenating without it leaves duplicate index labels in
# the result (label-based lookups such as .loc[0] would then match several rows).
rc_splitter_best_rag_not_q_as_with_title_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=pd.concat(
        [
            rc_splitter_best_rag_not_q_as_with_title_results_v1,
            rc_splitter_best_rag_not_q_as_with_title_results_v2
        ],
        ignore_index=True
    )
)
rc_splitter_best_rag_not_q_as_with_title_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0
0,spain,english,rag_q_as,rq_v6,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0


###### 3.1.3.2.3. Best Configuration

\* Of the best RAG (Answers Only) results, only the top-performing configuration is carried forward to further experiments.

In [54]:
# Stack the best no-title and with-title candidates into one frame;
# ignore_index=True relabels the rows 0..n-1 so index labels stay unique.
rc_splitter_best_rag_not_q_as_results = pd.concat(
    [
        rc_splitter_best_rag_not_q_as_no_title_results,
        rc_splitter_best_rag_not_q_as_with_title_results
    ],
    ignore_index=True
)
rc_splitter_best_rag_not_q_as_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.32,1.0
1,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0
2,spain,english,rag_q_as,rq_v6,1,recursive_character_splitter,True,2,600,0,False,0.312,1.0


In [55]:
# Retain every candidate whose accuracy equals the column maximum
# (all tied rows are kept; no tie-breaking happens here).
rc_splitter_best_rag_not_q_as_result = rc_splitter_best_rag_not_q_as_results.loc[
    lambda candidates: candidates['accuracy'] == candidates['accuracy'].max()
]
rc_splitter_best_rag_not_q_as_result

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.32,1.0


##### 3.1.3.3. RAG (Question and Answers)

###### 3.1.3.3.1. No Title

**- Relevant Documents Count = 1**

In [56]:
# Grid of no-title RAG_Q_AS runs at relevant_docs_count=1 with RAG applied to
# the question: every RagQPromptType x token_count {1, 2} x chunk_size
# {300, 600, 900}. Missing values are replaced with "-" in the same expression.
evaluation_results = (
    world_med_qa_v_dataset_management.load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=GeneralVQAStrategiesDetails(
            countries=["spain"],
            file_types=["english"],
            use_images=[True],
            vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
            prompt_types=list(RagQPromptType),
            relevant_docs_count=[1],
            doc_splitter_options=[
                GeneralDocSplitterOptions(
                    doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                    add_titles=[False],
                    token_counts=[1, 2],
                    chunk_sizes=[300, 600, 900],
                    chunk_overlaps=[0],
                )
            ],
            should_apply_rag_to_questions=[True],
        ).get_possible_vqa_strategy_details(),
    )
    .fillna("-")
)
evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,1,300,0,True,0.264,1.0
1,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,1,600,0,True,0.264,1.0
2,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,1,900,0,True,0.224,0.904
3,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,2,300,0,True,0.28,1.0
4,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,2,600,0,True,0.096,0.424
5,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,False,2,900,0,True,0.136,0.512
6,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,False,1,300,0,True,0.256,1.0
7,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,False,1,600,0,True,0.232,1.0
8,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,False,1,900,0,True,0.184,0.872
9,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,False,2,300,0,True,0.264,1.0


In [57]:
# Grouped accuracy plot for the no-title grid loaded above.
# NOTE(review): presumably token_count selects the subplot row, chunk_size the
# subplot column and prompt_type the bars within each panel - confirm against
# plot_rag_q_evaluation_results_by_groups.
world_med_qa_v_plot_helpers.plot_rag_q_evaluation_results_by_groups(
    title="[RAG (Question and Answers) - No Title - Recursive Character Splitter - RDC = 1] Analysis of LLaVA Model Accuracy",
    evaluation_results=evaluation_results,
    row_variable='token_count',
    column_variable='chunk_size',
    bar_graph_variable='prompt_type'
)

In [58]:
# Top-accuracy row(s) of the no-title grid at relevant_docs_count=1. Tied rows
# are all returned and keep their original row labels; each of these
# configurations is swept over relevant_docs_count in the cells that follow.
# Reads the generic `evaluation_results` name, so the no-title load cell must
# be the most recent one to have assigned it.
no_title_rag_q_as_rdc1_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=evaluation_results
)
no_title_rag_q_as_rdc1_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
15,spain,english,rag_q_as,rq_v3,1,recursive_character_splitter,False,2,300,0,True,0.32,1.0
24,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.32,1.0
25,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,600,0,True,0.32,1.0
28,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,True,0.32,1.0


**- Token Count = 2**<br>
**- Prompt Type = rq_v3**<br>
**- Chunk Size = 300**

In [59]:
# Sweep relevant_docs_count over 1..5 for the no-title, token_count=2, rq_v3,
# chunk_size=300 configuration with RAG applied to the question.
# Missing values are replaced with "-" in the same expression.
no_title_rag_q_as_tc_2_rq_v3_cs300_results = (
    world_med_qa_v_dataset_management.load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=GeneralVQAStrategiesDetails(
            countries=["spain"],
            file_types=["english"],
            use_images=[True],
            vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
            prompt_types=[RagQPromptType.V3],
            relevant_docs_count=[1, 2, 3, 4, 5],
            doc_splitter_options=[
                GeneralDocSplitterOptions(
                    doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                    add_titles=[False],
                    token_counts=[2],
                    chunk_sizes=[300],
                    chunk_overlaps=[0],
                )
            ],
            should_apply_rag_to_questions=[True],
        ).get_possible_vqa_strategy_details(),
    )
    .fillna("-")
)
no_title_rag_q_as_tc_2_rq_v3_cs300_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v3,1,recursive_character_splitter,False,2,300,0,True,0.32,1.0
1,spain,english,rag_q_as,rq_v3,2,recursive_character_splitter,False,2,300,0,True,0.136,0.592
2,spain,english,rag_q_as,rq_v3,3,recursive_character_splitter,False,2,300,0,True,0.128,0.544
3,spain,english,rag_q_as,rq_v3,4,recursive_character_splitter,False,2,300,0,True,0.184,0.744
4,spain,english,rag_q_as,rq_v3,5,recursive_character_splitter,False,2,300,0,True,0.168,0.688


In [60]:
# Bar chart of the `accuracy` column against `relevant_docs_count` for the
# no-title, token_count=2, rq_v3, chunk_size=300 sweep loaded above.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=no_title_rag_q_as_tc_2_rq_v3_cs300_results,
    title="[RAG-Q+As] LLaVA Model Accuracy (No Title - Token Count = 2; Prompt Type = rq_v3 - Chunk Size = 300)",
    x_axis_title="Relevant Documents Count",
    y_axis_title="Accuracy",
    x_dataframe_column_name="relevant_docs_count",
    y_dataframe_column_name="accuracy"
)

In [61]:
# Keep the top-accuracy row(s) of the rq_v3 / token_count=2 / chunk_size=300
# sweep; candidate 1 of 4 pooled in the no-title "Best Combination" cell.
rc_splitter_best_rag_q_as_no_title_results_v1 = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=no_title_rag_q_as_tc_2_rq_v3_cs300_results
)
rc_splitter_best_rag_q_as_no_title_results_v1

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v3,1,recursive_character_splitter,False,2,300,0,True,0.32,1.0


**- Token Count = 1**<br>
**- Prompt Type = rq_v5**<br>
**- Chunk Size = 300**

In [62]:
# Sweep relevant_docs_count over 1..5 for the no-title, token_count=1, rq_v5,
# chunk_size=300 configuration with RAG applied to the question.
# Missing values are replaced with "-" in the same expression.
no_title_rag_q_as_tc_1_rq_v5_cs300_results = (
    world_med_qa_v_dataset_management.load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=GeneralVQAStrategiesDetails(
            countries=["spain"],
            file_types=["english"],
            use_images=[True],
            vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
            prompt_types=[RagQPromptType.V5],
            relevant_docs_count=[1, 2, 3, 4, 5],
            doc_splitter_options=[
                GeneralDocSplitterOptions(
                    doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                    add_titles=[False],
                    token_counts=[1],
                    chunk_sizes=[300],
                    chunk_overlaps=[0],
                )
            ],
            should_apply_rag_to_questions=[True],
        ).get_possible_vqa_strategy_details(),
    )
    .fillna("-")
)
no_title_rag_q_as_tc_1_rq_v5_cs300_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.32,1.0
1,spain,english,rag_q_as,rq_v5,2,recursive_character_splitter,False,1,300,0,True,0.296,1.0
2,spain,english,rag_q_as,rq_v5,3,recursive_character_splitter,False,1,300,0,True,0.272,1.0
3,spain,english,rag_q_as,rq_v5,4,recursive_character_splitter,False,1,300,0,True,0.296,1.0
4,spain,english,rag_q_as,rq_v5,5,recursive_character_splitter,False,1,300,0,True,0.296,1.0


In [63]:
# Bar chart of the `accuracy` column against `relevant_docs_count` for the
# no-title, token_count=1, rq_v5, chunk_size=300 sweep loaded above.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=no_title_rag_q_as_tc_1_rq_v5_cs300_results,
    title="[RAG-Q+As] LLaVA Model Accuracy (No Title - Token Count = 1; Prompt Type = rq_v5 - Chunk Size = 300)",
    x_axis_title="Relevant Documents Count",
    y_axis_title="Accuracy",
    x_dataframe_column_name="relevant_docs_count",
    y_dataframe_column_name="accuracy"
)

In [64]:
# Keep the top-accuracy row(s) of the rq_v5 / token_count=1 / chunk_size=300
# sweep; candidate 2 of 4 pooled in the no-title "Best Combination" cell.
rc_splitter_best_rag_q_as_no_title_results_v2 = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=no_title_rag_q_as_tc_1_rq_v5_cs300_results
)
rc_splitter_best_rag_q_as_no_title_results_v2

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.32,1.0


**- Token Count = 1**<br>
**- Prompt Type = rq_v5**<br>
**- Chunk Size = 600**

In [65]:
# Sweep relevant_docs_count over 1..5 for the no-title, token_count=1, rq_v5,
# chunk_size=600 configuration with RAG applied to the question.
# Missing values are replaced with "-" in the same expression.
no_title_rag_q_as_tc_1_rq_v5_cs600_results = (
    world_med_qa_v_dataset_management.load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=GeneralVQAStrategiesDetails(
            countries=["spain"],
            file_types=["english"],
            use_images=[True],
            vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
            prompt_types=[RagQPromptType.V5],
            relevant_docs_count=[1, 2, 3, 4, 5],
            doc_splitter_options=[
                GeneralDocSplitterOptions(
                    doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                    add_titles=[False],
                    token_counts=[1],
                    chunk_sizes=[600],
                    chunk_overlaps=[0],
                )
            ],
            should_apply_rag_to_questions=[True],
        ).get_possible_vqa_strategy_details(),
    )
    .fillna("-")
)
no_title_rag_q_as_tc_1_rq_v5_cs600_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,600,0,True,0.32,1.0
1,spain,english,rag_q_as,rq_v5,2,recursive_character_splitter,False,1,600,0,True,0.304,1.0
2,spain,english,rag_q_as,rq_v5,3,recursive_character_splitter,False,1,600,0,True,0.304,1.0
3,spain,english,rag_q_as,rq_v5,4,recursive_character_splitter,False,1,600,0,True,0.304,1.0
4,spain,english,rag_q_as,rq_v5,5,recursive_character_splitter,False,1,600,0,True,0.304,1.0


In [66]:
# Bar chart of the `accuracy` column against `relevant_docs_count` for the
# no-title, token_count=1, rq_v5, chunk_size=600 sweep loaded above.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=no_title_rag_q_as_tc_1_rq_v5_cs600_results,
    title="[RAG-Q+As] LLaVA Model Accuracy (No Title - Token Count = 1; Prompt Type = rq_v5 - Chunk Size = 600)",
    x_axis_title="Relevant Documents Count",
    y_axis_title="Accuracy",
    x_dataframe_column_name="relevant_docs_count",
    y_dataframe_column_name="accuracy"
)

In [67]:
# Keep the top-accuracy row(s) of the rq_v5 / token_count=1 / chunk_size=600
# sweep; candidate 3 of 4 pooled in the no-title "Best Combination" cell.
rc_splitter_best_rag_q_as_no_title_results_v3 = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=no_title_rag_q_as_tc_1_rq_v5_cs600_results
)
rc_splitter_best_rag_q_as_no_title_results_v3

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,600,0,True,0.32,1.0


**- Token Count = 2**<br>
**- Prompt Type = rq_v5**<br>
**- Chunk Size = 600**

In [68]:
# Sweep relevant_docs_count over 1..5 for the no-title, token_count=2, rq_v5,
# chunk_size=600 configuration with RAG applied to the question.
# Missing values are replaced with "-" in the same expression.
no_title_rag_q_as_tc_2_rq_v5_cs600_results = (
    world_med_qa_v_dataset_management.load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=GeneralVQAStrategiesDetails(
            countries=["spain"],
            file_types=["english"],
            use_images=[True],
            vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
            prompt_types=[RagQPromptType.V5],
            relevant_docs_count=[1, 2, 3, 4, 5],
            doc_splitter_options=[
                GeneralDocSplitterOptions(
                    doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                    add_titles=[False],
                    token_counts=[2],
                    chunk_sizes=[600],
                    chunk_overlaps=[0],
                )
            ],
            should_apply_rag_to_questions=[True],
        ).get_possible_vqa_strategy_details(),
    )
    .fillna("-")
)
no_title_rag_q_as_tc_2_rq_v5_cs600_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,True,0.32,1.0
1,spain,english,rag_q_as,rq_v5,2,recursive_character_splitter,False,2,600,0,True,0.304,1.0
2,spain,english,rag_q_as,rq_v5,3,recursive_character_splitter,False,2,600,0,True,0.304,1.0
3,spain,english,rag_q_as,rq_v5,4,recursive_character_splitter,False,2,600,0,True,0.304,1.0
4,spain,english,rag_q_as,rq_v5,5,recursive_character_splitter,False,2,600,0,True,0.304,1.0


In [69]:
# Bar chart of the `accuracy` column against `relevant_docs_count` for the
# no-title, token_count=2, rq_v5, chunk_size=600 sweep loaded above.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=no_title_rag_q_as_tc_2_rq_v5_cs600_results,
    title="[RAG-Q+As] LLaVA Model Accuracy (No Title - Token Count = 2; Prompt Type = rq_v5 - Chunk Size = 600)",
    x_axis_title="Relevant Documents Count",
    y_axis_title="Accuracy",
    x_dataframe_column_name="relevant_docs_count",
    y_dataframe_column_name="accuracy"
)

In [70]:
# Keep the top-accuracy row(s) of the rq_v5 / token_count=2 / chunk_size=600
# sweep; candidate 4 of 4 pooled in the no-title "Best Combination" cell.
rc_splitter_best_rag_q_as_no_title_results_v4 = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=no_title_rag_q_as_tc_2_rq_v5_cs600_results
)
rc_splitter_best_rag_q_as_no_title_results_v4

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,True,0.32,1.0


**- Best Combination**

In [71]:
# Pool the four per-configuration winners and keep the top-accuracy row(s).
# ignore_index=True relabels the pooled rows 0..n-1: each input frame carries a
# row labelled 0, so concatenating without it leaves four rows that all share
# index label 0 (label-based lookups such as .loc[0] would match all of them).
rc_splitter_best_rag_q_as_no_title_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=pd.concat(
        [
            rc_splitter_best_rag_q_as_no_title_results_v1,
            rc_splitter_best_rag_q_as_no_title_results_v2,
            rc_splitter_best_rag_q_as_no_title_results_v3,
            rc_splitter_best_rag_q_as_no_title_results_v4
        ],
        ignore_index=True
    )
)
rc_splitter_best_rag_q_as_no_title_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v3,1,recursive_character_splitter,False,2,300,0,True,0.32,1.0
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.32,1.0
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,600,0,True,0.32,1.0
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,True,0.32,1.0


###### 3.1.3.3.2. With Title

**- Relevant Documents Count = 1**

In [72]:
# Grid of with-title RAG_Q_AS runs at relevant_docs_count=1 with RAG applied to
# the question: every RagQPromptType x token_count {1, 2} x chunk_size
# {300, 600, 900}. Missing values are replaced with "-" in the same expression.
# Rebinds the generic `evaluation_results` name used by the no-title section.
evaluation_results = (
    world_med_qa_v_dataset_management.load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=GeneralVQAStrategiesDetails(
            countries=["spain"],
            file_types=["english"],
            use_images=[True],
            vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
            prompt_types=list(RagQPromptType),
            relevant_docs_count=[1],
            doc_splitter_options=[
                GeneralDocSplitterOptions(
                    doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                    add_titles=[True],
                    token_counts=[1, 2],
                    chunk_sizes=[300, 600, 900],
                    chunk_overlaps=[0],
                )
            ],
            should_apply_rag_to_questions=[True],
        ).get_possible_vqa_strategy_details(),
    )
    .fillna("-")
)
evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,1,300,0,True,0.264,1.0
1,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,1,600,0,True,0.256,1.0
2,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,1,900,0,True,0.176,0.8
3,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,2,300,0,True,0.264,1.0
4,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,2,600,0,True,0.12,0.376
5,spain,english,rag_q_as,rq_v1,1,recursive_character_splitter,True,2,900,0,True,0.112,0.472
6,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,True,1,300,0,True,0.296,1.0
7,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,True,1,600,0,True,0.256,1.0
8,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,True,1,900,0,True,0.168,0.776
9,spain,english,rag_q_as,rq_v2,1,recursive_character_splitter,True,2,300,0,True,0.264,1.0


In [73]:
# Grouped accuracy plot for the with-title grid loaded above.
# NOTE(review): presumably token_count selects the subplot row, chunk_size the
# subplot column and prompt_type the bars within each panel - confirm against
# plot_rag_q_evaluation_results_by_groups.
world_med_qa_v_plot_helpers.plot_rag_q_evaluation_results_by_groups(
    title="[RAG (Question and Answers) - With Title - Recursive Character Splitter - RDC = 1] Analysis of LLaVA Model Accuracy",
    evaluation_results=evaluation_results,
    row_variable='token_count',
    column_variable='chunk_size',
    bar_graph_variable='prompt_type'
)

In [74]:
# Top-accuracy row(s) of the with-title grid at relevant_docs_count=1; the
# winning configuration is swept over relevant_docs_count in the next cells.
# Reads the generic `evaluation_results` name, so the with-title load cell must
# be the most recent one to have assigned it.
with_title_rag_q_as_rdc1_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=evaluation_results
)
with_title_rag_q_as_rdc1_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
33,spain,english,rag_q_as,rq_v6,1,recursive_character_splitter,True,2,300,0,True,0.312,1.0


**- Token Count = 2**<br>
**- Prompt Type = rq_v6**<br>
**- Chunk Size = 300**

In [75]:
# Sweep relevant_docs_count over 1..5 for the with-title, token_count=2, rq_v6,
# chunk_size=300 configuration with RAG applied to the question.
# Missing values are replaced with "-" in the same expression.
with_title_rag_q_as_tc_2_rq_v6_cs300_results = (
    world_med_qa_v_dataset_management.load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=GeneralVQAStrategiesDetails(
            countries=["spain"],
            file_types=["english"],
            use_images=[True],
            vqa_strategy_types=[VQAStrategyType.RAG_Q_AS],
            prompt_types=[RagQPromptType.V6],
            relevant_docs_count=[1, 2, 3, 4, 5],
            doc_splitter_options=[
                GeneralDocSplitterOptions(
                    doc_splitter_types=[DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER],
                    add_titles=[True],
                    token_counts=[2],
                    chunk_sizes=[300],
                    chunk_overlaps=[0],
                )
            ],
            should_apply_rag_to_questions=[True],
        ).get_possible_vqa_strategy_details(),
    )
    .fillna("-")
)
with_title_rag_q_as_tc_2_rq_v6_cs300_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v6,1,recursive_character_splitter,True,2,300,0,True,0.312,1.0
1,spain,english,rag_q_as,rq_v6,2,recursive_character_splitter,True,2,300,0,True,0.288,1.0
2,spain,english,rag_q_as,rq_v6,3,recursive_character_splitter,True,2,300,0,True,0.288,1.0
3,spain,english,rag_q_as,rq_v6,4,recursive_character_splitter,True,2,300,0,True,0.288,1.0
4,spain,english,rag_q_as,rq_v6,5,recursive_character_splitter,True,2,300,0,True,0.288,1.0


In [76]:
# Bar chart of the `accuracy` column against `relevant_docs_count` for the
# with-title, token_count=2, rq_v6, chunk_size=300 sweep loaded above.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=with_title_rag_q_as_tc_2_rq_v6_cs300_results,
    title="[RAG-Q+As] LLaVA Model Accuracy (With Title - Token Count = 2; Prompt Type = rq_v6 - Chunk Size = 300)",
    x_axis_title="Relevant Documents Count",
    y_axis_title="Accuracy",
    x_dataframe_column_name="relevant_docs_count",
    y_dataframe_column_name="accuracy"
)

In [77]:
# Keep the top-accuracy row(s) of the with-title sweep. Only one with-title
# configuration was swept, so this is directly the with-title candidate used
# in the "Best Configuration" comparison.
rc_splitter_best_rag_q_as_with_title_results = world_med_qa_v_dataset_management.get_max_accuracy_rows(
    evaluation_results=with_title_rag_q_as_tc_2_rq_v6_cs300_results
)
rc_splitter_best_rag_q_as_with_title_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v6,1,recursive_character_splitter,True,2,300,0,True,0.312,1.0


###### 3.1.3.3.3. Best Configuration

\* Of the best RAG (Question and Answers) results, only the top-performing configuration is carried forward to further experiments.

In [78]:
# Stack the best no-title and with-title candidates into one frame;
# ignore_index=True relabels the rows 0..n-1 so index labels stay unique.
rc_splitter_best_rag_q_as_results = pd.concat(
    [
        rc_splitter_best_rag_q_as_no_title_results,
        rc_splitter_best_rag_q_as_with_title_results
    ],
    ignore_index=True
)
rc_splitter_best_rag_q_as_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v3,1,recursive_character_splitter,False,2,300,0,True,0.32,1.0
1,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.32,1.0
2,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,600,0,True,0.32,1.0
3,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,True,0.32,1.0
4,spain,english,rag_q_as,rq_v6,1,recursive_character_splitter,True,2,300,0,True,0.312,1.0


In [79]:
# Retain every candidate whose accuracy equals the column maximum; tied rows
# are all kept here and the tie is resolved in the following cells.
rc_splitter_best_rag_q_as_result = rc_splitter_best_rag_q_as_results.loc[
    lambda candidates: candidates['accuracy'] == candidates['accuracy'].max()
]
rc_splitter_best_rag_q_as_result

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v3,1,recursive_character_splitter,False,2,300,0,True,0.32,1.0
1,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.32,1.0
2,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,600,0,True,0.32,1.0
3,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,True,0.32,1.0


\* Since several configurations are tied, only one is selected for further experiments. The tie is broken by sorting the tied rows in ascending order on the following columns, listed from highest to lowest priority, and keeping the first row:

1. Token Count
2. Chunk Size
3. Prompt Type

In [80]:
# Order the tied rows by the tie-break keys (ascending, in priority order:
# token_count, then chunk_size, then prompt_type) and relabel them 0..n-1.
rc_splitter_best_rag_q_as_result = (
    rc_splitter_best_rag_q_as_result
    .sort_values(['token_count', 'chunk_size', 'prompt_type'])
    .reset_index(drop=True)
)
rc_splitter_best_rag_q_as_result

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.32,1.0
1,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,600,0,True,0.32,1.0
2,spain,english,rag_q_as,rq_v3,1,recursive_character_splitter,False,2,300,0,True,0.32,1.0
3,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,True,0.32,1.0


In [81]:
# Keep only the first row of the sorted ties as a one-row DataFrame.
rc_splitter_best_rag_q_as_result = rc_splitter_best_rag_q_as_result.iloc[:1]
rc_splitter_best_rag_q_as_result

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.32,1.0


#### 3.1.4. Best Results Summary

In [82]:
# One-row reference frame labelled 'Matos et al. (2024)', laid out with the same
# columns as the loaded evaluation results so it can sit in the summary table.
# Configuration columns that do not apply to the reference row hold "-".
# NOTE(review): 0.45 is presumably the accuracy reported in that work for this
# split - confirm against the paper.
spanish_english_world_med_qa_v_evaluation_results = pd.DataFrame([{
    'country': 'spain',
    'file_type': 'english',
    'vqa_strategy_type': 'Matos et al. (2024)',
    **dict.fromkeys(
        [
            'prompt_type',
            'relevant_docs_count',
            'doc_splitter',
            'add_title',
            'token_count',
            'chunk_size',
            'chunk_overlap',
            'should_apply_rag_to_question',
        ],
        '-',
    ),
    'accuracy': 0.45,
    'well_formatted_answers': 1.0,
}])

spanish_english_world_med_qa_v_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,Matos et al. (2024),-,-,-,-,-,-,-,-,0.45,1.0


In [83]:
# Summary table: the literature reference row first, then the best zero-shot,
# RAG Q, RAG Q+As (answers only) and RAG Q+As (question and answers) results.
# NOTE(review): excluded_indexes=[0] presumably keeps the reference row (list
# position 0) out of the green best-result highlight - confirm against
# display_evaluation_results_summary.
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[
        spanish_english_world_med_qa_v_evaluation_results,
        best_zero_shot_result,
        rc_splitter_best_rag_q_result,
        rc_splitter_best_rag_not_q_as_result,
        rc_splitter_best_rag_q_as_result,
    ],
    excluded_indexes=[0],
    include_green_highlight=True
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Spain,English,Matos et al. (2024),-,-,-,-,-,0.45
Spain,English,Zero-Shot,zs_v2,-,-,-,-,0.304
Spain,English,RAG Q,rq_v6,3,No,2,300,0.312
Spain,English,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.32
Spain,English,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.32


### 3.2. Test Subset Results

#### 3.2.1. English Translation Splits

##### 3.2.1.1. Portuguese

In [84]:
# Zero-shot (prompt zs_v2) result for the Brazil / English split.
# Missing values are replaced with "-" in the same expression.
portuguese_english_zero_shot_evaluation_results = (
    world_med_qa_v_dataset_management.load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="brazil",
                file_type="english",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
                prompt_type=ZeroShotPromptType.V2,
            )
        ],
    )
    .fillna("-")
)
portuguese_english_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,brazil,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.337079,1.0


In [85]:
# RAG results for the Brazil / English split, one entry per strategy, using the
# same hyper-parameters as the three RAG rows of the Spain best-results summary:
#   1. RAG_Q     - rq_v6, 3 relevant docs, token_count=2, chunk_size=300
#   2. RAG_Q_AS  - rq_v5, 1 relevant doc,  token_count=2, chunk_size=600, no RAG on the question
#   3. RAG_Q_AS  - rq_v5, 1 relevant doc,  token_count=1, chunk_size=300, RAG on the question
# Missing values are replaced with "-" in the same expression.
portuguese_english_rag_evaluation_results = (
    world_med_qa_v_dataset_management.load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="brazil",
                file_type="english",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.RAG_Q,
                prompt_type=RagQPromptType.V6,
                relevant_docs_count=3,
                doc_splitter_options=DocSplitterOptions(
                    doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                    token_count=2,
                    chunk_size=300,
                    chunk_overlap=0,
                ),
            ),
            VQAStrategyDetail(
                country="brazil",
                file_type="english",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
                prompt_type=RagQPromptType.V5,
                relevant_docs_count=1,
                doc_splitter_options=DocSplitterOptions(
                    doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                    token_count=2,
                    chunk_size=600,
                    chunk_overlap=0,
                ),
                should_apply_rag_to_question=False,
            ),
            VQAStrategyDetail(
                country="brazil",
                file_type="english",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
                prompt_type=RagQPromptType.V5,
                relevant_docs_count=1,
                doc_splitter_options=DocSplitterOptions(
                    doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                    token_count=1,
                    chunk_size=300,
                    chunk_overlap=0,
                ),
                should_apply_rag_to_question=True,
            ),
        ],
    )
    .fillna("-")
)
portuguese_english_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,brazil,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.337079,1.0
1,brazil,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.348315,1.0
2,brazil,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.280899,1.0


In [86]:
# Combined summary table: zero-shot baseline followed by the RAG runs (Brazil, English).
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[portuguese_english_zero_shot_evaluation_results, portuguese_english_rag_evaluation_results]
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Brazil,English,Zero-Shot,zs_v2,-,-,-,-,0.3371
Brazil,English,RAG Q,rq_v6,3,No,2,300,0.3371
Brazil,English,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.3483
Brazil,English,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.2809


##### 3.2.1.2. Hebrew

In [87]:
# Zero-shot baseline for the Israel English split (image input enabled).
hebrew_english_zero_shot_evaluation_results = (
    world_med_qa_v_dataset_management
    .load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="israel",
                file_type="english",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
                prompt_type=ZeroShotPromptType.V2,
            ),
        ],
    )
    .fillna("-")  # missing values are rendered as "-"
)
hebrew_english_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,israel,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.232432,0.994595


In [88]:
# RAG configurations for the Israel English split (image input enabled):
# RAG Q, RAG Q+As on the answers only, and RAG Q+As on question and answers.
hebrew_english_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="israel",
            file_type="english",
            use_image=True,
            vqa_strategy_type=strategy_type,
            prompt_type=prompt_type,
            relevant_docs_count=docs_count,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=token_count,
                chunk_size=chunk_size,
                chunk_overlap=0
            ),
            **extra_options
        )
        # (strategy, prompt, retrieved docs, token count, chunk size, strategy-specific options)
        for strategy_type, prompt_type, docs_count, token_count, chunk_size, extra_options in (
            (VQAStrategyType.RAG_Q, RagQPromptType.V6, 3, 2, 300, {}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 2, 600, {"should_apply_rag_to_question": False}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 1, 300, {"should_apply_rag_to_question": True}),
        )
    ]
).fillna("-")  # missing values are rendered as "-"
hebrew_english_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,israel,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.237838,1.0
1,israel,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.254054,1.0
2,israel,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.248649,1.0


In [89]:
# Combined summary table: zero-shot baseline followed by the RAG runs (Israel, English).
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[hebrew_english_zero_shot_evaluation_results, hebrew_english_rag_evaluation_results]
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Israel,English,Zero-Shot,zs_v2,-,-,-,-,0.2324
Israel,English,RAG Q,rq_v6,3,No,2,300,0.2378
Israel,English,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.2541
Israel,English,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.2486


##### 3.2.1.3. Japanese

In [90]:
# Zero-shot baseline for the Japan English split (image input enabled).
japanese_english_zero_shot_evaluation_results = (
    world_med_qa_v_dataset_management
    .load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="japan",
                file_type="english",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
                prompt_type=ZeroShotPromptType.V2,
            ),
        ],
    )
    .fillna("-")  # missing values are rendered as "-"
)
japanese_english_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,japan,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.27381,1.0


In [91]:
# RAG configurations for the Japan English split (image input enabled):
# RAG Q, RAG Q+As on the answers only, and RAG Q+As on question and answers.
japanese_english_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="japan",
            file_type="english",
            use_image=True,
            vqa_strategy_type=strategy_type,
            prompt_type=prompt_type,
            relevant_docs_count=docs_count,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=token_count,
                chunk_size=chunk_size,
                chunk_overlap=0
            ),
            **extra_options
        )
        # (strategy, prompt, retrieved docs, token count, chunk size, strategy-specific options)
        for strategy_type, prompt_type, docs_count, token_count, chunk_size, extra_options in (
            (VQAStrategyType.RAG_Q, RagQPromptType.V6, 3, 2, 300, {}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 2, 600, {"should_apply_rag_to_question": False}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 1, 300, {"should_apply_rag_to_question": True}),
        )
    ]
).fillna("-")  # missing values are rendered as "-"
japanese_english_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,japan,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.279762,1.0
1,japan,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.345238,1.0
2,japan,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.392857,1.0


In [92]:
# Combined summary table: zero-shot baseline followed by the RAG runs (Japan, English).
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[japanese_english_zero_shot_evaluation_results, japanese_english_rag_evaluation_results]
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Japan,English,Zero-Shot,zs_v2,-,-,-,-,0.2738
Japan,English,RAG Q,rq_v6,3,No,2,300,0.2798
Japan,English,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.3452
Japan,English,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.3929


##### 3.2.1.4. Results Summary

In [93]:
# Cross-country accuracy table for the English splits; inputs are passed as
# (zero-shot, RAG) pairs, one pair per country.
world_med_qa_v_plot_helpers.display_test_results_summary(
    evaluation_results_list=[
        # Brazil
        portuguese_english_zero_shot_evaluation_results, portuguese_english_rag_evaluation_results,
        # Israel
        hebrew_english_zero_shot_evaluation_results, hebrew_english_rag_evaluation_results,
        # Japan
        japanese_english_zero_shot_evaluation_results, japanese_english_rag_evaluation_results,
    ]
)

Unnamed: 0,Zero-Shot,RAG (Question Only),RAG (Answers Only),RAG (Question and Answers)
Brazil,0.3371,0.3371,0.3483,0.2809
Israel,0.2324,0.2378,0.2541,0.2486
Japan,0.2738,0.2798,0.3452,0.3929
Mean Accuracy,0.2811,0.2849,0.3159,0.3075


#### 3.2.2. Local Language Splits

##### 3.2.2.1. Spanish

In [94]:
# Zero-shot baseline for the Spain local-language split (image input enabled).
spanish_local_zero_shot_evaluation_results = (
    world_med_qa_v_dataset_management
    .load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="spain",
                file_type="local",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
                prompt_type=ZeroShotPromptType.V2,
            ),
        ],
    )
    .fillna("-")  # missing values are rendered as "-"
)
spanish_local_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,local,zero_shot,zs_v2,-,-,-,-,-,-,-,0.28,1.0


In [95]:
# RAG configurations for the Spain local-language split (image input enabled):
# RAG Q, RAG Q+As on the answers only, and RAG Q+As on question and answers.
spanish_local_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="spain",
            file_type="local",
            use_image=True,
            vqa_strategy_type=strategy_type,
            prompt_type=prompt_type,
            relevant_docs_count=docs_count,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=token_count,
                chunk_size=chunk_size,
                chunk_overlap=0
            ),
            **extra_options
        )
        # (strategy, prompt, retrieved docs, token count, chunk size, strategy-specific options)
        for strategy_type, prompt_type, docs_count, token_count, chunk_size, extra_options in (
            (VQAStrategyType.RAG_Q, RagQPromptType.V6, 3, 2, 300, {}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 2, 600, {"should_apply_rag_to_question": False}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 1, 300, {"should_apply_rag_to_question": True}),
        )
    ]
).fillna("-")  # missing values are rendered as "-"
spanish_local_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,local,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.256,1.0
1,spain,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.248,1.0
2,spain,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.256,1.0


In [96]:
# Combined summary table: zero-shot baseline followed by the RAG runs (Spain, local language).
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[spanish_local_zero_shot_evaluation_results, spanish_local_rag_evaluation_results]
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Spain,Local,Zero-Shot,zs_v2,-,-,-,-,0.28
Spain,Local,RAG Q,rq_v6,3,No,2,300,0.256
Spain,Local,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.248
Spain,Local,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.256


##### 3.2.2.2. Portuguese

In [97]:
# Zero-shot baseline for the Brazil local-language split (image input enabled).
portuguese_local_zero_shot_evaluation_results = (
    world_med_qa_v_dataset_management
    .load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="brazil",
                file_type="local",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
                prompt_type=ZeroShotPromptType.V2,
            ),
        ],
    )
    .fillna("-")  # missing values are rendered as "-"
)
portuguese_local_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,brazil,local,zero_shot,zs_v2,-,-,-,-,-,-,-,0.280899,1.0


In [98]:
# RAG configurations for the Brazil local-language split (image input enabled):
# RAG Q, RAG Q+As on the answers only, and RAG Q+As on question and answers.
portuguese_local_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="brazil",
            file_type="local",
            use_image=True,
            vqa_strategy_type=strategy_type,
            prompt_type=prompt_type,
            relevant_docs_count=docs_count,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=token_count,
                chunk_size=chunk_size,
                chunk_overlap=0
            ),
            **extra_options
        )
        # (strategy, prompt, retrieved docs, token count, chunk size, strategy-specific options)
        for strategy_type, prompt_type, docs_count, token_count, chunk_size, extra_options in (
            (VQAStrategyType.RAG_Q, RagQPromptType.V6, 3, 2, 300, {}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 2, 600, {"should_apply_rag_to_question": False}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 1, 300, {"should_apply_rag_to_question": True}),
        )
    ]
).fillna("-")  # missing values are rendered as "-"
portuguese_local_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,brazil,local,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.303371,1.0
1,brazil,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.280899,1.0
2,brazil,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.280899,1.0


In [99]:
# Combined summary table: zero-shot baseline followed by the RAG runs (Brazil, local language).
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[portuguese_local_zero_shot_evaluation_results, portuguese_local_rag_evaluation_results]
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Brazil,Local,Zero-Shot,zs_v2,-,-,-,-,0.2809
Brazil,Local,RAG Q,rq_v6,3,No,2,300,0.3034
Brazil,Local,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.2809
Brazil,Local,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.2809


##### 3.2.2.3. Hebrew

In [100]:
# Zero-shot baseline for the Israel local-language split (image input enabled).
hebrew_local_zero_shot_evaluation_results = (
    world_med_qa_v_dataset_management
    .load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="israel",
                file_type="local",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
                prompt_type=ZeroShotPromptType.V2,
            ),
        ],
    )
    .fillna("-")  # missing values are rendered as "-"
)
hebrew_local_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,israel,local,zero_shot,zs_v2,-,-,-,-,-,-,-,0.227027,1.0


In [101]:
# RAG configurations for the Israel local-language split (image input enabled):
# RAG Q, RAG Q+As on the answers only, and RAG Q+As on question and answers.
hebrew_local_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="israel",
            file_type="local",
            use_image=True,
            vqa_strategy_type=strategy_type,
            prompt_type=prompt_type,
            relevant_docs_count=docs_count,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=token_count,
                chunk_size=chunk_size,
                chunk_overlap=0
            ),
            **extra_options
        )
        # (strategy, prompt, retrieved docs, token count, chunk size, strategy-specific options)
        for strategy_type, prompt_type, docs_count, token_count, chunk_size, extra_options in (
            (VQAStrategyType.RAG_Q, RagQPromptType.V6, 3, 2, 300, {}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 2, 600, {"should_apply_rag_to_question": False}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 1, 300, {"should_apply_rag_to_question": True}),
        )
    ]
).fillna("-")  # missing values are rendered as "-"
hebrew_local_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,israel,local,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.232432,1.0
1,israel,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.237838,1.0
2,israel,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.254054,1.0


In [102]:
# Combined summary table: zero-shot baseline followed by the RAG runs (Israel, local language).
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[hebrew_local_zero_shot_evaluation_results, hebrew_local_rag_evaluation_results]
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Israel,Local,Zero-Shot,zs_v2,-,-,-,-,0.227
Israel,Local,RAG Q,rq_v6,3,No,2,300,0.2324
Israel,Local,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.2378
Israel,Local,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.2541


##### 3.2.2.4. Japanese

In [103]:
# Zero-shot baseline for the Japan local-language split (image input enabled).
japanese_local_zero_shot_evaluation_results = (
    world_med_qa_v_dataset_management
    .load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="japan",
                file_type="local",
                use_image=True,
                vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
                prompt_type=ZeroShotPromptType.V2,
            ),
        ],
    )
    .fillna("-")  # missing values are rendered as "-"
)
japanese_local_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,japan,local,zero_shot,zs_v2,-,-,-,-,-,-,-,0.208333,0.958333


In [104]:
# RAG configurations for the Japan local-language split (image input enabled):
# RAG Q, RAG Q+As on the answers only, and RAG Q+As on question and answers.
japanese_local_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="japan",
            file_type="local",
            use_image=True,
            vqa_strategy_type=strategy_type,
            prompt_type=prompt_type,
            relevant_docs_count=docs_count,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=token_count,
                chunk_size=chunk_size,
                chunk_overlap=0
            ),
            **extra_options
        )
        # (strategy, prompt, retrieved docs, token count, chunk size, strategy-specific options)
        for strategy_type, prompt_type, docs_count, token_count, chunk_size, extra_options in (
            (VQAStrategyType.RAG_Q, RagQPromptType.V6, 3, 2, 300, {}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 2, 600, {"should_apply_rag_to_question": False}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 1, 300, {"should_apply_rag_to_question": True}),
        )
    ]
).fillna("-")  # missing values are rendered as "-"
japanese_local_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,japan,local,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.261905,0.982143
1,japan,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.261905,0.988095
2,japan,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.244048,0.988095


In [105]:
# Combined summary table: zero-shot baseline followed by the RAG runs (Japan, local language).
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[japanese_local_zero_shot_evaluation_results, japanese_local_rag_evaluation_results]
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Japan,Local,Zero-Shot,zs_v2,-,-,-,-,0.2083
Japan,Local,RAG Q,rq_v6,3,No,2,300,0.2619
Japan,Local,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.2619
Japan,Local,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.244


##### 3.2.2.5. Results Summary

In [106]:
# Cross-country accuracy table for the local-language splits; inputs are passed as
# (zero-shot, RAG) pairs, one pair per country.
world_med_qa_v_plot_helpers.display_test_results_summary(
    evaluation_results_list=[
        # Spain
        spanish_local_zero_shot_evaluation_results, spanish_local_rag_evaluation_results,
        # Brazil
        portuguese_local_zero_shot_evaluation_results, portuguese_local_rag_evaluation_results,
        # Israel
        hebrew_local_zero_shot_evaluation_results, hebrew_local_rag_evaluation_results,
        # Japan
        japanese_local_zero_shot_evaluation_results, japanese_local_rag_evaluation_results,
    ]
)

Unnamed: 0,Zero-Shot,RAG (Question Only),RAG (Answers Only),RAG (Question and Answers)
Spain,0.28,0.256,0.248,0.256
Brazil,0.2809,0.3034,0.2809,0.2809
Israel,0.227,0.2324,0.2378,0.2541
Japan,0.2083,0.2619,0.2619,0.244
Mean Accuracy,0.2491,0.2634,0.2572,0.2588


### 3.3. Test Best Configurations without Context-Image

#### 3.3.1. English Translation Splits

##### 3.3.1.1. Spanish

In [107]:
# Zero-shot baseline for the Spain English split with use_image disabled.
spanish_english_no_image_zero_shot_evaluation_results = (
    world_med_qa_v_dataset_management
    .load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="spain",
                file_type="english",
                use_image=False,
                vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
                prompt_type=ZeroShotPromptType.V2,
            ),
        ],
    )
    .fillna("-")  # missing values are rendered as "-"
)
spanish_english_no_image_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.288,1.0


In [108]:
# RAG configurations for the Spain English split with use_image disabled:
# RAG Q, RAG Q+As on the answers only, and RAG Q+As on question and answers.
spanish_english_no_image_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="spain",
            file_type="english",
            use_image=False,
            vqa_strategy_type=strategy_type,
            prompt_type=prompt_type,
            relevant_docs_count=docs_count,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=token_count,
                chunk_size=chunk_size,
                chunk_overlap=0
            ),
            **extra_options
        )
        # (strategy, prompt, retrieved docs, token count, chunk size, strategy-specific options)
        for strategy_type, prompt_type, docs_count, token_count, chunk_size, extra_options in (
            (VQAStrategyType.RAG_Q, RagQPromptType.V6, 3, 2, 300, {}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 2, 600, {"should_apply_rag_to_question": False}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 1, 300, {"should_apply_rag_to_question": True}),
        )
    ]
).fillna("-")  # missing values are rendered as "-"
spanish_english_no_image_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.312,1.0
1,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.304,1.0
2,spain,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.288,1.0


In [109]:
# Combined summary table: zero-shot baseline followed by the RAG runs (Spain, English, no image).
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[
        spanish_english_no_image_zero_shot_evaluation_results, spanish_english_no_image_rag_evaluation_results
    ]
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Spain,English,Zero-Shot,zs_v2,-,-,-,-,0.288
Spain,English,RAG Q,rq_v6,3,No,2,300,0.312
Spain,English,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.304
Spain,English,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.288


##### 3.3.1.2. Portuguese

In [110]:
# Zero-shot baseline for the Brazil English split with use_image disabled.
portuguese_english_no_image_zero_shot_evaluation_results = (
    world_med_qa_v_dataset_management
    .load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=[
            VQAStrategyDetail(
                country="brazil",
                file_type="english",
                use_image=False,
                vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
                prompt_type=ZeroShotPromptType.V2,
            ),
        ],
    )
    .fillna("-")  # missing values are rendered as "-"
)
portuguese_english_no_image_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,brazil,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.314607,1.0


In [111]:
# RAG configurations for the Brazil English split with use_image disabled:
# RAG Q, RAG Q+As on the answers only, and RAG Q+As on question and answers.
portuguese_english_no_image_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="brazil",
            file_type="english",
            use_image=False,
            vqa_strategy_type=strategy_type,
            prompt_type=prompt_type,
            relevant_docs_count=docs_count,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=token_count,
                chunk_size=chunk_size,
                chunk_overlap=0
            ),
            **extra_options
        )
        # (strategy, prompt, retrieved docs, token count, chunk size, strategy-specific options)
        for strategy_type, prompt_type, docs_count, token_count, chunk_size, extra_options in (
            (VQAStrategyType.RAG_Q, RagQPromptType.V6, 3, 2, 300, {}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 2, 600, {"should_apply_rag_to_question": False}),
            (VQAStrategyType.RAG_Q_AS, RagQPromptType.V5, 1, 1, 300, {"should_apply_rag_to_question": True}),
        )
    ]
).fillna("-")  # missing values are rendered as "-"
portuguese_english_no_image_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,brazil,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.325843,1.0
1,brazil,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.370787,1.0
2,brazil,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.348315,1.0


In [112]:
# Side-by-side summary for Brazil / English (no image): zero-shot baseline vs. RAG runs.
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[
        portuguese_english_no_image_zero_shot_evaluation_results,  # baseline
        portuguese_english_no_image_rag_evaluation_results,  # RAG configurations
    ],
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Brazil,English,Zero-Shot,zs_v2,-,-,-,-,0.3146
Brazil,English,RAG Q,rq_v6,3,No,2,300,0.3258
Brazil,English,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.3708
Brazil,English,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.3483


##### 3.3.1.3. Hebrew

In [113]:
# Zero-shot baseline for Israel / English questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
hebrew_english_no_image_zero_shot_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="israel",
            file_type="english",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
            prompt_type=ZeroShotPromptType.V2,
        ),
    ],
).fillna("-")

hebrew_english_no_image_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,israel,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.227027,0.994595


In [114]:
# RAG results for Israel / English questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
hebrew_english_no_image_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        # Configuration 1: RAG_Q strategy
        VQAStrategyDetail(
            country="israel",
            file_type="english",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q,
            prompt_type=RagQPromptType.V6,
            relevant_docs_count=3,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=300, chunk_overlap=0,
            ),
        ),
        # Configuration 2: RAG_Q_AS strategy, RAG not applied to the question
        VQAStrategyDetail(
            country="israel",
            file_type="english",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=600, chunk_overlap=0,
            ),
            should_apply_rag_to_question=False,
        ),
        # Configuration 3: RAG_Q_AS strategy, RAG also applied to the question
        VQAStrategyDetail(
            country="israel",
            file_type="english",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=1, chunk_size=300, chunk_overlap=0,
            ),
            should_apply_rag_to_question=True,
        ),
    ],
).fillna("-")

hebrew_english_no_image_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,israel,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.232432,1.0
1,israel,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.254054,1.0
2,israel,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.243243,1.0


In [115]:
# Side-by-side summary for Israel / English (no image): zero-shot baseline vs. RAG runs.
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[
        hebrew_english_no_image_zero_shot_evaluation_results,  # baseline
        hebrew_english_no_image_rag_evaluation_results,  # RAG configurations
    ],
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Israel,English,Zero-Shot,zs_v2,-,-,-,-,0.227
Israel,English,RAG Q,rq_v6,3,No,2,300,0.2324
Israel,English,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.2541
Israel,English,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.2432


##### 3.3.1.4. Japanese

In [116]:
# Zero-shot baseline for Japan / English questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
japanese_english_no_image_zero_shot_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="japan",
            file_type="english",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
            prompt_type=ZeroShotPromptType.V2,
        ),
    ],
).fillna("-")

japanese_english_no_image_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,japan,english,zero_shot,zs_v2,-,-,-,-,-,-,-,0.27381,1.0


In [117]:
# RAG results for Japan / English questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
japanese_english_no_image_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        # Configuration 1: RAG_Q strategy
        VQAStrategyDetail(
            country="japan",
            file_type="english",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q,
            prompt_type=RagQPromptType.V6,
            relevant_docs_count=3,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=300, chunk_overlap=0,
            ),
        ),
        # Configuration 2: RAG_Q_AS strategy, RAG not applied to the question
        VQAStrategyDetail(
            country="japan",
            file_type="english",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=600, chunk_overlap=0,
            ),
            should_apply_rag_to_question=False,
        ),
        # Configuration 3: RAG_Q_AS strategy, RAG also applied to the question
        VQAStrategyDetail(
            country="japan",
            file_type="english",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=1, chunk_size=300, chunk_overlap=0,
            ),
            should_apply_rag_to_question=True,
        ),
    ],
).fillna("-")

japanese_english_no_image_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,japan,english,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.291667,1.0
1,japan,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.369048,1.0
2,japan,english,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.380952,1.0


In [118]:
# Side-by-side summary for Japan / English (no image): zero-shot baseline vs. RAG runs.
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[
        japanese_english_no_image_zero_shot_evaluation_results,  # baseline
        japanese_english_no_image_rag_evaluation_results,  # RAG configurations
    ],
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Japan,English,Zero-Shot,zs_v2,-,-,-,-,0.2738
Japan,English,RAG Q,rq_v6,3,No,2,300,0.2917
Japan,English,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.369
Japan,English,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.381


##### 3.3.1.5. Results Summary

In [119]:
# Cross-country summary for the English splits (no image).
# Each country contributes its zero-shot frame followed by its RAG frame.
world_med_qa_v_plot_helpers.display_test_results_summary(
    evaluation_results_list=[
        # Spain
        spanish_english_no_image_zero_shot_evaluation_results,
        spanish_english_no_image_rag_evaluation_results,
        # Brazil
        portuguese_english_no_image_zero_shot_evaluation_results,
        portuguese_english_no_image_rag_evaluation_results,
        # Israel
        hebrew_english_no_image_zero_shot_evaluation_results,
        hebrew_english_no_image_rag_evaluation_results,
        # Japan
        japanese_english_no_image_zero_shot_evaluation_results,
        japanese_english_no_image_rag_evaluation_results,
    ],
)

Unnamed: 0,Zero-Shot,RAG (Question Only),RAG (Answers Only),RAG (Question and Answers)
Spain,0.288,0.312,0.304,0.288
Brazil,0.3146,0.3258,0.3708,0.3483
Israel,0.227,0.2324,0.2541,0.2432
Japan,0.2738,0.2917,0.369,0.381
Mean Accuracy,0.2759,0.2905,0.3245,0.3151


#### 3.3.2. Local Language Splits

##### 3.3.2.1. Spanish

In [120]:
# Zero-shot baseline for Spain / local-language questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
spanish_local_no_image_zero_shot_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="spain",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
            prompt_type=ZeroShotPromptType.V2,
        ),
    ],
).fillna("-")

spanish_local_no_image_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,local,zero_shot,zs_v2,-,-,-,-,-,-,-,0.272,1.0


In [121]:
# RAG results for Spain / local-language questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
spanish_local_no_image_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        # Configuration 1: RAG_Q strategy
        VQAStrategyDetail(
            country="spain",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q,
            prompt_type=RagQPromptType.V6,
            relevant_docs_count=3,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=300, chunk_overlap=0,
            ),
        ),
        # Configuration 2: RAG_Q_AS strategy, RAG not applied to the question
        VQAStrategyDetail(
            country="spain",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=600, chunk_overlap=0,
            ),
            should_apply_rag_to_question=False,
        ),
        # Configuration 3: RAG_Q_AS strategy, RAG also applied to the question
        VQAStrategyDetail(
            country="spain",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=1, chunk_size=300, chunk_overlap=0,
            ),
            should_apply_rag_to_question=True,
        ),
    ],
).fillna("-")

spanish_local_no_image_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,spain,local,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.288,1.0
1,spain,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.256,1.0
2,spain,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.296,1.0


In [122]:
# Side-by-side summary for Spain / local language (no image): zero-shot baseline vs. RAG runs.
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[
        spanish_local_no_image_zero_shot_evaluation_results,  # baseline
        spanish_local_no_image_rag_evaluation_results,  # RAG configurations
    ],
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Spain,Local,Zero-Shot,zs_v2,-,-,-,-,0.272
Spain,Local,RAG Q,rq_v6,3,No,2,300,0.288
Spain,Local,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.256
Spain,Local,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.296


##### 3.3.2.2. Portuguese

In [123]:
# Zero-shot baseline for Brazil / local-language questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
portuguese_local_no_image_zero_shot_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="brazil",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
            prompt_type=ZeroShotPromptType.V2,
        ),
    ],
).fillna("-")

portuguese_local_no_image_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,brazil,local,zero_shot,zs_v2,-,-,-,-,-,-,-,0.292135,1.0


In [124]:
# RAG results for Brazil / local-language questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
portuguese_local_no_image_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        # Configuration 1: RAG_Q strategy
        VQAStrategyDetail(
            country="brazil",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q,
            prompt_type=RagQPromptType.V6,
            relevant_docs_count=3,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=300, chunk_overlap=0,
            ),
        ),
        # Configuration 2: RAG_Q_AS strategy, RAG not applied to the question
        VQAStrategyDetail(
            country="brazil",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=600, chunk_overlap=0,
            ),
            should_apply_rag_to_question=False,
        ),
        # Configuration 3: RAG_Q_AS strategy, RAG also applied to the question
        VQAStrategyDetail(
            country="brazil",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=1, chunk_size=300, chunk_overlap=0,
            ),
            should_apply_rag_to_question=True,
        ),
    ],
).fillna("-")

portuguese_local_no_image_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,brazil,local,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.325843,1.0
1,brazil,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.303371,1.0
2,brazil,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.314607,1.0


In [125]:
# Side-by-side summary for Brazil / local language (no image): zero-shot baseline vs. RAG runs.
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[
        portuguese_local_no_image_zero_shot_evaluation_results,  # baseline
        portuguese_local_no_image_rag_evaluation_results,  # RAG configurations
    ],
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Brazil,Local,Zero-Shot,zs_v2,-,-,-,-,0.2921
Brazil,Local,RAG Q,rq_v6,3,No,2,300,0.3258
Brazil,Local,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.3034
Brazil,Local,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.3146


##### 3.3.2.3. Hebrew

In [126]:
# Zero-shot baseline for Israel / local-language questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
hebrew_local_no_image_zero_shot_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="israel",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
            prompt_type=ZeroShotPromptType.V2,
        ),
    ],
).fillna("-")

hebrew_local_no_image_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,israel,local,zero_shot,zs_v2,-,-,-,-,-,-,-,0.227027,1.0


In [127]:
# RAG results for Israel / local-language questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
hebrew_local_no_image_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        # Configuration 1: RAG_Q strategy
        VQAStrategyDetail(
            country="israel",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q,
            prompt_type=RagQPromptType.V6,
            relevant_docs_count=3,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=300, chunk_overlap=0,
            ),
        ),
        # Configuration 2: RAG_Q_AS strategy, RAG not applied to the question
        VQAStrategyDetail(
            country="israel",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=600, chunk_overlap=0,
            ),
            should_apply_rag_to_question=False,
        ),
        # Configuration 3: RAG_Q_AS strategy, RAG also applied to the question
        VQAStrategyDetail(
            country="israel",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=1, chunk_size=300, chunk_overlap=0,
            ),
            should_apply_rag_to_question=True,
        ),
    ],
).fillna("-")

hebrew_local_no_image_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,israel,local,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.237838,1.0
1,israel,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.232432,1.0
2,israel,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.237838,1.0


In [128]:
# Side-by-side summary for Israel / local language (no image): zero-shot baseline vs. RAG runs.
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[
        hebrew_local_no_image_zero_shot_evaluation_results,  # baseline
        hebrew_local_no_image_rag_evaluation_results,  # RAG configurations
    ],
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Israel,Local,Zero-Shot,zs_v2,-,-,-,-,0.227
Israel,Local,RAG Q,rq_v6,3,No,2,300,0.2378
Israel,Local,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.2324
Israel,Local,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.2378


##### 3.3.2.4. Japanese

In [129]:
# Zero-shot baseline for Japan / local-language questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
japanese_local_no_image_zero_shot_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="japan",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.ZERO_SHOT,
            prompt_type=ZeroShotPromptType.V2,
        ),
    ],
).fillna("-")

japanese_local_no_image_zero_shot_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,japan,local,zero_shot,zs_v2,-,-,-,-,-,-,-,0.208333,0.958333


In [130]:
# RAG results for Japan / local-language questions without the image.
# `.fillna("-")` is chained so that empty cells are shown as a dash.
japanese_local_no_image_rag_evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        # Configuration 1: RAG_Q strategy
        VQAStrategyDetail(
            country="japan",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q,
            prompt_type=RagQPromptType.V6,
            relevant_docs_count=3,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=300, chunk_overlap=0,
            ),
        ),
        # Configuration 2: RAG_Q_AS strategy, RAG not applied to the question
        VQAStrategyDetail(
            country="japan",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=2, chunk_size=600, chunk_overlap=0,
            ),
            should_apply_rag_to_question=False,
        ),
        # Configuration 3: RAG_Q_AS strategy, RAG also applied to the question
        VQAStrategyDetail(
            country="japan",
            file_type="local",
            use_image=False,
            vqa_strategy_type=VQAStrategyType.RAG_Q_AS,
            prompt_type=RagQPromptType.V5,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                token_count=1, chunk_size=300, chunk_overlap=0,
            ),
            should_apply_rag_to_question=True,
        ),
    ],
).fillna("-")

japanese_local_no_image_rag_evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,should_apply_rag_to_question,accuracy,well_formatted_answers
0,japan,local,rag_q,rq_v6,3,recursive_character_splitter,False,2,300,0,-,0.22619,1.0
1,japan,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,2,600,0,False,0.25,1.0
2,japan,local,rag_q_as,rq_v5,1,recursive_character_splitter,False,1,300,0,True,0.279762,1.0


In [131]:
# Side-by-side summary for Japan / local language (no image): zero-shot baseline vs. RAG runs.
world_med_qa_v_plot_helpers.display_evaluation_results_summary(
    evaluation_results_list=[
        japanese_local_no_image_zero_shot_evaluation_results,  # baseline
        japanese_local_no_image_rag_evaluation_results,  # RAG configurations
    ],
)

Country,File Type,VQA Strategy,Prompt Type,Relevant Document Count,Title,Token Count,Chunk Size,Accuracy
Japan,Local,Zero-Shot,zs_v2,-,-,-,-,0.2083
Japan,Local,RAG Q,rq_v6,3,No,2,300,0.2262
Japan,Local,RAG Q+As (Answers Only),rq_v5,1,No,2,600,0.25
Japan,Local,RAG Q+As (Question and Answers),rq_v5,1,No,1,300,0.2798


##### 3.3.2.5. Results Summary

In [132]:
# Cross-country summary for the local-language splits (no image).
# Each country contributes its zero-shot frame followed by its RAG frame.
world_med_qa_v_plot_helpers.display_test_results_summary(
    evaluation_results_list=[
        # Spain
        spanish_local_no_image_zero_shot_evaluation_results,
        spanish_local_no_image_rag_evaluation_results,
        # Brazil
        portuguese_local_no_image_zero_shot_evaluation_results,
        portuguese_local_no_image_rag_evaluation_results,
        # Israel
        hebrew_local_no_image_zero_shot_evaluation_results,
        hebrew_local_no_image_rag_evaluation_results,
        # Japan
        japanese_local_no_image_zero_shot_evaluation_results,
        japanese_local_no_image_rag_evaluation_results,
    ],
)

Unnamed: 0,Zero-Shot,RAG (Question Only),RAG (Answers Only),RAG (Question and Answers)
Spain,0.272,0.288,0.256,0.296
Brazil,0.2921,0.3258,0.3034,0.3146
Israel,0.227,0.2378,0.2324,0.2378
Japan,0.2083,0.2262,0.25,0.2798
Mean Accuracy,0.2499,0.2695,0.2605,0.2821
