# VQA Model Result Evaluation

## 0. Environment Setup

### 0.1. Import Necessary Libraries

In [11]:
from pathlib import Path

from datasets import disable_progress_bars

import src.utils.dataset_helpers.world_med_qa_v.dataset_management as world_med_qa_v_dataset_management
import src.utils.dataset_helpers.world_med_qa_v.plot_helpers as world_med_qa_v_plot_helpers
from src.utils.data_definitions import DocSplitterOptions, GeneralDocSplitterOptions, GeneralVQAStrategiesDetails, VQAStrategyDetail
from src.utils.enums import DocumentSplitterType, RagQPromptType, VQAStrategyType, ZeroShotPromptType
from src.utils.string_formatting_helpers import to_snake_case_strategy_name
from src.utils.text_splitters.paragraph_splitter import ParagraphSplitter
from src.utils.text_splitters.recursive_character_splitter import RecursiveCharacterSplitter
from src.utils.text_splitters.spacy_sentence_splitter import SpacySentenceSplitter
from src.visual_qa_model import VisualQAModel
from src.visual_qa_strategies.base_vqa_strategy import BaseVQAStrategy
from src.visual_qa_strategies.rag_q_vqa_strategy import RagQVQAStrategy
from src.visual_qa_strategies.zero_shot_vqa_strategy import ZeroShotVQAStrategy

### 0.2. Configure Environment Settings

Detect Google Colab Form Annotation Automatically

In [2]:
%load_ext ipyform
%form_config --auto-detect 1

Enable Automatic Module Reloading

In [3]:
%load_ext autoreload
%autoreload 2

Disable Progress Bar for Dataset Filtering

In [4]:
# Turn off the `datasets` library's progress bars so dataset operations do not clutter cell output.
disable_progress_bars()

## 1. Evaluation of VQA Approaches

Define Constants

In [5]:
# Shared configuration for the evaluation runs in this section.
# Folder passed to `load_vqa_dataset` as `data_path`.
DATASET_DIR = Path("data/WorldMedQA-V")
# Model identifier passed to `VisualQAModel` as `model_name`.
MODEL_NAME = "llava"
# Dataset subset selectors; with these values the loader reports reading
# `spain_english_processed.tsv`.
COUNTRY = "spain"
FILE_TYPE = "english"
# Folder passed to `VisualQAModel.evaluate` as `save_path`.
RESULTS_DIR = Path('evaluation_results')

Load Dataset

In [6]:
# Load the WorldMedQA-V subset selected by COUNTRY and FILE_TYPE from DATASET_DIR.
world_med_qa_v_dataset = world_med_qa_v_dataset_management.load_vqa_dataset(
    data_path=DATASET_DIR,
    country=COUNTRY,
    file_type=FILE_TYPE
)
# Bare last expression so the notebook renders the dataset summary (features and row count).
world_med_qa_v_dataset

- Loading WorldMedQA-V dataset (filename: spain_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: spain_english_processed.tsv) loaded.


Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

### 1.1. Zero-Shot Evaluation

Load Model

In [7]:
# Build the model wrapper with the zero-shot strategy (prompt template V1).
# The same `llava_model` object is reused and reconfigured in place (prompt type,
# then strategy) by the evaluation cells that follow, so cell order matters.
llava_model = VisualQAModel(
    visual_qa_strategy=ZeroShotVQAStrategy(prompt_type=ZeroShotPromptType.V1),
    model_name=MODEL_NAME,
    country=COUNTRY,
    file_type=FILE_TYPE
)

- Loading Zero-Shot strategy ...
+ Zero-Shot strategy loaded.
- Loading Llava model (prompt template: zs_v1) ...
+ Llava model (prompt template: zs_v1) loaded.


Evaluate Model (Prompt Template: `zs_v1`)

In [10]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# ZeroShotPromptType.V1, as set when `llava_model` was built in the "Load Model" cell.
# Runs on the first 5 rows only (`take(5)`), not the full 125-row dataset.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(5),
    save_path=RESULTS_DIR
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:57<00:00, 83.53s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `zs_v2`)

In [9]:
# Switch the existing strategy to prompt template V2 in place; the assignment stays
# on `llava_model` for any cell run afterwards.
llava_model.visual_qa_strategy.prompt_type = ZeroShotPromptType.V2
# Runs on the first 5 rows only (`take(5)`), not the full 125-row dataset.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(5),
    save_path=RESULTS_DIR
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:20<00:00, 76.08s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `zs_v3`)

In [10]:
# Switch the existing strategy to prompt template V3 in place; the assignment stays
# on `llava_model` for any cell run afterwards.
llava_model.visual_qa_strategy.prompt_type = ZeroShotPromptType.V3
# Runs on the first 5 rows only (`take(5)`), not the full 125-row dataset.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(5),
    save_path=RESULTS_DIR
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 5/5 [06:24<00:00, 76.90s/it]

+ Model evaluation (spain_english subset) completed.





### 1.2. Retrieval-Augmented Generation (RAG) Evaluation

Define RAG-Specific Constants

In [8]:
# Retrieval settings forwarded to `RagQVQAStrategy` in the "Load Model" cell below.
INDEX_DIR = Path('data/WikiMed/indexed_db')
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"
# NOTE(review): section 2 rebinds RELEVANT_DOCS_COUNT to 1; re-run this cell before
# rebuilding the RAG Q strategy for the evaluations in this section.
RELEVANT_DOCS_COUNT = 2

#### 1.2.1. RAG Q (Question Only)

Load Model

In [10]:
# Replace the strategy on the existing `llava_model` with RAG Q (prompt template V1),
# built from the retrieval constants defined above. The captured output shows the
# model being loaded again for the new prompt template after this assignment.
llava_model.visual_qa_strategy = RagQVQAStrategy(
    prompt_type=RagQPromptType.V1,
    index_dir=INDEX_DIR,
    index_name=INDEX_NAME,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    relevant_docs_count=RELEVANT_DOCS_COUNT
)

- Loading RAG Q strategy ...
	- Loading Embeddings ...
	+ Embeddings Loaded.
	- Loading Index ...
	+ Index Loaded.
	- Loading Retriever ...
	+ Retriever Loaded.
+ RAG Q strategy loaded.
- Loading Llava model (prompt template: rq_v1) ...
+ Llava model (prompt template: rq_v1) loaded.


Evaluate Model (Prompt Template: `rq_v1`)

- Document Splitter Type: `No Document Splitter`

In [10]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V1, as set when the RAG Q strategy was built in the "Load Model" cell.
# Baseline without a `doc_splitter` argument; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [07:09<00:00, 214.66s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Recursive Character Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [11]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V1, as set when the RAG Q strategy was built in the "Load Model" cell.
# Variant: recursive character splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = RecursiveCharacterSplitter(
        token_count=2,
        chunk_size=200,
        chunk_overlap=0,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:53<00:00, 86.67s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [21]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V1, as set when the RAG Q strategy was built in the "Load Model" cell.
# Variant: recursive character splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = RecursiveCharacterSplitter(
        token_count=2,
        chunk_size=200,
        chunk_overlap=0,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...:   0%|          | 0/2 [00:00<?, ?it/s]

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:35<00:00, 77.52s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `spaCy Sentence Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [25]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V1, as set when the RAG Q strategy was built in the "Load Model" cell.
# Variant: spaCy sentence splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = SpacySentenceSplitter(
        token_count=2,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:45<00:00, 82.89s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [26]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V1, as set when the RAG Q strategy was built in the "Load Model" cell.
# Variant: spaCy sentence splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = SpacySentenceSplitter(
        token_count=2,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:38<00:00, 79.35s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Paragraph Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [27]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V1, as set when the RAG Q strategy was built in the "Load Model" cell.
# Variant: paragraph splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = ParagraphSplitter(
        token_count=2,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:19<00:00, 129.72s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [28]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V1, as set when the RAG Q strategy was built in the "Load Model" cell.
# Variant: paragraph splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = ParagraphSplitter(
        token_count=2,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...:   0%|          | 0/2 [00:00<?, ?it/s]

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [03:48<00:00, 114.43s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `rq_v2`)

- Document Splitter Type: `No Document Splitter`

In [29]:
# Switch the RAG Q strategy to prompt template V2 in place. The splitter cells that
# follow in this group do not set the prompt type themselves and rely on this assignment.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V2
# Baseline without a `doc_splitter` argument; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [07:47<00:00, 233.96s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Recursive Character Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [30]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V2, as set in the "No Document Splitter" cell of this group.
# Variant: recursive character splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = RecursiveCharacterSplitter(
        token_count=2,
        chunk_size=200,
        chunk_overlap=0,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:38<00:00, 79.16s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [31]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V2, as set in the "No Document Splitter" cell of this group.
# Variant: recursive character splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = RecursiveCharacterSplitter(
        token_count=2,
        chunk_size=200,
        chunk_overlap=0,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:34<00:00, 77.47s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `spaCy Sentence Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [32]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V2, as set in the "No Document Splitter" cell of this group.
# Variant: spaCy sentence splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = SpacySentenceSplitter(
        token_count=2,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:40<00:00, 80.45s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [33]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V2, as set in the "No Document Splitter" cell of this group.
# Variant: spaCy sentence splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = SpacySentenceSplitter(
        token_count=2,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:35<00:00, 77.74s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Paragraph Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [34]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V2, as set in the "No Document Splitter" cell of this group.
# Variant: paragraph splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = ParagraphSplitter(
        token_count=2,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:17<00:00, 128.63s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [35]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V2, as set in the "No Document Splitter" cell of this group.
# Variant: paragraph splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = ParagraphSplitter(
        token_count=2,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [03:36<00:00, 108.21s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `rq_v3`)

- Document Splitter Type: `No Document Splitter`

In [10]:
# Switch the RAG Q strategy to prompt template V3 in place. The splitter cells that
# follow in this group do not set the prompt type themselves and rely on this assignment.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V3
# Baseline without a `doc_splitter` argument; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [07:07<00:00, 213.70s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Recursive Character Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [11]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V3, as set in the "No Document Splitter" cell of this group.
# Variant: recursive character splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = RecursiveCharacterSplitter(
        token_count=2,
        chunk_size=200,
        chunk_overlap=0,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:41<00:00, 80.54s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [12]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V3, as set in the "No Document Splitter" cell of this group.
# Variant: recursive character splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = RecursiveCharacterSplitter(
        token_count=2,
        chunk_size=200,
        chunk_overlap=0,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:32<00:00, 76.36s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `spaCy Sentence Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [13]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V3, as set in the "No Document Splitter" cell of this group.
# Variant: spaCy sentence splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = SpacySentenceSplitter(
        token_count=2,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:38<00:00, 79.02s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [14]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V3, as set in the "No Document Splitter" cell of this group.
# Variant: spaCy sentence splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = SpacySentenceSplitter(
        token_count=2,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:49<00:00, 84.81s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Paragraph Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [15]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V3, as set in the "No Document Splitter" cell of this group.
# Variant: paragraph splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = ParagraphSplitter(
        token_count=2,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:17<00:00, 128.51s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [16]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V3, as set in the "No Document Splitter" cell of this group.
# Variant: paragraph splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = ParagraphSplitter(
        token_count=2,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [03:46<00:00, 113.07s/it]

+ Model evaluation (spain_english subset) completed.





Evaluate Model (Prompt Template: `rq_v4`)

- Document Splitter Type: `No Document Splitter`

In [17]:
# Switch the RAG Q strategy to prompt template V4 in place. The splitter cells that
# follow in this group do not set the prompt type themselves and rely on this assignment.
llava_model.visual_qa_strategy.prompt_type = RagQPromptType.V4
# Baseline without a `doc_splitter` argument; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [06:52<00:00, 206.30s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Recursive Character Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [18]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V4, as set in the "No Document Splitter" cell of this group.
# Variant: recursive character splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = RecursiveCharacterSplitter(
        token_count=2,
        chunk_size=200,
        chunk_overlap=0,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [03:41<00:00, 110.63s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [19]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V4, as set in the "No Document Splitter" cell of this group.
# Variant: recursive character splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = RecursiveCharacterSplitter(
        token_count=2,
        chunk_size=200,
        chunk_overlap=0,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:49<00:00, 84.97s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `spaCy Sentence Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [20]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V4, as set in the "No Document Splitter" cell of this group.
# Variant: spaCy sentence splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = SpacySentenceSplitter(
        token_count=2,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:53<00:00, 86.73s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [21]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V4, as set in the "No Document Splitter" cell of this group.
# Variant: spaCy sentence splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = SpacySentenceSplitter(
        token_count=2,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [02:56<00:00, 88.44s/it]

+ Model evaluation (spain_english subset) completed.





- Document Splitter Type: `Paragraph Splitter`

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`with_title`

In [22]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V4, as set in the "No Document Splitter" cell of this group.
# Variant: paragraph splitter, `with_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = ParagraphSplitter(
        token_count=2,
        add_title=True
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:47<00:00, 143.97s/it]

+ Model evaluation (spain_english subset) completed.





&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`no_title`

In [23]:
# Hidden-state dependency: expects the strategy's prompt type to still be
# RagQPromptType.V4, as set in the "No Document Splitter" cell of this group.
# Variant: paragraph splitter, `no_title`; runs on the first 2 rows only.
llava_model.evaluate(
    dataset=world_med_qa_v_dataset.take(2),
    save_path=RESULTS_DIR,
    doc_splitter = ParagraphSplitter(
        token_count=2,
        add_title=False
    )
)

- Evaluating model (spain_english subset) ...: 100%|██████████| 2/2 [04:10<00:00, 125.05s/it]

+ Model evaluation (spain_english subset) completed.





#### 1.2.2. RAG Q+As (Question + Answers)

#### 1.2.3. RAG IMG (Image-Based)

#### 1.2.4. RAG DB-Reranker (Database with Reranker)

## 2. VQA Approaches Exploration

Define Model Specific Constants

In [11]:
# Same values as the "Define Constants" cell in section 1.
# NOTE(review): presumably repeated so this section can be run without section 1 —
# confirm, and keep the two cells in sync (or drop this one) if not.
DATASET_DIR = Path("data/WorldMedQA-V")
MODEL_NAME = "llava"
COUNTRY = "spain"
FILE_TYPE = "english"
RESULTS_DIR = Path('evaluation_results')

Define RAG Q Specific Constants

In [12]:
# Retrieval settings forwarded to `RagQVQAStrategy` in the strategies cell below.
INDEX_DIR = Path('data/WikiMed/indexed_db')
INDEX_NAME = "Wikimed+S-PubMedBert-MS-MARCO-FullTexts"
EMBEDDING_MODEL_NAME = "pritamdeka/S-PubMedBert-MS-MARCO"
# NOTE: section 1.2 binds this name to 2; running this cell rebinds it to 1 for
# every cell executed afterwards in the same kernel.
RELEVANT_DOCS_COUNT = 1

Define Possible VQA Strategies

In [11]:
# Registry of strategy instances keyed by strategy type, used by the interactive form below.
# NOTE(review): the last three entries are `None` placeholders (their sections in this
# notebook are still empty), so the value type is effectively optional despite the
# annotation; code indexing this dict must handle `None`.
vqa_strategies: dict[VQAStrategyType, BaseVQAStrategy] = {
    VQAStrategyType.ZERO_SHOT: ZeroShotVQAStrategy(prompt_type=ZeroShotPromptType.V1),
    VQAStrategyType.RAG_Q: RagQVQAStrategy(
        prompt_type=RagQPromptType.V1,
        index_dir=INDEX_DIR,
        index_name=INDEX_NAME,
        embedding_model_name=EMBEDDING_MODEL_NAME,
        relevant_docs_count=RELEVANT_DOCS_COUNT
    ),
    VQAStrategyType.RAG_Q_AS: None,
    VQAStrategyType.RAG_IMG: None,
    VQAStrategyType.RAG_DB_RERANKER: None
}

- Loading Zero-Shot strategy ...
+ Zero-Shot strategy loaded.
- Loading RAG Q strategy ...
	- Loading Embeddings ...
	+ Embeddings Loaded.
	- Loading Index ...
	+ Index Loaded.
	- Loading Retriever ...
	+ Retriever Loaded.
+ RAG Q strategy loaded.


Load Dataset

In [12]:
# Load the WorldMedQA-V subset selected by COUNTRY and FILE_TYPE; this rebinds the
# same `world_med_qa_v_dataset` name that section 1 uses.
world_med_qa_v_dataset = world_med_qa_v_dataset_management.load_vqa_dataset(
    data_path=DATASET_DIR,
    country=COUNTRY,
    file_type=FILE_TYPE
)
# Bare last expression so the notebook renders the dataset summary (features and row count).
world_med_qa_v_dataset

- Loading WorldMedQA-V dataset (filename: spain_english_processed.tsv) ...
+ WorldMedQA-V dataset (filename: spain_english_processed.tsv) loaded.


Dataset({
    features: ['index', 'image', 'question', 'A', 'B', 'C', 'D', 'answer', 'correct_option', 'split'],
    num_rows: 125
})

Experiment with the Models

In [13]:
# @title Interactive VQA Model Exploration Form
vqa_strategy_type = 'Zero-Shot' # @param ["Zero-Shot", "RAG Q", "RAG Q+As", "RAG IMG", "RAG DB-Reranker"]
prompt_type = "zs_v1" # @param ["zs_v1", "zs_v2", "zs_v3", "rq_v1", "rq_v2", "rq_v3", "rq_v4"]
question_id = 1 # @param {"type":"integer"}
image_width = 600 # @param {"type":"integer"}
action = 'Fetch from JSON' # @param ["Execute Model", "Fetch from JSON"]


# Look up the dataset row for the question selected in the form.
row = world_med_qa_v_dataset_management.get_dataset_row_by_id(
    dataset=world_med_qa_v_dataset,
    question_id=question_id
)

if action == "Execute Model":
    # Prompt-type enum that belongs to each strategy. Strategies without an entry
    # here have no prompt templates offered by the form yet.
    prompt_type_enums = {
        VQAStrategyType.ZERO_SHOT: ZeroShotPromptType,
        VQAStrategyType.RAG_Q: RagQPromptType
    }

    formatted_vqa_strategy_type = to_snake_case_strategy_name(strategy_name=vqa_strategy_type)
    chosen_vqa_strategy_type = VQAStrategyType(formatted_vqa_strategy_type)
    chosen_vqa_strategy = vqa_strategies[chosen_vqa_strategy_type]

    # Placeholder strategies are registered as None; fail with a clear message
    # instead of an AttributeError on the prompt_type assignment.
    if chosen_vqa_strategy is None or chosen_vqa_strategy_type not in prompt_type_enums:
        raise NotImplementedError(
            f"The '{vqa_strategy_type}' strategy is not available yet."
        )

    # Convert the selected prompt with the enum of the chosen strategy. Always using
    # ZeroShotPromptType here made every "rq_*" selection fail.
    try:
        chosen_prompt_type = prompt_type_enums[chosen_vqa_strategy_type](prompt_type)
    except ValueError as error:
        raise ValueError(
            f"Prompt type '{prompt_type}' is not valid for the '{vqa_strategy_type}' strategy."
        ) from error
    chosen_vqa_strategy.prompt_type = chosen_prompt_type

    model = VisualQAModel(
        visual_qa_strategy=chosen_vqa_strategy,
        model_name=MODEL_NAME,
        country=COUNTRY,
        file_type=FILE_TYPE
    )
    # Run the model on the selected row and show its answer next to the question.
    world_med_qa_v_plot_helpers.visualize_qa_pair_row(
        row=row,
        image_width=image_width,
        model_answer=model.generate_answer_from_row(
            row=row,
            possible_options=['A', 'B', 'C', 'D'],
            verbose=True
        )
    )
elif action == "Fetch from JSON":
    # Reuse an answer stored by a previous evaluation run instead of running the model.
    model_answer = world_med_qa_v_dataset_management.fetch_model_answer_from_json(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_name=to_snake_case_strategy_name(strategy_name=vqa_strategy_type),
        country=COUNTRY,
        file_type=FILE_TYPE,
        prompt_type_name=prompt_type,
        question_id=question_id,
    )
    world_med_qa_v_plot_helpers.visualize_qa_pair_row(
        row=row,
        image_width=image_width,
        model_answer=model_answer
    )

FormWidget(children=(VBox(children=(HTML(value=''), HTML(value='<h2>Interactive VQA Model Exploration Form</h2…

## 3. Result Analysis

### 3.1. RAG Q (Relevant Documents Count: 1)

In [21]:
def build_rag_q_v1_detail(
    doc_splitter_type: DocumentSplitterType,
    relevant_docs_count: int,
    **doc_splitter_kwargs
) -> VQAStrategyDetail:
    """
    Describe one RAG Q run (prompt `rq_v1`, Spain / English subset, titles not added).

    Args:
        doc_splitter_type: Document splitter the run was evaluated with.
        relevant_docs_count: Value of `relevant_docs_count` recorded for the run.
        **doc_splitter_kwargs: Remaining `DocSplitterOptions` settings, i.e.
            `token_count`, plus `chunk_size` and `chunk_overlap` for the
            recursive character splitter.

    Returns:
        The `VQAStrategyDetail` identifying the run to load.
    """
    return VQAStrategyDetail(
        country="spain",
        file_type="english",
        vqa_strategy_type=VQAStrategyType.RAG_Q,
        prompt_type=RagQPromptType.V1,
        relevant_docs_count=relevant_docs_count,
        doc_splitter_options=DocSplitterOptions(
            doc_splitter_type=doc_splitter_type,
            add_title=False,
            **doc_splitter_kwargs
        )
    )


# Same 14 configurations, in the same order, as the previously hand-written list:
# paragraph (token_count 1-4), recursive character (token_count 1-2 x chunk_size
# 300/600/900), spaCy sentence (token_count 1-4).
evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        *(
            build_rag_q_v1_detail(
                DocumentSplitterType.PARAGRAPH_SPLITTER,
                relevant_docs_count=1,
                token_count=token_count
            )
            for token_count in (1, 2, 3, 4)
        ),
        *(
            build_rag_q_v1_detail(
                DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                relevant_docs_count=1,
                token_count=token_count,
                chunk_size=chunk_size,
                chunk_overlap=0
            )
            for token_count in (1, 2)
            for chunk_size in (300, 600, 900)
        ),
        *(
            build_rag_q_v1_detail(
                DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
                relevant_docs_count=1,
                token_count=token_count
            )
            for token_count in (1, 2, 3, 4)
        )
    ]
)
# Show "-" instead of NaN for settings that do not apply to a splitter
# (e.g. chunk_size for the paragraph splitter).
evaluation_results = evaluation_results.fillna("-")
evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v1,1,paragraph_splitter,False,1,-,-,0.256,1.0
1,spain,english,rag_q,rq_v1,1,paragraph_splitter,False,2,-,-,0.288,0.984
2,spain,english,rag_q,rq_v1,1,paragraph_splitter,False,3,-,-,0.24,0.976
3,spain,english,rag_q,rq_v1,1,paragraph_splitter,False,4,-,-,0.224,0.936
4,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,1,300.0,0.0,0.264,1.0
5,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,1,600.0,0.0,0.28,1.0
6,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,1,900.0,0.0,0.272,1.0
7,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,2,300.0,0.0,0.272,1.0
8,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,2,600.0,0.0,0.24,1.0
9,spain,english,rag_q,rq_v1,1,recursive_character_splitter,False,2,900.0,0.0,0.272,1.0


In [22]:
# Plot the evaluation results loaded in the previous cell as a bar chart.
# "RDC" in the title stands for Relevant Documents Count (1 in this section).
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=evaluation_results,
    title="Analysis of LLaVA Model Accuracy Across Different RAG Approaches and Parameter Settings (RDC: 1)"
)

### 3.2. RAG Q (Relevant Documents Count: 2)

In [20]:
# Load the stored evaluation of every RAG Q configuration compared in this section
# (relevant_docs_count=2, prompt rq_v1, titles not added), in display order:
#   1. paragraph splitter           - token_count 1..4
#   2. recursive character splitter - token_count 1..2 x chunk_size 300/600/900, chunk_overlap 0
#   3. spaCy sentence splitter      - token_count 1..4
# Missing values in the resulting table are replaced with "-" before it is displayed.
evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="spain",
            file_type="english",
            vqa_strategy_type=VQAStrategyType.RAG_Q,
            prompt_type=RagQPromptType.V1,
            relevant_docs_count=2,
            doc_splitter_options=splitter_options
        )
        for splitter_options in (
            [
                DocSplitterOptions(
                    doc_splitter_type=DocumentSplitterType.PARAGRAPH_SPLITTER,
                    add_title=False,
                    token_count=token_count
                )
                for token_count in (1, 2, 3, 4)
            ]
            + [
                DocSplitterOptions(
                    doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                    add_title=False,
                    token_count=token_count,
                    chunk_size=chunk_size,
                    chunk_overlap=0
                )
                for token_count in (1, 2)
                for chunk_size in (300, 600, 900)
            ]
            + [
                DocSplitterOptions(
                    doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
                    add_title=False,
                    token_count=token_count
                )
                for token_count in (1, 2, 3, 4)
            ]
        )
    ]
).fillna("-")
evaluation_results

FileNotFoundError: [Errno 2] No such file or directory: 'evaluation_results/rag_q/rdc2/spacy_sent_splitting/no_title_tc1/spain_english_rq_v1_evaluation.json'

In [13]:
# Render a bar chart of whatever is currently bound to `evaluation_results`;
# the title labels it as the relevant-documents-count (RDC) = 2 comparison.
# NOTE(review): the load cell of this section is saved with a FileNotFoundError,
# so on a fresh run `evaluation_results` would not contain the RDC = 2 results
# here - confirm the result files exist before trusting this chart.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=evaluation_results,
    title="Analysis of LLaVA Model Accuracy Across Different RAG Approaches and Parameter Settings (RDC: 2)"
)

### 3.3. Rag Q (Relevant Documents Count: 3)

In [19]:
# Load the stored evaluation of every RAG Q configuration compared in this section
# (relevant_docs_count=3, prompt rq_v1, titles not added), in display order:
#   1. paragraph splitter           - token_count 1..4
#   2. recursive character splitter - token_count 1..2 x chunk_size 300/600/900, chunk_overlap 0
#   3. spaCy sentence splitter      - token_count 1..4
# Missing values in the resulting table are replaced with "-" before it is displayed.
evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="spain",
            file_type="english",
            vqa_strategy_type=VQAStrategyType.RAG_Q,
            prompt_type=RagQPromptType.V1,
            relevant_docs_count=3,
            doc_splitter_options=splitter_options
        )
        for splitter_options in (
            [
                DocSplitterOptions(
                    doc_splitter_type=DocumentSplitterType.PARAGRAPH_SPLITTER,
                    add_title=False,
                    token_count=token_count
                )
                for token_count in (1, 2, 3, 4)
            ]
            + [
                DocSplitterOptions(
                    doc_splitter_type=DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
                    add_title=False,
                    token_count=token_count,
                    chunk_size=chunk_size,
                    chunk_overlap=0
                )
                for token_count in (1, 2)
                for chunk_size in (300, 600, 900)
            ]
            + [
                DocSplitterOptions(
                    doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
                    add_title=False,
                    token_count=token_count
                )
                for token_count in (1, 2, 3, 4)
            ]
        )
    ]
).fillna("-")
evaluation_results

FileNotFoundError: [Errno 2] No such file or directory: 'evaluation_results/rag_q/rdc3/spacy_sent_splitting/no_title_tc1/spain_english_rq_v1_evaluation.json'

In [18]:
# Render a bar chart of whatever is currently bound to `evaluation_results`;
# the title labels it as the relevant-documents-count (RDC) = 3 comparison.
# NOTE(review): the load cell of this section is saved with a FileNotFoundError,
# so on a fresh run `evaluation_results` would not contain the RDC = 3 results
# here - confirm the result files exist before trusting this chart.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=evaluation_results,
    title="Analysis of LLaVA Model Accuracy Across Different RAG Approaches and Parameter Settings (RDC: 3)"
)

---

`spacy_sentence_splitter` seems to output the most consistent results. Hence, we continue experimenting only with this **document splitter**.

### 3.4. Rag Q 

- Relevant Documents Count: 1
- Document Splitter: spacy_sentence_splitter

In [15]:
# Load the stored evaluation of the spaCy sentence splitter runs (relevant_docs_count=1,
# titles not added) for every token_count 1..5 combined with RAG Q prompts V1..V4.
# token_count is the outer loop so rows are grouped by token count, then by prompt version.
# Missing values in the resulting table are replaced with "-" before it is displayed.
evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=[
        VQAStrategyDetail(
            country="spain",
            file_type="english",
            vqa_strategy_type=VQAStrategyType.RAG_Q,
            prompt_type=prompt_type,
            relevant_docs_count=1,
            doc_splitter_options=DocSplitterOptions(
                doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
                add_title=False,
                token_count=token_count
            )
        )
        for token_count in range(1, 6)
        for prompt_type in (
            RagQPromptType.V1,
            RagQPromptType.V2,
            RagQPromptType.V3,
            RagQPromptType.V4,
        )
    ]
).fillna("-")
evaluation_results

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,1,-,-,0.296,1.0
1,spain,english,rag_q,rq_v2,1,spacy_sentence_splitter,False,1,-,-,0.28,1.0
2,spain,english,rag_q,rq_v3,1,spacy_sentence_splitter,False,1,-,-,0.28,1.0
3,spain,english,rag_q,rq_v4,1,spacy_sentence_splitter,False,1,-,-,0.296,1.0
4,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,2,-,-,0.272,1.0
5,spain,english,rag_q,rq_v2,1,spacy_sentence_splitter,False,2,-,-,0.272,1.0
6,spain,english,rag_q,rq_v3,1,spacy_sentence_splitter,False,2,-,-,0.272,1.0
7,spain,english,rag_q,rq_v4,1,spacy_sentence_splitter,False,2,-,-,0.24,1.0
8,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,3,-,-,0.272,1.0
9,spain,english,rag_q,rq_v2,1,spacy_sentence_splitter,False,3,-,-,0.288,1.0


In [16]:
# Render a bar chart of the results loaded for this section. The title names what
# actually varies here (RAG Q prompt version and token count, with the spaCy sentence
# splitter and RDC = 1 fixed) so the figure is distinguishable from the earlier
# RDC = 1 splitter comparison, which previously carried the exact same title.
world_med_qa_v_plot_helpers.display_bar_chart_on_evaluation_results(
    evaluation_results=evaluation_results,
    title="Analysis of LLaVA Model Accuracy Across RAG Q Prompt Types and Token Counts (RDC: 1, Splitter: spacy_sentence_splitter)"
)

- **TODO:** Write a more general function to create the dataclasses dynamically (using the `GeneralVQAStrategiesDetails` dataclass).

In [15]:
from typing import get_args
from src.utils.types_aliases import PromptType
from src.utils.data_definitions import GeneralDocSplitterOptions, GeneralVQAStrategiesDetails


# Scratch check of GeneralVQAStrategiesDetails: give it candidate values for every
# field and ask it for the VQAStrategyDetail objects it derives from them.
b = GeneralVQAStrategiesDetails(
    countries=["spain"],
    file_types=["english"],
    vqa_strategy_types=[VQAStrategyType.ZERO_SHOT, VQAStrategyType.RAG_Q],
    # Flatten every member of every type listed in the PromptType alias
    # (per the saved output these are the zero-shot and RAG Q prompt enums).
    prompt_types=[
        prompt_member
        for prompt_enum in get_args(PromptType)
        for prompt_member in prompt_enum
    ],
    relevant_docs_count=[None, 1, 2, 3],
    doc_splitter_options=GeneralDocSplitterOptions(
        doc_splitter_types=[None, DocumentSplitterType.SPACY_SENTENCE_SPLITTER],
        add_titles=[None, True, False],
        token_counts=[None, 1, 2, 3],
        chunk_sizes=[None, 300, 600],
        chunk_overlaps=[None, 0]
    )
)
b.get_possible_vqa_strategy_details()

attribute=['spain']
attribute=['english']
attribute=[<VQAStrategyType.ZERO_SHOT: 'zero_shot'>, <VQAStrategyType.RAG_Q: 'rag_q'>]
attribute=[<ZeroShotPromptType.V1: 'zs_v1'>, <ZeroShotPromptType.V2: 'zs_v2'>, <ZeroShotPromptType.V3: 'zs_v3'>, <RagQPromptType.V1: 'rq_v1'>, <RagQPromptType.V2: 'rq_v2'>, <RagQPromptType.V3: 'rq_v3'>, <RagQPromptType.V4: 'rq_v4'>, <RagQPromptType.V5: 'rq_v5'>, <RagQPromptType.V6: 'rq_v6'>]
attribute=[None, 1, 2, 3]
attribute=[None, <DocumentSplitterType.SPACY_SENTENCE_SPLITTER: 'spacy_sentence_splitter'>]
attribute=[None, True, False]
attribute=[None, 1, 2, 3]
attribute=[None, 300, 600]
attribute=[None, 0]
1 --> ('spain', 'english', <VQAStrategyType.ZERO_SHOT: 'zero_shot'>, <ZeroShotPromptType.V1: 'zs_v1'>, None, None, None, None, None, None)
2 --> ('spain', 'english', <VQAStrategyType.ZERO_SHOT: 'zero_shot'>, <ZeroShotPromptType.V1: 'zs_v1'>, None, None, None, None, None, 0)
3 --> ('spain', 'english', <VQAStrategyType.ZERO_SHOT: 'zero_shot'>, <ZeroShotProm

[VQAStrategyDetail(country='spain', file_type='english', vqa_strategy_type=<VQAStrategyType.ZERO_SHOT: 'zero_shot'>, prompt_type=<ZeroShotPromptType.V1: 'zs_v1'>, relevant_docs_count=None, doc_splitter_options=None),
 VQAStrategyDetail(country='spain', file_type='english', vqa_strategy_type=<VQAStrategyType.ZERO_SHOT: 'zero_shot'>, prompt_type=<ZeroShotPromptType.V2: 'zs_v2'>, relevant_docs_count=None, doc_splitter_options=None),
 VQAStrategyDetail(country='spain', file_type='english', vqa_strategy_type=<VQAStrategyType.ZERO_SHOT: 'zero_shot'>, prompt_type=<ZeroShotPromptType.V3: 'zs_v3'>, relevant_docs_count=None, doc_splitter_options=None),
 VQAStrategyDetail(country='spain', file_type='english', vqa_strategy_type=<VQAStrategyType.RAG_Q: 'rag_q'>, prompt_type=<RagQPromptType.V1: 'rq_v1'>, relevant_docs_count=1, doc_splitter_options=DocSplitterOptions(doc_splitter_type=None, add_title=None, token_count=None, chunk_size=None, chunk_overlap=None)),
 VQAStrategyDetail(country='spain', fi

In [None]:
from typing import get_args
from src.utils.types_aliases import PromptType
from src.utils.data_definitions import GeneralDocSplitterOptions, GeneralVQAStrategiesDetails


# Scratch check of GeneralVQAStrategiesDetails with a reduced grid: one prompt per
# strategy type and only the paragraph splitter. The trailing/commented-out values
# are alternative grids kept for manual experimentation.
# Per the saved output, combinations that do not form a valid VQAStrategyDetail
# (e.g. ZERO_SHOT with a non-None relevant_docs_count) are reported and skipped.
c = GeneralVQAStrategiesDetails(
    countries=["spain"],
    file_types=["english"],
    vqa_strategy_types=[VQAStrategyType.ZERO_SHOT, VQAStrategyType.RAG_Q],  # alternative: [VQAStrategyType.RAG_Q]
    prompt_types=[ZeroShotPromptType.V1, RagQPromptType.V1],
    relevant_docs_count=[None, 1],
    doc_splitter_options=GeneralDocSplitterOptions(
        doc_splitter_types=[
            DocumentSplitterType.PARAGRAPH_SPLITTER,
            # Other candidates, currently disabled:
            # DocumentSplitterType.RECURSIVE_CHARACTER_SPLITTER,
            # DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
            # None
        ],
        add_titles=[None, False],
        token_counts=[None, 1, 2, 3, 4],
        chunk_sizes=[None],  # alternative: [None, 300, 600, 900]
        chunk_overlaps=[None] # alternative: [None, 0]
    )
)
# Last expression of the cell: displays the list of derived VQAStrategyDetail objects.
c.get_possible_vqa_strategy_details()

attribute=['spain']
attribute=['english']
attribute=[<VQAStrategyType.ZERO_SHOT: 'zero_shot'>, <VQAStrategyType.RAG_Q: 'rag_q'>]
attribute=[<ZeroShotPromptType.V1: 'zs_v1'>, <RagQPromptType.V1: 'rq_v1'>]
attribute=[None, 1]
attribute=[<DocumentSplitterType.PARAGRAPH_SPLITTER: 'paragraph_splitter'>]
attribute=[None, False]
attribute=[None, 1, 2, 3, 4]
attribute=[None]
attribute=[None]
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrategyType.ZERO_SHOT, 'relevant_docs_count' must be None. Skipping...
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrategyType.ZERO_SHOT, 'relevant_docs_count' must be None. Skipping...
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrategyType.ZERO_SHOT, 'relevant_docs_count' must be None. Skipping...
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrategyType.ZERO_SHOT, 'relevant_docs_count' must be None. Skipping...
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrat

[VQAStrategyDetail(country='spain', file_type='english', vqa_strategy_type=<VQAStrategyType.ZERO_SHOT: 'zero_shot'>, prompt_type=<ZeroShotPromptType.V1: 'zs_v1'>, relevant_docs_count=None, doc_splitter_options=None),
 VQAStrategyDetail(country='spain', file_type='english', vqa_strategy_type=<VQAStrategyType.RAG_Q: 'rag_q'>, prompt_type=<RagQPromptType.V1: 'rq_v1'>, relevant_docs_count=1, doc_splitter_options=DocSplitterOptions(doc_splitter_type=<DocumentSplitterType.PARAGRAPH_SPLITTER: 'paragraph_splitter'>, add_title=False, token_count=1, chunk_size=None, chunk_overlap=None)),
 VQAStrategyDetail(country='spain', file_type='english', vqa_strategy_type=<VQAStrategyType.RAG_Q: 'rag_q'>, prompt_type=<RagQPromptType.V1: 'rq_v1'>, relevant_docs_count=1, doc_splitter_options=DocSplitterOptions(doc_splitter_type=<DocumentSplitterType.PARAGRAPH_SPLITTER: 'paragraph_splitter'>, add_title=False, token_count=2, chunk_size=None, chunk_overlap=None)),
 VQAStrategyDetail(country='spain', file_type='

In [None]:
# Load the stored evaluation results for the strategy details generated from `c`,
# replacing missing values with "-" before displaying the table.
evaluation_results = (
    world_med_qa_v_dataset_management
    .load_evaluation_results(
        evaluation_results_folder=RESULTS_DIR,
        vqa_strategy_details=c.get_possible_vqa_strategy_details(),
    )
    .fillna("-")
)
evaluation_results

attribute=['spain']
attribute=['english']
attribute=[<VQAStrategyType.ZERO_SHOT: 'zero_shot'>, <VQAStrategyType.RAG_Q: 'rag_q'>]
attribute=[<ZeroShotPromptType.V1: 'zs_v1'>, <RagQPromptType.V1: 'rq_v1'>]
attribute=[None, 1]
attribute=[<DocumentSplitterType.PARAGRAPH_SPLITTER: 'paragraph_splitter'>]
attribute=[None, False]
attribute=[None, 1, 2, 3, 4]
attribute=[None]
attribute=[None]
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrategyType.ZERO_SHOT, 'relevant_docs_count' must be None. Skipping...
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrategyType.ZERO_SHOT, 'relevant_docs_count' must be None. Skipping...
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrategyType.ZERO_SHOT, 'relevant_docs_count' must be None. Skipping...
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrategyType.ZERO_SHOT, 'relevant_docs_count' must be None. Skipping...
Invalid VQAStrategyDetail: If 'vqa_strategy_type' is of type VQAStrat

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,accuracy,well_formatted_answers
0,spain,english,zero_shot,zs_v1,-,-,-,-,-,-,0.304,1.0
1,spain,english,rag_q,rq_v1,1.0,paragraph_splitter,False,1.0,-,-,0.256,1.0
2,spain,english,rag_q,rq_v1,1.0,paragraph_splitter,False,2.0,-,-,0.288,0.984
3,spain,english,rag_q,rq_v1,1.0,paragraph_splitter,False,3.0,-,-,0.24,0.976
4,spain,english,rag_q,rq_v1,1.0,paragraph_splitter,False,4.0,-,-,0.224,0.936


- Hacer gráficos más pequeños:
  + Fijando:
    * vqa_strategy: (e.g. `Rag Q`)
    * doc_splitter: (e.g. `spacy_sentence_splitter`)
    * add_title: (e.g. `False`)
  + Cambiando:
    * (en cada gráfico): prompt_type: (e.g. [`rq_v1 - rq_v6`])
    * (por columna): token_count: (e.g. [`1 - 5`])
    * (por fila): relevant_docs_count: (e.g. [`1 - 5`])

In [66]:
# Show every RAG-Q prompt variant defined by the enum.
[*RagQPromptType]

[<RagQPromptType.V1: 'rq_v1'>,
 <RagQPromptType.V2: 'rq_v2'>,
 <RagQPromptType.V3: 'rq_v3'>,
 <RagQPromptType.V4: 'rq_v4'>,
 <RagQPromptType.V5: 'rq_v5'>,
 <RagQPromptType.V6: 'rq_v6'>]

In [None]:
# Fixed: RAG-Q strategy, spaCy sentence splitter, no title, one relevant document.
# Varied: prompt type (all RAG-Q prompts) and token count (1 to 5).
rag_q_strategy_grid = GeneralVQAStrategiesDetails(
    countries=["spain"],
    file_types=["english"],
    vqa_strategy_types=[VQAStrategyType.RAG_Q],
    prompt_types=list(RagQPromptType),
    relevant_docs_count=[1],
    doc_splitter_options=GeneralDocSplitterOptions(
        doc_splitter_types=[DocumentSplitterType.SPACY_SENTENCE_SPLITTER],
        add_titles=[False],
        token_counts=[1, 2, 3, 4, 5],
        chunk_sizes=[None],
        chunk_overlaps=[None],
    ),
)

evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(
    evaluation_results_folder=RESULTS_DIR,
    vqa_strategy_details=rag_q_strategy_grid.get_possible_vqa_strategy_details(),
)
evaluation_results

attribute=['spain']
attribute=['english']
attribute=[<VQAStrategyType.RAG_Q: 'rag_q'>]
attribute=[<RagQPromptType.V1: 'rq_v1'>, <RagQPromptType.V2: 'rq_v2'>, <RagQPromptType.V3: 'rq_v3'>, <RagQPromptType.V4: 'rq_v4'>, <RagQPromptType.V5: 'rq_v5'>, <RagQPromptType.V6: 'rq_v6'>]
attribute=[1]
attribute=[<DocumentSplitterType.SPACY_SENTENCE_SPLITTER: 'spacy_sentence_splitter'>]
attribute=[False]
attribute=[1, 2, 3, 4, 5]
attribute=[None]
attribute=[None]
1 --> VQAStrategyDetail(country='spain', file_type='english', vqa_strategy_type=<VQAStrategyType.RAG_Q: 'rag_q'>, prompt_type=<RagQPromptType.V1: 'rq_v1'>, relevant_docs_count=1, doc_splitter_options=DocSplitterOptions(doc_splitter_type=<DocumentSplitterType.SPACY_SENTENCE_SPLITTER: 'spacy_sentence_splitter'>, add_title=False, token_count=1, chunk_size=None, chunk_overlap=None))
2 --> VQAStrategyDetail(country='spain', file_type='english', vqa_strategy_type=<VQAStrategyType.RAG_Q: 'rag_q'>, prompt_type=<RagQPromptType.V1: 'rq_v1'>, releva

Unnamed: 0,country,file_type,vqa_strategy_type,prompt_type,relevant_docs_count,doc_splitter,add_title,token_count,chunk_size,chunk_overlap,accuracy,well_formatted_answers
0,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,1,,,0.296,1.0
1,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,2,,,0.272,1.0
2,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,3,,,0.272,1.0
3,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,4,,,0.24,1.0
4,spain,english,rag_q,rq_v1,1,spacy_sentence_splitter,False,5,,,0.256,1.0
5,spain,english,rag_q,rq_v2,1,spacy_sentence_splitter,False,1,,,0.28,1.0
6,spain,english,rag_q,rq_v2,1,spacy_sentence_splitter,False,2,,,0.272,1.0
7,spain,english,rag_q,rq_v2,1,spacy_sentence_splitter,False,3,,,0.288,1.0
8,spain,english,rag_q,rq_v2,1,spacy_sentence_splitter,False,4,,,0.256,1.0
9,spain,english,rag_q,rq_v2,1,spacy_sentence_splitter,False,5,,,0.256,1.0


In [79]:
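# NOTE: This commented-out cell spells out by hand the same 30 RAG-Q combinations
# (6 prompt types x 5 token counts, spaCy sentence splitter, add_title=False,
# relevant_docs_count=1) that the previous cell generates with
# GeneralVQAStrategiesDetails. Kept for reference only.
# evaluation_results = world_med_qa_v_dataset_management.load_evaluation_results(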
#     evaluation_results_folder=RESULTS_DIR,
#     vqa_strategy_details=[
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V1,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=1
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V2,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=1
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V3,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=1
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V4,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=1
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V5,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=1
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V6,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=1
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V1,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=2
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V2,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=2
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V3,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=2
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V4,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=2
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V5,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=2
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V6,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=2
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V1,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=3
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V2,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=3
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V3,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=3
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V4,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=3
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V5,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=3
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V6,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=3
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V1,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=4
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V2,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=4
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V3,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=4
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V4,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=4
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V5,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=4
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V6,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=4
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V1,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=5
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V2,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=5
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V3,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=5
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V4,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=5
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V5,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=5
#             )
#         ),
#         VQAStrategyDetail(
#             country="spain",
#             file_type="english",
#             vqa_strategy_type=VQAStrategyType.RAG_Q,
#             prompt_type=RagQPromptType.V6,
#             relevant_docs_count=1,
#             doc_splitter_options=DocSplitterOptions(
#                 doc_splitter_type=DocumentSplitterType.SPACY_SENTENCE_SPLITTER,
#                 add_title=False,
#                 token_count=5
#             )
#         )
#     ]
# )
# evaluation_results = evaluation_results.fillna("-")
# evaluation_results

In [13]:
# Grid layout: rows = relevant_docs_count, columns = token_count, bars = prompt_type.
world_med_qa_v_plot_helpers.plot_rag_q_evaluation_results_by_groups(
    evaluation_results=evaluation_results,
    title="Analysis of LLaVA Model Accuracy Across Different RAG Approaches and Parameter Settings",
    bar_graph_variable='prompt_type',
    column_variable='token_count',
    row_variable='relevant_docs_count',
)

- 'rdc1_tc3' -> rq_v4 (el que mejor resultado da).
- Hacer lo mismo con Q+As (5 rags) (con pregunta y sin pregunta) --> DECIDIMOS LA MEJOR FORMA DE HACER IR  [SACAMOS OTRAS 2 LÍNEAS DE GRÁFICOS]
- Partir de este y hacer experimentos (2, 3, 4, 5 documentos)

BLOQUE RAG:

A: `<RAG> ... </RAG>` <br>
B: `<RAG> ... </RAG>` <br>
C: `<RAG> ... </RAG>` <br>
D: `<RAG> ... </RAG>` <br>

In [14]:
# Grid layout: rows = relevant_docs_count, columns = prompt_type, bars = token_count.
world_med_qa_v_plot_helpers.plot_rag_q_evaluation_results_by_groups(
    evaluation_results=evaluation_results,
    title="Analysis of LLaVA Model Accuracy Across Different RAG Approaches and Parameter Settings",
    bar_graph_variable='token_count',
    column_variable='prompt_type',
    row_variable='relevant_docs_count',
)