In [1]:
import os

# Toggle manually: True when the notebook runs on Google Colab, False for a local run.
IS_GOOGLE_COLAB_ENABLED = False

if not IS_GOOGLE_COLAB_ENABLED:
    print('Google Colab is not enabled. Running locally.')

    # NOTE: machine-specific absolute path; it must be adjusted when the
    # notebook is run on another machine. The trailing slash is required
    # because sub-paths are built by plain string concatenation.
    PROJECT_DIR = '/Users/ondergormez/Repos/BLM5109_Collective_Learning/03-Project/'
else:
    print('Google Colab is enabled. Running on Google Colab.')

    # Imported lazily: the package is only needed in the Colab branch.
    from google.colab import drive

    # Mount Google Drive so the project folder is reachable from the runtime.
    drive.mount('/content/drive', force_remount=True)

    PROJECT_DIR = '/content/drive/MyDrive/Colab/'

    # Create the project folder on the mounted drive if it is missing.
    os.makedirs(PROJECT_DIR, exist_ok=True)

# Input folder (LLM answer CSVs) and output folder (ragas score CSVs).
LLM_ANSWERS_DIR = f'{PROJECT_DIR}llm_answers'
RAGAS_SCORES_DIR = f'{PROJECT_DIR}ragas_scores'

# Create both folders (and any missing parents) up front.
os.makedirs(LLM_ANSWERS_DIR, exist_ok=True)
os.makedirs(RAGAS_SCORES_DIR, exist_ok=True)

Google Colab is not enabled. Running locally.


# Logger Kullanımı

- Oluşturulan kod içerisinde loglama işlemleri için `logging` kütüphanesi kullanılmıştır.
- Böylelikle geliştirme aşamasında debug için koyulan kodların çıktılarına bakılarak hata ayıklama işlemleri yapılabilir. Bu kodların çıktıları rapor sunulurken kaldırılacaktır.


In [2]:
import logging
import logging.config
import yaml
import textwrap


class CustomFormatter(logging.Formatter):
    """Formatter that re-flows long log lines so they fit the notebook output.

    A formatted message longer than ``WRAP_WIDTH`` characters is wrapped with
    ``textwrap.wrap``. The first line is left as is, every continuation line
    is indented by ``CONTINUATION_INDENT`` and a trailing newline is appended
    to visually separate the wrapped entry from the next one.

    Note: ``textwrap.wrap`` replaces embedded whitespace (including newlines)
    with single spaces, so multi-line messages are flattened when wrapped.
    """

    # Maximum line length before a message is wrapped.
    WRAP_WIDTH = 120
    # Prefix applied to every wrapped line except the first one.
    CONTINUATION_INDENT = ' ' * 4

    def format(self, record):
        """Return the formatted record, wrapped when it exceeds ``WRAP_WIDTH``.

        Args:
            record: the ``logging.LogRecord`` to format.

        Returns:
            The formatted string. Short messages are returned unchanged.
        """
        formatted = super().format(record)

        if len(formatted) <= self.WRAP_WIDTH:
            return formatted

        lines = textwrap.wrap(formatted, width=self.WRAP_WIDTH)

        # wrap() drops leading/trailing whitespace, so an over-long message can
        # still collapse to a single line (or to nothing). There is nothing to
        # indent in that case; return the message untouched instead of failing
        # on the first-line/rest split.
        if len(lines) < 2:
            return formatted

        first_line, *continuation = lines
        indented_rest = textwrap.indent('\n'.join(continuation), self.CONTINUATION_INDENT)
        return first_line + '\n' + indented_rest + '\n'


LOGGER_CONFIG_FILE = PROJECT_DIR + 'logger_config.yaml'

# Load the YAML logging configuration. The file is parsed with safe_load and
# closed before the configuration is applied.
with open(LOGGER_CONFIG_FILE, 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)
logging.config.dictConfig(config)

# Swap the formatter of every stream-based handler for the wrapping formatter,
# reusing the format and date format that the YAML configuration defined.
# NOTE: isinstance() also matches subclasses of StreamHandler (e.g. FileHandler).
logger = logging.getLogger("default_logger")
for handler in logger.handlers:
    if isinstance(handler, logging.StreamHandler):
        configured_formatter = handler.formatter
        if configured_formatter is None:
            # Handler was configured without a formatter: fall back to the
            # logging defaults instead of dereferencing None.
            handler.setFormatter(CustomFormatter())
        else:
            # _fmt is a private attribute of logging.Formatter; it is the only
            # place the original format string is stored.
            handler.setFormatter(
                CustomFormatter(configured_formatter._fmt, datefmt=configured_formatter.datefmt)
            )

# INFO for normal runs; switch to logging.DEBUG while developing.
logger.setLevel(logging.INFO)


# Smoke test: the debug line is only emitted when the level is DEBUG.
logger.debug("This is a debug message that will be wrapped after 120 characters. " * 10)
logger.info("Logging is configured successfully.")

[INFO] [2025-01-27T22:56:37.576Z] Logging is configured successfully.


In [3]:
# Model identifier; it is also the prefix of the "<model>_answer" CSV column.
gemma_model_name = "gemma_2_9b"

# Task B answer files, keyed by the position of the correct answer that is
# encoded in each file name (context length 15).
_gemma_task_b_answers_by_position = {
    1: '2025_01_27-12_40_21_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_1.csv',
    8: '2025_01_27-13_00_31_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_8.csv',
    15: '2025_01_27-13_20_42_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_15.csv',
}

# Dicts preserve insertion order, so the list keeps the 1, 8, 15 ordering.
gemma_llm_answers_task_b_file_names = list(_gemma_task_b_answers_by_position.values())

gemma_llm_answers_task_b_file_names

['2025_01_27-12_40_21_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_1.csv',
 '2025_01_27-13_00_31_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_8.csv',
 '2025_01_27-13_20_42_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_15.csv']

In [4]:
import time


def save_scores(llm_answers_file_name, llm_answers_df):
    """Write the scored answers to a time-stamped CSV under RAGAS_SCORES_DIR.

    Args:
        llm_answers_file_name: base name (no directory, no extension) of the
            answers file the scores belong to.
        llm_answers_df: object exposing ``to_csv`` (a pandas DataFrame in this
            notebook) that holds the answers together with their scores.

    Returns:
        The path of the CSV file that was written.
    """
    # The time stamp keeps file names unique across runs and helps later analysis.
    stamp = time.strftime("%Y_%m_%d-%H_%M_%S")
    csv_name = f'{llm_answers_file_name}_{stamp}_ragas_scores.csv'
    target_path = f'{RAGAS_SCORES_DIR}/{csv_name}'

    llm_answers_df.to_csv(target_path, index=True)
    logger.info(f"Scores are saved to the file: '{target_path}'")

    return target_path

In [5]:
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness, answer_relevancy
import pandas as pd

import os

# TODO: Replace the api key with the correct one from https://platform.openai.com/settings/organization/api-keys
# os.environ["OPENAI_API_KEY"] = "Your API Key"


def get_ragas_scores(file_names):
    """Score every answer of each LLM-answers CSV with ragas and save the result.

    For each file the CSV is read from ``LLM_ANSWERS_DIR``, every row is
    evaluated on its own (faithfulness, answer correctness, answer relevancy),
    the three scores are stored in new columns of the same names, and the
    frame is written with ``save_scores``.

    Args:
        file_names: iterable of CSV file names located in ``LLM_ANSWERS_DIR``.
            Each name must contain ``'cosmos'`` or ``'gemma'`` so the answer
            column (``'<model name>_answer'``) can be selected. The CSV must
            also provide the ``question``, ``answer`` and ``context`` columns.

    Returns:
        List with the path returned by ``save_scores`` for every input file,
        in input order.

    Raises:
        ValueError: if a file name contains neither ``'cosmos'`` nor ``'gemma'``.
    """
    score_columns = ('faithfulness', 'answer_correctness', 'answer_relevancy')
    ragas_scores_file_names = []

    for file_name in file_names:
        logger.info(f"Evaluating ragas score for the file: {file_name}")

        # Resolve the model per file so that an unrecognised file name can never
        # silently reuse the model of the previously processed file.
        # NOTE(review): cosmos_model_name is not defined in the visible cells;
        # confirm it exists before passing cosmos files.
        if 'cosmos' in file_name:
            model_name_for_ragas_score_eval = cosmos_model_name
        elif 'gemma' in file_name:
            model_name_for_ragas_score_eval = gemma_model_name
        else:
            raise ValueError(
                f"Cannot determine the model for '{file_name}': "
                "expected 'cosmos' or 'gemma' in the file name."
            )
        answer_column = f'{model_name_for_ragas_score_eval}_answer'

        llm_answers_df = pd.read_csv(LLM_ANSWERS_DIR + '/' + file_name)
        logger.info("Shape of the dataframe: " + str(llm_answers_df.shape))
        # Blank line between the header logs and the per-question output.
        print()

        # Score columns start as NaN so they stay numeric; a row whose
        # evaluation yields no value simply remains empty in the saved CSV.
        for column in score_columns:
            llm_answers_df[column] = float('nan')
        score_column_positions = {
            column: llm_answers_df.columns.get_loc(column) for column in score_columns
        }

        number_of_questions = len(llm_answers_df)
        for i in range(number_of_questions):
            row = llm_answers_df.iloc[i]

            # ragas expects column-oriented data: one list entry per sample,
            # and a list of context strings for each sample.
            data_sample = {
                'question': [row['question']],
                'ground_truth': [row['answer']],
                'answer': [row[answer_column]],
                'contexts': [[row['context']]],
            }

            dataset = Dataset.from_dict(data_sample)
            score = evaluate(dataset, metrics=[faithfulness, answer_correctness, answer_relevancy])
            logger.info(f"Scores for the question {i + 1} of {number_of_questions}\n: {score}")

            # Exactly one sample was evaluated, so the scores are in row 0.
            sample_scores = score.to_pandas().iloc[0]

            # Write by row position: this stays correct even when the 'id'
            # column contains duplicates or missing values.
            for column in score_columns:
                llm_answers_df.iloc[i, score_column_positions[column]] = sample_scores.get(column)

        # get just file name without path and extension
        base_name = os.path.splitext(os.path.basename(file_name))[0]

        saved_path = save_scores(base_name, llm_answers_df)
        ragas_scores_file_names.append(saved_path)

    return ragas_scores_file_names

In [6]:
# Score every Gemma Task B answers file; the result is the list of saved score CSV paths.
# Long-running: rows are evaluated one at a time (about 20 minutes per 50-row file in the recorded run).
ragas_scores_file_names = get_ragas_scores(gemma_llm_answers_task_b_file_names)

[INFO] [2025-01-27T22:56:38.518Z] Evaluating ragas score for the file:
    2025_01_27-12_40_21_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_1.csv

[INFO] [2025-01-27T22:56:38.535Z] Shape of the dataframe: (50, 11)



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T22:57:06.113Z] Scores for the question 1 of 50 : {'faithfulness': 0.0000, 'answer_correctness':
    0.7007, 'answer_relevancy': 0.9009}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T22:57:39.389Z] Scores for the question 2 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.4134, 'answer_relevancy': 1.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T22:58:13.602Z] Scores for the question 3 of 50 : {'faithfulness': 0.7273, 'answer_correctness':
    0.5019, 'answer_relevancy': 0.8781}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T22:58:37.998Z] Scores for the question 4 of 50 : {'faithfulness': 0.1250, 'answer_correctness':
    0.3718, 'answer_relevancy': 0.8342}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T22:59:03.368Z] Scores for the question 5 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7259, 'answer_relevancy': 0.9935}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T22:59:29.993Z] Scores for the question 6 of 50 : {'faithfulness': 0.3000, 'answer_correctness':
    0.7315, 'answer_relevancy': 0.8447}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T22:59:46.217Z] Scores for the question 7 of 50 : {'faithfulness': 0.6667, 'answer_correctness':
    0.9857, 'answer_relevancy': 0.8403}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T22:59:59.812Z] Scores for the question 8 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.6069, 'answer_relevancy': 0.9164}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:00:27.643Z] Scores for the question 9 of 50 : {'faithfulness': 0.0000, 'answer_correctness':
    0.7626, 'answer_relevancy': 0.8290}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:00:40.164Z] Scores for the question 10 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.8374, 'answer_relevancy': 0.8425}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:01:01.292Z] Scores for the question 11 of 50 : {'faithfulness': 0.3333, 'answer_correctness':
    0.6150, 'answer_relevancy': 0.8832}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:01:30.473Z] Scores for the question 12 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7344, 'answer_relevancy': 0.8543}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:01:51.572Z] Scores for the question 13 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7685, 'answer_relevancy': 0.7825}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:02:21.517Z] Scores for the question 14 of 50 : {'faithfulness': 0.3636, 'answer_correctness':
    0.6112, 'answer_relevancy': 0.8707}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:02:51.776Z] Scores for the question 15 of 50 : {'faithfulness': 0.6000, 'answer_correctness':
    0.5830, 'answer_relevancy': 0.9225}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:03:16.921Z] Scores for the question 16 of 50 : {'faithfulness': 0.3750, 'answer_correctness':
    0.4518, 'answer_relevancy': 0.8791}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:03:49.592Z] Scores for the question 17 of 50 : {'faithfulness': 0.6667, 'answer_correctness':
    0.7337, 'answer_relevancy': 0.8606}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:04:05.679Z] Scores for the question 18 of 50 : {'faithfulness': 0.4286, 'answer_correctness':
    0.6862, 'answer_relevancy': 0.8907}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:05:17.781Z] Scores for the question 19 of 50 : {'faithfulness': 0.2500, 'answer_correctness':
    0.5607, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:05:42.256Z] Scores for the question 20 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.5841, 'answer_relevancy': 0.9462}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:05:55.484Z] Scores for the question 21 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.4778, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:06:12.815Z] Scores for the question 22 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7962, 'answer_relevancy': 0.8798}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:06:42.626Z] Scores for the question 23 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.3461, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:07:00.750Z] Scores for the question 24 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.4134, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:07:30.038Z] Scores for the question 25 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.7647, 'answer_relevancy': 0.8686}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:07:57.802Z] Scores for the question 26 of 50 : {'faithfulness': 0.6000, 'answer_correctness':
    0.4230, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:08:12.793Z] Scores for the question 27 of 50 : {'faithfulness': 0.6667, 'answer_correctness':
    0.7321, 'answer_relevancy': 0.8663}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:08:39.685Z] Scores for the question 28 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.3259, 'answer_relevancy': 0.7756}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:09:11.943Z] Scores for the question 29 of 50 : {'faithfulness': 0.8182, 'answer_correctness':
    0.4468, 'answer_relevancy': 0.7472}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:09:35.894Z] Scores for the question 30 of 50 : {'faithfulness': 0.1000, 'answer_correctness':
    0.2177, 'answer_relevancy': 0.8303}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:10:05.326Z] Scores for the question 31 of 50 : {'faithfulness': 0.7143, 'answer_correctness':
    0.3995, 'answer_relevancy': 0.7581}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:10:31.164Z] Scores for the question 32 of 50 : {'faithfulness': 0.2500, 'answer_correctness':
    0.3636, 'answer_relevancy': 0.8771}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:10:59.770Z] Scores for the question 33 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7015, 'answer_relevancy': 0.7535}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:11:38.164Z] Scores for the question 34 of 50 : {'faithfulness': 0.3750, 'answer_correctness':
    0.5946, 'answer_relevancy': 0.8627}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:12:14.569Z] Scores for the question 35 of 50 : {'faithfulness': 0.3333, 'answer_correctness':
    0.8626, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:12:37.585Z] Scores for the question 36 of 50 : {'faithfulness': 0.4000, 'answer_correctness':
    0.5480, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:14:11.974Z] Scores for the question 37 of 50 : {'faithfulness': 0.4286, 'answer_correctness':
    0.5504, 'answer_relevancy': 0.8659}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:14:35.251Z] Scores for the question 38 of 50 : {'faithfulness': 0.8000, 'answer_correctness':
    0.6176, 'answer_relevancy': 0.8959}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:15:04.722Z] Scores for the question 39 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.2824, 'answer_relevancy': 0.7335}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:15:49.630Z] Scores for the question 40 of 50 : {'faithfulness': 0.6429, 'answer_correctness':
    0.4643, 'answer_relevancy': 0.8599}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:16:10.812Z] Scores for the question 41 of 50 : {'faithfulness': 0.0000, 'answer_correctness':
    0.6776, 'answer_relevancy': 0.8474}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:16:44.367Z] Scores for the question 42 of 50 : {'faithfulness': 0.4000, 'answer_correctness':
    0.4018, 'answer_relevancy': 0.8491}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:17:13.371Z] Scores for the question 43 of 50 : {'faithfulness': 0.4444, 'answer_correctness':
    0.5079, 'answer_relevancy': 0.9134}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:17:29.685Z] Scores for the question 44 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.9775, 'answer_relevancy': 0.9650}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:17:52.928Z] Scores for the question 45 of 50 : {'faithfulness': 0.2000, 'answer_correctness':
    0.9363, 'answer_relevancy': 0.9595}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:18:14.869Z] Scores for the question 46 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.6969, 'answer_relevancy': 0.9636}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:18:41.176Z] Scores for the question 47 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.8681, 'answer_relevancy': 0.8760}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:18:51.615Z] Scores for the question 48 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.9750, 'answer_relevancy': 0.8198}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:19:16.930Z] Scores for the question 49 of 50 : {'faithfulness': 0.5714, 'answer_correctness':
    0.6121, 'answer_relevancy': 0.8323}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:19:33.601Z] Scores for the question 50 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.9863, 'answer_relevancy': 0.0000}

[INFO] [2025-01-27T23:19:33.623Z] Scores are saved to the file: '/Users/ondergormez/Repos/BLM5109_Collective_Learning/03
    -Project/ragas_scores/2025_01_27-
    12_40_21_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_1_2025_01_27-
    23_19_33_ragas_scores.csv'

[INFO] [2025-01-27T23:19:33.623Z] Evaluating ragas score for the file:
    2025_01_27-13_00_31_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_8.csv

[INFO] [2025-01-27T23:19:33.636Z] Shape of the dataframe: (50, 11)



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:19:56.339Z] Scores for the question 1 of 50 : {'faithfulness': 0.2500, 'answer_correctness':
    0.7663, 'answer_relevancy': 0.9339}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:20:18.162Z] Scores for the question 2 of 50 : {'faithfulness': 0.8571, 'answer_correctness':
    0.5108, 'answer_relevancy': 1.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:20:48.276Z] Scores for the question 3 of 50 : {'faithfulness': 0.8333, 'answer_correctness':
    0.6937, 'answer_relevancy': 0.9638}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:21:13.195Z] Scores for the question 4 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.5535, 'answer_relevancy': 0.8412}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:21:30.062Z] Scores for the question 5 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.5325, 'answer_relevancy': 1.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:22:03.042Z] Scores for the question 6 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.8568, 'answer_relevancy': 0.8084}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:22:22.653Z] Scores for the question 7 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.5624, 'answer_relevancy': 0.8314}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:22:49.599Z] Scores for the question 8 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.6605, 'answer_relevancy': 0.8538}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:23:16.983Z] Scores for the question 9 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.5673, 'answer_relevancy': 0.8040}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:23:37.003Z] Scores for the question 10 of 50 : {'faithfulness': 0.3333, 'answer_correctness':
    0.6661, 'answer_relevancy': 0.8620}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:23:55.279Z] Scores for the question 11 of 50 : {'faithfulness': 0.4000, 'answer_correctness':
    0.6481, 'answer_relevancy': 0.8832}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:24:23.885Z] Scores for the question 12 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7592, 'answer_relevancy': 0.8444}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:24:40.420Z] Scores for the question 13 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7716, 'answer_relevancy': 0.7726}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:24:59.070Z] Scores for the question 14 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7317, 'answer_relevancy': 0.8672}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:25:24.640Z] Scores for the question 15 of 50 : {'faithfulness': 0.8750, 'answer_correctness':
    0.7890, 'answer_relevancy': 0.7910}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:25:46.950Z] Scores for the question 16 of 50 : {'faithfulness': 0.0000, 'answer_correctness':
    0.7075, 'answer_relevancy': 0.8791}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:26:09.402Z] Scores for the question 17 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.3618, 'answer_relevancy': 0.8198}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:26:21.177Z] Scores for the question 18 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.7331, 'answer_relevancy': 0.9412}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:26:43.098Z] Scores for the question 19 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.5028, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

Exception raised in Job[2]: TimeoutError()


[INFO] [2025-01-27T23:29:44.301Z] Scores for the question 20 of 50 : {'faithfulness': 0.2857, 'answer_correctness':
    0.7328, 'answer_relevancy': nan}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:30:10.036Z] Scores for the question 21 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.2205, 'answer_relevancy': 0.9221}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:30:24.371Z] Scores for the question 22 of 50 : {'faithfulness': 0.6000, 'answer_correctness':
    0.5670, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:30:44.701Z] Scores for the question 23 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.4798, 'answer_relevancy': 0.8454}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:31:01.879Z] Scores for the question 24 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.9825, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:31:26.247Z] Scores for the question 25 of 50 : {'faithfulness': 0.1429, 'answer_correctness':
    0.7955, 'answer_relevancy': 0.8299}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:31:42.037Z] Scores for the question 26 of 50 : {'faithfulness': 0.7500, 'answer_correctness':
    0.6112, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:31:54.452Z] Scores for the question 27 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7308, 'answer_relevancy': 0.8922}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:32:24.633Z] Scores for the question 28 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.2071, 'answer_relevancy': 0.7560}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:33:22.197Z] Scores for the question 29 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.5282, 'answer_relevancy': 0.8423}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:33:45.282Z] Scores for the question 30 of 50 : {'faithfulness': 0.8000, 'answer_correctness':
    0.2205, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:34:16.194Z] Scores for the question 31 of 50 : {'faithfulness': 0.3636, 'answer_correctness':
    0.5699, 'answer_relevancy': 0.7535}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:34:47.071Z] Scores for the question 32 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.3749, 'answer_relevancy': 0.8771}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:35:22.844Z] Scores for the question 33 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.6848, 'answer_relevancy': 0.7568}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:35:48.598Z] Scores for the question 34 of 50 : {'faithfulness': 0.9000, 'answer_correctness':
    0.6055, 'answer_relevancy': 0.8540}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:36:11.753Z] Scores for the question 35 of 50 : {'faithfulness': 0.0833, 'answer_correctness':
    0.4937, 'answer_relevancy': 0.8967}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:36:24.739Z] Scores for the question 36 of 50 : {'faithfulness': 0.7500, 'answer_correctness':
    0.5191, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:36:45.456Z] Scores for the question 37 of 50 : {'faithfulness': 0.8000, 'answer_correctness':
    0.7302, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:36:59.499Z] Scores for the question 38 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.6597, 'answer_relevancy': 0.8960}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:37:21.345Z] Scores for the question 39 of 50 : {'faithfulness': 0.2857, 'answer_correctness':
    0.2137, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:37:49.573Z] Scores for the question 40 of 50 : {'faithfulness': 0.3571, 'answer_correctness':
    0.5210, 'answer_relevancy': 0.8599}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:38:22.722Z] Scores for the question 41 of 50 : {'faithfulness': 0.2667, 'answer_correctness':
    0.3746, 'answer_relevancy': 0.8083}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:38:45.086Z] Scores for the question 42 of 50 : {'faithfulness': 0.7500, 'answer_correctness':
    0.5512, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:39:04.111Z] Scores for the question 43 of 50 : {'faithfulness': 0.3000, 'answer_correctness':
    0.4489, 'answer_relevancy': 0.9134}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:39:20.653Z] Scores for the question 44 of 50 : {'faithfulness': 0.8750, 'answer_correctness':
    0.8557, 'answer_relevancy': 0.9680}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:39:40.114Z] Scores for the question 45 of 50 : {'faithfulness': 0.8000, 'answer_correctness':
    0.8263, 'answer_relevancy': 0.9621}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:39:55.506Z] Scores for the question 46 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7313, 'answer_relevancy': 0.8639}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:40:18.204Z] Scores for the question 47 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.8416, 'answer_relevancy': 0.8716}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:40:29.782Z] Scores for the question 48 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.8281, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:41:04.285Z] Scores for the question 49 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.4761, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:41:15.758Z] Scores for the question 50 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7255, 'answer_relevancy': 0.7952}

[INFO] [2025-01-27T23:41:15.778Z] Scores are saved to the file: '/Users/ondergormez/Repos/BLM5109_Collective_Learning/03
    -Project/ragas_scores/2025_01_27-
    13_00_31_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_8_2025_01_27-
    23_41_15_ragas_scores.csv'

[INFO] [2025-01-27T23:41:15.780Z] Evaluating ragas score for the file:
    2025_01_27-13_20_42_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_15.csv

[INFO] [2025-01-27T23:41:15.793Z] Shape of the dataframe: (50, 11)



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:41:39.143Z] Scores for the question 1 of 50 : {'faithfulness': 0.2000, 'answer_correctness':
    0.7675, 'answer_relevancy': 0.8619}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:42:07.095Z] Scores for the question 2 of 50 : {'faithfulness': 0.9167, 'answer_correctness':
    0.7597, 'answer_relevancy': 1.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:42:37.738Z] Scores for the question 3 of 50 : {'faithfulness': 0.6000, 'answer_correctness':
    0.6369, 'answer_relevancy': 0.9983}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:42:57.523Z] Scores for the question 4 of 50 : {'faithfulness': 0.8000, 'answer_correctness':
    0.5386, 'answer_relevancy': 0.8271}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:43:12.031Z] Scores for the question 5 of 50 : {'faithfulness': 0.8000, 'answer_correctness':
    0.8042, 'answer_relevancy': 0.9868}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:43:32.505Z] Scores for the question 6 of 50 : {'faithfulness': 0.8000, 'answer_correctness':
    0.7325, 'answer_relevancy': 0.7974}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:43:47.081Z] Scores for the question 7 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.9830, 'answer_relevancy': 0.8345}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:43:59.870Z] Scores for the question 8 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7342, 'answer_relevancy': 0.9849}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:44:22.203Z] Scores for the question 9 of 50 : {'faithfulness': 0.4000, 'answer_correctness':
    0.6381, 'answer_relevancy': 0.9808}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:44:41.398Z] Scores for the question 10 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.8801, 'answer_relevancy': 0.8735}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:44:57.799Z] Scores for the question 11 of 50 : {'faithfulness': 0.4000, 'answer_correctness':
    0.8656, 'answer_relevancy': 0.9641}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:45:21.188Z] Scores for the question 12 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7596, 'answer_relevancy': 0.8444}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:45:37.224Z] Scores for the question 13 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.9879, 'answer_relevancy': 0.8098}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:45:54.508Z] Scores for the question 14 of 50 : {'faithfulness': 0.7143, 'answer_correctness':
    0.8773, 'answer_relevancy': 0.8788}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:46:24.881Z] Scores for the question 15 of 50 : {'faithfulness': 0.7500, 'answer_correctness':
    0.6118, 'answer_relevancy': 0.9156}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:46:52.363Z] Scores for the question 16 of 50 : {'faithfulness': 0.1111, 'answer_correctness':
    0.8305, 'answer_relevancy': 0.8791}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:47:17.163Z] Scores for the question 17 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.9363, 'answer_relevancy': 0.8507}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:47:30.278Z] Scores for the question 18 of 50 : {'faithfulness': 0.1667, 'answer_correctness':
    0.8327, 'answer_relevancy': 0.9731}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:47:44.929Z] Scores for the question 19 of 50 : {'faithfulness': 0.4000, 'answer_correctness':
    0.5280, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:48:05.436Z] Scores for the question 20 of 50 : {'faithfulness': 0.6667, 'answer_correctness':
    0.5894, 'answer_relevancy': 0.9461}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:48:23.314Z] Scores for the question 21 of 50 : {'faithfulness': 0.7500, 'answer_correctness':
    0.2274, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:48:41.014Z] Scores for the question 22 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.6642, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:48:57.097Z] Scores for the question 23 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.5669, 'answer_relevancy': 0.8800}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:49:12.522Z] Scores for the question 24 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.9860, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:49:42.165Z] Scores for the question 25 of 50 : {'faithfulness': 0.1250, 'answer_correctness':
    0.7623, 'answer_relevancy': 0.8298}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:50:03.725Z] Scores for the question 26 of 50 : {'faithfulness': 0.5556, 'answer_correctness':
    0.4832, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:50:15.933Z] Scores for the question 27 of 50 : {'faithfulness': 0.6667, 'answer_correctness':
    0.7324, 'answer_relevancy': 0.8921}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:50:39.792Z] Scores for the question 28 of 50 : {'faithfulness': 0.8889, 'answer_correctness':
    0.2024, 'answer_relevancy': 0.7753}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:51:05.847Z] Scores for the question 29 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.8284, 'answer_relevancy': 0.8085}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:51:22.551Z] Scores for the question 30 of 50 : {'faithfulness': 0.7143, 'answer_correctness':
    0.4191, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:51:44.077Z] Scores for the question 31 of 50 : {'faithfulness': 0.8333, 'answer_correctness':
    0.5577, 'answer_relevancy': 0.7626}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:52:01.581Z] Scores for the question 32 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.8385, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:52:31.156Z] Scores for the question 33 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.6811, 'answer_relevancy': 0.8391}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:52:50.993Z] Scores for the question 34 of 50 : {'faithfulness': 0.6667, 'answer_correctness':
    0.5294, 'answer_relevancy': 0.9426}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:53:19.080Z] Scores for the question 35 of 50 : {'faithfulness': 0.0000, 'answer_correctness':
    0.2893, 'answer_relevancy': 0.8037}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:53:36.547Z] Scores for the question 36 of 50 : {'faithfulness': 0.7500, 'answer_correctness':
    0.4692, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:54:02.697Z] Scores for the question 37 of 50 : {'faithfulness': 0.8462, 'answer_correctness':
    0.9134, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:54:19.862Z] Scores for the question 38 of 50 : {'faithfulness': 0.8571, 'answer_correctness':
    0.7406, 'answer_relevancy': 0.8946}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:54:45.363Z] Scores for the question 39 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.1949, 'answer_relevancy': 0.6903}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:55:04.570Z] Scores for the question 40 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.7077, 'answer_relevancy': 0.8599}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:55:23.359Z] Scores for the question 41 of 50 : {'faithfulness': 0.5000, 'answer_correctness':
    0.7917, 'answer_relevancy': 0.8474}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:55:42.638Z] Scores for the question 42 of 50 : {'faithfulness': 0.9000, 'answer_correctness':
    0.4268, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:56:04.624Z] Scores for the question 43 of 50 : {'faithfulness': 0.6667, 'answer_correctness':
    0.5533, 'answer_relevancy': 0.9134}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:56:22.026Z] Scores for the question 44 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.9089, 'answer_relevancy': 0.9650}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:56:38.289Z] Scores for the question 45 of 50 : {'faithfulness': 0.6667, 'answer_correctness':
    0.8310, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:56:57.666Z] Scores for the question 46 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.8326, 'answer_relevancy': 0.8423}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:57:33.900Z] Scores for the question 47 of 50 : {'faithfulness': 0.4615, 'answer_correctness':
    0.6995, 'answer_relevancy': 0.8750}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:57:43.823Z] Scores for the question 48 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.6029, 'answer_relevancy': 0.8149}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:58:11.892Z] Scores for the question 49 of 50 : {'faithfulness': 0.1250, 'answer_correctness':
    0.6622, 'answer_relevancy': 0.0000}



Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

[INFO] [2025-01-27T23:58:25.963Z] Scores for the question 50 of 50 : {'faithfulness': 1.0000, 'answer_correctness':
    0.9834, 'answer_relevancy': 0.9176}

[INFO] [2025-01-27T23:58:25.984Z] Scores are saved to the file: '/Users/ondergormez/Repos/BLM5109_Collective_Learning/03
    -Project/ragas_scores/2025_01_27-
    13_20_42_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_15_2025_01_27-
    23_58_25_ragas_scores.csv'



In [7]:
# Echo the list of RAGAS score file paths as the cell's output so the reader
# can see which CSV files were written (the recorded output shows three
# '*_ragas_scores.csv' paths under the project's ragas_scores directory).
ragas_scores_file_names

['/Users/ondergormez/Repos/BLM5109_Collective_Learning/03-Project/ragas_scores/2025_01_27-12_40_21_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_1_2025_01_27-23_19_33_ragas_scores.csv',
 '/Users/ondergormez/Repos/BLM5109_Collective_Learning/03-Project/ragas_scores/2025_01_27-13_00_31_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_8_2025_01_27-23_41_15_ragas_scores.csv',
 '/Users/ondergormez/Repos/BLM5109_Collective_Learning/03-Project/ragas_scores/2025_01_27-13_20_42_gemma_2_9b_llm_answers_for_cntx_length_15_and_position_of_correct_answer_15_2025_01_27-23_58_25_ragas_scores.csv']