# Manual Text-Evaluation of the Models in Comparison

In [2]:
import os
import warnings
import random
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from utils.config import ROOT_DIR
from utils.evaluation import (
    get_all_test_data,
    get_table_column_names,
    get_evaluation_data
)

Configuration loaded in: 0.0 seconds
Debugging is enabled: True
Device: cuda is available
VectorStoreIndex: wiki_movie_plots
----------------------------------------------------------------------------------------------------


In [3]:
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

In [4]:
# Set seaborn's default colour palette to "pastel".
sns.set_palette("pastel")

### Set all the parameters

In [5]:
# Identifiers of the LLMs whose stored results are compared.
llms = ["oll_gemma_instruct", "oll_llama3_instruct", "oll_mistral_instruct"]

# Collection name -> display label. The two numbers in each name are presumably
# chunk size and chunk overlap — TODO confirm against the index-building code.
collections = {
    f"wiki_movie_plots_{first}_{second}_mxbai": f"Wiki Movie Plots {first} {second}"
    for first, second in ((512, 50), (1024, 100), (2048, 200))
}

# Score column -> display label, in the order the scores are reported.
metrics = dict(
    context_relevancy_score="Context Relevancy Score",
    correctness_score="Correctness Score",
    faithfulness_score="Faithfulness Score",
    relevancy_score="Relevancy Score",
    model_score="Model Score",
)
metrics_keys = list(metrics)
metrics_values = list(metrics.values())

# LLM identifier -> display label.
labels_llms = dict(
    oll_llama3_instruct="Llama3 Instruct",
    oll_gemma_instruct="Gemma Instruct",
    oll_mistral_instruct="Mistral Instruct",
)

# Chat mode passed to get_all_test_data when loading results.
chat_mode = "CONTEXT"

In [6]:
# Column names of the "evaluation" table; reused below as the DataFrame headers.
column_names = get_table_column_names(table_name="evaluation")
column_names

['id',
 'llm_model',
 'embed_model',
 'collection_name',
 'question_id',
 'question',
 'context',
 'reference_answer',
 'response',
 'answer_feedback',
 'context_relevancy_score',
 'context_relevancy_feedback',
 'correctness_score',
 'correctness_feedback',
 'faithfulness_score',
 'relevancy_score',
 'relevancy_feedback',
 'model_score',
 'model_feedback',
 'chat_mode']

### Define the functions

In [7]:
def compare_llms(collection_names=None):
    """Load the stored test data of every LLM for each requested collection.

    Reads the notebook-level ``llms``, ``chat_mode`` and ``column_names``.

    Args:
        collection_names: Iterable of collection names to load. When ``None``,
            only ``"wiki_movie_plots_1024_100_mxbai"`` is loaded.

    Returns:
        dict: ``{collection_name: {llm: DataFrame}}``; each frame wraps the
        result of ``get_all_test_data`` with ``column_names`` as its columns.
    """
    selected = ["wiki_movie_plots_1024_100_mxbai"] if collection_names is None else collection_names

    def _frame_for(llm_name, collection):
        # One lookup per (collection, llm) pair.
        rows = get_all_test_data(llm=True, llm_model=llm_name, collection_name=collection, chat_mode=chat_mode)
        return pd.DataFrame(data=rows, columns=column_names)

    return {
        collection: {llm_name: _frame_for(llm_name, collection) for llm_name in llms}
        for collection in selected
    }

In [132]:
def feedback_block(dataframe):
    """Print a readable feedback report for every row of ``dataframe``.

    For each row the report shows a short "<model>_<collection>" tag, the
    reference answer, the model's response and one line per configured metric.
    The metric lines are generated from the notebook-level ``metrics_values``
    (labels) and ``metrics_keys`` (column names), so adding or removing a metric
    in the configuration no longer requires touching this function.

    Args:
        dataframe: DataFrame with the columns ``llm_model``, ``collection_name``,
            ``reference_answer``, ``response`` and every column in ``metrics_keys``.

    Returns:
        None. The report is written to stdout.
    """
    separator = "------------------------------------------------"
    for _, values in dataframe.iterrows():
        print('#'*100)
        # One "<label>: <score>" line per configured metric, in configuration order.
        score_lines = "".join(
            f"{label}: {values[key]}\n"
            for label, key in zip(metrics_values, metrics_keys)
        )
        data = (
            # [4:-9] drops the first 4 and last 9 characters of the model name
            # ("oll_gemma_instruct" -> "gemma"); [17:-6] does the same for the
            # collection name ("wiki_movie_plots_1024_100_mxbai" -> "1024_100").
            f"{values['llm_model'][4:-9]}_{values['collection_name'][17:-6]}\n"
            f"Reference Answer: {values['reference_answer']} \n"
            f"{separator}\n"
            f"Model Answer:\n{values['response']}\n"
            f"{separator}\n"
            f"{score_lines}"
        )
        print(data)

### Get the data for the LLM models

In [133]:
# Load the per-LLM frames for every configured collection
# (iterating the `collections` dict inside compare_llms yields its keys, i.e. the collection names).
data_for_llms = compare_llms(collections)

In [134]:
# Wrap whatever get_evaluation_data() returns in a DataFrame whose headers are
# the "evaluation" table's column names, then display it.
evaluation_data_context = get_evaluation_data()
evaluation_df_context = pd.DataFrame(data=evaluation_data_context, columns=column_names)
evaluation_df_context

Unnamed: 0,id,llm_model,embed_model,collection_name,question_id,question,context,reference_answer,response,answer_feedback,context_relevancy_score,context_relevancy_feedback,correctness_score,correctness_feedback,faithfulness_score,relevancy_score,relevancy_feedback,model_score,model_feedback,chat_mode
0,1072,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,0,"Who is the director of the movie ""The Karate K...",release_year: 1984\ntitle: The Karate Kid\nori...,"The director of the movie ""The Karate Kid"" rel...","According to the context information provided,...",1. Does the provided response match the subjec...,1.000,1. The retrieved context matches the subject m...,5.0,The generated answer is both relevant and corr...,1.0,1.0,YES,1.0,Assistant A provided a more comprehensive resp...,CONTEXT
1,1073,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,1,"What is the plot of ""The Karate Kid"" released ...",release_year: 1984\ntitle: The Karate Kid\nori...,"The plot of ""The Karate Kid"" released in 1984 ...","Here's the plot of ""The Karate Kid"" (1984):\n\...",1. Does the provided response match the subjec...,1.000,1. The retrieved context matches the subject m...,4.0,The generated answer provides a detailed and a...,1.0,1.0,YES,1.0,Assistant A provides a detailed and comprehens...,CONTEXT
2,1074,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,2,"What is the genre of the movie ""The Karate Kid...",release_year: 1984\ntitle: The Karate Kid\nori...,"The genre of the movie ""The Karate Kid"" releas...","The genre of the movie ""The Karate Kid"" releas...",1. The response provided matches the subject m...,1.000,1. The retrieved context matches the subject m...,4.0,"The generated answer is relevant and correct, ...",1.0,1.0,YES,1.0,Assistant A provided a more detailed and compr...,CONTEXT
3,1075,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,3,Who was the legendary boxing trainer that took...,release_year: 2016\ntitle: Hands of Stone\nori...,"Ray Arcel, an American legendary boxing traine...","According to the context information, the lege...",1. The response correctly identifies Ray Arcel...,1.000,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,1.0,YES,0.5,,CONTEXT
4,1076,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,4,What was the result of the rematch between Rob...,release_year: 2016\ntitle: Hands of Stone\nori...,"Durán famously gave up by saying ""No más"" to t...","According to the context information, in the r...",1. The response provided matches the subject m...,1.000,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,1.0,YES,1.0,Assistant A provided a more detailed and accur...,CONTEXT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2830,3902,oll_mistral_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,310,What is the main plot of Leviathan?,release_year: 1989\ntitle: Leviathan\norigin_e...,The main plot of Leviathan revolves around a c...,"The main plot of the film ""Leviathan,"" releas...",1. The response does not match the subject mat...,1.000,1. The retrieved context matches the subject m...,3.0,The generated answer provides a detailed summa...,1.0,0.0,NO,1.0,Assistant A provides a detailed and comprehens...,CONTEXT
2831,3903,oll_mistral_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,311,How does the film Leviathan end?,release_year: 1989\ntitle: Leviathan\norigin_e...,The film Leviathan ends with the survivors esc...,"In the film ""Leviathan,"" released in 1989, th...",1. The response provided does not match the su...,1.000,1. The retrieved context matches the subject m...,2.5,The generated answer provides a detailed summa...,1.0,0.0,NO,0.5,,CONTEXT
2832,3904,oll_mistral_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,312,"Who are the main characters in the movie ""The ...",release_year: 2002\ntitle: The Tuxedo\norigin_...,The main characters are Jimmy Tong (played by ...,"The main characters in the movie ""The Tuxedo,...",1. The response matches the subject matter of ...,0.875,1. The retrieved context matches the subject m...,4.0,The generated answer provides a detailed and a...,1.0,1.0,YES,1.0,Assistant A provides a detailed and comprehens...,CONTEXT
2833,3905,oll_mistral_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,313,"What is the main plot of the movie ""The Tuxedo""?",release_year: 2002\ntitle: The Tuxedo\norigin_...,"The main plot revolves around Jimmy Tong, a ta...","The main plot of the movie ""The Tuxedo,"" rele...",1. The response matches the subject matter of ...,1.000,1. The retrieved context matches the subject m...,4.0,The generated answer provides a detailed summa...,1.0,0.0,NO,1.0,Assistant A provides a detailed and accurate s...,CONTEXT


### Generate 5 random QuestionIDs for Evaluation

In [135]:
# Number of random question ids (drawn from the range 0-314) to pick for the manual review.
number_of_ids = 5

In [136]:
# The sampled ids are written to a file so that every later run reviews the same
# questions; new ids are only drawn when that file does not exist yet.
file = f'{ROOT_DIR}/evaluation/evaluation_tests/random_question_ids.txt'
if not os.path.exists(file):
    # random.sample draws without replacement, so no question id can be picked
    # twice (independent randint calls could repeat an id and shrink the review set).
    # range(0, 315) covers the same ids 0..314 as before.
    rand_list = random.sample(range(0, 315), number_of_ids)
    with open(file, 'w') as random_ids:
        random_ids.write(", ".join(str(num) for num in rand_list))

### Read File with Random numbers
Now all the data corresponding to the random ids is fetched so that the results can be investigated and evaluated.

In [137]:
# Read the persisted ids back. The handle gets its own name so that `file` keeps
# holding the path string: rebinding it to the (closed) file object would break a
# re-run of this cell and of the cell that checks os.path.exists(file).
with open(file, "r") as ids_file:
    # int() ignores surrounding whitespace, so "51, 228" and "51,228" both parse.
    get_ids_from_file = [int(token) for token in ids_file.read().split(',')]

In [138]:
# Show the question ids that were read from the file.
get_ids_from_file

[51, 228, 94, 271, 226]

In [139]:
# Keep only the rows that belong to one of the randomly selected questions.
evaluation_df = evaluation_df_context.loc[
    evaluation_df_context['question_id'].isin(get_ids_from_file)
]
evaluation_df

Unnamed: 0,id,llm_model,embed_model,collection_name,question_id,question,context,reference_answer,response,answer_feedback,context_relevancy_score,context_relevancy_feedback,correctness_score,correctness_feedback,faithfulness_score,relevancy_score,relevancy_feedback,model_score,model_feedback,chat_mode
51,1123,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","According to the context information, Jim Mick...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,1.0,YES,0.0,Assistant A and Assistant B both correctly ide...,CONTEXT
94,1166,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,"According to the context information, Melanie'...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides a relevant and a...,1.0,1.0,YES,0.5,,CONTEXT
226,1298,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...","According to the context information, Pyramid ...",1. The response partially matches the subject ...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides relevant informa...,1.0,1.0,YES,0.5,,CONTEXT
228,1300,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"According to the context information, the main...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT
271,1343,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,"According to the context information, the main...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT
366,1438,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","The director of the film ""We Are What We Are"" ...","1. Yes, the response matches the subject matte...",1.0,1. The retrieved context matches the subject m...,4.0,The generated answer is relevant and fully cor...,1.0,1.0,YES,0.0,Assistant B provides a more informative respon...,CONTEXT
409,1481,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,Melanie's priorities shift throughout the movi...,1. The response does not directly address the ...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the user q...,1.0,1.0,YES,0.0,Assistant B provides a more accurate and detai...,CONTEXT
541,1613,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...",Pyramid Head is the main antagonist of the mov...,1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the movie ...,1.0,0.0,NO,0.0,Assistant A's response is inaccurate as it sta...,CONTEXT
543,1615,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"The main conflict in the plot of ""Our Son, the...",1. The response provided does match the subjec...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the user q...,1.0,1.0,YES,0.5,,CONTEXT
586,1658,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,The main conflict in the plot revolves around ...,1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT


In [140]:
# All column names of the selected evaluation rows, as a plain Python list.
list_a = list(evaluation_df.columns)
list_a

['id',
 'llm_model',
 'embed_model',
 'collection_name',
 'question_id',
 'question',
 'context',
 'reference_answer',
 'response',
 'answer_feedback',
 'context_relevancy_score',
 'context_relevancy_feedback',
 'correctness_score',
 'correctness_feedback',
 'faithfulness_score',
 'relevancy_score',
 'relevancy_feedback',
 'model_score',
 'model_feedback',
 'chat_mode']

In [141]:
# Columns that are removed from the table in the filtering step below.
list_b = [
    'embed_model',
    'relevancy_feedback',
    'chat_mode'
]
list_b

['embed_model', 'relevancy_feedback', 'chat_mode']

In [142]:
# Keep every column of list_a that is not listed in list_b.
# A plain membership test leaves list_b untouched, so the cell can be re-run safely;
# the previous `or list_b.remove(x)` trick emptied list_b as a side effect, which
# made a second run keep all columns.
# (original idea: https://stackoverflow.com/questions/55013992/python-subtract-list-of-strings-from-another-list-of-strings)
columns_to_filter = [x for x in list_a if x not in list_b]
columns_to_filter

['id',
 'llm_model',
 'collection_name',
 'question_id',
 'question',
 'context',
 'reference_answer',
 'response',
 'answer_feedback',
 'context_relevancy_score',
 'context_relevancy_feedback',
 'correctness_score',
 'correctness_feedback',
 'faithfulness_score',
 'relevancy_score',
 'model_score',
 'model_feedback']

In [143]:
# Number of columns that remain after filtering; the filtered DataFrame below must have the same column count.
len(columns_to_filter)

17

In [144]:
# Restrict the selected rows to the columns that survived the filtering.
evaluation_df_f = evaluation_df.loc[:, columns_to_filter]
evaluation_df_f

Unnamed: 0,id,llm_model,collection_name,question_id,question,context,reference_answer,response,answer_feedback,context_relevancy_score,context_relevancy_feedback,correctness_score,correctness_feedback,faithfulness_score,relevancy_score,model_score,model_feedback
51,1123,oll_llama3_instruct,wiki_movie_plots_1024_100_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","According to the context information, Jim Mick...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,1.0,0.0,Assistant A and Assistant B both correctly ide...
94,1166,oll_llama3_instruct,wiki_movie_plots_1024_100_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,"According to the context information, Melanie'...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides a relevant and a...,1.0,1.0,0.5,
226,1298,oll_llama3_instruct,wiki_movie_plots_1024_100_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...","According to the context information, Pyramid ...",1. The response partially matches the subject ...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides relevant informa...,1.0,1.0,0.5,
228,1300,oll_llama3_instruct,wiki_movie_plots_1024_100_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"According to the context information, the main...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,0.0,Assistant B provides a more accurate and relev...
271,1343,oll_llama3_instruct,wiki_movie_plots_1024_100_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,"According to the context information, the main...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,0.0,Assistant B provides a more accurate and relev...
366,1438,oll_gemma_instruct,wiki_movie_plots_1024_100_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","The director of the film ""We Are What We Are"" ...","1. Yes, the response matches the subject matte...",1.0,1. The retrieved context matches the subject m...,4.0,The generated answer is relevant and fully cor...,1.0,1.0,0.0,Assistant B provides a more informative respon...
409,1481,oll_gemma_instruct,wiki_movie_plots_1024_100_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,Melanie's priorities shift throughout the movi...,1. The response does not directly address the ...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the user q...,1.0,1.0,0.0,Assistant B provides a more accurate and detai...
541,1613,oll_gemma_instruct,wiki_movie_plots_1024_100_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...",Pyramid Head is the main antagonist of the mov...,1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the movie ...,1.0,0.0,0.0,Assistant A's response is inaccurate as it sta...
543,1615,oll_gemma_instruct,wiki_movie_plots_1024_100_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"The main conflict in the plot of ""Our Son, the...",1. The response provided does match the subjec...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the user q...,1.0,1.0,0.5,
586,1658,oll_gemma_instruct,wiki_movie_plots_1024_100_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,The main conflict in the plot revolves around ...,1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,0.0,Assistant B provides a more accurate and relev...


The column count matches (20 original columns − 3 removed = 17), so the filtering was successful.

### Prepare Data for Excel

Retrieve all 5 Questions

In [145]:
# One representative row per selected question (the first occurrence of each id).
# drop_duplicates guarantees distinct questions; the previous `.head(5)` only did
# so when the first five rows happened to belong to five different questions.
get_5_rows_by_ids = (
    evaluation_df[evaluation_df['question_id'].isin(get_ids_from_file)]
    .drop_duplicates(subset='question_id')
)

### Get Questions

In [146]:
# Print each distinct question text on its own line.
for question_text in get_5_rows_by_ids['question'].unique():
    print(question_text)

Who is the director of the film "We Are What We Are"?
How does Melanie's priorities shift throughout the movie?
What role does Pyramid Head play in the climax of the movie?
What is the main conflict in the plot of "Our Son, the Matchmaker"?
What is the main conflict in the plot of "High School High"?


### Get Context

In [154]:
# Print each distinct context, preceded by a 100-character dashed rule.
for context_text in get_5_rows_by_ids['context'].unique():
    print('-' * 100, context_text, sep='\n')

----------------------------------------------------------------------------------------------------
release_year: 2013
title: We Are What We Are
origin_ethnicity: American
director: Jim Mickle
cast: Bill Sage, Julia Garner, Ambyr Childers, Odeya Rush
genre: horror
wiki_page: https://en.wikipedia.org/wiki/We_Are_What_We_Are_(2013_film)
plot: During a torrential downpour, a woman, later identified as Emma Parker, confusedly staggers into a store as the butcher receives a delivery. After several attempts to address her, she finally responds and explains that the foul weather has strongly affected her. The butcher says that it will get worse before it gets better, and she purchases groceries. As she leaves the store, she sees a poster that advertises missing teenage girls. Before she can reach her car, she begins bleeding from her mouth and loses consciousness after striking her head against a structure as she falls into a rain-filled ditch, where she drowns.
Later, the sheriff tells Fra

### Get Reference Answers

In [155]:
# Print each distinct reference answer, preceded by a 100-character dashed rule.
for reference_text in get_5_rows_by_ids['reference_answer'].unique():
    print('-' * 100, reference_text, sep='\n')

----------------------------------------------------------------------------------------------------
The director of the film "We Are What We Are" is Jim Mickle, known for his work in the horror genre.
----------------------------------------------------------------------------------------------------
Melanie's priorities shift from focusing on her job to realizing that she cares more about her son than her career.
----------------------------------------------------------------------------------------------------
Pyramid Head kills the Missionary, allowing Heather to rescue Vincent and Harry.
----------------------------------------------------------------------------------------------------
The main conflict revolves around Julie's past relationships and her current engagement conflicting with her rekindled feelings for her son's father, Steve.
----------------------------------------------------------------------------------------------------
The main conflict revolves around Richar

## Get Question1 Data for all Models

In [149]:
# Take the first id stored in the file and collect its rows for every model and
# collection, sorted by model name so each model's rows appear together.
question_1 = get_ids_from_file[0]
data_question_1 = evaluation_df[evaluation_df['question_id'] == question_1].sort_values(by=['llm_model'])
data_question_1

Unnamed: 0,id,llm_model,embed_model,collection_name,question_id,question,context,reference_answer,response,answer_feedback,context_relevancy_score,context_relevancy_feedback,correctness_score,correctness_feedback,faithfulness_score,relevancy_score,relevancy_feedback,model_score,model_feedback,chat_mode
366,1438,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","The director of the film ""We Are What We Are"" ...","1. Yes, the response matches the subject matte...",1.0,1. The retrieved context matches the subject m...,4.0,The generated answer is relevant and fully cor...,1.0,1.0,YES,0.0,Assistant B provides a more informative respon...,CONTEXT
1311,2383,oll_gemma_instruct,mxbai,wiki_movie_plots_512_50_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","The director of the film ""We Are What We Are"" ...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,1.0,YES,0.0,Assistant B provides a slightly more informati...,CONTEXT
2256,3328,oll_gemma_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","The director of the film ""We Are What We Are"" ...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,1.0,YES,0.0,Assistant B provides a more informative respon...,CONTEXT
51,1123,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","According to the context information, Jim Mick...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,1.0,YES,0.0,Assistant A and Assistant B both correctly ide...,CONTEXT
996,2068,oll_llama3_instruct,mxbai,wiki_movie_plots_512_50_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","According to the context information provided,...","1. Yes, the response matches the subject matte...",1.0,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,1.0,YES,0.0,Assistant B provides a slightly more informati...,CONTEXT
1941,3013,oll_llama3_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","According to the context information provided,...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,1.0,YES,0.0,Assistant B provides a slightly more informati...,CONTEXT
681,1753,oll_mistral_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","The director of the movie ""We Are What We Are...",1. The response provided matches the subject m...,1.0,1. The retrieved context matches the subject m...,3.5,The generated answer correctly identifies the ...,1.0,1.0,YES,0.5,,CONTEXT
1626,2698,oll_mistral_instruct,mxbai,wiki_movie_plots_512_50_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","The director of the film ""We Are What We Are""...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer correctly identifies the ...,1.0,1.0,YES,0.5,,CONTEXT
2571,3643,oll_mistral_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,51,"Who is the director of the film ""We Are What W...",release_year: 2013\ntitle: We Are What We Are\...,"The director of the film ""We Are What We Are"" ...","The director of the film ""We Are What We Are""...",1. The response does match the subject matter ...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer provides relevant informa...,1.0,1.0,YES,1.0,Assistant A provides a more detailed and compr...,CONTEXT


In [151]:
# Print the reference answer, model answer and scores for every row of question 1.
feedback_block(data_question_1)

####################################################################################################
gemma_1024_100
Reference Answer: The director of the film "We Are What We Are" is Jim Mickle, known for his work in the horror genre. 
------------------------------------------------
Model Answer:
The director of the film "We Are What We Are" is Jim Mickle
------------------------------------------------
Context Relevancy Score: 1.0
Correctness Score: 4.0
Faithfulness Score: 1.0
Relevancy Score: 1.0
Model Score: 0.0

####################################################################################################
gemma_512_50
Reference Answer: The director of the film "We Are What We Are" is Jim Mickle, known for his work in the horror genre. 
------------------------------------------------
Model Answer:
The director of the film "We Are What We Are" is Jim Mickle
------------------------------------------------
Context Relevancy Score: 1.0
Correctness Score: 4.5
Faithfulness Score:

## Get Question2 Data for all Models

In [156]:
# Collect the rows of the second reviewed question for every model and collection,
# sorted by model name.
# NOTE(review): index 2 selects the third id stored in the file (94 in the recorded
# run), which is the second question in the printed question list — confirm that this
# ordering, rather than file order, is the intended meaning of "Question2".
question_2 = get_ids_from_file[2]
data_question_2 = evaluation_df[evaluation_df['question_id'] == question_2].sort_values(by=['llm_model'])
data_question_2

Unnamed: 0,id,llm_model,embed_model,collection_name,question_id,question,context,reference_answer,response,answer_feedback,context_relevancy_score,context_relevancy_feedback,correctness_score,correctness_feedback,faithfulness_score,relevancy_score,relevancy_feedback,model_score,model_feedback,chat_mode
409,1481,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,Melanie's priorities shift throughout the movi...,1. The response does not directly address the ...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the user q...,1.0,1.0,YES,0.0,Assistant B provides a more accurate and detai...,CONTEXT
1354,2426,oll_gemma_instruct,mxbai,wiki_movie_plots_512_50_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,Melanie's priorities shift from her career amb...,1. The response does not directly address the ...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer captures the essence of M...,1.0,1.0,YES,1.0,Assistant A provides a more detailed and compr...,CONTEXT
2299,3371,oll_gemma_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,Melanie's priorities shift from her initial fo...,1. The response partially matches the subject ...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer captures the essence of M...,1.0,1.0,YES,0.5,,CONTEXT
94,1166,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,"According to the context information, Melanie'...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides a relevant and a...,1.0,1.0,YES,0.5,,CONTEXT
1039,2111,oll_llama3_instruct,mxbai,wiki_movie_plots_512_50_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,"According to the context information provided,...",1. The response does match the subject matter ...,1.0,1. The retrieved context matches the subject m...,4.5,The generated answer provides a comprehensive ...,1.0,1.0,YES,0.0,Assistant A provides a more detailed and compr...,CONTEXT
1984,3056,oll_llama3_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,"According to the context information provided,...",1. The response does match the subject matter ...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides a relevant and a...,1.0,1.0,YES,1.0,Assistant A provides a more detailed and compr...,CONTEXT
724,1796,oll_mistral_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,"Throughout the movie ""One Fine Day,"" Melanie'...",1. The response provided does not directly add...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,,CONTEXT
1669,2741,oll_mistral_instruct,mxbai,wiki_movie_plots_512_50_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,"Throughout the movie ""One Fine Day,"" Melanie'...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,4.5,The generated answer accurately captures the e...,1.0,1.0,YES,1.0,Assistant A provides a detailed and comprehens...,CONTEXT
2614,3686,oll_mistral_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,94,How does Melanie's priorities shift throughout...,release_year: 1996\ntitle: One Fine Day\norigi...,Melanie's priorities shift from focusing on he...,"In the movie ""One Fine Day,"" Melanie's priori...",1. The response does match the subject matter ...,1.0,1. The retrieved context matches the subject m...,3.5,The generated answer provides a detailed summa...,1.0,1.0,YES,0.5,,CONTEXT


In [157]:
# Print the manual-review report for question 2: per the output below, each
# entry shows a model/collection label, the reference answer, the model
# answer and the evaluation scores. `feedback_block` is defined earlier in
# this notebook.
feedback_block(data_question_2)

####################################################################################################
gemma_1024_100
Reference Answer: Melanie's priorities shift from focusing on her job to realizing that she cares more about her son than her career. 
------------------------------------------------
Model Answer:
Melanie's priorities shift throughout the movie as she balances her desire to pursue her dreams with her responsibilities as a mother and daughter
------------------------------------------------
Context Relevancy Score: 1.0
Correctness Score: 3.0
Faithfulness Score: 1.0
Relevancy Score: 1.0
Model Score: 0.0

####################################################################################################
gemma_512_50
Reference Answer: Melanie's priorities shift from focusing on her job to realizing that she cares more about her son than her career. 
------------------------------------------------
Model Answer:
Melanie's priorities shift from her career ambitions to priorit

## Get Question3 Data for all Models

In [161]:
# Third manually reviewed question: take its id from entry 4 of
# `get_ids_from_file` (presumably the sampled question ids -- confirm where
# it is defined), keep only the evaluation rows for that question, and order
# them by `llm_model` so each model's rows sit together.
question_3 = get_ids_from_file[4]
data_question_3 = (
    evaluation_df
    .loc[evaluation_df['question_id'] == question_3]
    .sort_values(by=['llm_model'])
)
data_question_3

Unnamed: 0,id,llm_model,embed_model,collection_name,question_id,question,context,reference_answer,response,answer_feedback,context_relevancy_score,context_relevancy_feedback,correctness_score,correctness_feedback,faithfulness_score,relevancy_score,relevancy_feedback,model_score,model_feedback,chat_mode
541,1613,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...",Pyramid Head is the main antagonist of the mov...,1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the movie ...,1.0,0.0,NO,0.0,Assistant A's response is inaccurate as it sta...,CONTEXT
1486,2558,oll_gemma_instruct,mxbai,wiki_movie_plots_512_50_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...",Pyramid Head is revealed to be the guardian of...,1. The response does not directly address the ...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer is relevant and fully cor...,1.0,0.0,NO,0.0,Assistant A's response provides a brief and in...,CONTEXT
2431,3503,oll_gemma_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...",Pyramid Head is the final boss of the movie an...,1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer is relevant and fully cor...,1.0,0.0,NO,0.0,Assistant A's response is not accurate as it s...,CONTEXT
226,1298,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...","According to the context information, Pyramid ...",1. The response partially matches the subject ...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides relevant informa...,1.0,1.0,YES,0.5,,CONTEXT
1171,2243,oll_llama3_instruct,mxbai,wiki_movie_plots_512_50_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...","According to the context information provided,...",1. The response partially matches the subject ...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides a relevant summa...,1.0,1.0,YES,0.5,,CONTEXT
2116,3188,oll_llama3_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...","According to the context information provided,...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is partially relevant as ...,1.0,0.0,NO,0.5,,CONTEXT
856,1928,oll_mistral_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...","In the climax of Silent Hill: Revelation 3D, ...",1. The response provided does not match the sp...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer provides relevant informa...,1.0,0.0,NO,0.5,,CONTEXT
1801,2873,oll_mistral_instruct,mxbai,wiki_movie_plots_512_50_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...","In the climax of the movie, Pyramid Head play...",1. The response provided does match the subjec...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer provides relevant informa...,1.0,1.0,YES,1.0,Assistant A provides a detailed and relevant r...,CONTEXT
2746,3818,oll_mistral_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,226,What role does Pyramid Head play in the climax...,release_year: 2012\ntitle: Silent Hill: Revela...,"Pyramid Head kills the Missionary, allowing He...","In the movie ""Silent Hill: Revelation 3D,"" Py...",1. The response provided discusses the charact...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer provides relevant informa...,1.0,1.0,YES,0.0,,CONTEXT


In [162]:
# Print the manual-review report for question 3: per the output below, each
# entry shows a model/collection label, the reference answer, the model
# answer and the evaluation scores. `feedback_block` is defined earlier in
# this notebook.
feedback_block(data_question_3)

####################################################################################################
gemma_1024_100
Reference Answer: Pyramid Head kills the Missionary, allowing Heather to rescue Vincent and Harry. 
------------------------------------------------
Model Answer:
Pyramid Head is the main antagonist of the movie and is responsible for capturing and torturing Heather
------------------------------------------------
Context Relevancy Score: 1.0
Correctness Score: 3.0
Faithfulness Score: 1.0
Relevancy Score: 0.0
Model Score: 0.0

####################################################################################################
gemma_512_50
Reference Answer: Pyramid Head kills the Missionary, allowing Heather to rescue Vincent and Harry. 
------------------------------------------------
Model Answer:
Pyramid Head is revealed to be the guardian of the portal to Silent Hill, and he attacks Heather in
------------------------------------------------
Context Relevancy Score: 1.

## Get Question4 Data for all Models

In [168]:
# Fourth manually reviewed question: take its id from entry 1 of
# `get_ids_from_file` (presumably the sampled question ids -- confirm where
# it is defined), keep only the evaluation rows for that question, and order
# them by `llm_model` so each model's rows sit together.
question_4 = get_ids_from_file[1]
data_question_4 = (
    evaluation_df
    .loc[evaluation_df['question_id'] == question_4]
    .sort_values(by=['llm_model'])
)
data_question_4

Unnamed: 0,id,llm_model,embed_model,collection_name,question_id,question,context,reference_answer,response,answer_feedback,context_relevancy_score,context_relevancy_feedback,correctness_score,correctness_feedback,faithfulness_score,relevancy_score,relevancy_feedback,model_score,model_feedback,chat_mode
543,1615,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"The main conflict in the plot of ""Our Son, the...",1. The response provided does match the subjec...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the user q...,1.0,1.0,YES,0.5,,CONTEXT
1488,2560,oll_gemma_instruct,mxbai,wiki_movie_plots_512_50_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"The main conflict in the plot of ""Our Son, the...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more comprehensive and ...,CONTEXT
2433,3505,oll_gemma_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"The main conflict in the plot of ""Our Son, the...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the user q...,1.0,1.0,YES,0.5,,CONTEXT
228,1300,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"According to the context information, the main...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT
1173,2245,oll_llama3_instruct,mxbai,wiki_movie_plots_512_50_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"According to the context information provided,...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant as it mention...,0.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT
2118,3190,oll_llama3_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"According to the context information provided,...",1. The response partially matches the subject ...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the user q...,1.0,0.0,NO,0.5,,CONTEXT
858,1930,oll_mistral_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"The main conflict in the plot of ""Our Son, th...",1. The response provided does match the subjec...,0.5,1. The retrieved context does not directly add...,4.0,The generated answer provides a relevant summa...,1.0,0.0,NO,1.0,Assistant A provides a detailed and accurate r...,CONTEXT
1803,2875,oll_mistral_instruct,mxbai,wiki_movie_plots_512_50_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"The main conflict in the plot of ""Our Son, th...",1. Does the provided response match the subjec...,1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides a relevant summa...,1.0,1.0,YES,0.5,,CONTEXT
2748,3820,oll_mistral_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,228,"What is the main conflict in the plot of ""Our ...","release_year: 1996\ntitle: Our Son, the Matchm...",The main conflict revolves around Julie's past...,"In the movie ""Our Son, the Matchmaker,"" the m...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.5,The generated answer is partially relevant as ...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT


In [169]:
# Print the manual-review report for question 4: per the output below, each
# entry shows a model/collection label, the reference answer, the model
# answer and the evaluation scores. `feedback_block` is defined earlier in
# this notebook.
feedback_block(data_question_4)

####################################################################################################
gemma_1024_100
Reference Answer: The main conflict revolves around Julie's past relationships and her current engagement conflicting with her rekindled feelings for her son's father, Steve. 
------------------------------------------------
Model Answer:
The main conflict in the plot of "Our Son, the Matchmaker" revolves around Julie's reunion with her son, Scott, and her reconciliation with her ex-lover, Steve
------------------------------------------------
Context Relevancy Score: 1.0
Correctness Score: 3.0
Faithfulness Score: 1.0
Relevancy Score: 1.0
Model Score: 0.5

####################################################################################################
gemma_512_50
Reference Answer: The main conflict revolves around Julie's past relationships and her current engagement conflicting with her rekindled feelings for her son's father, Steve. 
-------------------------------

## Get Question5 Data for all Models

In [171]:
# Fifth manually reviewed question: take its id from entry 3 of
# `get_ids_from_file` (presumably the sampled question ids -- confirm where
# it is defined), keep only the evaluation rows for that question, and order
# them by `llm_model` so each model's rows sit together.
question_5 = get_ids_from_file[3]
data_question_5 = (
    evaluation_df
    .loc[evaluation_df['question_id'] == question_5]
    .sort_values(by=['llm_model'])
)
data_question_5

Unnamed: 0,id,llm_model,embed_model,collection_name,question_id,question,context,reference_answer,response,answer_feedback,context_relevancy_score,context_relevancy_feedback,correctness_score,correctness_feedback,faithfulness_score,relevancy_score,relevancy_feedback,model_score,model_feedback,chat_mode
586,1658,oll_gemma_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,The main conflict in the plot revolves around ...,1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT
1531,2603,oll_gemma_instruct,mxbai,wiki_movie_plots_512_50_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,"The main conflict in the plot of ""High School ...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a much more accurate and ...,CONTEXT
2476,3548,oll_gemma_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,The main conflict in the plot is the conflict ...,1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and detai...,CONTEXT
271,1343,oll_llama3_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,"According to the context information, the main...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT
1216,2288,oll_llama3_instruct,mxbai,wiki_movie_plots_512_50_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,"According to the context information provided,...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.0,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT
2161,3233,oll_llama3_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,"According to the context information provided,...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,4.5,The generated answer is highly relevant to the...,1.0,0.0,NO,0.0,Assistant B provided a more accurate and relev...,CONTEXT
901,1973,oll_mistral_instruct,mxbai,wiki_movie_plots_1024_100_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,"The main conflict in the plot of ""High School...",1. The provided response does not match the su...,1.0,1. The retrieved context matches the subject m...,3.0,The generated answer is relevant to the movie ...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT
1846,2918,oll_mistral_instruct,mxbai,wiki_movie_plots_512_50_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,"The main conflict in the plot of ""High School...","1. No, the provided response does not match th...",1.0,1. The retrieved context matches the subject m...,4.0,The generated answer provides a relevant summa...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT
2791,3863,oll_mistral_instruct,mxbai,wiki_movie_plots_2048_200_mxbai,271,"What is the main conflict in the plot of ""High...",release_year: 1996\ntitle: High School High\no...,The main conflict revolves around Richard Clar...,"In the movie ""High School High,"" the main con...",1. The response does not match the subject mat...,1.0,1. The retrieved context matches the subject m...,2.5,The generated answer is somewhat relevant as i...,1.0,0.0,NO,0.0,Assistant B provides a more accurate and relev...,CONTEXT


In [172]:
# Print the manual-review report for question 5: per the output below, each
# entry shows a model/collection label, the reference answer, the model
# answer and the evaluation scores. `feedback_block` is defined earlier in
# this notebook.
feedback_block(data_question_5)

####################################################################################################
gemma_1024_100
Reference Answer: The main conflict revolves around Richard Clark, a prep school teacher, trying to transform the inner-city Marion Barry High School while facing opposition from a gang leader and dealing with tampered exam scores. 
------------------------------------------------
Model Answer:
The main conflict in the plot revolves around Will's attempt to balance his newfound love for basketball with his academic responsibilities
------------------------------------------------
Context Relevancy Score: 1.0
Correctness Score: 2.0
Faithfulness Score: 1.0
Relevancy Score: 0.0
Model Score: 0.0

####################################################################################################
gemma_512_50
Reference Answer: The main conflict revolves around Richard Clark, a prep school teacher, trying to transform the inner-city Marion Barry High School while facing opposit