In [9]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

In [10]:


def load_excel_to_dataframe(file_path):
    # Load the Excel file into a pandas DataFrame
    df = pd.read_excel(file_path)
    return df

# Example usage:
# dataframe = load_excel_to_dataframe('your_file_path.xlsx')

In [7]:
def visualize_gemini_evaluation(file_path):
    # Load the data
    df = load_excel_to_dataframe(file_path)

    # Count the occurrences of each evaluation score
    value_counts = df['Column1.gemini_evaluation'].value_counts().sort_index()

    # Define pastel colors
    pastel_colors = ['#FFB3BA', '#BAFFC9', '#BAE1FF', '#FFDFBA']

    # Create a bar chart
    fig = go.Figure(data=[
        go.Bar(
            x=value_counts.index,
            y=value_counts.values,
            marker_color=pastel_colors,
            text=value_counts.values,
            textposition='outside'
        )
    ])

    # Update layout for better visualization
    fig.update_layout(
        title={
            'text': 'Distribution of Gemini Evaluation Scores',
            'font': {'size': 24, 'color': '#4a4a4a'}
        },
        xaxis_title='Gemini Evaluation Score',
        yaxis_title='Count',
        xaxis=dict(tickmode='linear', tick0=1, dtick=1),
        plot_bgcolor='rgba(0,0,0,0)',
        width=800,
        height=500,
        margin=dict(l=50, r=50, t=80, b=50)
    )

    # Add a subtle grid to the y-axis
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#e0e0e0')

    # Show the plot
    fig.show()

# Example usage:
# visualize_gemini_evaluation('path_to_your_file.xlsx')

In [11]:
visualize_gemini_evaluation('D:\\Dissertation - City, Univeristy of London\\Evaluating-AI-Learning-Assistants\\test output\\run1_gpt3-5_businessAnalyst.xlsx')

In [35]:
import pandas as pd
import os
import re

def process_and_save_file(file_path):
    df = pd.read_excel(file_path)
    
    # Remove 'Column1.' prefix from column names
    df.columns = [col.replace('Column1.', '') for col in df.columns]
    
    # Standardize the 'hit' column based on 'hitRelevance'
    df['hit'] = df['hitRelevance'].apply(lambda x: True if x >= 0.5 else False)
    
    # Extract metadata from filename
    filename = os.path.basename(file_path)
    match = re.search(r'run(\d+)_(gpt3-5|gpt4o)_(?i)(businessAnalyst|developer|static|tester)', filename)
    if match:
        df['run'] = match.group(1)
        df['model'] = match.group(2)
        df['persona'] = match.group(3).lower()
    else:
        df['run'] = 'unknown'
        df['model'] = 'unknown'
        df['persona'] = 'unknown'
        print(f"Warning: Couldn't parse metadata for file {file_path}")
    
    # Save the modified DataFrame back to the Excel file
    df.to_excel(file_path, index=False)
    
    return df

def process_all_files(file_paths):
    all_data = []
    for file_path in file_paths:
        df = process_and_save_file(file_path)
        all_data.append(df)
    
    return pd.concat(all_data, ignore_index=True)

# List of file paths
file_paths = [
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_Static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_tester.xlsx"
]

# Process all files and get combined data
combined_df = process_all_files(file_paths)

print("All files have been processed and updated.")

# Print summary of processed data
print("\nSummary of processed data:")
print(combined_df.groupby(['run', 'model', 'persona', 'hit']).size().reset_index(name='count'))


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).


Pandas requires ver

All files have been processed and updated.

Summary of processed data:
   run   model          persona    hit  count
0    1  gpt3-5  businessanalyst  False     21
1    1  gpt3-5  businessanalyst   True     79
2    1  gpt3-5        developer  False      4
3    1  gpt3-5        developer   True     96
4    1  gpt3-5           static  False      9
5    1  gpt3-5           static   True     90
6    1  gpt3-5           tester  False     10
7    1  gpt3-5           tester   True     90
8    1   gpt4o  businessanalyst  False     29
9    1   gpt4o  businessanalyst   True     71
10   1   gpt4o        developer  False      7
11   1   gpt4o        developer   True     93
12   1   gpt4o           static  False     11
13   1   gpt4o           static   True     88
14   1   gpt4o           tester  False     20
15   1   gpt4o           tester   True     80
16   2  gpt3-5  businessanalyst  False     16
17   2  gpt3-5  businessanalyst   True     84
18   2  gpt3-5        developer  False      3
19   2  g

In [36]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import os

# Function to load all Excel files
def load_files(file_paths):
    dataframes = []
    for file_path in file_paths:
        df = pd.read_excel(file_path)
        # Remove 'Column1.' prefix from column names
        df.columns = [col.replace('Column1.', '') for col in df.columns]
        # Add filename as a column
        df['filename'] = os.path.basename(file_path)
        dataframes.append(df)
    return pd.concat(dataframes, ignore_index=True)

# Load all files
file_paths = [
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_tester.xlsx"
]

df = load_files(file_paths)

# Set a consistent color palette and theme
color_palette = px.colors.qualitative.Pastel
theme_layout = dict(
    font=dict(family="Arial", size=12),
    plot_bgcolor='rgba(240,240,240,0.8)',
    paper_bgcolor='rgba(240,240,240,0.8)',
    title_font=dict(size=20, color="#333333"),
    legend_title_font=dict(size=14),
    legend_font=dict(size=12),
)

# 1. Number of hits (True/False)
def plot_hit_counts():
    hit_counts = df.groupby('filename')['hit'].value_counts().unstack()
    fig = px.bar(hit_counts, barmode='group', color_discrete_sequence=color_palette)
    fig.update_layout(
        title="Number of Hits by File",
        xaxis_title="Filename",
        yaxis_title="Count",
        legend_title="Hit",
        **theme_layout
    )
    fig.show()

# 2. HitRelevance box plots
def plot_hit_relevance_boxplots():
    fig = px.box(df, y='hitRelevance', x='filename', color='filename',
                 color_discrete_sequence=color_palette)
    fig.update_layout(title="Hit Relevance Distribution by File", **theme_layout)
    fig.show()

# 3. Gemini Score
def plot_gemini_score():
    fig = px.histogram(df, x='gemini_evaluation', color='filename',
                       color_discrete_sequence=color_palette)
    fig.update_layout(title="Gemini Score Distribution", **theme_layout)
    fig.show()

# 4. Follow up on topic
def plot_follow_up_on_topic():
    df['follow_up_on_topic'] = df['follow_up_on_topic'].str.contains('yes', case=False)
    follow_up_counts = df.groupby('filename')['follow_up_on_topic'].value_counts().unstack()
    fig = px.bar(follow_up_counts, barmode='group', color_discrete_sequence=color_palette)
    fig.update_layout(
        title="Follow-up on Topic by File",
        xaxis_title="Filename",
        yaxis_title="Count",
        legend_title="Follow-up on Topic",
        **theme_layout
    )
    fig.show()

# 5. HitRelevance vs Gemini score
def plot_hit_relevance_vs_gemini():
    fig = px.scatter(df, x='hitRelevance', y='gemini_evaluation', color='filename',
                     color_discrete_sequence=color_palette)
    fig.update_layout(title="Hit Relevance vs Gemini Score", **theme_layout)
    fig.show()

# 6. Average HitRelevance
def plot_avg_hit_relevance():
    avg_hit_relevance = df.groupby('filename')['hitRelevance'].mean().reset_index()
    fig = px.bar(avg_hit_relevance, x='filename', y='hitRelevance',
                 color_discrete_sequence=color_palette)
    fig.update_layout(title="Average Hit Relevance by File", **theme_layout)
    fig.show()

# 7. Unique questions generated in each file
def plot_unique_questions():
    unique_questions = df.groupby('filename')['question'].nunique().reset_index()
    fig = px.bar(unique_questions, x='filename', y='question',
                 color_discrete_sequence=color_palette)
    fig.update_layout(
        title="Unique Questions Generated by File",
        xaxis_title="Filename",
        yaxis_title="Number of Unique Questions",
        **theme_layout
    )
    fig.show()

# 8. Unique summaries counts compared
def plot_unique_summaries():
    unique_summaries = df.groupby('filename')['summary'].nunique().reset_index()
    fig = px.bar(unique_summaries, x='filename', y='summary',
                 color_discrete_sequence=color_palette)
    fig.update_layout(
        title="Unique Summaries Count by File",
        xaxis_title="Filename",
        yaxis_title="Number of Unique Summaries",
        **theme_layout
    )
    fig.show()

# Generate all visualizations
plot_hit_counts()
plot_hit_relevance_boxplots()
plot_gemini_score()
plot_follow_up_on_topic()
plot_hit_relevance_vs_gemini()
plot_avg_hit_relevance()
plot_unique_questions()
plot_unique_summaries()

In [32]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
import re

# Function to load all Excel files
def load_files(file_paths):
    dataframes = []
    for file_path in file_paths:
        df = pd.read_excel(file_path)
        # Remove 'Column1.' prefix from column names
        df.columns = [col.replace('Column1.', '') for col in df.columns]
        # Add filename as a column
        df['filename'] = os.path.basename(file_path)
        
        # Extract metadata from filename
        match = re.search(r'run(\d+)_(gpt3-5|gpt4o)_(businessAnalyst|developer|static|tester)', df['filename'][0])
        if match:
            df['run'] = match.group(1)
            df['model'] = match.group(2)
            df['persona'] = match.group(3)
        else:
            df['run'] = 'unknown'
            df['model'] = 'unknown'
            df['persona'] = 'unknown'
        
        dataframes.append(df)
    return pd.concat(dataframes, ignore_index=True)

# Load all files
file_paths = [
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_tester.xlsx"
]

df = load_files(file_paths)

# Set a consistent color palette and theme
color_palette = px.colors.qualitative.Pastel
theme_layout = dict(
    font=dict(family="Arial", size=12),
    plot_bgcolor='rgba(240,240,240,0.8)',
    paper_bgcolor='rgba(240,240,240,0.8)',
    title_font=dict(size=20, color="#333333"),
)

def plot_hit_counts():
    hit_counts = df.groupby(['model', 'run'])['hit'].value_counts().unstack()
    
    # Reset the index to flatten the MultiIndex
    hit_counts_flat = hit_counts.reset_index()
    
    # Create a new column combining 'model' and 'run'
    hit_counts_flat['model_run'] = hit_counts_flat['model'] + ' - ' + hit_counts_flat['run']
    
    fig = px.bar(hit_counts_flat, x='model_run', y=[True, False], barmode='group', 
                 color_discrete_sequence=color_palette)
    
    fig.update_layout(
        title="Number of Hits by Model and Run",
        xaxis_title="Model - Run",
        yaxis_title="Count",
        legend_title="Hit",
        **theme_layout
    )
    fig.show()

# 2. HitRelevance box plots
def plot_hit_relevance_boxplots():
    fig = px.box(df, y='hitRelevance', x='model', color='model', facet_col='run',
                 color_discrete_sequence=color_palette)
    fig.update_layout(title="Hit Relevance Distribution by Model and Run", **theme_layout)
    fig.show()

# 3. Gemini Score
def plot_gemini_score():
    fig = px.histogram(df, x='gemini_evaluation', color='model', facet_col='run',
                       color_discrete_sequence=color_palette)
    fig.update_layout(title="Gemini Score Distribution by Model and Run", **theme_layout)
    fig.show()

# 4. Follow up on topic
def plot_follow_up_on_topic():
    df['follow_up_on_topic'] = df['follow_up_on_topic'].str.contains('yes', case=False)
    follow_up_counts = df.groupby(['model', 'run'])['follow_up_on_topic'].value_counts().unstack()
    
    fig = px.bar(follow_up_counts, barmode='group', color_discrete_sequence=color_palette)
    fig.update_layout(
        title="Follow-up on Topic by Model and Run",
        xaxis_title="Model",
        yaxis_title="Count",
        legend_title="Follow-up on Topic",
        **theme_layout
    )
    fig.show()

# 5. HitRelevance vs Gemini score
def plot_hit_relevance_vs_gemini():
    fig = px.scatter(df, x='hitRelevance', y='gemini_evaluation', color='model',
                     facet_col='run', color_discrete_sequence=color_palette)
    
    fig.update_layout(title="Hit Relevance vs Gemini Score by Model and Run", **theme_layout)
    fig.show()

# 6. Average HitRelevance
def plot_avg_hit_relevance():
    avg_hit_relevance = df.groupby(['model', 'run'])['hitRelevance'].mean().reset_index()
    
    fig = px.bar(avg_hit_relevance, x='model', y='hitRelevance',
                 color='run', color_discrete_sequence=color_palette)
    
    fig.update_layout(title="Average Hit Relevance by Model and Run", **theme_layout)
    fig.show()

# 7. Unique questions generated in each file
def plot_unique_questions():
    unique_questions = df.groupby(['model', 'run'])['question'].nunique().reset_index()
    
    fig = px.bar(unique_questions, x='model', y='question',
                 color='run', color_discrete_sequence=color_palette)
    
    fig.update_layout(
        title="Unique Questions Generated by Model and Run",
        xaxis_title="Model",
        yaxis_title="Number of Unique Questions",
        **theme_layout
    )
    
    fig.show()

# 8. Unique summaries counts compared
def plot_unique_summaries():
    unique_summaries = df.groupby(['model', 'run'])['summary'].nunique().reset_index()
    
    fig = px.bar(unique_summaries, x='model', y='summary',
                 color='run', color_discrete_sequence=color_palette)
    
    fig.update_layout(
        title="Unique Summaries Count by Model and Run",
        xaxis_title="Model",
        yaxis_title="Number of Unique Summaries",
        **theme_layout
    )
    
    fig.show()

# Generate all visualizations
plot_hit_counts()
plot_hit_relevance_boxplots()
plot_gemini_score()
plot_follow_up_on_topic()
plot_hit_relevance_vs_gemini()
plot_avg_hit_relevance()
plot_unique_questions()
plot_unique_summaries()

TypeError: Data frame index is a pandas MultiIndex. pandas MultiIndex is not supported by plotly express at the moment.

In [29]:
import pandas as pd
import os
import re

def load_files(file_paths):
    unknown_files = []
    dataframes = []
    for file_path in file_paths:
        df = pd.read_excel(file_path)
        # Remove 'Column1.' prefix from column names
        df.columns = [col.replace('Column1.', '') for col in df.columns]
        # Add filename as a column
        df['filename'] = os.path.basename(file_path)
        
        # Extract metadata from filename
        match = re.search(r'run(\d+)_(gpt3-5|gpt4o)_(businessAnalyst|developer|static|tester)', df['filename'][0])
        if match:
            df['run'] = match.group(1)
            df['model'] = match.group(2)
            df['persona'] = match.group(3)
        else:
            df['run'] = 'unknown'
            df['model'] = 'unknown'
            df['persona'] = 'unknown'
            unknown_files.append(file_path)
        
        dataframes.append(df)
    
    combined_df = pd.concat(dataframes, ignore_index=True)
    return combined_df, unknown_files

# List of file paths
file_paths = [
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt3-5_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run1_gpt4o_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt3-5_tester.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_businessAnalyst.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_developer.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_static.xlsx",
    r"D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_tester.xlsx"
]

# Load files and get unknown files
df, unknown_files = load_files(file_paths)

# Print paths of unknown files
if unknown_files:
    print("The following files could not be properly parsed:")
    for file_path in unknown_files:
        print(file_path)
else:
    print("All files were successfully parsed.")

# Print summary of parsed data
print("\nSummary of parsed data:")
print(df.groupby(['run', 'model', 'persona']).size().reset_index(name='count'))

The following files could not be properly parsed:
D:\Dissertation - City, Univeristy of London\Evaluating-AI-Learning-Assistants\test output\run2_gpt4o_Static.xlsx

Summary of parsed data:
        run    model          persona  count
0         1   gpt3-5  businessAnalyst    100
1         1   gpt3-5        developer    100
2         1   gpt3-5           static     99
3         1   gpt3-5           tester    100
4         1    gpt4o  businessAnalyst    100
5         1    gpt4o        developer    100
6         1    gpt4o           static     99
7         1    gpt4o           tester    100
8         2   gpt3-5  businessAnalyst    100
9         2   gpt3-5        developer    100
10        2   gpt3-5           static     99
11        2   gpt3-5           tester    100
12        2    gpt4o  businessAnalyst    100
13        2    gpt4o        developer    100
14        2    gpt4o           tester    100
15  unknown  unknown          unknown     99


In [14]:


def load_excel_to_dataframe(file_path):
    return pd.read_excel(file_path)

def visualize_hit_relevance_vs_gemini_evaluation(file_path):
    # Load the data
    df = load_excel_to_dataframe(file_path)

    # Create a color scale
    colors = px.colors.sequential.Viridis

    # Create the scatter plot
    fig = go.Figure(data=go.Scatter(
        x=df['Column1.hitRelevance'],
        y=df['Column1.gemini_evaluation'],
        mode='markers',
        marker=dict(
            size=10,
            color=df['Column1.gemini_evaluation'],
            colorscale=colors,
            colorbar=dict(title="Gemini Evaluation"),
            showscale=True
        ),
        text=df['Column1.gemini_evaluation'],
        hoverinfo='text'
    ))

    # Update layout for better visualization
    fig.update_layout(
        title={
            'text': 'Hit Relevance vs Gemini Evaluation',
            'font': {'size': 24, 'color': '#4a4a4a'}
        },
        xaxis_title='Hit Relevance (Cosine Similarity)',
        yaxis_title='Gemini Evaluation Score',
        xaxis=dict(tickformat='.2f'),
        yaxis=dict(tickmode='linear', tick0=1, dtick=1),
        plot_bgcolor='rgba(240,240,240,0.8)',
        width=900,
        height=600,
        margin=dict(l=60, r=60, t=80, b=60)
    )

    # Add a subtle grid
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='white')
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='white')

    # Add trendline
    fig.add_trace(go.Scatter(
        x=df['Column1.hitRelevance'],
        y=df['Column1.hitRelevance'].map(lambda x: x * 4),  # Assuming max Gemini score is 4
        mode='lines',
        name='Perfect Correlation',
        line=dict(color='rgba(255,0,0,0.5)', dash='dash')
    ))

    # Show the plot
    fig.show()


visualize_hit_relevance_vs_gemini_evaluation('D:\\Dissertation - City, Univeristy of London\\Evaluating-AI-Learning-Assistants\\test output\\run1_gpt3-5_businessAnalyst.xlsx')

In [16]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

def visualize_hit_relevance_vs_gemini_evaluation_heatmap(file_path):
    df = pd.read_excel(file_path)
    
    # Create bins for hit relevance and gemini evaluation
    hit_relevance_bins = pd.cut(df['Column1.hitRelevance'], bins=10)
    gemini_eval_bins = pd.cut(df['Column1.gemini_evaluation'], bins=4)
    
    # Create a 2D histogram
    heatmap_data = pd.crosstab(hit_relevance_bins, gemini_eval_bins)
    
    # Create the heatmap
    fig = go.Figure(data=go.Heatmap(
        z=heatmap_data.values,
        x=gemini_eval_bins.categories.mid,
        y=hit_relevance_bins.categories.mid,
        colorscale='Viridis'
    ))
    
    fig.update_layout(
        title='Hit Relevance vs Gemini Evaluation Heatmap',
        xaxis_title='Gemini Evaluation Score',
        yaxis_title='Hit Relevance (Cosine Similarity)',
        width=800,
        height=600
    )
    
    fig.show()

# Usage:
visualize_hit_relevance_vs_gemini_evaluation('D:\\Dissertation - City, Univeristy of London\\Evaluating-AI-Learning-Assistants\\test output\\run1_gpt3-5_businessAnalyst.xlsx')

In [20]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Generate mock data
np.random.seed(42)
data = {
    'HitRelevance': np.random.choice([True, False], size=100, p=[0.7, 0.3]),
    'RelevanceScore': np.random.uniform(0, 1, size=100),
    'GeminiEvaluation': np.random.uniform(50, 100, size=100),
    'FollowUpQuestion': np.random.choice(['Yes', 'No'], size=100, p=[0.6, 0.4])
}

# Create a DataFrame
mock_data = pd.DataFrame(data)

# Save to Excel
mock_data.to_excel("mock_data.xlsx", index=False)

# Load the mock data (simulating loading from Excel)
mock_data = pd.read_excel("mock_data.xlsx")

# 1. Average Hit Relevance Visualization
avg_relevance = mock_data['RelevanceScore'].mean()
fig1 = px.bar(x=['Average Relevance'], y=[avg_relevance], title='Average Hit Relevance',
              labels={'x': 'Metric', 'y': 'Score'},
              color_discrete_sequence=['#6C5B7B'])
fig1.update_layout(
    title_font_size=24, title_x=0.5,
    plot_bgcolor='rgba(0,0,0,0)',
    yaxis_range=[0, 1],
    yaxis_tickformat='.2%'
)
fig1.update_traces(texttemplate='%{y:.2%}', textposition='outside')

# 2. Number of Hits (Counts of True and False values)
hit_counts = mock_data['HitRelevance'].value_counts().sort_index()
fig2 = px.bar(x=hit_counts.index.map({True: 'Relevant', False: 'Not Relevant'}), y=hit_counts.values,
              title='Number of Hits (Relevant vs Not Relevant)',
              labels={'x': 'Hit Relevance', 'y': 'Count'},
              color=hit_counts.index.map({True: 'Relevant', False: 'Not Relevant'}),
              color_discrete_map={'Relevant': '#355C7D', 'Not Relevant': '#F67280'})
fig2.update_layout(title_font_size=24, title_x=0.5, plot_bgcolor='rgba(0,0,0,0)')
fig2.update_traces(texttemplate='%{y}', textposition='outside')

# 3. HitRelevance (Box Plot)
fig3 = px.box(mock_data, y='RelevanceScore', title='Hit Relevance Distribution (Box Plot)',
              labels={'y': 'Relevance Score'},
              color_discrete_sequence=['#6C5B7B'])
fig3.update_layout(title_font_size=24, title_x=0.5, plot_bgcolor='rgba(0,0,0,0)')

# 4. Visualization of Gemini Evaluation Scores
fig4 = px.histogram(mock_data, x='GeminiEvaluation', title='Gemini Evaluation Scores',
                    labels={'x': 'Evaluation Score', 'y': 'Frequency'},
                    color_discrete_sequence=['#355C7D'])
fig4.update_layout(title_font_size=24, title_x=0.5, plot_bgcolor='rgba(0,0,0,0)')

# 5. Follow-up Question Counts (Yes vs No)
followup_counts = mock_data['FollowUpQuestion'].value_counts()
fig5 = px.pie(values=followup_counts.values, names=followup_counts.index,
              title='Follow-up Question Distribution',
              color_discrete_sequence=['#355C7D', '#F67280'])
fig5.update_traces(textinfo='percent+label', pull=[0.1, 0])
fig5.update_layout(title_font_size=24, title_x=0.5)

# Show all figures
fig1.show()
fig2.show()
fig3.show()
fig4.show()
fig5.show()


Pandas requires version '1.4.3' or newer of 'xlsxwriter' (version '1.2.9' currently installed).



In [18]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Generate mock data
np.random.seed(42)
n_samples = 100

data = {
    'HitRelevance': np.random.choice([True, False], size=n_samples, p=[0.7, 0.3]),
    'RelevanceScore': np.random.uniform(0, 1, size=n_samples),
    'GeminiEvaluation': np.random.uniform(1, 4, size=n_samples),
    'FollowUpQuestion': np.random.choice(['Yes', 'No'], size=n_samples, p=[0.6, 0.4])
}

df = pd.DataFrame(data)

# 1. Average Hit Relevance
avg_relevance = df['RelevanceScore'].mean()

fig1 = go.Figure(go.Indicator(
    mode = "gauge+number",
    value = avg_relevance,
    domain = {'x': [0, 1], 'y': [0, 1]},
    title = {'text': "Average Hit Relevance"},
    gauge = {
        'axis': {'range': [0, 1]},
        'bar': {'color': "darkblue"},
        'steps': [
            {'range': [0, 0.5], 'color': "lightgray"},
            {'range': [0.5, 0.75], 'color': "gray"},
            {'range': [0.75, 1], 'color': "darkgray"}
        ],
        'threshold': {
            'line': {'color': "red", 'width': 4},
            'thickness': 0.75,
            'value': 0.8
        }
    }
))

fig1.update_layout(height=400, font={'size': 16})

# 2. Number of Hits (Counts of True and False values)
hit_counts = df['HitRelevance'].value_counts().sort_index()

fig2 = px.bar(x=['Relevant', 'Not Relevant'], y=hit_counts.values,
              title='Number of Hits (Relevant vs Not Relevant)',
              labels={'x': 'Hit Relevance', 'y': 'Count'},
              color=['Relevant', 'Not Relevant'],
              color_discrete_map={'Relevant': '#1f77b4', 'Not Relevant': '#ff7f0e'})

fig2.update_layout(showlegend=False, height=400, font={'size': 16})
fig2.update_traces(texttemplate='%{y}', textposition='outside')

# 3. HitRelevance (Box Plot)
fig3 = px.box(df, y='RelevanceScore', title='Hit Relevance Distribution',
              labels={'RelevanceScore': 'Relevance Score'},
              color_discrete_sequence=['#1f77b4'])

fig3.update_layout(height=400, font={'size': 16})

# 4. Visualization of Gemini Evaluation scores
fig4 = px.histogram(df, x='GeminiEvaluation', nbins=20,
                    title='Distribution of Gemini Evaluation Scores',
                    labels={'GeminiEvaluation': 'Evaluation Score'},
                    color_discrete_sequence=['#1f77b4'])

fig4.update_layout(height=400, font={'size': 16})

# 5. Follow-up question on topic (Counts of Yes and No in a visualization)
followup_counts = df['FollowUpQuestion'].value_counts()

fig5 = px.pie(values=followup_counts.values, names=followup_counts.index,
              title='Follow-up Questions on Topic',
              color_discrete_sequence=['#1f77b4', '#ff7f0e'])

fig5.update_traces(textposition='inside', textinfo='percent+label')
fig5.update_layout(height=400, font={'size': 16})

# Combine all figures into a single plot
fig = make_subplots(rows=3, cols=2, specs=[[{'type': 'indicator'}, {'type': 'xy'}],
                                           [{'type': 'xy'}, {'type': 'xy'}],
                                           [{'type': 'xy'}, {'type': 'domain'}]],
                    subplot_titles=("Average Hit Relevance", "Number of Hits",
                                    "Hit Relevance Distribution", "Gemini Evaluation Scores",
                                    "Follow-up Questions on Topic"))

fig.add_trace(fig1.data[0], row=1, col=1)
fig.add_trace(fig2.data[0], row=1, col=2)
fig.add_trace(fig3.data[0], row=2, col=1)
fig.add_trace(fig4.data[0], row=2, col=2)
# fig.add_trace(fig5.data[0], row=3, col=1)

fig.update_layout(height=1200, width=1000, title_text="Content Gap Analysis Results")
fig.show()

In [None]:
import matplotlib.pyplot as plt

# Extract scores for visualization
scores = [evaluator.evaluate(original, summary) for original, summary in zip(original_contents, generated_summaries)]

# Plotting the scores
plt.bar(range(len(scores)), scores)
plt.xlabel('Summary Index')
plt.ylabel('Evaluation Score')
plt.title('Evaluation Scores of Summaries')
plt.show()

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Sample data structure (replace with actual data from save_results())
data = {
    "Run": ["Run 1", "Run 2", "Run 1", "Run 2"],
    "Model": ["GPT-3.5", "GPT-3.5", "GPT-4o", "GPT-4o"],
    "Metric": ["Hit Rate", "Hit Rate", "Hit Rate", "Hit Rate"],
    "Value": [0.76, 0.81, 0.88, 0.93]
}

# Convert data to DataFrame
df = pd.DataFrame(data)

# 1. Comparison of Hit Rates Across Runs and Models
fig1 = px.bar(df, x="Model", y="Value", color="Run", title="Hit Rates Across Models and Runs", labels={"Value": "Hit Rate"})
fig1.show()

# 2. Gemini Scores Across Models and Runs
data_gemini = {
    "Run": ["Run 1", "Run 2", "Run 1", "Run 2"],
    "Model": ["GPT-3.5", "GPT-3.5", "GPT-4o", "GPT-4o"],
    "Gemini Score": [2.2, 2.5, 3.1, 3.4]
}
df_gemini = pd.DataFrame(data_gemini)
fig2 = px.line(df_gemini, x="Run", y="Gemini Score", color="Model", markers=True, title="Gemini Scores Across Runs")
fig2.show()

# 3. Persona-Specific Hit Rates
data_persona = {
    "Persona": ["Developer", "Tester", "Business Analyst", "Developer", "Tester", "Business Analyst"],
    "Run": ["Run 1", "Run 1", "Run 1", "Run 2", "Run 2", "Run 2"],
    "Hit Rate": [0.77, 0.74, 0.75, 0.83, 0.80, 0.81]
}
df_persona = pd.DataFrame(data_persona)
fig3 = px.bar(df_persona, x="Persona", y="Hit Rate", color="Run", barmode="group", title="Persona-Specific Hit Rates")
fig3.show()

# 4. Summary Compliance (50 Words)
data_summary = {
    "Run": ["Run 1", "Run 2", "Run 1", "Run 2"],
    "Model": ["GPT-3.5", "GPT-3.5", "GPT-4o", "GPT-4o"],
    "Compliance (%)": [78.0, 85.6, 92.4, 96.8]
}
df_summary = pd.DataFrame(data_summary)
fig4 = px.bar(df_summary, x="Model", y="Compliance (%)", color="Run", title="Summary Compliance Across Models and Runs")
fig4.show()

# 5. Follow-Up Question Topicality
data_follow_up = {
    "Run": ["Run 1", "Run 2", "Run 1", "Run 2"],
    "Model": ["GPT-3.5", "GPT-3.5", "GPT-4o", "GPT-4o"],
    "On-Topic (%)": [85.5, 90.2, 96.0, 98.5]
}
df_follow_up = pd.DataFrame(data_follow_up)
fig5 = px.bar(df_follow_up, x="Model", y="On-Topic (%)", color="Run", title="Follow-Up Question Topicality")
fig5.show()

# 6. Improvement Between Runs for All Metrics
df_improvement = df.groupby(["Model", "Run"]).mean().reset_index()
fig6 = px.scatter(df_improvement, x="Model", y="Value", color="Run", size="Value", title="Metric Improvement Across Runs")
fig6.show()

# 7. Persona-Based Gemini Scores
data_persona_gemini = {
    "Persona": ["Developer", "Tester", "Business Analyst", "Developer", "Tester", "Business Analyst"],
    "Run": ["Run 1", "Run 1", "Run 1", "Run 2", "Run 2", "Run 2"],
    "Gemini Score": [2.3, 2.1, 2.2, 2.7, 2.4, 2.5]
}
df_persona_gemini = pd.DataFrame(data_persona_gemini)
fig7 = px.line(df_persona_gemini, x="Persona", y="Gemini Score", color="Run", title="Persona-Based Gemini Scores")
fig7.show()

# 8. Correlation Between Hit Rate and Summary Compliance
fig8 = px.scatter(df_summary, x="Compliance (%)", y="Hit Rate", color="Model", title="Correlation Between Hit Rate and Summary Compliance")
fig8.show()

# 9. Static Question Performance Across Runs
data_static = {
    "Run": ["Run 1", "Run 2", "Run 1", "Run 2"],
    "Model": ["GPT-3.5", "GPT-3.5", "GPT-4o", "GPT-4o"],
    "Static Hit Rate": [0.76, 0.82, 0.89, 0.94]
}
df_static = pd.DataFrame(data_static)
fig9 = px.bar(df_static, x="Model", y="Static Hit Rate", color="Run", title="Static Question Performance Across Runs")
fig9.show()

# 10. Hit Relevance Distribution
data_hit_relevance = {
    "Hit Relevance": [0.58, 0.62, 0.66, 0.69],
    "Model": ["GPT-3.5 Run 1", "GPT-3.5 Run 2", "GPT-4o Run 1", "GPT-4o Run 2"]
}
df_hit_relevance = pd.DataFrame(data_hit_relevance)
fig10 = px.histogram(df_hit_relevance, x="Hit Relevance", color="Model", title="Hit Relevance Distribution")
fig10.show()



The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



ValueError: Value of 'y' is not the name of a column in 'data_frame'. Expected one of ['Run', 'Model', 'Compliance (%)'] but received: Hit Rate

In [3]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the CSV file
file_path = "D:\\Dissertation - City, Univeristy of London\\Evaluating-AI-Learning-Assistants\\test output\\run1_gpt3-5_developer.csv"  # Replace with the path to your CSV file
data = pd.read_csv(file_path)

# Ensure all relevant columns are present
required_columns = [
    "Column1.question", "Column1.enriched_question", "Column1.hit", "Column1.summary",
    "Column1.hitRelevance", "Column1.follow_up", "Column1.follow_up_on_topic", "Column1.gemini_evaluation"
]
if not all(col in data.columns for col in required_columns):
    raise ValueError("The CSV file is missing one or more required columns.")

# Visualization 1: Distribution of Hit Relevance Scores
fig1 = px.histogram(data, x="Column1.hitRelevance", nbins=20, title="Distribution of Hit Relevance Scores",
                    labels={"Column1.hitRelevance": "Hit Relevance Score"})
fig1.show()

# Visualization 2: Distribution of Gemini Evaluations
fig2 = px.histogram(data, x="Column1.gemini_evaluation", nbins=4, title="Distribution of Gemini Evaluations",
                    labels={"Column1.gemini_evaluation": "Gemini Evaluation Score"})
fig2.show()

# Visualization 3: Percentage of Hits (True/False)
hit_counts = data["Column1.hit"].value_counts(normalize=True).reset_index()
hit_counts.columns = ["Hit Status", "Percentage"]
fig3 = px.pie(hit_counts, names="Hit Status", values="Percentage", title="Percentage of Hits")
fig3.show()

# Visualization 4: Gemini Evaluation by Hit Status
fig4 = px.box(data, x="Column1.hit", y="Column1.gemini_evaluation", title="Gemini Evaluation by Hit Status",
              labels={"Column1.hit": "Hit Status", "Column1.gemini_evaluation": "Gemini Evaluation Score"})
fig4.show()

# Visualization 5: Average Hit Relevance by Question
average_relevance = data.groupby("Column1.question")["Column1.hitRelevance"].mean().reset_index()
fig5 = px.bar(average_relevance, x="Column1.question", y="Column1.hitRelevance",
              title="Average Hit Relevance by Question", labels={"Column1.hitRelevance": "Average Hit Relevance"})
fig5.update_xaxes(tickangle=45)
fig5.show()

# Visualization 6: Follow-Up Topic Distribution
follow_up_counts = data["Column1.follow_up_on_topic"].value_counts().reset_index()
follow_up_counts.columns = ["Follow-Up Topic", "Count"]
fig6 = px.bar(follow_up_counts, x="Follow-Up Topic", y="Count", title="Follow-Up Topic Distribution")
fig6.show()

# Visualization 7: Scatterplot of Hit Relevance vs Gemini Evaluation
fig7 = px.scatter(data, x="Column1.hitRelevance", y="Column1.gemini_evaluation", color="Column1.hit",
                  title="Hit Relevance vs Gemini Evaluation",
                  labels={"Column1.hitRelevance": "Hit Relevance Score", "Column1.gemini_evaluation": "Gemini Evaluation Score"})
fig7.show()

# Visualization 8: Word Count Distribution in Summaries
data['Summary Word Count'] = data["Column1.summary"].apply(lambda x: len(str(x).split()))
fig8 = px.histogram(data, x="Summary Word Count", nbins=20, title="Word Count Distribution in Summaries",
                    labels={"Summary Word Count": "Word Count"})
fig8.show()

# Visualization 9: Questions by Hit Relevance
top_questions = data.nlargest(10, "Column1.hitRelevance")[["Column1.question", "Column1.hitRelevance"]]
fig9 = px.bar(top_questions, x="Column1.question", y="Column1.hitRelevance",
              title="Top 10 Questions by Hit Relevance",
              labels={"Column1.hitRelevance": "Hit Relevance Score"})
fig9.update_xaxes(tickangle=45)
fig9.show()

# Visualization 10: Follow-Up Status by Hit Relevance
fig10 = px.box(data, x="Column1.follow_up_on_topic", y="Column1.hitRelevance", title="Follow-Up Status by Hit Relevance",
               labels={"Column1.follow_up_on_topic": "Follow-Up On Topic", "Column1.hitRelevance": "Hit Relevance Score"})
fig10.show()


FileNotFoundError: [Errno 2] No such file or directory: 'D:\\Dissertation - City, Univeristy of London\\Evaluating-AI-Learning-Assistants\\test output\\run1_gpt3-5_developer.csv'

In [5]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the Excel file
file_path = "D:\\Dissertation - City, Univeristy of London\\Evaluating-AI-Learning-Assistants\\test output\\run1_gpt3-5_developer.xlxs"  # Replace with the path to your Excel file
data = pd.read_excel(file_path)

# Ensure all relevant columns are present
required_columns = [
    "Column1.question", "Column1.enriched_question", "Column1.hit", "Column1.summary",
    "Column1.hitRelevance", "Column1.follow_up", "Column1.follow_up_on_topic", "Column1.gemini_evaluation"
]
if not all(col in data.columns for col in required_columns):
    raise ValueError("The Excel file is missing one or more required columns.")

# Visualization 1: Distribution of Hit Relevance Scores
fig1 = px.histogram(data, x="Column1.hitRelevance", nbins=20, title="Distribution of Hit Relevance Scores",
                    labels={"Column1.hitRelevance": "Hit Relevance Score"})
fig1.show()

# Visualization 2: Distribution of Gemini Evaluations
fig2 = px.histogram(data, x="Column1.gemini_evaluation", nbins=4, title="Distribution of Gemini Evaluations",
                    labels={"Column1.gemini_evaluation": "Gemini Evaluation Score"})
fig2.show()

# Visualization 3: Percentage of Hits (True/False)
hit_counts = data["Column1.hit"].value_counts(normalize=True).reset_index()
hit_counts.columns = ["Hit Status", "Percentage"]
fig3 = px.pie(hit_counts, names="Hit Status", values="Percentage", title="Percentage of Hits")
fig3.show()

# Visualization 4: Gemini Evaluation by Hit Status
fig4 = px.box(data, x="Column1.hit", y="Column1.gemini_evaluation", title="Gemini Evaluation by Hit Status",
              labels={"Column1.hit": "Hit Status", "Column1.gemini_evaluation": "Gemini Evaluation Score"})
fig4.show()

# Visualization 5: Average Hit Relevance by Question
average_relevance = data.groupby("Column1.question")["Column1.hitRelevance"].mean().reset_index()
fig5 = px.bar(average_relevance, x="Column1.question", y="Column1.hitRelevance",
              title="Average Hit Relevance by Question", labels={"Column1.hitRelevance": "Average Hit Relevance"})
fig5.update_xaxes(tickangle=45)
fig5.show()

# Visualization 6: Follow-Up Topic Distribution
follow_up_counts = data["Column1.follow_up_on_topic"].value_counts().reset_index()
follow_up_counts.columns = ["Follow-Up Topic", "Count"]
fig6 = px.bar(follow_up_counts, x="Follow-Up Topic", y="Count", title="Follow-Up Topic Distribution")
fig6.show()

# Visualization 7: Scatterplot of Hit Relevance vs Gemini Evaluation
fig7 = px.scatter(data, x="Column1.hitRelevance", y="Column1.gemini_evaluation", color="Column1.hit",
                  title="Hit Relevance vs Gemini Evaluation",
                  labels={"Column1.hitRelevance": "Hit Relevance Score", "Column1.gemini_evaluation": "Gemini Evaluation Score"})
fig7.show()

# Visualization 8: Word Count Distribution in Summaries
data['Summary Word Count'] = data["Column1.summary"].apply(lambda x: len(str(x).split()))
fig8 = px.histogram(data, x="Summary Word Count", nbins=20, title="Word Count Distribution in Summaries",
                    labels={"Summary Word Count": "Word Count"})
fig8.show()

# Visualization 9: Questions by Hit Relevance
top_questions = data.nlargest(10, "Column1.hitRelevance")[["Column1.question", "Column1.hitRelevance"]]
fig9 = px.bar(top_questions, x="Column1.question", y="Column1.hitRelevance",
              title="Top 10 Questions by Hit Relevance",
              labels={"Column1.hitRelevance": "Hit Relevance Score"})
fig9.update_xaxes(tickangle=45)
fig9.show()

# Visualization 10: Follow-Up Status by Hit Relevance
fig10 = px.box(data, x="Column1.follow_up_on_topic", y="Column1.hitRelevance", title="Follow-Up Status by Hit Relevance",
               labels={"Column1.follow_up_on_topic": "Follow-Up On Topic", "Column1.hitRelevance": "Hit Relevance Score"})
fig10.show()


FileNotFoundError: [Errno 2] No such file or directory: 'D:\\Dissertation - City, Univeristy of London\\Evaluating-AI-Learning-Assistants\\test output\\run1_gpt3-5_developer.xlxs'