In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Data preparation
# Each table stores one list per column; entries at the same position across
# the lists of a table describe the same model.
# Rows 0-1 differ between the two tables (their names gain an "FT" suffix in
# the post table); rows 2-4 carry identical names and values in both.
baseline_data = dict(
    Model=[
        'Llama 2',
        'Mistral Instruct',
        'gpt-3.5-turbo-0125',
        'gpt-4-turbo-2024-04-09',
        'gpt-4o-2024-05-13',
    ],
    Accuracy=[21, 21, 87, 91, 94],
    Precision=[0, 0, 5, 26, 31],
    TCE=[4, 21, 7, 15, 15],
)

post_ft_data = dict(
    Model=[
        'Llama 2 FT',
        'Mistral Instruct FT',
        'gpt-3.5-turbo-0125',
        'gpt-4-turbo-2024-04-09',
        'gpt-4o-2024-05-13',
    ],
    Accuracy=[77, 83, 87, 91, 94],
    Precision=[35, 50, 5, 26, 31],
    TCE=[9, 4, 7, 15, 15],
)

# Create DataFrames (both get the default 0..4 integer index)
baseline_df, post_ft_df = (
    pd.DataFrame(table) for table in (baseline_data, post_ft_data)
)

# Calculate summary statistics
def calculate_summary_statistics(df, metric):
    """Summarise one column of ``df``.

    Parameters
    ----------
    df : table supporting ``df[metric]`` lookup (a pandas DataFrame here).
    metric : label of the column to summarise.

    Returns
    -------
    tuple
        ``(mean, median, std_dev)`` of the selected column. The standard
        deviation comes from ``np.std`` with its default ``ddof=0``, i.e. the
        population (not sample) standard deviation.
    """
    column = df[metric]
    # Order matters: callers index the result as [0]=mean, [1]=median, [2]=std.
    return tuple(reduce(column) for reduce in (np.mean, np.median, np.std))

# Each *_stats value is the (mean, median, std_dev) tuple returned by
# calculate_summary_statistics for one metric column of one table.

# Baseline statistics
baseline_accuracy_stats = calculate_summary_statistics(baseline_df, 'Accuracy')
baseline_precision_stats = calculate_summary_statistics(baseline_df, 'Precision')
baseline_tce_stats = calculate_summary_statistics(baseline_df, 'TCE')

# Post FT statistics
post_ft_accuracy_stats = calculate_summary_statistics(post_ft_df, 'Accuracy')
post_ft_precision_stats = calculate_summary_statistics(post_ft_df, 'Precision')
post_ft_tce_stats = calculate_summary_statistics(post_ft_df, 'TCE')

# Print summary statistics: one line per (table, metric) pair, baseline first.
summary_lines = (
    ("Baseline Accuracy: Mean =", baseline_accuracy_stats),
    ("Baseline Precision: Mean =", baseline_precision_stats),
    ("Baseline TCE: Mean =", baseline_tce_stats),
    ("Post FT Accuracy: Mean =", post_ft_accuracy_stats),
    ("Post FT Precision: Mean =", post_ft_precision_stats),
    ("Post FT TCE: Mean =", post_ft_tce_stats),
)
for heading, stats in summary_lines:
    # print() joins its arguments with single spaces, giving "... = 62.8 , Median = ...".
    print(heading, stats[0], ", Median =", stats[1], ", Std Dev =", stats[2])

Baseline Accuracy: Mean = 62.8 , Median = 87.0 , Std Dev = 34.20175434096912
Baseline Precision: Mean = 12.4 , Median = 5.0 , Std Dev = 13.365627557282899
Baseline TCE: Mean = 12.4 , Median = 15.0 , Std Dev = 6.118823416311341
Post FT Accuracy: Mean = 86.4 , Median = 87.0 , Std Dev = 5.986651818838307
Post FT Precision: Mean = 29.4 , Median = 31.0 , Std Dev = 14.59588983241515
Post FT TCE: Mean = 10.0 , Median = 9.0 , Std Dev = 4.381780460041329


In [9]:
# Comparative analysis: per-row metric deltas, post fine-tuning minus baseline.
# The subtraction aligns the two tables on their integer index, so row i of
# post_ft_df is compared with row i of baseline_df.
accuracy_change, precision_change, tce_change = (
    post_ft_df[column] - baseline_df[column]
    for column in ('Accuracy', 'Precision', 'TCE')
)

# Create a DataFrame to hold the changes, labelled with the baseline model names
changes_df = baseline_df[['Model']].assign(**{
    'Accuracy Change': accuracy_change,
    'Precision Change': precision_change,
    'TCE Change': tce_change,
})

# Print comparative analysis with model names.
# Only the first two rows are shown: those are the rows whose values differ
# between the two tables.
print(changes_df.head(2))

              Model  Accuracy Change  Precision Change  TCE Change
0           Llama 2               56                35           5
1  Mistral Instruct               62                50         -17
