In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from dotenv import load_dotenv
from matplotlib.patches import Patch
from matplotlib.ticker import FuncFormatter

# Load environment variables via python-dotenv (presumably from a local `.env`
# file, if one is found -- confirm where the project keeps it).
load_dotenv()
# Seed Python's built-in `random` module so any use of it is repeatable.
# Note: this does not seed NumPy's random number generator.
random.seed(42)

In [None]:
# Load the raw model-metrics report into a DataFrame.
METRICS_CSV_PATH = "../reports/model_metrics_2.csv"
results = pd.read_csv(METRICS_CSV_PATH)

In [None]:

# Split the combined model identifier into separate columns and rescale latency.
#
# The `model` column is expected to hold "<model>_<domain>_<embedding>"; only the
# first three underscore-separated parts are used.
#
# The guard makes the cell safe to re-run: once `model` has been overwritten with
# the bare model name it can no longer be split, and latency must not be divided
# a second time. (This assumes the raw CSV does not already ship `domain` and
# `embedding` columns -- TODO confirm.)
if not {'domain', 'embedding'}.issubset(results.columns):
    # Always materialise three part-columns so short identifiers show up as NaN
    # instead of raising a KeyError on a missing column.
    id_parts = (
        results['model']
        .str.split('_', expand=True)
        .reindex(columns=range(3))
    )

    malformed = id_parts.isna().any(axis=1)
    if malformed.any():
        examples = results.loc[malformed, 'model'].head(3).tolist()
        raise ValueError(
            "Expected 'model' values of the form '<model>_<domain>_<embedding>'; "
            f"{int(malformed.sum())} row(s) do not match, e.g. {examples}"
        )

    # Assign everything in one step so a failure can never leave `results`
    # half-transformed.
    results = results.assign(
        model=id_parts[0],
        domain=id_parts[1],
        embedding=id_parts[2],
        # Presumably nanoseconds -> milliseconds, given the "ms" axis labels on
        # the later plots -- confirm against the benchmark code.
        latency=results['latency'] / 1_000_000,
    )

In [None]:
# Average every metric per (model, domain) pair. The embedding type is not part
# of the grouping key, so rows that differ only by embedding are averaged together.
METRIC_COLUMNS = ['accuracy', 'recall', 'precision', 'cost', 'latency']

grouped_results = (
    results
    .groupby(['model', 'domain'])
    .agg(dict.fromkeys(METRIC_COLUMNS, 'mean'))
    .reset_index()
    # Base-10 log of the mean latency, kept as an extra column for plotting.
    .assign(log_latency=lambda frame: np.log10(frame['latency']))
)

# Persist the aggregated table next to the raw report.
grouped_results.to_csv("../reports/grouped_metrics.csv", index=False)

In [None]:
# Show the aggregated per-(model, domain) metrics table as the cell output.
grouped_results

In [None]:
# Fixed domain -> colour mapping so every chart coloured by domain uses the
# same colour for the same domain.
domain_colors = {
    'law': 'cornflowerblue',
    'healthcare': 'orange',
    'finance': 'mediumseagreen',
}

In [None]:
# Grouped bar chart: average precision of each model, one bar per domain.

# Pivot so that models form the index (one bar group each) and every domain
# becomes a column (one bar within the group).
precision_by_domain_model = grouped_results.pivot_table(
    values='precision',
    index='model',
    columns='domain',
    aggfunc='mean'
)
# The table holds precision; the earlier (misleading) name is kept as an alias
# in case other cells still reference it.
accuracy_by_domain_model = precision_by_domain_model

# Create the figure and axes once and hand the axes to pandas. Calling
# plt.figure() and then DataFrame.plot(figsize=...) without `ax=` opens a second
# figure, which leaves an empty figure in the cell output.
fig, ax = plt.subplots(figsize=(14, 8))
precision_by_domain_model.plot(
    kind='bar',
    ax=ax,
    width=0.7,
    color=[domain_colors[col] for col in precision_by_domain_model.columns],
)

# Titles, labels, grid and legend.
# NOTE(review): the y label says "%" -- confirm precision is stored as a
# percentage rather than a 0-1 fraction.
ax.set_title('Average Model Precision by Domain', fontsize=16)
ax.set_xlabel('Model Type', fontsize=14)
ax.set_ylabel('Average Precision (%)', fontsize=14)
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.legend(title='Domain', fontsize=12)

# Print the value on top of every bar (one container per domain).
for container in ax.containers:
    ax.bar_label(container, fmt='%.2f', padding=3, fontsize=9)

fig.tight_layout()
plt.show()

In [None]:
# Global plot styling for the precision-vs-latency scatter plot.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("talk")

# Collapse the per-domain rows into a single row per model by averaging all
# numeric columns (latency, precision, ...).
mean_grouped_results = (
    grouped_results
    .groupby('model')
    .mean(numeric_only=True)
    .reset_index()
)

# Figure and axes that the rest of this cell draws on.
fig, ax = plt.subplots(figsize=(12, 8), dpi=100)

# Per-model visual identity: (marker shape, colour).
model_styles = {
    'SVM': ('o', '#1f77b4'),         # blue
    'XGBoost': ('s', '#ff7f0e'),     # orange
    'fastText': ('^', '#2ca02c'),    # green
    'gpt4o-mini': ('D', '#d62728'),  # red
    'modernbert': ('p', '#9467bd'),  # purple
}
markers = {name: marker for name, (marker, _) in model_styles.items()}
colors = {name: color for name, (_, color) in model_styles.items()}

# Draw one point per model. Both the marker shape and the fill colour encode the
# model; models without an entry in `markers` / `colors` fall back to a gray '*'.
for model_name, x_latency, y_precision in zip(
    mean_grouped_results['model'],
    mean_grouped_results['latency'],
    mean_grouped_results['precision'],
):
    model_color = colors.get(model_name, 'gray')

    plt.scatter(
        x_latency,
        y_precision,
        marker=markers.get(model_name, '*'),
        color=model_color,
        s=280,
        edgecolor='black',
        linewidth=1.5,
        alpha=0.8,
        zorder=10,
    )

    # Name tag next to the point, drawn in a rounded box filled with the
    # model's colour and offset 15 points to the right of the marker.
    plt.annotate(
        f"{model_name}",
        xy=(x_latency, y_precision),
        xytext=(15, 0),
        textcoords='offset points',
        fontsize=12,
        fontweight='bold',
        color='white',
        bbox=dict(boxstyle='round,pad=0.5', fc=model_color, ec="black", alpha=0.7, lw=1.5),
        zorder=11,
    )

# Reference lines at the mean precision and mean latency across models. The
# line artists are kept so the legend can show them exactly as drawn.
avg_precision = mean_grouped_results['precision'].mean()
avg_precision_line = ax.axhline(
    y=avg_precision, color='gray', linestyle='--', alpha=0.7, linewidth=2,
    label=f'Avg Precision: {avg_precision:.2f}',
)

avg_latency = mean_grouped_results['latency'].mean()
avg_latency_line = ax.axvline(
    x=avg_latency, color='gray', linestyle='--', alpha=0.7, linewidth=2,
    label=f'Avg Latency: {avg_latency:.2f}',
)

# Use logarithmic scale for x-axis to better visualize differences
ax.set_xscale('log')

# Custom legend: one colour swatch per model that is actually plotted, followed
# by the two reference lines. Iterating over the data (rather than over the
# `colors` dict) avoids an IndexError when a styled model is missing from the
# results, and also lists models that were drawn with the gray fallback colour.
legend_elements = [
    Patch(
        facecolor=colors.get(model_name, 'gray'),
        edgecolor='black',
        label=f"{model_name} (Precision: {model_precision:.2f})",
    )
    for model_name, model_precision in zip(
        mean_grouped_results['model'], mean_grouped_results['precision']
    )
]
legend_elements += [avg_precision_line, avg_latency_line]

ax.legend(handles=legend_elements, loc='upper right',
          frameon=True, framealpha=0.9, edgecolor='black',
          fontsize=10, title='Model Performance', title_fontsize=12)

# Title and axis labels.
plt.title('Model Performance: Precision vs. Latency', fontsize=20, fontweight='bold', pad=20)
plt.xlabel('Latency (ms, log scale)', fontsize=16, labelpad=15)
plt.ylabel('Precision', fontsize=16, labelpad=15)

# Faint vertical blue gradient stretched across the current axis limits, used
# purely as a decorative background.
gradient = np.linspace(0, 1, 100).reshape(-1, 1)
plt.imshow(gradient, cmap=plt.cm.Blues, alpha=0.1, aspect='auto',
           extent=[*ax.get_xlim(), *ax.get_ylim()])

# Grid and tick label sizes.
plt.grid(True, linestyle='--', alpha=0.4, linewidth=0.8)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Show plain numbers instead of 10^k on the log-scaled x axis. The general
# format is used because a fixed one-decimal format would render every tick
# below 0.05 as "0.0", which is meaningless on a log axis.
ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{x:g}'))

# Current axis limits; the quadrant labels are centred between these limits and
# the average lines.
x_min, x_max = ax.get_xlim()
y_min, y_max = ax.get_ylim()

# The x axis is logarithmic, so the visual midpoint of an interval is the
# geometric mean of its end points (mean of the logs, exponentiated).
q_left = np.exp((np.log(x_min) + np.log(avg_latency)) / 2)
q_right = np.exp((np.log(avg_latency) + np.log(x_max)) / 2)
# The y axis is linear, so a plain arithmetic midpoint is used.
q_bottom = (y_min + avg_precision) / 2
q_top = (avg_precision + y_max) / 2

# (x, y, text) for each labelled quadrant. The top-right quadrant gets no label
# (presumably to keep it clear for the legend placed at 'upper right' -- confirm).
quadrant_labels = [
    (q_left, q_top, 'High Precision\nLow Latency'),      # top-left: the optimal corner
    (q_left, q_bottom, 'Low Precision\nLow Latency'),    # bottom-left
    (q_right, q_bottom, 'Low Precision\nHigh Latency'),  # bottom-right
]
for label_x, label_y, label_text in quadrant_labels:
    ax.text(label_x, label_y, label_text, ha='center', va='center',
            fontsize=12, fontweight='bold', color='black', alpha=0.8, zorder=12)

# Frame the plot: give all four spines the same width and dark-gray colour.
plt.setp(list(ax.spines.values()), linewidth=1.5, color='#333333')

# Fit everything inside the figure and render it.
plt.tight_layout()
plt.show()


In [None]:
# Global styling for the latency bar chart.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("deep")
sns.set_context("paper", font_scale=1.2)

# Mean latency per model (averaged over domains), sorted ascending.
latency_by_model = (
    grouped_results
    .groupby('model')['latency']
    .mean()
    .sort_values()
)

# Figure with a single axes to draw the bars on.
fig, ax = plt.subplots(figsize=(14, 8), dpi=100)

# One bar per model, coloured along the viridis palette in sorted order.
bars = ax.bar(
    latency_by_model.index,
    latency_by_model.values,
    color=sns.color_palette("viridis", len(latency_by_model)),
    edgecolor='black',
    linewidth=1.5,
    alpha=0.85,
    width=0.6,
)

# Write each bar's value just above it. The offset is multiplicative (5% above
# the bar top) rather than additive.
for bar in bars:
    bar_top = bar.get_height()
    bar_center = bar.get_x() + bar.get_width() / 2.
    ax.text(bar_center, bar_top * 1.05, f'{bar_top:.2f} ms',
            ha='center', va='bottom',
            fontsize=10, fontweight='bold', color='black')

# Title and axis labels.
plt.title('Average Inference Latency by Model Type', fontsize=18, fontweight='bold', pad=20)
plt.xlabel('Model Type', fontsize=14, labelpad=10)
plt.ylabel('Average Latency (ms, log scale)', fontsize=14, labelpad=10)

# Light horizontal grid lines only.
plt.grid(axis='y', linestyle='--', alpha=0.3)

# Rotate the model names so longer labels do not overlap.
plt.xticks(rotation=30, ha='right', fontsize=12)

# Logarithmic y axis with plain-number tick labels. The general format is used
# because a fixed one-decimal format would render every tick below 0.05 as
# "0.0", which is meaningless on a log axis. `FuncFormatter` is the name
# imported at the top of the notebook.
plt.yscale('log')
ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{x:g}'))

# Add a light background color to highlight the chart area
ax.set_facecolor('#f8f9fa')

# Thin dark-gray frame around the plot.
for spine in ax.spines.values():
    spine.set_linewidth(0.8)
    spine.set_color('#333333')

# Very faint blue band spanning slightly beyond the range of plotted latencies.
plt.axhspan(latency_by_model.min()*0.95, latency_by_model.max()*1.1, alpha=0.05, color='blue')

plt.tight_layout()
plt.show()