In [1]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
import numpy as np

# Load the human-vs-Gemini hand-coding comparison table.
df = pd.read_csv('HandCoding_human_vs_AI.csv')

# Keep only rows where all four label columns are present, so the emotion and
# topic metrics below are computed on the same set of samples.
LABEL_COLUMNS = ['coder1_emotion', 'coder1_topic', 'Gemini_emotion', 'Gemini_topic']
df_clean = df.dropna(subset=LABEL_COLUMNS)


def _weighted_scores(y_true, y_pred):
    """Return ``(precision, recall, f1)`` using support-weighted averaging.

    Args:
        y_true: Reference labels (here: the ``coder1_*`` column).
        y_pred: Labels to evaluate (here: the ``Gemini_*`` column).

    Returns:
        A 3-tuple of floats: weighted precision, weighted recall, weighted F1.
        Classes with an undefined score contribute 0 (``zero_division=0``)
        instead of raising a warning.
    """
    shared_kwargs = dict(average='weighted', zero_division=0)
    return (
        precision_score(y_true, y_pred, **shared_kwargs),
        recall_score(y_true, y_pred, **shared_kwargs),
        f1_score(y_true, y_pred, **shared_kwargs),
    )


# Calculate metrics for EMOTION
emotion_precision, emotion_recall, emotion_f1 = _weighted_scores(
    df_clean['coder1_emotion'], df_clean['Gemini_emotion']
)

# Calculate metrics for TOPIC
topic_precision, topic_recall, topic_f1 = _weighted_scores(
    df_clean['coder1_topic'], df_clean['Gemini_topic']
)

# Print summary table
print("GEMINI 3 PRO PERFORMANCE METRICS")
print("="*60)
print(f"{'Category':<15} {'Precision':<12} {'Recall':<12} {'F1-Score':<12}")
print("-"*60)
print(f"{'Emotion':<15} {emotion_precision:<12.3f} {emotion_recall:<12.3f} {emotion_f1:<12.3f}")
print(f"{'Topic':<15} {topic_precision:<12.3f} {topic_recall:<12.3f} {topic_f1:<12.3f}")
print("="*60)

# Detailed per-class reports. zero_division=0 matches the summary metrics above:
# classes that are never predicted (or never occur) report 0.00 silently instead
# of emitting an UndefinedMetricWarning for every undefined score.
print("\nDETAILED EMOTION CLASSIFICATION:")
print(classification_report(df_clean['coder1_emotion'], df_clean['Gemini_emotion'], zero_division=0))

print("\nDETAILED TOPIC CLASSIFICATION:")
print(classification_report(df_clean['coder1_topic'], df_clean['Gemini_topic'], zero_division=0))

# Calculate accuracy as the fraction of rows where the two label columns agree exactly.
# NOTE(review): this is an exact string/value match — if the two sources differ in
# casing or surrounding whitespace, agreement will be undercounted; confirm the
# label columns are normalized upstream.
emotion_accuracy = (df_clean['coder1_emotion'] == df_clean['Gemini_emotion']).mean()
topic_accuracy = (df_clean['coder1_topic'] == df_clean['Gemini_topic']).mean()

print(f"\nEmotion Classification Accuracy: {emotion_accuracy:.3f}")
print(f"Topic Classification Accuracy: {topic_accuracy:.3f}")

# Count total samples used (rows remaining after dropping incomplete ones)
print(f"\nTotal samples analyzed: {len(df_clean)}")

GEMINI 3 PRO PERFORMANCE METRICS
Category        Precision    Recall       F1-Score    
------------------------------------------------------------
Emotion         0.356        0.360        0.338       
Topic           0.351        0.270        0.212       

DETAILED EMOTION CLASSIFICATION:
              precision    recall  f1-score   support

       anger       0.48      0.32      0.39        34
anticipation       0.14      0.50      0.22         2
   confusion       0.00      0.00      0.00         1
     disgust       0.33      0.14      0.20        14
        fear       0.00      0.00      0.00         3
         joy       0.14      0.17      0.15         6
     neutral       0.44      0.68      0.53        31
     sadness       0.00      0.00      0.00         4
    surprise       0.00      0.00      0.00         4
       trust       0.00      0.00      0.00         1

    accuracy                           0.36       100
   macro avg       0.15      0.18      0.15       100
wei

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
