In [None]:
import pandas as pd
import numpy as np
from src.evalute import evaluate_dataframe
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
import os
import io
import sys

# To handle potential warnings.
# filterwarnings('ignore') is called with no category/message/module restriction,
# so it suppresses every warning raised after this cell runs.
# NOTE(review): this also hides deprecation and data-quality warnings from the
# libraries used below; consider narrowing it to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the pre-processed essay dataset (path is relative to the notebook's working directory).
# The recorded preview shows two leftover index columns ('Unnamed: 0.1', 'Unnamed: 0')
# next to the essay text and the five trait label columns.
ESSAY_CSV_PATH = 'data/processed_essay.csv'
df = pd.read_csv(ESSAY_CSV_PATH)

# Preview the first rows as the cell's rich output.
df.head()

Unnamed: 0.1,Unnamed: 0,text,cEXT,cNEU,cAGR,cCON,cOPN
0,0,"Well, right now I just woke up from a mid-day ...",low,high,high,low,high
1,1,"Well, here we go with the stream of consciousn...",low,low,high,low,low
2,2,An open keyboard and buttons to push. The thin...,low,high,low,high,high
3,3,I can't believe it! It's really happening! M...,high,low,high,high,low
4,4,"Well, here I go with the good old stream of co...",high,low,high,low,high


In [3]:
# Some rows carry NaN trait labels. Discard only the rows in which *every* trait
# label is missing (how='all'); rows with at least one label are kept.
LABEL_COLUMNS = ['cEXT', 'cNEU', 'cAGR', 'cCON', 'cOPN']
df_clean = (
    df
    .dropna(subset=LABEL_COLUMNS, how='all')
    .reset_index(drop=True)
)

# Work on a tiny sample so the run stays fast; the fixed random_state makes
# the selection reproducible.
sample_df = df_clean.sample(n=5, random_state=42)

print(f"Cleaned dataset has {len(df_clean)} rows. Using a sample of {len(sample_df)} rows for this run.")
display(sample_df)

Cleaned dataset has 2468 rows. Using a sample of 5 rows for this run.


Unnamed: 0.1,Unnamed: 0,text,cEXT,cNEU,cAGR,cCON,cOPN
1713,1713,okay. my thoughts and feelings. I have no...,high,low,low,low,high
1462,1462,I am really enjoying myself to a certain point...,low,high,high,high,low
1701,1701,I just woke up and decided to do homework this...,high,high,low,low,low
2166,2166,Ah! 8secs! O no 16 sec! er. er. what am I supp...,low,high,high,low,high
1004,1004,Right now I am sitting by the phone debating o...,high,high,high,low,high


In [None]:
# This step makes API calls. This can take several minutes even for a small sample.
# NOTE(review): evaluate_dataframe is imported from the project's src package and is
# not shown here; the analysis cell further down reads results_df[trait] and
# results_df[f'{trait}_pred'], so it presumably returns the input rows plus one
# '<trait>_pred' column per trait — confirm against the helper.
# The recorded run of this cell stopped with an OpenAI RateLimitError (HTTP 429,
# 'insufficient_quota'); when that happens results_df is never assigned and the
# cells that read it cannot run.

results_df = evaluate_dataframe(sample_df, text_column='text')
print("Evaluation complete. Displaying results with predictions:")
display(results_df)

Evaluating Extraversion:  20%|██        | 1/5 [00:04<00:16,  4.08s/it]


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
# Create the output directory if it doesn't exist. exist_ok=True makes this a
# single idempotent call instead of a check-then-create sequence, so re-running
# the cell (or a concurrent run creating the folder first) cannot fail here.
os.makedirs('results', exist_ok=True)

# Save the predictions (without the DataFrame index) so they can be inspected
# or re-analyzed without repeating the API calls.
results_df.to_csv('results/essay_predictions_sample.csv', index=False)
print("Results DataFrame saved to results/essay_predictions_sample.csv")

# Trait label columns to analyze, and the only label values that are scored.
# The order of VALID_LABELS fixes the row/column order of the reports and matrices.
BIG_FIVE_TRAITS = ['cEXT', 'cNEU', 'cAGR', 'cCON', 'cOPN']
VALID_LABELS = ['low', 'high']


def _valid_label_pairs(frame, trait):
    """Return the (true, predicted) label Series for ``trait``, restricted to scorable rows.

    A row is kept only when both ``frame[trait]`` and ``frame[f'{trait}_pred']``
    are one of VALID_LABELS. This drops 'unknown' (or otherwise unexpected)
    predictions and also rows whose ground-truth label is missing: the cleaning
    step only removes rows where *all* labels are NaN, so individual NaN labels
    can still be present, and they cannot be meaningfully scored.

    Args:
        frame: DataFrame holding a ``trait`` column and a ``f'{trait}_pred'`` column.
        trait: Name of the ground-truth column (e.g. 'cEXT').

    Returns:
        Tuple ``(true_labels, pred_labels)`` of equally indexed Series.

    Raises:
        KeyError: if either column is absent from ``frame``.
    """
    true_labels = frame[trait]
    pred_labels = frame[f'{trait}_pred']
    keep = true_labels.isin(VALID_LABELS) & pred_labels.isin(VALID_LABELS)
    return true_labels[keep], pred_labels[keep]


# Collect the text reports by printing straight into an in-memory buffer.
# sys.stdout is deliberately left untouched: inside a notebook kernel it is the
# kernel's own output stream, and "restoring" it to sys.__stdout__ would send
# every later print() away from the cell output. Printing with file=... also
# means an exception in the loop cannot leave stdout redirected.
output_buffer = io.StringIO()

for trait in BIG_FIVE_TRAITS:
    true_labels_filtered, pred_labels_filtered = _valid_label_pairs(results_df, trait)

    if not true_labels_filtered.empty:
        print(f"--- Analysis for {trait} ---", file=output_buffer)

        # Classification Report
        print("Classification Report:", file=output_buffer)
        report = classification_report(
            true_labels_filtered,
            pred_labels_filtered,
            labels=VALID_LABELS,
            zero_division=0,  # report 0 instead of warning when a class has no samples
        )
        print(report, file=output_buffer)
    else:
        print(f"--- No valid predictions for {trait} to analyze ---", file=output_buffer)

# Get the captured text output for reports
classification_report_output = output_buffer.getvalue()

# Save the captured classification reports to a text file
report_file_path = 'results/classification_reports_sample.txt'
with open(report_file_path, 'w', encoding='utf-8') as f:
    f.write(classification_report_output)

print(f"All classification reports saved to {report_file_path}")

# Now generate and display confusion matrices separately so they render in the notebook
for trait in BIG_FIVE_TRAITS:
    true_labels_filtered, pred_labels_filtered = _valid_label_pairs(results_df, trait)

    if not true_labels_filtered.empty:
        print(f"Confusion Matrix for {trait}:")
        try:
            cm = confusion_matrix(true_labels_filtered, pred_labels_filtered, labels=VALID_LABELS)
            # Draw on an explicit Axes so each trait gets its own, self-contained figure.
            fig, ax = plt.subplots(figsize=(5, 3))
            sns.heatmap(
                cm,
                annot=True,
                fmt='d',
                cmap='Blues',
                xticklabels=VALID_LABELS,
                yticklabels=VALID_LABELS,
                ax=ax,
            )
            ax.set_xlabel('Predicted')
            ax.set_ylabel('True')
            ax.set_title(f'Confusion Matrix for {trait}')
            plt.show()
        except Exception as e:
            # Best effort: a plotting failure for one trait should not stop the others.
            print(f"Could not generate confusion matrix: {e}")
    else:
        print(f"No valid data to generate Confusion Matrix for {trait}.")