In [3]:
import os
import pandas as pd
from pathlib import Path

from analytics.sentiment_analysis import EnsembleSentimentAnalysisCSV
# Notebook cell: run the ensemble sentiment analyzer over the condensed Enron
# CSV, report the corpus-level scores, persist the per-email scores, and print
# summary statistics computed from those per-email scores.

# (display label, score key) pairs shared by every report section below, so the
# three sentiment classes are always listed in the same order.
_SENTIMENT_COLUMNS = (
    ("Negative", "sentiment_neg"),
    ("Neutral", "sentiment_neu"),
    ("Positive", "sentiment_pos"),
)

enron_file_path = "../data/email_datasets/enron/enron_processed_condensed.csv"
analyzer = EnsembleSentimentAnalysisCSV(
    file_path=enron_file_path,
    content_column="parsed_content",
)

# --- Corpus-level sentiment -------------------------------------------------
print("Computing overall sentiment for Enron corpus...")
overall_sentiment = analyzer.compute_sentiment()
print("\nOverall Sentiment Distribution:")
for _label, _column in _SENTIMENT_COLUMNS:
    print(f"{_label}: {overall_sentiment[_column]:.3f}")

# --- Per-email sentiment ----------------------------------------------------
individual_sentiments = analyzer.get_individual_sentiments()
print(f"\nAnalyzed {len(individual_sentiments)} emails individually")

# Persist the per-email scores; the analyzer reports where it wrote them.
output_path = analyzer.save_sentiment_distribution(
    output_dir="output/enron_sentiment_distribution"
)
print(f"\nIndividual sentiment scores saved to: {output_path}")

# --- Summary statistics over the per-email scores ---------------------------
sentiment_df = pd.DataFrame(individual_sentiments)

print("\nSentiment Statistics:")
for _label, _column in _SENTIMENT_COLUMNS:
    print(f"Average {_label} Sentiment: {sentiment_df[_column].mean():.3f}")

# Count emails whose score for a class exceeds 0.5. An email with no score
# above 0.5 is counted in none of the three lines.
print("\nSentiment Distribution Analysis:")
for _label, _column in _SENTIMENT_COLUMNS:
    _dominant_count = sentiment_df[_column].gt(0.5).sum()
    print(
        f"Emails with dominant {_label.lower()} sentiment (>0.5): {_dominant_count}"
    )

  from .autonotebook import tqdm as notebook_tqdm


Computing overall sentiment for Enron corpus...

Overall Sentiment Distribution:
Negative: 0.057
Neutral: 0.257
Positive: 0.686

Analyzed 4956 emails individually

Individual sentiment scores saved to: output/enron_sentiment_distribution\sentiment_distribution_unknown_unknown.csv

Sentiment Statistics:
Average Negative Sentiment: 0.283
Average Neutral Sentiment: 0.271
Average Positive Sentiment: 0.447

Sentiment Distribution Analysis:
Emails with dominant negative sentiment (>0.5): 1233
Emails with dominant neutral sentiment (>0.5): 1352
Emails with dominant positive sentiment (>0.5): 2180
