In [52]:
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import pandas as pd

In [53]:
# Load the three sentiment-score tables (raw, daily, monthly) from the results folder.
raw_scores, daily_sentiment, monthly_sentiment = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../results/sentiment_scores_raw.csv',
        '../results/sentiment_scores_daily.csv',
        '../results/sentiment_scores_monthly.csv',
    )
)

In [54]:
# Preview the first rows of the daily table as the cell output.
# Swap in one of the commented-out lines to inspect the raw or monthly table instead.
# raw_scores.head()
daily_sentiment.head()
# monthly_sentiment.head()

Unnamed: 0.1,Unnamed: 0,textblob_polarity,textblob_polarity.1,textblob_subjectivity,textblob_subjectivity.1,nltk_si_compound,nltk_si_compound.1,nltk_si_pos,nltk_si_neg,nltk_si_neu,Complaint
0,,mean,std,mean,std,mean,std,mean,mean,mean,count
1,2015-03-19,0.0328,,0.3956,,0.3202,,0.041,0.047,0.912,1
2,2015-03-20,0.0662,0.059,0.4255,0.119,-0.9082,0.0077,0.0395,0.088,0.873,2
3,2015-03-21,0.0341,,0.1991,,-0.2158,,0.026,0.033,0.94,1
4,2015-03-22,-0.2,,0.4,,-0.7992,,0.089,0.327,0.584,1


In [55]:
def _prepare_aggregated_scores(scores, date_column='Unnamed: 0'):
    """Return an aggregated sentiment table as a numeric frame indexed by date.

    The aggregated CSVs are read without an index column, so the dates live in
    an ordinary column (``'Unnamed: 0'`` in the daily preview) and the first
    row holds the aggregate labels ('mean', 'std', 'count') instead of values.

    Parameters
    ----------
    scores : pandas.DataFrame
        Frame as returned by ``pd.read_csv`` for a daily/monthly scores file,
        or a frame already returned by this function.
    date_column : str, default 'Unnamed: 0'
        Column that holds the date strings.
        NOTE(review): only the daily layout is visible in this notebook; the
        monthly file is assumed to use the same column - confirm.

    Returns
    -------
    pandas.DataFrame
        Copy without the label row and without ``date_column``; every remaining
        column is coerced to numeric (unparseable cells become NaN) and the
        index is a ``DatetimeIndex`` named ``'date'``.

    Raises
    ------
    KeyError
        If ``date_column`` is missing from an unprepared frame.
    ValueError
        If a date cell cannot be parsed by ``pd.to_datetime``.
    """
    # Already prepared (the cell was re-run): hand it back untouched so that
    # re-execution does not strip another row each time.
    if isinstance(scores.index, pd.DatetimeIndex):
        return scores

    body = scores.iloc[1:]  # Remove the 'mean'/'std' label row

    # Parse the dates BEFORE the numeric coercion below. Converting the default
    # integer row labels with pd.to_datetime would treat them as nanoseconds
    # since the epoch (i.e. 1970-01-01), and pd.to_numeric would turn the date
    # strings themselves into NaN.
    dates = pd.DatetimeIndex(pd.to_datetime(body[date_column]), name='date')

    prepared = body.drop(columns=[date_column]).apply(pd.to_numeric, errors='coerce')
    prepared.index = dates
    return prepared


# For monthly sentiment
monthly_sentiment = _prepare_aggregated_scores(monthly_sentiment)

# For daily sentiment
daily_sentiment = _prepare_aggregated_scores(daily_sentiment)

In [56]:
def plot_sentiment_comparison(df):
    """Plot TextBlob polarity and VADER compound scores against ``df.index``.

    Opens a new 15x6 figure with one marked line per score column. The figure
    is left as the current pyplot figure; it is neither shown, saved nor
    returned here.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'textblob_polarity'`` and
        ``'nltk_si_compound'``; its index supplies the x values.
    """
    fig, ax = plt.subplots(figsize=(15, 6))

    # (column, legend label, marker) for each line, drawn in this order.
    line_specs = (
        ('textblob_polarity', 'TextBlob Polarity', 'o'),
        ('nltk_si_compound', 'VADER Compound', 's'),
    )
    for column, legend_label, marker_style in line_specs:
        ax.plot(df.index, df[column], label=legend_label, marker=marker_style)

    ax.set_title('Monthly Average Sentiment Scores: TextBlob vs VADER')
    ax.set_xlabel('Date')
    ax.set_ylabel('Sentiment Score')
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.7)
    plt.xticks(rotation=45)  # acts on `ax`, the current axes
    fig.tight_layout()

def plot_sentiment_components(df):
    """Draw a stacked-area chart of the VADER positive/neutral/negative columns.

    Opens a new 15x6 figure and stacks ``'nltk_si_pos'``, ``'nltk_si_neu'`` and
    ``'nltk_si_neg'`` (in that order) over ``df.index``. The legend is anchored
    to the right-hand edge of the axes. The figure is left as the current
    pyplot figure; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the three ``nltk_si_*`` component columns.
    """
    fig, ax = plt.subplots(figsize=(15, 6))

    component_columns = ('nltk_si_pos', 'nltk_si_neu', 'nltk_si_neg')
    layers = [df[column] for column in component_columns]
    ax.stackplot(
        df.index,
        layers,
        labels=['Positive', 'Neutral', 'Negative'],
        alpha=0.7,
    )

    ax.set_title('Monthly Sentiment Components Distribution')
    ax.set_xlabel('Date')
    ax.set_ylabel('Proportion')
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.grid(True, linestyle='--', alpha=0.7)
    plt.xticks(rotation=45)  # acts on `ax`, the current axes
    fig.tight_layout()

def plot_sentiment_heatmap(df):
    """Draw an annotated heatmap of pairwise correlations between the metrics.

    Correlations are computed with ``DataFrame.corr()`` over the six sentiment
    columns listed below, then rendered on a new 10x8 figure with a diverging
    colour map centred on zero. The figure is left as the current pyplot
    figure; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide every column named in ``metric_columns``.
    """
    metric_columns = [
        'textblob_polarity',
        'textblob_subjectivity',
        'nltk_si_compound',
        'nltk_si_pos',
        'nltk_si_neg',
        'nltk_si_neu',
    ]
    # Computed before the figure is opened, so a missing column fails early.
    correlations = df[metric_columns].corr()

    fig, ax = plt.subplots(figsize=(10, 8))
    heatmap_options = dict(annot=True, cmap='RdYlBu', center=0, fmt='.2f')
    sns.heatmap(correlations, ax=ax, **heatmap_options)
    ax.set_title('Correlation between Sentiment Metrics')
    fig.tight_layout()

def plot_sentiment_distribution(df):
    """Draw side-by-side violin plots of the TextBlob and VADER scores.

    The two score columns are copied into a frame with display-friendly column
    names, which seaborn uses as the category labels. A new 12x6 figure is
    opened and left as the current pyplot figure; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``'textblob_polarity'`` and ``'nltk_si_compound'``.
    """
    display_names = {
        'textblob_polarity': 'TextBlob Polarity',
        'nltk_si_compound': 'VADER Compound',
    }
    # Dict order fixes the left-to-right order of the violins.
    violin_frame = pd.DataFrame(
        {shown_as: df[column] for column, shown_as in display_names.items()}
    )

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.violinplot(data=violin_frame, ax=ax)
    ax.set_title('Distribution of Sentiment Scores')
    ax.set_ylabel('Score')
    ax.grid(True, linestyle='--', alpha=0.7)
    fig.tight_layout()

def plot_subjectivity_vs_sentiment(df):
    """Scatter TextBlob subjectivity (x) against polarity (y), coloured by VADER.

    Each point's colour comes from ``'nltk_si_compound'`` and a colour bar is
    attached for it. A new 10x8 figure is opened and left as the current
    pyplot figure; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``'textblob_subjectivity'``, ``'textblob_polarity'`` and
        ``'nltk_si_compound'``.
    """
    fig, ax = plt.subplots(figsize=(10, 8))

    x_values = df['textblob_subjectivity']
    y_values = df['textblob_polarity']
    colour_values = df['nltk_si_compound']
    points = ax.scatter(x_values, y_values, c=colour_values, cmap='RdYlBu', alpha=0.5)

    fig.colorbar(points, label='VADER Compound Score')
    ax.set_title('Subjectivity vs Polarity (colored by VADER score)')
    ax.set_xlabel('Subjectivity')
    ax.set_ylabel('Polarity')
    ax.grid(True, linestyle='--', alpha=0.7)
    fig.tight_layout()

def create_all_visualizations(raw_scores, daily_sentiment, monthly_sentiment):
    """Render every sentiment figure and save each one as a PNG.

    Each plotting helper is called with the table it is paired with below; the
    resulting current figure is saved under a fixed relative file name (so it
    lands in the current working directory) and then closed.

    Parameters
    ----------
    raw_scores : pandas.DataFrame
        Input for the distribution and subjectivity-vs-polarity figures.
    daily_sentiment : pandas.DataFrame
        Input for the correlation heatmap.
    monthly_sentiment : pandas.DataFrame
        Input for the comparison and components figures.
    """
    # (plotting helper, its input table, output file), rendered in this order.
    render_plan = (
        (plot_sentiment_comparison, monthly_sentiment, 'sentiment_comparison.png'),
        (plot_sentiment_components, monthly_sentiment, 'sentiment_components.png'),
        (plot_sentiment_heatmap, daily_sentiment, 'sentiment_heatmap.png'),
        (plot_sentiment_distribution, raw_scores, 'sentiment_distribution.png'),
        (plot_subjectivity_vs_sentiment, raw_scores, 'subjectivity_vs_sentiment.png'),
    )

    for draw_figure, table, png_name in render_plan:
        draw_figure(table)
        plt.savefig(png_name)
        plt.close()  # release the figure before the next one is opened

In [57]:
# Render all figures and write them out as PNG files.
create_all_visualizations(
    raw_scores=raw_scores,
    daily_sentiment=daily_sentiment,
    monthly_sentiment=monthly_sentiment,
)