In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the CSV of reviews with sentiment labels (per the file name).
# The path is relative, so it resolves against the current working directory.
csv_path = "../data/reviews_with_sentiment_hf.csv"
df = pd.read_csv(csv_path)

# Preview the first rows to confirm the file loaded as expected
df.head()


In [None]:
# Candidate names for the sentiment column, in priority order.
possible_cols = ['sentiment_label', 'label', 'hf_label', 'vader_label', 'sentiment']

# Keep only the candidates that are actually present, preserving priority order,
# and use the first one (None when no candidate matches).
present_cols = [c for c in possible_cols if c in df.columns]
sent_col = present_cols[0] if present_cols else None

if sent_col is None:
    raise ValueError(f"Could not find a sentiment column. Found: {df.columns.tolist()}")
print("Using sentiment column:", sent_col)

# The per-bank breakdown further down groups on a column literally named 'bank',
# so fail early with a clear message when it is absent.
has_bank_col = 'bank' in df.columns
if not has_bank_col:
    raise ValueError("Could not find a 'bank' column. Check your CSV for the column that stores bank names.")

In [None]:
# 4) Compute counts and percentages per bank
# Sentiment categories to report, in display (and stacking) order.
sentiment_order = ['positive', 'neutral', 'negative']

# Normalise the label text before grouping so that variants such as
# 'POSITIVE' or ' Positive ' are counted under the expected category.
# Without this, a case mismatch makes every real column get discarded by the
# reordering step below, leaving all-zero counts and NaN percentages.
# The "string" dtype keeps missing labels as <NA>, which groupby skips by default.
normalized_labels = df[sent_col].astype("string").str.strip().str.lower()

# Rows = bank, columns = sentiment label, values = number of reviews.
sentiment_counts = (
    df.groupby(['bank', normalized_labels])
    .size()
    .unstack(fill_value=0)
)

# Make it visible when labels outside the three categories are being left out,
# instead of dropping them silently.
unexpected_labels = [c for c in sentiment_counts.columns if c not in sentiment_order]
if unexpected_labels:
    print("Note: ignoring labels outside positive/neutral/negative:", unexpected_labels)

# Keep exactly the three categories in the desired order; a category that
# never occurs in the data is added as a column of zeros.
sentiment_counts = sentiment_counts.reindex(columns=sentiment_order, fill_value=0)

# Convert to percent per bank (each row is divided by its own total).
# A bank with no reviews in any of the three categories yields NaN (0 / 0).
row_totals = sentiment_counts.sum(axis=1)
sentiment_percent = (sentiment_counts.div(row_totals, axis=0) * 100).round(1)

print("Counts:\n", sentiment_counts)
print("\nPercentages:\n", sentiment_percent)


In [None]:
# 5) Plot stacked bar chart
# Create the figure/axes explicitly and let pandas draw onto them;
# rot=0 keeps the bank names on the x axis horizontal.
fig, ax = plt.subplots(figsize=(8, 5))
sentiment_percent.plot(kind='bar', stacked=True, ax=ax, rot=0)
ax.set(ylabel="Percentage (%)", title="Sentiment Distribution per Bank")

# Anchor the legend just outside the right edge of the axes so it does not
# cover the bars; tight_layout then adjusts the figure padding.
ax.legend(title='Sentiment', bbox_to_anchor=(1.02, 1), loc='upper left')
fig.tight_layout()
plt.show()


In [None]:
# Stacked bar chart of the same per-bank percentages as the chart above,
# this time with matplotlib's default legend placement.
simple_ax = sentiment_percent.plot.bar(stacked=True, figsize=(8, 5), rot=0)
simple_ax.set_title("Sentiment Distribution per Bank")
simple_ax.set_ylabel("Percentage (%)")
plt.show()
