In [3]:
# !pip install textblob vaderSentiment scikit-learn

In [7]:
# !pip install vaderSentiment

### Load data

In [23]:
import pandas as pd
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
from IPython.display import display

# Load the cleaned reviews; later cells read the 'review' and 'bank' columns of this frame.
df = pd.read_csv('../data/all_banks_reviews_cleaned.csv')

### Sentiment Analysis using TextBlob

In [24]:
def textblob_sentiment(text):
    """Classify ``text`` as 'positive', 'negative' or 'neutral' via TextBlob.

    The input is coerced with ``str`` before scoring, so non-string values
    are scored on their string form. A polarity strictly above 0.05 maps to
    'positive', strictly below -0.05 to 'negative', and everything else
    (both bounds included) to 'neutral'.
    """
    score = TextBlob(str(text)).sentiment.polarity
    if score > 0.05:
        return 'positive'
    # Only the negative/neutral split remains once the positive band is ruled out.
    return 'negative' if score < -0.05 else 'neutral'

# Label every review with textblob_sentiment and store the result as a new column.
df['textblob_sentiment'] = df['review'].apply(textblob_sentiment)

### Sentiment Analysis using VADER

In [25]:
# Single module-level analyzer; vader_sentiment looks this name up as a global on each call.
vader = SentimentIntensityAnalyzer()

def vader_sentiment(text):
    """Classify ``text`` as 'positive', 'negative' or 'neutral' via VADER.

    The input is coerced with ``str`` and scored by the module-level
    ``vader`` analyzer. A compound score of 0.05 or more maps to
    'positive', -0.05 or less to 'negative', and anything strictly in
    between to 'neutral'.
    """
    compound = vader.polarity_scores(str(text))['compound']
    if compound >= 0.05:
        return 'positive'
    # Both thresholds are inclusive, unlike the strict ones in textblob_sentiment.
    return 'negative' if compound <= -0.05 else 'neutral'

# Label every review with vader_sentiment and store the result as a new column.
df['vader_sentiment'] = df['review'].apply(vader_sentiment)


### Compare the Two Methods

In [27]:
# Per-bank label distribution for each method: every cell holds the
# {label: count} dict produced by value_counts() on that bank's rows.
sentiment_counts = df[['textblob_sentiment', 'vader_sentiment', 'bank']] \
    .groupby('bank') \
    .agg(lambda x: x.value_counts().to_dict())

# The dict cells are wider than pandas' default column-width cap and would be
# cut off with '...'; lift the cap for this one display call only so the two
# methods can actually be compared, without changing global display options.
with pd.option_context('display.max_colwidth', None):
    display(sentiment_counts)

def extract_keywords(data, top_n=10):
    """Return the ``top_n`` terms of ``data`` ranked by total TF-IDF weight.

    Parameters
    ----------
    data : iterable of str
        Documents to vectorize; passed unchanged to ``fit_transform``.
    top_n : int, default 10
        Maximum number of terms to return.

    Returns
    -------
    numpy.ndarray
        Feature names ordered from the highest to the lowest summed TF-IDF
        score. Terms with equal scores keep the relative order they have in
        ``get_feature_names_out()``.
    """
    import numpy as np  # local import: the notebook's import cell does not bring numpy in

    vectorizer = TfidfVectorizer(stop_words='english', max_features=100)
    tfidf_matrix = vectorizer.fit_transform(data)

    # Rank by the column-wise sum of weights; slicing the feature-name array
    # directly would ignore the scores and just return its first entries.
    term_scores = np.asarray(tfidf_matrix.sum(axis=0)).ravel()
    # Negate + stable sort gives descending order with deterministic ties.
    ranked = np.argsort(-term_scores, kind='stable')[:top_n]
    return vectorizer.get_feature_names_out()[ranked]

# Persist the reviews together with both sentiment columns; the index is left out of the file.
df.to_csv(path_or_buf='task2_sentiment_output.csv', index=False)
print("\n Output saved to task2_sentiment_output.csv")


Unnamed: 0_level_0,textblob_sentiment,vader_sentiment
bank,Unnamed: 1_level_1,Unnamed: 2_level_1
Bank of Abyssinia,"{'positive': 466, 'neutral': 374, 'negative': ...","{'positive': 479, 'neutral': 316, 'negative': ..."
Commercial Bank of Ethiopia,"{'positive': 662, 'neutral': 272, 'negative': 66}","{'positive': 682, 'neutral': 222, 'negative': 96}"
Dashen Bank,"{'positive': 333, 'neutral': 88, 'negative': 28}","{'positive': 333, 'neutral': 90, 'negative': 26}"



 Output saved to task2_sentiment_output.csv
