In [5]:
import pandas as pd
import os
import sys
import warnings
# Project root. The original author's absolute path is kept as the default, but
# it can be overridden with the BANK_REVIEWS_ROOT environment variable so the
# notebook also runs on machines that do not share that drive layout.
PROJECT_ROOT = (
    os.environ.get('BANK_REVIEWS_ROOT')
    or r'D:\10academy\week2_update\Bank-reviews-analysis'
)
os.chdir(PROJECT_ROOT)
# Make project-local modules importable. The membership check keeps sys.path
# from accumulating duplicate entries when this cell is re-run.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

# Suppress warnings
warnings.filterwarnings("ignore")
# Relative to the project root set by os.chdir above.
DATA_DIR = 'notebooks/data'
final_df = pd.read_csv(os.path.join(DATA_DIR, 'all_processed_reviews.csv'))
print("Loaded combined data for analysis.")
print(final_df.head())

Loaded combined data for analysis.
                                         review_text  rating        date  \
0           this good app but screenshot must enable       4  2025-08-18   
1                              the fast ❤❤❤❤❤❤❤❤❤❤🇪🇹       5  2025-08-18   
2  በፊት ብር ትራንስፈር የተደረገባቸዉን አካዉንቶች remove አርጓል የሚያ...       1  2025-08-18   
3                                               Good       4  2025-08-18   
4                                           Best app       5  2025-08-18   

  bank_name       source                                   processed_review  
0       CBE  Google Play                         good app screenshot enable  
1       CBE  Google Play                                               fast  
2       CBE  Google Play  በፊት ብር ትራንስፈር የተደረገባቸዉን አካዉንቶች remove አርጓል የሚያ...  
3       CBE  Google Play                                               good  
4       CBE  Google Play                                           good app  


In [6]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Module-level VADER analyzer, created once and reused by get_sentiment for every review.
analyzer = SentimentIntensityAnalyzer()

def get_sentiment(text, pos_threshold=0.05, neg_threshold=-0.05):
    """Score a piece of text with the shared analyzer and bucket it into a label.

    Parameters
    ----------
    text : object
        Review text. Any value that is not a ``str`` (e.g. ``None`` or a NaN
        standing in for a missing value) is treated as neutral.
    pos_threshold : float, default 0.05
        Compound scores at or above this value are labelled ``'positive'``.
    neg_threshold : float, default -0.05
        Compound scores at or below this value are labelled ``'negative'``.

    Returns
    -------
    tuple
        ``(compound_score, label)`` where ``label`` is ``'positive'``,
        ``'negative'`` or ``'neutral'``.
    """
    if not isinstance(text, str):
        # Return a float so the score has the same type on every code path.
        return 0.0, 'neutral'

    score = analyzer.polarity_scores(text)['compound']
    if score >= pos_threshold:
        sentiment = 'positive'
    elif score <= neg_threshold:
        sentiment = 'negative'
    else:
        sentiment = 'neutral'
    return score, sentiment

# Score every review once, then attach both result columns in a single
# assignment. Building one DataFrame from the list of (score, label) tuples
# avoids constructing a pandas Series per row and also works when final_df
# has no rows.
# NOTE(review): scoring runs on 'processed_review', whose sample rows show words
# such as "this"/"but" already removed — confirm that this preprocessing does
# not strip cues the analyzer relies on; 'review_text' may be the better input.
sentiment_results = [get_sentiment(text) for text in final_df['processed_review']]
final_df[['sentiment_score', 'sentiment']] = pd.DataFrame(
    sentiment_results,
    columns=['sentiment_score', 'sentiment'],
    index=final_df.index,
)
print("Sentiment analysis complete.")

Sentiment analysis complete.


In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer

def _top_terms(corpus, n):
    """Return the terms TfidfVectorizer(max_features=n) keeps for a one-document corpus."""
    if not corpus.strip():
        # Nothing to tokenise: skip the vectorizer, which cannot build a
        # vocabulary from blank input.
        return []
    vectorizer = TfidfVectorizer(max_features=n)
    vectorizer.fit([corpus])
    return vectorizer.get_feature_names_out()


def get_top_themes(df, n=10):
    """Print the top ``n`` keywords found in positive and in negative reviews.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'sentiment'`` column (rows equal to ``'positive'`` /
        ``'negative'`` are used) and a ``'processed_review'`` text column.
    n : int, default 10
        Maximum number of keywords to report per sentiment.

    Returns
    -------
    None
        The keywords are printed, not returned.

    Notes
    -----
    All reviews of one sentiment are joined into a single document before
    vectorising. Keywords are printed in the order the vectorizer returns them,
    which is not a ranking (the captured output is alphabetical).
    """
    positive_reviews = df[df['sentiment'] == 'positive']['processed_review'].str.cat(sep=' ')
    negative_reviews = df[df['sentiment'] == 'negative']['processed_review'].str.cat(sep=' ')

    positive_themes = _top_terms(positive_reviews, n)
    negative_themes = _top_terms(negative_reviews, n)

    # Use n in the label so it stays truthful when a caller overrides the default.
    print(f"\nTop {n} Positive Themes:", positive_themes)
    print(f"Top {n} Negative Themes:", negative_themes)

# Report positive and negative keywords for the full dataset, using the default n=10.
get_top_themes(final_df)


Top 10 Positive Themes: ['app' 'bank' 'banking' 'dashen' 'easy' 'good' 'like' 'nice' 'use' 'work']
Top 10 Negative Themes: ['app' 'bad' 'bank' 'banking' 'crash' 'fix' 'mobile' 'time' 'update'
 'work']


In [8]:
# Persist the sentiment-annotated reviews next to the input data, without the index column.
analyzed_csv_path = os.path.join(DATA_DIR, 'final_analyzed_reviews.csv')
final_df.to_csv(analyzed_csv_path, index=False)
print("✅ Final analyzed data saved to final_analyzed_reviews.csv")

✅ Final analyzed data saved to final_analyzed_reviews.csv
