In [1]:
# Imports
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
import re

In [2]:
# Load your data with sentiment results (adjust path as needed)
df = pd.read_csv('data/bank_reviews_with_sentiment.csv')

# Preview data
df.head()

Unnamed: 0,review,rating,date,bank,source,sentiment_label,sentiment_score
0,the app is proactive and a good connections.,5,2025-06-05,Commercial Bank of Ethiopia,Google Play,POSITIVE,0.999868
1,I cannot send to cbebirr app. through this app.,3,2025-06-05,Commercial Bank of Ethiopia,Google Play,NEGATIVE,0.995335
2,good,4,2025-06-05,Commercial Bank of Ethiopia,Google Play,POSITIVE,0.999816
3,not functional,1,2025-06-05,Commercial Bank of Ethiopia,Google Play,NEGATIVE,0.999779
4,everytime you uninstall the app you have to re...,1,2025-06-04,Commercial Bank of Ethiopia,Google Play,NEGATIVE,0.990516


In [3]:
# Load spaCy English model
nlp = spacy.load("en_core_web_sm")

In [4]:
def extract_keywords(text):
    doc = nlp(text)
    keywords = [token.lemma_.lower() for token in doc if not token.is_stop and token.is_alpha]
    return keywords

In [5]:
# Apply keyword extraction
df['keywords'] = df['review'].apply(extract_keywords)

# Check keywords for first few reviews
df[['review', 'keywords']].head()

Unnamed: 0,review,keywords
0,the app is proactive and a good connections.,"[app, proactive, good, connection]"
1,I cannot send to cbebirr app. through this app.,"[send, cbebirr, app, app]"
2,good,[good]
3,not functional,[functional]
4,everytime you uninstall the app you have to re...,"[everytime, uninstall, app, reach, physically,..."


In [6]:
# Prepare corpus for TF-IDF
corpus = df['review'].tolist()

# Extract unigrams, bigrams, trigrams with TF-IDF
vectorizer = TfidfVectorizer(ngram_range=(1,3), stop_words='english', max_features=1000)
X = vectorizer.fit_transform(corpus)

# Get feature names (keywords and phrases)
tfidf_keywords = vectorizer.get_feature_names_out()

print(f"Top 20 keywords/phrases:\n{tfidf_keywords[:20]}")

Top 20 keywords/phrases:
['2025' 'a51' 'able' 'absolutely' 'abyssinia' 'abyssinia bank' 'acc'
 'access' 'access account' 'accessible' 'account' 'accounts' 'active'
 'actually' 'add' 'added' 'ahead' 'airtime' 'allow' 'allowed']


In [7]:
def identify_themes(keywords):
    themes = {
        'Account Access Issues': ['login', 'error', 'access', 'signin', 'password'],
        'Transaction Performance': ['transfer', 'slow', 'delay', 'payment', 'transaction'],
        'User Interface & Experience': ['ui', 'user', 'experience', 'interface', 'design'],
        'Customer Support': ['support', 'help', 'service', 'response', 'agent'],
        'Feature Requests': ['feature', 'request', 'need', 'add', 'improve']
    }
    identified_themes = []
    for theme, words in themes.items():
        pattern = re.compile('|'.join(words), re.IGNORECASE)
        if any(pattern.search(word) for word in keywords):
            identified_themes.append(theme)
    return identified_themes

In [8]:
df['identified_themes'] = df['keywords'].apply(identify_themes)

# Preview themes
df[['review', 'identified_themes']].head(10)

Unnamed: 0,review,identified_themes
0,the app is proactive and a good connections.,[]
1,I cannot send to cbebirr app. through this app.,[]
2,good,[]
3,not functional,[]
4,everytime you uninstall the app you have to re...,[]
5,አካውንት የምናስገባበት ቦታ ስም ጽፈን ነው ከዚህ በፊት የላክንባቸውን አ...,[]
6,best,[]
7,bezabih,[]
8,Best Mobile Banking app ever,[]
9,good,[]


In [9]:
# Explode themes for counting
theme_counts = df.explode('identified_themes')['identified_themes'].value_counts()

print("Theme frequency across all reviews:")
print(theme_counts)

Theme frequency across all reviews:
identified_themes
Feature Requests               111
Transaction Performance        105
User Interface & Experience     96
Customer Support                59
Account Access Issues           36
Name: count, dtype: int64


In [11]:
for theme in df['identified_themes'].explode().dropna().unique():
    print(f"\nSample reviews for theme: {theme}")
    theme_reviews = df[df['identified_themes'].apply(lambda x: theme in x if x else False)]['review']
    n_samples = min(3, len(theme_reviews))
    if n_samples > 0:
        sample_reviews = theme_reviews.sample(n_samples, random_state=42)
        for i, review in enumerate(sample_reviews, 1):
            print(f"{i}. {review}\n")
    else:
        print("No reviews found for this theme.\n")



Sample reviews for theme: Transaction Performance
1. very slow app. አቢሲኒያን ከሚያክል ባንክ የማይጠበቅ software app. ከቻላችሁ ሙሉ ለሙሉ እንደገና አሰሩት።በጣም ይመራያል ያሳፍራል።ለapp የወጣው ወጭ በሙስና የተበላ ነው የሚመስለው።

2. One of the best digital platforms I’ve used! From smooth transactions to a seamless shopping experience, Highly recommended

3. Dashen Super App is a game-changer! It’s fast, user-friendly, and packed with features that make everyday banking and transactions super convenient. I love how everything I need from mobile banking to utility payments is all in one place. The interface is clean, and everything works smoothly. Definitely one of the best apps out there. Highly recommended!


Sample reviews for theme: Account Access Issues
1. It has a Good performance but need more upgrade for more performance like when we login account not be fast balance show other thing looking good. Carry on. Thanks so much for Greatest service

2. the app gets a considerable improvements like language , QR scanner and unlimite