# Imports

In [3]:
# Imports
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from tabulate import tabulate

# Single VADER analyzer instance; analyze_sentiment reads it as a global
analyzer = SentimentIntensityAnalyzer()

# Pandas display settings for this notebook: no limit on displayed columns,
# a 1000-character output width, and cell text truncated at 50 characters
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.width', 1000),
    ('display.max_colwidth', 50),
):
    pd.set_option(option_name, option_value)

# Load data

In [5]:
# Read cleaned_reviews.csv and report which banks it contains and how many
# reviews each one has (printed for manual inspection; nothing is asserted).
df = pd.read_csv('cleaned_reviews.csv')
bank_column = df['bank']
print("Banks present:", bank_column.unique())
print("Number of reviews per bank:\n", bank_column.value_counts())

Banks present: ['Bank of Abyssinia' 'Commercial Bank of Ethiopia' 'Dashen Bank']
Number of reviews per bank:
 bank
Bank of Abyssinia              400
Commercial Bank of Ethiopia    400
Dashen Bank                    400
Name: count, dtype: int64


In [6]:
def load_data(file_path='cleaned_reviews.csv'):
    """Read a reviews CSV and print/display a short overview of it.

    The overview consists of the row count, the unique values and per-value
    counts of the ``bank`` column, and the first rows of the frame.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame, or ``None`` when the file is missing or any other
        exception is raised while reading or summarising it (the error is
        printed rather than propagated).
    """
    try:
        reviews = pd.read_csv(file_path)
        print(f"Loaded {len(reviews)} reviews from {file_path}")
        bank_column = reviews['bank']
        print("Unique banks:", bank_column.unique())
        print("Reviews per bank:")
        display(bank_column.value_counts().to_frame())
        print("Sample data:")
        display(reviews.head())
    except Exception as err:
        # A missing file gets its own message; everything else is reported
        # with the exception text. Both cases yield None.
        if isinstance(err, FileNotFoundError):
            print(f"Error: {file_path} not found")
        else:
            print(f"Error loading data: {err}")
        return None
    return reviews

# Load the reviews from the default path. load_data returns None when loading
# fails, so stop here rather than run the later cells without data.
df = load_data()
if df is None:
    raise SystemExit("Failed to load data")

Loaded 1200 reviews from cleaned_reviews.csv
Unique banks: ['Bank of Abyssinia' 'Commercial Bank of Ethiopia' 'Dashen Bank']
Reviews per bank:


Unnamed: 0_level_0,count
bank,Unnamed: 1_level_1
Bank of Abyssinia,400
Commercial Bank of Ethiopia,400
Dashen Bank,400


Sample data:


Unnamed: 0,review_id,review,rating,date,bank,source
0,2e219268-96fd-400c-8632-0b0d1044f487,This app is a joke. It crashes more than it wo...,1,2025-05-21,Bank of Abyssinia,Google Play
1,3559b91c-fad9-4032-bebe-cf99974b9628,"Hello, I’m facing a problem with the BOA Mobil...",1,2025-06-03,Bank of Abyssinia,Google Play
2,3271d45f-2900-41b6-878c-669ac563e60a,It keeps showing this pop up to turn off devel...,1,2025-04-19,Bank of Abyssinia,Google Play
3,7d475f3a-15b4-4e56-ae4b-1fc451749e61,"Edit: New bug, app not letting me type in my o...",1,2025-03-12,Bank of Abyssinia,Google Play
4,301f6927-9395-4cda-bb77-041a36323875,i entered incorrect security question by mista...,5,2025-05-10,Bank of Abyssinia,Google Play


# Standardize Bank Names

In [7]:
def standardize_bank_names(df):
    """Return a copy of ``df`` whose ``bank`` column uses full bank names.

    Abbreviations (``CBE``, ``BOA``, ``DB``) are expanded to the full names.
    Leading/trailing whitespace is ignored when looking a name up, so a value
    such as ``' BOA '`` is still recognised. Values that match no known name
    are kept exactly as they were.

    The input frame is not modified; before/after counts are printed and
    displayed for inspection.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``bank`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the standardized ``bank`` column.
    """
    bank_mapping = {
        'CBE': 'Commercial Bank of Ethiopia',
        'BOA': 'Bank of Abyssinia',
        'DB': 'Dashen Bank',
        # Identity entries let padded full names (e.g. 'Dashen Bank ') be
        # normalised through the same lookup.
        'Commercial Bank of Ethiopia': 'Commercial Bank of Ethiopia',
        'Bank of Abyssinia': 'Bank of Abyssinia',
        'Dashen Bank': 'Dashen Bank'
    }
    original_banks = df['bank']
    original_counts = original_banks.value_counts()

    # Trim only real strings so NaN and other non-string values pass through.
    lookup_keys = original_banks.map(
        lambda name: name.strip() if isinstance(name, str) else name
    )

    # Work on a copy so the caller's frame is left untouched and re-running
    # the cell always starts from the same input.
    standardized = df.copy()
    # Unknown names fall back to the original (untrimmed) value.
    standardized['bank'] = lookup_keys.map(bank_mapping).fillna(original_banks)

    print("Before standardization:")
    display(original_counts.to_frame())
    print("After standardization, unique banks:", standardized['bank'].unique())
    print("Reviews per bank after standardization:")
    display(standardized['bank'].value_counts().to_frame())
    return standardized

# Continue with the frame returned by standardize_bank_names.
df = standardize_bank_names(df)

Before standardization:


Unnamed: 0_level_0,count
bank,Unnamed: 1_level_1
Bank of Abyssinia,400
Commercial Bank of Ethiopia,400
Dashen Bank,400


After standardization, unique banks: ['Bank of Abyssinia' 'Commercial Bank of Ethiopia' 'Dashen Bank']
Reviews per bank after standardization:


Unnamed: 0_level_0,count
bank,Unnamed: 1_level_1
Bank of Abyssinia,400
Commercial Bank of Ethiopia,400
Dashen Bank,400


# Sentiment Analysis Function

In [9]:
def analyze_sentiment(review, pos_threshold=0.05, neg_threshold=-0.05):
    """Label one review as positive, negative or neutral using VADER.

    The label is derived from the ``compound`` value returned by the global
    ``analyzer.polarity_scores``. The cut-offs are inclusive, following the
    thresholds documented by VADER: ``compound >= 0.05`` is positive and
    ``compound <= -0.05`` is negative; anything in between is neutral.

    Parameters
    ----------
    review : object
        Review text. Non-string values (``None``, NaN, numbers) and blank
        strings are treated as neutral.
    pos_threshold : float, default 0.05
        Minimum compound score labelled ``'positive'``.
    neg_threshold : float, default -0.05
        Maximum compound score labelled ``'negative'``.

    Returns
    -------
    tuple[str, float]
        ``(label, compound_score)``. ``('neutral', 0.0)`` is returned for
        unusable input or when the analyzer raises.
    """
    # A str can never be NaN, so the isinstance check alone covers missing data.
    if not isinstance(review, str) or not review.strip():
        return 'neutral', 0.0

    try:
        compound = analyzer.polarity_scores(review)['compound']
    except Exception as e:
        # Best effort: one bad review must not abort the whole run.
        print(f"Error analyzing review '{review}': {e}")
        return 'neutral', 0.0

    if compound >= pos_threshold:
        label = 'positive'
    elif compound <= neg_threshold:
        label = 'negative'
    else:
        label = 'neutral'
    return label, compound

# Apply Sentiment Analysis

In [10]:
# Score every review and attach the label and compound score as new columns.
print("Analyzing sentiments...")
original_len = len(df)

# analyze_sentiment returns a (label, score) tuple per review. Collecting the
# tuples and building one frame avoids creating a pandas Series for every row,
# which is the slow part of apply(lambda x: pd.Series(...)).
sentiment_columns = ['sentiment_label', 'sentiment_score']
sentiment_results = pd.DataFrame(
    df['review'].map(analyze_sentiment).tolist(),
    index=df.index,  # keep row alignment with df
    columns=sentiment_columns,
)
df[sentiment_columns] = sentiment_results

# Scoring must not add or drop rows; print both counts so this is visible.
print(f"Rows before analysis: {original_len}, after: {len(df)}")
print("Reviews per bank after sentiment analysis:")
display(df['bank'].value_counts().to_frame())
print("Sample data with sentiment:")
display(df[['bank', 'review', 'rating', 'sentiment_label', 'sentiment_score']].head(10))

# Share of rows where both sentiment columns are populated.
rows_with_sentiment = df[sentiment_columns].notnull().all(axis=1).sum()
print(f"Sentiment coverage: {(rows_with_sentiment / len(df) * 100):.2f}%")

Analyzing sentiments...
Rows before analysis: 1200, after: 1200
Reviews per bank after sentiment analysis:


Unnamed: 0_level_0,count
bank,Unnamed: 1_level_1
Bank of Abyssinia,400
Commercial Bank of Ethiopia,400
Dashen Bank,400


Sample data with sentiment:


Unnamed: 0,bank,review,rating,sentiment_label,sentiment_score
0,Bank of Abyssinia,This app is a joke. It crashes more than it wo...,1,positive,0.7757
1,Bank of Abyssinia,"Hello, I’m facing a problem with the BOA Mobil...",1,negative,-0.1884
2,Bank of Abyssinia,It keeps showing this pop up to turn off devel...,1,negative,-0.6571
3,Bank of Abyssinia,"Edit: New bug, app not letting me type in my o...",1,positive,0.7962
4,Bank of Abyssinia,i entered incorrect security question by mista...,5,negative,-0.4639
5,Bank of Abyssinia,I don't know what is wrong with BOA as a bank ...,1,negative,-0.9198
6,Bank of Abyssinia,What's wrong with App. this days? it doesn't w...,2,negative,-0.5569
7,Bank of Abyssinia,I’m giving this app one star because there are...,1,negative,-0.2135
8,Bank of Abyssinia,"I have a fitayah account, a type of interest f...",1,positive,0.8481
9,Bank of Abyssinia,Worst App ever. Totally unreliable. And it did...,1,negative,-0.6249


Sentiment coverage: 100.00%


# Generate Sentiment Summary

In [11]:
# One summary row per (bank, rating): mean compound score, number of reviews,
# and a {label: count} dict of the sentiment labels in that group.
# Named aggregation ties each output column to its source column and function,
# so no positional renaming of MultiIndex columns is needed afterwards.
sentiment_summary = df.groupby(['bank', 'rating'], as_index=False).agg(
    avg_sentiment_score=('sentiment_score', 'mean'),
    review_count=('sentiment_score', 'count'),
    sentiment_distribution=(
        'sentiment_label',
        lambda labels: labels.value_counts().to_dict(),
    ),
)
print("Sentiment Summary:")
display(sentiment_summary)
print("Unique banks in summary:", sentiment_summary['bank'].unique())

Sentiment Summary:


Unnamed: 0,bank,rating,avg_sentiment_score,review_count,sentiment_distribution
0,Bank of Abyssinia,1,-0.208835,255,"{'negative': 144, 'positive': 58, 'neutral': 53}"
1,Bank of Abyssinia,2,-0.092497,29,"{'negative': 15, 'positive': 9, 'neutral': 5}"
2,Bank of Abyssinia,3,0.096222,36,"{'positive': 16, 'neutral': 11, 'negative': 9}"
3,Bank of Abyssinia,4,0.454133,12,"{'positive': 10, 'negative': 1, 'neutral': 1}"
4,Bank of Abyssinia,5,0.504649,68,"{'positive': 54, 'neutral': 8, 'negative': 6}"
5,Commercial Bank of Ethiopia,1,-0.212844,128,"{'negative': 74, 'positive': 35, 'neutral': 19}"
6,Commercial Bank of Ethiopia,2,0.06356,40,"{'positive': 17, 'negative': 16, 'neutral': 7}"
7,Commercial Bank of Ethiopia,3,0.218295,63,"{'positive': 35, 'negative': 17, 'neutral': 11}"
8,Commercial Bank of Ethiopia,4,0.413567,82,"{'positive': 62, 'negative': 14, 'neutral': 6}"
9,Commercial Bank of Ethiopia,5,0.485815,87,"{'positive': 72, 'negative': 11, 'neutral': 4}"


Unique banks in summary: ['Bank of Abyssinia' 'Commercial Bank of Ethiopia' 'Dashen Bank']


# Save Results

In [12]:
# Write the per-review results and the grouped summary to CSV (no index column).
df.to_csv('reviews_with_sentiment.csv', index=False)
sentiment_summary.to_csv('sentiment_summary.csv', index=False)
review_rows = len(df)
summary_rows = len(sentiment_summary)
print(f"Saved sentiment results to reviews_with_sentiment.csv ({review_rows} rows)")
print(f"Saved summary to sentiment_summary.csv ({summary_rows} rows)")

# Read both files back and show which banks they contain.
print("\nVerifying saved files:")
saved_df = pd.read_csv('reviews_with_sentiment.csv')
print("Reviews per bank in reviews_with_sentiment.csv:")
saved_bank_counts = saved_df['bank'].value_counts().to_frame()
display(saved_bank_counts)
saved_summary = pd.read_csv('sentiment_summary.csv')
print("Unique banks in sentiment_summary.csv:")
saved_summary_banks = saved_summary['bank'].unique()
display(saved_summary_banks)

Saved sentiment results to reviews_with_sentiment.csv (1200 rows)
Saved summary to sentiment_summary.csv (15 rows)

Verifying saved files:
Reviews per bank in reviews_with_sentiment.csv:


Unnamed: 0_level_0,count
bank,Unnamed: 1_level_1
Bank of Abyssinia,400
Commercial Bank of Ethiopia,400
Dashen Bank,400


Unique banks in sentiment_summary.csv:


array(['Bank of Abyssinia', 'Commercial Bank of Ethiopia', 'Dashen Bank'],
      dtype=object)