## 1 Setup: Import libraries and prepare environment for Google Play reviews analysis


In [69]:

# Standard library
import os
import sys

# Third-party
import pandas as pd
from dotenv import load_dotenv
from google_play_scraper import reviews
from tqdm import tqdm

# Add the parent of the current working directory to the import path so the
# local `scripts` package can be imported from this notebook.
sys.path.append(os.path.abspath('..'))
from scripts.preprocessing_scripts import DataSetLoadAndPreprocess

# Load settings from ../configs/.env into the process environment. Kept as the
# cell's final expression so load_dotenv's return value is displayed.
load_dotenv(dotenv_path='../configs/.env')

True

In [70]:
# Single shared helper instance; the cells below call its accumulate_reviews,
# preprocessing_data and export_to_csv methods.
processor = DataSetLoadAndPreprocess()

### 2 Load Google Play App IDs from environment variables

In [71]:
# Google Play app IDs for the three banks, read from the process environment.
# A variable that is not set falls back to the empty string.
CBE_APP_ID, DASHEN_APP_ID, ABYSSINIA_APP_ID = (
    os.environ.get(env_key, '')
    for env_key in ('CBE_APP_ID', 'DASHEN_APP_ID', 'ABYSSINIA_APP_ID')
)

In [72]:
# Collect Dashen Bank reviews through the shared processor, relying on
# accumulate_reviews' default settings (the recorded run produced 400 rows),
# then show how many rows ended up in the frame.
dashen_reviews = processor.accumulate_reviews(app_id=DASHEN_APP_ID)
df_dashen = pd.DataFrame(data=dashen_reviews)
df_dashen.shape[0]

400

In [73]:
# Collect CBE reviews through the shared processor, relying on
# accumulate_reviews' default settings (the recorded run produced 400 rows),
# then show how many rows ended up in the frame.
cbe_reviews = processor.accumulate_reviews(app_id=CBE_APP_ID)
df_cbe = pd.DataFrame(data=cbe_reviews)
df_cbe.shape[0]

400

In [74]:
# Collect Bank of Abyssinia reviews through the shared processor, relying on
# accumulate_reviews' default settings (the recorded run produced 400 rows),
# then show how many rows ended up in the frame.
boa_reviews = processor.accumulate_reviews(app_id=ABYSSINIA_APP_ID)
df_boa = pd.DataFrame(data=boa_reviews)
df_boa.shape[0]

400

In [75]:
# Run the shared preprocessing step on the Bank of Abyssinia frame, labelled
# with bank_name='BoA', and preview the first five rows.
# NOTE(review): presumably this standardizes the column names — confirm in
# scripts.preprocessing_scripts.
df_boa_cleaned = processor.preprocessing_data(data_frame=df_boa, bank_name='BoA')
df_boa_cleaned.head(5)

Unnamed: 0,reviewId,userName,userImage,review,rating,thumbsUpCount,reviewCreatedVersion,date,replyContent,repliedAt,appVersion,bank,source
0,3463230e-f9f7-4be3-a632-fdd8d017ce84,Yasin Alemu,https://play-lh.googleusercontent.com/a/ACg8oc...,🙏👍,5,0,25.05.03,2025-11-29,,,25.05.03,BoA,Google Play
1,a6cbfa34-f2b1-4a16-96b6-c94f58cea76f,Wariyo Dida,https://play-lh.googleusercontent.com/a-/ALV-U...,Very Good,5,0,,2025-11-28,,,,BoA,Google Play
2,fc67d12c-92e2-45aa-a9e0-011f58a583bc,Hailegebrail Tegegn,https://play-lh.googleusercontent.com/a-/ALV-U...,goof,5,0,,2025-11-28,,,,BoA,Google Play
3,11306fb9-5571-4950-8d32-604c5402242f,Tsegay ab,https://play-lh.googleusercontent.com/a/ACg8oc...,good!,5,0,,2025-11-28,,,,BoA,Google Play
4,809c46d2-730e-446a-9061-2a45e978ad9d,Yohanis Fikadu,https://play-lh.googleusercontent.com/a/ACg8oc...,good jop,5,0,25.09.03,2025-11-27,,,25.09.03,BoA,Google Play


In [76]:
# Run the shared preprocessing step on the CBE frame, labelled with
# bank_name='CBE', and preview the first five rows.
# NOTE(review): presumably this standardizes the column names — confirm in
# scripts.preprocessing_scripts.
df_cbe_cleaned = processor.preprocessing_data(data_frame=df_cbe, bank_name='CBE')
df_cbe_cleaned.head(5)

Unnamed: 0,reviewId,userName,userImage,review,rating,thumbsUpCount,reviewCreatedVersion,date,replyContent,repliedAt,appVersion,bank,source
0,cb37b096-e071-4f0f-a8fd-067b7d71706d,Kamil Tesfaye,https://play-lh.googleusercontent.com/a-/ALV-U...,CBE ይለያል።,5,0,5.2.1,2025-11-29,,,5.2.1,CBE,Google Play
1,70f504ff-daed-40d9-9c89-cc49a95ef659,Abde Semed,https://play-lh.googleusercontent.com/a-/ALV-U...,it's special for me,5,0,5.2.1,2025-11-29,,,5.2.1,CBE,Google Play
2,28f229b5-0026-41b9-a1eb-b76e74736f63,TOMIZ Creativity,https://play-lh.googleusercontent.com/a-/ALV-U...,Make it user friendly.,2,0,,2025-11-29,,,,CBE,Google Play
3,68d8daea-db47-4e23-a692-755173dea983,Tesfaye Abdi,https://play-lh.googleusercontent.com/a-/ALV-U...,maaliif daddafee install gaafata,3,0,5.2.1,2025-11-28,,,5.2.1,CBE,Google Play
4,ee0dbb0e-4eb0-47b5-9874-c37877493f99,Betelhem Kebede,https://play-lh.googleusercontent.com/a/ACg8oc...,good app,5,0,,2025-11-28,,,,CBE,Google Play


In [77]:
# Run the shared preprocessing step on the Dashen frame, labelled with
# bank_name='Dashen', and preview the first five rows.
# NOTE(review): presumably this standardizes the column names — confirm in
# scripts.preprocessing_scripts.
df_dashen_cleaned = processor.preprocessing_data(data_frame=df_dashen, bank_name='Dashen')
df_dashen_cleaned.head(5)

Unnamed: 0,reviewId,userName,userImage,review,rating,thumbsUpCount,reviewCreatedVersion,date,replyContent,repliedAt,appVersion,bank,source
0,5860d6f3-15d5-456b-aabe-bf92bd885546,Berhanu Ashebir BA,https://play-lh.googleusercontent.com/a/ACg8oc...,very smart App easy to use and friendly,5,0,1.8.1,2025-11-29,,NaT,1.8.1,Dashen,Google Play
1,39f85efe-9d6d-4974-910c-4f7174f6a8e0,Gashu Mesfin,https://play-lh.googleusercontent.com/a-/ALV-U...,Very exemplery App to other Bank Aps !,5,0,,2025-11-29,,NaT,,Dashen,Google Play
2,3ea761da-10b0-472e-9c3f-89a9f23e4c88,Ashenafi sancho,https://play-lh.googleusercontent.com/a/ACg8oc...,good,5,0,1.8.1,2025-11-29,,NaT,1.8.1,Dashen,Google Play
3,157e868c-386f-4837-bb1f-1301dd194075,Esayas Dereje,https://play-lh.googleusercontent.com/a/ACg8oc...,It Is An Amazing app,4,0,1.0.14,2025-11-28,,NaT,1.0.14,Dashen,Google Play
4,9504f5cc-7f16-4fe0-8ba5-f76eef162f8b,Selamawit Yegebawal,https://play-lh.googleusercontent.com/a/ACg8oc...,its fast and easy to communicate to the app an...,5,0,1.8.1,2025-11-26,,NaT,1.8.1,Dashen,Google Play


In [78]:
# Fraction of Bank of Abyssinia rows whose `rating` is missing
# (0.0 means no row lacks a rating).
df_boa_cleaned['rating'].isna().mean()

np.float64(0.0)

In [79]:
# Parse each cleaned frame's `date` column with pandas.to_datetime and write
# the result back onto the same frame.
df_boa_cleaned['date'] = df_boa_cleaned['date'].pipe(pd.to_datetime)
df_dashen_cleaned['date'] = df_dashen_cleaned['date'].pipe(pd.to_datetime)
df_cbe_cleaned['date'] = df_cbe_cleaned['date'].pipe(pd.to_datetime)

### 3 Export cleaned data from each bank to its respective CSV file

In [80]:
# Hand each bank's cleaned frame to the processor's export helper, one target
# file per bank under ../data/. The recorded output shows one 400-step
# progress bar per call.
processor.export_to_csv('../data/boa_cleaned.csv', data=df_boa_cleaned) # Bank of Abyssinia
processor.export_to_csv('../data/cbe_cleaned.csv', data=df_cbe_cleaned) # CBE
processor.export_to_csv('../data/dashen_cleaned.csv', data=df_dashen_cleaned) # Dashen

100%|██████████| 400/400 [00:00<00:00, 18382.97it/s]
100%|██████████| 400/400 [00:00<00:00, 19980.25it/s]
100%|██████████| 400/400 [00:00<00:00, 22811.56it/s]
