In [4]:
import csv
from datetime import datetime
from google_play_scraper import Sort, reviews
import os
import warnings
import sys

# Switch to the project root so that relative paths used below (e.g. DATA_DIR)
# resolve against it. NOTE(review): this is a machine-specific absolute path.
os.chdir(r'D:\10academy\Bank-reviews-analysis_W2\Bank-reviews-analysis')
sys.path.append(os.getcwd())  # Make the project root importable so `scripts` can be found
from scripts.preprocess_reviews import preprocess_reviews  # Import preprocessing function

# Suppress all warnings for the rest of the session
warnings.filterwarnings("ignore")

# Directory (relative to the working directory set above) where raw and
# cleaned review CSV files are written
DATA_DIR = 'notebooks/data'

def scrape_play_store_reviews(app_id, bank_name, count=400):
    """Fetch the newest Google Play reviews for an app and save them as CSV.

    The reviews are requested with ``lang='en'``, ``country='us'`` and
    ``Sort.NEWEST``. Each review is written as one row with the columns
    ``review_text``, ``rating``, ``date`` (``YYYY-MM-DD``), ``bank_name`` and
    ``source`` (always ``'Google Play'``). The output file is created inside
    ``DATA_DIR`` and named ``<bank_name>_reviews_<timestamp>.csv``.

    Args:
        app_id: Google Play package identifier of the app to scrape.
        bank_name: Label stored in the ``bank_name`` column and used in the
            output file name.
        count: Number of reviews to request. Defaults to 400.

    Returns:
        The path of the CSV file that was written.
    """
    # The second element returned by reviews() is not needed here.
    results, _ = reviews(
        app_id,
        lang='en',
        country='us',
        sort=Sort.NEWEST,
        count=count,
    )

    # Create the output directory on first use so open() below cannot fail
    # just because the folder has not been created yet.
    os.makedirs(DATA_DIR, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = os.path.join(DATA_DIR, f'{bank_name}_reviews_{timestamp}.csv')

    # newline='' hands line-ending control to the csv module.
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=['review_text', 'rating', 'date', 'bank_name', 'source'])
        writer.writeheader()
        writer.writerows(
            {
                'review_text': entry['content'],
                'rating': entry['score'],
                # entry['at'] is formatted with strftime, i.e. it is expected
                # to be a datetime-like object.
                'date': entry['at'].strftime('%Y-%m-%d'),
                'bank_name': bank_name,
                'source': 'Google Play',
            }
            for entry in results
        )

    print(f"✅ Saved {len(results)} reviews to {filename}")
    return filename  # Return the filename for later use

def display_sample_data(filename, sample_count=5):
    """Print a header and the first rows of a CSV file.

    Each data row is printed as the dict produced by ``csv.DictReader``
    (column name -> string value).

    Args:
        filename: Path of the UTF-8 encoded CSV file to preview.
        sample_count: Maximum number of data rows to print. Defaults to 5.
    """
    print(f"\nSample data from {filename}:")
    # newline='' lets the csv module handle line endings itself; without it,
    # "\r\n" inside a quoted field would be rewritten to "\n" while reading.
    with open(filename, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        # zip() stops after sample_count rows or at end of file, whichever
        # comes first.
        for _, row in zip(range(sample_count), reader):
            print(row)

def display_sample_cleaned_data(filename, sample_count=5):
    """Print a header and the first rows of a cleaned-reviews CSV file.

    Each data row is printed as the dict produced by ``csv.DictReader``
    (column name -> string value).

    Args:
        filename: Path of the UTF-8 encoded CSV file to preview.
        sample_count: Maximum number of data rows to print. Defaults to 5.
    """
    print(f"\nSample cleaned data from {filename}:")
    # newline='' lets the csv module handle line endings itself; without it,
    # "\r\n" inside a quoted field would be rewritten to "\n" while reading.
    with open(filename, mode='r', newline='', encoding='utf-8') as file:
        rows = csv.DictReader(file)
        shown = 0
        for row in rows:
            # Stop as soon as the requested number of rows has been printed.
            if shown >= sample_count:
                break
            print(row)
            shown += 1

# Entry point: for each bank, scrape its reviews, preview the raw CSV,
# preprocess it, save the cleaned result and preview that as well.
if __name__ == "__main__":
    # Bank label -> Google Play app id passed to scrape_play_store_reviews
    banks = {
        "CBE": "com.combanketh.mobilebanking",
        "BOA": "com.boa.boaMobileBanking",
        "Dashen": "com.dashen.dashensuperapp"
    }

    for bank_name, app_id in banks.items():
        filename = scrape_play_store_reviews(app_id, bank_name)
        display_sample_data(filename) 
        
        # preprocess_reviews is imported from scripts.preprocess_reviews; its
        # result is used below via .to_csv() and len()
        # (presumably a pandas DataFrame -- TODO confirm).
        cleaned_data = preprocess_reviews(filename)  # Preprocess the saved reviews
        
        # The cleaned file name has no timestamp, so each run overwrites the
        # previous cleaned file for the same bank.
        cleaned_filename = os.path.join(DATA_DIR, f'cleaned_{bank_name}_reviews.csv')  # Save cleaned data in DATA_DIR
        cleaned_data.to_csv(cleaned_filename, index=False)  # Save cleaned data
        print(f"✅ Cleaned data saved for {bank_name}: {len(cleaned_data)} records")   # Show how many cleaned records were saved
        display_sample_cleaned_data(cleaned_filename)
        print('\n' * 2)  # Blank lines to separate the output of consecutive banks
      

✅ Saved 400 reviews to notebooks/data\CBE_reviews_20250607_111502.csv

Sample data from notebooks/data\CBE_reviews_20250607_111502.csv:
{'review_text': '"Why don’t your ATMs support account-to-account transfers like other countries( Kenya, Nigeria , South africa)"', 'rating': '4', 'date': '2025-06-06', 'bank_name': 'CBE', 'source': 'Google Play'}
{'review_text': 'what is this app problem???', 'rating': '1', 'date': '2025-06-05', 'bank_name': 'CBE', 'source': 'Google Play'}
{'review_text': 'the app is proactive and a good connections.', 'rating': '5', 'date': '2025-06-05', 'bank_name': 'CBE', 'source': 'Google Play'}
{'review_text': 'I cannot send to cbebirr app. through this app.', 'rating': '3', 'date': '2025-06-05', 'bank_name': 'CBE', 'source': 'Google Play'}
{'review_text': 'good', 'rating': '4', 'date': '2025-06-05', 'bank_name': 'CBE', 'source': 'Google Play'}
✅ Cleaned data saved for CBE: 320 records

Sample cleaned data from notebooks/data\cleaned_CBE_reviews.csv:
{'review_text