In [None]:
# Task 1: Web Scraping Google Play Store Reviews

# Step 1: Import Necessary Libraries and Custom Functions
# --------------------------------------------------------
# Import the scraping helpers from the project's custom module.
# The 'web_scraping' module provides the scrape_reviews and save_reviews functions.
import sys
sys.path.append('../src')  # Adjust the path as needed to locate the module
from web_scraping import scrape_reviews, save_reviews  # Import functions
import pandas as pd  # For combining data
import os  # For directory management

# Step 2: Define Constants for Scraping
# -------------------------------------
# Maps each bank's display name to its app ID on the Google Play Store.
# The scraping loop iterates this dict and passes both values to scrape_reviews.
BANK_APPS = {
    "Commercial Bank of Ethiopia": "com.combanketh.mobilebanking",      # CBE Mobile Banking App
    "Bank of Abyssinia": "com.boa.boaMobileBanking",   # BoA Mobile app
    "Dashen Bank": "com.dashen.dashensuperapp"         # Dashen Bank Super App
}



# Directory that receives the combined CSV of all banks' reviews.
# The path is relative, so it resolves against the current working directory.
# NOTE(review): save_reviews is not passed this path -- confirm where the
# per-bank files end up.
OUTPUT_DIR = "data/raw"
os.makedirs(OUTPUT_DIR, exist_ok=True)  # Create the directory if it doesn't exist

# Step 3: Scrape Reviews for All Banks
# ------------------------------------
# Holds one DataFrame per bank whose scrape came back non-empty.
all_reviews = []

for bank_name, app_id in BANK_APPS.items():
    print(f"Starting scraping for {bank_name}...")
    bank_reviews = scrape_reviews(app_id, bank_name)

    # Guard: nothing came back for this bank, so report it and move on.
    if bank_reviews.empty:
        print(f"No reviews scraped for {bank_name}.")
        continue

    # Persist this bank's reviews on their own, then keep them for the
    # combined dataset built in the next step.
    save_reviews(bank_reviews, bank_name)
    all_reviews.append(bank_reviews)

# Step 4: Combine All Reviews into a Single Dataset
# -------------------------------------------------
# Compute the output path up front: Step 5 reads it unconditionally, so it
# must exist even when no reviews were scraped (previously a NameError).
combined_path = os.path.join(OUTPUT_DIR, "all_banks_reviews.csv")

if all_reviews:
    # Stack the per-bank DataFrames and renumber the rows from 0.
    combined_df = pd.concat(all_reviews, ignore_index=True)

    # Save the combined dataset without the index column.
    combined_df.to_csv(combined_path, index=False)
    print(f"Combined dataset saved successfully at {combined_path}.\n")

    # Optional: Display a preview of the in-memory combined dataset
    print("\nCombined Dataset Preview:")
    print(combined_df.head())
else:
    print("No reviews were scraped; no combined dataset to create.")


# Step 5: Verify the Combined Dataset
# -----------------------------------
# Reload the CSV from disk and show its first rows to confirm the file is
# readable. If this run scraped nothing but a file from an earlier run is
# still present, that older file is what gets displayed.
if os.path.exists(combined_path):
    print("\nCombined Dataset Preview:")
    combined_df = pd.read_csv(combined_path)
    print(combined_df.head())


Starting scraping for Commercial Bank of Ethiopia...
Scraping reviews for Commercial Bank of Ethiopia...
