In [1]:
# notebooks/01_data_scraping.ipynb
# 1. Import necessary libraries
import pandas as pd
import os
import sys

# Add the 'scripts' directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'scripts')))
from scrape_reviews import scrape_and_preprocess_reviews

# 2. Define App IDs
app_ids = {
    "CBE": "com.cbe.mobilebanking", # Replace with actual CBE App ID
    "BOA": "com.bog.mobilebanking", # Replace with actual BOA App ID
    "Dashen": "com.dashenbank.app" # Replace with actual Dashen Bank App ID
}

# 3. Define output path
processed_data_dir = os.path.join(os.path.abspath(''), os.pardir, 'data', 'processed')
output_filepath = os.path.join(processed_data_dir, 'fintech_app_reviews_processed.csv')

# 4. Scrape and preprocess data
# This calls the function from your script
df_raw = scrape_and_preprocess_reviews(app_ids, min_reviews_per_bank=400)

# 5. Display basic info and save
if not df_raw.empty:
    print("\n--- Raw Data Overview (from scraping) ---")
    print(df_raw.head())
    print(f"Total reviews scraped: {len(df_raw)}")
    print("Reviews per bank:")
    print(df_raw['Bank/App Name'].value_counts())

    # Save to processed data folder
    df_raw.to_csv(output_filepath, index=False)
    print(f"\nProcessed data saved to {output_filepath}")
else:
    print("No data was scraped or processed.")

# 6. Verify the saved CSV (optional)
print("\n--- Verifying saved CSV ---")
if os.path.exists(output_filepath):
    df_loaded = pd.read_csv(output_filepath)
    print(f"Loaded {len(df_loaded)} reviews from {output_filepath}")
    print(df_loaded.info())
    print(df_loaded.isnull().sum())
else:
    print("Output CSV file not found.")

Scraping reviews for CBE (com.cbe.mobilebanking)...
Finished scraping 0 reviews for CBE.
Raw reviews for CBE saved to d:\10academy\10acadamey\week_02_challenge\scripts\..\data\raw\cbe_reviews_raw.csv
Scraping reviews for BOA (com.bog.mobilebanking)...
Finished scraping 0 reviews for BOA.
Raw reviews for BOA saved to d:\10academy\10acadamey\week_02_challenge\scripts\..\data\raw\boa_reviews_raw.csv
Scraping reviews for Dashen (com.dashenbank.app)...
Finished scraping 0 reviews for Dashen.
Raw reviews for Dashen saved to d:\10academy\10acadamey\week_02_challenge\scripts\..\data\raw\dashen_reviews_raw.csv
No reviews were scraped. Exiting.
No data was scraped or processed.

--- Verifying saved CSV ---
Output CSV file not found.
