In [7]:
import os
import sys
import pandas as pd
# NOTE: Everything between the triple quotes below is a bare string literal,
# not executable code: the path setup, scraper import/run and DataFrame
# loading it contains are disabled. Evaluating the string has no effect, so
# none of the names it assigns (current_dir, project_root, raw_data_path,
# dataframes, ...) are defined by this cell. The `%pip` line inside it is
# IPython magic syntax, not plain Python.
'''
# ---- Path Configuration ----
# Get current directory (where notebook is located)
current_dir = os.getcwd()
print(f"Current directory: {current_dir}")

# Calculate project root path (one level up)
project_root = os.path.dirname(current_dir)
print(f"Project root: {project_root}")

# Add project root to Python path
if project_root not in sys.path:
    sys.path.append(project_root)

# Create data directories if they don't exist
raw_data_path = os.path.join(project_root, 'data', 'raw')
os.makedirs(raw_data_path, exist_ok=True)
print(f"Data directory: {raw_data_path}")

# ---- Import Scraping Module ----
from scripts.scraping.scrape_reviews import main

# ---- Install Required Libraries ----
# Install google-play-scraper if needed
try:
    from google_play_scraper import reviews
except ImportError:
    print("Installing google-play-scraper...")
    %pip install google-play-scraper
    from google_play_scraper import reviews

# ---- Run Scraping ----
print("Starting scraping process...")
original_dir = os.getcwd()  # Save current directory

try:
    # Change to project root for proper path resolution
    os.chdir(project_root)
    print(f"Changed working directory to: {os.getcwd()}")
    
    # Run the main scraping function
    main()
    print("Scraping completed successfully!")
    
except Exception as e:
    print(f"Error during scraping: {e}")
    # Check if log file exists
    log_path = os.path.join(project_root, 'scraper.log')
    if os.path.exists(log_path):
        print("\nScraper log contents:")
        with open(log_path, 'r') as log_file:
            print(log_file.read())
finally:
    # Always return to original directory
    os.chdir(original_dir)
    print(f"Restored working directory to: {os.getcwd()}")

# ---- Load and Display Results ----
print("\nLoading scraped data...")
folder_path = os.path.join(project_root, 'data', 'raw')

# Check if any files were created
if not os.listdir(folder_path):
    print(f"No CSV files found in {folder_path}")
    print("Possible causes:")
    print("1. Scraping failed to fetch reviews")
    print("2. File saving failed")
    print("3. Directory is not accessible")
    
    # Check log file
    log_path = os.path.join(project_root, 'scraper.log')
    if os.path.exists(log_path):
        print("\nScraper log contents:")
        with open(log_path, 'r') as log_file:
            print(log_file.read())
else:
    # Initialize dictionary for DataFrames
    dataframes = {}
    print(f"Found {len(os.listdir(folder_path))} files in data directory")

    # Loop through files
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            file_path = os.path.join(folder_path, filename)
            try:
                df = pd.read_csv(file_path)
                dataframes[filename] = df
                print(f"Loaded {len(df)} rows from {filename}")
            except Exception as e:
                print(f"Error loading {filename}: {e}")'''

 # ---- Load and Display Results ----
# Purpose: list every CSV under <project_root>/data/raw and print the first
# five rows of each; when none can be found, print troubleshooting hints and
# the scraper log (if one exists).

# `project_root` is only assigned inside the disabled (string-quoted) setup
# code, so running this cell on its own used to raise NameError. Keep the
# value if it is already defined (e.g. by an earlier notebook cell);
# otherwise derive it the same way the disabled setup did: the parent of the
# current working directory.
if 'project_root' not in globals():
    project_root = os.path.dirname(os.getcwd())

print("\nLoading and displaying scraped CSV data from raw/")

folder_path = os.path.join(project_root, 'data', 'raw')
try:
    # Sorted so files are shown in a stable order; os.listdir order is arbitrary.
    csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
except OSError as e:
    # A missing or unreadable directory is reported instead of crashing, then
    # handled by the "no CSV files" diagnostics below.
    print(f"Cannot read {folder_path}: {e}")
    csv_files = []

if not csv_files:
    print(f"No CSV files found in {folder_path}")
    print("Possible causes:")
    print("1. Scraping failed to fetch reviews")
    print("2. File saving failed")
    print("3. Directory is not accessible")

    # Check log file
    log_path = os.path.join(project_root, 'scraper.log')
    if os.path.exists(log_path):
        print("\nScraper log contents:")
        # Explicit encoding with replacement so an oddly-encoded log cannot
        # abort the diagnostics with UnicodeDecodeError.
        with open(log_path, 'r', encoding='utf-8', errors='replace') as log_file:
            print(log_file.read())
else:
    for csv_file in csv_files:
        file_path = os.path.join(folder_path, csv_file)
        try:
            df = pd.read_csv(file_path)
            print(f"\n--- {csv_file} ({len(df)} rows) ---")
            print(df.head(5))
        except Exception as e:
            # Best-effort preview: one unreadable file must not stop the rest.
            print(f"Error reading {csv_file}: {e}")


Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
Note: you may need to restart the kernel to use updated packages.

Sample data from each file:

Dashen_Bank_reviews_20250609_230552.csv (450 rows):
--------------------------------------------------------------------------------
+----+---------------+----------+------------+-------------+-------------+
|    | review_text   |   rating | date       | bank_name   | source      |
|----+---------------+----------+------------+-------------+-------------|
|  0 | kalid         |        5 | 2025-06-08 | Dashen Bank | Google Play |
|  1 | I like        |        2 | 2025-06-07 | Dashen Bank | Google Play |
|    | this          |          |            |             |             |
|    | mobile        |          |            |             |             |
|    | bankin        |          |