In [1]:
# src/run_pipeline.py

import os
import pandas as pd

from config import APPS, COUNTRIES
from scrape_apple_reviews import fetch_apple_reviews
from build_weekly_tables import build_weekly_metrics, add_metadata, weekly_by_category
from forecast_prophet import prophet_forecast_weekly, last8_vs_next8_summary
from make_figures import ensure_dir, plot_forecast_card, plot_leaderboard

# The project root is the parent of the directory that contains this file;
# both output locations are derived from it.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(_THIS_DIR)

# Destination for the CSV tables written by main().
DATA_PROCESSED = os.path.join(BASE_DIR, "data", "processed")
# Destination for the PNG figures written by main().
OUTPUT_FIG = os.path.join(BASE_DIR, "outputs", "figures")

def _category_slug(category):
    """Return *category* lowercased with spaces replaced by underscores."""
    return category.lower().replace(" ", "_")


def _scrape_all_reviews(max_pages, sleep):
    """Fetch reviews for every (app, country) pair and keep non-empty results.

    Returns a list of the objects returned by ``fetch_apple_reviews`` whose
    ``len`` is greater than zero (presumably DataFrames -- the caller passes
    the list to ``pd.concat``).
    """
    frames = []
    for app in APPS:
        for country in COUNTRIES:
            fetched = fetch_apple_reviews(
                app["app_id"], country=country, max_pages=max_pages, sleep=sleep
            )
            if len(fetched) > 0:
                frames.append(fetched)
    return frames


def _forecast_category(weekly_cat, category, horizon_weeks, min_weeks):
    """Forecast one category, write its figure and CSV, and return its summary.

    Returns ``None`` (after printing a notice) when *category* has fewer than
    *min_weeks* rows in *weekly_cat*.
    """
    df_cat = weekly_cat[weekly_cat["category"] == category].sort_values("week")
    if len(df_cat) < min_weeks:
        print(f"Skipping {category}: too few weeks ({len(df_cat)}).")
        return None

    # Each helper receives its own fresh column selection so that neither can
    # be affected if the other modifies its input in place.
    fc = prophet_forecast_weekly(df_cat[["week", "review_count"]], periods=horizon_weeks)
    summary = last8_vs_next8_summary(df_cat[["week", "review_count"]], fc)
    summary["category"] = category
    summary["model"] = "Prophet"
    summary["horizon_weeks"] = horizon_weeks

    # One slug shared by the figure and the CSV keeps their file names in sync.
    slug = _category_slug(category)
    plot_forecast_card(
        fc,
        summary["last_observed_week"],
        title=f"{category} — Weekly review velocity forecast (Next {horizon_weeks} weeks)",
        outpath=os.path.join(OUTPUT_FIG, f"forecast_{slug}.png"),
    )

    # Export the full forecast, tagged with the category it belongs to.
    fc_out = fc.copy()
    fc_out["category"] = category
    fc_out.to_csv(
        os.path.join(DATA_PROCESSED, f"forecast_timeseries_{slug}.csv"),
        index=False,
    )
    return summary


def main(
    *,
    forecast_categories=("Price intelligence", "Virtual try-on"),
    horizon_weeks=8,
    min_weeks=12,
    max_pages=10,
    sleep=0.2,
):
    """Run the pipeline: scrape reviews, build weekly tables, plot, forecast.

    Called with no arguments this behaves exactly like the original
    hard-coded pipeline; the keyword-only parameters only expose the values
    that used to be literals.

    Args:
        forecast_categories: Category labels to forecast. Each one is matched
            against the ``category`` column of the weekly-by-category table.
        horizon_weeks: Value passed as ``periods`` to
            ``prophet_forecast_weekly``, recorded in the summary and shown in
            the figure title.
            NOTE(review): ``last8_vs_next8_summary`` is named after an 8-week
            window -- confirm it adapts before using a different horizon.
        min_weeks: Categories with fewer rows than this are skipped.
        max_pages: Forwarded to ``fetch_apple_reviews``.
        sleep: Forwarded to ``fetch_apple_reviews``.

    Returns:
        None. Results are written under ``DATA_PROCESSED`` (CSV tables) and
        ``OUTPUT_FIG`` (PNG figures). If no reviews are fetched, a message is
        printed and the function returns before writing anything.
    """
    ensure_dir(DATA_PROCESSED)
    ensure_dir(OUTPUT_FIG)

    app_meta = pd.DataFrame(
        [
            {
                "app_id": app["app_id"],
                "app_name": app["app_name"],
                "category": app["category"],
            }
            for app in APPS
        ]
    )

    # 1) Scrape reviews for each app across countries
    all_reviews = _scrape_all_reviews(max_pages, sleep)
    if not all_reviews:
        print("No reviews pulled. Check app IDs/countries.")
        return

    reviews = pd.concat(all_reviews, ignore_index=True)

    # Save raw-ish but still ok (optional). If you do, keep it local only.
    # reviews.to_csv(os.path.join(DATA_PROCESSED, "reviews_flat.csv"), index=False)

    # 2) Weekly metrics by app
    weekly_app = build_weekly_metrics(reviews)
    weekly_app_meta = add_metadata(weekly_app, app_meta)
    weekly_app_meta.to_csv(
        os.path.join(DATA_PROCESSED, "weekly_adoption_by_app.csv"), index=False
    )

    # 3) Weekly by category
    weekly_cat = weekly_by_category(weekly_app_meta)
    weekly_cat.to_csv(
        os.path.join(DATA_PROCESSED, "weekly_adoption_by_category.csv"), index=False
    )

    # 4) Leaderboard: total review count per category, largest first
    leaderboard = (
        weekly_cat.groupby("category")["review_count"]
        .sum()
        .sort_values(ascending=False)
        .to_dict()
    )
    plot_leaderboard(leaderboard, os.path.join(OUTPUT_FIG, "adoption_proxy_leaderboard.png"))

    # 5) Forecast only the requested categories that have enough weekly rows
    summaries = []
    for cat in forecast_categories:
        summary = _forecast_category(weekly_cat, cat, horizon_weeks, min_weeks)
        if summary is not None:
            summaries.append(summary)

    if summaries:
        pd.DataFrame(summaries).to_csv(
            os.path.join(DATA_PROCESSED, "forecast_summary.csv"), index=False
        )

    print("Done.")
    print("Processed data in: data/processed/")
    print("Figures in: outputs/figures/")

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'config'