# SmallBizPulse v2 Model Build (Colab)

This notebook trains `model_builder/v2` in Colab so you can use larger GPU resources, then bring the resulting artifacts back to your local machine.

Workflow:
1. Mount Drive and set project/data/output paths.
2. Install `v2` dependencies from the `model_builder` folder you copied into the project root on Drive.
3. Train models (GRU + baselines; optional topic/recommendation/resilience).
4. Validate model-driven runtime scoring (no precomputed score-table dependency).
5. Zip artifacts for download/copy.

In [None]:
from pathlib import Path
from google.colab import drive

# Mount Google Drive at /content/drive; every path below lives underneath it.
drive.mount('/content/drive')

# EDIT THESE PATHS
PROJECT_ROOT = Path('/content/drive/MyDrive/smallbizpulse')
DATA_ROOT = PROJECT_ROOT.joinpath('data', 'external', 'yelp_dataset_new')
OUTPUT_ROOT = PROJECT_ROOT.joinpath('models', 'v2_artifacts_colab')

# Echo the resolved locations so a wrong path is obvious before anything runs.
for label, location in (
    ('PROJECT_ROOT:', PROJECT_ROOT),
    ('DATA_ROOT:', DATA_ROOT),
    ('OUTPUT_ROOT:', OUTPUT_ROOT),
):
    print(label, location)


In [None]:
import os

# Fail fast when the Drive layout does not match what the notebook expects.
v2_package_dir = PROJECT_ROOT.joinpath('model_builder', 'v2')
assert v2_package_dir.exists(), (
    'Expected folder missing: copy your model_builder directory into PROJECT_ROOT first.'
)
assert DATA_ROOT.exists(), 'DATA_ROOT does not exist. Update the path above.'

# Work from the project root so project-relative paths
# (e.g. model_builder/v2/requirements-*.txt) resolve.
os.chdir(PROJECT_ROOT)
print('cwd:', os.getcwd())


In [None]:
INSTALL_FULL_STACK = True  # True: include BERTopic deps. False: Component 1 only.

requirements_file = (
    'model_builder/v2/requirements-colab.txt'
    if INSTALL_FULL_STACK
    else 'model_builder/v2/requirements-core.txt'
)

!python -m pip install -q --upgrade pip
!python -m pip install -q -r {requirements_file}


In [None]:
import sys
# Put the project root first on the import path so `model_builder` resolves to
# the copy under PROJECT_ROOT. Any existing entry is dropped first, which keeps
# this cell idempotent: re-running it does not stack duplicate sys.path entries.
project_root_entry = str(PROJECT_ROOT)
sys.path[:] = [entry for entry in sys.path if entry != project_root_entry]
sys.path.insert(0, project_root_entry)

from model_builder.v2 import (
    ModelBuilderV2,
    ModelBuilderV2Config,
    load_restaurant_tables,
    build_monthly_signal_panel,
)

# Reaching this line means the package and the four names above imported cleanly.
print('v2 import OK')


In [None]:
# Preview of the data-prep stage (aligned with the model_exploration flow):
# JSON load -> restaurant filtering -> review VADER -> monthly aggregation.
tables = load_restaurant_tables(DATA_ROOT)
monthly_artifacts = build_monthly_signal_panel(tables)

# Report row counts for each table, then show the first rows of the monthly panel.
panel_preview = monthly_artifacts.monthly_panel
for description, frame in (
    ('business rows:', tables.business),
    ('review rows:', tables.review),
    ('monthly panel rows:', panel_preview),
):
    print(description, len(frame))
panel_preview.head(5)


In [None]:
# Switches for the optional components; set any of them to False to skip it.
RUN_TOPIC_MODEL = True
RUN_RECOMMENDATIONS = True
RUN_RESILIENCE = True

# Build the configuration first so it can be inspected separately from the run.
builder_config = ModelBuilderV2Config(
    output_root=OUTPUT_ROOT,
    run_topic_model=RUN_TOPIC_MODEL,
    run_recommendation_mapping=RUN_RECOMMENDATIONS,
    run_resilience_analysis=RUN_RESILIENCE,
)
builder = ModelBuilderV2(builder_config)

# Run the build against DATA_ROOT and report where the run summary was written.
artifacts = builder.run(data_root=DATA_ROOT)
print('Run summary:', artifacts.run_summary_path)


In [None]:
import json
import pandas as pd
from model_builder.v2.inference import SurvivalRuntime

# Load the run summary stored under OUTPUT_ROOT and print its `component1` section.
summary_path = OUTPUT_ROOT / 'run_summary.json'
with summary_path.open(encoding='utf-8') as summary_file:
    summary = json.load(summary_file)
print(json.dumps(summary['component1'], indent=2))

# Runtime scoring from trained model files (live computation).
survival_dir = OUTPUT_ROOT / 'component1_survival'
runtime = SurvivalRuntime.from_output_dir(survival_dir)

# Score the monthly panel CSV found under OUTPUT_ROOT and preview the result.
monthly_panel = pd.read_csv(OUTPUT_ROOT / 'monthly_signal_panel.csv')
runtime_scores = runtime.score_monthly_panel(monthly_panel)
runtime_scores.head(20)


In [None]:
import shutil
from pathlib import Path

# The bundle is written next to OUTPUT_ROOT (not inside it), so the archive
# never ends up containing itself.
bundle_base = OUTPUT_ROOT.parent / 'v2_artifacts_colab_bundle'
zip_path = bundle_base.with_name(bundle_base.name + '.zip')

# Remove a bundle left over from an earlier run before rebuilding it.
if zip_path.exists():
    zip_path.unlink()

# make_archive appends the '.zip' extension to base_name itself.
shutil.make_archive(base_name=str(bundle_base), format='zip', root_dir=OUTPUT_ROOT)
print('Created:', zip_path)
