## Run RQ1 after loading the e-payments data

In [None]:
from src.config import EPAYMENTS_CSV, RQ1_OUTPUTS_DIR
from src.io_utils import read_csv
from src.cleaning import clean_epayments_long
from src.rq1_models import prepare_rq1_series, rq1_run_all, export_rq1_outputs

# Read the raw e-payments CSV, then pass it through clean_epayments_long.
epay_raw = read_csv(EPAYMENTS_CSV)
epay = clean_epayments_long(epay_raw)

# RQ1 is run on a single channel here: "POS Transactions".
pos_series = prepare_rq1_series(epay, channel="POS Transactions")

# Run all RQ1 steps with log_value as the target column, then write the
# series and results under RQ1_OUTPUTS_DIR.
rq1_results = rq1_run_all(pos_series, target_col="log_value")
export_rq1_outputs(pos_series, rq1_results, RQ1_OUTPUTS_DIR)

# Last expression of the cell, so the notebook displays the results.
rq1_results

## Run RQ2 once you add microdata

In [None]:
import json
from src.config import SURVEY_MICRODATA_CSV, RQ2_OUTPUTS_DIR
from src.io_utils import read_csv
from src.rq2_survey import prepare_survey_features, rq2_model_account_ownership, rq2_model_digital_usage

# Load the survey microdata and pass it through prepare_survey_features.
survey = read_csv(SURVEY_MICRODATA_CSV)
survey = prepare_survey_features(survey)

# Run the two RQ2 models on the prepared survey frame.
rq2_a = rq2_model_account_ownership(survey)
rq2_b = rq2_model_digital_usage(survey)

# open(..., "w") does not create missing parent directories, so make sure the
# output directory exists before writing (otherwise a fresh checkout raises
# FileNotFoundError after the models have already been fitted).
RQ2_OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)

rq2_outputs = {
    "rq2_account_ownership.json": rq2_a,
    "rq2_digital_usage.json": rq2_b,
}
for rq2_filename, rq2_payload in rq2_outputs.items():
    # Serialize first: if a result is not JSON-serializable we fail here,
    # before the target file is opened and truncated.
    rq2_text = json.dumps(rq2_payload, indent=2)
    with open(RQ2_OUTPUTS_DIR / rq2_filename, "w", encoding="utf-8") as f:
        f.write(rq2_text)

# Last expression of the cell, so the notebook displays the first result.
rq2_a


## Run RQ4: early-warning model

In [None]:
from src.config import FEATURE_MATRIX_CSV, MODEL_DIR, METRICS_DIR, PREDICTIONS_DIR
from src.io_utils import read_csv
from src.labels import make_quantile_label
from src.modeling import train_ew_model, save_model

df = read_csv(FEATURE_MATRIX_CSV)

# Label for EW-FRI (fraud-risk): use fraud_loss_ratio or fraud_losses_ngn if present.
# Here we label using fraud_loss_ratio (system-level intensity) with q=0.75.
target_col = "fraud_loss_ratio"
label_col = "y_high_risk"
df_l = make_quantile_label(df, target_col=target_col, q=0.75, label_col=label_col)

# Features are every column except the label, the time identifiers, and the
# column the label was derived from. Keeping target_col as a feature would leak
# the label: the model could reproduce y_high_risk by thresholding that column.
# NOTE(review): assumes make_quantile_label labels each row from its own
# fraud_loss_ratio value (no forward shift) -- confirm against src.labels.
# NOTE(review): other same-period fraud outcome columns (e.g. fraud_losses_ngn,
# if present in the matrix) may leak the label as well -- review before trusting
# the metrics.
non_feature_cols = {label_col, target_col, "year", "period"}
feature_cols = [c for c in df_l.columns if c not in non_feature_cols]

result = train_ew_model(
    df_l,
    feature_cols=feature_cols,
    label_col=label_col,
    time_col="year",
    calibrate=True,
)

# Persist the trained model together with its metrics and predictions.
save_model(
    result,
    model_path=MODEL_DIR / "rq4_ew_logit_calibrated.joblib",
    metrics_path=METRICS_DIR / "rq4_metrics.json",
    predictions_path=PREDICTIONS_DIR / "rq4_predictions.csv",
)

# Last expression of the cell, so the notebook displays metrics and predictions.
result.metrics, result.predictions


## Run RQ3

In [None]:
from src.config import MERGED_PANEL_CSV, OUTPUTS_DIR
from src.io_utils import read_csv
from src.rq3_var_granger import rq3_var_granger_pipeline, export_rq3_outputs

panel = read_csv(MERGED_PANEL_CSV)

# Choose RQ3 variables (must exist in panel after feature engineering).
rq3_candidates = ["pay_value_g", "pay_volume_g", "fraud_loss_ratio", "fraud_cases_per_1m_txn"]
rq3_cols = [c for c in rq3_candidates if c in panel.columns]

# A VAR / Granger analysis needs at least two series. Missing columns are
# dropped silently above, so fail here with a message naming what is missing
# instead of letting the pipeline fail further from the cause.
if len(rq3_cols) < 2:
    rq3_missing = [c for c in rq3_candidates if c not in panel.columns]
    raise ValueError(
        "RQ3 needs at least two of the candidate columns in the merged panel; "
        f"found {rq3_cols}, missing {rq3_missing}"
    )

rq3_results = rq3_var_granger_pipeline(panel, cols=rq3_cols, maxlags=3)

# Ensure the rq3 output subdirectory exists before exporting; nothing visible
# in this cell creates it.
rq3_out_path = OUTPUTS_DIR / "rq3" / "rq3_var_granger_results.json"
rq3_out_path.parent.mkdir(parents=True, exist_ok=True)
export_rq3_outputs(rq3_results, rq3_out_path)

# Last expression of the cell, so the notebook displays the VAR result.
rq3_results["var_result"]
