# 01_generate_data.py — Generador de datos PREX
# ---------------------------------------------
# Este script crea datasets sintéticos realistas para una fintech regional
# y los guarda en formato CSV (para SQL, Power BI, Excel, etc.)

In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime

# ------------------------------
# Configuración y parámetros
# ------------------------------
np.random.seed(7)
random.seed(7)

MONTHS = 24  # 24 meses (2024-01 a 2025-12)
START_DATE = pd.Timestamp("2024-01-01")
COUNTRIES = ["ARG", "URU", "PER"]
PRODUCTS = ["Card", "P2P", "TopUp", "QR", "Loan"]
CHANNELS = ["Paid Ads", "Organic", "Referral"]

# Fees y costos (bps = basis points)
TAKE_RATE_BPS = {
    "Card": (60, 120),
    "P2P": (0, 30),
    "QR": (40, 80),
    "TopUp": (0, 0),
    "Loan": (800, 1200),
}
COST_BPS = {
    "Card": (20, 40),
    "P2P": (10, 20),
    "QR": (20, 40),
    "TopUp": (0, 0),
    "Loan": (100, 200),
}
TOPUP_FEE_FIXED = (50, 120)  # moneda local simulada

PRODUCT_MIX = {"Card": 0.40, "P2P": 0.20, "TopUp": 0.15, "QR": 0.20, "Loan": 0.05}
SEASONALITY = [1.00, 0.98, 1.02, 1.03, 1.05, 1.07, 1.02, 1.01, 0.99, 1.04, 1.10, 1.20]

BASE_TPV_COUNTRY = {"ARG": 1.0, "URU": 0.6, "PER": 0.8}

# ------------------------------
# Generar calendario y meses
# ------------------------------
months = pd.date_range(START_DATE, periods=MONTHS, freq="MS")

# ------------------------------
# Generar transacciones
# ------------------------------
records = []
for mi, m in enumerate(months):
    seasonal = SEASONALITY[m.month - 1]
    for country in COUNTRIES:
        base_multiplier = BASE_TPV_COUNTRY[country]
        n_tx = int(800 * base_multiplier * seasonal)  # cantidad de transacciones
        for _ in range(n_tx):
            product = random.choices(list(PRODUCT_MIX.keys()), weights=list(PRODUCT_MIX.values()))[0]
            amt = {
                "Card": np.random.uniform(2000, 80000),
                "P2P": np.random.uniform(500, 20000),
                "TopUp": np.random.uniform(1000, 15000),
                "QR": np.random.uniform(1500, 50000),
                "Loan": np.random.uniform(20000, 200000),
            }[product]
            fee_lo, fee_hi = TAKE_RATE_BPS[product]
            cost_lo, cost_hi = COST_BPS[product]
            fee_bps = np.random.randint(fee_lo, fee_hi + 1) if fee_hi > 0 else 0
            cost_bps = np.random.randint(cost_lo, cost_hi + 1) if cost_hi > 0 else 0
            topup_fee = 0 if product != "TopUp" else np.random.randint(*TOPUP_FEE_FIXED)

            records.append({
                "date": (m + pd.to_timedelta(np.random.randint(0, 28), unit="D")).date(),
                "country_id": country,
                "product": product,
                "txn_amount_local": round(float(amt), 2),
                "fee_bps": fee_bps,
                "cost_bps": cost_bps,
                "topup_fee_fixed": topup_fee,
                "channel": random.choice(CHANNELS),
            })

fact_txn = pd.DataFrame(records)

# ------------------------------
# Guardar CSVs
# ------------------------------
outdir = r"C:\Users\milag\OneDrive\Desktop\PROYECTOS\prex-fpa-sim\data\raw"   # ruta relativa dentro de tu proyecto
fact_txn.to_csv(f"{outdir}/fact_txn.csv", index=False)

print("✅ Dataset generado en:", f"{outdir}/fact_txn.csv")
print(f"Registros creados: {len(fact_txn)}")
print(fact_txn.head())


✅ Dataset generado en: C:\Users\milag\OneDrive\Desktop\PROYECTOS\prex-fpa-sim\data\raw/fact_txn.csv
Registros creados: 48022
         date country_id product  txn_amount_local  fee_bps  cost_bps  \
0  2024-01-26        ARG    Card           7952.05       74        28   
1  2024-01-12        ARG    Card           7619.99      104        20   
2  2024-01-28        ARG    Card          54597.77      115        36   
3  2024-01-04        ARG    Card          21881.74       83        23   
4  2024-01-24        ARG    Card          60531.97       88        27   

   topup_fee_fixed   channel  
0                0  Paid Ads  
1                0  Paid Ads  
2                0  Referral  
3                0  Referral  
4                0  Referral  
