# Estrategia de marketing basada en simulación

In [52]:
# 5.1) Librerías y carga de datos
import pandas as pd
import json
from joblib import load

# Load the feature dataset (output of the feature-engineering step).
df = pd.read_csv('../data/processed/modeling_dataset.csv')

# Load the already-trained models.
# NOTE(review): joblib.load unpickles arbitrary objects; only point these
# paths at model files that come from a trusted source.
model_ltv = load('../models/LTV_180/blending.pkl')
model_cac = load('../models/CAC_source_30/ridge.pkl')

ridge = load('../models/LTV_180/ridge.pkl')
rf    = load('../models/LTV_180/rf.pkl')
xgb   = load('../models/LTV_180/xgb.pkl')


def _read_feature_list(path):
    """Return the JSON-decoded content of the file at ``path``.

    The file is opened as UTF-8 text inside a ``with`` block so the handle
    is closed as soon as the content has been parsed.
    """
    with open(path, 'r', encoding='utf-8') as fh:
        return json.load(fh)


# Feature lists stored next to each base model of the LTV ensemble.
ridge_feats = _read_feature_list('../models/LTV_180/ridge_features.json')
rf_feats    = _read_feature_list('../models/LTV_180/rf_features.json')
xgb_feats   = _read_feature_list('../models/LTV_180/xgb_features.json')

# 5.2) Feature lists stored for the blending (LTV) and ridge (CAC) models.
feats_ltv = _read_feature_list('../models/LTV_180/blending_features.json')
feats_cac = _read_feature_list('../models/CAC_source_30/ridge_features.json')

# Keep only the feature names that exist as columns in df; names that are
# missing from df are dropped without any message.
# NOTE(review): a model fitted on the full list may not accept the reduced
# column set - confirm nothing is actually dropped here.
feats_ltv = [c for c in feats_ltv if c in df.columns]
feats_cac = [c for c in feats_cac if c in df.columns]



In [65]:
# Number of rows per distinct 'primer_source' value, most frequent first.
df.loc[:, 'primer_source'].value_counts()

primer_source
3     10473
4     10296
5      6931
2      3506
1      2899
10     1329
9      1088
7         1
Name: count, dtype: int64

In [53]:
# First-level predictions: every base model scores df on its own feature
# list, with missing values replaced by 0 beforehand.
base_learners = ((ridge, ridge_feats), (rf, rf_feats), (xgb, xgb_feats))
r_pred, f_pred, x_pred = [
    estimator.predict(df[cols].fillna(0))
    for estimator, cols in base_learners
]

# Put the three prediction vectors side by side as the 2-D meta-feature table.
meta_X = pd.DataFrame({'ridge': r_pred, 'rf': f_pred, 'xgb': x_pred})

In [56]:
# Input matrix for the CAC model: the selected feature columns with NaN -> 0.0.
X_cac = df.loc[:, feats_cac].fillna(0.0)

# Score with both models, then store the predictions as new columns on df.
ltv_hat = model_ltv.predict(meta_X)
df['pred_LTV_180'] = ltv_hat

cac_hat = model_cac.predict(X_cac)
df['pred_CAC_30'] = cac_hat

In [60]:
# 5.3) ROMI per source
# Rows are grouped by the 'primer_source' column. For each group we keep the
# mean predicted LTV, the mean predicted CAC and the number of non-null CAC
# predictions (used as the user count).
stats = (
    df
    .groupby('primer_source')
    .agg(
        avg_LTV=('pred_LTV_180', 'mean'),
        avg_CAC=('pred_CAC_30', 'mean'),
        n_users=('pred_CAC_30', 'count'),
    )
    .reset_index()
    .assign(
        # ROMI is computed here as the plain ratio avg_LTV / avg_CAC.
        ROMI=lambda t: t['avg_LTV'] / t['avg_CAC'],
        # Users times average CAC, taken as the current spend per source.
        budget=lambda t: t['n_users'] * t['avg_CAC'],
    )
)
stats


Unnamed: 0,primer_source,avg_LTV,avg_CAC,n_users,ROMI,budget
0,1,9.796521,0.330681,2899,29.625266,958.645055
1,2,13.000894,0.329137,3506,39.499962,1153.95387
2,3,5.984117,0.33151,10473,18.051077,3471.906927
3,4,5.898427,0.330774,10296,17.832208,3405.64675
4,5,7.917749,0.331241,6931,23.903262,2295.833821
5,7,2.150563,0.343806,1,6.255168,0.343806
6,9,5.306426,0.325173,1088,16.318796,353.787873
7,10,4.162009,0.331131,1329,12.569061,440.073437


In [78]:
# 5.4) Scenario simulation for every source
total_budget = stats['budget'].sum()
base_rev     = (stats['budget'] * stats['ROMI']).sum()

current_budget = stats['budget']
romi = stats['ROMI']

results = []

for src in stats['primer_source'].unique():
    is_src = stats['primer_source'] == src

    # Scenario 1: only `src` receives 10% more budget; the rest stay as-is.
    solo_budget = current_budget.where(~is_src, current_budget * 1.10)
    rev1 = (solo_budget * romi).sum()

    # Scenario 2: the same extra amount (10% of src's budget) is spread over
    # all sources in proportion to each source's share of the total budget.
    extra = current_budget[is_src].iat[0] * 0.10
    spread_budget = current_budget + current_budget / total_budget * extra
    rev2 = (spread_budget * romi).sum()

    row = {
        'source':         src,
        'base_rev':       base_rev,
        'rev_+10%_solo':  rev1,
        'delta_+10%':     rev1 - base_rev,
        'rev_redistrib':  rev2,
        'delta_redistrib':rev2 - base_rev
    }
    results.append(row)

# One row per source, largest uplift of the first scenario on top.
res_df = pd.DataFrame(results).sort_values(by='delta_+10%', ascending=False)
res_df


Unnamed: 0,source,base_rev,rev_+10%_solo,delta_+10%,rev_redistrib,delta_redistrib
2,3,263567.87993,269835.045883,6267.165953,271142.951443,7575.071513
3,4,263567.87993,269640.900036,6073.020106,270998.38375,7430.50382
4,5,263567.87993,269055.671746,5487.791815,268576.973129,5009.093199
1,2,263567.87993,268125.993354,4558.113423,266085.598081,2517.718151
0,1,263567.87993,266407.891402,2840.011472,265659.469654,2091.589724
6,9,263567.87993,264145.219128,577.339198,264339.780934,771.901004
7,10,263567.87993,264121.010898,553.130968,264528.040392,960.160462
5,7,263567.87993,263568.094986,0.215056,263568.630052,0.750122


In [80]:
# 5.5) Budget allocation after the proportional redistribution

# 1. Source with the largest 'delta_redistrib' in the simulation results.
# NOTE(review): every scenario spreads its extra with the same proportional
# weights, so this ranking follows the size of each source's current budget.
top_label = res_df['delta_redistrib'].idxmax()
best_src = res_df.at[top_label, 'source']

# 2. Extra amount to redistribute: 10% of that source's current budget.
is_best = stats['primer_source'] == best_src
extra = stats.loc[is_best, 'budget'].iat[0] * 0.10

# 3. Budget table: original budget, proportional share of the extra
#    (weighted by each source's part of total_budget) and the resulting total.
allocation = (
    stats[['primer_source', 'budget']]
    .rename(columns={'budget': 'presupuesto_original'})
    .assign(
        extra_redistrib=lambda t: t['presupuesto_original'] / total_budget * extra,
        presupuesto_final=lambda t: t['presupuesto_original'] + t['extra_redistrib'],
    )
)

# 4. Show the results as plain text.
print("Fuente óptima para redistribuir:", best_src)
print(allocation.to_string(index=False))

Fuente óptima para redistribuir: 3
 primer_source  presupuesto_original  extra_redistrib  presupuesto_final
             1            958.645055        27.551934         986.196989
             2           1153.953870        33.165206        1187.119076
             3           3471.906927        99.784326        3571.691253
             4           3405.646750        97.879976        3503.526726
             5           2295.833821        65.983402        2361.817223
             7              0.343806         0.009881           0.353687
             9            353.787873        10.168039         363.955912
            10            440.073437        12.647929         452.721366
