In [1]:
import pandas as pd
import datetime as dt

from lifetimes import BetaGeoFitter 
from lifetimes import GammaGammaFitter

import optuna

from mypackage import dir


# Environment variables
modality = 'p'
project = 'australian'
data = dir.make_dir_line(modality, project) 
processed = data('processed')
models = data('models')


# Función para cargar datos
def cargar_datos(table_name: str) -> pd.DataFrame:
    df = pd.read_parquet(processed / f'{table_name}.parquet.gzip')
    print(f'Loaded table: {table_name}')
    return df

# Función para cargar los datos en la base de datos
def cargar_en_db(df: pd.DataFrame, table_name: str) -> None:
    df.to_parquet(models/f'{table_name}.parquet.gzip', compression='gzip')
    print(f'Saved table: {table_name}')

def get_month(x): return dt.datetime(x.year, x.month, 1) 

def get_dates(df, col):
    
    year = df[col].dt.year
    month = df[col].dt.month
    day = df[col].dt.day
    
    return year, month, day

In [2]:
cltv = cargar_datos('cltv')
cltv.head()

Loaded table: cltv


Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary
0,0,232,211.142857,178.0,13,82.830769
1,1,83,211.428571,199.571429,19,96.589474
2,2,90,211.0,198.142857,14,85.485714
3,3,8,212.0,210.857143,18,100.444444
4,4,2,187.0,186.714286,14,80.571429


In [4]:
# Define la función objetivo
def objective(trial):
    # Hiperparámetro a optimizar
    penalizer_coef = trial.suggest_loguniform('penalizer_coef', 1e-6, 1e-1)
    
    # Inicializa el modelo con el hiperparámetro actual
    bgf = BetaGeoFitter(penalizer_coef=penalizer_coef)
    
    # Entrena el modelo
    bgf.fit(frequency = cltv['frequency'],  # omitir la primera compra
            recency = cltv['tenure'],       # tenura, primera y ultima compra
            T = cltv['live_purches'])       # tiempo de observacion desde la primera compra

    
    # Calcula el log-likelihood como métrica de evaluación
    log_likelihood = bgf._negative_log_likelihood_
    
    # Queremos maximizar el log-likelihood
    return -log_likelihood  # Cambia el signo porque Optuna minimiza por defecto

# Configura el estudio
study = optuna.create_study(direction='minimize')  # Minimizamos el negativo del log-likelihood
study.optimize(objective, n_trials=10)

[I 2024-12-06 21:16:56,979] A new study created in memory with name: no-name-0ac44e0b-7c8b-46b5-b981-e1a80fab58a3
  result = getattr(ufunc, method)(*inputs, **kwargs)
[I 2024-12-06 21:16:57,118] Trial 0 finished with value: 27.32407575811921 and parameters: {'penalizer_coef': 3.338329454571578e-05}. Best is trial 0 with value: 27.32407575811921.
[I 2024-12-06 21:16:57,235] Trial 1 finished with value: 27.029496248002644 and parameters: {'penalizer_coef': 0.002032137768916933}. Best is trial 1 with value: 27.029496248002644.
[I 2024-12-06 21:16:57,364] Trial 2 finished with value: 27.11559952446011 and parameters: {'penalizer_coef': 0.0009287632548709532}. Best is trial 1 with value: 27.029496248002644.
[I 2024-12-06 21:16:57,487] Trial 3 finished with value: 26.95521650646756 and parameters: {'penalizer_coef': 0.0036072099847867696}. Best is trial 3 with value: 26.95521650646756.
  result = getattr(ufunc, method)(*inputs, **kwargs)
[I 2024-12-06 21:16:57,606] Trial 4 finished with valu

In [5]:
best_penalizer = study.best_params['penalizer_coef']

# Inicializa el modelo con el hiperparámetro actual
bgf = BetaGeoFitter(penalizer_coef=best_penalizer)

# Entrena el modelo
bgf.fit(frequency = cltv['frequency'],  # omitir la primera compra
        recency = cltv['tenure'],       # tenura, primera y ultima compra
        T = cltv['live_purches'])       # tiempo de observacion desde la primera compra

<lifetimes.BetaGeoFitter: fitted with 635 subjects, a: 0.00, alpha: 26.51, b: 0.00, r: 2.02>

In [6]:
t = 4 * 3  #4 semanas por 3 meses
cltv["b_exp_sales_3_month"] = bgf.predict(t,
                                          cltv['frequency'],
                                          cltv['tenure'],
                                          cltv['live_purches'])

cltv.head(10)

Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary,b_exp_sales_3_month
0,0,232,211.142857,178.0,13,82.830769,0.758656
1,1,83,211.428571,199.571429,19,96.589474,1.060343
2,2,90,211.0,198.142857,14,85.485714,0.809637
3,3,8,212.0,210.857143,18,100.444444,1.00749
4,4,2,187.0,186.714286,14,80.571429,0.900645
5,5,30,190.428571,186.142857,17,86.588235,1.052355
6,6,156,212.142857,189.857143,14,83.542857,0.805759
7,7,16,197.142857,194.857143,11,110.836364,0.698837
8,8,23,188.0,184.714286,11,83.2,0.728623
9,9,60,162.0,153.428571,8,108.6,0.638147


In [7]:
t = 4 * 6  #4 semanas por 6 meses
cltv["b_exp_sales_6_month"] = bgf.predict(t,
                                          cltv['frequency'],
                                          cltv['tenure'],
                                          cltv['live_purches'])
cltv.head(10)

Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary,b_exp_sales_3_month,b_exp_sales_6_month
0,0,232,211.142857,178.0,13,82.830769,0.758656,1.517313
1,1,83,211.428571,199.571429,19,96.589474,1.060343,2.120685
2,2,90,211.0,198.142857,14,85.485714,0.809637,1.619273
3,3,8,212.0,210.857143,18,100.444444,1.00749,2.01498
4,4,2,187.0,186.714286,14,80.571429,0.900645,1.801289
5,5,30,190.428571,186.142857,17,86.588235,1.052355,2.10471
6,6,156,212.142857,189.857143,14,83.542857,0.805759,1.611519
7,7,16,197.142857,194.857143,11,110.836364,0.698837,1.397675
8,8,23,188.0,184.714286,11,83.2,0.728623,1.457246
9,9,60,162.0,153.428571,8,108.6,0.638147,1.276294


In [8]:
# Guardar modelo (y cargar modelo)

bgf.save_model(models/'bgf.pkl')

bgf_loaded = BetaGeoFitter()
bgf_loaded.load_model(models/'bgf.pkl')
bgf_loaded

<lifetimes.BetaGeoFitter: fitted with 635 subjects, a: 0.00, alpha: 26.51, b: 0.00, r: 2.02>

In [9]:
t = 4 * 3
salida = bgf_loaded.predict(t, 2, 30, 90)
salida

0.41453441007790953

In [16]:
# Define la función objetivo
def objective(trial):
    # Hiperparámetro a optimizar
    penalizer_coef = trial.suggest_loguniform('penalizer_coef', 1e-6, 1e-1)
    
    # Inicializa el modelo con el hiperparámetro actual
    ggf = GammaGammaFitter(penalizer_coef=penalizer_coef)
    
    # Entrena el modelo
    ggf.fit(cltv['frequency'], cltv['monetary'])

    
    # Calcula el log-likelihood como métrica de evaluación
    log_likelihood = ggf._negative_log_likelihood_
    
    # Queremos maximizar el log-likelihood
    return -log_likelihood  # Cambia el signo porque Optuna minimiza por defecto

# Configura el estudio
study = optuna.create_study(direction='minimize')  # Minimizamos el negativo del log-likelihood
study.optimize(objective, n_trials=10)

[I 2024-12-06 21:17:20,161] A new study created in memory with name: no-name-1bed7be0-35f1-4017-b58d-fb2c9e13f7fc
[I 2024-12-06 21:17:20,208] Trial 0 finished with value: -5.768939135137508 and parameters: {'penalizer_coef': 0.0022054110396288636}. Best is trial 0 with value: -5.768939135137508.
[I 2024-12-06 21:17:20,264] Trial 1 finished with value: -4.100742139259022 and parameters: {'penalizer_coef': 1.0597944197967382e-06}. Best is trial 0 with value: -5.768939135137508.
[I 2024-12-06 21:17:20,307] Trial 2 finished with value: -4.544172369243242 and parameters: {'penalizer_coef': 0.00010025961597490005}. Best is trial 0 with value: -5.768939135137508.
[I 2024-12-06 21:17:20,361] Trial 3 finished with value: -4.100328127874157 and parameters: {'penalizer_coef': 1.0106763553458046e-06}. Best is trial 0 with value: -5.768939135137508.
[I 2024-12-06 21:17:20,420] Trial 4 finished with value: -5.473351520753031 and parameters: {'penalizer_coef': 0.0011385474789336995}. Best is trial 0 

In [19]:
best_penalizer = study.best_params['penalizer_coef']

# Inicializa el modelo con el hiperparámetro actual
ggf = GammaGammaFitter(penalizer_coef=best_penalizer)

# Entrena el modelo
ggf.fit(cltv['frequency'], cltv['monetary'])

<lifetimes.GammaGammaFitter: fitted with 635 subjects, p: 1.23, q: 0.26, v: 1.21>

In [None]:
cltv["expected_average_profit"] = ggf.conditional_expected_average_profit(cltv['frequency'],
                                                                          cltv['monetary'])

cltv.head(10)

Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary,b_exp_sales_3_month,b_exp_sales_6_month,exp_sales_6_month,exp_sales_3_month,expected_average_profit
0,0,232,211.142857,178.0,13,82.830769,0.758656,1.517313,1.517313,86.978544,86.978544
1,1,83,211.428571,199.571429,19,96.589474,1.060343,2.120685,2.120685,99.837541,99.837541
2,2,90,211.0,198.142857,14,85.485714,0.809637,1.619273,1.619273,89.443931,89.443931
3,3,8,212.0,210.857143,18,100.444444,1.00749,2.01498,2.01498,104.013546,104.013546
4,4,2,187.0,186.714286,14,80.571429,0.900645,1.801289,1.801289,84.307313,84.307313
5,5,30,190.428571,186.142857,17,86.588235,1.052355,2.10471,2.10471,89.862876,89.862876
6,6,156,212.142857,189.857143,14,83.542857,0.805759,1.611519,1.611519,87.413175,87.413175
7,7,16,197.142857,194.857143,11,110.836364,0.698837,1.397675,1.397675,117.414975,117.414975
8,8,23,188.0,184.714286,11,83.2,0.728623,1.457246,1.457246,88.167408,88.167408
9,9,60,162.0,153.428571,8,108.6,0.638147,1.276294,1.276294,117.664491,117.664491


In [32]:
cltv["expected_average_profit"] = ggf.customer_lifetime_value(bgf, 
                                                              cltv['frequency'],
                                                              cltv['tenure'],
                                                              cltv['live_purches'],
                                                              cltv['monetary'],
                                                              time=6,                                    
                                                              discount_rate=0.01)
cltv.head(10)

Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary,b_exp_sales_3_month,b_exp_sales_6_month,exp_sales_6_month,exp_sales_3_month,expected_average_profit
0,0,232,211.142857,178.0,13,82.830769,0.758656,1.517313,1.517313,86.978544,956.062797
1,1,83,211.428571,199.571429,19,96.589474,1.060343,2.120685,2.120685,99.837541,1533.801698
2,2,90,211.0,198.142857,14,85.485714,0.809637,1.619273,1.619273,89.443931,1049.228673
3,3,8,212.0,210.857143,18,100.444444,1.00749,2.01498,2.01498,104.013546,1518.307921
4,4,2,187.0,186.714286,14,80.571429,0.900645,1.801289,1.801289,84.307313,1100.139799
5,5,30,190.428571,186.142857,17,86.588235,1.052355,2.10471,2.10471,89.862876,1370.1615
6,6,156,212.142857,189.857143,14,83.542857,0.805759,1.611519,1.611519,87.413175,1020.49632
7,7,16,197.142857,194.857143,11,110.836364,0.698837,1.397675,1.397675,117.414975,1188.854533
8,8,23,188.0,184.714286,11,83.2,0.728623,1.457246,1.457246,88.167408,930.765077
9,9,60,162.0,153.428571,8,108.6,0.638147,1.276294,1.276294,117.664491,1087.915509


In [28]:
# Guardar modelo (y cargar modelo)

ggf.save_model(models/'ggf.pkl')

ggf_loaded = GammaGammaFitter()
ggf_loaded.load_model(models/'ggf.pkl')
ggf_loaded

<lifetimes.GammaGammaFitter: fitted with 635 subjects, p: 1.23, q: 0.26, v: 1.21>

In [29]:
salida = ggf_loaded.conditional_expected_average_profit(2, 30)
salida

43.911935523718284

In [42]:
df_input = pd.DataFrame({'frequency': [13],
                         'recency': [232],
                         'T': [178],
                         'monetary_value': [82]})

In [46]:
salida = ggf_loaded.customer_lifetime_value(transaction_prediction_model=bgf_loaded, 
                                            frequency=df_input['frequency'],
                                            recency=df_input['recency'],
                                            T=df_input['T'],
                                            monetary_value=df_input['monetary_value'],
                                            time=6,     
                                            discount_rate=0.01,
                                            )
salida[0]

1099.8705124126018

In [None]:
cargar_en_db(cltv, 'cltv')

In [18]:
print('Ok_')

Ok_
