In [2]:
import pandas as pd
import datetime as dt

from lifetimes import BetaGeoFitter 
from lifetimes import GammaGammaFitter

import optuna

from mypackage import dir


# Environment variables
modality = 'p'
project = 'australian'
data = dir.make_dir_line(modality, project) 
processed = data('processed')
models = data('models')


# Función para cargar datos
def cargar_datos(table_name: str) -> pd.DataFrame:
    df = pd.read_parquet(processed / f'{table_name}.parquet.gzip')
    print(f'Loaded table: {table_name}')
    return df

# Función para cargar los datos en la base de datos
def cargar_en_db(df: pd.DataFrame, table_name: str) -> None:
    df.to_parquet(models/f'{table_name}.parquet.gzip', compression='gzip')
    print(f'Saved table: {table_name}')

def get_month(x): return dt.datetime(x.year, x.month, 1) 

def get_dates(df, col):
    
    year = df[col].dt.year
    month = df[col].dt.month
    day = df[col].dt.day
    
    return year, month, day

In [3]:
cltv = cargar_datos('cltv')
cltv.head()

Loaded table: cltv


Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary
0,0,232,211.142857,178.0,13,82.830769
1,1,83,211.428571,199.571429,19,96.589474
2,2,90,211.0,198.142857,14,85.485714
3,3,8,212.0,210.857143,18,100.444444
4,4,2,187.0,186.714286,14,80.571429


In [4]:
# Define la función objetivo
def objective(trial):
    # Hiperparámetro a optimizar
    penalizer_coef = trial.suggest_loguniform('penalizer_coef', 1e-6, 1e-1)
    
    # Inicializa el modelo con el hiperparámetro actual
    bgf = BetaGeoFitter(penalizer_coef=penalizer_coef)
    
    # Entrena el modelo
    bgf.fit(frequency = cltv['frequency'],  # omitir la primera compra
            recency = cltv['tenure'],       # tenura, primera y ultima compra
            T = cltv['live_purches'])       # tiempo de observacion desde la primera compra

    
    # Calcula el log-likelihood como métrica de evaluación
    log_likelihood = bgf._negative_log_likelihood_
    
    # Queremos maximizar el log-likelihood
    return -log_likelihood  # Cambia el signo porque Optuna minimiza por defecto

# Configura el estudio
study = optuna.create_study(direction='minimize')  # Minimizamos el negativo del log-likelihood
study.optimize(objective, n_trials=20)

[I 2024-12-04 11:41:06,934] A new study created in memory with name: no-name-af39cf73-4ee3-4ac5-8941-ba4b09da9388
[I 2024-12-04 11:41:07,485] Trial 0 finished with value: 27.26787736476085 and parameters: {'penalizer_coef': 0.00012170737157622279}. Best is trial 0 with value: 27.26787736476085.
[I 2024-12-04 11:41:08,234] Trial 1 finished with value: 27.388173738047104 and parameters: {'penalizer_coef': 1.4290845291402666e-06}. Best is trial 0 with value: 27.26787736476085.
  result = getattr(ufunc, method)(*inputs, **kwargs)
[I 2024-12-04 11:41:08,646] Trial 2 finished with value: 27.329974362156236 and parameters: {'penalizer_coef': 2.7964836434372826e-05}. Best is trial 0 with value: 27.26787736476085.
  result = getattr(ufunc, method)(*inputs, **kwargs)
[I 2024-12-04 11:41:08,926] Trial 3 finished with value: 27.318591215062874 and parameters: {'penalizer_coef': 3.898776657149029e-05}. Best is trial 0 with value: 27.26787736476085.
[I 2024-12-04 11:41:09,101] Trial 4 finished with 

In [5]:
best_penalizer = study.best_params['penalizer_coef']

# Inicializa el modelo con el hiperparámetro actual
bgf = BetaGeoFitter(penalizer_coef=best_penalizer)

# Entrena el modelo
bgf.fit(frequency = cltv['frequency'],  # omitir la primera compra
        recency = cltv['tenure'],       # tenura, primera y ultima compra
        T = cltv['live_purches'])       # tiempo de observacion desde la primera compra

<lifetimes.BetaGeoFitter: fitted with 635 subjects, a: 0.00, alpha: 21.87, b: 0.00, r: 1.67>

In [6]:
t = 4 * 3  #4 semanas por 3 meses
cltv["b_exp_sales_3_month"] = bgf.predict(t,
                                          cltv['frequency'],
                                          cltv['tenure'],
                                          cltv['live_purches'])

cltv.head(10)

Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary,b_exp_sales_3_month
0,0,232,211.142857,178.0,13,82.830769,0.755534
1,1,83,211.428571,199.571429,19,96.589474,1.063227
2,2,90,211.0,198.142857,14,85.485714,0.807529
3,3,8,212.0,210.857143,18,100.444444,1.009319
4,4,2,187.0,186.714286,14,80.571429,0.900318
5,5,30,190.428571,186.142857,17,86.588235,1.055351
6,6,156,212.142857,189.857143,14,83.542857,0.803585
7,7,16,197.142857,194.857143,11,110.836364,0.694248
8,8,23,188.0,184.714286,11,83.2,0.724492
9,9,60,162.0,153.428571,8,108.6,0.631148


In [7]:
t = 4 * 6  #4 semanas por 6 meses
cltv["b_exp_sales_6_month"] = bgf.predict(t,
                                          cltv['frequency'],
                                          cltv['tenure'],
                                          cltv['live_purches'])
cltv.head(10)

Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary,b_exp_sales_3_month,b_exp_sales_6_month
0,0,232,211.142857,178.0,13,82.830769,0.755534,1.511069
1,1,83,211.428571,199.571429,19,96.589474,1.063227,2.126455
2,2,90,211.0,198.142857,14,85.485714,0.807529,1.615058
3,3,8,212.0,210.857143,18,100.444444,1.009319,2.018638
4,4,2,187.0,186.714286,14,80.571429,0.900318,1.800635
5,5,30,190.428571,186.142857,17,86.588235,1.055351,2.110701
6,6,156,212.142857,189.857143,14,83.542857,0.803585,1.60717
7,7,16,197.142857,194.857143,11,110.836364,0.694248,1.388496
8,8,23,188.0,184.714286,11,83.2,0.724492,1.448985
9,9,60,162.0,153.428571,8,108.6,0.631148,1.262296


In [8]:
# Guardar modelo (y cargar modelo)

bgf.save_model(models/'bgf.pkl')

bgf_loaded = BetaGeoFitter()
bgf_loaded.load_model(models/'bgf.pkl')
bgf_loaded

<lifetimes.BetaGeoFitter: fitted with 635 subjects, a: 0.00, alpha: 21.87, b: 0.00, r: 1.67>

In [9]:
t = 4 * 3
salida = bgf_loaded.predict(t, 2, 30, 90)
salida

0.39375182249009905

In [12]:
# Define la función objetivo
def objective(trial):
    # Hiperparámetro a optimizar
    penalizer_coef = trial.suggest_loguniform('penalizer_coef', 1e-6, 1e-1)
    
    # Inicializa el modelo con el hiperparámetro actual
    ggf = GammaGammaFitter(penalizer_coef=penalizer_coef)
    
    # Entrena el modelo
    ggf.fit(cltv['frequency'], cltv['monetary'])

    
    # Calcula el log-likelihood como métrica de evaluación
    log_likelihood = ggf._negative_log_likelihood_
    
    # Queremos maximizar el log-likelihood
    return -log_likelihood  # Cambia el signo porque Optuna minimiza por defecto

# Configura el estudio
study = optuna.create_study(direction='minimize')  # Minimizamos el negativo del log-likelihood
study.optimize(objective, n_trials=20)

[I 2024-12-04 11:41:28,091] A new study created in memory with name: no-name-3ec13baa-a62e-4a3a-9182-c0daff0d4031
[I 2024-12-04 11:41:28,280] Trial 0 finished with value: -6.438064458494167 and parameters: {'penalizer_coef': 0.010860498359349288}. Best is trial 0 with value: -6.438064458494167.
[I 2024-12-04 11:41:28,629] Trial 1 finished with value: -4.106351936606212 and parameters: {'penalizer_coef': 1.738676304119386e-06}. Best is trial 0 with value: -6.438064458494167.
[I 2024-12-04 11:41:28,809] Trial 2 finished with value: -6.811830221121086 and parameters: {'penalizer_coef': 0.03269390363995669}. Best is trial 2 with value: -6.811830221121086.
[I 2024-12-04 11:41:28,961] Trial 3 finished with value: -6.920153050950396 and parameters: {'penalizer_coef': 0.04760704793719024}. Best is trial 3 with value: -6.920153050950396.
[I 2024-12-04 11:41:29,215] Trial 4 finished with value: -4.172336502736555 and parameters: {'penalizer_coef': 1.0694401586120287e-05}. Best is trial 3 with va

In [13]:
best_penalizer = study.best_params['penalizer_coef']

# Inicializa el modelo con el hiperparámetro actual
ggf = GammaGammaFitter(penalizer_coef=best_penalizer)

# Entrena el modelo
ggf.fit(cltv['frequency'], cltv['monetary'])

<lifetimes.GammaGammaFitter: fitted with 635 subjects, p: 1.13, q: 0.25, v: 1.11>

In [14]:
# Guardar modelo (y cargar modelo)

ggf.save_model(models/'ggf.pkl')

ggf_loaded = BetaGeoFitter()
ggf_loaded.load_model(models/'bgf.pkl')
ggf_loaded

<lifetimes.BetaGeoFitter: fitted with 635 subjects, a: 0.00, alpha: 21.87, b: 0.00, r: 1.67>

In [15]:
t = 4 * 3  #4 semanas por 3 meses
cltv["exp_sales_3_month"] = bgf.predict(t,
                                        cltv['frequency'],
                                        cltv['tenure'],
                                        cltv['live_purches'])

cltv.head(10)

Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary,b_exp_sales_3_month,b_exp_sales_6_month,exp_sales_3_month
0,0,232,211.142857,178.0,13,82.830769,0.755534,1.511069,0.755534
1,1,83,211.428571,199.571429,19,96.589474,1.063227,2.126455,1.063227
2,2,90,211.0,198.142857,14,85.485714,0.807529,1.615058,0.807529
3,3,8,212.0,210.857143,18,100.444444,1.009319,2.018638,1.009319
4,4,2,187.0,186.714286,14,80.571429,0.900318,1.800635,0.900318
5,5,30,190.428571,186.142857,17,86.588235,1.055351,2.110701,1.055351
6,6,156,212.142857,189.857143,14,83.542857,0.803585,1.60717,0.803585
7,7,16,197.142857,194.857143,11,110.836364,0.694248,1.388496,0.694248
8,8,23,188.0,184.714286,11,83.2,0.724492,1.448985,0.724492
9,9,60,162.0,153.428571,8,108.6,0.631148,1.262296,0.631148


In [16]:
t = 4 * 6  #4 semanas por 6 meses
cltv["exp_sales_6_month"] = bgf.predict(t,
                                        cltv['frequency'],
                                        cltv['tenure'],
                                        cltv['live_purches'])
cltv.head(10)

Unnamed: 0,id_cliente,recency,live_purches,tenure,frequency,monetary,b_exp_sales_3_month,b_exp_sales_6_month,exp_sales_3_month,exp_sales_6_month
0,0,232,211.142857,178.0,13,82.830769,0.755534,1.511069,0.755534,1.511069
1,1,83,211.428571,199.571429,19,96.589474,1.063227,2.126455,1.063227,2.126455
2,2,90,211.0,198.142857,14,85.485714,0.807529,1.615058,0.807529,1.615058
3,3,8,212.0,210.857143,18,100.444444,1.009319,2.018638,1.009319,2.018638
4,4,2,187.0,186.714286,14,80.571429,0.900318,1.800635,0.900318,1.800635
5,5,30,190.428571,186.142857,17,86.588235,1.055351,2.110701,1.055351,2.110701
6,6,156,212.142857,189.857143,14,83.542857,0.803585,1.60717,0.803585,1.60717
7,7,16,197.142857,194.857143,11,110.836364,0.694248,1.388496,0.694248,1.388496
8,8,23,188.0,184.714286,11,83.2,0.724492,1.448985,0.724492,1.448985
9,9,60,162.0,153.428571,8,108.6,0.631148,1.262296,0.631148,1.262296


In [17]:
cargar_en_db(cltv, 'cltv')

Saved table: cltv


In [18]:
print('Ok_')

Ok_
