In [None]:
import os
import numpy as np
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from lifetimes.plotting import *
from lifetimes.utils import *
# Move to the parent directory so the `src` package imported below resolves
# from the working directory. Guarded so that re-running this cell does not
# climb one directory further on every execution.
if not os.path.isdir("src"):
    os.chdir("../")
from src.models import BetaGeoModel
from src.data import (
    getDataset,
    ProcessData,
    RawFeatures
)

In [None]:
# Load the transactions and rewrite the date column from day-first strings
# ('%d/%m/%Y %H:%M') into year-first strings ('%Y-%m-%d %H:%M').
dt_format_ = '%d/%m/%Y %H:%M'   # format of the incoming strings
_dt_format = '%Y-%m-%d %H:%M'   # format written back to the frame
df_transaction = getDataset()
# Vectorised parse + format instead of a per-row strptime/strftime call.
# Missing values are parsed to NaT and come back out as NaN, so they stay missing.
df_transaction[RawFeatures.TRANSACTION_DATE] = (
    pd.to_datetime(df_transaction[RawFeatures.TRANSACTION_DATE], format=dt_format_)
    .dt.strftime(_dt_format)
)

In [None]:
# Preview the first rows of the transaction table after the date rewrite.
df_transaction.head()


In [None]:
# Show the transaction-date column in ascending order to eyeball its range.
# The values are strings at this point, so the ordering is lexicographic.
df_transaction[RawFeatures.TRANSACTION_DATE].sort_values()

In [None]:
# pd.to_datetime(
#    df_transaction[RawFeatures.TRANSACTION_DATE].dropna().apply(lambda x: dt.datetime.strptime(x, '%d/%m/%Y %H:%M')),
# ).dt.date

In [None]:
# Build the summary table used for modelling; later cells read its
# `frequency`, `recency` and `T` columns.
# NOTE(review): 'D' and '2011-06-30' are presumably the time granularity (daily)
# and the end of the observation period — confirm against ProcessData's signature.
data_inst = ProcessData(df_transaction, 'D', '2011-06-30')
data_summary = data_inst.model_data()
data_summary.head()

In [None]:
# Descriptive statistics of the summary table, skipping the first row of
# `describe()` (the `count` row).
summary_stats = data_summary.describe()
summary_stats.iloc[1:]

In [None]:
# Fit the model on the full summary table; `bgf` is reused by the scoring and
# plotting cells further down.
# NOTE(review): 7 is presumably `T_prediction` (the keyword used when building
# `bgf_cal`) — confirm the positional order of BetaGeoModel's parameters.
bgf = BetaGeoModel(data_summary, 7)
bgf.fit_()

In [None]:
# Probability that a customer with the given (frequency, recency, T) history
# is currently alive, evaluated for one hand-picked example.
example_history = {"frequency": 25.0, "recency": 858.0, "T": 1062}
bgf.conditional_probability_alive(**example_history)

In [None]:
# Score every row of the summary table with its probability of being alive,
# sort ascending and show the 2000 highest-scoring rows.
# The largest `T` does not depend on the row, so it is computed once here
# instead of once per row inside the lambda.
# NOTE(review): every row is scored with the largest `T` in the table rather
# than its own `T` value — confirm this is intended.
max_T = data_summary["T"].max()
data_summary.apply(
    lambda row: bgf.conditional_probability_alive(
        frequency=row.frequency,
        recency=row.recency,
        T=max_T
    )[0],
    axis=1
).sort_values().to_frame().reset_index().tail(2000)

In [None]:
# Inspect the summary row whose index equals 16422.0 (presumably a customer id);
# a boolean mask is used, so an absent id yields an empty frame, not an error.
is_selected_row = data_summary.index == 16422.0
data_summary.loc[is_selected_row]

# Model validation

In [None]:
# Partition the dataset into a calibration and a holdout dataset.
# The result feeds the validation model below, and its `frequency_holdout`
# column is read by the calibration-vs-holdout plot.
summary_cal_holdout = data_inst.model_cal_holdout_data()


In [None]:
# Train a second BG/NBD model, this time on the calibration/holdout summary.
cal_model_kwargs = dict(
    data=summary_cal_holdout,
    T_prediction=7,
    penalizer_coef_=0.6,
)
bgf_cal = BetaGeoModel(**cal_model_kwargs)
bgf_cal.fit_validation()


# Plots

In [None]:
# Plot the alive-probability history of a single customer, passing 365 as the
# number of days since birth to cover.
customer_id = 13140.0
days_since_birth = 365
# Keep only the transactions belonging to the selected customer.
is_customer = df_transaction[RawFeatures.CUSTOMER_ID] == customer_id
sp_trans = df_transaction[is_customer]
plot_history_alive(bgf, days_since_birth, sp_trans, RawFeatures.TRANSACTION_DATE)


In [None]:
# Development aid: prints the documentation of `plot_history_alive`.
# Safe to drop once the notebook is finalised.
help(plot_history_alive)

In [None]:
# plot_transaction_rate_heterogeneity(bgf);

# plot_dropout_rate_heterogeneity(bgf);

In [None]:
# fig = plt.figure(figsize=(12, 6))
# plot_frequency_recency_matrix(bgf, T = 7)

# fig = plt.figure(figsize=(12, 6))
# plot_probability_alive_matrix(bgf)

# plt.show()

In [None]:
# Compare actual and model-predicted purchase frequency in the holdout period.
# `n` caps how many frequency values are drawn on the x-axis; here it is set to
# the largest holdout frequency in the table.
# NOTE(review): lifetimes groups by `frequency_cal` by default — confirm that
# capping with the holdout maximum is what is wanted.
max_holdout_frequency = int(summary_cal_holdout['frequency_holdout'].max())
plot_calibration_purchases_vs_holdout_purchases(
    bgf_cal,
    summary_cal_holdout,
    n=max_holdout_frequency,
    figsize=(8, 5),
)
plt.show()