# BTYD model

In [None]:
%matplotlib inline 
%pylab inline

In [None]:
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from lifetimes.utils import calculate_alive_path, expected_cumulative_transactions
from scipy import stats
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Load the full set of FR VP buyers from the Dataiku dataset into a dataframe.
tb_tra_combine_cali_test = dataiku.Dataset("tb_tra_combine_cali_test")
tra_membres_ori = tb_tra_combine_cali_test.get_dataframe()
# Show (rows, columns) of the loaded frame as the cell output.
tra_membres_ori.shape

In [None]:
import random

# Seed the stdlib RNG (kept for any later use of the `random` module).
random.seed(100)

# Sample: 10 % of the buyers, drawn without replacement.
# DataFrame.sample draws from NumPy's random state, which random.seed() does
# not affect, so the seed is passed explicitly through `random_state` to make
# the sample identical on every Restart & Run All.
tra_membres = tra_membres_ori.sample(
    n=int(tra_membres_ori.shape[0] * 0.1),
    replace=False,
    random_state=100,
)

In [None]:
# Rename t_cal -> T_cal, the column name the model-fitting cells read.
tra_membres = tra_membres.rename(columns={'t_cal': 'T_cal'})
# Disabled sanity check, kept for reference:
#tra_membres.groupby('memberid').memberid.nunique()

## 1. Repeat model

In [None]:
# Distribution of the number of purchases per customer.
n_purchases = tra_membres['frequency_cal']

# One bin per possible purchase count between the minimum and the maximum.
# The count is cast to int because the column may be stored as float, and the
# histogram call requires an integer number of bins.
n_bins = int(n_purchases.max() - n_purchases.min()) + 1

n_purchases.hist(bins=n_bins)
plt.xlabel('Number of Purchases')
plt.ylabel('Number of Customers')

In [None]:
from lifetimes import BetaGeoFitter

# BG/NBD model: fit on the calibration-period frequency / recency / T columns
# with no penalisation, then print the fitted model's summary representation.
data = tra_membres[['frequency_cal', 'recency_cal', 'T_cal']]

bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(
    data['frequency_cal'],
    data['recency_cal'],
    data['T_cal'],
)
print(bgf)

In [None]:
# Compare the distribution of the actual frequency of repeat transactions with
# the fitted one.
from lifetimes.plotting import plot_period_transactions
plot_period_transactions(bgf)

In [None]:
from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases

# Test the fitted model against the hold-out period.
# The model is refit on the calibration columns of the sample before plotting.
bgf.fit(
    tra_membres['frequency_cal'],
    tra_membres['recency_cal'],
    tra_membres['T_cal'],
)

# Plot the fitted model, passing the calibration and hold-out columns together.
plot_calibration_purchases_vs_holdout_purchases(
    bgf,
    tra_membres[['frequency_cal', 'recency_cal', 'T_cal', 'frequency_holdout', 'duration_holdout']],
)

In [None]:
# Save model: resolve the path of the "BTYD Model" Dataiku folder and write
# the fitted BG/NBD model there as bgf.pkl.
path=dataiku.Folder("BTYD Model").get_path()
bgf.save_model(path+'/bgf.pkl')

In [None]:
# Load model: restore the saved BG/NBD model into a fresh fitter instance.
# Relies on `path` set by the cell that saved the model.
# NOTE(review): the .pkl extension suggests a pickle-based format; only load
# files from a trusted location — confirm.
bgf_loaded=BetaGeoFitter()
bgf_loaded.load_model(path+'/bgf.pkl')

## 2. Monetary value model

In [None]:
# Pairwise correlation between monetary_value and frequency_cal.
# NOTE(review): presumably a check of the Gamma-Gamma assumption that spend is
# independent of purchase frequency — confirm.
tra_membres[['monetary_value', 'frequency_cal']].corr()

In [None]:
from lifetimes import GammaGammaFitter

# Keep only the members with at least one repeat purchase, then fit the
# Gamma-Gamma model on their frequency and monetary value (no penalisation).
tra_membres_cor = tra_membres.loc[tra_membres['frequency_cal'] > 0]

ggf = GammaGammaFitter(penalizer_coef=0.0)
ggf.fit(tra_membres_cor['frequency_cal'], tra_membres_cor['monetary_value'])
print(ggf)

In [None]:
# Resolve the path of the "BTYD Model" Dataiku folder and write the fitted
# Gamma-Gamma model there as ggf.pkl.
path=dataiku.Folder("BTYD Model").get_path()
ggf.save_model(path+'/ggf.pkl')

In [None]:
# Reload both saved models (BG/NBD and Gamma-Gamma) into fresh fitter
# instances, reading from the folder path held in `path`.
# Note: the later cells visible in this notebook keep using `bgf` / `ggf`,
# not these reloaded copies.
bgf_loaded=BetaGeoFitter()
bgf_loaded.load_model(path+'/bgf.pkl')
ggf_loaded=GammaGammaFitter()
ggf_loaded.load_model(path+'/ggf.pkl')

In [None]:
# Mean of the model's conditional expected average profit over every sampled
# member, next to the observed mean monetary value of repeat buyers only.
expected_avg_profit = ggf.conditional_expected_average_profit(
    tra_membres['frequency_cal'],
    tra_membres['monetary_value'],
).mean()

repeat_buyers = tra_membres[tra_membres['frequency_cal'] > 0]
observed_avg_profit = repeat_buyers['monetary_value'].mean()

print("Expected conditional average profit: %s, Average profit: %s" % (
    expected_avg_profit, observed_avg_profit))

In [None]:
# Refit the BG/NBD model on the calibration columns of the sample.
bgf.fit(
    tra_membres['frequency_cal'],
    tra_membres['recency_cal'],
    tra_membres['T_cal'],
)

# Customer lifetime value per member: `bgf` predicts the number of future
# transactions and `ggf` supplies the monetary side.
aa = ggf.customer_lifetime_value(
    bgf,
    tra_membres['frequency_cal'],
    tra_membres['recency_cal'],
    tra_membres['T_cal'],
    tra_membres['monetary_value'],
    time=12,             # months
    discount_rate=0.01,  # monthly discount rate ~ 12.7% annually
)
    

In [None]:
# Turn the CLV result `aa` into a two-column frame: its index labels under
# 'index' and its values under 'CLV'.
bb=pd.DataFrame({'index':aa.index, 'CLV':aa.values})
bb.head()

In [None]:
# Expected number of purchases per member over the hold-out window.
# The horizon was previously an undefined name `t`, which raises NameError on
# a fresh kernel. Each member's `duration_holdout` is used instead so the
# prediction covers the same window as `frequency_holdout`.
# NOTE(review): assumes duration_holdout is expressed in the same time unit
# as T_cal — confirm against the upstream dataset.
tra_membres['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    tra_membres['duration_holdout'],
    tra_membres['frequency_cal'],
    tra_membres['recency_cal'],
    tra_membres['T_cal'],
)

In [None]:
# Copy the row index into a regular column 'A'; the join cell sets it back as
# the index to align the member rows with the CLV frame.
tra_membres['A'] =tra_membres.index

In [None]:
# Column-wise inner join of the member rows (keyed on 'A') with the CLV frame
# (keyed on 'index'): only labels present on both sides are kept.
result_value = pd.concat(
    [tra_membres.set_index('A'), bb.set_index('index')],
    axis=1,
    join='inner',
)
result_value.head()

In [None]:
# Density curves (no histogram bars) of predicted CLV and of the hold-out
# monetary value, each restricted to values below 1000.
clv_below_1000 = result_value.CLV[result_value.CLV < 1000]
holdout_below_1000 = result_value.monetary_holdout[result_value.monetary_holdout < 1000]

sns.distplot(clv_below_1000, hist=False, label='Prediction of lifetime value')
sns.distplot(holdout_below_1000, hist=False, label='True monetary value')

plt.xlabel('CLV value')
plt.ylabel('Density')
plt.legend()
plt.show()

In [None]:
# Ten rows with the highest CLV (ascending sort, so the largest comes last).
result_value.sort_values(by='CLV').tail(10)

In [None]:
# Pearson correlation between the predicted CLV and the hold-out monetary value.
c = result_value[['CLV', 'monetary_holdout']].corr()

# Label-based lookup of the off-diagonal entry instead of positional [1], and
# print() as a call so the cell runs on Python 3 as well as Python 2, like
# the other print calls in this notebook.
print('Pearson correlation: %s' % c.loc['monetary_holdout', 'CLV'])

In [None]:
# Write the joined result (member columns plus CLV) to the Dataiku dataset
# "BTYD_Application_Buyers_sample_TEMP" via write_with_schema.
BTYD_Application_Buyers_sample_TEMP = dataiku.Dataset("BTYD_Application_Buyers_sample_TEMP")
BTYD_Application_Buyers_sample_TEMP.write_with_schema(result_value)