In [1]:
from __future__ import print_function
from generate_data import simulate_data
import pandas as pd
from shifted_beta_survival import ShiftedBetaSurvival

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Getting Simulated Data

In [2]:
# Generate the simulated data set and pull its three parts out by key.
# NOTE(review): 50000 is presumably the number of simulated records, and no
# random seed is set in this notebook -- confirm whether simulate_data seeds
# itself, otherwise the outputs below will differ between runs.
data = simulate_data(50000)
train = data['train']
test = data['test']
params = data['params']

In [3]:
# Peek at the first rows of the training frame.
train.head()

Unnamed: 0,id,category,counts,numerical,alpha_true,beta_true,age,alive
0,0,cat_b,0,1.486918,1.344507,1.5748,1,0
1,1,cat_b,1,1.111382,1.2318,1.3666,10,1
2,2,cat_a,0,0.570045,1.598971,2.125136,1,0
3,3,cat_a,0,0.996263,1.830555,2.152101,1,0
4,4,cat_a,0,1.193894,1.901688,2.149457,4,0


In [4]:
# Per-category mean of every column except the `id` key, which is dropped
# up front because averaging an identifier is meaningless.
train.drop('id', axis=1).groupby('category').mean()

Unnamed: 0_level_0,counts,numerical,alpha_true,beta_true,age,alive
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cat_a,0.255798,1.001347,1.854426,2.087699,2.75,0.064034
cat_b,0.257501,0.999257,1.164272,1.511384,3.22313,0.113636
cat_c,0.239443,1.013674,1.669724,4.702833,4.373081,0.184021


In [5]:
# Mean of the numeric columns for each value of `counts`.
# `numeric_only=True` is needed because `category` holds strings: pandas >= 2.0
# no longer drops non-numeric columns silently and raises a TypeError instead.
train.groupby('counts').mean(numeric_only=True).drop('id', axis=1)

Unnamed: 0_level_0,numerical,alpha_true,beta_true,age,alive
counts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.003021,1.551465,2.40651,3.284314,0.108411
1,1.000961,1.649296,2.047841,2.918717,0.08209
2,0.997661,1.768826,1.774981,2.478477,0.057947
3,1.074961,1.971068,1.420142,2.160714,0.017857
4,1.14435,1.505985,0.867915,1.0,0.0


In [6]:
# Pairwise correlations between the numeric covariate, the true parameters
# and the observed age / alive columns.
corr_columns = ['numerical', 'alpha_true', 'beta_true', 'age', 'alive']
train[corr_columns].corr()

Unnamed: 0,numerical,alpha_true,beta_true,age,alive
numerical,1.0,0.580256,0.031246,-0.098951,-0.08504
alpha_true,0.580256,1.0,0.276225,-0.104831,-0.098576
beta_true,0.031246,0.276225,1.0,0.1626,0.10371
age,-0.098951,-0.104831,0.1626,1.0,0.779739
alive,-0.08504,-0.098576,0.10371,0.779739,1.0


# Creating and Training the Model

In [None]:
# Use every available covariate as a model feature.
feature_list = ['category', 'counts', 'numerical']

# gamma is deliberately kept small so the model is free to "overfit" if
# necessary; the data set is large enough to afford it.
# NOTE(review): gamma is presumably the regularisation strength -- confirm
# against the ShiftedBetaSurvival documentation.
sbs = ShiftedBetaSurvival(
    age='age',
    alive='alive',
    features=feature_list,
    gamma=1e-3,
    verbose=True,
)

In [None]:
# Fit the model on the training frame.
sbs.fit(train)

In [None]:
# Display the fitted model's summary output.
sbs.summary()

In [None]:
# Make some predictions
pred = pd.concat([test,
                  sbs.predict_params(test)], axis=1)

In [None]:
# Mean absolute error between the true and the predicted parameters.
alpha_mae = (pred['alpha_true'] - pred['alpha']).abs().mean()
beta_mae = (pred['beta_true'] - pred['beta']).abs().mean()

print("Mean Absolute Error for Alpha: {}".format(alpha_mae))
print("Mean Absolute Error for Beta:  {}".format(beta_mae))

# Visualizing the Hazard Curve

In [None]:
# Predict churn (the hazard curve) for the test set with age=1 over
# n_periods=24.
harz_raw = sbs.predict_churn(test, age=1, n_periods=24)

# Prefix the predictions with the feature columns so the curves can later be
# grouped by feature value.
test_features = test[feature_list]
harz_curve = pd.concat([test_features, harz_raw], axis=1)

In [None]:
f, axis = plt.subplots(1, 1, figsize=(16, 8))

# Fixed colour per category.
col_map = {'cat_a': 'b', 'cat_b': 'r', 'cat_c': 'g'}

# harz_curve holds the feature columns first, followed by the predicted
# values. Skip the features by count instead of a hard-coded 3 so the plot
# keeps working when feature_list changes.
n_features = len(feature_list)

# One line per category: the column-wise mean of its predicted curves.
for name, df in harz_curve.groupby('category'):
    axis.plot(df.iloc[:, n_features:].mean(), alpha=1, c=col_map[name], label=name)

# Label the figure so it can be read on its own.
axis.set_xlabel('Period')
axis.set_ylabel('Mean predicted churn')
axis.set_title('Hazard curve by category')
axis.legend();

# Visualizing the Retention Curve

In [None]:
# Predict survival curves for the test set over n_periods=24.
sc_raw = sbs.predict_survival(test, n_periods=24)

# Prefix the predictions with the feature columns so the curves can later be
# grouped by feature value.
test_features = test[feature_list]
surv_curv = pd.concat([test_features, sc_raw], axis=1)

In [None]:
f, axis = plt.subplots(1, 1, figsize=(16, 8))

# Fixed colour per category.
col_map = {'cat_a': 'b', 'cat_b': 'r', 'cat_c': 'g'}

# surv_curv holds the feature columns first, followed by the predicted
# values. Skip the features by count instead of a hard-coded 3 so the plot
# keeps working when feature_list changes.
n_features = len(feature_list)

# One line per category: the column-wise mean of its predicted curves.
for name, df in surv_curv.groupby('category'):
    axis.plot(df.iloc[:, n_features:].mean(), alpha=1, c=col_map[name], label=name)

# Label the figure so it can be read on its own.
axis.set_xlabel('Period')
axis.set_ylabel('Mean predicted survival')
axis.set_title('Retention curve by category')
axis.legend();

# Lifetime Value (LTV)

### Initial LTV

In [None]:
# LTV with age and alive both fixed to 1 for every test row.
# NOTE(review): arpu is presumably average revenue per user per period and
# discount_rate a per-period rate -- confirm against predict_ltv.
ltv_initial = sbs.predict_ltv(
    test,
    age=1,
    alive=1,
    arpu=10,
    discount_rate=0.005,
)

In [None]:
# Store the initial LTV on the test frame; this adds an `ltvi` column in place.
test['ltvi'] = ltv_initial

### Residual LTV

In [None]:
# LTV with age and alive left unset (None).
# NOTE(review): presumably the model then falls back to each row's own
# age / alive values, giving the residual LTV -- confirm against predict_ltv.
ltv_residual = sbs.predict_ltv(
    test,
    age=None,
    alive=None,
    arpu=10,
    discount_rate=0.005,
)

In [None]:
# Store the residual LTV on the test frame; this adds an `ltvr` column in place.
test['ltvr'] = ltv_residual

### Relationship with the number of counts

In [None]:
# Mean of every numeric column (including the LTV columns added to `test`)
# for each value of `counts`.
# `numeric_only=True` is needed because `test` also carries the string column
# `category`: pandas >= 2.0 raises a TypeError on it instead of dropping it.
test.groupby(['counts']).mean(numeric_only=True)

### Correlation with alpha and beta parameters

In [None]:
# Pairwise relationships between the true parameters and the initial LTV,
# coloured by category.
# `height` replaces the `size` keyword, which seaborn deprecated in 0.9 and
# which current releases reject as an unexpected argument.
sns.pairplot(test,
             vars=['alpha_true', 'beta_true', 'ltvi'],
             hue='category',
             diag_kind='kde',
             height=4)