https://www.datacamp.com/community/tutorials/customer-life-time-value


https://www.datacamp.com/community/tutorials/introduction-customer-segmentation-python

In [85]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
import seaborn as sns

%matplotlib inline

In [86]:
data = pd.read_csv('POC_Revenue_Extrapolation_f.csv', sep='\t')
data.head()

Unnamed: 0,Day,Unique Visitors,Cart Adds(Visitor),Cart Removal (Visitor)_SJ,Start Checkouts (Visitor),Orders (Visitor),Revenue
0,11/1/18,73543,6043,1850,19,1885,507698
1,11/2/18,73303,5866,1562,7,1983,400065
2,11/3/18,65880,5387,1404,3,1817,399943
3,11/4/18,68678,5637,1551,4,1841,223875
4,11/5/18,74023,5866,3345,1573,1698,369716


In [87]:
cols=['Date','Unique_Visitors', 'Cart_Adds', 'Cart_Removal', 'Start_Checkouts', 'Orders','Revenue']

In [88]:
data.columns = cols
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue
0,11/1/18,73543,6043,1850,19,1885,507698
1,11/2/18,73303,5866,1562,7,1983,400065
2,11/3/18,65880,5387,1404,3,1817,399943
3,11/4/18,68678,5637,1551,4,1841,223875
4,11/5/18,74023,5866,3345,1573,1698,369716


In [89]:
data['Date'] = pd.to_datetime(data['Date'], dayfirst=True)
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue
0,2018-01-11,73543,6043,1850,19,1885,507698
1,2018-02-11,73303,5866,1562,7,1983,400065
2,2018-03-11,65880,5387,1404,3,1817,399943
3,2018-04-11,68678,5637,1551,4,1841,223875
4,2018-05-11,74023,5866,3345,1573,1698,369716


In [90]:
data['Average-Order-Value'] = data['Revenue']/data['Orders']
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue,Average-Order-Value
0,2018-01-11,73543,6043,1850,19,1885,507698,269.335809
1,2018-02-11,73303,5866,1562,7,1983,400065,201.747352
2,2018-03-11,65880,5387,1404,3,1817,399943,220.111723
3,2018-04-11,68678,5637,1551,4,1841,223875,121.605106
4,2018-05-11,74023,5866,3345,1573,1698,369716,217.73616


In [91]:
data['purchase_frequency'] = data['Orders']/data['Unique_Visitors']
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue,Average-Order-Value,purchase_frequency
0,2018-01-11,73543,6043,1850,19,1885,507698,269.335809,0.025631
1,2018-02-11,73303,5866,1562,7,1983,400065,201.747352,0.027052
2,2018-03-11,65880,5387,1404,3,1817,399943,220.111723,0.02758
3,2018-04-11,68678,5637,1551,4,1841,223875,121.605106,0.026806
4,2018-05-11,74023,5866,3345,1573,1698,369716,217.73616,0.022939


In [92]:
## Manipulate repeat rate and churn rate

In [93]:
data['Num_of_Orders'] = data['Orders']*1.21
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue,Average-Order-Value,purchase_frequency,Num_of_Orders
0,2018-01-11,73543,6043,1850,19,1885,507698,269.335809,0.025631,2280.85
1,2018-02-11,73303,5866,1562,7,1983,400065,201.747352,0.027052,2399.43
2,2018-03-11,65880,5387,1404,3,1817,399943,220.111723,0.02758,2198.57
3,2018-04-11,68678,5637,1551,4,1841,223875,121.605106,0.026806,2227.61
4,2018-05-11,74023,5866,3345,1573,1698,369716,217.73616,0.022939,2054.58


In [94]:
data.iloc[1:10,9] = data['Orders']*1.21
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue,Average-Order-Value,purchase_frequency,Num_of_Orders
0,2018-01-11,73543,6043,1850,19,1885,507698,269.335809,0.025631,2280.85
1,2018-02-11,73303,5866,1562,7,1983,400065,201.747352,0.027052,2399.43
2,2018-03-11,65880,5387,1404,3,1817,399943,220.111723,0.02758,2198.57
3,2018-04-11,68678,5637,1551,4,1841,223875,121.605106,0.026806,2227.61
4,2018-05-11,74023,5866,3345,1573,1698,369716,217.73616,0.022939,2054.58


In [95]:
data.iloc[10:,9] = data['Orders']*1
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue,Average-Order-Value,purchase_frequency,Num_of_Orders
0,2018-01-11,73543,6043,1850,19,1885,507698,269.335809,0.025631,2280.85
1,2018-02-11,73303,5866,1562,7,1983,400065,201.747352,0.027052,2399.43
2,2018-03-11,65880,5387,1404,3,1817,399943,220.111723,0.02758,2198.57
3,2018-04-11,68678,5637,1551,4,1841,223875,121.605106,0.026806,2227.61
4,2018-05-11,74023,5866,3345,1573,1698,369716,217.73616,0.022939,2054.58


In [96]:
repeat_rate = (data[(data['Num_of_Orders']/data['Orders'] )>1].shape[0])/data.shape[0]
repeat_rate

0.10869565217391304

In [97]:
data[(data['Num_of_Orders']/data['Orders'] )>1].shape[0]

10

In [98]:
churn_rate = 1- repeat_rate
churn_rate

0.8913043478260869

In [99]:
# profit margin
data['profit_margin'] =data['Revenue']*.05
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue,Average-Order-Value,purchase_frequency,Num_of_Orders,profit_margin
0,2018-01-11,73543,6043,1850,19,1885,507698,269.335809,0.025631,2280.85,25384.9
1,2018-02-11,73303,5866,1562,7,1983,400065,201.747352,0.027052,2399.43,20003.25
2,2018-03-11,65880,5387,1404,3,1817,399943,220.111723,0.02758,2198.57,19997.15
3,2018-04-11,68678,5637,1551,4,1841,223875,121.605106,0.026806,2227.61,11193.75
4,2018-05-11,74023,5866,3345,1573,1698,369716,217.73616,0.022939,2054.58,18485.8


In [100]:
data['clv'] = (data['Average-Order-Value']*data['purchase_frequency'])/churn_rate
data['LTV'] = data['clv']*data['profit_margin']
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue,Average-Order-Value,purchase_frequency,Num_of_Orders,profit_margin,clv,LTV
0,2018-01-11,73543,6043,1850,19,1885,507698,269.335809,0.025631,2280.85,25384.9,7.745297,196613.594293
1,2018-02-11,73303,5866,1562,7,1983,400065,201.747352,0.027052,2399.43,20003.25,6.123261,122485.124296
2,2018-03-11,65880,5387,1404,3,1817,399943,220.111723,0.02758,2198.57,19997.15,6.811119,136202.97343
3,2018-04-11,68678,5637,1551,4,1841,223875,121.605106,0.026806,2227.61,11193.75,3.657311,40939.028985
4,2018-05-11,74023,5866,3345,1573,1698,369716,217.73616,0.022939,2054.58,18485.8,5.603709,103589.035283


In [101]:
data['month_yr'] = data['Date'].apply(lambda x: x.strftime('%b-%Y'))
data.head()

Unnamed: 0,Date,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue,Average-Order-Value,purchase_frequency,Num_of_Orders,profit_margin,clv,LTV,month_yr
0,2018-01-11,73543,6043,1850,19,1885,507698,269.335809,0.025631,2280.85,25384.9,7.745297,196613.594293,Jan-2018
1,2018-02-11,73303,5866,1562,7,1983,400065,201.747352,0.027052,2399.43,20003.25,6.123261,122485.124296,Feb-2018
2,2018-03-11,65880,5387,1404,3,1817,399943,220.111723,0.02758,2198.57,19997.15,6.811119,136202.97343,Mar-2018
3,2018-04-11,68678,5637,1551,4,1841,223875,121.605106,0.026806,2227.61,11193.75,3.657311,40939.028985,Apr-2018
4,2018-05-11,74023,5866,3345,1573,1698,369716,217.73616,0.022939,2054.58,18485.8,5.603709,103589.035283,May-2018


In [102]:
#X=data[['Dec-2011','Nov-2011', 'Oct-2011','Sep-2011','Aug-2011','Jul-2011']]
X = data.drop(['clv', 'month_yr', 'Date'], axis=1)
X.head()

Unnamed: 0,Unique_Visitors,Cart_Adds,Cart_Removal,Start_Checkouts,Orders,Revenue,Average-Order-Value,purchase_frequency,Num_of_Orders,profit_margin,LTV
0,73543,6043,1850,19,1885,507698,269.335809,0.025631,2280.85,25384.9,196613.594293
1,73303,5866,1562,7,1983,400065,201.747352,0.027052,2399.43,20003.25,122485.124296
2,65880,5387,1404,3,1817,399943,220.111723,0.02758,2198.57,19997.15,136202.97343
3,68678,5637,1551,4,1841,223875,121.605106,0.026806,2227.61,11193.75,40939.028985
4,74023,5866,3345,1573,1698,369716,217.73616,0.022939,2054.58,18485.8,103589.035283


In [103]:
y=data[['clv']]
y.head()

Unnamed: 0,clv
0,7.745297
1,6.123261
2,6.811119
3,3.657311
4,5.603709


In [104]:
#split training set and test set
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=0)

In [105]:
# import model
from sklearn.linear_model import LinearRegression

# instantiate
linreg = LinearRegression()

# fit the model to the training data (learn the coefficients)
linreg.fit(X_train, y_train)

# make predictions on the testing set
y_pred = linreg.predict(X_test)

In [106]:
# print the intercept and coefficients
print(linreg.intercept_)
print(linreg.coef_)

[2.82002135]
[[-1.11940091e-04  1.27406591e-04 -9.35909393e-06  3.79450896e-05
   3.63302381e-03  1.11881912e-06  3.26128631e-02 -5.82624179e+01
  -8.64717387e-04  1.04008697e-07  4.43775083e-07]]


In [107]:
from sklearn import metrics

# compute the R Square for model
print("R-Square:",metrics.r2_score(y_test, y_pred))

R-Square: 0.9879245318798286


In [108]:
# calculate MAE using scikit-learn
print("MAE:",metrics.mean_absolute_error(y_test,y_pred))

#calculate mean squared error
print("MSE",metrics.mean_squared_error(y_test, y_pred))
# compute the RMSE of our predictions
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

MAE: 0.550603932151338
MSE 0.9197377918978615
RMSE: 0.9590296095000725
