# Customer Value = Average Order Value * Purchase Frequency

## Average Order Value = Total Revenue / Total Number of Orders


## Purchase Frequency =  Total Number of Orders / Total Number of Customers


## Customer Lifetime = 1 / Churn Rate


## Churn Rate = 1 - Repeat Rate


In [1]:
#import modules
import pandas as pd # for dataframes
import matplotlib.pyplot as plt # for plotting graphs
import seaborn as sns # for plotting graphs
import datetime as dt
import numpy as np

In [None]:
# Load the transactions workbook into a DataFrame. The path is relative, so
# "Online Retail.xlsx" must be in the notebook's working directory.
data = pd.read_excel("Online Retail.xlsx")

In [None]:
# Preview the first five rows of the raw data
data.head(n=5)


In [None]:
# One row per distinct (Country, CustomerID) pair, so that each customer is
# counted once per country regardless of how many purchases they made.
filtered_data = data.drop_duplicates(subset=['Country', 'CustomerID'])[['Country', 'CustomerID']]


In [None]:
# Bar chart of the ten countries with the most distinct customers
filtered_data['Country'].value_counts().head(10).plot.bar()

In [None]:
# Restrict the analysis to United Kingdom transactions and show the
# column dtypes / non-null counts of the subset.
uk_data = data.loc[data['Country'].eq('United Kingdom')]
uk_data.info()

In [None]:
# Summary statistics of the numeric columns in the UK subset
uk_data.describe()

In [None]:
# Keep only rows with a strictly positive quantity, then re-inspect the frame
uk_data = uk_data.loc[uk_data['Quantity'].gt(0)]
uk_data.info()

In [None]:
# Keep only the columns needed for the CLTV calculation. Take an explicit copy
# so that columns added to `uk_data` later are written to an independent
# DataFrame rather than to a slice of `data` (avoids pandas'
# SettingWithCopyWarning and its ambiguous write semantics).
uk_data = uk_data[['CustomerID', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'UnitPrice']].copy()

In [None]:
# Calculate the total purchase amount of each row (units x unit price)
uk_data['TotalPurchase'] = uk_data['Quantity'].mul(uk_data['UnitPrice'])

In [None]:
# Collapse the transaction rows into one row per customer:
#   InvoiceDate   -> days between the customer's first and last purchase
#   InvoiceNo     -> number of distinct invoices, i.e. orders. The same invoice
#                    number may appear on several rows, so counting rows
#                    would count line items instead of orders.
#   Quantity      -> total units bought
#   TotalPurchase -> total money spent
uk_data_group = uk_data.groupby('CustomerID').agg({
    'InvoiceDate': lambda date: (date.max() - date.min()).days,
    'InvoiceNo': 'nunique',
    'Quantity': 'sum',
    'TotalPurchase': 'sum',
})

In [None]:
# Preview the per-customer aggregates
uk_data_group.head(n=5)


In [None]:
# Give the aggregated columns descriptive names (positional, so the order
# must match the column order of uk_data_group).
uk_data_group.columns = [
    'num_days',
    'num_transactions',
    'num_units',
    'spent_money',
]
uk_data_group.head(n=5)

## Calculate CLTV using following formula:

CLTV = ((Average Order Value x Purchase Frequency)/Churn Rate) x Profit margin.

Customer Value = Average Order Value * Purchase Frequency

### 1. Calculate Average Order Value

In [None]:
# Average order value per customer = money spent / number of transactions
uk_data_group['avg_order_value'] = uk_data_group['spent_money'].div(uk_data_group['num_transactions'])


In [None]:
# Preview the frame with the new avg_order_value column
uk_data_group.head(n=5)


### 2. Calculate Purchase Frequency

In [None]:
# Purchase frequency = total number of transactions / number of customers
purchase_frequency = uk_data_group['num_transactions'].sum() / len(uk_data_group)

### 3. Calculate Repeat Rate and Churn Rate

In [None]:
# Repeat rate = customers with more than one transaction / all customers
repeat_rate = len(uk_data_group.query('num_transactions > 1')) / len(uk_data_group)

In [None]:
# Churn rate is the complement of the repeat rate
churn_rate = 1.0 - repeat_rate

In [None]:
# Show the three scalar metrics together
(purchase_frequency, repeat_rate, churn_rate)

### 4. Calculate Profit Margin

Profit margin is a commonly used profitability ratio: it is the percentage of total sales that is retained as profit. Let's assume our business earns approximately 5% profit on total sales.

In [None]:
# Estimated profit per customer: 5% of the money the customer spent
uk_data_group['profit_margin'] = uk_data_group['spent_money'].mul(0.05)

In [None]:
# Preview the frame with the new profit_margin column
uk_data_group.head(n=5)

### 5. Calculate Customer Lifetime Value

In [None]:
# Customer value over the expected lifetime:
# (average order value x purchase frequency) / churn rate
uk_data_group['CLV'] = uk_data_group['avg_order_value'].mul(purchase_frequency).div(churn_rate)

In [None]:
# Customer Lifetime Value = customer value x profit margin, where the profit
# margin is a dimensionless ratio (5% is assumed here). Multiplying by a
# monetary profit amount instead would yield a figure in currency squared.
uk_data_group['cust_lifetime_value'] = uk_data_group['CLV'] * 0.05

In [None]:
# Preview the frame with the lifetime-value columns
uk_data_group.head(n=5)

## Prediction Model for CLTV

Let's build the CLTV prediction model.

Here, you are going to predict CLTV using a linear regression model.

Let's first use the data loaded and filtered above.

In [None]:
# Preview the filtered UK transaction rows
uk_data.head(n=5)

In [None]:
# Label every row with its invoice month, formatted like "Dec-2011".
# The vectorised .dt accessor replaces a Python-level call per row and
# yields a missing value (instead of raising) for a missing date.
uk_data['month_yr'] = uk_data['InvoiceDate'].dt.strftime('%b-%Y')

In [None]:
# Preview the rows with the new month_yr column
uk_data.head(n=5)

In [None]:
# Wide table: one row per customer, one column per month, each cell holding
# the customer's total purchases in that month (0 when there were none).
# reset_index() turns CustomerID back into an ordinary column.
sale = uk_data.pivot_table(
    index=['CustomerID'],
    columns=['month_yr'],
    values='TotalPurchase',
    aggfunc='sum',
    fill_value=0,
).reset_index()

In [None]:
# Preview the monthly sales table
sale.head(n=5)


In [None]:
# Total revenue per customer across every month column. The month columns are
# selected by excluding the non-month columns by name: after reset_index()
# only the first column (CustomerID) is not a month, so slicing from
# position 2 would silently leave the first month out of the total.
# errors='ignore' keeps the cell re-runnable once 'CLV' already exists.
sale['CLV'] = sale.drop(columns=['CustomerID', 'CLV'], errors='ignore').sum(axis=1)

In [None]:
# Inspect the first 100 customers of the monthly sales table
sale.iloc[:100]

In [None]:
## Selecting Feature

In [None]:
# Features: monthly revenue for the six months Jul-2011 .. Dec-2011
X = sale.loc[:, ['Dec-2011', 'Nov-2011', 'Oct-2011', 'Sep-2011', 'Aug-2011', 'Jul-2011']]
# Target: the customer's total revenue (kept as a one-column DataFrame)
y = sale.loc[:, ['CLV']]

In [None]:
## Splitting Data

In [None]:
# Partition the customers into a training set and a test set.
# The fixed random_state makes the split reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

## Model Development No.1

In [None]:
from sklearn.linear_model import LinearRegression

# Create the linear regression model and learn its coefficients from the
# training data; fit() returns the estimator, so the two steps are chained.
linreg = LinearRegression().fit(X_train, y_train)

# Predicted target values for the held-out test set
y_pred = linreg.predict(X_test)

In [None]:
# Show the fitted intercept, then the coefficients, one per line
print(linreg.intercept_, linreg.coef_, sep="\n")

In [None]:
from sklearn import metrics

# Coefficient of determination (R²) of the predictions on the test set
print("R-Square:", metrics.r2_score(y_true=y_test, y_pred=y_pred))

### Model Evaluation
For regression problems, the following evaluation metrics are commonly used (Ritchie Ng):

Mean Absolute Error (MAE) is the mean of the absolute value of the errors.
Mean Squared Error (MSE) is the mean of the squared errors.
Root Mean Squared Error (RMSE) is the square root of the mean of the squared errors.

In [None]:
# Mean absolute error of the test-set predictions
print("MAE:", metrics.mean_absolute_error(y_test, y_pred))

# Mean squared error, computed once and reused for its square root (RMSE)
mse = metrics.mean_squared_error(y_test, y_pred)
print("MSE", mse)
print("RMSE:", np.sqrt(mse))

RMSE is often preferred over MSE because it is expressed in the same units as y, which makes it directly interpretable. MAE shares those units, but RMSE penalizes large errors more heavily.


## Model No.2 — Robust Regression

In [None]:
# import numpy as np
# from matplotlib import pyplot as plt
# from sklearn import linear_model, datasets

# n_samples = 1000
# n_outliers = 50
# X, y, coef = datasets.make_regression(n_samples=n_samples, n_features=1, n_informative=1, noise=10, coef=True, random_state=0)
# # Add outlier data
# np.random.seed(0)
# X[:n_outliers] = 3 + 0.5 * np.random.normal(size=(n_outliers, 1))
# y[:n_outliers] = -3 + 10 * np.random.normal(size=n_outliers)
# print("The independent variable X has {} observations/samples".format(X.shape[0]))
# print("The dependent variable y has shape {}".format(y.shape))

# plt.scatter(X,y)
# plt.show()