In [None]:

# 1. Data Preparation
# 2. Expected Sales Forecasting with BG-NBD Model
# 3. Expected Average Profit with Gamma-Gamma Model
# 4. Calculating CLTV with the BG-NBD and Gamma-Gamma Models
# 5. Building segmentation with CLTV

#  Data Preparation

##############################################################

In [None]:

#  Loading Necessary Libraries


In [None]:
pip install lifetimes

In [None]:
# pip install lifetimes

import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter
from lifetimes.plotting import plot_period_transactions

# Display settings: never truncate columns, allow 500-character wide output,
# and render floats with four decimal places.
for _option, _value in (
    ('display.max_columns', None),
    ('display.width', 500),
    ('display.float_format', lambda x: '%.4f' % x),
):
    pd.set_option(_option, _value)

from sklearn.preprocessing import MinMaxScaler

In [None]:
def outlier_winsorize(df, col, lower_q=0.01, upper_q=0.99, whisker=1.5):
    """Cap extreme values of ``df[col]`` in place.

    Two quantiles of the column define the capping thresholds::

        spread = q_high - q_low
        lower  = q_low  - whisker * spread
        upper  = q_high + whisker * spread

    Values below ``lower`` are replaced by ``lower`` and values above
    ``upper`` by ``upper``. Missing values are left untouched because they
    compare as neither below nor above the thresholds.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that is modified in place.
    col : hashable
        Label of the numeric column to cap.
    lower_q, upper_q : float, default 0.01 and 0.99
        Quantiles used as the base of the lower and upper threshold.
    whisker : float, default 1.5
        Multiple of the inter-quantile spread added beyond each quantile.

    Returns
    -------
    None
        The function only mutates ``df``.
    """
    q_low = df[col].quantile(lower_q)
    q_high = df[col].quantile(upper_q)
    spread = q_high - q_low
    upper = q_high + whisker * spread
    lower = q_low - whisker * spread

    too_low = df[col] < lower
    too_high = df[col] > upper

    # The thresholds are floats. Writing them into an integer column relies on
    # silent upcasting, which newer pandas deprecates, so convert the column
    # explicitly first -- but only when something will actually be replaced.
    if (too_low.any() or too_high.any()) and pd.api.types.is_integer_dtype(df[col]):
        df[col] = df[col].astype("float64")

    df.loc[too_low, col] = lower
    df.loc[too_high, col] = upper

In [None]:

# Read CSV file


In [None]:
# Load the raw transactions. df_orj keeps the untouched original, while df is
# the working copy that the following cells clean and transform.
df_orj = pd.read_csv("/kaggle/input/online-retail-ii-uci/online_retail_II.csv")
df = df_orj.copy()
df.head()

In [None]:
# Preview the last rows of the working copy.
df.tail()

In [None]:
# Column dtypes and non-null counts of the freshly loaded data.
df.info()

In [None]:
# (rows, columns) of the working copy.
df.shape

In [None]:
# Data preprocessing

In [None]:
# Summary statistics, transposed so that each column becomes one row.
df.describe().T

In [None]:
# Drop rows with any missing value, then remove invoices whose number contains
# "C" (presumably cancellations -- confirm against the data dictionary) and
# rows with a non-positive quantity.
df = df.dropna()
df = df[~df["Invoice"].str.contains("C", na=False)]
# .copy() makes df own its data, so the in-place capping below does not write
# into a slice of the previous frame (SettingWithCopyWarning).
df = df[df["Quantity"] > 0].copy()

# Cap extreme quantities and prices in place.
outlier_winsorize(df, "Quantity")
outlier_winsorize(df, "Price")
df.describe().T

In [None]:
# Revenue of each invoice line: units sold times unit price.
df["TotalPrice"] = df["Quantity"].mul(df["Price"])

In [None]:
# InvoiceDate is still a string column at this point (it is converted further
# below), so parse it before taking the maximum; a plain string max is only
# lexicographic and is chronological only for some date formats.
pd.to_datetime(df['InvoiceDate']).max()

In [None]:
# Fixed analysis date; customer tenure is measured against it further below.
today_date = dt.datetime(year=2011, month=12, day=11)

In [None]:

# Preparation of Lifetime Data Structure


In [None]:
# Check the column dtypes before InvoiceDate is converted to datetime.
df.info()

In [None]:
# Convert InvoiceDate to datetime64. No explicit format is passed: the former
# '%Y/%m/%d' has no time component and uses '/' separators, so strict parsing
# (pandas >= 2.0) rejects timestamps that do not match it exactly.
# NOTE(review): assumes the strings are ISO-like timestamps such as
# 'YYYY-MM-DD HH:MM:SS' -- confirm against the CSV.
df["InvoiceDate"] = pd.to_datetime(df['InvoiceDate'])

In [None]:
# Check the column dtypes again after the InvoiceDate conversion.
df.info()

In [None]:
# recency:   time between a customer's first and last purchase, in weeks
# Tenure:    age of the customer -- time between the first purchase and
#            today_date, in weeks
# frequency: number of distinct invoices of the customer; only customers with
#            frequency > 1 are kept
# monetary:  average TotalPrice per invoice
# NOTE(review): lifetimes documents `frequency` as the number of *repeat*
# purchases (total purchases minus one); here the total invoice count is
# used -- confirm that this is intended.

cltv_df = df.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                                                         lambda date: (today_date - date.min()).days],
                                         # built-in aggregations instead of lambdas: same result, much faster
                                         'Invoice': 'nunique',
                                         'TotalPrice': 'sum'})
print(cltv_df.head(3))
print('*'*33)

# Drop the outer column level (the source column names), keeping only the
# aggregation level, then give the four columns their final names.
cltv_df.columns = cltv_df.columns.droplevel(0)
print(cltv_df.head(3))
print('*'*33)

cltv_df.columns = ['recency', 'Tenure', 'frequency', 'monetary']

# average money spent per purchase
cltv_df["monetary"] = cltv_df["monetary"] / cltv_df["frequency"]

# week conversion (values above are in days)
cltv_df["recency"] = cltv_df["recency"] / 7
cltv_df["Tenure"] = cltv_df["Tenure"] / 7

# Keep customers with a positive average spend and more than one invoice.
# Filtering last and taking an explicit copy means cltv_df owns its data, so
# the column assignments in later cells do not raise SettingWithCopyWarning.
cltv_df = cltv_df[(cltv_df["monetary"] > 0) & (cltv_df['frequency'] > 1)].copy()
print(cltv_df.head(3))

In [None]:
# Display the prepared per-customer table.
cltv_df

In [None]:

#  BG-NBD model building

bgf = BetaGeoFitter(penalizer_coef=0.001)

# Frequency, recency and tenure columns, in the positional order used by both
# fit and predict.
bgnbd_inputs = [cltv_df[name] for name in ('frequency', 'recency', 'Tenure')]
bgf.fit(*bgnbd_inputs)

# Expected number of purchases of every customer within 1 week
cltv_df["expected_purc_1_week"] = bgf.predict(1, *bgnbd_inputs)

# The 10 customers expected to purchase the most in that week
cltv_df["expected_purc_1_week"].sort_values(ascending=False).head(10)

In [None]:
# First ten customers, now including the one-week purchase expectation.
cltv_df.head(10)

In [None]:

# Expected purchases within 1 month (taken as 4 weeks), then the 10 customers
# with the highest expectation.
cltv_df["expected_purc_1_month"] = bgf.predict(
    4,
    *(cltv_df[name] for name in ('frequency', 'recency', 'Tenure')),
)
cltv_df.sort_values(by="expected_purc_1_month", ascending=False).head(10)

In [None]:
# Ratio of each customer's one-month to one-week purchase expectation.
dfx = cltv_df['expected_purc_1_month'].div(cltv_df['expected_purc_1_week'])

In [None]:
# Month-to-week ratios in ascending order.
dfx.sort_values()

In [None]:

# Company-wide expected number of purchases in 1 month (4 weeks): the sum of
# the per-customer expectations.
bgf.predict(
    4,
    *(cltv_df[name] for name in ('frequency', 'recency', 'Tenure')),
).sum()

In [None]:

# Company-wide expected number of purchases in 3 months (3 x 4 = 12 weeks):
# the sum of the per-customer expectations.
bgf.predict(
    12,
    *(cltv_df[name] for name in ('frequency', 'recency', 'Tenure')),
).sum()

In [None]:

# Evaluation

# plot_period_transactions draws a histogram comparing, for each number of
# repeat transactions in the calibration period, how many customers actually
# had that count with how many the fitted model expects.
plot_period_transactions(bgf)
plt.show()

In [None]:

# Gamma-Gamma model

# Frequency and average spend, in the positional order used by both fit and
# conditional_expected_average_profit.
gamma_inputs = (cltv_df['frequency'], cltv_df['monetary'])

ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(*gamma_inputs)

# Expected average profit per transaction for every customer
cltv_df["expected_average_profit"] = ggf.conditional_expected_average_profit(*gamma_inputs)

# The 20 customers with the highest expected average profit
cltv_df.sort_values(by="expected_average_profit", ascending=False).head(20)

In [None]:

# Build the CLTV from the BG-NBD and Gamma-Gamma models

cltv = ggf.customer_lifetime_value(
    bgf,
    # frequency, recency, tenure and average spend, passed positionally
    *(cltv_df[name] for name in ('frequency', 'recency', 'Tenure', 'monetary')),
    time=3,              # horizon: 3 months
    freq="W",            # unit in which Tenure is expressed (weeks)
    discount_rate=0.01,
)
cltv.head()

In [None]:
# Turn the customer-ID index into a regular column so that cltv can be merged
# on "Customer ID". The guard makes the cell safe to re-run: without it, a
# second execution would reset the already-reset index and add a spurious
# "index" column.
if cltv.index.name == "Customer ID":
    cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head(20)

In [None]:
# Attach the CLTV values to the per-customer table (left join on the customer
# identifier) and show the 10 most valuable customers.
cltv_final = pd.merge(cltv_df, cltv, how="left", on="Customer ID")

cltv_final.sort_values(by="clv", ascending=False).head(10)

In [None]:
# Min-max scaling of CLTV to the [0, 1] range for segmentation

scaler = MinMaxScaler(feature_range=(0, 1))
# Fit on the clv column and transform it in a single step.
cltv_final["scaled_clv"] = scaler.fit_transform(cltv_final[["clv"]])
cltv_final.sort_values(by="scaled_clv", ascending=False).head(10)

In [None]:

# Segmentation

# Split customers into four equally sized groups by scaled CLTV; the lowest
# quartile is labelled "D" and the highest "A".
cltv_final["segment"] = pd.qcut(
    x=cltv_final["scaled_clv"],
    q=4,
    labels=["D", "C", "B", "A"],
)
cltv_final.head()

In [None]:
# Per-segment count, mean and sum of every column. The statistics are passed
# as a list rather than a set so the order of the output columns is the same
# on every run. observed=False states the existing grouping behaviour for the
# categorical "segment" column explicitly, which avoids the FutureWarning that
# newer pandas emits when it is left unspecified.
cltv_final.groupby("segment", observed=False).agg(["count", "mean", "sum"])

# Thanks. !