# 2: Gamma Gamma Model

When to use Gamma Gamma model: The model we are going to use to estimate the CLV for our userbase is called the Gamma-Gamma submodel, which relies upon an important assumption. The Gamma-Gamma submodel, in fact, assumes that there is no relationship between the monetary value and the purchase frequency. In practice we need to check whether the Pearson correlation between the two vectors is close to 0 in order to use this model. See # returning_customers_summary[['monetary_value', 'frequency']].corr()

In [1]:
from lifetimes.datasets import load_cdnow_summary_data_with_monetary_value
import pandas as pd

In [2]:
# Use real data
# data.head()
# new_names = {
#    'Recency': 'recency',
#    'Frequency': 'frequency',
#    'Monetary': 'monetary_value'
# }

# Rename columns using rename() function
# data = data.rename(columns=new_names)
# data.head()

In [3]:
summary_with_money_value = load_cdnow_summary_data_with_monetary_value()
summary_with_money_value.head()
returning_customers_summary = summary_with_money_value[summary_with_money_value['frequency']>0]

In [4]:
print(returning_customers_summary.head())

             frequency  recency      T  monetary_value
customer_id                                           
1                    2    30.43  38.86           22.35
2                    1     1.71  38.86           11.77
6                    7    29.43  38.86           73.74
7                    1     5.00  38.86           11.77
9                    2    35.71  38.86           25.55


In [5]:
returning_customers_summary[['monetary_value', 'frequency']].corr()

Unnamed: 0,monetary_value,frequency
monetary_value,1.0,0.113884
frequency,0.113884,1.0


In [6]:
from lifetimes import GammaGammaFitter
from lifetimes import BetaGeoFitter

In [7]:
ggf = GammaGammaFitter(penalizer_coef = 0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])
print(ggf)

<lifetimes.GammaGammaFitter: fitted with 946 subjects, p: 6.25, q: 3.74, v: 15.45>


In [8]:
from lifetimes import BetaGeoFitter

# similar API to scikit-learn and lifelines.
bgf = BetaGeoFitter()

In [9]:
# refit the BG model to the summary_with_money_value dataset
bgf.fit(summary_with_money_value['frequency'], summary_with_money_value['recency'], summary_with_money_value['T'])

<lifetimes.BetaGeoFitter: fitted with 2357 subjects, a: 0.79, alpha: 4.41, b: 2.43, r: 0.24>

In [10]:
# change months and discount rate!!!!!

In [11]:
clv = ggf.customer_lifetime_value(
    bgf, #the model to use to predict the number of future transactions
    summary_with_money_value['frequency'],
    summary_with_money_value['recency'],
    summary_with_money_value['T'],
    summary_with_money_value['monetary_value'],
    time=12, # months
    discount_rate=0.0127 # monthly discount rate ~ 12.7% annually
)

In [12]:
summary_with_money_value

Unnamed: 0_level_0,frequency,recency,T,monetary_value
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2,30.43,38.86,22.35
2,1,1.71,38.86,11.77
3,0,0.00,38.86,0.00
4,0,0.00,38.86,0.00
5,0,0.00,38.86,0.00
...,...,...,...,...
2353,0,0.00,27.00,0.00
2354,5,24.29,27.00,44.93
2355,0,0.00,27.00,0.00
2356,4,26.57,27.00,33.32


In [13]:
summary_with_clv = summary_with_money_value.copy()  # Make a copy of original DataFrame

In [14]:
# Append CLV values to the copy of the original DataFrame
summary_with_clv['CLV'] = clv

In [15]:
# Display the first 10 rows of the DataFrame with CLV values
print(summary_with_clv.head(10))

             frequency  recency      T  monetary_value         CLV
customer_id                                                       
1                    2    30.43  38.86           22.35  138.208161
2                    1     1.71  38.86           11.77   18.681684
3                    0     0.00  38.86            0.00   37.626436
4                    0     0.00  38.86            0.00   37.626436
5                    0     0.00  38.86            0.00   37.626436
6                    7    29.43  38.86           73.74  990.859176
7                    1     5.00  38.86           11.77   27.721229
8                    0     0.00  38.86            0.00   37.626436
9                    2    35.71  38.86           25.55  165.161952
10                   0     0.00  38.86            0.00   37.626436


In [16]:
# Export the DataFrame with CLV as CSV
summary_with_clv.to_csv('CLTV_RFM.csv', index=False, encoding='utf_8_sig')