# Import Libraries and Load Sample Data

In [17]:
import numpy as np
import pandas as pd
from lifetimes import BetaGeoBetaBinomFitter

import pymc as pm
from pymc_marketing.clv.distributions import BetaGeoBetaBinom
from pymc_marketing.prior import Prior

In [22]:
# Sample donations dataset (bgbb_donations.csv) hosted in the pymc-marketing repository.
DONATIONS_URL = (
    "https://raw.githubusercontent.com/pymc-labs/pymc-marketing/main/data/bgbb_donations.csv"
)

bgbb_donations = pd.read_csv(DONATIONS_URL)
bgbb_donations.head()

Unnamed: 0,customer_id,frequency,recency,T
0,0,0,0,6
1,1,0,0,6
2,2,0,0,6
3,3,0,0,6
4,4,0,0,6


# Predictive Methods
Fit a `lifetimes` model for comparison

In [23]:
# Fit the lifetimes BG/BB model. The fitter takes the frequency, recency and T
# columns positionally, in that order, as plain NumPy arrays.
fit_inputs = [bgbb_donations[column].values for column in ("frequency", "recency", "T")]
bgbb_alt = BetaGeoBetaBinomFitter().fit(*fit_inputs)

# Bare expression: display the fitted model's repr (subject count and parameters).
bgbb_alt

<lifetimes.BetaGeoBetaBinomFitter: fitted with 11104 subjects, alpha: 1.20, beta: 0.75, delta: 2.78, gamma: 0.66>

In [42]:
# Public API of the lifetimes fitter: every attribute name that does not start
# with an underscore.
method_list = [name for name in dir(BetaGeoBetaBinomFitter) if name[:1] != "_"]
print(method_list)

['conditional_expected_number_of_purchases_up_to_time', 'conditional_probability_alive', 'expected_number_of_transactions_in_first_n_periods', 'fit', 'load_model', 'save_model', 'summary']


In [88]:
# equation 13 in paper
# Uses `bgbb_alt`, the lifetimes model fitted earlier in this notebook. The
# previous name `bgbb` is not defined anywhere in the notebook, so the cell
# raised NameError on a fresh kernel.
# The leading 5 is the first positional argument (the number of future periods
# in the lifetimes API -- confirm against the lifetimes docs).
bgbb_alt.conditional_expected_number_of_purchases_up_to_time(
    5,
    bgbb_donations['frequency'],
    bgbb_donations['recency'],
    bgbb_donations['T'],
)

0        0.072863
1        0.072863
2        0.072863
3        0.072863
4        0.072863
           ...   
11099    3.752544
11100    3.752544
11101    3.752544
11102    3.752544
11103    3.752544
Length: 11104, dtype: float64

In [89]:
# equation 11 in paper
# Uses `bgbb_alt`, the lifetimes model fitted earlier in this notebook. The
# previous name `bgbb` is not defined anywhere in the notebook, so the cell
# raised NameError on a fresh kernel.
# The leading 10 is the first positional argument (the number of future periods
# in the lifetimes API -- confirm against the lifetimes docs).
bgbb_alt.conditional_probability_alive(
    10,
    bgbb_donations['frequency'],
    bgbb_donations['recency'],
    bgbb_donations['T'],
)

0        0.070072
1        0.070072
2        0.070072
3        0.070072
4        0.070072
           ...   
11099    0.602936
11100    0.602936
11101    0.602936
11102    0.602936
11103    0.602936
Length: 11104, dtype: float64

### Expected transactions in N periods
**DISCUSSION REQUESTED**<br>
This expression blows up to `inf` for large values of n (n=167 in this example), and recalculating it on the log scale is not possible because of how the expression is formulated. It is also equivalent to the mean of the PMF (the small differences are likely due to rounding error).

In [70]:
# TODO: write and test (8) as a replacement. Compare against just aggregating means across the exploded DF
# TODO: Can the arviz functions in the BetaGeoBetaBinom distribution block preclude the need for this?
# TODO: Replace this with (9) or (10) in a future PR, since that expression can predict interval ranges

# Equation 7 in the paper, but that is for probabilities. Should it be (8) for predicting mean n?
# This function should be renamed for clarity:
# - it distributes the customers in the dataset across n transaction opportunities
# - it works better as an evaluation function, since it assumes a fixed customer population size
# - if n > n_periods, it will keep right on predicting. This may be a bug.
#
# Uses `bgbb_alt`, the lifetimes model fitted earlier in this notebook. The previous
# name `bgbb` is not defined anywhere in the notebook (NameError on a fresh kernel).
expected_purchases_lt = bgbb_alt.expected_number_of_transactions_in_first_n_periods(n=6).reset_index()

# Round the "model" column to whole numbers and cast to int so it can be used as
# a repeat count when the table is expanded.
expected_purchases_lt["model"] = np.round(expected_purchases_lt["model"].values, 0).astype('int')

In [79]:
# Expand the aggregated table into one array entry per customer: each value in
# "frequency" is repeated as many times as its (integer) "model" count.
#
# np.repeat builds the array in one vectorised call. The earlier loop grew the
# array with np.concatenate on every row (quadratic) and relied on catching
# NameError to detect the first iteration. Re-running that cell therefore
# appended to whatever `concat_array` was already in the kernel instead of
# starting fresh.
concat_array = np.repeat(
    expected_purchases_lt["frequency"].to_numpy(),
    expected_purchases_lt["model"].to_numpy(),
)

# Mean number of transactions per customer implied by the expanded table.
concat_array.mean()


2.2200108069164264

In [80]:
from scipy import special
from numpy import log, exp

# Closed-form expected number of purchases in n transaction opportunities:
#
#   E[X(n)] = alpha/(alpha+beta) * delta/(gamma-1)
#             * (1 - Gamma(gamma+delta)/Gamma(gamma+delta+n)
#                    * Gamma(1+delta+n)/Gamma(1+delta))
#
# Note the division by (gamma - 1): the expression is undefined at gamma == 1.
n = 6

# `bgbb_alt` is the lifetimes model fitted earlier in this notebook. The previous
# name `bgbb` is not defined anywhere in the notebook (NameError on a fresh kernel).
alpha, beta, delta, gamma = bgbb_alt._unload_params('alpha', 'beta', 'delta', 'gamma')

term1 = alpha / (alpha + beta) * delta / (gamma - 1)

# Evaluate the ratio of gamma functions on the log scale. Each Gamma(...) factor
# overflows float64 once its argument passes roughly 171, which turned the
# direct product into inf/inf = nan for large n. The ratio itself stays
# moderate, so only the final (bounded) ratio is exponentiated.
log_gamma_ratio = (
    special.gammaln(gamma + delta)
    - special.gammaln(gamma + delta + n)
    + special.gammaln(1 + delta + n)
    - special.gammaln(1 + delta)
)
term2 = 1 - exp(log_gamma_ratio)

expected_purchases_n_periods = term1 * term2
print(f'average of {expected_purchases_n_periods} purchases expected in {n} opportunities')

average of 2.220169363676421 purchases expected in 6 opportunities


# Default Prior Selection

In [36]:
# Parameters of the lifetimes fit, shown for comparison with the MAP estimates
# of the PyMC models below. Uses `bgbb_alt` (the model fitted earlier); the
# previous name `bgbb` is not defined anywhere in the notebook.
bgbb_alt._unload_params('alpha', 'beta', 'delta', 'gamma')

[1.203522393608101, 0.7497163581757842, 2.783441982887136, 0.6567181695498788]

In [81]:
with pm.Model() as model:
    # Flat priors on the positive half-line for all four BG/BB parameters,
    # registered in the order alpha, beta, gamma, delta.
    # Alternative kept for reference: pm.Gamma(name=<param>, alpha=1, beta=1)
    # for each of the four parameters.
    alpha, beta, gamma, delta = (
        pm.HalfFlat(name=param) for param in ("alpha", "beta", "gamma", "delta")
    )

    # Likelihood: each observed row is a (recency, frequency) pair, with T
    # passed alongside as a per-row array.
    BetaGeoBetaBinom(
        name="beta_geo_beta_binom",
        alpha=alpha,
        beta=beta,
        gamma=gamma,
        delta=delta,
        T=bgbb_donations["T"].values,
        observed=bgbb_donations[["recency", "frequency"]].values,
    )

    # Point estimate only; swap in pm.sample() here to draw from the posterior.
    idata = pm.find_MAP()

# Display the dict of MAP estimates.
idata

Output()

{'alpha_log__': array(0.18523607),
 'beta_log__': array(-0.28807547),
 'gamma_log__': array(-0.42042299),
 'delta_log__': array(1.02379304),
 'alpha': array(1.20350252),
 'beta': array(0.74970501),
 'gamma': array(0.65676895),
 'delta': array(2.78373359)}

In [39]:
with pm.Model() as pooled_model:
    # Hierarchical pooling of the transaction-rate priors.
    # alpha and beta are derived from (phi, kappa) so that
    # alpha + beta == kappa_purchase and alpha / (alpha + beta) == phi_purchase.
    phi_purchase = pm.Uniform("phi_purchase", lower=0, upper=1)
    kappa_purchase = pm.Pareto("kappa_purchase", alpha=1, m=1.5)

    alpha = pm.Deterministic("alpha", phi_purchase * kappa_purchase)
    beta = pm.Deterministic("beta", (1.0 - phi_purchase) * kappa_purchase)

    # Hierarchical pooling of the dropout-rate priors, built the same way:
    # gamma + delta == kappa_dropout and gamma / (gamma + delta) == phi_dropout.
    phi_dropout = pm.Uniform("phi_dropout", lower=0, upper=1)
    kappa_dropout = pm.Pareto("kappa_dropout", alpha=1, m=1.5)

    gamma = pm.Deterministic("gamma", phi_dropout * kappa_dropout)
    delta = pm.Deterministic("delta", (1.0 - phi_dropout) * kappa_dropout)

    # Likelihood: each observed row is a (recency, frequency) pair, with T
    # passed alongside as a per-row array.
    BetaGeoBetaBinom(
        name="beta_geo_beta_binom",
        alpha=alpha,
        beta=beta,
        gamma=gamma,
        delta=delta,
        T=bgbb_donations["T"].values,
        observed=bgbb_donations[["recency", "frequency"]].values,
    )

    # Point estimate of all free and deterministic variables.
    idata_pool = pm.find_MAP()

# Display the dict of MAP estimates.
idata_pool

Output()

{'phi_purchase_interval__': array(0.47476126),
 'kappa_purchase_interval__': array(-0.78675896),
 'phi_dropout_interval__': array(-1.43883362),
 'kappa_dropout_interval__': array(0.63268931),
 'phi_purchase': array(0.61651006),
 'kappa_purchase': array(1.95531811),
 'phi_dropout': array(0.19172603),
 'kappa_dropout': array(3.38266685),
 'alpha': array(1.20547329),
 'beta': array(0.74984482),
 'gamma': array(0.6485453),
 'delta': array(2.73412155)}