In [1]:
import pandas as pd
from lifetimes import BetaGeoBetaBinomFitter
from lifetimes.datasets import load_donations

In [66]:
# Load the example donations dataset that ships with lifetimes.
discrete_noncontract_df = load_donations()

# Not referenced again within this cell.
periods = 6

# Fit the model. The four columns are handed to fit() positionally, in the
# order frequency, recency, periods, weights, each as a raw array (.values).
bgbb = BetaGeoBetaBinomFitter().fit(
    *(
        discrete_noncontract_df[column].values
        for column in ('frequency', 'recency', 'periods', 'weights')
    )
)
bgbb

<lifetimes.BetaGeoBetaBinomFitter: fitted with 22 subjects, alpha: 1.20, beta: 0.75, delta: 2.78, gamma: 0.66>

In [16]:
# Names on the fitter class that do not start with an underscore. dir() reports
# every attribute, so this covers public attributes as well as methods.
method_list = list(
    filter(lambda name: not name.startswith('_'), dir(BetaGeoBetaBinomFitter))
)
print(method_list)



['conditional_expected_number_of_purchases_up_to_time', 'conditional_probability_alive', 'expected_number_of_transactions_in_first_n_periods', 'fit', 'load_model', 'save_model', 'summary']


In [30]:
# Equation 13 in the paper.
# NOTE(review): the leading 5 is presumably the number of future periods to
# predict over -- confirm against the lifetimes API.
bgbb.conditional_expected_number_of_purchases_up_to_time(
    5,
    *(discrete_noncontract_df[column] for column in ('frequency', 'recency', 'periods')),
)

0     0.072863
1     0.085696
2     0.314238
3     0.593853
4     0.839396
5     1.021689
6     1.147885
7     0.119121
8     0.536111
9     1.057604
10    1.443042
11    1.668817
12    0.223595
13    1.034572
14    1.804703
15    2.189749
16    0.583192
17    2.030024
18    2.710681
19    1.812942
20    3.231612
21    3.752544
dtype: float64

In [25]:
# Equation 11 in the paper.
# NOTE(review): the leading 10 is presumably the number of future periods --
# confirm against the lifetimes API.
bgbb.conditional_probability_alive(
    10,
    discrete_noncontract_df.loc[:, 'frequency'],
    discrete_noncontract_df.loc[:, 'recency'],
    discrete_noncontract_df.loc[:, 'periods'],
)

0     0.070072
1     0.045012
2     0.165056
3     0.311927
4     0.440900
5     0.536651
6     0.602936
7     0.043038
8     0.193695
9     0.382108
10    0.521365
11    0.602936
12    0.061566
13    0.284864
14    0.496916
15    0.602936
16    0.129719
17    0.451538
18    0.602936
19    0.338249
20    0.602936
21    0.602936
dtype: float64

In [63]:
# TODO: Write and test equation (8) as a replacement, and compare it against simply aggregating means across the exploded DataFrame.
# TODO: Can the arviz functions in the BetaGeoBetaBinom distribution block remove the need for this?
# TODO: Replace this with equation (9) or (10) in a future PR, since that expression can predict interval ranges.

# This corresponds to equation 7 in the paper, but that equation gives probabilities;
# equation 8 may be the better fit for predicting the mean number of transactions in n periods.
# The function should be renamed for clarity:
#   - it distributes the customers in the dataset across n transaction opportunities;
#   - it works better as an evaluation function, since it assumes a fixed customer population size;
#   - if n > n_periods it keeps right on predicting, which may be a bug.
bgbb.expected_number_of_transactions_in_first_n_periods(n=50)

Unnamed: 0_level_0,model
frequency,Unnamed: 1_level_1
0,3195.925987
1,1560.54902
2,964.135361
3,668.795916
4,497.960966
5,389.113685
6,314.983874


## Expected transactions in N periods
This expression blows up to inf for large values of n (beyond n=167 in this example). Recalculating on the log scale allows larger values, but taking the log of term1 isn't possible if gamma < 1, because term1 is then negative.

The need to estimate average purchases for such large values of n seems unlikely for a dataset where T=6, so we will forgo the log scale. Further testing on datasets with T>100 is recommended.

In [201]:
from scipy import special
from numpy import log,exp

# Number of transaction opportunities to compute the expected purchase count for.
n = 167
alpha,beta,delta,gamma = bgbb._unload_params('alpha','beta','delta','gamma')

# add a larger gamma value for testing
#gamma = .9

# When True, the ratio of gamma functions is evaluated in log space (gammaln),
# so it does not overflow where special.gamma() would return inf.
log_scale = False

# Closed-form mean number of purchases in n opportunities:
#   alpha/(alpha+beta) * delta/(gamma-1) * (1 - ratio)
#   ratio = G(gamma+delta)/G(gamma+delta+n) * G(1+delta+n)/G(1+delta)
# term1 is negative when gamma < 1, so it is always kept on the linear scale;
# only the ratio of gamma functions is ever moved to log space.
# Note the expression is singular at gamma == 1 (division by gamma-1).
term1 = alpha/(alpha+beta)*delta/(gamma-1)

if not log_scale:
    # Direct evaluation; special.gamma() overflows to inf for large arguments.
    gamma_ratio = special.gamma(gamma+delta)/special.gamma(gamma+delta+n)*special.gamma(1+delta+n)/special.gamma(1+delta)
else:
    # Same ratio as above: sum the log-gamma terms, exponentiate once at the end.
    log_gamma_ratio = special.gammaln(gamma+delta) - special.gammaln(gamma+delta+n) + special.gammaln(1+delta+n) - special.gammaln(1+delta)
    gamma_ratio = exp(log_gamma_ratio)

term2 = 1 - gamma_ratio
expected_purchases_n_periods = term1 * term2

print(f'average of {expected_purchases_n_periods} purchases expected in {n} opportunities')

average of 14.710084983628928 purchases expected in 167 opportunities


In [174]:
# Show the fitted parameter values, requested in the order alpha, beta, delta, gamma.
# NOTE(review): _unload_params is an underscore-prefixed (private) helper of the fitter.
bgbb._unload_params('alpha','beta','delta','gamma')

[1.2035223936080357,
 0.7497163581757648,
 2.7834419828877737,
 0.6567181695499797]