In [1]:
import os
import numpy as np
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from pymc_marketing import clv
# Step up to the parent directory, presumably the project root, so that the
# `src` imports below resolve. The guard makes the cell idempotent: an
# unconditional relative chdir climbs one level further on every re-run.
# NOTE(review): assumes the notebook's own folder has no `src` directory — confirm.
if not os.path.isdir("src"):
    os.chdir("../")
from src.data import (
    getDataset,
    ProcessData
)
from src.config import RawFeatures, RFM
from src.models import BetaGeoModel
from src.utils import get_customer_history_data, get_customer_whatif_data

In [2]:
import arviz as az
from arviz.labels import MapLabeller

In [3]:
# dt_format_ is the format of the raw timestamps, _dt_format the format they are rewritten to.
dt_format_, _dt_format = '%d/%m/%Y %H:%M', '%Y-%m-%d %H:%M'
df_transaction = getDataset()
# Vectorised replacement for the row-wise strptime/strftime lambda.
# Missing values become NaT while parsing and come back out of .dt.strftime
# as NaN, so they stay missing exactly as the old `x == x` guard left them.
df_transaction[RawFeatures.TRANSACTION_DATE] = pd.to_datetime(
    df_transaction[RawFeatures.TRANSACTION_DATE],
    format=dt_format_,
).dt.strftime(_dt_format)

In [4]:
# Preview the first rows to check the rewritten transaction-date format.
df_transaction.head()

Unnamed: 0,Customer ID,InvoiceDate,Price,Quantity
0,13085.0,2009-12-01 07:45,6.95,12.0
1,13085.0,2009-12-01 07:45,6.75,12.0
2,13085.0,2009-12-01 07:45,6.75,12.0
3,13085.0,2009-12-01 07:45,2.1,48.0
4,13085.0,2009-12-01 07:45,1.25,24.0


In [5]:
# Build the per-customer summary table (frequency, recency, T, monetary_value)
# that the model cell consumes.
# NOTE(review): 'D' and '2011-06-30' are passed positionally; presumably a daily
# time unit and a cut-off date — confirm against ProcessData in src.data.
data_inst = ProcessData(
    df_transaction,
    'D',
    '2011-06-30',
)
data_summary = data_inst.model_data()

data_summary.head()

Unnamed: 0_level_0,frequency,recency,T,monetary_value
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12346.0,7.0,400.0,725.0,11066.637143
12745.0,1.0,88.0,574.0,266.93
12747.0,25.0,730.0,732.0,355.8356
12748.0,202.0,735.0,735.0,279.101436
12749.0,6.0,518.0,521.0,1010.743333


In [6]:
# (rows, columns) of the per-customer summary table.
data_summary.shape

(3820, 4)

In [7]:
# Show only the headline statistics. Rows are picked by label rather than by
# position so the cell does not depend on the row order of describe().
data_summary.describe().loc[["mean", "std", "min", "50%", "max"], :]

Unnamed: 0,frequency,recency,T,monetary_value
mean,6.490052,386.782461,526.574346,422.646958
std,10.721823,227.335755,201.691737,2771.151697
min,1.0,1.0,9.0,3.9
50%,3.0,389.0,592.0,300.305
max,202.0,738.0,738.0,168469.6


In [8]:
# Display a default-constructed RFM object to inspect its field defaults
# (max_recency, max_T, date_last_purchase).
RFM()

RFM(max_recency=0.0, max_T=0.0, date_last_purchase=datetime.date(2023, 4, 29))

# Modeling


In [9]:
# Wrap the per-customer summary in the project's BetaGeoModel and fit it.
bgm = BetaGeoModel(data_summary)
# Runs NUTS sampling over the parameters a, b, alpha and r (see the log below).
# NOTE(review): no random seed is set in this cell, so draws may differ between
# runs — confirm whether BetaGeoModel.fit accepts a seed.
bgm.fit()
# Last expression: posterior summary table (mean, sd, HDI, ESS, r_hat) per parameter.
bgm.fit_summary()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [a, b, alpha, r]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 28 seconds.


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a,0.113,0.012,0.092,0.136,0.0,0.0,2147.0,2296.0,1.0
b,1.276,0.196,0.941,1.646,0.004,0.003,2108.0,1946.0,1.0
alpha,95.844,2.938,90.479,101.365,0.065,0.046,2065.0,2121.0,1.0
r,1.434,0.038,1.362,1.505,0.001,0.001,2078.0,2154.0,1.0


# Plots

In [10]:
"""az.plot_posterior(bgm.fit_result);"""

'az.plot_posterior(bgm.fit_result);'

In [11]:
# Three example customers for the probability-alive plots in the cells below.
customer_id, customer_id_, customer_id__ = 14114.0, 12747.0, 12745.0

# Settings shared by every plot call.
# NOTE(review): units and meaning are defined by BetaGeoModel.plot_probability_alive — confirm.
n_period, T_future_transac = 10, 4

In [16]:
# Probability-alive plot for customer_id__ (12745.0 in the parameter cell).
# NOTE(review): [1500, 700] is presumably the figure size — confirm in src.models.
bgm.plot_probability_alive(customer_id__, n_period, [1500, 700], T_future_transac)

In [17]:
# Probability-alive plot for customer_id_ (12747.0 in the parameter cell).
# NOTE(review): [1500, 700] is presumably the figure size — confirm in src.models.
bgm.plot_probability_alive(customer_id_, n_period, [1500, 700], T_future_transac)

In [18]:
# Probability-alive plot for customer_id (14114.0 in the parameter cell).
# NOTE(review): [1500, 700] is presumably the figure size — confirm in src.models.
bgm.plot_probability_alive(customer_id, n_period, [1500, 700], T_future_transac)

# Frontend Prep

```python
# --- 1. Load the transactions and rewrite the date column ---
# dt_format_ is the raw timestamp format, _dt_format the format it is rewritten to.
dt_format_, _dt_format = '%d/%m/%Y %H:%M', '%Y-%m-%d %H:%M'
df_transaction = getDataset()
df_transaction[RawFeatures.TRANSACTION_DATE] = df_transaction[RawFeatures.TRANSACTION_DATE].apply(
    # `raw != raw` is only true for NaN, which is passed through untouched.
    lambda raw: raw if raw != raw else dt.datetime.strptime(raw, dt_format_).strftime(_dt_format)
)

# --- 2. Build the per-customer summary ---
data_inst = ProcessData(df_transaction, 'D', '2011-06-30')
data_summary = data_inst.model_data()

# --- 3. Fit the model ---
bgm = BetaGeoModel(data_summary)
bgm.fit()
bgm.fit_summary()

# --- 4. Plot one customer ---
customer_id, n_period, T_future_transac = 14110.0, 14, 7

bgm.plot_probability_alive(customer_id, n_period, [1500, 700], T_future_transac)
```