### RUN IN TERMINAL FOR DATABASE CREATION

docker ps (to see the running containers)

docker stop CONTAINER_ID (replace CONTAINER_ID with the ID of the container you want to stop)

docker rm -f $(docker ps -aq) (force-removes every container, running or stopped)

docker run --name my-postgres-db -e POSTGRES_USER=master -e POSTGRES_PASSWORD=pass -e POSTGRES_DB=GLOBBING -p 5432:5432 -d postgres

# Initialize the database by creating the tables

In [1]:
from zenq.api.prepare_db import db

# Build the project's db helper and run its main() entry point; the recorded
# output of this cell reports "Initializing the database.. done".
m = db()
m.main()

[38;20m2023-05-01 20:51:22,685 - prepare_db.py - INFO - db (prepare_db.py:37)[0m
2023-05-01 20:51:22,685 main db


Initializing the database.. done


# Insert Facts into database

In [2]:
from zenq.api.endpoints import insert_facts
# Load 'globbing.csv' through the project's insert_facts helper. The remaining
# string arguments look like column names of that file ('Customer' and
# 'InvoiceId' are used as columns of it in the last cell of this notebook).
# NOTE(review): 'Product_weight' is passed for BOTH of the last two arguments;
# confirm against the insert_facts signature that this is intended.
# NOTE(review): the recorded output contains an ERROR-level log record
# (endpoints.py:123) before "Finished inserting facts"; worth checking.
insert_facts('globbing.csv', 'Customer', 'Gender', 'InvoiceId', 'Date', 'Product_weight', 'Product_weight')

Inserting facts for Customer from file globbing.csv


[31;20m2023-05-01 20:53:08,492 - endpoints.py - ERROR - insert_facts (endpoints.py:123)[0m
2023-05-01 20:53:08,492 insert_facts insert_facts
2023-05-01 20:53:08,494 insert_facts insert_facts
[38;20m2023-05-01 20:53:08,495 - endpoints.py - INFO - insert_facts (endpoints.py:125)[0m
2023-05-01 20:53:08,495 insert_facts insert_facts


Finished inserting facts


# Define the model and load the input data into the result schema of the database

In [3]:
from zenq.clvmodels.pareto import Model
# Single Model instance shared by the following cells (cltv_df, rfm_score,
# fit_paretonbd, model_params, predict_paretonbd, customer_is_alive).
model = Model()

## Compute key metrics for CLV

In [4]:
# Per the recorded output, cltv_df() yields a table with one row per
# customer_id and the columns min_date, recency, T, frequency and monetary.
cltv = model.cltv_df()
# Bare last expression so the notebook renders the table.
cltv

2023-05-01 20:55:45,826 _init_num_threads NumExpr defaulting to 8 threads.
[38;20m2023-05-01 20:55:45,992 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 20:55:45,992 cltv_df cltv_df


Unnamed: 0,customer_id,min_date,recency,T,frequency,monetary
0,KVO444312,2022-09-13,163,230,14,62.43
1,VBV804469,2022-09-04,200,239,14,58.16
2,BAE240832,2022-09-13,196,230,3,8.97
3,AOK457989,2022-09-14,189,229,5,25.09
4,XZF813575,2022-09-05,179,238,11,42.98
...,...,...,...,...,...,...
1956,DXT747575,2022-09-12,206,231,7,28.18
1957,VOZ592840,2023-01-14,52,107,2,9.99
1958,ZCQ782298,2022-09-13,207,230,20,79.30
1959,YNW866090,2022-09-02,143,241,11,47.75


## Categorization of customers based on RFM scores

In [5]:
# Per the recorded output, rfm_score() yields per-customer recency_score,
# frequency_score and monetary_score, a combined RFM_SCORE and a segment label.
# The log shows a cltv_df record right before the rfm_score record, so the
# metrics appear to be recomputed inside this call (TODO confirm).
rfm = model.rfm_score()
# Bare last expression so the notebook renders the table.
rfm

[38;20m2023-05-01 20:56:08,597 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 20:56:08,597 cltv_df cltv_df
[38;20m2023-05-01 20:56:08,775 - pareto.py - INFO - rfm_score (pareto.py:87)[0m
2023-05-01 20:56:08,775 rfm_score rfm_score


Unnamed: 0,customer_id,recency_score,frequency_score,monetary_score,RFM_SCORE,segment
0,KVO444312,4,4,4,44,LOYAL CUSTOMER
1,VBV804469,2,4,4,24,AT RISK
2,BAE240832,2,1,1,21,HIBERNATING
3,AOK457989,3,1,2,31,ABOUT TO SLEEP
4,XZF813575,3,3,3,33,NEED ATTENTION
...,...,...,...,...,...,...
1956,DXT747575,1,2,2,12,HIBERNATING
1957,VOZ592840,5,1,1,51,NEW CUSTOMERS
1958,ZCQ782298,1,5,5,15,CANT LOSE
1959,YNW866090,4,3,3,43,POTENTIAL LOYALIST


## Fit the Pareto/NBD model

In [6]:
# The recorded repr shows this returns a fitted lifetimes.ParetoNBDFitter
# (1960 subjects, with parameters alpha, beta, r and s).
# NOTE(review): the output also contains a bare `tmp = b * np.exp(a - a_max)`
# line, which looks like the tail of a numeric warning raised during fitting;
# confirm it is benign.
fit = model.fit_paretonbd()
# Bare last expression so the notebook shows the fitter's repr.
fit

[38;20m2023-05-01 20:56:26,924 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 20:56:26,924 cltv_df cltv_df
  tmp = b * np.exp(a - a_max)


<lifetimes.ParetoNBDFitter: fitted with 1960 subjects, alpha: 132.27, beta: 358911.87, r: 7.18, s: 368.55>

## Model parameters

In [7]:
# Per the recorded output, model_params() yields a single-row table with the
# fitted r, alpha, s and beta values.
# NOTE(review): the log for this cell includes an ERROR-level record from
# pareto.py:115 even though a result is returned; check whether that log level
# is intentional.
parameters = model.model_params()
# Bare last expression so the notebook renders the table.
parameters

[38;20m2023-05-01 20:56:59,454 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 20:56:59,454 cltv_df cltv_df
  tmp = b * np.exp(a - a_max)
[38;20m2023-05-01 20:57:20,061 - pareto.py - INFO - model_params (pareto.py:114)[0m
2023-05-01 20:57:20,061 model_params model_params
[31;20m2023-05-01 20:57:20,063 - pareto.py - ERROR - model_params (pareto.py:115)[0m
2023-05-01 20:57:20,063 model_params model_params


Unnamed: 0,r,alpha,s,beta
0,7.176766,132.266166,368.556939,358912.773048


## Predictions for 30, 90, 180 and 360 days

In [8]:
# Per the recorded output, predict_paretonbd() yields one row per Customer with
# Expected_Purchases_30 / _90 / _180 / _360 columns.
pareto = model.predict_paretonbd()
# Bare last expression so the notebook renders the table.
pareto

[38;20m2023-05-01 20:57:51,522 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 20:57:51,522 cltv_df cltv_df
  tmp = b * np.exp(a - a_max)
[38;20m2023-05-01 20:58:05,173 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 20:58:05,173 cltv_df cltv_df
[38;20m2023-05-01 20:58:05,631 - pareto.py - INFO - predict_paretonbd (pareto.py:134)[0m
2023-05-01 20:58:05,631 predict_paretonbd predict_paretonbd


Unnamed: 0,Customer,Expected_Purchases_30,Expected_Purchases_90,Expected_Purchases_180,Expected_Purchases_360
0,KVO444312,0.790104,2.299177,4.395544,8.049990
1,VBV804469,1.449216,4.217176,8.062362,14.765427
2,BAE240832,0.782368,2.276666,4.352508,7.971175
3,AOK457989,0.911659,2.652899,5.071785,9.288455
4,XZF813575,1.012153,2.945335,5.630868,10.312380
...,...,...,...,...,...
1956,DXT747575,1.103544,3.211277,6.139288,11.243484
1957,VOZ592840,0.918889,2.673911,5.111879,9.361616
1958,ZCQ782298,2.079288,6.050661,11.567593,21.184868
1959,YNW866090,0.260971,0.759417,1.451848,2.658918


## Customer aliveness

In [9]:
# Per the recorded output, customer_is_alive() yields one row per Customer with
# a Probability_of_being_Alive column.
alive = model.customer_is_alive()
# Bare last expression so the notebook renders the table.
alive


[38;20m2023-05-01 20:58:20,509 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 20:58:20,509 cltv_df cltv_df
  tmp = b * np.exp(a - a_max)
[38;20m2023-05-01 20:58:42,683 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 20:58:42,683 cltv_df cltv_df
[38;20m2023-05-01 20:58:42,791 - pareto.py - INFO - customer_is_alive (pareto.py:148)[0m
2023-05-01 20:58:42,791 customer_is_alive customer_is_alive


Unnamed: 0,Customer,Probability_of_being_Alive
0,KVO444312,0.457540
1,VBV804469,0.860028
2,BAE240832,0.942686
3,AOK457989,0.915527
4,XZF813575,0.697921
...,...,...
1956,DXT747575,0.957151
1957,VOZ592840,0.810974
1958,ZCQ782298,0.938202
1959,YNW866090,0.181423


In [1]:
import pandas as pd

# Sanity check on the raw input: which customers appear with exactly one
# InvoiceId entry in the CSV?

# Load the raw transactions file.
df = pd.read_csv('globbing.csv')

# Non-null InvoiceId entries per customer.
# NOTE(review): this counts rows, not distinct invoices; verify the file has
# one row per invoice if "number of invoices" is what is meant.
grouped = df.groupby('Customer')[['InvoiceId']].count()

# Keep only the customers whose count is exactly one.
is_single = grouped['InvoiceId'].eq(1)
result = grouped.loc[is_single]

# Show the matching customers.
print(result)


           InvoiceId
Customer            
AAR366684          1
ATB262249          1
BRS840990          1
BSF439593          1
CKI107410          1
DAX787715          1
DFZ837701          1
DSE737276          1
EUF556994          1
GGL431742          1
GXL480310          1
HAH608808          1
IAY208308          1
IQN294014          1
IWU303142          1
JZG281834          1
KFT548996          1
KPO670477          1
KXH974893          1
LFG253534          1
NJM647149          1
OLY631292          1
RWD468759          1
SJZ770260          1
TNU309606          1
UCA347124          1
UYP958102          1
VXJ691842          1
XHB937113          1
XVA214225          1
