### RUN IN TERMINAL FOR DATABASE CREATION

docker ps (to see the running containers)

docker stop id (write instead of id what you want to delete)

docker rm -f $(docker ps -aq) (delete every running container)

docker run --name my-postgres-db -e POSTGRES_USER=master -e POSTGRES_PASSWORD=pass -e POSTGRES_DB=GLOBBING -p 5432:5432 -d postgres

# Initialize the database by creating the tables

In [1]:
from zenq.api.prepare_db import db
m=db()
m.main()

[38;20m2023-05-01 23:17:22,487 - prepare_db.py - INFO - db (prepare_db.py:37)[0m
2023-05-01 23:17:22,487 main db


Initializing the database.. done


# Data Preparation

In [2]:
from zenq.datapreparation.preparation import data_prep
prep = data_prep()

In [3]:
prep.read_data('globbing.csv')

Unnamed: 0,Customer,Date,Product_weight,Product_price,Gender,Branch/Locker,Location,InvoiceId
0,RXZ350571,2022-09-01,4.4,24200.0,M,G-Location 1,Yerevan,INV-101
1,ZFZ316415,2022-09-01,5.86,35160.0,M,G-Location 2,Yerevan,INV-102
2,KPR936365,2022-09-01,3.76,30080.0,M,G-Location 2,Yerevan,INV-103
3,PBI351070,2022-09-01,2.74,21920.0,F,G-Location 3,Yerevan,INV-104
4,RFI100548,2022-09-01,5.99,35940.0,M,G-Location 2,Yerevan,INV-105


## Shape of data

In [4]:
prep.shape()

((22000, 8),
 ['Customer',
  'Date',
  'Product_weight',
  'Product_price',
  'Gender',
  'Branch/Locker',
  'Location',
  'InvoiceId'])

## Info of data

In [5]:
prep.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22000 entries, 0 to 21999
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Customer        22000 non-null  object 
 1   Date            22000 non-null  object 
 2   Product_weight  22000 non-null  float64
 3   Product_price   22000 non-null  float64
 4   Gender          22000 non-null  object 
 5   Branch/Locker   22000 non-null  object 
 6   Location        22000 non-null  object 
 7   InvoiceId       22000 non-null  object 
dtypes: float64(2), object(6)
memory usage: 1.3+ MB


## Number of duplicates

In [6]:
prep.num_of_duplicate()

0

## Number of null values

In [7]:
prep.num_of_null()

Customer          0
Date              0
Product_weight    0
Product_price     0
Gender            0
Branch/Locker     0
Location          0
InvoiceId         0
dtype: int64

## Number of unique values in specified column

In [8]:
prep.num_of_unique_in_column('Gender')


[31;20m2023-05-01 23:11:41,385 - preparation.py - ERROR - num_of_unique_in_column (preparation.py:52)[0m
2023-05-01 23:11:41,385 num_of_unique_in_column num_of_unique_in_column


2

## Final Data

In [9]:
prep.final_data()

2023-05-01 23:11:49,039 _init_num_threads NumExpr defaulting to 8 threads.
[31;20m2023-05-01 23:11:49,047 - preparation.py - ERROR - final_data (preparation.py:65)[0m
2023-05-01 23:11:49,047 final_data final_data
2023-05-01 23:11:49,049 final_data final_data


Unnamed: 0,Customer,Date,Product_weight,Product_price,Gender,Branch/Locker,Location,InvoiceId
0,RXZ350571,2022-09-01,4.40,24200.0,M,G-Location 1,Yerevan,INV-101
1,ZFZ316415,2022-09-01,5.86,35160.0,M,G-Location 2,Yerevan,INV-102
2,KPR936365,2022-09-01,3.76,30080.0,M,G-Location 2,Yerevan,INV-103
3,PBI351070,2022-09-01,2.74,21920.0,F,G-Location 3,Yerevan,INV-104
4,RFI100548,2022-09-01,5.99,35940.0,M,G-Location 2,Yerevan,INV-105
...,...,...,...,...,...,...,...,...
21995,QAC630422,2023-04-08,4.09,22495.0,F,G-Location 5,Yerevan,INV-22096
21996,MUN988629,2023-04-08,4.34,26040.0,F,G-Location 5,Yerevan,INV-22097
21997,QRO989642,2023-04-08,5.16,10320.0,M,G-Location 2,Yerevan,INV-22098
21998,NTW716871,2023-04-08,1.39,8340.0,F,G-Location 1,Yerevan,INV-22099


# Insert Facts into database

In [5]:
from zenq.api.endpoints import insert_facts
insert_facts('globbing.csv', 'Customer', 'Gender', 'InvoiceId', 'Date', 'Product_weight', 'Product_weight')

[31;20m2023-05-01 23:21:55,323 - preparation.py - ERROR - final_data (preparation.py:65)[0m
2023-05-01 23:21:55,323 final_data final_data
2023-05-01 23:21:55,325 final_data final_data


Inserting facts for Customer from file csv


[31;20m2023-05-01 23:23:09,849 - endpoints.py - ERROR - insert_facts (endpoints.py:76)[0m
2023-05-01 23:23:09,849 insert_facts insert_facts
2023-05-01 23:23:09,851 insert_facts insert_facts
[38;20m2023-05-01 23:23:09,852 - endpoints.py - INFO - insert_facts (endpoints.py:78)[0m
2023-05-01 23:23:09,852 insert_facts insert_facts


Finished inserting facts


# Define the Model and input data in result schema of database

In [6]:
from zenq.clvmodels.pareto import Model
model = Model()

## Compute key metrics for CLV

In [7]:
cltv = model.cltv_df()
cltv

[38;20m2023-05-01 23:29:11,303 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 23:29:11,303 cltv_df cltv_df


Unnamed: 0,customer_id,min_date,recency,T,frequency,monetary
0,KVO444312,2022-09-13,163,230,14,62.43
1,VBV804469,2022-09-04,200,239,14,58.16
2,BAE240832,2022-09-13,196,230,3,8.97
3,AOK457989,2022-09-14,189,229,5,25.09
4,XZF813575,2022-09-05,179,238,11,42.98
...,...,...,...,...,...,...
1956,DXT747575,2022-09-12,206,231,7,28.18
1957,VOZ592840,2023-01-14,52,107,2,9.99
1958,ZCQ782298,2022-09-13,207,230,20,79.30
1959,YNW866090,2022-09-02,143,241,11,47.75


## Categorization of customers based on RFM scores

In [8]:
rfm = model.rfm_score()
rfm

[38;20m2023-05-01 23:29:12,696 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 23:29:12,696 cltv_df cltv_df
[38;20m2023-05-01 23:29:12,865 - pareto.py - INFO - rfm_score (pareto.py:87)[0m
2023-05-01 23:29:12,865 rfm_score rfm_score


Unnamed: 0,customer_id,recency_score,frequency_score,monetary_score,RFM_SCORE,segment
0,KVO444312,4,4,4,44,LOYAL CUSTOMER
1,VBV804469,2,4,4,24,AT RISK
2,BAE240832,2,1,1,21,HIBERNATING
3,AOK457989,3,1,2,31,ABOUT TO SLEEP
4,XZF813575,3,3,3,33,NEED ATTENTION
...,...,...,...,...,...,...
1956,DXT747575,1,2,2,12,HIBERNATING
1957,VOZ592840,5,1,1,51,NEW CUSTOMERS
1958,ZCQ782298,1,5,5,15,CANT LOSE
1959,YNW866090,4,3,3,43,POTENTIAL LOYALIST


## Fit into Pareto model

In [9]:
fit = model.fit_paretonbd()
fit

[38;20m2023-05-01 23:29:14,001 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 23:29:14,001 cltv_df cltv_df
  tmp = b * np.exp(a - a_max)


<lifetimes.ParetoNBDFitter: fitted with 1960 subjects, alpha: 132.27, beta: 358913.71, r: 7.18, s: 368.55>

## Model parameters

In [10]:
parameters = model.model_params()
parameters

[38;20m2023-05-01 23:29:42,746 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 23:29:42,746 cltv_df cltv_df
  tmp = b * np.exp(a - a_max)
[38;20m2023-05-01 23:30:13,401 - pareto.py - INFO - model_params (pareto.py:114)[0m
2023-05-01 23:30:13,401 model_params model_params
[31;20m2023-05-01 23:30:13,402 - pareto.py - ERROR - model_params (pareto.py:115)[0m
2023-05-01 23:30:13,402 model_params model_params


Unnamed: 0,r,alpha,s,beta
0,7.176777,132.26635,368.556772,358917.400665


## Predictions for 30,90,180,360 days

In [11]:
pareto = model.predict_paretonbd()
pareto

[38;20m2023-05-01 23:30:13,594 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 23:30:13,594 cltv_df cltv_df
  tmp = b * np.exp(a - a_max)
[38;20m2023-05-01 23:30:35,232 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 23:30:35,232 cltv_df cltv_df
[38;20m2023-05-01 23:30:35,725 - pareto.py - INFO - predict_paretonbd (pareto.py:134)[0m
2023-05-01 23:30:35,725 predict_paretonbd predict_paretonbd


Unnamed: 0,Customer,Expected_Purchases_30,Expected_Purchases_90,Expected_Purchases_180,Expected_Purchases_360
0,KVO444312,0.790157,2.299339,4.395875,8.050676
1,VBV804469,1.449239,4.217258,8.062561,14.765936
2,BAE240832,0.782352,2.276628,4.352456,7.971158
3,AOK457989,0.911652,2.652887,5.071788,9.288553
4,XZF813575,1.012189,2.945450,5.631115,10.312934
...,...,...,...,...,...
1956,DXT747575,1.103535,3.211263,6.139293,11.243602
1957,VOZ592840,0.918896,2.673939,5.111958,9.361853
1958,ZCQ782298,2.079313,6.050756,11.567832,21.185514
1959,YNW866090,0.261000,0.759506,1.452025,2.659268


## Customer aliveness

In [None]:
alive = model.customer_is_alive()
alive


[38;20m2023-05-01 23:30:35,910 - pareto.py - INFO - cltv_df (pareto.py:61)[0m
2023-05-01 23:30:35,910 cltv_df cltv_df
  tmp = b * np.exp(a - a_max)


In [None]:
import pandas as pd

# Read the CSV file into a pandas dataframe
df = pd.read_csv('globbing.csv')

# Group the data by customer and count the number of invoices
grouped = df.groupby('Customer').agg({'InvoiceId': 'count'})

# Select only the customers with one invoice
result = grouped[grouped['InvoiceId'] == 1]

# Print the result
print(result)
