In [30]:
import pandas as pd
from sqlalchemy import create_engine
import matplotlib.pyplot as plt

In [2]:
# Load the payments table from a pickle; the relative path is resolved against
# the kernel's working directory.
# NOTE(review): unpickling can execute arbitrary code - only load this file if
# it comes from a trusted source.
db_payments = pd.read_pickle(filepath_or_buffer='RFM_CHURNED_PAYMENTS.pkl')

In [47]:
# Display the payments frame; the rendered output shows the columns uuid, fee
# and month, with the middle rows elided.
db_payments

Unnamed: 0,uuid,fee,month
0,2b5812ac-8c23-4d05-96e9-578a5c2d7a4c,100.0,2022-06-01
1,19cac410-7755-4ea1-b21d-5a241c62521e,100.0,2022-06-01
2,1dcdfdab-0e66-408a-b09e-bd2b9d0f7b66,100.0,2022-06-01
3,3054aa44-2282-4240-8b39-fd7f96d75334,100.0,2022-06-01
4,3a044bd5-3360-49a9-a1bc-5d6d6a9db81e,100.0,2022-06-01
...,...,...,...
519,65acb95d-37a8-4c67-b9c7-c9da16fbd516,300.0,2023-01-01
520,b60f78cb-45bf-46e4-a7f3-f9d9759484e2,300.0,2023-01-01
521,790c35e5-b3e8-4d29-b3ee-a4f85532e26c,300.0,2023-01-01
522,386df59f-107b-42c1-85af-c2df768b2748,300.0,2023-01-01


# USERS WITH MULTIPLE PAYMENTS

In [4]:
# Check users with multiple payments and tabulate which combinations of
# payment months they have.

# Number of (non-null) payment months per user.
user_month = (
    db_payments
    .groupby('uuid')['month']
    .count()
    .to_frame('num_payments')
)

# Users with more than one payment, with uuid back as a regular column.
multi_payment = user_month[user_month.num_payments > 1].reset_index()

# For those users, gather every payment month into one list per uuid.
multi_payment_users = (
    pd.merge(db_payments, multi_payment, on='uuid')
    .groupby('uuid')['month']
    .agg(list)
    .reset_index()
)

# Lists are unhashable, so calling value_counts() on them directly fails on
# many pandas versions. Convert each list to a tuple for counting only;
# multi_payment_users itself still holds lists.
multi_payment_users.month.map(tuple).value_counts()

[2022-06-01 00:00:00, 2023-01-01 00:00:00]    7
[2022-06-01 00:00:00, 2022-11-01 00:00:00]    3
Name: month, dtype: int64

# USER RETENTION

## Retention Query

In [10]:
# SQL template: transaction count per user and calendar month (month taken from
# timestamp_mx_created_at) from analytics_bi.transactions, restricted to the
# transaction types listed in the WHERE clause. The `{}` placeholder is meant to
# be filled with str.format() using a parenthesised SQL list of user ids.
retention_query = '''
select
    date_trunc('month', t.timestamp_mx_created_at) AS transaction_month,
    count(transaction_id) as transactions,
    t.user_id as uuid
from analytics_bi.transactions t
where t.type in ('PURCHASE', 'DEPOSIT', 'TRANSFER', 'QUASICASH', 'FEE', 'DISBURSEMENT')
and user_id in {}
group by uuid, transaction_month
'''

In [8]:
# DB connection
# The connection string is read from a local text file rather than written in
# the notebook. The context manager guarantees the file handle is closed even if
# read() raises.
# NOTE(review): the absolute path is machine-specific - consider making it
# configurable.
with open('/Users/gabrielreynoso/Documents/Queries/db_klarprod_connection.txt', 'r') as f:
    postgres_str = f.read()
cnx = create_engine(postgres_str)

In [11]:
# Build the SQL IN-list explicitly instead of formatting a Python tuple:
#   * the repr of a one-element tuple is "('x',)" - the trailing comma is invalid SQL;
#   * an empty tuple gives "()", also invalid - fall back to "(NULL)", which matches no rows;
#   * NaN ids would be rendered as the bare word nan - drop them;
#   * de-duplicating keeps the statement short when a user has several payments.
# NOTE(review): the ids are still interpolated into the SQL text (single quotes
# are escaped); switch to bound parameters if they can ever come from an
# untrusted source.
payer_uuids = db_payments.uuid.dropna().unique()
uuid_in_list = '({})'.format(
    ', '.join("'{}'".format(str(u).replace("'", "''")) for u in payer_uuids)
    or 'NULL'
)
retention_transactions = pd.read_sql_query(retention_query.format(uuid_in_list), cnx)

In [12]:
# Display the query result; the rendered output shows the columns
# transaction_month, transactions and uuid.
retention_transactions

Unnamed: 0,transaction_month,transactions,uuid
0,2022-05-01,7,ecfa01ce-8a88-406e-b1e4-0b8dc91fb27b
1,2022-11-01,7,ecfa01ce-8a88-406e-b1e4-0b8dc91fb27b
2,2022-07-01,1,ed33bf62-1e1e-4e1b-b5c4-efb22e4b3aa8
3,2022-10-01,4,ed372166-2d67-4a6c-8b71-6c146f00e842
4,2022-02-01,4,ed3c7853-53a1-4611-822c-c794f08386ee
...,...,...,...
62468,2022-03-01,1,ebeffc31-45e2-4af8-bd50-eed272ed6ed8
62469,2021-12-01,1,ec134fbb-1d87-4344-8f93-e795606a6aa3
62470,2022-02-01,2,ec1beaab-744b-4382-8a4a-355b0c2117d6
62471,2022-12-01,3,ec451b78-ba06-4997-8879-fca647df9547


## Retention Analysis

In [13]:
# Attach the payment columns to every monthly activity row. The left join keeps
# all activity rows; a uuid that occurs several times in db_payments yields one
# output row per matching payment row.
retention_db = retention_transactions.merge(db_payments, how='left', on='uuid')

In [16]:
# Preview the first 30 rows of the merged frame.
retention_db.iloc[:30]

Unnamed: 0,transaction_month,transactions,uuid,fee,month
0,2022-05-01,7,ecfa01ce-8a88-406e-b1e4-0b8dc91fb27b,200.0,2022-11-01
1,2022-11-01,7,ecfa01ce-8a88-406e-b1e4-0b8dc91fb27b,200.0,2022-11-01
2,2022-07-01,1,ed33bf62-1e1e-4e1b-b5c4-efb22e4b3aa8,200.0,2022-06-01
3,2022-10-01,4,ed372166-2d67-4a6c-8b71-6c146f00e842,200.0,2022-07-01
4,2022-02-01,4,ed3c7853-53a1-4611-822c-c794f08386ee,200.0,2022-06-01
5,2022-07-01,6,ed3c7853-53a1-4611-822c-c794f08386ee,200.0,2022-06-01
6,2022-08-01,1,ed3c7853-53a1-4611-822c-c794f08386ee,200.0,2022-06-01
7,2022-09-01,29,ed3c7ce1-fa22-4e0a-a556-f0b5aac17f05,200.0,2022-07-01
8,2022-11-01,7,ed3c7ce1-fa22-4e0a-a556-f0b5aac17f05,200.0,2022-07-01
9,2022-12-01,1,ed3c7ce1-fa22-4e0a-a556-f0b5aac17f05,200.0,2022-07-01


In [42]:
# Keep only activity that happened strictly after the payment month, then count
# rows for every (payment month, activity month) pair.
# NOTE(review): .count() counts non-null uuid rows, not distinct users - if the
# same (uuid, month) payment can appear more than once, confirm this is intended
# or switch to nunique().
after_payment = retention_db['transaction_month'] > retention_db['month']
retention_metric = (
    retention_db.loc[after_payment]
    .groupby(['month', 'transaction_month'])['uuid']
    .count()
)

In [43]:
# Turn the (month, transaction_month) index levels into regular columns.
# Guarded so the cell is idempotent: once retention_metric is already a
# DataFrame, another reset_index() would insert a spurious `index` column
# (and eventually raise on repeated runs), so it is skipped.
if isinstance(retention_metric, pd.Series):
    retention_metric = retention_metric.reset_index()

In [44]:
# Display the long-format counts; the rendered output shows the columns month,
# transaction_month and uuid (the count).
retention_metric

Unnamed: 0,month,transaction_month,uuid
0,2022-06-01,2022-07-01,1416
1,2022-06-01,2022-08-01,915
2,2022-06-01,2022-09-01,753
3,2022-06-01,2022-10-01,694
4,2022-06-01,2022-11-01,587
5,2022-06-01,2022-12-01,557
6,2022-06-01,2023-01-01,533
7,2022-06-01,2023-02-01,440
8,2022-07-01,2022-08-01,1527
9,2022-07-01,2022-09-01,1111


In [45]:
# Reshape the long table into a cohort grid: one row per payment month, one
# column per activity month, cells holding the count from the `uuid` column.
ret_plot = pd.pivot(
    retention_metric,
    index='month',
    columns='transaction_month',
    values='uuid',
)
# Side effect: copies the grid to the system clipboard (presumably for pasting
# into a spreadsheet; needs a clipboard backend on the machine running the kernel).
ret_plot.to_clipboard()

In [46]:
# Display the pivoted grid; NaN marks (month, transaction_month) pairs that have
# no row in retention_metric.
ret_plot

transaction_month,2022-07-01,2022-08-01,2022-09-01,2022-10-01,2022-11-01,2022-12-01,2023-01-01,2023-02-01
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-06-01,1416.0,915.0,753.0,694.0,587.0,557.0,533.0,440.0
2022-07-01,,1527.0,1111.0,865.0,732.0,658.0,631.0,510.0
2022-08-01,,,1342.0,1051.0,734.0,634.0,581.0,509.0
2022-10-01,,,,,1222.0,840.0,711.0,570.0
2022-11-01,,,,,,1117.0,918.0,619.0
2023-01-01,,,,,,,,485.0
