In [1]:
import pandas as pd
from sqlalchemy import create_engine
import sqlalchemy
from datetime import datetime
from dateutil.relativedelta import relativedelta

In [2]:
# Period label (YYYY-MM) of the previous calendar month; appended to the export file names
month = format(datetime.today() - relativedelta(months=1), '%Y-%m')

## Year - Month of Calculation

In [3]:
# Display the period label so the reader can confirm which month is being processed
month

'2023-02'

# Start date and end date for the query

In [4]:
# First day of the current month; the queries use it as the exclusive upper bound
reference_day = datetime.today().replace(day=1)
# First day of the previous month; the queries use it as the inclusive lower bound
start_date = (reference_day - relativedelta(months=1)).date().isoformat()
end_date = reference_day.date().isoformat()
print(start_date, end_date, sep='\n')

2023-02-01
2023-03-01


# DB Connection

In [5]:
# Redshift connection
# The SQLAlchemy connection string is read from a local text file.
# NOTE(review): the path is absolute and user-specific, so the notebook only runs on
# this machine; consider moving it to a config cell or an environment variable.
with open('/Users/gabrielreynoso/Documents/Queries/db_klarprod_connection.txt', 'r') as f:
    # `with` closes the file even if read() fails; strip() drops a trailing newline
    # or stray whitespace that would otherwise end up inside the connection URL.
    postgres_str = f.read().strip()
cnx = create_engine(postgres_str)

# Cashback

In [6]:
# SQL template for the cashback base data. It selects every column of
# analytics_bi.transactions for rows that are SETTLED, have a non-zero amount, whose
# source account is neither of the two all-zero ids, and that are either GALILEO rows
# (transaction ids not starting with TGT or SRC) or ARCUS rows of type PURCHASE.
# {start_date} and {end_date} are filled in with str.format before execution; the
# window on timestamp_mx is start-inclusive and end-exclusive.
# NOTE(review): the bounds are written as '<date>' + '00:00:00' -- confirm how the
# database evaluates `+` between two string literals; '<date> 00:00:00' may be intended.
query_transactions = '''
SELECT t.*
FROM analytics_bi.transactions t
where t.state = 'SETTLED'
AND t.timestamp_mx >= '{start_date}' + '00:00:00'
AND t.timestamp_mx < '{end_date}' + '00:00:00'
and t.source_account_internal_id <> '0000000000000000' AND t.source_account_internal_id <> '00000000-0000-0000-0000-000000000000'
AND t.amount <> 0
and ((t.provider_id = 'GALILEO' AND t.transaction_id NOT LIKE 'TGT%' AND t.transaction_id NOT LIKE 'SRC%') or (t.provider_id = 'ARCUS' and t.type = 'PURCHASE'))
order by t.user_id ASC;
'''

In [7]:
# Fill the date window into the SQL template and load the result set into a DataFrame
transactions_raw = pd.read_sql_query(
    sql=sqlalchemy.text(
        query_transactions.format(start_date=start_date, end_date=end_date)
    ),
    con=cnx,
)

## Providers

In [8]:
# Number of PURCHASE / ADJUSTMENT rows per provider
transactions_raw.loc[
    transactions_raw['type'].isin(['PURCHASE', 'ADJUSTMENT']),
    'provider_id',
].value_counts()

GALILEO    1220398
ARCUS        84944
Name: provider_id, dtype: int64

## Balance Categories

In [9]:
# Number of PURCHASE rows per balance category
transactions_raw.loc[
    transactions_raw['type'].isin(['PURCHASE']),
    'balance_category',
].value_counts()

CHECKING                    819233
CREDIT_CARD                 377302
UNKNOWN_BALANCE_CATEGORY     85255
Name: balance_category, dtype: int64

## Exclude CCK (Credit Card) Transactions

In [10]:
# Keep every transaction whose balance category is not CREDIT_CARD
wo_cck = transactions_raw.loc[transactions_raw['balance_category'] != 'CREDIT_CARD']
# For the PURCHASE and ADJUSTMENT rows, sum the amounts and count the transactions per
# (user_id, type, provider_id), then move `type` from the row index into the columns
cashback_calculation = (
    wo_cck.loc[wo_cck['type'].isin(['PURCHASE', 'ADJUSTMENT'])]
    .groupby(['user_id', 'type', 'provider_id'])
    .agg({'amount': 'sum', 'transaction_id': 'count'})
    .unstack(level=1)
)

In [11]:
# Unstack in provider_id: one column per (metric, type, provider); combinations a user
# does not have become 0
cashback_calculation = cashback_calculation.unstack(level=1).fillna(0)
# Flatten the column names, e.g. ('amount', 'PURCHASE', 'GALILEO') -> 'amount_PURCHASE_GALILEO'
cashback_calculation.columns = ['_'.join(col) for col in cashback_calculation.columns.values]
# Rename columns through an explicit mapping rather than by position, so the export
# does not depend on which type/provider combinations happen to occur this month.
# The dict order is the column order of the exported file.
export_names = {
    'amount_ADJUSTMENT_ARCUS': 'ArcusAdjustAmount',
    'amount_ADJUSTMENT_GALILEO': 'GalileoAdjustAmount',
    'amount_PURCHASE_ARCUS': 'ArcusPurchaseAmount',
    'amount_PURCHASE_GALILEO': 'GalileoPurchaseAmount',
    'transaction_id_ADJUSTMENT_ARCUS': 'ArcusAdjustNum',
    'transaction_id_ADJUSTMENT_GALILEO': 'GalileoAdjustNum',
    'transaction_id_PURCHASE_ARCUS': 'ArcusPurchaseNum',
    'transaction_id_PURCHASE_GALILEO': 'GalileoPurchaseNum',
}
# A type/provider combination outside the mapping has no export column: fail loudly
# instead of silently dropping it.
unexpected_columns = sorted(set(cashback_calculation.columns) - set(export_names))
if unexpected_columns:
    raise ValueError(
        'Unexpected type/provider combination(s) in cashback data: '
        + ', '.join(unexpected_columns)
    )
cashback_calculation = (
    cashback_calculation
    # Combinations absent from this month's data are added as all-zero columns
    .reindex(columns=list(export_names), fill_value=0.0)
    .rename(columns=export_names)
    .reset_index()
)
# Raw Export
cashback_calculation.to_csv('./Monthly_DB/Raw_Cashback' + month + '.csv', index=False)

In [12]:
# Sanity check: (number of users, number of exported columns)
cashback_calculation.shape

(141213, 9)

## ATM and Balance Check

In [13]:
# SQL template for the ATM / balance-check data. It selects every column of
# analytics_bi.transactions for rows that are SETTLED, have a non-zero amount, whose
# source account is neither of the two all-zero ids, and whose type is FEE or
# DISBURSEMENT.
# {start_date} and {end_date} are filled in with str.format before execution; the
# window on timestamp_mx is start-inclusive and end-exclusive.
# NOTE(review): the bounds are written as '<date>' + '00:00:00' -- confirm how the
# database evaluates `+` between two string literals; '<date> 00:00:00' may be intended.
query_atm = '''
SELECT t.*
FROM analytics_bi.transactions t
where t.state = 'SETTLED'
AND t.timestamp_mx >= '{start_date}' + '00:00:00'
AND t.timestamp_mx < '{end_date}' + '00:00:00'
and t.source_account_internal_id <> '0000000000000000' AND t.source_account_internal_id <> '00000000-0000-0000-0000-000000000000'
AND t.amount <> 0
and t.type in ('FEE', 'DISBURSEMENT')
order by t.user_id ASC;
'''

In [14]:
# Fill the date window into the SQL template and load the result set into a DataFrame
atm_raw = pd.read_sql_query(
    sql=sqlalchemy.text(
        query_atm.format(start_date=start_date, end_date=end_date)
    ),
    con=cnx,
)

In [15]:
# Per user and transaction type (FEE / DISBURSEMENT): total amount and number of transactions
atm_rewards = (
    atm_raw.loc[atm_raw['type'].isin(['FEE', 'DISBURSEMENT'])]
    .groupby(['user_id', 'type'])
    .agg({'amount': 'sum', 'transaction_id': 'count'})
)

In [16]:
# Move the transaction type from the row index into the columns; a type the user
# does not have becomes 0
atm_rewards = atm_rewards.unstack(level='type').fillna(0)

In [17]:
# Flatten the column names, e.g. ('amount', 'FEE') -> 'amount_FEE'
atm_rewards.columns = list(map('_'.join, atm_rewards.columns))
# Turn user_id back into a regular column
atm_rewards = atm_rewards.reset_index()

In [18]:
# Preview the first rows of the per-user ATM aggregates
atm_rewards.head(15)

Unnamed: 0,user_id,amount_DISBURSEMENT,amount_FEE,transaction_id_DISBURSEMENT,transaction_id_FEE
0,00024ed7-05c1-4421-ae51-0a4d1cd4ecd0,-2835.84,0.0,1.0,0.0
1,000334b8-aecf-45e0-aba5-c204c02e5fef,-2169.6,0.0,2.0,0.0
2,0005b798-ce29-4b58-b1b5-8ae8b822ec6d,0.0,-23.2,0.0,2.0
3,0008d053-6036-494b-96da-6c3e0a6121a6,-230.74,0.0,1.0,0.0
4,000de233-96dc-4c8f-824c-1fa3cf53d517,-4542.54,0.0,13.0,0.0
5,000f54fa-54dd-4c26-8e2b-9d943d869e78,-9022.96,0.0,4.0,0.0
6,00114f49-f903-4028-a100-a6c2abccb35d,0.0,-13.92,0.0,1.0
7,0013ef2b-74b0-4651-8d7a-1ccc96776dd3,-1185.84,0.0,1.0,0.0
8,00151988-05c1-4005-939c-4a152595e0b2,-230.74,0.0,1.0,0.0
9,0017bc15-659b-4994-9a33-d6a94ee2093d,-2133.98,0.0,4.0,0.0


In [35]:
# Sanity check: (number of users, number of columns)
atm_rewards.shape

(31515, 5)

In [19]:
# Raw export: one CSV per processed month, without the DataFrame index
atm_rewards.to_csv(
    './Monthly_DB/Raw_ATM' + month + '.csv',
    index=False,
)