In [1]:
import pandas as pd
from sqlalchemy import create_engine
import sqlalchemy

In [2]:
# Reporting month; it is spliced into the CSV file names read and written further down.
month = "December"

## Galileo and Arcus Query

In [3]:
# Redshift connection
# The connection string handed to create_engine is read from a local text file instead of
# being written in the notebook.
with open('/Users/gabrielreynoso/Documents/Queries/db_klarprod_connection.txt', 'r') as f:
    # The context manager closes the file even if read() raises.
    # strip(): surrounding whitespace (e.g. a trailing newline added by an editor) is not
    # part of the connection string.
    postgres_str = f.read().strip()
cnx = create_engine(postgres_str)

In [4]:
# SETTLED, non-zero transactions for December 2022, excluding the two all-zero
# source_account_internal_id values. Keeps GALILEO rows whose transaction_id does not start
# with TGT or SRC, plus ARCUS rows of type PURCHASE; ordered by user_id.
# NOTE(review): the date window is hard-coded and is not derived from `month`; both have to
# be updated together by hand.
query_transactions = '''
SELECT t.*
FROM analytics_bi.transactions t
where t.state = 'SETTLED'
AND t.timestamp_mx >= '2022-12-01 00:00:00'
AND t.timestamp_mx < '2023-01-01 00:00:00'
and t.source_account_internal_id <> '0000000000000000' AND t.source_account_internal_id <> '00000000-0000-0000-0000-000000000000'
AND t.amount <> 0
and ((t.provider_id = 'GALILEO' AND t.transaction_id NOT LIKE 'TGT%' AND t.transaction_id NOT LIKE 'SRC%') or (t.provider_id = 'ARCUS' and t.type = 'PURCHASE'))
order by t.user_id ASC;
'''

In [5]:
# Run the transactions query on the engine created above and load the whole result set
# into a DataFrame.
query_raw = pd.read_sql_query(
    sql=sqlalchemy.text(query_transactions),
    con=cnx,
)

## Providers

In [6]:
# Number of PURCHASE / ADJUSTMENT transactions per provider.
query_raw.loc[
    query_raw['type'].isin(['PURCHASE', 'ADJUSTMENT']),
    'provider_id',
].value_counts()

GALILEO    1214041
ARCUS        94116
Name: provider_id, dtype: int64

## Balance Categories

In [7]:
# Number of PURCHASE / ADJUSTMENT transactions per balance category.
query_raw.loc[
    query_raw['type'].isin(['PURCHASE', 'ADJUSTMENT']),
    'balance_category',
].value_counts()

CHECKING                    1019634
CREDIT_CARD                  171553
UNKNOWN_BALANCE_CATEGORY     116970
Name: balance_category, dtype: int64

# Cashback Calc

### W/ CCK Purchases

In [None]:
# Keep only PURCHASE and ADJUSTMENT rows, then compute per (user_id, type, provider_id)
# the summed amount and the number of transactions. Finally pivot `type`
# (index level 1) into the columns.
cashback_calculation = (
    query_raw.loc[query_raw['type'].isin(['PURCHASE', 'ADJUSTMENT'])]
    .groupby(['user_id', 'type', 'provider_id'])
    .agg({'amount': 'sum', 'transaction_id': 'count'})
    .unstack(level=1)
)

In [None]:
# Preview the first 10 rows after `type` has been unstacked into the columns.
cashback_calculation.head(10)

In [None]:
# Pivot provider_id (index level 1 once `type` has been unstacked) into the columns;
# combinations without any transaction come out as NaN and are set to 0.
cashback_calculation = (
    cashback_calculation
    .unstack(level=1)
    .fillna(value=0)
)

In [None]:
# Preview the first 20 rows now that provider_id is also unstacked and gaps are filled with 0.
cashback_calculation.head(20)

In [None]:
# Inspect the column labels produced by the two unstack steps, before they are flattened.
cashback_calculation.columns

In [None]:
# Flatten the multi-level column labels into single strings joined with '_'
# (e.g. 'amount_PURCHASE_GALILEO').
# The guard makes re-running this cell a no-op: once the labels are plain strings,
# '_'.join(label) would insert '_' between every character and corrupt the names.
if isinstance(cashback_calculation.columns, pd.MultiIndex):
    cashback_calculation.columns = ['_'.join(col) for col in cashback_calculation.columns.values]

In [None]:
# Move user_id from the index into a regular column.
# Guarded on the index name so that re-running the cell does not call reset_index() a second
# time, which would add a spurious 'index' column.
if 'user_id' in cashback_calculation.index.names:
    cashback_calculation = cashback_calculation.reset_index()

In [None]:
# Check the flattened column names (plus user_id) before they are relabelled.
cashback_calculation.columns

In [None]:
# Rename columns by name instead of by position, so a label can never end up on the wrong
# data if the column order changes.
cashback_column_labels = {
    'amount_ADJUSTMENT_ARCUS': 'ArcusAdjustAmount',
    'amount_ADJUSTMENT_GALILEO': 'GalileoAdjustAmount',
    'amount_PURCHASE_ARCUS': 'ArcusPurchaseAmount',
    'amount_PURCHASE_GALILEO': 'GalileoPurchaseAmount',
    'transaction_id_ADJUSTMENT_ARCUS': 'ArcusAdjustNum',
    'transaction_id_ADJUSTMENT_GALILEO': 'GalileoAdjustNum',
    'transaction_id_PURCHASE_ARCUS': 'ArcusPurchaseNum',
    'transaction_id_PURCHASE_GALILEO': 'GalileoPurchaseNum',
}
cashback_expected_columns = ['user_id'] + list(cashback_column_labels.values())
cashback_relabelled = cashback_calculation.rename(columns=cashback_column_labels)
cashback_unexpected_columns = [
    col for col in cashback_relabelled.columns if col not in cashback_expected_columns
]
# Fail loudly if the frame is not in the flattened, reset_index() shape this cell expects
# (e.g. the cells were run out of order), rather than exporting mislabelled data.
if 'user_id' not in cashback_relabelled.columns or cashback_unexpected_columns:
    raise ValueError(
        'Unexpected columns before renaming: %r' % list(cashback_relabelled.columns)
    )
# A type/provider combination with no transactions in the period never becomes a column;
# add it as zeros so the exported file always has the same nine columns in the same order.
cashback_calculation = cashback_relabelled.reindex(columns=cashback_expected_columns, fill_value=0)

In [None]:
# Raw export: write the per-user summary (without the row index) to a month-specific CSV,
# relative to the current working directory.
cashback_calculation.to_csv(f'./Cashback_Payments/Raw_{month}.csv', index=False)

In [None]:
# (rows, columns) of the summary that was just exported.
cashback_calculation.shape

### W/O CCK Purchases

In [8]:
# Drop the rows whose balance_category is CREDIT_CARD; all other rows are kept.
wo_cck = query_raw.loc[query_raw['balance_category'].ne('CREDIT_CARD')]
# Keep only PURCHASE and ADJUSTMENT rows, then compute per (user_id, type, provider_id)
# the summed amount and the number of transactions. Finally pivot `type`
# (index level 1) into the columns.
cashback_calculation = (
    wo_cck.loc[wo_cck['type'].isin(['PURCHASE', 'ADJUSTMENT'])]
    .groupby(['user_id', 'type', 'provider_id'])
    .agg({'amount': 'sum', 'transaction_id': 'count'})
    .unstack(level=1)
)

In [9]:
# Preview the first 10 rows: index is (user_id, provider_id), columns are amount / transaction_id per type.
cashback_calculation.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,amount,amount,transaction_id,transaction_id
Unnamed: 0_level_1,type,ADJUSTMENT,PURCHASE,ADJUSTMENT,PURCHASE
user_id,provider_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
000048a5-6eb8-464a-ba04-9860337841d2,ARCUS,,-30.0,,1.0
00007e60-e21e-4616-81b4-4afae84713ee,GALILEO,,-281.0,,3.0
000096ca-40b4-4de0-8b71-6f16939af17f,GALILEO,,-527.78,,5.0
0000a054-8132-416b-8cf1-d4c6443bf8ce,GALILEO,,-89.5,,1.0
00017516-8f98-4f85-b58e-09b4fb9c2c3e,ARCUS,,-250.0,,1.0
0002325b-5c34-4e23-89d4-e289b52b1ab7,GALILEO,,-750.84,,4.0
00024ed7-05c1-4421-ae51-0a4d1cd4ecd0,GALILEO,,-1860.45,,1.0
0002c06d-33ce-492c-8a63-3b7b2ef43086,GALILEO,,-39.08,,1.0
0002f190-5527-4220-9b92-955bfc498869,ARCUS,,-50.0,,1.0
0003455b-033d-48ee-80d2-77c05664c154,GALILEO,,-697.45,,3.0


In [10]:
# Pivot provider_id (index level 1 once `type` has been unstacked) into the columns;
# combinations without any transaction come out as NaN and are set to 0.
cashback_calculation = (
    cashback_calculation
    .unstack(level=1)
    .fillna(value=0)
)

In [11]:
# Inspect the three-level column index (aggregate, type, provider_id) before flattening it.
cashback_calculation.columns

MultiIndex([(        'amount', 'ADJUSTMENT',   'ARCUS'),
            (        'amount', 'ADJUSTMENT', 'GALILEO'),
            (        'amount',   'PURCHASE',   'ARCUS'),
            (        'amount',   'PURCHASE', 'GALILEO'),
            ('transaction_id', 'ADJUSTMENT',   'ARCUS'),
            ('transaction_id', 'ADJUSTMENT', 'GALILEO'),
            ('transaction_id',   'PURCHASE',   'ARCUS'),
            ('transaction_id',   'PURCHASE', 'GALILEO')],
           names=[None, 'type', 'provider_id'])

In [12]:
# Flatten the three-level column labels into single strings joined with '_'
# (e.g. 'amount_PURCHASE_GALILEO'), then move user_id from the index into a regular column.
# The guard makes re-running this cell a no-op: on already-flat labels '_'.join(label) would
# insert '_' between every character, and a second reset_index() would add a spurious
# 'index' column.
if isinstance(cashback_calculation.columns, pd.MultiIndex):
    cashback_calculation.columns = ['_'.join(col) for col in cashback_calculation.columns.values]
    cashback_calculation = cashback_calculation.reset_index()

In [13]:
# Rename columns by name instead of by position, so a label can never end up on the wrong
# data if the column order changes.
cashback_column_labels = {
    'amount_ADJUSTMENT_ARCUS': 'ArcusAdjustAmount',
    'amount_ADJUSTMENT_GALILEO': 'GalileoAdjustAmount',
    'amount_PURCHASE_ARCUS': 'ArcusPurchaseAmount',
    'amount_PURCHASE_GALILEO': 'GalileoPurchaseAmount',
    'transaction_id_ADJUSTMENT_ARCUS': 'ArcusAdjustNum',
    'transaction_id_ADJUSTMENT_GALILEO': 'GalileoAdjustNum',
    'transaction_id_PURCHASE_ARCUS': 'ArcusPurchaseNum',
    'transaction_id_PURCHASE_GALILEO': 'GalileoPurchaseNum',
}
cashback_expected_columns = ['user_id'] + list(cashback_column_labels.values())
cashback_relabelled = cashback_calculation.rename(columns=cashback_column_labels)
cashback_unexpected_columns = [
    col for col in cashback_relabelled.columns if col not in cashback_expected_columns
]
# Fail loudly if the frame is not in the flattened, reset_index() shape this cell expects
# (e.g. the cells were run out of order), rather than exporting mislabelled data.
if 'user_id' not in cashback_relabelled.columns or cashback_unexpected_columns:
    raise ValueError(
        'Unexpected columns before renaming: %r' % list(cashback_relabelled.columns)
    )
# A type/provider combination with no transactions in the period never becomes a column;
# add it as zeros so the exported file always has the same nine columns in the same order.
cashback_calculation = cashback_relabelled.reindex(columns=cashback_expected_columns, fill_value=0)

In [14]:
# Raw export of the summary that excludes credit-card transactions: written without the
# row index to a month-specific CSV, relative to the current working directory.
cashback_calculation.to_csv(f'./Cashback_Payments/Raw_WO_CCK_{month}.csv', index=False)

In [15]:
# (rows, columns) of the summary that was just exported.
cashback_calculation.shape

(150170, 9)

## Excel Input

In [None]:
# Load the two CSV files for the month and stack them, one after the other, into a single
# raw cashback frame.
cashback_data_dir = '/Users/gabrielreynoso/PycharmProjects/Klar/Rewards/Cashback/Data/'
cashback_sql1 = pd.read_csv(f'{cashback_data_dir}{month}_SQL1.csv')
cashback_sql2 = pd.read_csv(f'{cashback_data_dir}{month}_SQL2.csv')
cashback_raw = pd.concat([cashback_sql1, cashback_sql2])

In [None]:
# (rows, columns) of the concatenated CSV input.
cashback_raw.shape

In [None]:
# (rows, columns) of the CSV input restricted to PURCHASE and ADJUSTMENT rows.
cashback_raw.loc[cashback_raw['type'].isin(['PURCHASE', 'ADJUSTMENT'])].shape

## Galileo Query

In [None]:
# GALILEO-only transactions query: SETTLED, non-zero amount, transaction_id not starting with
# TGT or SRC, and excluding the two all-zero source_account_internal_id values; ordered by user_id.
# NOTE(review): the date window here is October 2022, while `month` is 'December' and
# query_transactions covers December 2022 — confirm whether these dates are stale.
query_galileo = '''
SELECT t.*
FROM analytics_bi.transactions t
where t.state = 'SETTLED' and t.provider_id = 'GALILEO'
AND t.timestamp_mx >= '2022-10-01 00:00:00'
AND t.timestamp_mx < '2022-11-01 00:00:00'
AND t.transaction_id NOT LIKE 'TGT%'
AND t.transaction_id NOT LIKE 'SRC%'
and t.source_account_internal_id <> '0000000000000000' AND t.source_account_internal_id <> '00000000-0000-0000-0000-000000000000'
AND t.amount <> 0
order by t.user_id ASC;
'''

In [None]:
# Run the GALILEO-only query on the same engine and load the whole result set into a DataFrame.
query_raw_galileo = pd.read_sql_query(
    sql=sqlalchemy.text(query_galileo),
    con=cnx,
)

In [None]:
# (rows, columns) of the GALILEO-only extract.
query_raw_galileo.shape

In [None]:
# (rows, columns) of the GALILEO-only extract restricted to PURCHASE and ADJUSTMENT rows.
query_raw_galileo.loc[query_raw_galileo['type'].isin(['PURCHASE', 'ADJUSTMENT'])].shape