In [16]:
import pandas as pd
from sqlalchemy import create_engine
import sqlalchemy
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import gspread_dataframe as gd

In [3]:
# Redshift connection
f = open('/Users/gabrielreynoso/Documents/Queries/db_klarprod_connection.txt', 'r')
postgres_str = f.read()
f.close()
cnx = create_engine(postgres_str)

### Output Directory

In [5]:
output_directory = '../Cashback_Payments/December_WO_CCK.csv'

## Cohort Info

In [41]:
cohort_query = '''
select
    distinct klar_user_id as user_id,
    case
        when segment_name like '%- 10p cohort' then '10p'
        when segment_name like '%3k%' then '3k'
        when segment_name like '%control%' then '10p'
        when segment_name like '%2k cohort%' then '2k'
        when segment_name like '%2k or 10p%' then '2k_or_10p'
    end as cashback_type,
    case
        when segment_name like '%control%' then 0
            else 1 end as needed_opt_in
from is_customer_io.segments as s
where segment_id in (1521,1522,1523,1524,1525)
and user_id is not null;
'''

In [42]:
cohorts_info = pd.read_sql_query(sqlalchemy.text(cohort_query),cnx)

In [43]:
cohorts_info.head(5)

Unnamed: 0,user_id,cashback_type,needed_opt_in
0,00001c85-e714-46f5-bb81-9164d49362bf,10p,0
1,0000c7fc-474c-45f5-aef5-58554e6c713b,10p,0
2,000173c7-995e-43f7-98da-306a2469712c,2k_or_10p,1
3,0002f190-5527-4220-9b92-955bfc498869,10p,1
4,0003e36f-092e-47f7-8842-b915afd24168,2k,1


In [36]:
# Duplicated check
cohorts_info[cohorts_info.user_id.duplicated()]

Unnamed: 0,user_id,cashback_type,needed_opt_in


In [44]:
# Duplicated for nulls in cashback_type
cohorts_info[cohorts_info.cashback_type.isna()]['user_id']

Series([], Name: user_id, dtype: object)

In [45]:
# Remap op-int column
cohorts_info.needed_opt_in = cohorts_info.needed_opt_in.map({1:True, 0:False})

In [46]:
# Cohorts segments
cohorts_info.cashback_type.value_counts()

10p          177543
2k_or_10p    152721
2k           127327
3k            50659
Name: cashback_type, dtype: int64

## Markdown Info

In [18]:
# Read and Load Credentials
credentials = ServiceAccountCredentials.from_json_keyfile_name('/Users/gabrielreynoso/Documents/GoogleCredentials/gabo_credentials.json')
gc = gspread.authorize(credentials)

# Open the whole Google Sheet
gsheet = gc.open("Benefits Registration December")

In [20]:
# Read a worksheet into a Dataframe
opt_in_info = pd.DataFrame(gsheet.worksheet("Form Responses").get_all_records())

In [21]:
opt_in_info.head(7)

Unnamed: 0,Submission Date,email,source,Nombre,Apellido,IP,Submission ID
0,2022-12-06 19:37:45,shaniaroman95@gmail.com,cf6371f0-03e0-4535-a412-9377f9e620a3,ddd,scho,201.168.3.242,5461826652421731232
1,2022-12-06 20:02:58,miguelvr264@gmail.com,230be166-6252-4bc4-a56f-a9c771d043c6,Miguel,Villanueva,201.162.232.240,5461841780425898227
2,2022-12-06 20:03:24,eveguzmanpl@gmail.com,21df3d63-8a23-4e25-8e30-e00422aae795,Evelyn Yamila,Guzmán Pérez,201.123.158.64,5461842044689695386
3,2022-12-06 20:04:53,caritthoa@gmail.com,1b49718b-594c-419f-b2c6-c91ccbdf847d,Norma Carolina,Aguirre Estevez,189.147.99.24,5461842934291697889
4,2022-12-06 20:05:38,serchnayarita@gmail.com,79df32d4-40ed-44c6-bf25-cc39069fa414,Sergio,Partida,200.68.167.102,5461843372014690058
5,2022-12-06 20:07:02,fredyalmaraz0510@gmail.com,9ba214f1-76df-40d2-b09a-1389b363c2c9,Fredy,Almaraz,200.63.41.174,5461844224713350263
6,2022-12-06 20:07:36,fredyalmaraz0510@gmail.com,9ba214f1-76df-40d2-b09a-1389b363c2c9,Fredy,Almaraz,200.63.41.174,5461844564714276989


In [22]:
# EDA optIn
opt_in_info = opt_in_info[['source']]
opt_in_info.columns = ['user_id']
opt_in_info = pd.DataFrame(opt_in_info.user_id.drop_duplicates(), columns = ['user_id'])
opt_in_info['optIn'] = True

## Cashback Calculation

In [53]:
cashback_december = pd.read_csv('../Cashback_Payments/Raw_WO_CCK_December.csv')

In [54]:
# Total purchases
cashback_december['total_purchases'] = cashback_december.ArcusPurchaseNum + cashback_december.GalileoPurchaseNum
# Total amount Purchases
cashback_december['amount_purch'] = cashback_december.GalileoPurchaseAmount + cashback_december.ArcusPurchaseAmount
# Calculate the right amount of spend for each user
cashback_december['cashback_amount'] = -1 * cashback_december.amount_purch - cashback_december.GalileoAdjustAmount - - cashback_december.ArcusAdjustAmount
# Calculate the cashback
cashback_december['cashback'] = cashback_december.cashback_amount * 0.01
# Top the cashback for max 1000
cashback_december['cashback'] = cashback_december['cashback'].apply(lambda x: 1000 if x > 1000 else round(x, 1))

In [55]:
dec_cashback = cashback_december[['user_id', 'amount_purch', 'total_purchases', 'cashback_amount', 'cashback']]

In [56]:
# Add the segment info
dec_cashback = pd.merge(dec_cashback, cohorts_info, on='user_id', how='left')
# Add optIn responses
dec_cashback = pd.merge(dec_cashback, opt_in_info, on='user_id', how='left')
# Filter the less than 1
dec_cashback = dec_cashback[dec_cashback['cashback'] >= 1]

In [57]:
# Fill users not in experiment
dec_cashback.cashback_type = dec_cashback.cashback_type.fillna('10p')
# Fill users with no required OptIn
dec_cashback.needed_opt_in = dec_cashback.needed_opt_in.fillna(False)
# Fill users with no OptIn
dec_cashback.optIn = dec_cashback.optIn.fillna(False)

In [58]:
dec_cashback.head(7)

Unnamed: 0,user_id,amount_purch,total_purchases,cashback_amount,cashback,cashback_type,needed_opt_in,optIn
1,00007e60-e21e-4616-81b4-4afae84713ee,-281.0,3.0,281.0,2.8,10p,True,False
2,000096ca-40b4-4de0-8b71-6f16939af17f,-527.78,5.0,527.78,5.3,10p,False,False
4,00017516-8f98-4f85-b58e-09b4fb9c2c3e,-250.0,1.0,250.0,2.5,10p,False,False
5,0002325b-5c34-4e23-89d4-e289b52b1ab7,-750.84,4.0,750.84,7.5,10p,True,False
6,00024ed7-05c1-4421-ae51-0a4d1cd4ecd0,-1860.45,1.0,1860.45,18.6,2k_or_10p,True,False
9,0003455b-033d-48ee-80d2-77c05664c154,-697.45,3.0,697.45,7.0,2k_or_10p,True,False
10,00038557-6c50-4a9f-9cd8-c49a28987754,-2586.5,10.0,2586.5,25.9,2k_or_10p,True,False


## Payment Filter

In [59]:
# Filter for payment
dec_cashback['Payment_filter'] = dec_cashback.apply(lambda x:
                                                    (
                                                        (
                                                            (True if x.cashback_amount >= 3000 else False)
                                                            # Third Level Filter
                                                            if x.cashback_type == '3k'
                                                            else(
                                                                (True if x.cashback_amount >= 2000 else False)
                                                                # Fourth Level Filter
                                                                if x.cashback_type == '2k'
                                                                else
                                                                    (
                                                                        (True if x.cashback_amount >= 2000 or x.total_purchases > 9 else False)
                                                                        # Fifth Level Filter
                                                                        if x.cashback_type == '2k_or_10p'
                                                                        else (True if x.total_purchases > 9 else False)
                                                                    )
                                                                )
                                                        )
                                                        # Second Level Filter
                                                        if x.optIn else False
                                                    )
                                                    # First Level Filter
                                                    if x.needed_opt_in
                                                    else(
                                                            (True if x.total_purchases > 9 else False)
                                                        # Second Level Filter
                                                        if x.cashback_type == '10p'
                                                        else
                                                            False
                                                        )
                                                    , axis=1)

In [61]:
dec_cashback[dec_cashback.optIn].head(25)

Unnamed: 0,user_id,amount_purch,total_purchases,cashback_amount,cashback,cashback_type,needed_opt_in,optIn,Payment_filter
16,00057154-a204-4f63-9a27-f70bb9885dc8,-4248.33,7.0,4248.33,42.5,2k,True,True,True
38,00103496-0e0e-4e4d-a970-1366bb3485b5,-1271.86,6.0,1271.86,12.7,2k,True,True,False
40,00114f49-f903-4028-a100-a6c2abccb35d,-459.8,3.0,459.8,4.6,2k_or_10p,True,True,False
41,00126573-2ace-4c36-b24b-a8dba3b9e9b7,-9557.91,21.0,9557.91,95.6,2k,True,True,True
45,0014400b-0570-4d22-9c59-1e9c11ec94a4,-2477.08,16.0,2456.31,24.6,2k,True,True,True
54,0018aa72-d2bb-4069-91c5-2a0271165367,-913.7,12.0,913.7,9.1,2k_or_10p,True,True,True
72,001f4e2e-774e-481c-bbd7-2898fd0f69fa,-8723.1,39.0,8723.1,87.2,10p,True,True,False
75,00203315-5ecf-462d-a616-0ee0c27ff362,-2331.12,9.0,2331.12,23.3,10p,True,True,False
78,0021bdea-c63e-44a2-94ad-e8301938f83f,-15772.55,49.0,15772.55,157.7,3k,True,True,True
80,00227fd4-62ea-424f-acdf-6a6d8bc696ee,-17728.26,71.0,17728.26,177.3,2k_or_10p,True,True,True


## Cashback to pay

In [62]:
dec_cashback[dec_cashback.Payment_filter]['cashback'].sum()

765203.1

In [63]:
dec_cashback.to_csv(output_directory, index = False)