In [1]:
import pandas as pd
from sqlalchemy import create_engine
import sqlalchemy
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from gspread import SpreadsheetNotFound
import gspread_dataframe as gd

In [2]:
# Redshift connection
f = open('/Users/gabrielreynoso/Documents/Queries/db_klarprod_connection.txt', 'r')
postgres_str = f.read()
f.close()
cnx = create_engine(postgres_str)

In [3]:
# Read and Load Credentials
credentials = ServiceAccountCredentials.from_json_keyfile_name('/Users/gabrielreynoso/Documents/GoogleCredentials/gabo_credentials.json')
gc = gspread.authorize(credentials)

### Output Directory


In [4]:
output_directory = '../Monthly_Payments/February_2023.csv'

In [6]:
sheet_name = 'KlarPlus_February_Payments'
try:
    payments_ws = gc.open(sheet_name)
    # Code if spreadsheet exists:
    print("Spreadsheet exists")
except SpreadsheetNotFound:
    # Code if spreadsheet doesn't exist:
    payments_ws = gc.create(title=sheet_name)
    payments_ws.share('gabriel.reynoso@klar.mx', perm_type='user', role='writer')
    payments_ws.share('ivette.ayala@klar.mx', perm_type='user', role='writer')
    payments_ws.share('jose@klar.mx', perm_type='user', role='writer')
    payments_ws.add_worksheet(title="Cashback", rows=1000,cols=10)
    payments_ws.add_worksheet(title="Balance Check", rows=1000,cols=10)
    payments_ws.add_worksheet(title="ATM Withdraws", rows=1000,cols=10)
    payments_ws.del_worksheet('Sheet 1')
    print("Spreadsheet created")

Spreadsheet exists


## Cohort Info


In [7]:
cohort_query = '''
select
    distinct klar_user_id as user_id,
    'Klar+'as cashback_type
from is_customer_io.segments as s
where segment_id = 1587
and user_id is not null;
'''

In [7]:
# Fix for february
cohort_query = '''
with delinquent_users as (select distinct user_id,
                                          'Delinquent_users' as status
                          from loans.crediklar_loanbook
                          where (loan_status = 'DELINQUENT' and days_past_due < -9)
                             or loan_status = 'DEFAULTED'
                          UNION ALL
--klar+ user delinquents adelanto
                          select distinct user_id,
                                          'Delinquent_users' as status
                          from loans.salary_advance_loanbook lb
                          where (loan_status = 'DELINQUENT' and days_past_due < -9)
                             or loan_status = 'DEFAULTED'
                          UNION ALL
-- cck
                          select distinct user_id,
                                          'Delinquent_users' as status
                          from loans.cck_loanbook
                          where (loan_status = 'DELINQUENT' and days_past_due < -9)
                             or loan_status = 'DEFAULTED'
                          UNION ALL
-- respaldo
                          select distinct user_id,
                                          'Delinquent_users' as status
                          from ds_overdraft.overdraft_loanbook
                          where loan_status in ('DELINQUENT', 'DEFAULTED')),
transaction_users as (
    select
        distinct t.user_id
    from analytics_bi.transactions t
    left join delinquent_users d on t.user_id = d.user_id
    where t.timestamp_mx_created_at between dateadd(days,-360,'2023-02-02') and '2023-02-02'
    and t.type in ('PURCHASE', 'TRANSFER', 'DEPOSIT', 'QUASI_CASH')
    )
select
    distinct user_id,
    'Klar+'as cashback_type
from transaction_users
'''

In [8]:
cohorts_info = pd.read_sql_query(sqlalchemy.text(cohort_query),cnx)

In [9]:
# Duplicated check
cohorts_info[cohorts_info.user_id.duplicated()]

Unnamed: 0,user_id,cashback_type


In [10]:
# Add needed opt-in column
cohorts_info['need_opt_in'] = True

In [11]:
cohorts_info.head(10)

Unnamed: 0,user_id,cashback_type,need_opt_in
0,72ca70fc-db9e-476e-a237-97ba62415f8b,Klar+,True
1,72cb354e-dd3e-4123-9b03-26a7efc22278,Klar+,True
2,72cd3394-c0cb-4899-b24b-5c6449fd846f,Klar+,True
3,72ced729-bca9-4147-abae-75c17779193e,Klar+,True
4,72cf1d58-e915-4a60-9d77-e0ec385abc53,Klar+,True
5,72d0aca8-35e2-4f9a-a485-d7ff8226bfe6,Klar+,True
6,72d0d314-0c1f-4d60-8db4-a605a076cc6a,Klar+,True
7,72d0e839-ca1d-435d-b515-3eb6bf4faca0,Klar+,True
8,72d1c84f-a537-4730-a7a4-c3777b0cf12f,Klar+,True
9,72d50278-d028-488e-9030-199111c15f49,Klar+,True


## Opt-In

In [15]:
# Open the whole Google Sheet
gsheet = gc.open("Klar Plus February Benefits registration")

In [16]:
""# Read a worksheet into a Dataframe
opt_in_info = pd.DataFrame(gsheet.worksheet("Form Responses").get_all_records())
limit_date = '2023-03-01'
# EDA optIn
opt_in_info = opt_in_info[opt_in_info['Submission Date'] < limit_date]
opt_in_info = opt_in_info[~opt_in_info.source.isna()][['source']]
opt_in_info.columns = ['user_id']
opt_in_info = pd.DataFrame(opt_in_info.user_id.drop_duplicates(), columns = ['user_id'])
opt_in_info['optIn'] = True

In [17]:
opt_in_info.shape

(79304, 2)

# DBs Integration

In [18]:
# Read all DBs
purchases = pd.read_csv('./Monthly_DB/Raw_Cashback2023-02.csv')
atm = pd.read_csv('./Monthly_DB/Raw_ATM2023-02.csv')

## Cashback Calculation

In [19]:
# Total purchases
purchases['total_purchases'] = purchases.ArcusPurchaseNum + purchases.GalileoPurchaseNum
# Total amount Purchases
purchases['amount_purch'] = purchases.GalileoPurchaseAmount + purchases.ArcusPurchaseAmount
# Calculate the right amount of spend for each user
purchases['cashback_amount'] = -1 * purchases.amount_purch - purchases.GalileoAdjustAmount - - purchases.ArcusAdjustAmount
# Calculate the cashback
purchases['cashback'] = purchases.cashback_amount * 0.01
# Top the cashback for max 1000
purchases['cashback'] = purchases['cashback'].apply(lambda x: 1000 if x > 1000 else round(x, 1))
# Important columns
month_cashback = purchases[['user_id', 'amount_purch', 'total_purchases', 'cashback_amount', 'cashback']]

In [20]:
print(month_cashback.shape[0])
month_cashback.head(10)

141213


Unnamed: 0,user_id,amount_purch,total_purchases,cashback_amount,cashback
0,00007e60-e21e-4616-81b4-4afae84713ee,-388.0,1.0,388.0,3.9
1,000096ca-40b4-4de0-8b71-6f16939af17f,-99.0,1.0,-30.0,-0.3
2,00010862-0f8a-42c1-a26e-53dd2cee6ae0,-25.0,1.0,25.0,0.2
3,00017516-8f98-4f85-b58e-09b4fb9c2c3e,-250.0,1.0,250.0,2.5
4,0001fec2-2153-4ba8-a510-c5be80c8c973,-25.0,1.0,25.0,0.2
5,0002325b-5c34-4e23-89d4-e289b52b1ab7,-378.0,2.0,378.0,3.8
6,00024ed7-05c1-4421-ae51-0a4d1cd4ecd0,-100.0,1.0,100.0,1.0
7,0002c06d-33ce-492c-8a63-3b7b2ef43086,-100.5,2.0,100.5,1.0
8,00030c5c-c952-4eb7-a348-de96391e1b1b,-5587.09,16.0,5587.09,55.9
9,000334b8-aecf-45e0-aba5-c204c02e5fef,-1219.06,10.0,1199.06,12.0


In [21]:
transactions = pd.merge(month_cashback, atm, on = 'user_id', how='outer')

In [22]:
transactions.head(10)

Unnamed: 0,user_id,amount_purch,total_purchases,cashback_amount,cashback,amount_DISBURSEMENT,amount_FEE,transaction_id_DISBURSEMENT,transaction_id_FEE
0,00007e60-e21e-4616-81b4-4afae84713ee,-388.0,1.0,388.0,3.9,,,,
1,000096ca-40b4-4de0-8b71-6f16939af17f,-99.0,1.0,-30.0,-0.3,,,,
2,00010862-0f8a-42c1-a26e-53dd2cee6ae0,-25.0,1.0,25.0,0.2,,,,
3,00017516-8f98-4f85-b58e-09b4fb9c2c3e,-250.0,1.0,250.0,2.5,,,,
4,0001fec2-2153-4ba8-a510-c5be80c8c973,-25.0,1.0,25.0,0.2,,,,
5,0002325b-5c34-4e23-89d4-e289b52b1ab7,-378.0,2.0,378.0,3.8,,,,
6,00024ed7-05c1-4421-ae51-0a4d1cd4ecd0,-100.0,1.0,100.0,1.0,-2835.84,0.0,1.0,0.0
7,0002c06d-33ce-492c-8a63-3b7b2ef43086,-100.5,2.0,100.5,1.0,,,,
8,00030c5c-c952-4eb7-a348-de96391e1b1b,-5587.09,16.0,5587.09,55.9,,,,
9,000334b8-aecf-45e0-aba5-c204c02e5fef,-1219.06,10.0,1199.06,12.0,-2169.6,0.0,2.0,0.0


In [23]:
transactions.shape

(148668, 9)

# Data analytics

In [24]:
transactions['purchases'] = ~transactions.total_purchases.isna()
transactions['disbursements'] = transactions.transaction_id_DISBURSEMENT > 0
transactions['fees'] = transactions.transaction_id_FEE > 0

In [25]:
transactions['user_category'] = transactions.apply(lambda row:
                                                   (
                                                       ('Purchase_Withdraw_Fee' if row.fees else 'Purchase_Withdraw')
                                                       if row.disbursements
                                                       else ('Purchase_Fee' if row.fees else 'Purchase')
                                                    )
                                                   if row.purchases
                                                   else (
                                                       ('Withdraw_Fee' if row.fees else 'Withdraw')
                                                       if row.disbursements
                                                       else ('Fee' if row.fees else 'None')
                                                   )

                                                   ,axis = 1)

In [26]:
transactions.user_category.value_counts()

Purchase                 119136
Purchase_Withdraw         19359
Withdraw                   5331
Fee                        1709
Purchase_Fee               1660
Purchase_Withdraw_Fee      1058
Withdraw_Fee                415
Name: user_category, dtype: int64

# Customer IO Cohort

In [27]:
transactions = pd.merge(transactions,cohorts_info, on = 'user_id', how = 'left')

In [28]:
transactions.shape

(148668, 15)

In [29]:
transactions.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 148668 entries, 0 to 148667
Data columns (total 15 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   user_id                      148668 non-null  object 
 1   amount_purch                 141213 non-null  float64
 2   total_purchases              141213 non-null  float64
 3   cashback_amount              141213 non-null  float64
 4   cashback                     141213 non-null  float64
 5   amount_DISBURSEMENT          29532 non-null   float64
 6   amount_FEE                   29532 non-null   float64
 7   transaction_id_DISBURSEMENT  29532 non-null   float64
 8   transaction_id_FEE           29532 non-null   float64
 9   purchases                    148668 non-null  bool   
 10  disbursements                148668 non-null  bool   
 11  fees                         148668 non-null  bool   
 12  user_category                148668 non-null  object 
 13 

# Split users between users in cohort and not in cohort

In [30]:
not_in_cohort = transactions[transactions.need_opt_in.isna()]
users_in_cohort = transactions[~transactions.need_opt_in.isna()]

In [31]:
sms_confirmed_query = '''
select
    user_id,
    sms_confirmed_mx
from klar.cck_funnel
where user_id in {}
'''

In [32]:
# Get the sms confirmed info for the users that are not in the cohort
filled_sms_confirmed_query = sqlalchemy.text(sms_confirmed_query.format(tuple(not_in_cohort.user_id.to_list())))
# Execute the query
sms_confirmed_info = pd.read_sql_query(filled_sms_confirmed_query,cnx)
# Add the info to the db of users not in the cohort
not_in_cohort = pd.merge(not_in_cohort,sms_confirmed_info, on = 'user_id')

In [33]:
new_users  = not_in_cohort[(not_in_cohort.sms_confirmed_mx > '2023-02-01')]
old_users_not_in_cohort = not_in_cohort[(not_in_cohort.sms_confirmed_mx < '2023-02-01')]

In [34]:
print('The total users base is: ' + str(transactions.shape[0]))
print('The users in the cohort is: ' + str(users_in_cohort.shape[0]))
print('The users not in the cohort that are new users is: ' + str(new_users.shape[0]))
print('The users not in the cohort that are old users is: ' + str(old_users_not_in_cohort.shape[0]))

The total users base is: 148668
The users in the cohort is: 138245
The users not in the cohort that are new users is: 7453
The users not in the cohort that are old users is: 2970


## Cohort of old users to pay

In [35]:
# Old users to pay
old_users_payment = old_users_not_in_cohort[(old_users_not_in_cohort.amount_purch <= -2000) | (old_users_not_in_cohort.total_purchases > 9)]

In [36]:
print('Total number of users to pay: ' + str(old_users_payment.shape[0]))

Total number of users to pay: 227


## Cohort of new users and segment to pay

In [37]:
new_users.loc['need_opt_in'] = True

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_users.loc['need_opt_in'] = True


In [38]:
users_needing_optin = pd.concat([users_in_cohort,new_users])

In [39]:
users_needing_optin.shape[0]

145699

# Add the opt_in

In [79]:
users_with_optin_and_txn= pd.merge(users_needing_optin, opt_in_info, how='left', on='user_id')
users_with_optin_no_txn = pd.merge(opt_in_info, users_needing_optin, how='left', on='user_id')

In [80]:
users_with_optin_and_txn = users_with_optin_and_txn[(~users_with_optin_and_txn.optIn.isna())]
users_with_optin_no_txn = users_with_optin_no_txn[~users_with_optin_no_txn.amount_purch.isna()]

In [42]:
print('The number of users with opt-in is: ' + str(users_with_optin_and_txn.shape[0]))
print('The number of users with opt-in and no TXN: ' + str(users_with_optin_no_txn.shape[0]))

The number of users with opt-in is: 36929
The number of users with opt-in and no TXN: 35375


In [None]:
users_with_optin_no_txn[users_with_optin_no_txn.user_id == '']

In [43]:
users_with_optin_and_txn_with_req = users_with_optin_and_txn[(users_with_optin_and_txn.amount_purch <= -2000) | (users_with_optin_and_txn.total_purchases > 9)]

In [44]:
print('Number of users with opt In and requirements is: ' + str(users_with_optin_and_txn_with_req.shape[0]))

Number of users with opt In and requirements is: 13421


In [45]:
new_users_with_optin_and_req = users_with_optin_and_txn_with_req[~users_with_optin_and_txn_with_req.sms_confirmed_mx.isna()]
old_users_with_optin_and_req = users_with_optin_and_txn_with_req[users_with_optin_and_txn_with_req.sms_confirmed_mx.isna()]

In [46]:
print('Number of new users with opt In and requirements is: ' + str(new_users_with_optin_and_req.shape[0]))
print('Number of old users with opt In and requirements is: ' + str(old_users_with_optin_and_req.shape[0]))

Number of new users with opt In and requirements is: 85
Number of old users with opt In and requirements is: 13336


## Final Cohort to Pay

In [47]:
new_users_with_optin_and_req['reason'] = 'New_users_w/req'
old_users_with_optin_and_req['reason'] = 'Old_users_w/req'
old_users_payment['reason'] = 'Not_in_cohort'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_users_with_optin_and_req['reason'] = 'New_users_w/req'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  old_users_with_optin_and_req['reason'] = 'Old_users_w/req'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  old_users_payment['reason'] = 'Not_in_cohort'


In [48]:
payment_cohort = pd.concat([new_users_with_optin_and_req, old_users_with_optin_and_req, old_users_payment])

In [49]:
payment_cohort

  output = repr(obj)
  return method()


Unnamed: 0,user_id,amount_purch,total_purchases,cashback_amount,cashback,amount_DISBURSEMENT,amount_FEE,transaction_id_DISBURSEMENT,transaction_id_FEE,purchases,disbursements,fees,user_category,cashback_type,need_opt_in,sms_confirmed_mx,optIn,reason
138293,01d3af80-99a1-435e-8dc0-e67d3634d6ca,-1427.23,18.0,1427.23,14.3,,,,,True,False,False,Purchase,,,2023-02-12 14:24:12.144404,True,New_users_w/req
138339,042ef57a-065d-4c3b-a403-d86ed85dc392,-4350.50,17.0,4350.50,43.5,,,,,True,False,False,Purchase,,,2023-02-03 13:15:25.010569,True,New_users_w/req
138351,049bb2cf-1513-4178-bb30-7946876806d9,-2102.66,3.0,2102.66,21.0,,,,,True,False,False,Purchase,,,2023-02-06 12:29:19.386784,True,New_users_w/req
138395,0633d4a2-196c-44fe-bd75-3da0c0a4e2f4,-35261.79,31.0,35162.79,351.6,-6434.56,-37.12,4.0,3.0,True,True,True,Purchase_Withdraw_Fee,,,2023-02-12 21:57:31.795052,True,New_users_w/req
138412,06c1154d-39f7-4c69-b47a-8dbde593e7f3,-1457.59,11.0,1457.59,14.6,,,,,True,False,False,Purchase,,,2023-02-05 08:44:31.916231,True,New_users_w/req
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9296,fd6c93ba-7294-4b57-aefc-492326f2f7ee,-576.00,12.0,576.00,5.8,,,,,True,False,False,Purchase,,,2020-11-20 20:07:00.630049,,Not_in_cohort
9312,fdc12d48-745c-4865-a4a6-b49e6ca100d3,-2000.00,2.0,2000.00,20.0,,,,,True,False,False,Purchase,,,2022-09-20 21:22:05.642955,,Not_in_cohort
9334,fe785fb6-a6fb-4887-bd00-ce8f42ea6c0d,-830.00,14.0,830.00,8.3,,,,,True,False,False,Purchase,,,2023-01-25 03:07:07.201230,,Not_in_cohort
9367,ff4b40cd-f7b9-40e4-8ca6-cb6f24034c0f,-2012.59,9.0,2012.59,20.1,,,,,True,False,False,Purchase,,,2023-01-31 17:03:26.262853,,Not_in_cohort


In [50]:
payment_cohort.reason.value_counts()

Old_users_w/req    13336
Not_in_cohort        227
New_users_w/req       85
Name: reason, dtype: int64

In [83]:
payment_cohort.head()

Unnamed: 0,user_id,amount_purch,total_purchases,cashback_amount,cashback,amount_DISBURSEMENT,amount_FEE,transaction_id_DISBURSEMENT,transaction_id_FEE,purchases,disbursements,fees,user_category,cashback_type,need_opt_in,sms_confirmed_mx,optIn,reason
138293,01d3af80-99a1-435e-8dc0-e67d3634d6ca,-1427.23,18.0,1427.23,14.3,,,,,True,False,False,Purchase,,,2023-02-12 14:24:12.144404,True,New_users_w/req
138339,042ef57a-065d-4c3b-a403-d86ed85dc392,-4350.5,17.0,4350.5,43.5,,,,,True,False,False,Purchase,,,2023-02-03 13:15:25.010569,True,New_users_w/req
138351,049bb2cf-1513-4178-bb30-7946876806d9,-2102.66,3.0,2102.66,21.0,,,,,True,False,False,Purchase,,,2023-02-06 12:29:19.386784,True,New_users_w/req
138395,0633d4a2-196c-44fe-bd75-3da0c0a4e2f4,-35261.79,31.0,35162.79,351.6,-6434.56,-37.12,4.0,3.0,True,True,True,Purchase_Withdraw_Fee,,,2023-02-12 21:57:31.795052,True,New_users_w/req
138412,06c1154d-39f7-4c69-b47a-8dbde593e7f3,-1457.59,11.0,1457.59,14.6,,,,,True,False,False,Purchase,,,2023-02-05 08:44:31.916231,True,New_users_w/req


In [92]:
payment_cohort.shape[0]

13648

In [84]:
blocked_users = '''
select
    distinct user_id
from operations.blocked_users
where user_id in {}
'''

In [88]:
# Get the sms confirmed info for the users that are not in the cohort
users_blocked_query = sqlalchemy.text(blocked_users.format(tuple(payment_cohort.user_id.to_list())))
# Execute the query
user_blocked = pd.read_sql_query(users_blocked_query,cnx)

In [91]:
print('The number of users blocked is: ' + str(user_blocked.shape[0]))

The number of users blocked is: 118


In [93]:
payment_cohort = payment_cohort[~payment_cohort.user_id.isin(user_blocked.user_id.to_list())]

In [94]:
print('The Final number of users is: ' + str(payment_cohort.shape[0]))

The Final number of users is: 13530


# Cashback

In [95]:
cashback_payment = payment_cohort[payment_cohort.purchases]
cashback_payment = cashback_payment[cashback_payment.cashback >= 1]
cashback_payment['cashback'] = cashback_payment['cashback'].apply(lambda x: 1000 if x > 1000 else round(x, 0))
print('Total of users with cashback payment: ' + str(payment_cohort.shape[0]))
print('Total amount of cashback to pay: $'+str(payment_cohort.cashback.sum()))

Total of users with cashback payment: 13530
Total amount of cashback to pay: $685029.1


In [96]:
cashback_payment.reason.value_counts()

Old_users_w/req    13206
Not_in_cohort        219
New_users_w/req       84
Name: reason, dtype: int64

In [97]:
cashback_payment.user_category.value_counts()

Purchase                 9085
Purchase_Withdraw        4076
Purchase_Withdraw_Fee     179
Purchase_Fee              169
Name: user_category, dtype: int64

In [98]:
cashback_payment.shape[0]

13509

## Cashback Statistics

#### Mean

In [99]:
cashback_payment.groupby('reason').agg({'cashback_amount':'mean','total_purchases':'mean'})

Unnamed: 0_level_0,cashback_amount,total_purchases
reason,Unnamed: 1_level_1,Unnamed: 2_level_1
New_users_w/req,4670.595357,15.119048
Not_in_cohort,2918.135342,13.461187
Old_users_w/req,5126.974149,18.923974


#### STD

In [100]:
cashback_payment.groupby('reason').agg({'cashback_amount':'std','total_purchases':'std'})

Unnamed: 0_level_0,cashback_amount,total_purchases
reason,Unnamed: 1_level_1,Unnamed: 2_level_1
New_users_w/req,6358.720065,10.688813
Not_in_cohort,3978.528334,9.041804
Old_users_w/req,7231.047218,14.493058


#### MIN

In [101]:
cashback_payment.groupby('reason').agg({'cashback_amount':'min','total_purchases':'min', 'cashback':'min'})

Unnamed: 0_level_0,cashback_amount,total_purchases,cashback
reason,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
New_users_w/req,142.01,1.0,1.0
Not_in_cohort,106.01,1.0,1.0
Old_users_w/req,108.33,1.0,1.0


#### MAX

In [102]:
cashback_payment.groupby('reason').agg({'cashback_amount':'max','total_purchases':'max', 'cashback':'max'})

Unnamed: 0_level_0,cashback_amount,total_purchases,cashback
reason,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
New_users_w/req,35162.79,56.0,352.0
Not_in_cohort,43175.77,67.0,432.0
Old_users_w/req,173455.35,334.0,1000.0


#### Standard Error

In [103]:
cashback_payment.groupby('reason').agg({'cashback_amount':'sem','total_purchases':'sem'})

Unnamed: 0_level_0,cashback_amount,total_purchases
reason,Unnamed: 1_level_1,Unnamed: 2_level_1
New_users_w/req,693.793239,1.166245
Not_in_cohort,268.844031,0.610988
Old_users_w/req,62.923888,0.126117


In [104]:
cashback_payment.columns

Index(['user_id', 'amount_purch', 'total_purchases', 'cashback_amount',
       'cashback', 'amount_DISBURSEMENT', 'amount_FEE',
       'transaction_id_DISBURSEMENT', 'transaction_id_FEE', 'purchases',
       'disbursements', 'fees', 'user_category', 'cashback_type',
       'need_opt_in', 'sms_confirmed_mx', 'optIn', 'reason'],
      dtype='object')

In [105]:
file_cashback = cashback_payment[['user_id', 'amount_purch', 'total_purchases','cashback', 'reason']]

In [106]:
# Append the dataframe again
gd.set_with_dataframe(payments_ws.worksheet("Cashback"), file_cashback, row=1, col=1)

# Balance Check

In [107]:
balance_payment = payment_cohort[payment_cohort.fees]
balance_payment['num_fees_to_pay'] = balance_payment['transaction_id_FEE'].apply(lambda x: 3 if x > 3 else x)
balance_payment['amount_to_pay'] = balance_payment['num_fees_to_pay']*15

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  balance_payment['num_fees_to_pay'] = balance_payment['transaction_id_FEE'].apply(lambda x: 3 if x > 3 else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  balance_payment['amount_to_pay'] = balance_payment['num_fees_to_pay']*15


In [108]:
print('Total of users with Balance Check payment: ' + str(balance_payment.shape[0]))
print('Total amount of Balance Check to pay: $'+str(balance_payment.amount_to_pay.sum()))

Total of users with Balance Check payment: 348
Total amount of Balance Check to pay: $6345.0


In [109]:
balance_payment.reason.value_counts()

Old_users_w/req    318
Not_in_cohort       22
New_users_w/req      8
Name: reason, dtype: int64

In [110]:
balance_payment.groupby('reason').agg({'num_fees_to_pay':'mean','amount_to_pay':'mean'})

Unnamed: 0_level_0,num_fees_to_pay,amount_to_pay
reason,Unnamed: 1_level_1,Unnamed: 2_level_1
New_users_w/req,1.5,22.5
Not_in_cohort,1.136364,17.045455
Old_users_w/req,1.213836,18.207547


In [111]:
file_balance_check = balance_payment[['user_id', 'num_fees_to_pay', 'amount_to_pay', 'reason']]

In [112]:
# Append the dataframe again
gd.set_with_dataframe(payments_ws.worksheet("Balance Check"), file_balance_check, row=1, col=1)

# ATM Withdraw


In [113]:
atm_payment = payment_cohort[payment_cohort.disbursements]
atm_payment['num_atm_to_pay'] = atm_payment['transaction_id_DISBURSEMENT'].apply(lambda x: 3 if x > 3 else x)
atm_payment['amount_to_pay'] = atm_payment['num_atm_to_pay']*25

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  atm_payment['num_atm_to_pay'] = atm_payment['transaction_id_DISBURSEMENT'].apply(lambda x: 3 if x > 3 else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  atm_payment['amount_to_pay'] = atm_payment['num_atm_to_pay']*25


In [114]:
print('Total of users with ATM Withdraw payment: ' + str(atm_payment.shape[0]))
print('Total amount of ATM Withdraw to pay: $'+str(atm_payment.amount_to_pay.sum()))

Total of users with ATM Withdraw payment: 4258
Total amount of ATM Withdraw to pay: $202150.0


In [115]:
atm_payment.reason.value_counts()

Old_users_w/req    4194
Not_in_cohort        42
New_users_w/req      22
Name: reason, dtype: int64

In [116]:
atm_payment.groupby('reason').agg({'num_atm_to_pay':'mean','amount_to_pay':'mean'})

Unnamed: 0_level_0,num_atm_to_pay,amount_to_pay
reason,Unnamed: 1_level_1,Unnamed: 2_level_1
New_users_w/req,1.636364,40.909091
Not_in_cohort,1.714286,42.857143
Old_users_w/req,1.902241,47.556032


In [117]:
atm_payment[atm_payment['reason']=='Old_users_w/req']['num_atm_to_pay'].value_counts()

1.0    1848
3.0    1438
2.0     908
Name: num_atm_to_pay, dtype: int64

In [118]:
file_atm = atm_payment[['user_id', 'num_atm_to_pay', 'amount_to_pay', 'reason']]

In [119]:
# Append the dataframe again
gd.set_with_dataframe(payments_ws.worksheet("ATM Withdraws"), file_atm, row=1, col=1)