<H1>Bonus</H1>

Drivers' segments description
https://docs.google.com/spreadsheets/d/11KIaZaywoBq3MymrCd8dmWJdBhFO989SNsrrSKYiB3Y/edit#gid=0

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

<h3>Variables setup</h3>

In [2]:
path = "/Users/alexander/Documents/4. Corporate Life/In-Driver/бонусы/quito_v3.csv"
cohorts = 5           # quantity of cohorts
commission = 0.095    # indriver commission
win_rate = 0.30       # %drivers move to upper cohort
period_duration = 0.5 # period to forecast prospective rides from the total period
additinal_rides = 1   # quantity of rides to be added to the calculated amount for the upper cohorts 
bonus_days = 3        # days to offer the performance bonus

In [3]:
# percentile level calc
percentile_level = {}
percentile = 0
for i in range (1, cohorts):
    percentile = 100 / cohorts + percentile
    percentile_level[i] = int(percentile)

<h3>Data upload</h3>

In [4]:
df = pd.read_csv(path)  
df['gmv'] = pd.to_numeric(df['gmv'], errors='coerce')
dates = list(df.columns)[2:]
print("total drivers:", df.shape[0])
df.sample(n=5)

total drivers: 19687


Unnamed: 0,id,gmv,28.04,29.04,30.04,01.05,02.05,03.05,04.05,05.05,...,16.05,17.05,18.05,19.05,20.05,21.05,22.05,23.05,24.05,25.05
12731,110220684,30.0,1,0,0,0,0,0,0,0,...,0,5,1,0,0,0,0,0,0,0
4488,80157119,153.0,0,7,0,0,0,1,0,0,...,7,4,4,3,2,6,1,2,0,0
1076,70255470,13.0,0,0,0,0,0,0,0,0,...,0,4,0,0,0,0,0,0,0,0
9758,70302587,3.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2406,26251663,114.0,0,3,7,0,0,1,1,0,...,0,1,1,2,3,0,0,2,0,1


<h3>retention & avr. check calc</h3>

In [5]:
# split the period by 2 equal dates parts 
if len(dates)%2 == 0:
    index_2nd = len(dates)//2
    index_1st = 0
else:
    index_2nd = len(dates)//2+1
    index_1st = 1
    
first_half = dates[index_1st : index_2nd]
second_half = dates[index_2nd : len(dates)]
total_days = len(first_half) + len(second_half)


# retention & avr_check cals
df['first_half'] = df[first_half].sum(axis=1)
df['second_half'] = df[second_half].sum(axis=1)

def retention_calc(first_half, second_half):
    try:
        retention = second_half / first_half
    except:
        retention = 0
    retention = min (1, retention)
    return retention

df['retention'] = df.apply(lambda x: retention_calc(x['first_half'], x['second_half']), axis =  1)
df['total_rides'] = df['first_half'] + df['second_half']
df['avr_check'] = df['gmv'] / df['total_rides']
df = df[np.isfinite(df).all(1)] # infinity values drop (comes from a division by zero)


<h3>cohorts range calc</h3>

In [6]:
# exclude IDs which have zero rides during the first half of the period
df_activer_drivers = df.loc[df['first_half'] != 0]
    
all_cohorts = {}
range_start = 0

for percentile in percentile_level.keys():
    
    max_value = np.percentile(np.array(df_activer_drivers.total_rides.tolist()), percentile_level[percentile]) # percentile value
    all_cohorts[percentile] = range(int(range_start), int(max_value))
    range_start = max_value

last_percentile = int(max(list(percentile_level.keys())))+1
all_cohorts[last_percentile] = range(int(max_value),
                                     int(1e8)) # set a large number to keep the cohort range            

# cohort distribution for all IDs
def cohort_check(x, all_cohorts):
    for cohort in all_cohorts.keys():
        if x in all_cohorts[cohort]:
            return cohort
        else:
            pass      
df['cohort'] = df.apply(lambda x: cohort_check(x['total_rides'], all_cohorts), axis =  1)

print("cohorts range:")
all_cohorts

cohorts range:


{1: range(0, 10),
 2: range(10, 28),
 3: range(28, 57),
 4: range(57, 110),
 5: range(110, 100000000)}

<H3>cohorts features calc</H3>

In [7]:
avr_check = {}
for cohort in range(1, cohorts+1):
    avr_check[cohort] = round(df['avr_check'].loc[df['cohort'] == cohort].mean(),2)   

avr_rides = {}
for cohort in range(1, cohorts+1):
    avr_rides[cohort] = round(df['total_rides'].loc[df['cohort'] == cohort].mean(),1)

drivers = {}
for cohort in range(1, cohorts+1):
    drivers[cohort] = df['id'].loc[df['cohort'] == cohort].count()

avr_retention = {}
for cohort in range(1, cohorts+1):
    avr_retention[cohort] = round(df['retention'].loc[df['cohort'] == cohort].mean(),2)

df_cohort = pd.DataFrame()
cohort_df = pd.DataFrame(list(avr_check.items()),
                   columns=['cohort', 'avr_check'])
cohort_df['drivers'] = list(drivers.values())
cohort_df['avr_rides'] = list(avr_rides.values())

# retention calc
cohort_df = pd.merge(df.groupby('cohort')['retention'].mean(),
                     cohort_df,
                     on="cohort")

cohort_df['avr_rev_total'] = (cohort_df['avr_check'] * cohort_df['avr_rides'] * total_days * commission).astype(int)
cohort_df

Unnamed: 0,cohort,retention,avr_check,drivers,avr_rides,avr_rev_total
0,1,0.566912,3.7,4827,3.9,38
1,2,0.692998,3.61,3819,17.5,168
2,3,0.770567,3.58,3636,40.7,387
3,4,0.821257,3.58,3481,80.4,765
4,5,0.880697,3.58,3414,166.2,1582


<h3>Target for additional rides calc</h3>

In [8]:
cohort_df['W: rides'] = (cohort_df['avr_rides'] / total_days * bonus_days).astype(int) + 1
    # 'W: rides' - Weekend # Rides
    # add '1' to avoid rounding down
    
# calc the target rides
new_rides = {}
for cohort in range(1, cohorts):
    new_rides[cohort] = int(cohort_df.loc[cohort,'W: rides'] - cohort_df.loc[cohort-1,'W: rides'])

new_rides[cohorts] = 0 # set zero for the last upper cohort as there is no insentive
cohort_df['W: rides_extra'] = cohort_df['cohort'].map(new_rides)
    # 'W: rides_extra' - Weekend Extra Rides

cohort_df[['cohort', 'avr_rides', 'W: rides', 'W: rides_extra']]

Unnamed: 0,cohort,avr_rides,W: rides,W: rides_extra
0,1,3.9,1,1
1,2,17.5,2,3
2,3,40.7,5,4
3,4,80.4,9,9
4,5,166.2,18,0


<h3>Business case calc</h3>

In [9]:
# bonus to be given as an inventive by groups, % from average check of the cohort  
# 1 equals 100% from the GMV of the target rides
bonuses = {           
    1: 0.7,
    2: 0.7,
    3: 0.7,
    4: 0.7,
    5: 0}

<h3>Instant revenue and costs</h3>

In [10]:
# instant revenue

cohort_df['bonus, %'] = cohort_df['cohort'].map(bonuses)
cohort_df['win_rate'] = win_rate
cohort_df['W: driver_rev'] = cohort_df['W: rides'] * cohort_df['avr_check']
cohort_df['W: driver_rev_extra'] = cohort_df['W: rides_extra'] * cohort_df['avr_check'] * cohort_df['bonus, %']
cohort_df['W: driver_rev_total'] = cohort_df['W: driver_rev'] + cohort_df['W: driver_rev_extra']

cohort_df['W: ID_rev'] = cohort_df['W: driver_rev_extra'] * cohort_df['drivers'] * commission * win_rate
cohort_df['W: ID_costs'] = cohort_df['W: driver_rev_extra'] * cohort_df['drivers'] * win_rate
cohort_df['bonus'] = cohort_df['W: rides_extra'] * cohort_df['avr_check'] * cohort_df['bonus, %']

instant_rev = cohort_df['W: ID_rev'].sum()
instant_cost = cohort_df['W: ID_costs'].sum()
print("instant revenue:", int(instant_rev))
print("instant costs  :", int(instant_cost))
print("instant balance:", int(instant_rev - instant_cost))
cohort_df.round(1)

instant revenue: 4457
instant costs  : 46923
instant balance: -42465


Unnamed: 0,cohort,retention,avr_check,drivers,avr_rides,avr_rev_total,W: rides,W: rides_extra,"bonus, %",win_rate,W: driver_rev,W: driver_rev_extra,W: driver_rev_total,W: ID_rev,W: ID_costs,bonus
0,1,0.6,3.7,4827,3.9,38,1,1,0.7,0.3,3.7,2.6,6.3,356.3,3750.6,2.6
1,2,0.7,3.6,3819,17.5,168,2,3,0.7,0.3,7.2,7.6,14.8,825.1,8685.6,7.6
2,3,0.8,3.6,3636,40.7,387,5,4,0.7,0.3,17.9,10.0,27.9,1038.7,10934.2,10.0
3,4,0.8,3.6,3481,80.4,765,9,9,0.7,0.3,32.2,22.6,54.8,2237.5,23553.1,22.6
4,5,0.9,3.6,3414,166.2,1582,18,0,0.0,0.3,64.4,0.0,64.4,0.0,0.0,0.0


<h2>Perspective revenue projection</h2>

<h3>Target is not fullfilled</h3>

In [11]:
# target IS NOT fulfilled
# 'P: not FF: rev' - Perspective Not Fulfilled: Revenue

cohort_df['P: not FF: rev'] = cohort_df['avr_rides'] \
                                * cohort_df['avr_check'] \
                                * cohort_df['retention'] \
                                * period_duration \
                                * commission
cohort_df.loc[cohorts-1,'P: not FF: rev'] = 0 #keep the last cohort zero as no an insentive for the cohort

<h3>Target is fullfilled</h3>

In [12]:

# target IS fulfilled
FF_rides = {}
for i in range (1, cohorts):
    FF_rides[i] = avr_rides[i+1] * period_duration
FF_rides[cohorts] = 0
cohort_df['P: FF: rides'] = list(FF_rides.values())

FF_gmv = {}
for i in range (1, cohorts):
    FF_gmv[i] = int(FF_rides[i] * avr_check[i+1])
FF_gmv[cohorts] = 0
cohort_df['P: FF: gmv'] = list(FF_gmv.values())

FF_rev = {}
for i in range (1, cohorts):
    FF_rev[i] = int(FF_gmv[i] * commission * avr_retention[i+1])
FF_rev[cohorts] = 0
cohort_df['P: FF: rev'] = list(FF_rev.values())

cohort_df['P: rev_up'] = (cohort_df['P: FF: rev'] - cohort_df['P: not FF: rev']) \
                                * win_rate \
                                * cohort_df['drivers']

persp_rev_up = (cohort_df['P: rev_up'].sum())
revenue_total = cohort_df['W: ID_rev'].sum() + persp_rev_up

print('instant revenue      :', int(instant_rev))
print('perspective revenue  :', int(persp_rev_up))
print('revenue total        :', int(revenue_total))
print('costs:               :', int(instant_cost))

print('profit:              :', int(revenue_total - instant_cost))


cohort_df.round(1)

instant revenue      : 4457
perspective revenue  : 25198
revenue total        : 29656
costs:               : 46923
profit:              : -17267


Unnamed: 0,cohort,retention,avr_check,drivers,avr_rides,avr_rev_total,W: rides,W: rides_extra,"bonus, %",win_rate,...,W: driver_rev_extra,W: driver_rev_total,W: ID_rev,W: ID_costs,bonus,P: not FF: rev,P: FF: rides,P: FF: gmv,P: FF: rev,P: rev_up
0,1,0.6,3.7,4827,3.9,38,1,1,0.7,0.3,...,2.6,6.3,356.3,3750.6,2.6,0.4,8.8,31,2,2333.5
1,2,0.7,3.6,3819,17.5,168,2,3,0.7,0.3,...,7.6,14.8,825.1,8685.6,7.6,2.1,20.4,72,5,3346.0
2,3,0.8,3.6,3636,40.7,387,5,4,0.7,0.3,...,10.0,27.9,1038.7,10934.2,10.0,5.3,40.2,143,11,6181.4
3,4,0.8,3.6,3481,80.4,765,9,9,0.7,0.3,...,22.6,54.8,2237.5,23553.1,22.6,11.2,83.1,297,24,13337.6
4,5,0.9,3.6,3414,166.2,1582,18,0,0.0,0.3,...,0.0,64.4,0.0,0.0,0.0,0.0,0.0,0,0,0.0


<h2>Split insentive by cohorts and test groups</h2>

In [13]:
# some code for split cohorts on groups # 

bonuses_split = {
    "control": 1,
    "a": 1,       # 100% of target bonuses will be given
    "b": 0.8,     # 
    "c": 0.6}     # 

group_split = {
    "control": 0.25,
    "a": 0.25,
    "b": 0.25,
    "c": 0.25}

if sum(group_split.values()) != 1: print("split in test/control groups is NOT correct") 

In [14]:
df_groups = pd.DataFrame(columns = ['id', 'groups'])
for cohort in range (1, cohorts+1):
    group_length = df[df['cohort'] == cohort].shape[0]
    control = ['control' for i in range(1, int(group_length * group_split['control']))]
    a = ['a' for i in range(1, int(group_length * group_split['a']))]
    b = ['b' for i in range(1, int(group_length * group_split['b']))]
    c = ['c' for i in range (1, group_length - len(control) - len(a) - len(b) + 1)]

    df_group = df[df['cohort'] == cohort]
    df_group['groups'] = control + a + b + c
    df_group = df_group[['id', 'groups']]
    df_groups = pd.concat([df_groups, df_group])
    
df = df.merge(df_groups, how = 'left', on = 'id')

print("Split by group and cohorts\n")
print("total drivers:", df.shape[0])
for cohort in range (1, cohorts + 1):
    print(" cohort", cohort, ":", df[df['cohort'] == cohort].shape[0])
    for group in list(group_split.keys()):
        print("  ", group, ":", df[(df['cohort'] == cohort) & (df['groups'] == group)].shape[0])

Split by group and cohorts

total drivers: 19177
 cohort 1 : 4827
   control : 1205
   a : 1205
   b : 1205
   c : 1212
 cohort 2 : 3819
   control : 953
   a : 953
   b : 953
   c : 960
 cohort 3 : 3636
   control : 908
   a : 908
   b : 908
   c : 912
 cohort 4 : 3481
   control : 869
   a : 869
   b : 869
   c : 874
 cohort 5 : 3414
   control : 852
   a : 852
   b : 852
   c : 858


In [15]:
result_df = df.merge(cohort_df, how='inner', on='cohort')[['id','cohort','bonus','groups', 'W: rides_extra']]

result_df

Unnamed: 0,id,cohort,bonus,groups,W: rides_extra
0,93816214,3,10.024,control,4
1,67535877,3,10.024,control,4
2,31162874,3,10.024,control,4
3,14090479,3,10.024,control,4
4,93488555,3,10.024,control,4
...,...,...,...,...,...
19172,60980297,2,7.581,c,3
19173,65993731,2,7.581,c,3
19174,17399526,2,7.581,c,3
19175,15466266,2,7.581,c,3


In [16]:
result_df['groups_coeff'] = result_df['groups'].apply(lambda x: bonuses_split[x])
result_df['bonus_to_offer'] = result_df['groups_coeff'] * result_df['bonus']
result_df['bonus'] = result_df['bonus'].apply(lambda x: round(x,1))
result_df['bonus_to_offer'] = result_df['bonus_to_offer'].apply(lambda x: round(x,0))
result_df

Unnamed: 0,id,cohort,bonus,groups,W: rides_extra,groups_coeff,bonus_to_offer
0,93816214,3,10.0,control,4,1.0,10.0
1,67535877,3,10.0,control,4,1.0,10.0
2,31162874,3,10.0,control,4,1.0,10.0
3,14090479,3,10.0,control,4,1.0,10.0
4,93488555,3,10.0,control,4,1.0,10.0
...,...,...,...,...,...,...,...
19172,60980297,2,7.6,c,3,0.6,5.0
19173,65993731,2,7.6,c,3,0.6,5.0
19174,17399526,2,7.6,c,3,0.6,5.0
19175,15466266,2,7.6,c,3,0.6,5.0


In [17]:
result_df[['id', 'W: rides_extra', 'bonus_to_offer']]

Unnamed: 0,id,W: rides_extra,bonus_to_offer
0,93816214,4,10.0
1,67535877,4,10.0
2,31162874,4,10.0
3,14090479,4,10.0
4,93488555,4,10.0
...,...,...,...
19172,60980297,3,5.0
19173,65993731,3,5.0
19174,17399526,3,5.0
19175,15466266,3,5.0


In [18]:
# cohort_df.to_csv('quito_cohort.csv')
# df[['id', 'cohort', 'bonuses']].to_csv('quito_ids.csv')