<H1>Bonus</H1>

Drivers' segments description
https://docs.google.com/spreadsheets/d/11KIaZaywoBq3MymrCd8dmWJdBhFO989SNsrrSKYiB3Y/edit#gid=0

In [1]:
import pandas as pd
import numpy as np
import math
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")

<h3>Variables setup</h3>

In [2]:
path = "testdataset.csv"
cohorts = 5           # quantity of cohorts
commission = 0.095    # indriver commission
win_rate = 0.15       # %drivers move to upper cohort
period_duration = 0.5 # period to forecast prospective rides from the total period
additinal_rides = 1   # quantity of rides to be added to the calculated amount for the upper cohorts 
bonus_days = 3        # days to offer the performance bonus

In [3]:
# percentile level calc
percentile_level = {}
percentile = 0
for i in range (1, cohorts):
    percentile = 100 / cohorts + percentile
    percentile_level[i] = int(percentile)

<h3>Data upload</h3>

In [4]:
df = pd.read_csv(path)  
df['gmv'] = pd.to_numeric(df['gmv'], errors='coerce')
dates = list(df.columns)[2:]
print("total drivers:", df.shape[0])
df.sample(n=5)

total drivers: 19687


Unnamed: 0,id,gmv,28.04.22,29.04.22,30.04.22,01.05.22,02.05.22,03.05.22,04.05.22,05.05.22,...,16.05.22,17.05.22,18.05.22,19.05.22,20.05.22,21.05.22,22.05.22,23.05.22,24.05.22,25.05.22
6452,71904675,99.0,1,1,1,0,2,0,1,4,...,3,2,0,3,1,0,0,0,2,0
2319,81169226,510.0,6,3,4,7,7,6,1,4,...,11,5,3,5,0,8,1,11,7,0
16600,15789266,182.0,1,0,5,0,6,4,0,0,...,2,3,0,0,4,4,0,2,3,0
17970,28243082,534.0,8,1,0,0,3,9,6,3,...,9,9,9,5,2,1,8,0,7,11
19358,16121591,315.0,10,3,11,1,0,16,7,8,...,0,0,0,0,0,10,0,0,0,0


<h3>retention & avr. check calc</h3>

In [5]:
# weekends selection (friday, saturday, sunday) 
fridays = []
saturdays = []
sundays = []
workdays = []

dates = list(filter(lambda x: "." in x, df.columns))
for weekday in dates:
    daynum = datetime.strptime(weekday, '%d.%m.%y').isoweekday()
    if daynum == 5: fridays.append(weekday)
    elif daynum == 6: saturdays.append(weekday)
    elif daynum == 7: sundays.append(weekday)
    else: workdays.append(weekday)

In [6]:
# split the period by 2 equal "dates" parts 
if len(dates)%2 == 0:
    index_2nd = len(dates)//2
    index_1st = 0
else:
    index_2nd = len(dates)//2+1
    index_1st = 1
    
first_half = dates[index_1st : index_2nd]
second_half = dates[index_2nd : len(dates)]
total_days = len(first_half) + len(second_half)


# retention & avr_check cals
df['first_half'] = df[first_half].sum(axis=1)
df['second_half'] = df[second_half].sum(axis=1)

def retention_calc(first_half, second_half):
    try:
        retention = second_half / first_half
    except:
        retention = 0
    retention = min (1, retention)
    return retention

df['retention'] = df.apply(lambda x: retention_calc(x['first_half'], x['second_half']), axis =  1)
df['total_rides'] = df['first_half'] + df['second_half']
df['avr_check'] = df['gmv'] / df['total_rides']
df['WK_rides'] = df[fridays + saturdays + sundays].sum(axis=1) # weekends rides
df['WD_rides'] = df[workdays].sum(axis=1) # weekends rides


key_fields = ['id', 'gmv','first_half','second_half','total_rides', 'WD_rides','WK_rides', 'avr_check', 'retention'] 
 
df = df[np.isfinite(df).all(1)] # infinity values drop (comes from a division by zero)
df = df.round({'gmv': 0, 'retention': 2, 'avr_check': 1})
df[key_fields].sample(5)

Unnamed: 0,id,gmv,first_half,second_half,total_rides,WD_rides,WK_rides,avr_check,retention
12397,91936129,24.0,0,7,7,3,4,3.4,1.0
1952,14847485,63.0,9,7,16,9,7,3.9,0.78
6056,21177887,253.0,41,26,67,35,32,3.8,0.63
5632,78553126,442.0,49,76,125,54,71,3.5,1.0
8351,88355585,184.0,19,32,51,34,17,3.6,1.0


<h3>cohorts range calc</h3>

In [7]:
# exclude IDs which have zero rides during the first half of the period
df_activer_drivers = df.loc[df['first_half'] != 0]
    
all_cohorts = {}
range_start = 0

for percentile in percentile_level.keys():
    
    max_value = np.percentile(np.array(df_activer_drivers.total_rides.tolist()), percentile_level[percentile]) # percentile value
    all_cohorts[percentile] = range(int(range_start), int(max_value))
    range_start = max_value

last_percentile = int(max(list(percentile_level.keys())))+1
all_cohorts[last_percentile] = range(int(max_value),
                                     int(1e8)) # set a large number to keep the cohort range            

# cohort distribution for all IDs
def cohort_check(x, all_cohorts):
    for cohort in all_cohorts.keys():
        if x in all_cohorts[cohort]:
            return cohort
        else:
            pass      
df['cohort'] = df.apply(lambda x: cohort_check(x['total_rides'], all_cohorts), axis =  1)

print("cohorts range:")
all_cohorts

cohorts range:


{1: range(0, 10),
 2: range(10, 28),
 3: range(28, 57),
 4: range(57, 110),
 5: range(110, 100000000)}

In [8]:
key_fields.append('cohort')
df[key_fields].sample(5)

Unnamed: 0,id,gmv,first_half,second_half,total_rides,WD_rides,WK_rides,avr_check,retention,cohort
3598,112496973,249.0,36,33,69,37,32,3.6,0.92,4
14816,62055266,66.0,15,3,18,15,3,3.7,0.2,2
15425,41543423,100.0,12,15,27,22,5,3.7,1.0,2
9654,104413371,3.0,1,0,1,1,0,3.0,0.0,1
7608,81245510,172.0,25,24,49,29,20,3.5,0.96,3


<H3>cohorts features calc</H3>

In [9]:
avr_check = {}
for cohort in range(1, cohorts+1):
    avr_check[cohort] = df['avr_check'].loc[df['cohort'] == cohort].mean()

avr_rides = {}
for cohort in range(1, cohorts+1):
    avr_rides[cohort] = df['total_rides'].loc[df['cohort'] == cohort].mean()
    
avr_wd_rides = {}
for cohort in range(1, cohorts+1):
    avr_wd_rides[cohort] = df['WD_rides'].loc[df['cohort'] == cohort].mean()
    
avr_wk_rides = {}
for cohort in range(1, cohorts+1):
    avr_wk_rides[cohort] = df['WK_rides'].loc[df['cohort'] == cohort].mean()

drivers = {}
for cohort in range(1, cohorts+1):
    drivers[cohort] = df['id'].loc[df['cohort'] == cohort].count()

avr_retention = {}
for cohort in range(1, cohorts+1):
    avr_retention[cohort] = df['retention'].loc[df['cohort'] == cohort].mean()

cohort_df = pd.DataFrame(list(avr_check.items()),
                   columns=['cohort', 'avr_check'])
cohort_df['drivers'] = list(drivers.values())
cohort_df['avr_rides'] = list(avr_rides.values())
cohort_df['WK_all_rides'] = list(avr_wk_rides.values())
cohort_df['WD_all_rides'] = list(avr_wd_rides.values())


# 'WK_avr_rides' - # avr. rides on a particular weekend
weekend_days = np.mean([len(fridays), len(saturdays), len(sundays)])
cohort_df['WK_avr_rides'] = cohort_df['WK_all_rides'] / weekend_days

# 'WD_avr_rides' - # avr. rides on a particular weekend
weekend_days = len(workdays)
cohort_df['WD_avr_rides'] = cohort_df['WD_all_rides'] / weekend_days

# retention calc
cohort_df = pd.merge(df.groupby('cohort')['retention'].mean(),
                     cohort_df,
                     on="cohort")

cohort_df['avr_rev_total'] = (cohort_df['avr_check'] * cohort_df['avr_rides'] * commission).astype(int)
cohort_df = cohort_df.round({'retention': 2,
                             'avr_check': 1,
                             'avr_rides': 1,
                             'WK_all_rides': 1,
                             'WD_all_rides': 1,
                             'WK_avr_rides': 1,
                             'WD_avr_rides': 1,
                            })
cohort_df

Unnamed: 0,cohort,retention,avr_check,drivers,avr_rides,WK_all_rides,WD_all_rides,WK_avr_rides,WD_avr_rides,avr_rev_total
0,1,0.57,3.7,4827,3.9,1.8,2.1,0.4,0.1,1
1,2,0.69,3.6,3819,17.5,8.4,9.1,2.1,0.6,5
2,3,0.77,3.6,3636,40.7,19.3,21.4,4.8,1.3,13
3,4,0.82,3.6,3481,80.4,38.0,42.5,9.5,2.7,27
4,5,0.88,3.6,3414,166.2,77.3,88.8,19.3,5.6,56


<h3>Target Rides and Incentive</h3>

In [10]:
# bonus to be given as an inventive by groups, % from average check of the cohort  
# 1 equals 100% from the GMV of the target rides
# example: gmv = $10, 0.35 = $3.5 as the bonus

bonuses = {           
    1: 0.35,
    2: 0.35,
    3: 0.35,
    4: 0.35,
    5: 0}

<h4>Target rides</h4>

In [11]:
cohort_df['cohort_rides'] = cohort_df[['WK_avr_rides', 'WD_avr_rides']].max(axis=1)

# rounding up all rides
cohort_df['cohort_rides'] = cohort_df['cohort_rides'].apply(lambda x: math.ceil(x))

# calc the target rides
new_rides = {}
for cohort in range(1, cohorts):
    new_rides[cohort] = cohort_df.loc[cohort,'cohort_rides'] - cohort_df.loc[cohort-1,'cohort_rides']

new_rides[cohorts] = 0 # set zero for the last upper cohort as there is no insentive
cohort_df['WK_rides_extra'] = cohort_df['cohort'].map(new_rides)
    # 'WK_rides_extra' - Weekend Extra Rides to be done
    
cohort_df['rides_tbd'] = cohort_df['cohort_rides'] + cohort_df['WK_rides_extra']
cohort_df.loc[cohorts-1,'rides_tbd'] = 0 # the target for the senior cohort shall be zero

# incentive for drivers
cohort_df['bonus, %'] = cohort_df['cohort'].map(bonuses)
cohort_df['driver_bonus'] = cohort_df['WK_rides_extra'] * cohort_df['avr_check'] * cohort_df['bonus, %']
cohort_df = cohort_df.round({'driver_bonus': 0})

cohort_df[['cohort',
           'WD_avr_rides',
           'WK_avr_rides',
           'cohort_rides',
           'WK_rides_extra',
           'rides_tbd',
           'driver_bonus',
           'avr_check']]

Unnamed: 0,cohort,WD_avr_rides,WK_avr_rides,cohort_rides,WK_rides_extra,rides_tbd,driver_bonus,avr_check
0,1,0.1,0.4,1,2,3,3.0,3.7
1,2,0.6,2.1,3,2,5,3.0,3.6
2,3,1.3,4.8,5,5,10,6.0,3.6
3,4,2.7,9.5,10,10,20,13.0,3.6
4,5,5.6,19.3,20,0,0,0.0,3.6


<h3>Business case calc</h3>

<h3>Instant revenue and costs</h3>

In [12]:
# instant revenue
cohort_df['win_rate'] = win_rate 

# costs
# cohort_df['drivers_extra_rev'] = cohort_df['WK_rides_extra'] * cohort_df['avr_check'] 
cohort_df['costs'] = cohort_df['driver_bonus'] * cohort_df['drivers'] * win_rate

# extra revenue for the company
cohort_df['ID_extra_rev'] = cohort_df['WK_rides_extra'] \
                            * cohort_df['avr_check'] \
                            * cohort_df['drivers'] \
                            * win_rate \
                            * commission

instant_rev = cohort_df['costs'].sum()
instant_cost = cohort_df['ID_extra_rev'].sum()

print("instant revenue:", int(instant_rev))
print("instant costs  :", int(instant_cost))
print("instant balance:", int(instant_rev - instant_cost))

instant revenue: 13951
instant costs  : 3619
instant balance: 10331


<h2>Perspective revenue projection</h2>

<h3>Target is not fullfilled</h3>

In [13]:
# target IS NOT fulfilled
# 'P: not FF: rev' - Perspective Not Fulfilled: Revenue

cohort_df['P: not FF: rev'] = cohort_df['avr_rides'] \
                                * cohort_df['avr_check'] \
                                * cohort_df['retention'] \
                                * period_duration \
                                * commission
cohort_df.loc[cohorts-1,'P: not FF: rev'] = 0 #keep the last cohort zero as no an insentive for the cohort

<h3>Target is fullfilled</h3>

In [14]:

# target IS fulfilled
FF_rides = {}
for i in range (1, cohorts):
    FF_rides[i] = avr_rides[i+1] * period_duration
FF_rides[cohorts] = 0
cohort_df['P: FF: rides'] = list(FF_rides.values())

FF_gmv = {}
for i in range (1, cohorts):
    FF_gmv[i] = int(FF_rides[i] * avr_check[i+1])
FF_gmv[cohorts] = 0
cohort_df['P: FF: gmv'] = list(FF_gmv.values())

FF_rev = {}
for i in range (1, cohorts):
    FF_rev[i] = int(FF_gmv[i] * commission * avr_retention[i+1])
FF_rev[cohorts] = 0
cohort_df['P: FF: rev'] = list(FF_rev.values())

cohort_df['P: rev_up'] = (cohort_df['P: FF: rev'] - cohort_df['P: not FF: rev']) \
                                * win_rate \
                                * cohort_df['drivers']

persp_rev_up = (cohort_df['P: rev_up'].sum())
revenue_total = cohort_df['ID_extra_rev'].sum() + persp_rev_up

print('instant revenue      :', int(instant_rev))
print('perspective revenue  :', int(persp_rev_up))
print('revenue total        :', int(revenue_total))
print('costs:               :', int(instant_cost))

print('profit:              :', int(revenue_total - instant_cost))


cohort_df.round(1)

instant revenue      : 13951
perspective revenue  : 12568
revenue total        : 16187
costs:               : 3619
profit:              : 12568


Unnamed: 0,cohort,retention,avr_check,drivers,avr_rides,WK_all_rides,WD_all_rides,WK_avr_rides,WD_avr_rides,avr_rev_total,...,"bonus, %",driver_bonus,win_rate,costs,ID_extra_rev,P: not FF: rev,P: FF: rides,P: FF: gmv,P: FF: rev,P: rev_up
0,1,0.6,3.7,4827,3.9,1.8,2.1,0.4,0.1,1,...,0.4,3.0,0.2,2172.2,509.0,0.4,8.7,31,2,1165.2
1,2,0.7,3.6,3819,17.5,8.4,9.1,2.1,0.6,5,...,0.4,3.0,0.2,1718.6,391.8,2.1,20.4,72,5,1681.4
2,3,0.8,3.6,3636,40.7,19.3,21.4,4.8,1.3,13,...,0.4,6.0,0.2,3272.4,932.6,5.4,40.2,143,11,3076.6
3,4,0.8,3.6,3481,80.4,38.0,42.5,9.5,2.7,27,...,0.4,13.0,0.2,6788.0,1785.8,11.3,83.1,297,24,6645.0
4,5,0.9,3.6,3414,166.2,77.3,88.8,19.3,5.6,56,...,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0,0,0.0


<h2>Split insentive by cohorts and test groups</h2>

In [15]:
# some code for split cohorts on groups # 

bonuses_split = {
    "control": 0,
    "a": 1,       # 100% of target bonuses will be given
    "b": 0.8,     # 
    "c": 0.6}     # 

group_split = {
    "control": 0.25,
    "a": 0.25,
    "b": 0.25,
    "c": 0.25}

if sum(group_split.values()) != 1: print("split in test/control groups is NOT correct") 

In [16]:
df_groups = pd.DataFrame(columns = ['id', 'groups'])
for cohort in range (1, cohorts+1):
    group_length = df[df['cohort'] == cohort].shape[0]
    control = ['control' for i in range(1, int(group_length * group_split['control']))]
    a = ['a' for i in range(1, int(group_length * group_split['a']))]
    b = ['b' for i in range(1, int(group_length * group_split['b']))]
    c = ['c' for i in range (1, group_length - len(control) - len(a) - len(b) + 1)]

    df_group = df[df['cohort'] == cohort]
    df_group['groups'] = control + a + b + c
    df_group = df_group[['id', 'groups']]
    df_groups = pd.concat([df_groups, df_group])
    
df = df.merge(df_groups, how = 'left', on = 'id')

print("Split by group and cohorts\n")
print("total drivers:", df.shape[0])
for cohort in range (1, cohorts + 1):
    print(" cohort", cohort, ":", df[df['cohort'] == cohort].shape[0])
    for group in list(group_split.keys()):
        print("  ", group, ":", df[(df['cohort'] == cohort) & (df['groups'] == group)].shape[0])

Split by group and cohorts

total drivers: 19177
 cohort 1 : 4827
   control : 1205
   a : 1205
   b : 1205
   c : 1212
 cohort 2 : 3819
   control : 953
   a : 953
   b : 953
   c : 960
 cohort 3 : 3636
   control : 908
   a : 908
   b : 908
   c : 912
 cohort 4 : 3481
   control : 869
   a : 869
   b : 869
   c : 874
 cohort 5 : 3414
   control : 852
   a : 852
   b : 852
   c : 858


In [17]:
cohort_df.head()

Unnamed: 0,cohort,retention,avr_check,drivers,avr_rides,WK_all_rides,WD_all_rides,WK_avr_rides,WD_avr_rides,avr_rev_total,...,"bonus, %",driver_bonus,win_rate,costs,ID_extra_rev,P: not FF: rev,P: FF: rides,P: FF: gmv,P: FF: rev,P: rev_up
0,1,0.57,3.7,4827,3.9,1.8,2.1,0.4,0.1,1,...,0.35,3.0,0.15,2172.15,509.00715,0.390692,8.745614,31,2,1165.219276
1,2,0.69,3.6,3819,17.5,8.4,9.1,2.1,0.6,5,...,0.35,3.0,0.15,1718.55,391.8294,2.064825,20.359323,72,5,1681.414999
2,3,0.77,3.6,3636,40.7,19.3,21.4,4.8,1.3,13,...,0.35,6.0,0.15,3272.4,932.634,5.358969,40.216174,143,11,3076.618307
3,4,0.82,3.6,3481,80.4,38.0,42.5,9.5,2.7,27,...,0.35,13.0,0.15,6787.95,1785.753,11.273688,83.093292,297,24,6645.043811
4,5,0.88,3.6,3414,166.2,77.3,88.8,19.3,5.6,56,...,0.0,0.0,0.15,0.0,0.0,0.0,0.0,0,0,0.0


In [21]:
result_df = df.merge(cohort_df, how='inner', on='cohort')[['id','cohort','driver_bonus','groups', 'rides_tbd']]

result_df['groups_coeff'] = result_df['groups'].apply(lambda x: bonuses_split[x])
result_df['bonus_to_offer'] = result_df['groups_coeff'] * result_df['driver_bonus']
result_df['bonus_to_offer'] = result_df['bonus_to_offer'].apply(lambda x: round(x,0))
result_df.sample(5)

Unnamed: 0,id,cohort,driver_bonus,groups,rides_tbd,groups_coeff,bonus_to_offer
4089,29429113,1,3.0,control,3,0.0,0.0
16509,66593917,2,3.0,a,5,1.0,3.0
11813,18840484,4,13.0,c,20,0.6,8.0
9395,94741580,4,13.0,a,20,1.0,13.0
11540,67400811,4,13.0,c,20,0.6,8.0


In [19]:
for cohort in range(1, cohorts):
    print('\ncohort:', cohort)
    print(result_df[result_df.cohort == cohort].groupby(['groups']).mean())


cohort: 1
         cohort  driver_bonus  rides_tbd  groups_coeff  bonus_to_offer
groups                                                                
a           1.0           3.0        3.0           1.0             3.0
b           1.0           3.0        3.0           0.8             2.0
c           1.0           3.0        3.0           0.6             2.0
control     1.0           3.0        3.0           0.0             0.0

cohort: 2
         cohort  driver_bonus  rides_tbd  groups_coeff  bonus_to_offer
groups                                                                
a           2.0           3.0        5.0           1.0             3.0
b           2.0           3.0        5.0           0.8             2.0
c           2.0           3.0        5.0           0.6             2.0
control     2.0           3.0        5.0           0.0             0.0

cohort: 3
         cohort  driver_bonus  rides_tbd  groups_coeff  bonus_to_offer
groups                                      

In [20]:
result_df.to_csv('bonus_table.csv')