In [182]:
import pandas as pd 
import numpy as np 
import random

## Revenue
### Generating dataset

In [183]:
base_revenue = 1731985.22

In [184]:
countries = ['UK', 'France', 'Germany', 'Italy', 'Spain'] 
total_countries = 20 

In [185]:
base_df = pd.DataFrame({'country': countries})

In [186]:
base_df['number'] = list(map(lambda x: x + 1, range(base_df.shape[0])))

In [187]:
base_df['weight'] = 1/base_df.number

In [188]:
other_weight = 0 
for i in range(base_df.shape[0] + 1, total_countries + 1):
    other_weight += 1/i # modelling Zipf distribution

In [189]:
base_df = pd.concat([base_df.drop('number', axis = 1), pd.DataFrame([{'country': 'other', 'weight': other_weight}])], 
         axis = 0)

In [190]:
base_df['weight'] = base_df.weight / base_df.weight.sum()

In [191]:
base_df['total_revenue'] = base_df.weight * base_revenue

In [192]:
base_df['new_users_share'] = list(map(
    lambda x: np.random.normal(0.3, 0.05), 
                                 base_df.weight))

In [193]:
base_df['new_users_revenue'] = base_df.total_revenue * base_df.new_users_share 
base_df['existing_users_revenue'] = base_df.total_revenue * (1 - base_df.new_users_share)

In [194]:
tmp = []

for rec in base_df.to_dict('records'):
    tmp.append(
        {
            'country': rec['country'],
            'maturity': 'new', 
            'revenue': rec['new_users_revenue']
        }
    )

    tmp.append(
        {
            'country': rec['country'],
            'maturity': 'existing', 
            'revenue': rec['existing_users_revenue']
        }
    )

In [195]:
df = pd.DataFrame(tmp)

In [196]:
df['revenue'] = df.revenue.map(lambda x: round(x, 2))

In [197]:
df = df.rename(columns = {'revenue': 'revenue_before'})

In [198]:
df

Unnamed: 0,country,maturity,revenue_before
0,UK,new,128324.22
1,UK,existing,353085.05
2,France,new,57901.91
3,France,existing,182802.72
4,Germany,new,48105.83
5,Germany,existing,112363.92
6,Italy,new,36941.57
7,Italy,existing,83410.74
8,Spain,new,32394.1
9,Spain,existing,63887.76


In [199]:
def get_weight_scenario_1(country, maturity):
    if maturity == 'new': 
        return np.random.normal(0.3, 0.05) 
    return np.random.normal(0.8, 0.05)

df['weight_scenario_1'] = list(map(
    get_weight_scenario_1,
    df.country,
    df.maturity
))

In [200]:
def get_weight_scenario_2(country, maturity):
    if (maturity == 'existing') and (country == 'France'): 
        return np.random.normal(0.3, 0.05) 
    return np.random.normal(1, 0.01)

df['weight_scenario_2'] = list(map(
    get_weight_scenario_2,
    df.country,
    df.maturity
))

In [201]:
df['revenue_after_scenario_1'] = (df.weight_scenario_1 * df.revenue_before).map(lambda x: round(x, 2))
df['revenue_after_scenario_2'] = (df.weight_scenario_2 * df.revenue_before).map(lambda x: round(x, 2))

In [202]:
lost_revenue = df[(df.country == 'France') & (df.maturity == 'existing')].iloc[0].revenue_before - df[(df.country == 'France') & (df.maturity == 'existing')].iloc[0].revenue_after_scenario_2

In [203]:
def get_scenario_3(country, maturity, base_revenue):
    if (maturity == 'existing') and (country == 'Spain'):
        return base_revenue + 0.5*lost_revenue
    return base_revenue

df['revenue_after_scenario_3'] = list(map(
    get_scenario_3,
    df.country, 
    df.maturity, 
    df.revenue_after_scenario_2))

### Analysis

In [204]:
def calculate_simple_growth_metrics(stats_df):
    before = stats_df.before.sum()
    after = stats_df.after.sum()
    print('Metric change: %.2f -> %.2f (%.2f%%)' % (before, after, 100*(after - before)/before))
    stats_df['diff_abs'] = stats_df.after - stats_df.before
    stats_df['diff_rate'] = 100*stats_df.diff_abs/stats_df.before
    stats_df['share_in_diff'] = 100*stats_df.diff_abs / stats_df.diff_abs.sum()
    stats_df['share_before'] = 100* stats_df.before / stats_df.before.sum()
    stats_df['impact_coef'] = stats_df.share_in_diff/stats_df.share_before
    return stats_df.sort_values('impact_coef', ascending = False)

In [205]:
calculate_simple_growth_metrics(
    df.groupby('country')[['revenue_before', 'revenue_after_scenario_1']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_1': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1107924.43 (-36.03%)


Unnamed: 0_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Spain,96281.86,55061.02,-41220.84,-42.812675,6.60526,5.559046,1.1882
Italy,120352.31,71306.71,-49045.6,-40.75169,7.859106,6.948807,1.131001
other,632767.39,381711.22,-251056.17,-39.675902,40.229442,36.534226,1.101144
Germany,160469.75,105955.86,-54513.89,-33.971443,8.735349,9.265076,0.942825
France,240704.63,160645.18,-80059.45,-33.260453,12.82879,13.897615,0.923093
UK,481409.27,333244.44,-148164.83,-30.777311,23.742051,27.79523,0.854177


In [206]:
calculate_simple_growth_metrics(
    df.groupby('country')[['revenue_before', 'revenue_after_scenario_2']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_2': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1599065.55 (-7.67%)


Unnamed: 0_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
France,240704.63,107857.06,-132847.57,-55.191115,99.945764,13.897615,7.191577
UK,481409.27,477033.02,-4376.25,-0.90905,3.292402,27.79523,0.118452
Germany,160469.75,159778.76,-690.99,-0.430605,0.519855,9.265076,0.056109
Spain,96281.86,96064.77,-217.09,-0.225473,0.163324,5.559046,0.02938
other,632767.39,637000.48,4233.09,0.66898,-3.184698,36.534226,-0.08717
Italy,120352.31,121331.46,979.15,0.81357,-0.736648,6.948807,-0.106011


In [207]:
calculate_simple_growth_metrics(
    df.groupby('country')[['revenue_before', 'revenue_after_scenario_3']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_3': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1665375.27 (-3.85%)


Unnamed: 0_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
France,240704.63,107857.06,-132847.57,-55.191115,199.441074,13.897615,14.350741
UK,481409.27,477033.02,-4376.25,-0.90905,6.569966,27.79523,0.23637
Germany,160469.75,159778.76,-690.99,-0.430605,1.037368,9.265076,0.111965
other,632767.39,637000.48,4233.09,0.66898,-6.355043,36.534226,-0.173948
Italy,120352.31,121331.46,979.15,0.81357,-1.469976,6.948807,-0.211544
Spain,96281.86,162374.495,66092.635,68.64495,-99.223389,5.559046,-17.848995


In [208]:
calculate_simple_growth_metrics(
    df.groupby(['country', 'maturity'])[['revenue_before', 'revenue_after_scenario_1']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_1': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1107924.43 (-36.03%)


Unnamed: 0_level_0,Unnamed: 1_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,maturity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Spain,new,32394.1,7758.9,-24635.2,-76.048416,3.947564,1.870345,2.110607
UK,new,128324.22,34838.87,-93485.35,-72.850901,14.980167,7.409083,2.021865
Germany,new,48105.83,13678.94,-34426.89,-71.564902,5.516592,2.777497,1.986174
France,new,57901.91,17443.06,-40458.85,-69.874811,6.483159,3.343095,1.939268
other,new,233958.42,72666.89,-161291.53,-68.940254,25.845484,13.508107,1.913331
Italy,new,36941.57,11615.29,-25326.28,-68.557671,4.058303,2.132903,1.902713
Italy,existing,83410.74,59691.42,-23719.32,-28.43677,3.800803,4.815904,0.789219
Spain,existing,63887.76,47302.12,-16585.64,-25.960591,2.657696,3.688701,0.720496
other,existing,398808.97,309044.33,-89764.64,-22.50818,14.383958,23.026119,0.62468
France,existing,182802.72,143202.12,-39600.6,-21.663026,6.345632,10.55452,0.601224


In [209]:
calculate_simple_growth_metrics(
    df.groupby(['country', 'maturity'])[['revenue_before', 'revenue_after_scenario_3']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_3': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1665375.27 (-3.85%)


Unnamed: 0_level_0,Unnamed: 1_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,maturity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
France,existing,182802.72,50183.27,-132619.45,-72.547854,199.098603,10.55452,18.863824
UK,existing,353085.05,349728.15,-3356.9,-0.950734,5.039639,20.386147,0.247209
UK,new,128324.22,127304.87,-1019.35,-0.794355,1.530327,7.409083,0.206547
Germany,new,48105.83,47795.22,-310.61,-0.645681,0.466312,2.777497,0.167889
France,new,57901.91,57673.79,-228.12,-0.393977,0.342471,3.343095,0.102441
Germany,existing,112363.92,111983.54,-380.38,-0.338525,0.571056,6.48758,0.088023
Italy,new,36941.57,36893.3,-48.27,-0.130666,0.072467,2.132903,0.033976
other,new,233958.42,235306.99,1348.57,0.576414,-2.024578,13.508107,-0.149879
other,existing,398808.97,401693.49,2884.52,0.723284,-4.330465,23.026119,-0.188068
Spain,new,32394.1,32674.67,280.57,0.866115,-0.421213,1.870345,-0.225206


In [217]:
df[['country', 'maturity', 'revenue_before', 'revenue_after_scenario_1', 
    'revenue_after_scenario_2', 'revenue_after_scenario_3']].to_csv('absolute_metrics_example.csv', index = False, sep = '\t')

## Conversion
### Generating dataset

In [243]:
conv_df = df[['country', 'maturity', 'revenue_before']].copy()

In [244]:
conv_df['users_before'] = conv_df.revenue_before.map(lambda x: int(x/18))

In [245]:
def get_conversion_before(maturity):
    if maturity == 'new': 
        return np.random.normal(0.1, 0.05)
    return np.random.normal(0.75, 0.1)

In [246]:
conv_df['conversion_before'] = conv_df.maturity.map(get_conversion_before)

In [247]:
conv_df['converted_users_before'] = (conv_df.conversion_before * conv_df.users_before).map(int)

In [248]:
conv_df['users_after_scenario_1'] = list(map(
    lambda x: int(np.random.normal(1, 0.02) * x),
    conv_df.users_before
))

In [249]:
def get_conversion_after_scenario_1(maturity, conv_before):
    if maturity == 'new': 
        return conv_before + np.random.normal(0.2, 0.005)
    return conv_before + np.random.normal(0, 0.005)

In [250]:
conv_df['conversion_after_scenario_1'] = list(map(
    get_conversion_after_scenario_1,
    conv_df.maturity,
    conv_df.conversion_before
))

In [251]:
conv_df['converted_users_after_scenario_1'] = (conv_df.conversion_after_scenario_1 * conv_df.users_after_scenario_1).map(int)

In [252]:
def calculate_effects(df, numerator_field1, denominator_field1, 
                       numerator_field2, denominator_field2, dimensions):
    cmp_df = df.groupby(dimensions)[[numerator_field1, denominator_field1, numerator_field2, denominator_field2]].sum()
    cmp_df = cmp_df.rename(columns = {
        numerator_field1: 'j1', 
        numerator_field2: 'j2',
        denominator_field1: 'f1', 
        denominator_field2: 'f2'
    })
    cmp_df['prev_rate'] = cmp_df['j1']/cmp_df['f1']
    cmp_df['curr_rate'] = cmp_df['j2']/cmp_df['f2']
    
    y1 = cmp_df['j1'].sum()
    a1 = cmp_df['f1'].sum()
    y2 = cmp_df['j2'].sum()
    a2 = cmp_df['f2'].sum()
    
    cmp_df['df'] = (a1*cmp_df.f2 - a2*cmp_df.f1)/(a2 - cmp_df.f2)
    cmp_df['total_effect'] = (y1 - cmp_df.j1 + (cmp_df.f1 + cmp_df.df)*cmp_df.curr_rate)/(a1 + cmp_df.df) - y1/a1
    cmp_df['mix_change_effect'] = (y1 + cmp_df.df*cmp_df.prev_rate)/(a1 + cmp_df.df) - y1/a1
    cmp_df['conversion_change_effect'] = (cmp_df.f1*cmp_df.j2 - cmp_df.f2*cmp_df.j1)/(a1 * cmp_df.f2)
    
    for col in ['total_effect', 'mix_change_effect', 'conversion_change_effect', 'curr_rate', 'prev_rate']:
        cmp_df[col] = 100*cmp_df[col]
        
    cmp_df['success_rate_diff'] = cmp_df.curr_rate - cmp_df.prev_rate
    cmp_df['prev_dim_share'] = 100*cmp_df.f1/a1
    cmp_df['curr_dim_share'] = 100*cmp_df.f2/a2
    cmp_df = cmp_df[['f1', 'f2', 'prev_dim_share', 'curr_dim_share', 'prev_rate', 'curr_rate', 'success_rate_diff',
                     'total_effect', 'mix_change_effect', 'conversion_change_effect']]
    print('previous success rate = %.2f' % (100*y1/a1))
    print('current success rate = %.2f' % (100*y2/a2))
    print('total success rate change = %.2f' % (100*(y2/a2 - y1/a1)))
    return cmp_df

In [253]:
conv_df

Unnamed: 0,country,maturity,revenue_before,users_before,conversion_before,converted_users_before,users_after_scenario_1,conversion_after_scenario_1,converted_users_after_scenario_1
0,UK,new,128324.22,7129,0.140998,1005,7284,0.332614,2422
1,UK,existing,353085.05,19615,0.762442,14955,19670,0.765816,15063
2,France,new,57901.91,3216,0.066715,214,3324,0.270944,900
3,France,existing,182802.72,10155,0.705992,7169,10310,0.710888,7329
4,Germany,new,48105.83,2672,0.087291,233,2691,0.297247,799
5,Germany,existing,112363.92,6242,0.808362,5045,6452,0.815428,5261
6,Italy,new,36941.57,2052,0.113624,233,2014,0.308563,621
7,Italy,existing,83410.74,4633,0.676222,3132,4587,0.678528,3112
8,Spain,new,32394.1,1799,0.09378,168,1728,0.29104,502
9,Spain,existing,63887.76,3549,0.752769,2671,3630,0.756325,2745


In [254]:
calculate_effects(
    conv_df, 'converted_users_before', 'users_before', 
    'converted_users_after_scenario_1', 'users_after_scenario_1',
    'country'
)

previous success rate = 50.03
current success rate = 56.29
total success rate change = 6.27


Unnamed: 0_level_0,f1,f2,prev_dim_share,curr_dim_share,prev_rate,curr_rate,success_rate_diff,total_effect,mix_change_effect,conversion_change_effect
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
France,13371,13634,13.897002,14.078021,55.216513,60.356462,5.139948,0.734516,0.010913,0.714299
Germany,8914,9143,9.264668,9.440762,59.210231,66.280214,7.069983,0.685286,0.017825,0.65501
Italy,6685,6601,6.947981,6.815976,50.336574,56.552038,6.215463,0.423203,-0.000441,0.431849
Spain,5348,5358,5.558385,5.532495,53.085266,60.600971,7.515705,0.414967,-0.000839,0.417752
UK,26744,26954,27.796082,27.831815,59.676937,64.869778,5.192841,1.450039,0.004777,1.443406
other,35153,35156,36.535883,36.300931,37.854522,44.837297,6.982775,2.579871,0.045058,2.551219


In [255]:
calculate_effects(
    conv_df, 'converted_users_before', 'users_before', 
    'converted_users_after_scenario_1', 'users_after_scenario_1',
    'maturity'
)

previous success rate = 50.03
current success rate = 56.29
total success rate change = 6.27


Unnamed: 0_level_0,f1,f2,prev_dim_share,curr_dim_share,prev_rate,curr_rate,success_rate_diff,total_effect,mix_change_effect,conversion_change_effect
maturity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
existing,66350,66411,68.960141,68.573818,69.862849,70.336239,0.473391,0.077725,-0.246897,0.326451
new,29865,30435,31.039859,31.426182,5.953457,25.648103,19.694645,5.942378,-0.246897,6.11319


In [256]:
conv_df['users_after_scenario_2'] = list(map(
    lambda x, y, z: int(z) if ((x == 'existing') or (y != 'UK')) else int(np.random.normal(10, 0.01)*z),
    conv_df.maturity,
    conv_df.country,
    conv_df.users_after_scenario_1
))

In [257]:
conv_df['conversion_after_scenario_2'] = conv_df.conversion_before.map(
    lambda x: x + np.random.normal(0.1, 0.01)
)

In [258]:
conv_df['converted_users_after_scenario_2'] = (conv_df['users_after_scenario_2'] * conv_df.conversion_after_scenario_2).map(int)

In [259]:
calculate_effects(
    conv_df, 'converted_users_before', 'users_before', 
    'converted_users_after_scenario_2', 'users_after_scenario_2',
    'country'
)

previous success rate = 50.03
current success rate = 45.05
total success rate change = -4.97


Unnamed: 0_level_0,f1,f2,prev_dim_share,curr_dim_share,prev_rate,curr_rate,success_rate_diff,total_effect,mix_change_effect,conversion_change_effect
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
France,13371,13634,13.897002,8.396768,55.216513,62.014082,6.797569,0.239173,-0.331603,0.944658
Germany,8914,9143,9.264668,5.630897,59.210231,69.791097,10.580866,0.227966,-0.367832,0.980282
Italy,6685,6601,6.947981,4.065356,50.336574,59.869717,9.533142,0.377918,-0.009638,0.662361
Spain,5348,5358,5.558385,3.29983,53.085266,62.896603,9.811338,0.250583,-0.073175,0.545352
UK,26744,92480,27.796082,56.955633,59.676937,37.029628,-22.647309,-9.001171,3.897747,-6.295064
other,35153,35156,36.535883,21.651516,37.854522,47.641939,9.787417,4.9736,2.854476,3.575919


In [260]:
calculate_effects(
    conv_df, 'converted_users_before', 'users_before', 
    'converted_users_after_scenario_2', 'users_after_scenario_2',
    ['maturity', 'country']
).reset_index()

previous success rate = 50.03
current success rate = 45.05
total success rate change = -4.97


Unnamed: 0,maturity,country,f1,f2,prev_dim_share,curr_dim_share,prev_rate,curr_rate,success_rate_diff,total_effect,mix_change_effect,conversion_change_effect
0,existing,France,10155,10310,10.554487,6.349617,70.595766,77.371484,6.775718,-0.536786,-0.967018,0.715142
1,existing,Germany,6242,6452,6.487554,3.973592,80.823454,91.382517,10.559063,-0.408391,-0.827965,0.685025
2,existing,Italy,4633,4587,4.815257,2.824994,67.601986,76.913015,9.311029,-0.10448,-0.367516,0.44835
3,existing,Spain,3549,3630,3.688614,2.235607,75.260637,83.884298,8.623661,-0.187921,-0.380712,0.318094
4,existing,UK,19615,19670,20.386634,12.114158,76.242671,85.363498,9.120826,-1.61927,-2.724181,1.859429
5,existing,other,22156,21762,23.027594,13.402557,60.398989,69.929234,9.530245,-0.019864,-1.297161,2.194586
6,new,France,3216,3324,3.342514,2.047151,6.654229,14.380265,7.726036,0.739407,0.581243,0.258244
7,new,Germany,2672,2691,2.777114,1.657305,8.72006,18.02304,9.30298,0.629932,0.475754,0.258354
8,new,Italy,2052,2014,2.132724,1.240362,11.354776,21.052632,9.697856,0.472891,0.352603,0.206828
9,new,Spain,1799,1728,1.869771,1.064223,9.338521,18.80787,9.469349,0.434773,0.333998,0.177055


In [261]:
conv_df[['country', 'maturity', 'users_before', 'converted_users_before',
         'users_after_scenario_1', 'converted_users_after_scenario_1',
         'users_after_scenario_2', 'converted_users_after_scenario_2']].to_csv('conversion_metrics_example.csv', index = False, sep = '\t')