In [182]:
import pandas as pd 
import numpy as np 
import random

## Revenue
### Generating dataset

In [183]:
base_revenue = 1731985.22

In [184]:
countries = ['UK', 'France', 'Germany', 'Italy', 'Spain'] 
total_countries = 20 

In [185]:
base_df = pd.DataFrame({'country': countries})

In [186]:
base_df['number'] = list(map(lambda x: x + 1, range(base_df.shape[0])))

In [187]:
base_df['weight'] = 1/base_df.number

In [188]:
other_weight = 0 
for i in range(base_df.shape[0] + 1, total_countries + 1):
    other_weight += 1/i # modelling Zipf distribution

In [189]:
base_df = pd.concat([base_df.drop('number', axis = 1), pd.DataFrame([{'country': 'other', 'weight': other_weight}])], 
         axis = 0)

In [190]:
base_df['weight'] = base_df.weight / base_df.weight.sum()

In [191]:
base_df['total_revenue'] = base_df.weight * base_revenue

In [192]:
base_df['new_users_share'] = list(map(
    lambda x: np.random.normal(0.3, 0.05), 
                                 base_df.weight))

In [193]:
base_df['new_users_revenue'] = base_df.total_revenue * base_df.new_users_share 
base_df['existing_users_revenue'] = base_df.total_revenue * (1 - base_df.new_users_share)

In [194]:
tmp = []

for rec in base_df.to_dict('records'):
    tmp.append(
        {
            'country': rec['country'],
            'maturity': 'new', 
            'revenue': rec['new_users_revenue']
        }
    )

    tmp.append(
        {
            'country': rec['country'],
            'maturity': 'existing', 
            'revenue': rec['existing_users_revenue']
        }
    )

In [195]:
df = pd.DataFrame(tmp)

In [196]:
df['revenue'] = df.revenue.map(lambda x: round(x, 2))

In [197]:
df = df.rename(columns = {'revenue': 'revenue_before'})

In [198]:
df

Unnamed: 0,country,maturity,revenue_before
0,UK,new,128324.22
1,UK,existing,353085.05
2,France,new,57901.91
3,France,existing,182802.72
4,Germany,new,48105.83
5,Germany,existing,112363.92
6,Italy,new,36941.57
7,Italy,existing,83410.74
8,Spain,new,32394.1
9,Spain,existing,63887.76


In [199]:
def get_weight_scenario_1(country, maturity):
    if maturity == 'new': 
        return np.random.normal(0.3, 0.05) 
    return np.random.normal(0.8, 0.05)

df['weight_scenario_1'] = list(map(
    get_weight_scenario_1,
    df.country,
    df.maturity
))

In [200]:
def get_weight_scenario_2(country, maturity):
    if (maturity == 'existing') and (country == 'France'): 
        return np.random.normal(0.3, 0.05) 
    return np.random.normal(1, 0.01)

df['weight_scenario_2'] = list(map(
    get_weight_scenario_2,
    df.country,
    df.maturity
))

In [201]:
df['revenue_after_scenario_1'] = (df.weight_scenario_1 * df.revenue_before).map(lambda x: round(x, 2))
df['revenue_after_scenario_2'] = (df.weight_scenario_2 * df.revenue_before).map(lambda x: round(x, 2))

In [202]:
lost_revenue = df[(df.country == 'France') & (df.maturity == 'existing')].iloc[0].revenue_before - df[(df.country == 'France') & (df.maturity == 'existing')].iloc[0].revenue_after_scenario_2

In [203]:
def get_scenario_3(country, maturity, base_revenue):
    if (maturity == 'existing') and (country == 'Spain'):
        return base_revenue + 0.5*lost_revenue
    return base_revenue

df['revenue_after_scenario_3'] = list(map(
    get_scenario_3,
    df.country, 
    df.maturity, 
    df.revenue_after_scenario_2))

### Analysis

In [204]:
def calculate_simple_growth_metrics(stats_df):
    before = stats_df.before.sum()
    after = stats_df.after.sum()
    print('Metric change: %.2f -> %.2f (%.2f%%)' % (before, after, 100*(after - before)/before))
    stats_df['diff_abs'] = stats_df.after - stats_df.before
    stats_df['diff_rate'] = 100*stats_df.diff_abs/stats_df.before
    stats_df['share_in_diff'] = 100*stats_df.diff_abs / stats_df.diff_abs.sum()
    stats_df['share_before'] = 100* stats_df.before / stats_df.before.sum()
    stats_df['impact_coef'] = stats_df.share_in_diff/stats_df.share_before
    return stats_df.sort_values('impact_coef', ascending = False)

In [205]:
calculate_simple_growth_metrics(
    df.groupby('country')[['revenue_before', 'revenue_after_scenario_1']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_1': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1107924.43 (-36.03%)


Unnamed: 0_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Spain,96281.86,55061.02,-41220.84,-42.812675,6.60526,5.559046,1.1882
Italy,120352.31,71306.71,-49045.6,-40.75169,7.859106,6.948807,1.131001
other,632767.39,381711.22,-251056.17,-39.675902,40.229442,36.534226,1.101144
Germany,160469.75,105955.86,-54513.89,-33.971443,8.735349,9.265076,0.942825
France,240704.63,160645.18,-80059.45,-33.260453,12.82879,13.897615,0.923093
UK,481409.27,333244.44,-148164.83,-30.777311,23.742051,27.79523,0.854177


In [206]:
calculate_simple_growth_metrics(
    df.groupby('country')[['revenue_before', 'revenue_after_scenario_2']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_2': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1599065.55 (-7.67%)


Unnamed: 0_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
France,240704.63,107857.06,-132847.57,-55.191115,99.945764,13.897615,7.191577
UK,481409.27,477033.02,-4376.25,-0.90905,3.292402,27.79523,0.118452
Germany,160469.75,159778.76,-690.99,-0.430605,0.519855,9.265076,0.056109
Spain,96281.86,96064.77,-217.09,-0.225473,0.163324,5.559046,0.02938
other,632767.39,637000.48,4233.09,0.66898,-3.184698,36.534226,-0.08717
Italy,120352.31,121331.46,979.15,0.81357,-0.736648,6.948807,-0.106011


In [207]:
calculate_simple_growth_metrics(
    df.groupby('country')[['revenue_before', 'revenue_after_scenario_3']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_3': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1665375.27 (-3.85%)


Unnamed: 0_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
France,240704.63,107857.06,-132847.57,-55.191115,199.441074,13.897615,14.350741
UK,481409.27,477033.02,-4376.25,-0.90905,6.569966,27.79523,0.23637
Germany,160469.75,159778.76,-690.99,-0.430605,1.037368,9.265076,0.111965
other,632767.39,637000.48,4233.09,0.66898,-6.355043,36.534226,-0.173948
Italy,120352.31,121331.46,979.15,0.81357,-1.469976,6.948807,-0.211544
Spain,96281.86,162374.495,66092.635,68.64495,-99.223389,5.559046,-17.848995


In [208]:
calculate_simple_growth_metrics(
    df.groupby(['country', 'maturity'])[['revenue_before', 'revenue_after_scenario_1']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_1': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1107924.43 (-36.03%)


Unnamed: 0_level_0,Unnamed: 1_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,maturity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Spain,new,32394.1,7758.9,-24635.2,-76.048416,3.947564,1.870345,2.110607
UK,new,128324.22,34838.87,-93485.35,-72.850901,14.980167,7.409083,2.021865
Germany,new,48105.83,13678.94,-34426.89,-71.564902,5.516592,2.777497,1.986174
France,new,57901.91,17443.06,-40458.85,-69.874811,6.483159,3.343095,1.939268
other,new,233958.42,72666.89,-161291.53,-68.940254,25.845484,13.508107,1.913331
Italy,new,36941.57,11615.29,-25326.28,-68.557671,4.058303,2.132903,1.902713
Italy,existing,83410.74,59691.42,-23719.32,-28.43677,3.800803,4.815904,0.789219
Spain,existing,63887.76,47302.12,-16585.64,-25.960591,2.657696,3.688701,0.720496
other,existing,398808.97,309044.33,-89764.64,-22.50818,14.383958,23.026119,0.62468
France,existing,182802.72,143202.12,-39600.6,-21.663026,6.345632,10.55452,0.601224


In [209]:
calculate_simple_growth_metrics(
    df.groupby(['country', 'maturity'])[['revenue_before', 'revenue_after_scenario_3']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_3': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1665375.27 (-3.85%)


Unnamed: 0_level_0,Unnamed: 1_level_0,before,after,diff_abs,diff_rate,share_in_diff,share_before,impact_coef
country,maturity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
France,existing,182802.72,50183.27,-132619.45,-72.547854,199.098603,10.55452,18.863824
UK,existing,353085.05,349728.15,-3356.9,-0.950734,5.039639,20.386147,0.247209
UK,new,128324.22,127304.87,-1019.35,-0.794355,1.530327,7.409083,0.206547
Germany,new,48105.83,47795.22,-310.61,-0.645681,0.466312,2.777497,0.167889
France,new,57901.91,57673.79,-228.12,-0.393977,0.342471,3.343095,0.102441
Germany,existing,112363.92,111983.54,-380.38,-0.338525,0.571056,6.48758,0.088023
Italy,new,36941.57,36893.3,-48.27,-0.130666,0.072467,2.132903,0.033976
other,new,233958.42,235306.99,1348.57,0.576414,-2.024578,13.508107,-0.149879
other,existing,398808.97,401693.49,2884.52,0.723284,-4.330465,23.026119,-0.188068
Spain,new,32394.1,32674.67,280.57,0.866115,-0.421213,1.870345,-0.225206


In [217]:
df[['country', 'maturity', 'revenue_before', 'revenue_after_scenario_1', 
    'revenue_after_scenario_2', 'revenue_after_scenario_3']].to_csv('absolute_metrics_example.csv', index = False, sep = '\t')

## Conversion
### Generating dataset

In [148]:
conv_df = df[['country', 'maturity', 'revenue_before']].copy()

In [149]:
conv_df['users_before'] = conv_df.revenue_before.map(lambda x: int(x/18))

In [151]:
def get_conversion_before(maturity):
    if maturity == 'new': 
        return np.random.normal(0.1, 0.05)
    return np.random.normal(0.75, 0.1)

In [152]:
conv_df['conversion_before'] = conv_df.maturity.map(get_conversion_before)

In [153]:
conv_df['converted_users_before'] = (conv_df.conversion_before * conv_df.users_before).map(int)

In [154]:
conv_df['users_after_scenario_1'] = list(map(
    lambda x: int(np.random.normal(1, 0.02) * x),
    conv_df.users_before
))

In [160]:
def get_conversion_after_scenario_1(maturity, conv_before):
    if maturity == 'new': 
        return conv_before + np.random.normal(0.2, 0.005)
    return conv_before + np.random.normal(0, 0.005)

In [161]:
conv_df['conversion_after_scenario_1'] = list(map(
    get_conversion_after_scenario_1,
    conv_df.maturity,
    conv_df.conversion_before
))

In [162]:
conv_df['converted_users_after_scenario_1'] = (conv_df.conversion_after_scenario_1 * conv_df.users_after_scenario_1).map(int)

In [163]:
def calculate_effects(df, numerator_field1, denominator_field1, 
                       numerator_field2, denominator_field2, dimensions):
    cmp_df = df.groupby(dimensions)[[numerator_field1, denominator_field1, numerator_field2, denominator_field2]].sum()
    cmp_df = cmp_df.rename(columns = {
        numerator_field1: 'j1', 
        numerator_field2: 'j2',
        denominator_field1: 'f1', 
        denominator_field2: 'f2'
    })
    cmp_df['prev_rate'] = cmp_df['j1']/cmp_df['f1']
    cmp_df['curr_rate'] = cmp_df['j2']/cmp_df['f2']
    
    y1 = cmp_df['j1'].sum()
    a1 = cmp_df['f1'].sum()
    y2 = cmp_df['j2'].sum()
    a2 = cmp_df['f2'].sum()
    
    cmp_df['df'] = (a1*cmp_df.f2 - a2*cmp_df.f1)/(a2 - cmp_df.f2)
    cmp_df['total_effect'] = (y1 - cmp_df.j1 + (cmp_df.f1 + cmp_df.df)*cmp_df.curr_rate)/(a1 + cmp_df.df) - y1/a1
    cmp_df['mix_change_effect'] = (y1 + cmp_df.df*cmp_df.prev_rate)/(a1 + cmp_df.df) - y1/a1
    cmp_df['conversion_change_effect'] = (cmp_df.f1*cmp_df.j2 - cmp_df.f2*cmp_df.j1)/(a1 * cmp_df.f2)
    
    for col in ['total_effect', 'mix_change_effect', 'conversion_change_effect', 'curr_rate', 'prev_rate']:
        cmp_df[col] = 100*cmp_df[col]
        
    cmp_df['success_rate_diff'] = cmp_df.curr_rate - cmp_df.prev_rate
    cmp_df['prev_dim_share'] = 100*cmp_df.f1/a1
    cmp_df['curr_dim_share'] = 100*cmp_df.f2/a2
    cmp_df = cmp_df[['f1', 'f2', 'prev_dim_share', 'curr_dim_share', 'prev_rate', 'curr_rate', 'success_rate_diff',
                     'total_effect', 'mix_change_effect', 'conversion_change_effect']]
    print('previous success rate = %.2f' % (100*y1/a1))
    print('current success rate = %.2f' % (100*y2/a2))
    print('total success rate change = %.2f' % (100*(y2/a2 - y1/a1)))
    return cmp_df

In [164]:
conv_df

Unnamed: 0,country,maturity,revenue_before,users_before,conversion_before,converted_users_before,users_after_scenario_1,conversion_after_scenario_1,converted_users_after_scenario_1
0,UK,new,149901.33,8327,0.1037,863,8733,0.296876,2592
1,UK,existing,331507.94,18417,0.768544,14154,18636,0.764442,14246
2,France,new,65190.54,3621,0.048991,177,3675,0.256862,943
3,France,existing,175514.1,9750,0.789064,7693,9633,0.794389,7652
4,Germany,new,44149.35,2452,0.066169,162,2507,0.255287,640
5,Germany,existing,116320.4,6462,0.612614,3958,6427,0.617801,3970
6,Italy,new,53094.75,2949,0.102288,301,2923,0.302232,883
7,Italy,existing,67257.57,3736,0.675568,2523,3492,0.668699,2335
8,Spain,new,23808.57,1322,0.130463,172,1315,0.32205,423
9,Spain,existing,72473.28,4026,0.660402,2658,4137,0.664724,2749


In [165]:
calculate_effects(
    conv_df, 'converted_users_before', 'users_before', 
    'converted_users_after_scenario_1', 'users_after_scenario_1',
    'country'
)

previous success rate = 55.62
current success rate = 61.41
total success rate change = 5.80


Unnamed: 0_level_0,f1,f2,prev_dim_share,curr_dim_share,prev_rate,curr_rate,success_rate_diff,total_effect,mix_change_effect,conversion_change_effect
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
France,13371,13308,13.897002,13.782961,58.858724,64.585212,5.726488,0.784983,-0.004296,0.79581
Germany,8914,8934,9.264668,9.252853,46.21943,51.600627,5.381197,0.499138,0.001223,0.49855
Italy,6685,6415,6.947981,6.643951,42.243829,50.163679,7.919849,0.569879,0.043688,0.55027
Spain,5348,5452,5.558385,5.646581,52.916978,58.180484,5.263506,0.294689,-0.00252,0.292566
UK,26744,27369,27.796082,28.345796,56.150912,61.52216,5.371248,1.526603,0.00408,1.492996
other,35153,35076,36.535883,36.327858,59.309305,65.17847,5.869165,2.120033,-0.012109,2.144351


In [167]:
calculate_effects(
    conv_df, 'converted_users_before', 'users_before', 
    'converted_users_after_scenario_1', 'users_after_scenario_1',
    'maturity'
)

previous success rate = 55.62
current success rate = 61.41
total success rate change = 5.80


Unnamed: 0_level_0,f1,f2,prev_dim_share,curr_dim_share,prev_rate,curr_rate,success_rate_diff,total_effect,mix_change_effect,conversion_change_effect
maturity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
existing,66459,66534,69.073429,68.90859,77.024933,76.858448,-0.166484,-0.228837,-0.114115,-0.114996
new,29756,30020,30.926571,31.09141,7.796747,27.175217,19.37847,5.910924,-0.114115,5.993096


In [172]:
conv_df['users_after_scenario_2'] = list(map(
    lambda x, y, z: int(z) if ((x == 'existing') or (y != 'UK')) else int(np.random.normal(10, 0.01)*z),
    conv_df.maturity,
    conv_df.country,
    conv_df.users_after_scenario_1
))

In [176]:
conv_df['conversion_after_scenario_2'] = conv_df.conversion_before.map(
    lambda x: x + np.random.normal(0.1, 0.01)
)

In [177]:
conv_df['converted_users_after_scenario_2'] = (conv_df['users_after_scenario_2'] * conv_df.conversion_after_scenario_2).map(int)

In [178]:
calculate_effects(
    conv_df, 'converted_users_before', 'users_before', 
    'converted_users_after_scenario_2', 'users_after_scenario_2',
    'country'
)

previous success rate = 55.62
current success rate = 44.72
total success rate change = -10.90


Unnamed: 0_level_0,f1,f2,prev_dim_share,curr_dim_share,prev_rate,curr_rate,success_rate_diff,total_effect,mix_change_effect,conversion_change_effect
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
France,13371,13308,13.897002,7.598536,58.858724,68.770664,9.91194,0.515885,-0.237277,1.377462
Germany,8914,8934,9.264668,5.101091,46.21943,56.805462,10.586032,0.971139,0.431136,0.980761
Italy,6685,6415,6.947981,3.662805,42.243829,50.631333,8.387503,0.779284,0.472067,0.582762
Spain,5348,5452,5.558385,3.112956,52.916978,62.362436,9.445457,0.363895,0.069862,0.525015
UK,26744,105954,27.796082,60.497091,56.150912,31.033279,-25.117634,-14.952737,0.242701,-6.981718
other,35153,35076,36.535883,20.027521,59.309305,70.027939,10.718634,1.185717,-0.96096,3.916148


In [180]:
calculate_effects(
    conv_df, 'converted_users_before', 'users_before', 
    'converted_users_after_scenario_2', 'users_after_scenario_2',
    ['maturity', 'country']
).reset_index()

previous success rate = 55.62
current success rate = 44.72
total success rate change = -10.90


Unnamed: 0,maturity,country,f1,f2,prev_dim_share,curr_dim_share,prev_rate,curr_rate,success_rate_diff,total_effect,mix_change_effect,conversion_change_effect
0,existing,France,9750,9633,10.133555,5.500203,78.902564,90.055019,11.152455,-0.587256,-1.200663,1.13014
1,existing,Germany,6462,6427,6.716208,3.669657,61.250387,72.599969,11.349582,0.232446,-0.184045,0.762262
2,existing,Italy,3736,3492,3.88297,1.993845,67.53212,76.059565,8.527445,-0.0642,-0.234224,0.331118
3,existing,Spain,4026,4137,4.184379,2.362124,66.020864,74.40174,8.380876,6.5e-05,-0.197902,0.350688
4,existing,UK,18417,18636,19.141506,10.640691,76.852908,86.526079,9.673171,-1.203488,-2.232781,1.851591
5,existing,other,24068,24209,25.014811,13.822735,83.945488,94.345078,10.39959,-2.791016,-4.228523,2.601438
6,new,France,3621,3675,3.763446,2.098333,4.888152,12.979592,8.091439,1.047477,0.877691,0.304517
7,new,Germany,2452,2507,2.548459,1.431434,6.606852,16.31432,9.707468,0.700705,0.561749,0.247391
8,new,Italy,2949,2923,3.065011,1.66896,10.20685,20.253165,10.046315,0.821634,0.653965,0.307921
9,new,Spain,1322,1315,1.374006,0.750832,13.01059,24.486692,11.476102,0.355365,0.269199,0.157682


In [216]:
conv_df[['country', 'maturity', 'users_before', 'converted_users_before',
         'users_after_scenario_1', 'converted_users_after_scenario_1',
         'users_after_scenario_2', 'converted_users_after_scenario_2']].to_csv('conversion_metrics_example.csv', index = False, sep = '\t')