In [159]:
import pandas as pd
import numpy as np

### Country stats

In [160]:
# Net annual earnings (Eurostat dataset EARN_NT_NET): https://doi.org/10.2908/EARN_NT_NET

In [161]:
# Load the net-earnings TSV and keep only the key column and the 2022 values.
# The key column packs several comma-separated dimensions into one string; the
# last field is taken as the country code.
# The header contains a literal backslash, so it is written as a raw string:
# in a plain literal '\T' is an invalid escape sequence and raises a warning.
# NOTE(review): assumes the file holds one row per geo code -- confirm, since
# duplicate geo codes would multiply rows in the later merge.
earnings_raw = pd.read_csv('net_earnings_eu.tsv', sep = '\t')
earnings_df = pd.DataFrame({
    'country_code': earnings_raw[r'freq,currency,estruct,ecase,geo\TIME_PERIOD'].map(
        lambda key: key.split(',')[-1]
    ),
    'country_avg_annual_earning': earnings_raw['2022 '],
})

In [162]:
# Population (Eurostat dataset TPS00001): https://doi.org/10.2908/TPS00001

In [163]:
# Load the population TSV and keep only the key column and the 2023 values.
# The key column is a comma-separated string whose last field is taken as the
# country code.
# The header contains a literal backslash, so it is written as a raw string:
# in a plain literal '\T' is an invalid escape sequence and raises a warning.
population_raw = pd.read_csv('population.tsv', sep = '\t')
population_df = pd.DataFrame({
    'country_code': population_raw[r'freq,indic_de,geo\TIME_PERIOD'].map(
        lambda key: key.split(',')[-1]
    ),
    'country_population': population_raw['2023 '],
})

In [164]:
# Lookup table from two-letter country code to country name.
# keep_default_na=False stops pandas from treating the literal string "NA"
# (Namibia's alpha-2 code) as a missing value.
iso_df = (
    pd.read_csv('iso_codes.txt', keep_default_na = False)[['name', 'alpha-2']]
    .rename(columns = {'name': 'country', 'alpha-2': 'country_code'})
)

In [165]:
# Preview the first five rows of the code -> name lookup table.
iso_df.iloc[:5]

Unnamed: 0,country,country_code
0,Afghanistan,AF
1,Åland Islands,AX
2,Albania,AL
3,Algeria,DZ
4,American Samoa,AS


In [166]:
# Inner-join earnings and population on their only shared column, the country code.
country_df = pd.merge(earnings_df, population_df, how = 'inner', on = 'country_code')

In [167]:
# Drop rows where either metric is unavailable. The source marks a missing
# value with ':'; matching on the leading colon (rather than the exact string
# ': ') also catches variants such as ':' or ': c', which would otherwise
# survive this filter and break the numeric parsing that follows.
earning_missing = country_df.country_avg_annual_earning.astype(str).str.strip().str.startswith(':')
population_missing = country_df.country_population.astype(str).str.strip().str.startswith(':')
country_df = country_df[~(earning_missing | population_missing)]

In [168]:
# Exclude the euro-area / EU aggregate rows so that only individual countries remain.
aggregate_codes = ['EA19', 'EA20', 'EU27_2020']
is_aggregate = country_df['country_code'].isin(aggregate_codes)
country_df = country_df[~is_aggregate]

In [169]:
# Convert both metrics from text to numbers: only the text before the first
# space is parsed, which discards whatever trails the value in the cell.
# assign() builds a new frame instead of writing columns into the filtered
# slice produced by the previous cells, which avoids SettingWithCopyWarning.
country_df = country_df.assign(
    country_avg_annual_earning = lambda df: df.country_avg_annual_earning.map(
        lambda raw: float(raw.split(' ')[0])
    ),
    country_population = lambda df: df.country_population.map(
        lambda raw: int(raw.split(' ')[0])
    ),
)

In [170]:
# Attach country names. Eurostat geo codes differ from ISO 3166-1 alpha-2 for
# Greece (EL vs GR) and the United Kingdom (UK vs GB), so a direct join on the
# code leaves those names empty. The codes are translated into a temporary
# join key; `country_code` itself and the resulting column set are unchanged.
eurostat_to_iso = {'EL': 'GR', 'UK': 'GB'}
country_df = (
    country_df
    .assign(iso_code = country_df.country_code.replace(eurostat_to_iso))
    .merge(
        iso_df.rename(columns = {'country_code': 'iso_code'}),
        how = 'left',
        on = 'iso_code',
    )
    .drop(columns = 'iso_code')
)

In [171]:
# (rows, columns) of the combined country table.
(len(country_df.index), len(country_df.columns))

(31, 4)

In [172]:
# NOTE(review): scratch calculation -- the value is neither assigned nor used by
# any visible cell; consider deleting this cell.
1 << 93

9903520314283042199192993792

In [173]:
# NOTE(review): not referenced by any visible cell -- confirm whether it is still needed.
total_users = 30_000

In [174]:
# Preview the first five rows of the combined country table.
country_df.iloc[:5]

Unnamed: 0,country_code,country_avg_annual_earning,country_population,country
0,AT,68690.65,9104772,Austria
1,BE,70297.58,11754004,Belgium
2,BG,12923.66,6447710,Bulgaria
3,CH,106839.33,8812728,Switzerland
4,CY,27263.2,920701,Cyprus


### Generate stats

In [175]:
# Per channel: [mean, standard deviation] of the normal draw that is multiplied
# by a country's population to obtain the number of users.
population_coefs = {
    'search':          [3e-4, 3e-5],
    'social networks': [3e-4, 3e-5],
    'influencers':     [5e-5, 2e-4],
}

# Per channel: multiplier applied to the earnings-based LTV.
ltv_coefs = {
    'search':          1.0,
    'social networks': 0.8,
    'influencers':     1.5,
}

In [177]:
# Generate one synthetic frame per marketing channel (collected in `tmp`).
# A seeded generator makes Restart & Run All reproduce the same data set.
# NOTE(review): 'search' has coefficients defined but is not generated here --
# confirm whether that is intentional.
rng = np.random.default_rng(42)

tmp = []

for marketing_channel in ['social networks', 'influencers']:
    share_mean, share_sd = population_coefs[marketing_channel]
    ltv_coef = ltv_coefs[marketing_channel]

    tmp_df = country_df.copy()
    n_rows = len(tmp_df)

    # Users: population times a normally distributed share, truncated to an integer.
    users = (tmp_df.country_population * rng.normal(share_mean, share_sd, n_rows)).astype(int)
    # Non-positive draws are replaced by a draw around 100 users.
    tmp_df['users'] = users.where(users > 0, rng.normal(100, 10, n_rows).astype(int))

    # LTV: channel coefficient times earnings times a share drawn around 3%.
    scaled_earning = ltv_coef * tmp_df.country_avg_annual_earning
    ltv = (scaled_earning * rng.normal(0.03, 0.01, n_rows)).round(2)
    # Non-positive draws fall back to the mean share, rounded like every other value.
    tmp_df['ltv'] = ltv.where(ltv > 0, (0.03 * scaled_earning).round(2))

    # CAC: LTV times a share drawn around 30%; non-positive draws fall back to
    # the mean share so the later LTV/CAC ratio never divides by zero or a negative.
    cac = (tmp_df.ltv * rng.normal(0.3, 0.05, n_rows)).round(2)
    tmp_df['cac'] = cac.where(cac > 0, (0.3 * tmp_df.ltv).round(2))

    # Customer-support contacts: users times a share drawn around 5%. With sd 0.02
    # the draw is occasionally negative, so the count is floored at zero.
    tmp_df['cs_contacts'] = (
        (tmp_df.users * rng.normal(0.05, 0.02, n_rows)).astype(int).clip(lower = 0)
    )

    tmp_df['channel'] = marketing_channel
    tmp.append(tmp_df)

In [178]:
# Stack the per-channel frames. ignore_index=True gives every row a unique
# label; without it each channel repeats the labels 0..n-1, which makes
# label-based selection and index alignment ambiguous.
fin_df = pd.concat(tmp, ignore_index = True)

In [179]:
# Sanity check: rows with a negative LTV (expected to be empty).
fin_df.loc[fin_df['ltv'] < 0]

Unnamed: 0,country_code,country_avg_annual_earning,country_population,country,users,ltv,cac,cs_contacts,channel


In [180]:
# Sanity check: rows with a negative CAC (expected to be empty).
fin_df.loc[fin_df['cac'] < 0]

Unnamed: 0,country_code,country_avg_annual_earning,country_population,country,users,ltv,cac,cs_contacts,channel


In [181]:
# Sanity check: rows with a negative number of support contacts (expected to be empty).
fin_df.loc[fin_df['cs_contacts'] < 0]

Unnamed: 0,country_code,country_avg_annual_earning,country_population,country,users,ltv,cac,cs_contacts,channel


In [182]:
# Per-channel summary: total users and contacts, average LTV and CAC.
fin_df.groupby('channel').agg(
    users = ('users', 'sum'),
    ltv = ('ltv', 'mean'),
    cac = ('cac', 'mean'),
    cs_contacts = ('cs_contacts', 'sum'),
)

Unnamed: 0_level_0,users,ltv,cac,cs_contacts
channel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
influencers,35735,1998.979677,589.606774,1725
social networks,168030,1097.542903,302.920323,7764


In [184]:
# Ratio of lifetime value to acquisition cost for each country/channel row.
fin_df['ltv_to_cac'] = fin_df['ltv'].div(fin_df['cac'])

In [185]:
# Totals per row: users times CAC (spend) and users times LTV (revenue),
# each rounded to two decimals.
spending_total = fin_df['users'].mul(fin_df['cac'])
revenue_total = fin_df['users'].mul(fin_df['ltv'])
fin_df['marketing_spending'] = spending_total.map(lambda amount: round(amount, 2))
fin_df['revenue'] = revenue_total.map(lambda amount: round(amount, 2))

In [186]:
# Export the per-country, per-channel estimates without the index.
# Note: the file is tab-separated even though its name ends in .csv.
export_columns = ['country', 'channel', 'users', 'cs_contacts', 'marketing_spending', 'revenue']
fin_df[export_columns].to_csv('marketing_campaign_estimations.csv', sep = '\t', index = False)