In [None]:
import pandas as pd
import numpy as np
import openpyxl
import warnings
import sys
import os

# os.path.abspath('') resolves to the current working directory, so notebook_dir
# is the PARENT of the working directory (presumably the folder above the one
# holding this notebook — TODO confirm the kernel always starts in the
# notebook's own folder).
notebook_dir = os.path.dirname(os.path.abspath(''))

# Locate the PythonPrep directory and make it importable: when 'PythonPrep'
# already appears in notebook_dir that path is used as is, otherwise the
# 'PythonPrep' folder sitting next to notebook_dir is used.
pythonprep_dir = os.path.join(os.path.dirname(notebook_dir), 'PythonPrep') if 'PythonPrep' not in notebook_dir else notebook_dir
sys.path.append(pythonprep_dir)

# Project-local module (expected to live in pythonprep_dir) providing main_path.
from paths import main_path

# Silences every warning for the rest of the session, including pandas
# warnings raised by later cells.
warnings.filterwarnings("ignore")


### Internet Users

In [None]:
# Base folder under which the result CSVs of this notebook are written.
path_input = f"{main_path}/Democracy/Democracy_Main/MainAnalysis/input"

# Raw series; the relative path is resolved against the current working directory.
df = pd.read_csv('number-of-internet-users.csv')
df


Unnamed: 0,Entity,Code,Year,Number of Internet users
0,Afghanistan,AFG,1990,0
1,Afghanistan,AFG,1991,0
2,Afghanistan,AFG,1992,0
3,Afghanistan,AFG,1993,0
4,Afghanistan,AFG,1994,0
...,...,...,...,...
6401,Zimbabwe,ZWE,2016,3341464
6402,Zimbabwe,ZWE,2017,3599269
6403,Zimbabwe,ZWE,2018,3763048
6404,Zimbabwe,ZWE,2019,3854006


In [3]:
# Restrict to the 2001-2019 analysis window. .copy() yields an independent
# frame, so adding 'growth_rate' below never writes into a slice of the raw
# data (the resulting pandas warning is silenced globally in this notebook).
df = df[(df['Year'] >= 2001) & (df['Year'] <= 2019)].copy()

# Rows observed in 2001, the first year of the window; only entities that
# appear here are kept. Rows without an entity name are ignored.
valid_countries = df[(df['Year'] == 2001) & df['Entity'].notna()]
df = df[df['Entity'].isin(valid_countries['Entity'].unique())].copy()

# pct_change compares every row with the previous row of the same entity, so
# make the chronological order explicit instead of relying on the file order.
# NOTE(review): a missing year inside an entity makes the change span several
# years, and the handling of missing values depends on the installed pandas
# version (fill_method default) — confirm both are acceptable.
df = df.sort_values(['Entity', 'Year'])
df['growth_rate'] = df.groupby('Entity')['Number of Internet users'].pct_change() * 100

In [4]:
# Inspect the filtered frame together with the new growth_rate column.
df

Unnamed: 0,Entity,Code,Year,Number of Internet users,growth_rate
6,Afghanistan,AFG,2001,930,
7,Afghanistan,AFG,2002,958,3.010753
8,Afghanistan,AFG,2003,19903,1977.557411
9,Afghanistan,AFG,2004,24922,25.217304
10,Afghanistan,AFG,2005,298829,1099.057058
...,...,...,...,...,...
6400,Zimbabwe,ZWE,2015,3219232,41.975312
6401,Zimbabwe,ZWE,2016,3341464,3.796930
6402,Zimbabwe,ZWE,2017,3599269,7.715331
6403,Zimbabwe,ZWE,2018,3763048,4.550341


In [None]:
# Averaging windows: output label -> (first year, last year), both inclusive.
periods = {
    '2001-2019': (2001, 2019)
}

# Mean yearly growth rate per entity for each window. The per-window results
# are collected first and assembled into a frame once, instead of growing a
# DataFrame with concat inside the loop.
period_means = {}
for period, (start_year, end_year) in periods.items():
    in_period = (df['Year'] >= start_year) & (df['Year'] <= end_year)
    # mean() skips NaN, e.g. the first year of every entity, which has no
    # previous value to compare against.
    period_means[period] = df[in_period].groupby('Entity')['growth_rate'].mean()

# One row per entity; the entity name becomes the leading 'country' column.
result_df = pd.DataFrame(period_means)
result_df.index.name = 'country'
result_df = result_df.reset_index()

result_df = result_df.rename(columns={
    '2001-2019': 'mean_internet_user_2001_2019',
})

# Remove entities for which no growth rate could be computed (mean is NaN).
result_df = result_df.dropna()
result_df

Unnamed: 0,country,mean_internet_user_2001_2019
0,Afghanistan,193.876300
1,Africa,27.395994
2,Albania,41.708168
3,Algeria,34.218321
4,Angola,44.839198
...,...,...
205,Vietnam,29.233954
206,World,12.664717
207,Yemen,71.119621
208,Zambia,33.370302


In [6]:
# Sanity check: rows whose mean growth rate is +infinity (e.g. growth from a
# base of zero). An empty result means there are none.
result_df.loc[
    result_df['mean_internet_user_2001_2019'] == np.inf
]

Unnamed: 0,country,mean_internet_user_2001_2019


In [7]:
# Summary statistics of the mean growth-rate column.
result_df.describe()

Unnamed: 0,mean_internet_user_2001_2019
count,209.0
mean,31.333915
std,67.190124
min,3.420524
25%,12.165046
50%,21.528759
75%,33.292319
max,863.055588


In [8]:
###

In [9]:
# Reload the full file: df was narrowed to 2001-2019 earlier, but the next
# step needs the year-2000 observations.
df = pd.read_csv('number-of-internet-users.csv')
df


Unnamed: 0,Entity,Code,Year,Number of Internet users
0,Afghanistan,AFG,1990,0
1,Afghanistan,AFG,1991,0
2,Afghanistan,AFG,1992,0
3,Afghanistan,AFG,1993,0
4,Afghanistan,AFG,1994,0
...,...,...,...,...
6401,Zimbabwe,ZWE,2016,3341464
6402,Zimbabwe,ZWE,2017,3599269
6403,Zimbabwe,ZWE,2018,3763048
6404,Zimbabwe,ZWE,2019,3854006


In [10]:
# Baseline level: keep only the year-2000 observations of the reloaded data.
df = df.loc[df['Year'] == 2000]

# One row per (Entity, Code) with the year-2000 value in a column labelled 2000
# (pivot_table averages duplicates and returns floats).
pivot_df = df.pivot_table(
    values='Number of Internet users',
    index=['Entity', 'Code'],
    columns='Year',
).reset_index()

# Align the column names with result_df so both frames can be joined on 'country'.
df_renamed = pivot_df.rename(columns={'Entity': 'country', 2000: 'mean_internet_user_2000'})

# Left join keeps every entity of result_df; entities without a year-2000
# value end up with NaN and are removed by the final dropna().
merged_df = pd.merge(left=result_df, right=df_renamed, how='left', on='country')
result_df = merged_df.drop(columns='Code').dropna()

In [11]:
# Inspect the merged table: mean growth rate plus the year-2000 level.
result_df

Unnamed: 0,country,mean_internet_user_2001_2019,mean_internet_user_2000
2,Albania,41.708168,3631.0
3,Algeria,34.218321,151321.0
4,Angola,44.839198,17221.0
5,Antigua and Barbuda,17.011189,4866.0
6,Argentina,13.854244,2609294.0
...,...,...,...
204,Vietnam,29.233954,200859.0
205,World,12.664717,414012675.0
206,Yemen,71.119621,15369.0
207,Zambia,33.370302,18899.0


In [12]:
###

In [None]:
# Write the table to the channels21st/internet folder, omitting the index.
result_df.to_csv(
    path_or_buf=f"{path_input}/channels21st/internet/internet.csv",
    index=False,
)



### Share of Internet Users

In [20]:
# Raw series; the relative path is resolved against the current working directory.
df = pd.read_csv('share-of-individuals-using-the-internet.csv')
df


Unnamed: 0,Entity,Code,Year,Individuals using the Internet (% of population)
0,Afghanistan,AFG,1990,0.000000
1,Afghanistan,AFG,1991,0.000000
2,Afghanistan,AFG,1992,0.000000
3,Afghanistan,AFG,1993,0.000000
4,Afghanistan,AFG,1994,0.000000
...,...,...,...,...
6950,Zimbabwe,ZWE,2017,24.400000
6951,Zimbabwe,ZWE,2018,25.000000
6952,Zimbabwe,ZWE,2019,26.588274
6953,Zimbabwe,ZWE,2020,29.298565


In [21]:
# Restrict to the 2001-2019 analysis window. .copy() yields an independent
# frame, so adding 'growth_rate' below never writes into a slice of the raw
# data (the resulting pandas warning is silenced globally in this notebook).
df = df[(df['Year'] >= 2001) & (df['Year'] <= 2019)].copy()

# Rows observed in 2001, the first year of the window; only entities that
# appear here are kept. Rows without an entity name are ignored.
valid_countries = df[(df['Year'] == 2001) & df['Entity'].notna()]
df = df[df['Entity'].isin(valid_countries['Entity'].unique())].copy()

# pct_change compares every row with the previous row of the same entity, so
# make the chronological order explicit instead of relying on the file order.
# NOTE(review): a missing year inside an entity makes the change span several
# years, and the handling of missing values depends on the installed pandas
# version (fill_method default) — confirm both are acceptable.
df = df.sort_values(['Entity', 'Year'])
df['growth_rate'] = df.groupby('Entity')['Individuals using the Internet (% of population)'].pct_change() * 100

In [22]:
 periods = {
    '2001-2019': (2001, 2019)
}

result_df = pd.DataFrame()

for period, (start_year, end_year) in periods.items():

    period_df = df[(df['Year'] >= start_year) & (df['Year'] <= end_year)]
    avg_life_expectancy = period_df.groupby('Entity')['growth_rate'].mean().rename(period)
    
    result_df = pd.concat([result_df, avg_life_expectancy], axis=1)

result_df.reset_index(inplace=True)
result_df['country'] = result_df['index']
result_df = result_df.drop('index', axis=1)
result_df = result_df[['country'] + [col for col in result_df.columns if col != 'country']]

result_df.rename(columns={
    '2001-2019': 'mean_sh_internet_user_2001_2019',
}, inplace=True)

result_df = result_df.dropna()
result_df

Unnamed: 0,country,mean_sh_internet_user_2001_2019
0,Afghanistan,179.418051
1,Albania,42.598249
2,Algeria,32.072202
3,Angola,39.674101
4,Antigua and Barbuda,16.631711
...,...,...
208,Vietnam,27.931555
209,World,11.265893
210,Yemen,66.368080
211,Zambia,29.091896


In [23]:
# Sanity check: rows whose mean growth rate is +infinity (e.g. growth from a
# base of zero). An empty result means there are none.
result_df.loc[
    result_df['mean_sh_internet_user_2001_2019'] == np.inf
]

Unnamed: 0,country,mean_sh_internet_user_2001_2019


In [24]:
# Summary statistics of the mean growth-rate column.
result_df.describe()

Unnamed: 0,mean_sh_internet_user_2001_2019
count,212.0
mean,28.194852
std,50.292448
min,2.406233
25%,11.242419
50%,19.601599
75%,31.660184
max,581.022965


In [25]:
###

In [26]:
# Reload the full file: df was narrowed to 2001-2019 earlier, but the next
# step needs the year-2000 observations.
df = pd.read_csv('share-of-individuals-using-the-internet.csv')
df

Unnamed: 0,Entity,Code,Year,Individuals using the Internet (% of population)
0,Afghanistan,AFG,1990,0.000000
1,Afghanistan,AFG,1991,0.000000
2,Afghanistan,AFG,1992,0.000000
3,Afghanistan,AFG,1993,0.000000
4,Afghanistan,AFG,1994,0.000000
...,...,...,...,...
6950,Zimbabwe,ZWE,2017,24.400000
6951,Zimbabwe,ZWE,2018,25.000000
6952,Zimbabwe,ZWE,2019,26.588274
6953,Zimbabwe,ZWE,2020,29.298565


In [28]:
# Baseline level: keep only the year-2000 observations of the reloaded data.
df = df.loc[df['Year'] == 2000]

# One row per (Entity, Code) with the year-2000 value in a column labelled 2000
# (pivot_table averages duplicates and returns floats).
pivot_df = df.pivot_table(
    values='Individuals using the Internet (% of population)',
    index=['Entity', 'Code'],
    columns='Year',
).reset_index()

# Align the column names with result_df so both frames can be joined on 'country'.
df_renamed = pivot_df.rename(columns={'Entity': 'country', 2000: 'mean_sh_internet_user_2000'})

# Left join keeps every entity of result_df; entities without a year-2000
# value end up with NaN and are removed by the final dropna().
merged_df = pd.merge(left=result_df, right=df_renamed, how='left', on='country')
result_df = merged_df.drop(columns='Code').dropna()

In [29]:
# Inspect the merged table: mean growth rate plus the year-2000 level.
result_df

Unnamed: 0,country,mean_sh_internet_user_2001_2019,mean_sh_internet_user_2000
1,Albania,42.598249,0.114097
2,Algeria,32.072202,0.491706
3,Angola,39.674101,0.105046
4,Antigua and Barbuda,16.631711,6.482226
5,Argentina,12.739178,7.038683
...,...,...,...
207,Vietnam,27.931555,0.254248
208,World,11.265893,6.743720
209,Yemen,66.368080,0.082500
210,Zambia,29.091896,0.191072


In [30]:
###

In [None]:
# Write the table to the channels21st/internet folder, omitting the index.
result_df.to_csv(
    path_or_buf=f"{path_input}/channels21st/internet/sh_internet.csv",
    index=False,
)



### Landline Internet Subscription

In [32]:
# Raw series; the relative path is resolved against the current working directory.
df = pd.read_csv('landline-internet-subscriptions.csv')
df

Unnamed: 0,Entity,Code,Year,Fixed broadband subscriptions
0,Afghanistan,AFG,2004,200.0
1,Afghanistan,AFG,2005,220.0
2,Afghanistan,AFG,2006,500.0
3,Afghanistan,AFG,2007,500.0
4,Afghanistan,AFG,2008,500.0
...,...,...,...,...
4262,Zimbabwe,ZWE,2018,203056.0
4263,Zimbabwe,ZWE,2019,204424.0
4264,Zimbabwe,ZWE,2020,203461.0
4265,Zimbabwe,ZWE,2021,205333.0


In [33]:
# Restrict to the 2001-2019 analysis window. .copy() yields an independent
# frame, so adding 'growth_rate' below never writes into a slice of the raw
# data (the resulting pandas warning is silenced globally in this notebook).
df = df[(df['Year'] >= 2001) & (df['Year'] <= 2019)].copy()

# Rows observed in 2001, the first year of the window; only entities that
# appear here are kept. Rows without an entity name are ignored.
valid_countries = df[(df['Year'] == 2001) & df['Entity'].notna()]
df = df[df['Entity'].isin(valid_countries['Entity'].unique())].copy()

# pct_change compares every row with the previous row of the same entity, so
# make the chronological order explicit instead of relying on the file order.
# NOTE(review): a missing year inside an entity makes the change span several
# years, and the handling of missing values depends on the installed pandas
# version (fill_method default) — confirm both are acceptable.
df = df.sort_values(['Entity', 'Year'])
df['growth_rate'] = df.groupby('Entity')['Fixed broadband subscriptions'].pct_change() * 100

In [34]:
 periods = {
    '2001-2019': (2001, 2019)
}

result_df = pd.DataFrame()

for period, (start_year, end_year) in periods.items():

    period_df = df[(df['Year'] >= start_year) & (df['Year'] <= end_year)]
    avg_life_expectancy = period_df.groupby('Entity')['growth_rate'].mean().rename(period)
    
    result_df = pd.concat([result_df, avg_life_expectancy], axis=1)

result_df.reset_index(inplace=True)
result_df['country'] = result_df['index']
result_df = result_df.drop('index', axis=1)
result_df = result_df[['country'] + [col for col in result_df.columns if col != 'country']]

result_df.rename(columns={
    '2001-2019': 'mean_landline_2001_2019',
}, inplace=True)

result_df = result_df.dropna()
result_df

Unnamed: 0,country,mean_landline_2001_2019
0,Argentina,32.100612
1,Armenia,641.021199
2,Australia,32.627629
3,Austria,13.015397
4,Bahamas,33.047874
...,...,...
84,Upper-middle-income countries,61.303732
85,Venezuela,30.128682
86,World,22.457504
87,Zambia,90.693955


In [35]:
# Sanity check: rows whose mean growth rate is +infinity (e.g. growth from a
# base of zero). An empty result means there are none.
result_df.loc[
    result_df['mean_landline_2001_2019'] == np.inf
]

Unnamed: 0,country,mean_landline_2001_2019


In [36]:
# Summary statistics of the mean growth-rate column.
result_df.describe()

Unnamed: 0,mean_landline_2001_2019
count,89.0
mean,58.43573
std,75.075046
min,6.072733
25%,22.457504
50%,39.180799
75%,71.494934
max,641.021199


In [None]:
###

In [37]:
# Reload the full file: df was narrowed to 2001-2019 earlier, but the next
# step needs the year-2000 observations.
df = pd.read_csv('landline-internet-subscriptions.csv')
df

Unnamed: 0,Entity,Code,Year,Fixed broadband subscriptions
0,Afghanistan,AFG,2004,200.0
1,Afghanistan,AFG,2005,220.0
2,Afghanistan,AFG,2006,500.0
3,Afghanistan,AFG,2007,500.0
4,Afghanistan,AFG,2008,500.0
...,...,...,...,...
4262,Zimbabwe,ZWE,2018,203056.0
4263,Zimbabwe,ZWE,2019,204424.0
4264,Zimbabwe,ZWE,2020,203461.0
4265,Zimbabwe,ZWE,2021,205333.0


In [38]:
# Baseline level: keep only the year-2000 observations of the reloaded data.
df = df.loc[df['Year'] == 2000]

# One row per (Entity, Code) with the year-2000 value in a column labelled 2000
# (pivot_table averages duplicates and returns floats).
pivot_df = df.pivot_table(
    values='Fixed broadband subscriptions',
    index=['Entity', 'Code'],
    columns='Year',
).reset_index()

# Align the column names with result_df so both frames can be joined on 'country'.
df_renamed = pivot_df.rename(columns={'Entity': 'country', 2000: 'mean_landline_2000'})

# Left join keeps every entity of result_df; entities without a year-2000
# value end up with NaN and are removed by the final dropna().
merged_df = pd.merge(left=result_df, right=df_renamed, how='left', on='country')
result_df = merged_df.drop(columns='Code').dropna()

In [40]:
###

In [None]:
# Write the table to the channels21st/internet folder, omitting the index.
result_df.to_csv(
    path_or_buf=f"{path_input}/channels21st/internet/landline.csv",
    index=False,
)



### Fixed telephone subscription

In [60]:
# Load the ICT adoption data (path relative to the working directory) and keep
# only the fixed-telephone series.
df = pd.read_csv('ict-adoption.csv')[['Entity', 'Year', 'Fixed telephone subscriptions']]
df

Unnamed: 0,Entity,Year,Fixed telephone subscriptions
0,Afghanistan,1960,7700.0
1,Afghanistan,1961,7700.0
2,Afghanistan,1962,7700.0
3,Afghanistan,1963,7700.0
4,Afghanistan,1964,7700.0
...,...,...,...
13170,Zimbabwe,2018,268849.0
13171,Zimbabwe,2019,265734.0
13172,Zimbabwe,2020,252067.0
13173,Zimbabwe,2021,243421.0


In [61]:
# Restrict to the 2001-2019 analysis window. .copy() yields an independent
# frame, so adding 'growth_rate' below never writes into a slice of the raw
# data (the resulting pandas warning is silenced globally in this notebook).
df = df[(df['Year'] >= 2001) & (df['Year'] <= 2019)].copy()

# Rows observed in 2001, the first year of the window; only entities that
# appear here are kept. Rows without an entity name are ignored.
valid_countries = df[(df['Year'] == 2001) & df['Entity'].notna()]
df = df[df['Entity'].isin(valid_countries['Entity'].unique())].copy()

# pct_change compares every row with the previous row of the same entity, so
# make the chronological order explicit instead of relying on the file order.
# NOTE(review): a missing year inside an entity makes the change span several
# years, and the handling of missing values depends on the installed pandas
# version (fill_method default) — confirm both are acceptable.
df = df.sort_values(['Entity', 'Year'])
df['growth_rate'] = df.groupby('Entity')['Fixed telephone subscriptions'].pct_change() * 100

In [62]:
 periods = {
    '2001-2019': (2001, 2019)
}

result_df = pd.DataFrame()

for period, (start_year, end_year) in periods.items():

    period_df = df[(df['Year'] >= start_year) & (df['Year'] <= end_year)]
    avg_life_expectancy = period_df.groupby('Entity')['growth_rate'].mean().rename(period)
    
    result_df = pd.concat([result_df, avg_life_expectancy], axis=1)

result_df.reset_index(inplace=True)
result_df['country'] = result_df['index']
result_df = result_df.drop('index', axis=1)
result_df = result_df[['country'] + [col for col in result_df.columns if col != 'country']]

result_df.rename(columns={
    '2001-2019': 'mean_telephone_2001_2019',
}, inplace=True)

result_df = result_df.dropna()
result_df

Unnamed: 0,country,mean_telephone_2001_2019
0,Afghanistan,41.783678
2,Albania,1.598046
3,Algeria,5.471196
4,American Samoa,-1.347363
5,Andorra,0.781359
...,...,...
222,Vietnam,5.579703
223,World,-0.566153
224,Yemen,6.518839
225,Zambia,1.582911


In [63]:
# Sanity check: rows whose mean growth rate is +infinity (e.g. growth from a
# base of zero). An empty result means there are none.
result_df.loc[
    result_df['mean_telephone_2001_2019'] == np.inf
]

Unnamed: 0,country,mean_telephone_2001_2019


In [64]:
# Summary statistics of the mean growth-rate column.
result_df.describe()

Unnamed: 0,mean_telephone_2001_2019
count,220.0
mean,1.407128
std,7.39432
min,-17.535762
25%,-1.489073
50%,0.315669
75%,3.136511
max,71.086077


In [65]:
###

In [66]:
# Reload the full file (df was narrowed to 2001-2019 earlier, the next step
# needs year 2000) and keep only the fixed-telephone series.
df = pd.read_csv('ict-adoption.csv')[['Entity', 'Year', 'Fixed telephone subscriptions']]
df

Unnamed: 0,Entity,Year,Fixed telephone subscriptions
0,Afghanistan,1960,7700.0
1,Afghanistan,1961,7700.0
2,Afghanistan,1962,7700.0
3,Afghanistan,1963,7700.0
4,Afghanistan,1964,7700.0
...,...,...,...
13170,Zimbabwe,2018,268849.0
13171,Zimbabwe,2019,265734.0
13172,Zimbabwe,2020,252067.0
13173,Zimbabwe,2021,243421.0


In [67]:
# Baseline level: keep only the year-2000 observations of the reloaded data.
df = df.loc[df['Year'] == 2000]

# One row per Entity with the year-2000 value in a column labelled 2000
# (pivot_table averages duplicates and returns floats).
pivot_df = df.pivot_table(
    values='Fixed telephone subscriptions',
    index=['Entity'],
    columns='Year',
).reset_index()

# Align the column names with result_df so both frames can be joined on 'country'.
df_renamed = pivot_df.rename(columns={'Entity': 'country', 2000: 'mean_telephone_2000'})

# Left join keeps every entity of result_df; entities without a year-2000
# value end up with NaN and are removed by the final dropna().
merged_df = pd.merge(left=result_df, right=df_renamed, how='left', on='country')
result_df = merged_df.dropna()

In [68]:
# Inspect the merged table: mean growth rate plus the year-2000 level.
result_df

Unnamed: 0,country,mean_telephone_2001_2019,mean_telephone_2000
0,Afghanistan,41.783678,29000.0
1,Albania,1.598046,152687.0
2,Algeria,5.471196,1761327.0
3,American Samoa,-1.347363,10252.0
4,Andorra,0.781359,34215.0
...,...,...,...
215,Vietnam,5.579703,2542718.0
216,World,-0.566153,975057860.0
217,Yemen,6.518839,346709.0
218,Zambia,1.582911,83326.0


In [69]:
###

In [None]:
# Write the table to the channels21st/internet folder, omitting the index.
result_df.to_csv(
    path_or_buf=f"{path_input}/channels21st/internet/telephone.csv",
    index=False,
)



### Mobile subscription

In [81]:
# Load the ICT adoption data (path relative to the working directory) and keep
# only the mobile-subscription series.
df = pd.read_csv('ict-adoption.csv')[['Entity', 'Year', 'Mobile cellular subscriptions']]
df

Unnamed: 0,Entity,Year,Mobile cellular subscriptions
0,Afghanistan,1960,0.0
1,Afghanistan,1961,
2,Afghanistan,1962,
3,Afghanistan,1963,
4,Afghanistan,1964,
...,...,...,...
13170,Zimbabwe,2018,12908992.0
13171,Zimbabwe,2019,13195902.0
13172,Zimbabwe,2020,13191708.0
13173,Zimbabwe,2021,14257590.0


In [82]:
# Restrict to the 2001-2019 analysis window. .copy() yields an independent
# frame, so adding 'growth_rate' below never writes into a slice of the raw
# data (the resulting pandas warning is silenced globally in this notebook).
df = df[(df['Year'] >= 2001) & (df['Year'] <= 2019)].copy()

# Rows observed in 2001, the first year of the window; only entities that
# appear here are kept. Rows without an entity name are ignored.
valid_countries = df[(df['Year'] == 2001) & df['Entity'].notna()]
df = df[df['Entity'].isin(valid_countries['Entity'].unique())].copy()

# pct_change compares every row with the previous row of the same entity, so
# make the chronological order explicit instead of relying on the file order.
# NOTE(review): a missing year inside an entity makes the change span several
# years, and the handling of missing values depends on the installed pandas
# version (fill_method default) — confirm both are acceptable.
df = df.sort_values(['Entity', 'Year'])
df['growth_rate'] = df.groupby('Entity')['Mobile cellular subscriptions'].pct_change() * 100

In [83]:
 periods = {
    '2001-2019': (2001, 2019)
}

result_df = pd.DataFrame()

for period, (start_year, end_year) in periods.items():

    period_df = df[(df['Year'] >= start_year) & (df['Year'] <= end_year)]
    avg_life_expectancy = period_df.groupby('Entity')['growth_rate'].mean().rename(period)
    
    result_df = pd.concat([result_df, avg_life_expectancy], axis=1)

result_df.reset_index(inplace=True)
result_df['country'] = result_df['index']
result_df = result_df.drop('index', axis=1)
result_df = result_df[['country'] + [col for col in result_df.columns if col != 'country']]

result_df.rename(columns={
    '2001-2019': 'mean_mobile_2001_2019',
}, inplace=True)

result_df = result_df.dropna()
result_df

Unnamed: 0,country,mean_mobile_2001_2019
2,Albania,14.133435
3,Algeria,62.628759
4,American Samoa,0.337172
5,Andorra,6.954486
6,Angola,41.126273
...,...,...
222,Vietnam,35.584174
223,World,12.985248
224,Yemen,36.953674
225,Zambia,35.145824


In [84]:
# Sanity check: rows whose mean growth rate is +infinity (e.g. growth from a
# base of zero). An empty result means there are none.
result_df.loc[
    result_df['mean_mobile_2001_2019'] == np.inf
]

Unnamed: 0,country,mean_mobile_2001_2019


In [85]:
# Summary statistics of the mean growth-rate column.
result_df.describe()

Unnamed: 0,mean_mobile_2001_2019
count,209.0
mean,24.393997
std,24.29098
min,0.337172
25%,8.813575
50%,17.184619
75%,33.132682
max,194.673651


In [86]:
###

In [87]:
# Reload the full file (df was narrowed to 2001-2019 earlier, the next step
# needs year 2000) and keep only the mobile-subscription series.
df = pd.read_csv('ict-adoption.csv')[['Entity', 'Year', 'Mobile cellular subscriptions']]
df

Unnamed: 0,Entity,Year,Mobile cellular subscriptions
0,Afghanistan,1960,0.0
1,Afghanistan,1961,
2,Afghanistan,1962,
3,Afghanistan,1963,
4,Afghanistan,1964,
...,...,...,...
13170,Zimbabwe,2018,12908992.0
13171,Zimbabwe,2019,13195902.0
13172,Zimbabwe,2020,13191708.0
13173,Zimbabwe,2021,14257590.0


In [88]:
# Baseline level: keep only the year-2000 observations of the reloaded data.
df = df.loc[df['Year'] == 2000]

# One row per Entity with the year-2000 value in a column labelled 2000
# (pivot_table averages duplicates and returns floats).
pivot_df = df.pivot_table(
    values='Mobile cellular subscriptions',
    index=['Entity'],
    columns='Year',
).reset_index()

# Align the column names with result_df so both frames can be joined on 'country'.
df_renamed = pivot_df.rename(columns={'Entity': 'country', 2000: 'mean_mobile_2000'})

# Left join keeps every entity of result_df; entities without a year-2000
# value end up with NaN and are removed by the final dropna().
merged_df = pd.merge(left=result_df, right=df_renamed, how='left', on='country')
result_df = merged_df.dropna()

In [89]:
# Inspect the merged table: mean growth rate plus the year-2000 level.
result_df

Unnamed: 0,country,mean_mobile_2001_2019,mean_mobile_2000
0,Albania,14.133435,29791.0
1,Algeria,62.628759,86000.0
2,American Samoa,0.337172,1992.0
3,Andorra,6.954486,23543.0
4,Angola,41.126273,25806.0
...,...,...,...
204,Vietnam,35.584174,788559.0
205,World,12.985248,738157900.0
206,Yemen,36.953674,32042.0
207,Zambia,35.145824,98853.0


In [90]:
###

In [None]:
# Write the table to the channels21st/internet folder, omitting the index.
result_df.to_csv(
    path_or_buf=f"{path_input}/channels21st/internet/mobile.csv",
    index=False,
)

