<a href="https://colab.research.google.com/github/radosty/radosty.github.io/blob/main/visualistion4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd


# World Bank indicator to download and the single year to request.
indicator = "IT.NET.USER.ZS"
year = 2021

# ISO alpha-3 codes of the countries included in the request.
european_countries = [
    "ALB", "AND", "ARM", "AUT", "AZE", "BEL", "BIH", "BLR", "BGR", "CHE", "CYP", "CZE", "DEU", "DNK", "ESP", "EST",
    "FIN", "FRA", "GBR", "GEO", "GRC", "HRV", "HUN", "IRL", "ISL", "ITA", "KAZ", "KGZ", "LIE", "LTU", "LUX", "LVA",
    "MCO", "MDA", "MKD", "MLT", "MNE", "NLD", "NOR", "POL", "PRT", "ROU", "RUS", "SMR", "SRB", "SVK", "SVN", "SWE",
    "TJK", "TKM", "UKR", "UZB"
]

# All countries are requested in one call; per_page=1000 keeps the result on a single page.
url = f"https://api.worldbank.org/v2/country/{';'.join(european_countries)}/indicator/{indicator}?date={year}&format=json&per_page=1000"

# A timeout stops a stalled connection from hanging the kernel indefinitely.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors instead of silently skipping the rest of the cell.
response.raise_for_status()
data = response.json()

# The code below reads the records from data[1]. Anything shorter (the API
# appears to report request errors as a single message element -- confirm
# against the World Bank API docs) means there is nothing to tabulate.
if not (isinstance(data, list) and len(data) > 1 and data[1]):
    raise ValueError(
        f"World Bank API returned no records for {indicator} in {year}: {data!r}"
    )

# Keep only entries that actually carry a value for the requested year.
records = [
    {
        "Country": record["country"]["value"],
        "Country Code": record["country"]["id"],
        "Year": record["date"],
        "Internet Penetration (% of population)": record["value"],
    }
    for record in data[1]
    if record and record.get("value") is not None
]

df = pd.DataFrame(records)

print(df.head())

# Derive the file name from `year` so it cannot drift from the requested date.
filename = f"europe_internet_penetration_{year}.csv"
df.to_csv(filename, index=False)



      Country Country Code  Year  Internet Penetration (% of population)
0     Albania           AL  2021                                 79.3237
1     Andorra           AD  2021                                 93.8975
2     Armenia           AM  2021                                 78.6123
3     Austria           AT  2021                                 92.5292
4  Azerbaijan           AZ  2021                                 86.0000


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd

# Input files: country-level GDP / internet penetration and the Findex survey data.
gdp_internet_path = "/content/gdpppcinternetpenetration2021.csv"
findex_data_path = "/content/findexdata.csv"

gdp_internet_data = pd.read_csv(gdp_internet_path)
findex_data = pd.read_csv(findex_data_path, encoding='latin1')

# Two-letter -> three-letter country code lookup used to align the GDP file's
# codes with the Findex 'economycode' column.
# NOTE(review): assumes 'Country Code' holds ISO alpha-2 codes and
# 'economycode' holds ISO alpha-3 codes -- confirm against the input files.
iso2_to_iso3 = {
    'AL': 'ALB', 'AD': 'AND', 'AM': 'ARM', 'AT': 'AUT', 'AZ': 'AZE',
    'BY': 'BLR', 'BE': 'BEL', 'BA': 'BIH', 'BG': 'BGR', 'HR': 'HRV',
    'CY': 'CYP', 'CZ': 'CZE', 'DK': 'DNK', 'EE': 'EST', 'FI': 'FIN',
    'FR': 'FRA', 'GE': 'GEO', 'DE': 'DEU', 'GR': 'GRC', 'HU': 'HUN',
    'IS': 'ISL', 'IE': 'IRL', 'IT': 'ITA', 'KZ': 'KAZ', 'KG': 'KGZ',
    'LV': 'LVA', 'LI': 'LIE', 'LT': 'LTU', 'LU': 'LUX', 'MT': 'MLT',
    'MD': 'MDA', 'MC': 'MCO', 'ME': 'MNE', 'NL': 'NLD', 'MK': 'MKD',
    'NO': 'NOR', 'PL': 'POL', 'PT': 'PRT', 'RO': 'ROU', 'RU': 'RUS',
    'SM': 'SMR', 'RS': 'SRB', 'SK': 'SVK', 'SI': 'SVN', 'ES': 'ESP',
    'SE': 'SWE', 'CH': 'CHE', 'TJ': 'TJK', 'TM': 'TKM', 'UA': 'UKR',
    'GB': 'GBR', 'UZ': 'UZB'
}

# Build the GDP/internet frame as a new object (no inplace edits on a slice,
# which would raise SettingWithCopyWarning). Codes that are missing from the
# lookup table map to NaN.
gdp_internet_relevant = (
    gdp_internet_data[['Country', 'Country Code', 'Year', '.GDP PPP', 'Internet Penetration %']]
    .rename(columns={'Country Code': 'economycode', 'Year': 'year'})
    .assign(economycode=lambda frame: frame['economycode'].map(iso2_to_iso3))
)

# Keep only the 2021 Findex rows and the survey columns used downstream.
findex_relevant = findex_data.loc[
    findex_data['year'] == 2021,
    ['economycode', 'anydigpayment', 'year', 'fin5', 'fin6', 'fin14a', 'fin14a1', 'fin14b'],
]

# Inner join: only (economycode, year) pairs present in both sources survive.
merged_data = pd.merge(gdp_internet_relevant, findex_relevant, on=['economycode', 'year'], how='inner')

output_path = "gdpinternetpenetrationfindex7.csv"
merged_data.to_csv(output_path, index=False)



In [None]:
import pandas as pd


# Load the merged GDP / internet penetration / Findex table.
file_path = "/content/gdpinternetpenetrationfindex7.csv"
data = pd.read_csv(file_path)

# Survey columns that are averaged per country, plus the country-level
# indicators carried along with them.
columns_to_average = ['anydigpayment', 'fin5', 'fin6', 'fin14a', 'fin14a1', 'fin14b']
additional_columns = ['.GDP PPP', 'Internet Penetration %']
numerical_cols = columns_to_average + additional_columns

# Drop rows with a missing value in any column used below.
data_filtered = data.dropna(subset=numerical_cols)

# Keep only rows where every survey column is at most 2.
within_range = (data_filtered[columns_to_average] <= 2).all(axis=1)
data_filtered = data_filtered[within_range]

# Per-country mean of every numeric column, with 'Country' kept as a regular column.
averages = data_filtered.groupby('Country', as_index=False)[numerical_cols].mean()

# Round the means to whole numbers and store them as integers.
# NOTE(review): the index below is computed from these rounded values, so any
# fractional part of the means is discarded first -- confirm this is intended.
averages[numerical_cols] = averages[numerical_cols].round().astype(int)

# Index = anydigpayment minus the sum of the five fin* columns.
subtracted_columns = ['fin5', 'fin6', 'fin14a', 'fin14a1', 'fin14b']
averages['Financial Inclusion Index'] = (
    averages['anydigpayment'] - averages[subtracted_columns].sum(axis=1)
)

# Keep only the columns that are exported.
export_columns = ['Country', '.GDP PPP', 'Internet Penetration %', 'Financial Inclusion Index']
final_data = averages[export_columns]

output_path = "financial_inclusion_index_gdp_internet.csv"
final_data.to_csv(output_path, index=False)
