<a href="https://colab.research.google.com/github/radosty/radosty.github.io/blob/main/extractinggdpppandinternetpenetration2021.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
import pandas as pd

# Build one merged table: country-level GDP PPP / internet penetration joined
# to the 2021 Findex rows, keyed on ISO3 economy code and year.

# Input files (Colab upload directory)
gdp_internet_path = "/content/gdpppcinternetpenetration2021.csv"
findex_data_path = "/content/findexdata.csv"

# Load GDP and Internet Penetration dataset
gdp_internet_data = pd.read_csv(gdp_internet_path)

# Load Findex dataset (read as latin1)
findex_data = pd.read_csv(findex_data_path, encoding='latin1')

# ISO2 -> ISO3 lookup used to align the GDP dataset's country codes with the
# Findex 'economycode' column. Codes absent from this mapping become NaN.
iso2_to_iso3 = {
    'AL': 'ALB', 'AD': 'AND', 'AM': 'ARM', 'AT': 'AUT', 'AZ': 'AZE',
    'BY': 'BLR', 'BE': 'BEL', 'BA': 'BIH', 'BG': 'BGR', 'HR': 'HRV',
    'CY': 'CYP', 'CZ': 'CZE', 'DK': 'DNK', 'EE': 'EST', 'FI': 'FIN',
    'FR': 'FRA', 'GE': 'GEO', 'DE': 'DEU', 'GR': 'GRC', 'HU': 'HUN',
    'IS': 'ISL', 'IE': 'IRL', 'IT': 'ITA', 'KZ': 'KAZ', 'KG': 'KGZ',
    'LV': 'LVA', 'LI': 'LIE', 'LT': 'LTU', 'LU': 'LUX', 'MT': 'MLT',
    'MD': 'MDA', 'MC': 'MCO', 'ME': 'MNE', 'NL': 'NLD', 'MK': 'MKD',
    'NO': 'NOR', 'PL': 'POL', 'PT': 'PRT', 'RO': 'ROU', 'RU': 'RUS',
    'SM': 'SMR', 'RS': 'SRB', 'SK': 'SVK', 'SI': 'SVN', 'ES': 'ESP',
    'SE': 'SWE', 'CH': 'CHE', 'TJ': 'TJK', 'UA': 'UKR', 'GB': 'GBR',
    'UZ': 'UZB'
}

# Select and rename the GDP / Internet Penetration columns, then translate the
# country code. Built as a single non-mutating chain so we never modify a
# slice of `gdp_internet_data` in place (which triggers SettingWithCopyWarning).
gdp_internet_relevant = (
    gdp_internet_data[['Country', 'Country Code', 'Year', '.GDP PPP', 'Internet Penetration %']]
    .rename(columns={'Country Code': 'economycode', 'Year': 'year'})
    .assign(economycode=lambda frame: frame['economycode'].map(iso2_to_iso3))
)

# Keep only the Findex columns of interest, restricted to the year 2021
findex_relevant = findex_data.loc[
    findex_data['year'] == 2021,
    ['economycode',  'anydigpayment', 'year', 'fin5', 'fin6', 'fin14a', 'fin14a1', 'fin14b'],
]

# Merge the datasets on 'economycode' (ISO3) and 'year'; the inner join keeps
# only key pairs present in both tables.
merged_data = pd.merge(gdp_internet_relevant, findex_relevant, on=['economycode', 'year'], how='inner')

# Save the merged dataset to a CSV file
output_path = "gdpinternetpenetrationfindex7.csv"
merged_data.to_csv(output_path, index=False)

# Output results
print(f"Merged data saved to: {output_path}")


Merged data saved to: gdpinternetpenetrationfindex7.csv


In [46]:
import pandas as pd

# Compute per-country averages of the Findex answers together with GDP PPP and
# internet penetration, derive a Financial Inclusion Index, and export it.

# NOTE(review): this reads "...findex6.csv", while the merge cell above writes
# "gdpinternetpenetrationfindex7.csv" — confirm which file is intended.
file_path = "/content/gdpinternetpenetrationfindex6.csv"
data = pd.read_csv(file_path)

# Findex answer columns that feed the index
columns_to_average = ['anydigpayment', 'fin5', 'fin6', 'fin14a', 'fin14a1', 'fin14b']

# Country-level context columns carried through to the output
additional_columns = ['.GDP PPP', 'Internet Penetration %']

# All numeric columns that are averaged and then rounded
numerical_cols = columns_to_average + additional_columns

# Drop rows that lack any of the relevant values
data_filtered = data.dropna(subset=numerical_cols)

# Keep only rows where every answer column is at most 2
answers_in_range = (data_filtered[columns_to_average] <= 2).all(axis=1)
data_filtered = data_filtered[answers_in_range]

# Per-country mean of each numeric column
averages = (
    data_filtered
    .groupby('Country')[numerical_cols]
    .mean()
    .reset_index()
)

# Round the means to whole numbers and store them as integers.
# NOTE(review): rounding the averaged answers to integers leaves very little
# variation in the resulting index — confirm this is intended.
rounded_means = averages[numerical_cols].round(0)
averages[numerical_cols] = rounded_means.astype(int)

# Financial Inclusion Index: the fin* columns are subtracted because they are
# coded 1 = yes, 2 = no, whereas anydigpayment is coded 1 = yes, 0 = no.
subtracted_total = sum(
    averages[name] for name in ['fin5', 'fin6', 'fin14a', 'fin14a1', 'fin14b']
)
averages['Financial Inclusion Index'] = averages['anydigpayment'] - subtracted_total

# Keep only the columns needed in the exported table
final_data = averages[['Country', '.GDP PPP', 'Internet Penetration %', 'Financial Inclusion Index']]

# Write the result to CSV
output_path = "financial_inclusion_index_gdp_internet.csv"
final_data.to_csv(output_path, index=False)

# Confirm where the file was written
print(f"Dataset with Financial Inclusion Index, GDP, and Internet Penetration saved to: {output_path}")

Dataset with Financial Inclusion Index, GDP, and Internet Penetration saved to: financial_inclusion_index_gdp_internet.csv
