<a href="https://colab.research.google.com/github/radosty/radosty.github.io/blob/main/giniandfintechextractionandmerger.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import requests
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# European countries to query, as the 3-letter codes the World Bank API accepts.
# Kosovo has no official ISO 3166-1 alpha-3 code; the World Bank identifies it
# as "XKX". The previous "KOS" entry is not a World Bank code, so Kosovo never
# made it into the dataset.
europe_iso3 = [
    "ALB", "AND", "AUT", "BLR", "BEL", "BIH", "BGR", "HRV", "CYP", "CZE",
    "DNK", "EST", "FIN", "FRA", "DEU", "GRC", "HUN", "ISL", "IRL", "ITA",
    "XKX", "LVA", "LIE", "LTU", "LUX", "MLT", "MDA", "MCO", "MNE", "NLD",
    "MKD", "NOR", "POL", "PRT", "ROU", "SMR", "SRB", "SVK", "SVN", "ESP",
    "SWE", "CHE", "UKR", "GBR"
]

# World Bank Indicators API endpoint for the Gini index (SI.POV.GINI),
# limited to 2000-2023 and returned as JSON. Served over HTTPS.
base_url = "https://api.worldbank.org/v2/country/{}/indicator/SI.POV.GINI?date=2000:2023&format=json"

# Upper bound (seconds) per request so a stalled connection cannot hang the run.
request_timeout = 30

# Fetch data: one request per country. A country that cannot be retrieved is
# reported and skipped so the remaining countries are still collected.
all_data = []
for iso3 in europe_iso3:
    url = base_url.format(iso3)
    try:
        response = requests.get(url, timeout=request_timeout)
    except requests.RequestException as exc:
        print(f"Skipping {iso3}: request failed ({exc})")
        continue

    if response.status_code != 200:
        print(f"Skipping {iso3}: HTTP status {response.status_code}")
        continue

    try:
        data = response.json()
    except ValueError as exc:  # body was not valid JSON
        print(f"Skipping {iso3}: response was not valid JSON ({exc})")
        continue

    # The code expects a list of [paging metadata, observations]; anything
    # shorter (e.g. an error message payload) carries no observations.
    if not isinstance(data, list) or len(data) < 2:
        print(f"Skipping {iso3}: response contained no observations")
        continue

    # The observations slot may be null when nothing matches the query, so
    # fall back to an empty list instead of iterating over None.
    country_data = data[1] or []
    for entry in country_data:
        if "value" not in entry:
            continue
        # Entries whose value is null are kept here; they are removed later
        # by the dropna() on "GiniCoefficient".
        all_data.append({
            "Country": entry.get("country", {}).get("value", iso3),
            "ISO3": iso3,
            "Year": entry.get("date"),
            "GiniCoefficient": entry.get("value")
        })

# Create DataFrame
# Declaring the columns up front keeps the frame well-formed even when the
# fetch step collected no rows (otherwise the column lookups below raise an
# opaque KeyError).
df = pd.DataFrame(all_data, columns=["Country", "ISO3", "Year", "GiniCoefficient"])

# Clean data: drop observations without a Gini value.
df.dropna(subset=["GiniCoefficient"], inplace=True)
if df.empty:
    # Fail early with an explicit message rather than deep inside the
    # regression step that consumes these averages.
    raise ValueError(
        "No Gini coefficient observations were retrieved; "
        "cannot compute yearly averages."
    )

# The fetched values may arrive as strings; coerce both columns to numbers.
df["Year"] = pd.to_numeric(df["Year"])
df["GiniCoefficient"] = pd.to_numeric(df["GiniCoefficient"])

# Calculate average Gini coefficient per year (unweighted mean across the
# countries that have an observation for that year).
average_gini = df.groupby("Year")["GiniCoefficient"].mean().reset_index()
average_gini.rename(columns={"GiniCoefficient": "Average Gini"}, inplace=True)

# Prepare data for projections
X = average_gini["Year"].values.reshape(-1, 1)  # Independent variable (Years)
y = average_gini["Average Gini"].values  # Dependent variable (Gini Coefficient)

# Fit Polynomial Regression model: quadratic in the year
# (columns 1, year, year^2 feeding an ordinary least-squares fit).
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)
model = LinearRegression()
model.fit(X_poly, y)

# Predict future years.
# The candidate window starts at 2022, but the historical series can already
# contain 2022/2023. Projecting those years as well would put two rows for the
# same Year into gini_data (and duplicate rows into any later merge on Year),
# so only years without an observed average are projected.
candidate_years = np.arange(2022, 2029)
observed_years = average_gini["Year"].values
future_years = candidate_years[~np.isin(candidate_years, observed_years)].reshape(-1, 1)
future_years_poly = poly.transform(future_years)
future_projections = model.predict(future_years_poly)

# Apply constraints to ensure realistic Gini values (0 ≤ Gini ≤ 100)
future_projections = np.clip(future_projections, 0, 100)

# Combine historical and projected data (observed years first, then projections)
future_data = pd.DataFrame({
    "Year": future_years.flatten(),
    "Average Gini": future_projections
})
gini_data = pd.concat([average_gini, future_data], ignore_index=True)

# Round all numerical values to 2 decimal places
gini_data = gini_data.round(2)

# Save to CSV
gini_data.to_csv("average_gini_projections.csv", index=False)

# Fintech user counts for Europe, read from the CSV published in the GitHub repository.
fintech_source = "https://raw.githubusercontent.com/radosty/radosty.github.io/refs/heads/main/numberoffintechusersinEurope.csv"

# Segment columns that are added together to obtain the overall user count.
segment_columns = ['Digital Payments', 'Digital Assets', 'Neobanking', 'Digital Investment']

# Read the table, append the row-wise total across segments as a new
# 'Total Users' column, then round every numeric column to 2 decimals.
fintech_raw = pd.read_csv(fintech_source)
segment_totals = fintech_raw[segment_columns].sum(axis=1)
fintech_users = fintech_raw.assign(**{'Total Users': segment_totals}).round(2)

# Persist the enriched table (without the index) for the merge step.
fintech_users.to_csv("fintech_users2.csv", index=False)

# Load data for merging: read back the two CSV files written earlier so the
# merge operates on exactly what was persisted to disk.
gini_data1 = pd.read_csv("average_gini_projections.csv")
fintech_users1 = pd.read_csv("fintech_users2.csv")

# Merge datasets on Year.
# Both operands are now the reloaded frames; previously the in-memory
# `fintech_users` was merged with the reloaded Gini data and `fintech_users1`
# was loaded but never used.
merged_data = pd.merge(fintech_users1, gini_data1, on='Year', how='outer')

# Drop rows with any missing values in critical columns. After the outer
# join this removes years that are present in only one of the two datasets.
critical_columns = ['Total Users', 'Average Gini']
merged_data_clean = merged_data.dropna(subset=critical_columns, how='any')

# Round all numerical values in the merged dataset
merged_data_clean = merged_data_clean.round(2)

# Save merged dataset to CSV
merged_data_clean.to_csv("fintechandgini5.csv", index=False)

