Libraries

In [1]:
import pandas as pd
import numpy as np
import requests
import folium

import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import pickle as pkl

from pathlib import Path

In [2]:
DATA_DIR = Path("..") / "data"

In [3]:
# Extracting data from Eurostat to choose the countries we are studying
eurostat_satisfaction_query = "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/ilc_pw01?lang=en&isced11=TOTAL&indic_wb=LIFESAT&sex=T&age=Y_GE16&geo=EA20&geo=BE&geo=BG&geo=CZ&geo=DK&geo=DE&geo=EE&geo=IE&geo=EL&geo=ES&geo=FR&geo=HR&geo=IT&geo=CY&geo=LV&geo=LT&geo=LU&geo=HU&geo=MT&geo=NL&geo=AT&geo=PL&geo=PT&geo=RO&geo=SI&geo=SK&geo=FI&geo=SE&geo=IS&geo=NO&geo=CH&geo=UK&geo=ME&geo=MK&geo=AL&geo=RS&geo=TR&geo=XK&time=2022"
satisfaction_response = requests.get(eurostat_satisfaction_query)
satisfaction_json = satisfaction_response.json()

countries_id = satisfaction_json["dimension"]["geo"]["category"]["index"]
satisfaction_values = satisfaction_json["value"]
countries_full_name = satisfaction_json["dimension"]["geo"]["category"]["label"]

satisfaction_values_dict = {}
satisfaction_per_country_code = {}
final_satisf_dict = {}

for id in countries_id.values():
    satisfaction_values_dict[id] = satisfaction_values.get(str(id))

for i, j in zip(countries_id, satisfaction_values_dict):
    satisfaction_per_country_code[i] = satisfaction_values_dict[j]

for i, j in zip(countries_full_name.values(), satisfaction_per_country_code):
    final_satisf_dict.update({i: satisfaction_per_country_code[j]})


eurostat_gpd_query = "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/tipsau10?lang=en&unit=CP_MNAC&geo=EA20&geo=BE&geo=BG&geo=CZ&geo=DK&geo=DE&geo=EE&geo=IE&geo=EL&geo=ES&geo=FR&geo=HR&geo=IT&geo=CY&geo=LV&geo=LT&geo=LU&geo=HU&geo=MT&geo=NL&geo=AT&geo=PL&geo=PT&geo=RO&geo=SI&geo=SK&geo=FI&geo=SE&time=2022"
gpd_response = requests.get(eurostat_gpd_query)
gpd_json = gpd_response.json()

gpd_values = gpd_json["value"]

gpd_values_dict = {}
gpd_per_country_code = {}
final_gpd_dict = {}

for id in countries_id.values():
    gpd_values_dict[id] = gpd_values.get(str(id))

for i, j in zip(countries_id, gpd_values_dict):
    gpd_per_country_code[i] = gpd_values_dict[j]

for i, j in zip(countries_full_name.values(), gpd_per_country_code):
    final_gpd_dict.update({i: gpd_per_country_code[j]})

In [4]:
# Creating a Dataframe with the info found
eurostat_data = pd.DataFrame(
    {
        "Country": final_satisf_dict.keys(),
        "Satisfaction": final_satisf_dict.values(),
        "GPD": final_gpd_dict.values(),
    }
)

eurostat_data = eurostat_data.drop(0)  # Aqui elemininamos los 23 paises
eurostat_data.loc[37, "Country"] = "Kosovo"

eurostat_data

Unnamed: 0,Country,Satisfaction,GPD
1,Belgium,7.6,554044.3
2,Bulgaria,5.6,167809.0
3,Czechia,7.4,6786742.0
4,Denmark,7.5,2831643.9
5,Germany,6.5,3876810.0
6,Estonia,7.2,36011.1
7,Ireland,7.4,506282.4
8,Greece,6.7,206620.4
9,Spain,7.1,1346377.0
10,France,7.0,2639092.0


In [5]:
# Transforming info to make it easy to work with
# Converting all coins to Euro to be able to study them
frankfurter_url = "https://api.frankfurter.app"
ffdate = "2022-12-21"
non_euro_coins = ["RON", "HUF", "CHF", "CZK", "SEK", "PLN", "DKK", "BGN"]
conversors = {}

hrk = (
    7,
    53450,
)  # since 2023 Croatia has been using the euro, so we take the last exchange rate from 2022

hrk = float(hrk[0])

for coin in non_euro_coins:
    endpoint = f"{frankfurter_url}/{ffdate}?to={coin}"
    conversor_dict = requests.get(endpoint).json()
    conversor = conversor_dict["rates"][coin]
    conversors[coin] = conversor

conversors["HRK"] = hrk

non_euro_countries_coins = {
    "RON": "Romania",
    "HUF": "Hungary",
    "CHF": "Switzerland",
    "CZK": "Czechia",
    "SEK": "Sweden",
    "PLN": "Poland",
    "DKK": "Denmark",
    "BGN": "Bulgaria",
    "HRK": "Croatia",
}


# Filter the rows of the eurostat_data dataframe where Country is equal to non_euro_countries_coins

filtered_df = eurostat_data[
    eurostat_data["Country"].isin(non_euro_countries_coins.values())
]


# Iterating over the filtered rows and dividing the GPD value by the corresponding conversion in the conversors dictionary

# iterrows() returns the index and the row as a Series
for index, row in filtered_df.iterrows():

    country = row["Country"]

    """
    The following line of code is making a list of the keys of the non_euro_countries_coins dictionary to be able
    to make indexing. Then we are making a list from the values of the dictionary and we are getting the index of the
    country we are iterating.
    """
    coin = list(non_euro_countries_coins.keys())[
        list(non_euro_countries_coins.values()).index(country)
    ]

    gpd_value = row["GPD"]
    conversion_rate = conversors[coin]
    converted_gpd = gpd_value / conversion_rate
    eurostat_data.loc[index, "GPD"] = round(converted_gpd, 3)

In [6]:
# Creating stardarized data to make a ponderation
eurostat_data["GPD"] = round(
    eurostat_data["GPD"].fillna(eurostat_data["GPD"].mean()), 3
)
eurostat_data["Satisfaction"] = eurostat_data["Satisfaction"].fillna(
    eurostat_data["Satisfaction"].mean()
)
eurostat_data["Standarized Satisfaction"] = round(
    (eurostat_data["Satisfaction"] - eurostat_data["Satisfaction"].mean())
    / eurostat_data["Satisfaction"].std(),
    3,
)
eurostat_data["Standarized GPD"] = round(
    (eurostat_data["GPD"] - eurostat_data["GPD"].mean()) / eurostat_data["GPD"].std(), 3
)
eurostat_data["Ponderation"] = (
    eurostat_data["Standarized Satisfaction"] * 0.6
    + eurostat_data["Standarized GPD"] * 0.4
)

In [7]:
eurostat_data

Unnamed: 0,Country,Satisfaction,GPD,Standarized Satisfaction,Standarized GPD,Ponderation
1,Belgium,7.6,554044.3,0.873,-0.043,0.5066
2,Bulgaria,5.6,85800.695,-2.993,-0.651,-2.0562
3,Czechia,7.4,280235.445,0.486,-0.398,0.1324
4,Denmark,7.5,380699.637,0.68,-0.268,0.3008
5,Germany,6.5,3876810.0,-1.253,4.276,0.9586
6,Estonia,7.2,36011.1,0.1,-0.716,-0.2264
7,Ireland,7.4,506282.4,0.486,-0.105,0.2496
8,Greece,6.7,206620.4,-0.867,-0.494,-0.7178
9,Spain,7.1,1346377.0,-0.094,0.987,0.3384
10,France,7.0,2639092.0,-0.287,2.667,0.8946


In [None]:
# Upload of ponderations to Airtable
airtable_base_url = "https://api.airtable.com/v0"
atTOKEN = (
    "patXW3EcWQjnbHnOb.d33113d79b3f425be1e4712fdf16cb0291bb974b6ce1d5653435db3faddb9ffb"
)
BASE_ID = "appZYI77p6tdWcbXo"
TABLE_ID1 = "tblfPPHv9mkLBVYK2"

headers = {"Authorization": f"Bearer {atTOKEN}", "Content-Type": "application/json"}

eurostat_table_endpoint = f"{airtable_base_url}/{BASE_ID}/{TABLE_ID1}"

for i in range(0, eurostat_data.shape[0], 10):
    try:

        datos_subir = {
            "records": [
                {"fields": eurostat_data.iloc[j, :].to_dict()} for j in range(i, i + 10)
            ],
            "typecast": True,
        }

    except:

        datos_subir = {
            "records": [
                {"fields": eurostat_data.iloc[j, :].to_dict()}
                for j in range(i, eurostat_data.shape[0])
            ],
            "typecast": True,
        }
    response = requests.post(
        url=eurostat_table_endpoint, json=datos_subir, headers=headers
    )

In [8]:
# Choosing our countries to study
eurostat_data.sort_values("Ponderation", ascending=False).head(3).reset_index().drop(
    "index", axis=1
)

Unnamed: 0,Country,Satisfaction,GPD,Standarized Satisfaction,Standarized GPD,Ponderation
0,Switzerland,8.0,586778.948,1.646,0.0,0.9876
1,Germany,6.5,3876810.0,-1.253,4.276,0.9586
2,France,7.0,2639092.0,-0.287,2.667,0.8946


In [9]:
# Countries to study
final_cities = {
    "bern": {"lat": 46.94809, "lon": 7.44744},
    "berlin": {"lat": 52.52437, "lon": 13.41053},
    "paris": {"lat": 48.85341, "lon": 2.3488},
}

cities = list(final_cities.keys())

In [10]:
with open(DATA_DIR / "cities.pkl", "bw") as f:
    pkl.dump(cities, f)

In [11]:
# Data extraction of current weather in each country
API_KEY_opw = "b4af990166923720f3f2ca0daa2ee53e"
search_url_opw = "https://api.openweathermap.org/data/2.5/weather"

weathers = []

for fcountry in final_cities.items():
    endpoint = f'{search_url_opw}?lat={fcountry[1]["lat"]}&lon={fcountry[1]["lon"]}&appid={API_KEY_opw}&units=metric'
    response = requests.get(url=endpoint)
    weather = response.json()
    weathers.append(weather)

In [12]:
# Creating a DataFrame with the info of weathers extracted
countries_id = []
cities = []
mains = []
temperatures = []
temps_min = []
temps_max = []
pressures = []
humidities = []
wind_speeds = []

for weather in weathers:

    country = weather["sys"].get("country")
    city = weather.get("name")
    main = weather["weather"][0].get("main")
    temperature = weather["main"].get("temp")
    temp_min = weather["main"].get("temp_min")
    temp_max = weather["main"].get("temp_max")
    pressure = weather["main"].get("pressure")
    humidity = weather["main"].get("humidity")
    wind_speed = weather["wind"].get("speed")

    countries_id.append(country)
    cities.append(city)
    mains.append(main)
    temperatures.append(temperature)
    temps_min.append(temp_min)
    temps_max.append(temp_max)
    pressures.append(pressure)
    humidities.append(humidity)
    wind_speeds.append(wind_speed)

weather_data = pd.DataFrame(
    {
        "Country": countries_id,
        "City": cities,
        "Main": mains,
        "Temperature (ºC)": temperatures,
        "Minimun temperature (ºC)": temps_min,
        "Maximun temperature (ºC)": temps_max,
        "Pressure (hPa)": pressures,
        "Humidity (%)": humidities,
        "Wind speed (meter/sec)": wind_speeds,
    }
)

weather_data

Unnamed: 0,Country,City,Main,Temperature (ºC),Minimun temperature (ºC),Maximun temperature (ºC),Pressure (hPa),Humidity (%),Wind speed (meter/sec)
0,CH,Bern,Clouds,12.03,9.95,13.54,1026,65,1.03
1,DE,Berlin,Clouds,10.25,8.94,11.85,1008,83,3.13
2,FR,Paris,Clear,15.65,13.07,17.28,1025,65,2.06


In [None]:
# Upload of current weather data to Airtable
TABLE_ID2 = "tblaqVEaHP4edqx7T"

headers = {"Authorization": f"Bearer {atTOKEN}", "Content-Type": "application/json"}

weather_table_endpoint = f"{airtable_base_url}/{BASE_ID}/{TABLE_ID2}"

for i in range(0, weather_data.shape[0], 10):
    try:

        datos_subir = {
            "records": [
                {"fields": weather_data.iloc[j, :].to_dict()} for j in range(i, i + 10)
            ],
            "typecast": True,
        }

    except:

        datos_subir = {
            "records": [
                {"fields": weather_data.iloc[j, :].to_dict()}
                for j in range(i, weather_data.shape[0])
            ],
            "typecast": True,
        }
    response = requests.post(
        url=weather_table_endpoint, json=datos_subir, headers=headers
    )

In [13]:
# Data extraction of interesting places (museums, colleges, churchs)
categories_data = pd.read_csv(DATA_DIR / "categories id.csv")

categorie_Ids = categories_data[
    categories_data["Category Label"].isin(
        ["Museums", "Colleges and Universities", "Spiritual Centers"]
    )
]["Category ID"].to_list()

CLIENT_ID_fsq = "MMU5IDIAX0XGQO5W15DRXXHZSBWH0TRIYV3WT342FAC1EQBI"
CLIENT_SECRET_fsq = "3XN11U5ERZ0ZFFHXZPQLO0LKIPFRWFXAWEJFHKZZO12LAFJN"
API_KEY_fsq = "fsq3OpNt1w78JygPc1Dt+Om4ekR/16kTn94lfiYSWW3Ci74="
search_url_fsq = "https://api.foursquare.com/v3/places/search"

In [14]:
fsq_ids = []
countries = []
localities = []
categories = []
names = []
locations = []
latitudes = []
longitudes = []
institutions = ["museum", "College and University", "Spiritual Center"]

for fcountry in final_cities.items():

    for institution in institutions:

        url_params = {
            "query": institution,
            "ll": f"{fcountry[1]['lat']},{fcountry[1]['lon']}",
            "open_now": None,
            "limit": 50,
            "radius": 10_000,
            "category": ",".join(categorie_Ids),
        }

        headers = {"accept": "application/json", "Authorization": API_KEY_fsq}
        response = requests.get(url=search_url_fsq, headers=headers, params=url_params)
        foursquare_json = response.json()

        for value in range(len(foursquare_json["results"])):

            fsq_id = foursquare_json["results"][value].get("fsq_id")
            country = foursquare_json["results"][value]["location"].get("country")
            locality = foursquare_json["results"][value]["location"].get("locality")
            category = foursquare_json["results"][value]["categories"][0].get("name")
            name = foursquare_json["results"][value].get("name")
            location = foursquare_json["results"][value]["location"].get(
                "formatted_address"
            )
            latitude = foursquare_json["results"][value]["geocodes"]["main"].get(
                "latitude"
            )
            longitude = foursquare_json["results"][value]["geocodes"]["main"].get(
                "longitude"
            )

            fsq_ids.append(fsq_id)
            countries.append(country)
            localities.append(locality)
            categories.append(category)
            names.append(name)
            locations.append(location)
            latitudes.append(latitude)
            longitudes.append(longitude)

foursquare_data = pd.DataFrame(
    {
        "Fsq_Id": fsq_ids,
        "Country": countries,
        "Locality": localities,
        "Category": categories,
        "Name": names,
        "Location": locations,
        "Latitude": latitudes,
        "Longitude": longitudes,
    }
)

In [15]:
# Dataframe with the data about interesting places extracted
foursquare_data

Unnamed: 0,Fsq_Id,Country,Locality,Category,Name,Location,Latitude,Longitude
0,5214e0cd11d227044a812cba,CH,,College and University,Konservatorium Bern,,46.948044,7.449014
1,4bd5b3207b1876b0cb268b86,CH,Bern,College and University,IFA Bern,"Bubenbergplatz 10, 3008 Bern",46.947420,7.437871
2,ca0043d7b0254c1d47e29afb,CH,Bern,College and University,IFA Weiterbildung AG,"Bogenschützenstr., 3011 Bern",46.947536,7.437948
3,4fd86343e4b02579a1850c95,CH,,College and University,Kuppelsaal Uni Bern,,46.950354,7.438110
4,2bb83dd2a36344d6ed18aeed,CH,Bern,College and University,Dodis - Diplomatische Dokumente der Schweiz,"Hallwylstr. 4, 3005 Bern",46.940758,7.447425
...,...,...,...,...,...,...,...,...
395,4bea8d9ba9900f4772981640,FR,Paris,Community and Government,Sanctuaire Sainte-Thérèse,40 rue Jean de la Fontaine (Apprentis d'Auteui...,48.851557,2.271186
396,4cc2e10cb2beb1f7459d124c,FR,Neuilly-sur-Seine,Church,Église Saint Pierre,"boulevard Jean Mermoz, 92200 Neuilly-sur-Seine",48.884183,2.271652
397,4adcda09f964a520f83321e3,FR,Saint-Denis,Structure,Basilica of Saint Denis (Basilique Saint-Denis),"1 rue de la Légion d'Honneur, 93200 Saint-Denis",48.935497,2.359765
398,dbbc68275e11478976a4ed8e,FR,Paris,Spiritual Center,Paroisses Catholiques,"2 avenue Stéphane Mallarmé, 75017 Paris",48.886862,2.293162


In [None]:
# Upload of interesting places to Airtable
TABLE_ID3 = "tblDuWR3nXGmk9OHl"

headers = {"Authorization": f"Bearer {atTOKEN}", "Content-Type": "application/json"}

foursquare_table_endpoint = f"{airtable_base_url}/{BASE_ID}/{TABLE_ID3}"

for i in range(0, foursquare_data.shape[0], 10):
    try:

        datos_subir = {
            "records": [
                {"fields": foursquare_data.iloc[j, :].to_dict()}
                for j in range(i, i + 10)
            ],
            "typecast": True,
        }

    except:

        datos_subir = {
            "records": [
                {"fields": foursquare_data.iloc[j, :].to_dict()}
                for j in range(i, foursquare_data.shape[0])
            ],
            "typecast": True,
        }
    response = requests.post(
        url=foursquare_table_endpoint, json=datos_subir, headers=headers
    )

Now we proceed to analyze the data obtained

In [16]:
# Visualization of the ponderations
px.bar(
    data_frame=eurostat_data,
    x="Country",
    y="Ponderation",
    color="Standarized Satisfaction",
)

In [17]:
# Visualization of current weather data
px.bar(
    data_frame=weather_data,
    x="City",
    y="Temperature (ºC)",
    hover_data=[
        "Main",
        "Minimun temperature (ºC)",
        "Maximun temperature (ºC)",
        "Pressure (hPa)",
        "Humidity (%)",
        "Wind speed (meter/sec)",
    ],
    color="Humidity (%)",
    color_continuous_scale=[(0, "lightblue"), (1, "blue")],
)

In [18]:
# Visualization of Valencia map with the interesting places
sf_map_bern = folium.Map(
    location=[final_cities["bern"]["lat"], final_cities["bern"]["lon"]],
    zoom_start=14,
)

interest_places_bern = folium.map.FeatureGroup()

for lat, lon, name in zip(
    foursquare_data[foursquare_data["Country"] == "CH"]["Latitude"],
    foursquare_data[foursquare_data["Country"] == "CH"]["Longitude"],
    foursquare_data["Name"],
):

    interest_places_bern.add_child(folium.Marker(location=[lat, lon], popup=name))

sf_map_bern.add_child(interest_places_bern)

sf_map_bern

In [19]:
# Visualization of London map with the interesting places
sf_map_berlin = folium.Map(
    location=[final_cities["berlin"]["lat"], final_cities["berlin"]["lon"]],
    zoom_start=12,
)

interest_places_berlin = folium.map.FeatureGroup()

for lat, lon, name in zip(
    foursquare_data[foursquare_data["Country"] == "DE"]["Latitude"],
    foursquare_data[foursquare_data["Country"] == "DE"]["Longitude"],
    foursquare_data["Name"],
):

    interest_places_berlin.add_child(folium.Marker(location=[lat, lon], popup=name))

sf_map_berlin.add_child(interest_places_berlin)

sf_map_berlin

In [20]:
# Visualization of Amsterdam map with the interesting places
sf_map_paris = folium.Map(
    location=[final_cities["paris"]["lat"], final_cities["paris"]["lon"]],
    zoom_start=14,
)

interest_places_paris = folium.map.FeatureGroup()

for lat, lon, name in zip(
    foursquare_data[foursquare_data["Country"] == "FR"]["Latitude"],
    foursquare_data[foursquare_data["Country"] == "FR"]["Longitude"],
    foursquare_data["Name"],
):

    interest_places_paris.add_child(folium.Marker(location=[lat, lon], popup=name))

sf_map_paris.add_child(interest_places_paris)

sf_map_paris