In [None]:
import os
import pandas as pd
import json
import folium
import copy
from itertools import chain
import numpy as np

#### 1. Europe

In [None]:
# Load the unemployment-rate table and keep, in a single chained step, only
# the 2016 / "Total" sex rows that are not "European..."/"Euro..." aggregates,
# restricted to the country name and its value.
europe_unemploy_rate_path = r'topojson/lfsa_urgan_1_Data.csv'
europe_unemploy_rate = pd.read_csv(europe_unemploy_rate_path).loc[
    lambda raw: (raw["TIME"] == 2016)
    & (raw["SEX"] == "Total")
    & ~raw["GEO"].str.contains("European|Euro"),
    ["GEO", "Value"],
]

# Align the country labels with the NAME property used in the topology file.
europe_unemploy_rate.loc[
    europe_unemploy_rate["GEO"].str.contains("Germany"), "GEO"
] = "Germany"
europe_unemploy_rate.loc[
    europe_unemploy_rate["GEO"].str.contains("Former Yugoslav Republic of Macedonia"), "GEO"
] = "The former Yugoslav Republic of Macedonia"

print("We have the unemployement rate the following states: ", len(europe_unemploy_rate))
europe_unemploy_rate

In [None]:
# Load the Europe topology (geometry of the states).
europe_topo_path = r'topojson/europe.topojson.json'
# Use a context manager so the file handle is closed deterministically, and
# read as UTF-8 (the JSON interchange encoding) so state names do not depend
# on the platform's default locale encoding.
with open(europe_topo_path, encoding="utf-8") as topo_file:
    topo_data = json.load(topo_file)

# Names of the states whose boundaries are present in the topology.
topo_states = [state["properties"]["NAME"] for state in topo_data["objects"]["europe"]["geometries"]]
print("We draw the boundaries of the following states:")
len(topo_states), topo_states

In [None]:
# We don't have the data of all the states: collect the states that are drawn
# in the topology but have no row in the unemployment table.
# The result is sorted so the displayed list is the same on every run
# (iteration order of a set of strings is not stable across kernels).
missing_states = sorted(set(topo_states) - set(europe_unemploy_rate.GEO))
print("We are missing the unemployement rate about these states:")
len(missing_states), missing_states

In [None]:
def fill_invalid(state):
    """Return the overlay fill opacity for ``state``.

    States listed in the module-level ``missing_states`` get 0.7 (so the
    overlay colour is visible); every other state gets 0 (fully transparent).
    """
    return 0.7 if state in missing_states else 0

In [None]:
def optimize(data):
    """Blank out every arc of a TopoJSON dict that no geometry references.

    Walks all geometries of all objects in ``data['objects']``, collects the
    indices of the arcs they use (a negative index ``i`` refers to arc ``~i``),
    and replaces each unreferenced entry of ``data['arcs']`` with an empty
    list. ``data`` is modified in place and also returned.
    """
    referenced = set()
    for topo_object in data['objects'].values():
        for geom in topo_object['geometries']:
            # MultiPolygon arcs carry one extra nesting level (polygon -> ring).
            rings = (
                chain.from_iterable(geom['arcs'])
                if geom['type'] == 'MultiPolygon'
                else geom['arcs']
            )
            referenced.update(
                arc if arc >= 0 else ~arc
                for arc in chain.from_iterable(rings)
            )

    arcs = data['arcs']
    for index, _ in enumerate(arcs):
        if index not in referenced:
            arcs[index] = []
    return data

In [None]:
# Initial map centre passed to folium.Map (presumably [latitude, longitude] -- confirm against folium docs).
europe_location = [55, 15]

m = folium.Map(location=europe_location, zoom_start=3)

# Fill the states with colors depending on their unemployment rate:
# rows of europe_unemploy_rate (GEO -> Value) are matched to the topology
# geometries through their 'properties.NAME' field.
m.choropleth(
    geo_data=topo_data, 
    topojson='objects.europe',
    data=europe_unemploy_rate,
    columns=['GEO', 'Value'],
    # (disabled) explicit colour thresholds:
    #threshold_scale=[0, 5, 10, 15, 20, 25],
    key_on='properties.NAME',
    fill_color='BuGn', 
    fill_opacity=0.7, 
#     line_opacity=1,
    legend_name='Percentage of unemployement (%)')

# Add one extra TopoJson layer per state, on top of the choropleth:
# - states for which we have no info are filled with gray (see fill_invalid),
# - every state gets a popup showing its rate (or "not available").
for state in topo_data['objects']['europe']['geometries']:
    state_name = state["properties"]["NAME"]
    # Rate rows whose GEO equals this state's name (empty when the state is missing).
    unemp_rate = europe_unemploy_rate.Value[europe_unemploy_rate.GEO == state_name]
    if unemp_rate.size == 0:
        unemp_rate = "not available"
    else:
        # Use the first matching row, formatted with one decimal.
        unemp_rate = "{0:.1f}".format(unemp_rate.iloc[0])

    # Work on a deep copy so the shared topo_data is not altered: the copy is
    # reduced to this single geometry and optimize() then blanks every arc the
    # geometry does not reference.
    tdata = copy.deepcopy(topo_data)
    tdata['objects']['europe']['geometries'] = [state]
    folium.TopoJson(
        optimize(tdata),
        'objects.europe',
        name=state['id'],
        style_function=lambda geometry: {
                # (disabled) earlier attempt with a transparent outline/fill:
#                 'color' : 'transparent',
#                 'fillColor': 'transparent',
                # Grey fill; it is only visible for states without data,
                # because fill_invalid returns opacity 0 for all other states.
                'fillColor': "#424949", 
                'fillOpacity': fill_invalid(geometry["properties"]["NAME"]),
            }
    ).add_child(folium.Popup("<span style=\"font-weight:bold;\"> Rate: </span> " + unemp_rate)).add_to(m)

# (disabled) export the map to a standalone HTML file:
#m.save('test.html')

# Last expression of the cell: display the map.
m

#### 2-3. Switzerland
Important notions (from https://www.amstat.ch/v2/definition.jsp?lang=it):
- The unemployment rate is computed as unemployed people / active population, where the unemployed people are the people who are looking for a job without having one. 
- "Numero dei disoccupati registrati, nel giorno di riferimento (ultimo giorno del mese), diviso per il numero di persone attive, moltiplicato per 100."
- Persone attive: Persone che svolgono un’attività lucrativa di almeno un’ora per settimana e le persone non occupate.
- Il tasso di disoccupazione è calcolato prendendo come denominatore il numero delle persone attive

Problems for question 3:
- The site does not allow downloading the unemployment rate broken down by age and nationality at the same time. However, it does allow downloading the number of unemployed people broken down in this way.
- We are interested in the rate; therefore, when downloading the number of unemployed people broken down by age and nationality, we still need the active population of each canton (rate = number of unemployed people / active population). We could not find the latter on the site, so we computed it as: (number of unemployed people in canton Ci) / (unemployment rate of canton Ci). Since the published rate is a percentage, the code multiplies this quotient by 100.

In order to avoid downloading one dataset per task we decided to proceed as follows:
1. Download the dataset with the unemployment rate and the number of unemployed people per canton. From this dataset we compute the active population of each canton.
2. Download the complete dataset with the unemployed people broken down by canton, age class and nationality since, as explained before, the site does not allow downloading this information together with the rates.
3. With the latter dataset and the computed active population, we compute the rate of unemployed people for each category.
4. To answer the questions we will use groupby on the field we are interested in.

In [None]:
# Helper function (integers are in an invalid format)
def parseInt(numStr):
    """Parse a count into an ``int``, ignoring every non-digit character.

    Parameters
    ----------
    numStr : str or number
        Usually a string whose digits may be interleaved with other
        characters (separators, spaces, ...). A value that is not a string
        (for example one pandas already parsed as a number) is converted
        with ``int()`` directly instead of failing on iteration.

    Returns
    -------
    int
        The integer formed by the digits of ``numStr``, in order.

    Raises
    ------
    ValueError
        If a string input contains no digit at all.
    """
    if not isinstance(numStr, str):
        # Already numeric: iterating over it would raise TypeError.
        return int(numStr)

    digits = "".join(char for char in numStr if char.isdigit())
    if not digits:
        raise ValueError("no digits found in {!r}".format(numStr))
    return int(digits)

In [None]:
# Load the Swiss topology (geometry of the cantons).
ch_topo_path = r'topojson/ch-cantons.topojson.json'
# Context manager so the file handle is closed as soon as the JSON is parsed.
with open(ch_topo_path, encoding="utf-8") as topo_file:
    topo_data_ch = json.load(topo_file)

# Names of the cantons whose boundaries are present in the topology.
topo_cantons = [state["properties"]["name"] for state in topo_data_ch["objects"]["cantons"]["geometries"]]
print("We draw the boundaries of the following cantons:")
len(topo_cantons), ", ".join(topo_cantons)

    1. Load the data regarding the unemployment rate in order to compute the active population per canton.

In [None]:
ch_unemploy_rate_path = r'topojson/ch_unemploy_rate.csv'

# 1. Source (Italian) headers -> English column names; columns that are not
#    listed here are dropped.
column_rename = {
    "Cantone": "Canton",
    "Tasso di disoccupazione": "Unemployement rate",
    "Disoccupati registrati": "Unemployed",
}
ch_unemploy_rate = (
    pd.read_csv(ch_unemploy_rate_path)[list(column_rename)]
    .rename(columns=column_rename)
)

# 2. The counts are read as text: turn them into integers.
ch_unemploy_rate["Unemployed"] = ch_unemploy_rate["Unemployed"].apply(parseInt)

# 3. Active population = 100 * unemployed / rate, truncated to an integer.
ch_unemploy_rate["Active Population"] = (
    ch_unemploy_rate["Unemployed"]
    .mul(100)
    .div(ch_unemploy_rate["Unemployement rate"])
    .astype("int")
)

ch_unemploy_rate.head()

    2-3. Load the complete dataset and compute the unemployement ratios

In [None]:
ch_unemploy_rate_path = r'topojson/ch_unemploy_rate_byage_bynationality.csv'

# Dataset cleaning.
# 1. Source (Italian) headers -> English column names; unlisted columns are dropped.
column_rename = {
    "Cantone": "Canton",
    "Nazionalità": "Nationality",
    "Classi d'età 15-24, 15-49, 50 anni e più": "Age Classes",
    "Persone in cerca d'impiego": "Jobseekers",
    "Disoccupati registrati": "Unemployed Jobseekers",
    "Persone in cerca d'impiego non disoccupate": "Employed Jobseekers",
}

# 2. Select/rename the columns and drop every row holding a "Totale" cell.
ch_unemploy_rate_complete = (
    pd.read_csv(ch_unemploy_rate_path)[list(column_rename)]
    .rename(columns=column_rename)
    .loc[lambda frame: ~(frame == "Totale").any(axis=1)]
)

# 3. The counts are read as text: turn them into integers.
for count_column in ("Jobseekers", "Unemployed Jobseekers", "Employed Jobseekers"):
    ch_unemploy_rate_complete[count_column] = (
        ch_unemploy_rate_complete[count_column].apply(parseInt)
    )

# 4. Attach the canton's active population to each of its rows.
ch_unemploy_rate_complete = pd.merge(
    ch_unemploy_rate_complete,
    ch_unemploy_rate[["Canton", "Active Population"]],
    how='left',
    on="Canton",
)

# --- Steps 5 and 6 are currently disabled (kept for reference) ---
# # 5. Convert population to ratio
# act_pop = ch_unemploy_rate_complete["Active Population"]
# ch_unemploy_rate_complete["Jobseekers"] = round(ch_unemploy_rate_complete["Jobseekers"]*100/act_pop, 2)
# ch_unemploy_rate_complete["Unemployed Jobseekers"] = round(ch_unemploy_rate_complete["Unemployed Jobseekers"]*100/act_pop, 2)
# ch_unemploy_rate_complete["Employed Jobseekers"] = round(ch_unemploy_rate_complete["Employed Jobseekers"]*100/act_pop, 2)

# # 6. Canton's name rename, nationality rename
# # create dictionary to map cantons between the name of the cantons in the topology and the one in the dataset
# # (in the dataset there is the Italian name while in the topology there is the "local" name)
# # generate the correct Canton column
# canton_names = [name for canton in topo_data_ch['objects']['cantons']["geometries"] for name in [canton["properties"]["name"]]*6]
# ch_unemploy_rate_complete.Canton = canton_names

# Nationality labels in English: "stranieri" -> "Foreigners", anything else -> "Swiss".
ch_unemploy_rate_complete["Nationality"] = ch_unemploy_rate_complete["Nationality"].map(
    lambda nationality: "Swiss" if nationality != "stranieri" else "Foreigners")

ch_unemploy_rate_complete.head(12)

    4. Answer the questions grouping on the field we are interested in.

- **question 2**: ... "unemployment rates in Switzerland at a recent date"

In [None]:
# Collapse the per-(nationality, age class) rows into one row per canton.
# - The three count columns are summed.
# - "Active Population" is a canton-level value repeated on every row, so it
#   is taken once ("first"); summing it would multiply it by the number of
#   rows of the canton.
# - String columns (Nationality, Age Classes) are deliberately left out.
count_columns = ["Jobseekers", "Unemployed Jobseekers", "Employed Jobseekers"]
aggregations = {column: "sum" for column in count_columns}
aggregations["Active Population"] = "first"
ch_unemploy_rate_canton = (
    ch_unemploy_rate_complete
    .groupby(by="Canton")
    .agg(aggregations)
    .reset_index()
    .reindex(columns=["Canton"] + count_columns + ["Active Population"])
)

# Express the counts as a percentage of the canton's active population: the
# maps built from this frame are labelled as percentages, and the per-row
# conversion that used to do this is disabled in the cleaning cell.
for column in count_columns:
    ch_unemploy_rate_canton[column] = (
        100 * ch_unemploy_rate_canton[column] / ch_unemploy_rate_canton["Active Population"]
    ).round(2)

First show only the unemployed people ratio

In [None]:
# Initial centre and zoom of the Switzerland maps.
ch_location = [46.8, 8.5]
m = folium.Map(zoom_start=8, location=ch_location)

# Colour each canton by the 'Unemployed Jobseekers' column of
# ch_unemploy_rate_canton; rows are matched to the topology geometries through
# their 'properties.name' field.
m.choropleth(
    data=ch_unemploy_rate_canton,
    columns=['Canton', 'Unemployed Jobseekers'],
    geo_data=topo_data_ch,
    topojson='objects.cantons',
    key_on='properties.name',
    # threshold_scale=[0, 5, 10, 15, 20, 25],  (disabled)
    # line_opacity=1,                          (disabled)
    fill_opacity=0.7,
    fill_color='BuGn',
    legend_name='Percentage of unemployement (%)',
)
m

Then show all the jobseekers, considering the employed ones too.

In [None]:
m = folium.Map(zoom_start=8, location=ch_location)

# Colour each canton by the 'Jobseekers' column of ch_unemploy_rate_canton;
# rows are matched to the topology geometries through their
# 'properties.name' field.
m.choropleth(
    data=ch_unemploy_rate_canton,
    columns=['Canton', 'Jobseekers'],
    geo_data=topo_data_ch,
    topojson='objects.cantons',
    key_on='properties.name',
    # threshold_scale=[0, 5, 10, 15, 20, 25],  (disabled)
    # line_opacity=1,                          (disabled)
    fill_opacity=0.7,
    fill_color='BuGn',
    legend_name='Percentage of unemployement (%)',
)
m

TODO: comment on the differences

- **question 3**: ...

In [None]:
# Group by Canton and Nationality (i.e. collapse the age classes).
# The count columns are summed; "Active Population" is a canton-level value
# repeated on every row, so it is taken once ("first") instead of being summed
# once per age class. The string column "Age Classes" is deliberately left out.
ch_unemploy_rate_canton_nationality = (
    ch_unemploy_rate_complete
    .groupby(by=["Canton", "Nationality"])
    .agg({
        "Jobseekers": "sum",
        "Unemployed Jobseekers": "sum",
        "Employed Jobseekers": "sum",
        "Active Population": "first",
    })
    .reset_index()
    .reindex(columns=[
        "Canton", "Nationality",
        "Jobseekers", "Unemployed Jobseekers", "Employed Jobseekers",
        "Active Population",
    ])
)
ch_unemploy_rate_canton_nationality.head(4)

In [None]:
# Pivot on "Unemployed Jobseekers": one row per canton, one column per nationality.
ch_unemploy_rate_canton_nationality = ch_unemploy_rate_canton_nationality.pivot(
    index="Canton", columns="Nationality", values="Unemployed Jobseekers").reset_index()

# Give the columns a two-level header: ("", "Canton") for the key column and
# ("Unemployed Jobseekers", <nationality>) for each pivoted column.
# The header is derived from the actual pivoted columns rather than from a
# hard-coded order, and MultiIndex.from_tuples is used because the `labels`
# argument of the MultiIndex constructor is not available in recent pandas.
ch_unemploy_rate_canton_nationality.columns = pd.MultiIndex.from_tuples(
    [("", "Canton")]
    + [("Unemployed Jobseekers", nationality)
       for nationality in ch_unemploy_rate_canton_nationality.columns[1:]]
)

ch_unemploy_rate_canton_nationality.head(2)

In [None]:
# Add the ratio between unemployed Swiss and unemployed foreigners,
# rounded to two decimals.
ch_unemploy_rate_canton_nationality[("", "Ratio (Swiss/Foreigners)")] = (
    ch_unemploy_rate_canton_nationality[("Unemployed Jobseekers", "Swiss")]
    .div(ch_unemploy_rate_canton_nationality[("Unemployed Jobseekers", "Foreigners")])
    .round(2)
)
ch_unemploy_rate_canton_nationality.head()

In [None]:
m = folium.Map(location=ch_location, zoom_start=8)

# Colour each canton by the Swiss/Foreigners ratio of unemployed jobseekers;
# rows are matched to the topology geometries through their
# 'properties.name' field.
m.choropleth(
    geo_data=topo_data_ch,
    topojson='objects.cantons',
    data=ch_unemploy_rate_canton_nationality,
    columns=[('', 'Canton'), ('', 'Ratio (Swiss/Foreigners)')],
    #threshold_scale=[0, 5, 10, 15, 20, 25],
    key_on='properties.name',
    fill_color='BuGn',
    fill_opacity=0.7,
#     line_opacity=1,
    # The plotted column is a ratio of two counts, not a percentage of
    # unemployment, so the legend states that.
    legend_name='Ratio of unemployed Swiss to unemployed foreigners')
m