In [12]:
import os
import pandas as pd
import numpy as np
import json
import folium
import glob
import csv
import string

# Q2 : Canton wise unemployment rate

For the second task we have downloaded from the amstat website a dataset that includes the rates of unemployed people for the last 15 years, as well as other information. Specifically:
* Registered unemployed
* Registered job seekers
* Job seekers with a job 

For our analysis we choose to keep only the fraction of people who strictly belong to the definition of unemployment of the Bureau of Labor Statistics https://www.bls.gov/cps/cps_htgm.htm: "People who are jobless, looking for a job, and available for work". That is, the number of registered job seekers minus the ones who have already a job. We can easily see that this number corresponds to the registered unemployed people. It is important to check this difference to exclude the possbility that the "registered unemployed" class includes jobless people that are not looking for a job (and that thus don't belong to our definition of unemployment).
Moreover, we are interested in the unemployment rate, which is the number of unemployed people divided by the number of people in the labor force. 

The labor force is the sum of employed and unemplyed people. Even though this information is not provided in the dataset, we assume that the "Unemployment rate" has been computed from the "Registered unemployed" data and thus we will use this information for our analysis.

Now we can import and clean the data.

In [13]:
#Import
ch_geo = os.path.join('./topojson/', 'ch-cantons.topojson.json')
ch_rate = os.path.join('./ch_rate.xlsx')

#Reading
ch_rate_data = pd.read_excel(ch_rate, index=False)

#Clean 1st dataset
ch_rate_data = ch_rate_data.iloc[1:,:]

In [14]:
ch_rate_data.head()

Unnamed: 0,Kanton,Januar 2002,Januar 2002.1,Januar 2002.2,Februar 2002,Februar 2002.1,Februar 2002.2,März 2002,März 2002.1,März 2002.2,...,Oktober 2016.2,November 2016,November 2016.1,November 2016.2,Dezember 2016,Dezember 2016.1,Dezember 2016.2,Gesamt,Gesamt.1,Gesamt.2
1,Zürich,2.6,18757,5989,2.7,19279,6266,2.7,19617,6622,...,6923,3.7,30651,7069,3.8,31570,7021,3.4,4642209,1329157
2,Bern,1.6,8517,4281,1.7,8656,4476,1.6,8261,4892,...,4948,2.8,15753,5208,3.0,16636,5001,2.4,2308491,1042472
3,Luzern,1.8,3378,1370,1.8,3467,1380,1.8,3393,1425,...,2952,2.0,4429,3120,2.2,4883,2988,2.3,840599,463094
4,Uri,0.7,117,111,0.6,106,111,0.6,107,124,...,149,1.1,218,158,1.3,242,165,1.1,36644,31181
5,Schwyz,1.0,740,578,1.1,797,574,1.1,753,596,...,773,1.8,1557,818,1.9,1683,829,1.7,239129,155084


The first objective is to group the different categories by year and rename the data properly.

In [15]:
for year in np.arange(0,15,1):
    rate_data = pd.DataFrame({'rate_ue_{}'.format(year+2002): ch_rate_data.iloc[:,(1+36*year):(1+36*(year+1)-1):3].apply(pd.to_numeric).mean(axis=1)})
    reg_ue_data = pd.DataFrame({'reg_ue_{}'.format(year+2002): ch_rate_data.iloc[:,(2+36*year):(2+36*(year+1)-1):3].apply(pd.to_numeric).mean(axis=1)})
    js_with_job = pd.DataFrame({'js_with_job_{}'.format(year+2002): ch_rate_data.iloc[:,(3+36*year):(3+36*(year+1)-1):3].apply(pd.to_numeric).mean(axis=1)})
    ch_rate_data = pd.concat((ch_rate_data,rate_data,reg_ue_data,js_with_job),axis=1)

In [16]:
beginning_of_dataset = ch_rate_data.iloc[:,:1]
end_of_dataset = ch_rate_data.iloc[:,-45:]
ch_rate_data = pd.concat((beginning_of_dataset,end_of_dataset),axis=1)

In [17]:
ch_rate_data.head()

Unnamed: 0,Kanton,rate_ue_2002,reg_ue_2002,js_with_job_2002,rate_ue_2003,reg_ue_2003,js_with_job_2003,rate_ue_2004,reg_ue_2004,js_with_job_2004,...,js_with_job_2013,rate_ue_2014,reg_ue_2014,js_with_job_2014,rate_ue_2015,reg_ue_2015,js_with_job_2015,rate_ue_2016,reg_ue_2016,js_with_job_2016
1,Zürich,2.991667,21595.75,7283.416667,4.516667,32574.333333,9444.666667,4.483333,32402.166667,10368.5,...,5790.833333,3.175,26013.166667,5923.583333,3.416667,27985.333333,6232.916667,3.65,30083.916667,6702.0
2,Bern,1.75,9162.916667,5155.666667,2.833333,14860.416667,6651.333333,2.916667,15212.583333,7452.5,...,4514.916667,2.35,13189.333333,4866.0,2.5,14116.416667,4922.666667,2.708333,15283.166667,4801.083333
3,Luzern,2.0,3780.0,1560.583333,3.116667,5881.666667,2334.833333,3.141667,5955.333333,2676.666667,...,2489.75,1.866667,4147.083333,2435.916667,1.975,4383.333333,2643.916667,2.058333,4601.75,2864.833333
4,Uri,0.716667,124.416667,169.5,1.066667,188.0,181.5,1.183333,208.833333,200.666667,...,182.75,1.05,202.916667,148.083333,1.033333,199.666667,163.833333,1.041667,201.5,161.916667
5,Schwyz,1.225,878.166667,730.5,2.108333,1490.5,1034.25,2.283333,1630.333333,1019.666667,...,773.0,1.525,1344.083333,794.25,1.583333,1388.333333,758.666667,1.766667,1537.166667,752.416667


Our next step is to use folium to visualize Swiss cantons and assign the corresponding unemployment rates.
Notice that we have downloaded the data in German in order to better match the names of the cantons with the ones provided in the geo data.

In [18]:
#Loading
ch_geo = open('geojson/switzerland.geojson')
ch_geo = ch_geo.read()
ch_geo = json.loads(ch_geo)

#Assign geoJson
ch_map = folium.Map(location=[46.8,8.1], zoom_start=7.5)
folium.GeoJson(ch_geo, name='ch_geo').add_to(ch_map)

#Visualize
ch_map

In [20]:
ch_geo

{'features': [{'geometry': {'coordinates': [[[8.807614761476149,
       47.220891289128915],
      [8.792644464446445, 47.222881488148815],
      [8.74092889288929, 47.211935393539356],
      [8.713710171017102, 47.2029794979498],
      [8.709627362736274, 47.19999419941994],
      [8.703729972997301, 47.20198439843984],
      [8.702822682268227, 47.20098929892989],
      [8.702822682268227, 47.19999419941994],
      [8.702822682268227, 47.19899909990999],
      [8.702822682268227, 47.19780498049805],
      [8.701915391539154, 47.19780498049805],
      [8.699647164716472, 47.1968098809881],
      [8.697832583258327, 47.1968098809881],
      [8.695564356435645, 47.1968098809881],
      [8.694657065706572, 47.1968098809881],
      [8.693749774977498, 47.1968098809881],
      [8.693749774977498, 47.19581478147815],
      [8.68467686768677, 47.18685888588859],
      [8.68195499549955, 47.18486868686868],
      [8.68059405940594, 47.18387358735873],
      [8.679686768676868, 47.178898089808

In [21]:
ch_rate_data

Unnamed: 0,Kanton,rate_ue_2002,reg_ue_2002,js_with_job_2002,rate_ue_2003,reg_ue_2003,js_with_job_2003,rate_ue_2004,reg_ue_2004,js_with_job_2004,...,js_with_job_2013,rate_ue_2014,reg_ue_2014,js_with_job_2014,rate_ue_2015,reg_ue_2015,js_with_job_2015,rate_ue_2016,reg_ue_2016,js_with_job_2016
1,Zürich,2.991667,21595.75,7283.416667,4.516667,32574.333333,9444.666667,4.483333,32402.166667,10368.5,...,5790.833333,3.175,26013.166667,5923.583333,3.416667,27985.333333,6232.916667,3.65,30083.916667,6702.0
2,Bern,1.75,9162.916667,5155.666667,2.833333,14860.416667,6651.333333,2.916667,15212.583333,7452.5,...,4514.916667,2.35,13189.333333,4866.0,2.5,14116.416667,4922.666667,2.708333,15283.166667,4801.083333
3,Luzern,2.0,3780.0,1560.583333,3.116667,5881.666667,2334.833333,3.141667,5955.333333,2676.666667,...,2489.75,1.866667,4147.083333,2435.916667,1.975,4383.333333,2643.916667,2.058333,4601.75,2864.833333
4,Uri,0.716667,124.416667,169.5,1.066667,188.0,181.5,1.183333,208.833333,200.666667,...,182.75,1.05,202.916667,148.083333,1.033333,199.666667,163.833333,1.041667,201.5,161.916667
5,Schwyz,1.225,878.166667,730.5,2.108333,1490.5,1034.25,2.283333,1630.333333,1019.666667,...,773.0,1.525,1344.083333,794.25,1.583333,1388.333333,758.666667,1.766667,1537.166667,752.416667
6,Obwalden,0.941667,167.583333,193.583333,1.566667,274.583333,239.75,1.691667,294.916667,269.583333,...,180.75,0.916667,196.25,170.833333,0.891667,187.666667,170.833333,1.0,213.0,163.166667
7,Nidwalden,1.1,230.5,236.583333,1.908333,399.166667,328.666667,1.95,407.583333,349.75,...,216.083333,0.983333,234.416667,204.5,1.058333,255.833333,230.25,1.158333,278.083333,234.333333
8,Glarus,1.541667,320.0,348.0,2.358333,482.333333,486.5,2.383333,489.75,539.0,...,317.166667,2.216667,500.583333,306.833333,2.158333,483.25,327.0,2.366667,531.25,316.083333
9,Zug,2.658333,1536.916667,420.583333,3.608333,2094.333333,643.583333,3.45,1992.333333,810.666667,...,1000.5,2.158333,1455.75,1023.583333,2.2,1498.25,1021.0,2.358333,1602.75,1050.833333
10,Freiburg,1.775,2255.583333,2451.166667,2.758333,3491.166667,2597.166667,2.916667,3696.416667,2523.916667,...,3180.333333,2.683333,4408.833333,3070.833333,2.8,4597.166667,3280.0,2.833333,4643.75,3655.083333


In [19]:
ch_map.choropleth(geo_data=ch_geo, data=ch_rate_data,
             columns=['Kanton', 'rate_ue_2008'],
             key_on='feature.properties.name',
             fill_color='OrRd', fill_opacity=0.7, line_opacity=0.2,
             legend_name='Unemployment rate (%)')
folium.LayerControl().add_to(ch_map)
ch_map

## Calculate the same as above by removing the job seekers with jobs

# TBD LATER : Calculate the same but by using other categories like maybe young people

## Calculate rate by using `ch_nationality` and `ch_age datasets`, use the same preprocessing as above (rates are available)

# IMP : 
You have to find out the total number of each category by using the `ch_total` dataset and calculate the rates like that. 

By using the language region information, calculate an average unemployment rate across the two major linguistic regions and comment on the Röstigraben

Comment on the general trends of all the previous questions. Example : in times of crisis, foreign workers are usually kicked out. 