In [1]:
import os
import pandas as pd
import json
import folium
import re
from geopy.geocoders import Nominatim
from folium.plugins import MarkerCluster
import branca

**The Dataset and topojson file**

* tepsr_wc170.tsv: this is the dataset that will be used to retrieve the European unemployment rates.
* europe.topojson.json: this file contains the European countries' abbreviations and the arcs that form the boundaries of each country.

In [2]:
europe_topo_path = './europe.topojson.json'
path = './tepsr_wc170.tsv'

**Dataset information**

The dataset describes the unemployment rate based on the EU Labour Force Survey (from 2005 up to 2016). The unemployment rate represents the unemployed as a percentage of the labour force, where the labour force is the total number of people both employed and unemployed.

The dataset is divided into several age categories:

* 15-24 Years old
* 15-74 Years old
* 20-64 Years old
* 25-29 Years old
* 25-54 Years old
* 55-64 Years old

The list of countries available in the dataset:

* The 28 European Union countries.
* 3 of the European Free Trade Association (EFTA) (Liechtenstein not included).
* 2 of the EU candidate countries (Turkey and the former Yugoslav Republic of Macedonia).

In [3]:
# Read the topojson through a context manager so the file handle is closed
# as soon as the data has been parsed.
with open(europe_topo_path) as topo_file:
    topo_json_data = json.load(topo_file)

# The file mixes two delimiters (commas and tabs), hence the regex separator,
# which requires the python parsing engine.
df = pd.read_csv(path, sep = ',|\t', engine = 'python')

In [4]:
df.head()

Unnamed: 0,unit,sex,age,isced11,geo\time,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,PC,T,Y15-24,TOTAL,AT,11.0 b,9.8,9.4 b,8.5,10.7,9.5,8.9,9.4,9.7,10.3,10.6,11.2
1,PC,T,Y15-24,TOTAL,BE,21.5 b,20.5,18.8,18.0,21.9,22.4,18.7 b,19.8,23.7,23.2,22.1,20.1
2,PC,T,Y15-24,TOTAL,BG,22.3 b,19.5,15.1,12.7 b,16.2,21.9 b,25.0 b,28.1,28.4,23.8,21.6,17.2
3,PC,T,Y15-24,TOTAL,CH,8.8 b,7.7,7.1,7.0,8.5,8.2 b,7.7,8.3,8.7,8.6,8.8,8.6
4,PC,T,Y15-24,TOTAL,CY,13.9 b,10.0,10.2,9.0,13.8 b,16.6,22.4,27.7,38.9,36.0,32.8,29.1


**Strings to Float**

The year columns containing the unemployment rates in the dataset are strings, and some of them contain letters. The following function takes these strings and extracts the float number from each. Then, it returns a clean list of float numbers containing the unemployment rates.

In [5]:
def string_to_float(unemployment_pc_fl):
    """Convert the 'Unemployment Rate' column into a list of floats.

    Each cell is expected to hold a number optionally followed by a flag
    letter (e.g. '21.5 b'). Exactly one float is returned per row, so the
    result can be assigned straight back to the frame as a column. Cells that
    contain no number at all (e.g. ':') become NaN instead of being dropped,
    which would otherwise make the list shorter than the frame.

    Parameters
    ----------
    unemployment_pc_fl : pandas.DataFrame
        Frame with an 'Unemployment Rate' column.

    Returns
    -------
    list of float
        One value per row, in row order.
    """
    float_list = []
    for value in unemployment_pc_fl['Unemployment Rate']:
        # str() lets cells that were already parsed as numbers go through the
        # same path as the raw strings; the optional fractional part also
        # accepts integer-looking values such as '7'.
        match = re.search(r"\d+(?:\.\d+)?", str(value))
        float_list.append(float(match.group()) if match else float('nan'))
    return float_list

**Filtering the DataFrame**

In order to be able to illustrate a specific year and age category on the map, we need to create a dataframe containing the abbreviations of the countries as indexes (since both files share this element), and age range and unemployment rate(years) as columns.

**DataFrame Corrections**

Upon exploring the data, it was found that 2 countries (United Kingdom and Greece) in the dataset have different abbreviations from those in the topojson file. Thus, we need to match these abbreviations before we can use them.

In [6]:
# Setting the abbreviations(geo\time) as indexes to the dataframe
df = df.set_index('geo\\time')
df.head()

Unnamed: 0_level_0,unit,sex,age,isced11,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
geo\time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
AT,PC,T,Y15-24,TOTAL,11.0 b,9.8,9.4 b,8.5,10.7,9.5,8.9,9.4,9.7,10.3,10.6,11.2
BE,PC,T,Y15-24,TOTAL,21.5 b,20.5,18.8,18.0,21.9,22.4,18.7 b,19.8,23.7,23.2,22.1,20.1
BG,PC,T,Y15-24,TOTAL,22.3 b,19.5,15.1,12.7 b,16.2,21.9 b,25.0 b,28.1,28.4,23.8,21.6,17.2
CH,PC,T,Y15-24,TOTAL,8.8 b,7.7,7.1,7.0,8.5,8.2 b,7.7,8.3,8.7,8.6,8.8,8.6
CY,PC,T,Y15-24,TOTAL,13.9 b,10.0,10.2,9.0,13.8 b,16.6,22.4,27.7,38.9,36.0,32.8,29.1


In [7]:
# Error 1: EL is set to be the abbreviation for Greece instead of GR
df[10:13]

Unnamed: 0_level_0,unit,sex,age,isced11,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
geo\time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
EE,PC,T,Y15-24,TOTAL,15.1 b,12.1,10.1,12.0,27.4,32.9,22.4,20.9,18.7,15.0,13.1,13.4
EL,PC,T,Y15-24,TOTAL,25.8 b,25.0,22.7,21.9,25.7 b,33.0,44.7,55.3,58.3,52.4,49.8,47.3
ES,PC,T,Y15-24,TOTAL,19.6 b,17.9,18.1,24.5,37.7,41.5,46.2,52.9,55.5,53.2,48.3,44.4


In [8]:
# Error 2: UK is set to be the abbreviation for United Kingdom instead of GB
df.tail(3)

Unnamed: 0_level_0,unit,sex,age,isced11,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
geo\time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
SK,PC,T,Y55-64,TOTAL,13.4 b,9.8,8.2,6.4,7.7,10.1,10.1 b,11.2,11.0,10.6,9.3,9.0
TR,PC,T,Y55-64,TOTAL,:,3.6,3.5,4.4,5.6,5.0,4.3,3.9,4.7,6.0 b,6.7,6.2
UK,PC,T,Y55-64,TOTAL,2.7 b,3.0,3.2 b,3.1 b,4.6,4.7,5.0,4.9,4.8,4.0,3.4,3.7


In [9]:
def CleaningDF(df, year, age):
    """Extract one year and one age bracket from the unemployment dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset indexed by country abbreviation, with an 'age' column and one
        column per year.
    year : int or str
        Year whose column should be kept.
    age : str
        Age bracket to keep, e.g. 'Y15-74'.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by 'Abbreviations' with the columns 'Age Range' and
        'Unemployment Rate' (floats). 'EL' and 'UK' are relabelled 'GR' and
        'GB' to match the topojson ids.
    """
    # Keep only the age column and the requested year column. Work on a copy
    # so that nothing below touches the caller's frame.
    desired_columns = str('age') + '|' + str(year)
    unemployment_pc = df.filter(regex = (desired_columns)).copy()
    unemployment_pc.columns = ['Age Range', 'Unemployment Rate']

    # Restrict the rows to the requested age bracket
    unemployment_pc_fl = unemployment_pc[unemployment_pc['Age Range'] == age].copy()

    # Relabel EL/UK as GR/GB to match the topojson file. A mapping-based
    # rename leaves the index untouched when a code is absent instead of
    # raising ValueError.
    unemployment_pc_fl = unemployment_pc_fl.rename(index = {'EL': 'GR', 'UK': 'GB'})
    unemployment_pc_fl.index.name = 'Abbreviations'

    # Replace the string rates with their float equivalents
    unemployment_pc_fl['Unemployment Rate'] = string_to_float(unemployment_pc_fl)

    return unemployment_pc_fl

In [10]:
CleaningDF(df, 2016, 'Y15-74')

Unnamed: 0_level_0,Age Range,Unemployment Rate
Abbreviations,Unnamed: 1_level_1,Unnamed: 2_level_1
AT,Y15-74,6.0
BE,Y15-74,7.8
BG,Y15-74,7.6
CH,Y15-74,5.0
CY,Y15-74,13.0
CZ,Y15-74,4.0
DE,Y15-74,4.1
DK,Y15-74,6.2
EA18,Y15-74,10.1
EA19,Y15-74,10.0


In [11]:
topo_json_data

{'arcs': [[[9439, 2245],
   [-14, 6],
   [-31, 9],
   [-31, 11],
   [-12, 5],
   [-11, 5],
   [-2, 7],
   [0, 8],
   [-10, 25],
   [-5, 9],
   [-2, 1],
   [-1, -2],
   [-3, -2],
   [-16, 7],
   [-1, 2],
   [1, 4],
   [-2, 11],
   [-2, 6],
   [-7, 14],
   [-9, 11],
   [-3, 1],
   [-3, 3],
   [-3, 11],
   [-3, 14],
   [-2, 17],
   [-2, 3],
   [-8, 2]],
  [[9257, 2433], [0, 5], [-3, 12], [-2, 2]],
  [[9252, 2452],
   [3, 2],
   [8, 3],
   [3, 0],
   [5, -2],
   [7, 4],
   [3, 4],
   [4, 6],
   [5, 1],
   [3, -3],
   [24, -37],
   [7, -17],
   [2, -2],
   [17, -11],
   [5, 2],
   [12, 12],
   [17, 8],
   [3, 0],
   [8, -5],
   [3, -3],
   [1, -27],
   [4, -21],
   [18, -18],
   [2, -6],
   [0, -2],
   [-5, -8],
   [0, -10],
   [18, -55],
   [10, -22]],
  [[9313, 2757], [-1, -2], [-3, 3], [-1, 3], [2, 2], [3, -4], [0, -2]],
  [[9288, 2771],
   [-3, -1],
   [-2, 1],
   [-3, 3],
   [-1, 3],
   [1, 5],
   [1, 2],
   [5, -1],
   [3, -5],
   [0, -5],
   [-1, -2]],
  [[9760, 2963],
   [4, -3],
  

In [12]:
topo_json_data.keys()

dict_keys(['type', 'objects', 'arcs', 'transform'])

In [13]:
topo_json_data['objects']

{'europe': {'geometries': [{'arcs': [[[0, 1, 2]],
     [[3]],
     [[4]],
     [[5, 6, 7, 8, 9, 10], [11]]],
    'id': 'AZ',
    'properties': {'NAME': 'Azerbaijan'},
    'type': 'MultiPolygon'},
   {'arcs': [[12, 13, 14, 15, 16, 17, 18]],
    'id': 'AL',
    'properties': {'NAME': 'Albania'},
    'type': 'Polygon'},
   {'arcs': [[[-12]], [[19, -3, 20, 21, -7], [-5], [-4]]],
    'id': 'AM',
    'properties': {'NAME': 'Armenia'},
    'type': 'MultiPolygon'},
   {'arcs': [[22, 23, 24, 25, 26, 27, 28, 29, 30, 31]],
    'id': 'BA',
    'properties': {'NAME': 'Bosnia and Herzegovina'},
    'type': 'Polygon'},
   {'arcs': [[32, 33, 34, 35, 36, 37]],
    'id': 'BG',
    'properties': {'NAME': 'Bulgaria'},
    'type': 'Polygon'},
   {'arcs': [[38]],
    'id': 'CY',
    'properties': {'NAME': 'Cyprus'},
    'type': 'Polygon'},
   {'arcs': [[[39]],
     [[40]],
     [[41]],
     [[42]],
     [[43]],
     [[44]],
     [[45]],
     [[46]],
     [[47]],
     [[48]],
     [[49]],
     [[50]],
     [

In [14]:
# Dotted path to the 'europe' entry under 'objects' in the topojson data;
# later passed to AddingChoropleth, which forwards it as the `topojson` argument.
object_europe = 'objects.europe'

**Extracting the European Countries Names and their Abbreviations from the topojson file**

Since the topojson file contains european country names and their abbreviations, a function is created to extract only those data into a new dataframe to be used later for matching the data in both files.

In [15]:
def europe_abbreviations(topo_json_data):
    """Build a country-name lookup from the topojson geometries.

    Reads every geometry under objects -> europe -> geometries and returns a
    DataFrame with a single 'Country' column (the geometry's NAME property),
    indexed by 'Abbreviations' (the geometry's id).
    """
    geometries = topo_json_data['objects']['europe']['geometries']

    names, codes = [], []
    for geometry in geometries:
        names.append(geometry['properties']['NAME'])
        codes.append(geometry['id'])

    lookup = pd.DataFrame({'Country': names, 'Abbreviations': codes})

    # The abbreviations become the index so the frame can be joined on them
    return lookup.set_index('Abbreviations')

In [16]:
europe_abbreviations(topo_json_data)

Unnamed: 0_level_0,Country
Abbreviations,Unnamed: 1_level_1
AZ,Azerbaijan
AL,Albania
AM,Armenia
BA,Bosnia and Herzegovina
BG,Bulgaria
CY,Cyprus
DK,Denmark
IE,Ireland
EE,Estonia
AT,Austria


**Merging Dataframes into one Dataframe to be used in the Folium Map**

Now we have two dataframes with one common column (Abbreviations):
* **Dataframe 1**: Abbreviations, Age Range, Unemployment Rate.
* **Dataframe 2**: Abbreviations, Country Names.

In this step, we merge the dataframes so that the output will only contain the countries shared between the two dataframes by matching the abbreviations and discarding unavailable data. The abbreviations are then returned into a list and new indexes are set for the dataframe.

In [17]:
def MergeDataFrame(df_1, df_2):
    """Combine the rate frame with the country-name frame.

    df_1 is joined with df_2 on their indexes, and rows that received no
    'Country' value from df_2 are discarded.

    Returns a tuple (abbreviations, country_rate):
      * abbreviations: list of the index labels that survived the filter;
      * country_rate: the surviving rows with the index moved into a regular
        column, a fresh numeric index, and the 'Age Range' column removed.
    """
    joined = df_1.join(df_2)

    # Rows without a country name have no counterpart in df_2
    has_country = joined['Country'].notnull()
    matched = joined[has_country]

    # Capture the labels before they stop being the index
    abbreviations = list(matched.index.values)

    # The age range is not needed for the folium map
    country_rate = matched.reset_index().drop('Age Range', axis = 1)

    return abbreviations, country_rate

In [18]:
year = 2016
age = 'Y15-74'

MergeDataFrame(CleaningDF(df, year, age), europe_abbreviations(topo_json_data))

(['AT',
  'BE',
  'BG',
  'CH',
  'CY',
  'CZ',
  'DE',
  'DK',
  'EE',
  'GR',
  'ES',
  'FI',
  'FR',
  'HR',
  'HU',
  'IE',
  'IS',
  'IT',
  'LT',
  'LU',
  'LV',
  'MK',
  'MT',
  'NL',
  'NO',
  'PL',
  'PT',
  'RO',
  'SE',
  'SI',
  'SK',
  'TR',
  'GB'],
    Abbreviations  Unemployment Rate                                    Country
 0             AT                6.0                                    Austria
 1             BE                7.8                                    Belgium
 2             BG                7.6                                   Bulgaria
 3             CH                5.0                                Switzerland
 4             CY               13.0                                     Cyprus
 5             CZ                4.0                             Czech Republic
 6             DE                4.1                                    Germany
 7             DK                6.2                                    Denmark
 8             

**Filtering the topojson file**

Here we remove the country boundaries (arcs) whose country abbreviations do not exist in the DataFrame we created, by matching the culled abbreviations list against each country's id (abbreviation) in the topojson file, and then replacing the old list of boundaries with the new list in the topojson data.

In [19]:
def filtering_topojson(topo_json_data, abbreviations):
    """Return the topojson data restricted to the given country ids.

    The input is not modified: the dictionaries on the path
    objects -> europe are shallow-copied and only the copy receives the
    filtered 'geometries' list. Everything else (e.g. the 'arcs' list) is
    shared with the input rather than duplicated.

    Parameters
    ----------
    topo_json_data : dict
        Parsed topojson with geometries under objects -> europe -> geometries.
    abbreviations : iterable of str
        Country ids to keep.

    Returns
    -------
    dict
        Topology of the same shape containing only the geometries whose 'id'
        is in `abbreviations`, in their original order.
    """
    # Set membership keeps the filter linear in the number of geometries
    wanted = set(abbreviations)

    topo_data = dict(topo_json_data)
    objects = dict(topo_data['objects'])
    europe = dict(objects['europe'])

    europe['geometries'] = [country for country in europe['geometries']
                            if country['id'] in wanted]

    objects['europe'] = europe
    topo_data['objects'] = objects

    return topo_data

**Adding Choropleth to the map**

The following function creates a choropleth map that contains the values obtained from the new DataFrame and the keys 'Abbreviations' which will be used to match the ID's on the map.

In [20]:
def AddingChoropleth(dataframe, map_, feature, legend_name, key, scale_color, json_data, object_):
    """Draw a choropleth layer on `map_`.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data holding the `key` and `feature` columns.
    map_ : folium.Map
        Map the layer is added to.
    feature : str
        Column whose values drive the colour scale.
    legend_name : str
        Caption of the colour legend.
    key : str
        Column matched against each geometry's id ('feature.id').
    scale_color : str
        Name of the colour palette, e.g. 'YlOrRd'.
    json_data : dict
        Topojson data.
    object_ : str
        Dotted path to the object inside the topojson, e.g. 'objects.europe'.
    """
    choropleth_options = dict(
        geo_data = json_data,
        data = dataframe,
        columns = [key, feature],
        key_on = 'feature.id',
        fill_color = scale_color,
        fill_opacity = 0.5,
        line_opacity = 0.2,
        legend_name = legend_name,
        topojson = object_,
        highlight = True,
    )

    # Map.choropleth is deprecated and missing from newer folium releases,
    # where the layer is the folium.Choropleth class. Use the class when it
    # exists and keep the old method as a fallback for older installations.
    if hasattr(folium, 'Choropleth'):
        folium.Choropleth(**choropleth_options).add_to(map_)
    else:
        map_.choropleth(**choropleth_options)

**Adding Markers to the Map**

* Here we add circular markers onto the map over the European countries by obtaining the longitude and latitude of each country and placing a marker at that position.
* By setting the marker's radius to the feature 'unemployment rate value', the markers' sizes become proportional to the rate of unemployment.
* Pop-up is then added to the marker to show the unemployment rate for each country.
* Then, these inputs are added to a marker cluster to create an interactive map.

In [21]:
def AddingMarkers(dataframe, map_, key, feature, marker_color, legend_name):
    """Add one circle marker per row of `dataframe` to a marker cluster on `map_`.

    Each row is geocoded with the module-level `geolocator` (defined in
    another cell) using "<key value>, <abbreviation>" as the query. The
    resulting coordinates are written back into `dataframe` as 'Latitude' and
    'Longitude', and a circle whose radius is the row's `feature` value is
    drawn at that position with a popup of the form "<key value>: <feature>%".

    Rows that cannot be geocoded are skipped with a warning instead of
    aborting the whole loop.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the `key`, `feature` and 'Abbreviations' columns.
        Modified in place (coordinate columns are added).
    map_ : folium.Map
        Map that receives the marker cluster.
    key : str
        Column holding the place name.
    feature : str
        Column holding the value used as radius and shown in the popup.
    marker_color : str
        Outline and fill colour of the circles.
    legend_name : str
        Not used by this function; kept so existing calls keep working.
    """
    import warnings

    # Cluster of circles that expands/condenses with the zoom level
    marker_cluster = MarkerCluster().add_to(map_)

    for index in dataframe.index:
        name = dataframe.at[index, key]
        value = dataframe.at[index, feature]

        # Query string: place name followed by its abbreviation
        place = name + ", " + dataframe.at[index, 'Abbreviations']

        # geocode() returns None when nothing matches the query
        location = geolocator.geocode(place)
        if location is None:
            warnings.warn("Could not geocode '" + place + "'; marker skipped")
            continue

        # Record the coordinates next to the row they belong to
        dataframe.loc[index, 'Latitude'] = location.latitude
        dataframe.loc[index, 'Longitude'] = location.longitude

        folium.CircleMarker(
            location = [location.latitude, location.longitude],
            radius = value,
            fill = True,
            fill_color = marker_color,
            color = marker_color,
            popup = folium.Popup(str(name) + ": " + str(value) + "%")
        ).add_to(marker_cluster)

In [22]:
# Current geopy releases refuse to build a Nominatim geocoder without an
# application-specific user agent, so one is passed explicitly.
geolocator = Nominatim(user_agent = 'europe-unemployment-map')

# Sanity check: look up the centre of Europe (used as the map centre below)
location = geolocator.geocode('europe')

location

Location(Europe, (51.0, 10.0, 0.0))

**Drawing the Map**

The data displayed on the map can be changed by assigning different years and age categories to the functions.

In [23]:
europe_map_2016 = folium.Map(location = [51,10], zoom_start = 4)
europe_map_2006 = folium.Map(location = [51,10], zoom_start = 4)

In [24]:
europe_map_2016

In [25]:
# Year and age bracket shown on the 2006 map
year = 2006
age_category = 'Y15-24'

# Rates for the chosen year and age bracket, indexed by country abbreviation
unemployment_pc_fl = CleaningDF(df, year, age_category)

# Country names keyed by the abbreviations found in the topojson data
europe_abrv = europe_abbreviations(topo_json_data)

# Keep only the countries present in both frames
abbreviations, country_rate = MergeDataFrame(unemployment_pc_fl, europe_abrv)

# NOTE: this rebinds topo_json_data to the filtered result, so every later
# cell works with the reduced set of geometries rather than the full file.
topo_json_data = filtering_topojson(topo_json_data, abbreviations)

In [26]:
AddingChoropleth(country_rate, europe_map_2006, 'Unemployment Rate', 'Percentage of Unemployment(%) 2006', 'Abbreviations', 'YlOrRd', topo_json_data, object_europe)

In [27]:
europe_map_2006

In [28]:
AddingMarkers(country_rate, europe_map_2006, 'Country', 'Unemployment Rate', 'blue', 'Percentage of Unemployment (%) 2006')

In [29]:
europe_map_2006

In [30]:
# Year and age bracket shown on the 2016 map
year = 2016
age_category = 'Y15-24'

# Rates for the chosen year and age bracket, indexed by country abbreviation
unemployment_pc_fl = CleaningDF(df, year, age_category)

# Country names keyed by the abbreviations found in topo_json_data as it
# stands at this point (an earlier cell already rebound it to a filtered copy)
europe_abrv = europe_abbreviations(topo_json_data)

# Keep only the countries present in both frames
abbreviations, country_rate = MergeDataFrame(unemployment_pc_fl, europe_abrv)

# NOTE: rebinds topo_json_data to the filtered result once more
topo_json_data = filtering_topojson(topo_json_data, abbreviations)

In [31]:
AddingChoropleth(country_rate, europe_map_2016, 'Unemployment Rate', 'Percentage of Unemployment(%) 2016', 'Abbreviations', 'YlOrRd', topo_json_data, object_europe)

In [32]:
AddingMarkers(country_rate, europe_map_2016, 'Country', 'Unemployment Rate', 'blue', 'Percentage of Unemployment (%) 2016')

In [33]:
europe_map_2016

**Displaying 2 Maps with different years for the same age category (or vice versa) for Comparison**

In [34]:
# Side-by-side comparison: one branca Figure holding two subplots
f = branca.element.Figure()
# presumably (rows, columns, position), i.e. 1 row with 2 columns - TODO confirm
m1 = f.add_subplot(1, 2, 1)
m2 = f.add_subplot(1, 2, 2)
# The 2006 map goes into the first subplot, the 2016 map into the second
m1.add_child(europe_map_2006)
m2.add_child(europe_map_2016)

**What can be deduced from the maps**

**Age Range 15-24Y**
* In 2007, the former Yugoslav Republic of Macedonia had an unemployment rate of almost 58% for people between the ages of 15 and 24, ranking the highest amongst the European countries in the dataset. In 2016, this rate seems to have decreased to 48%, which is still relatively high, indicating a chronic problem of long-term unemployment. This could be related to the economic recession it is going through.
___
**Age Range 15-74Y**
* Greece also ranks among the countries with the highest rates of unemployment. In 2006, Greece had an unemployment rate of only 9%, and it shot up to reach 23% in 2016. This increase could also be correlated with the financial crisis of 2007.

* On a wider range, The former Yugoslav Republic of Macedonia had a bit of improvement in the unemployment rate between 2006 and 2016, as it decreased from 36.1% to 23.7%.

* Yet Spain seems to have had an increase in this rate, as it went from 8.5% to 19.6%.

* Iceland can be found to have the lowest rate of unemployment in 2006 and 2016, with 2.8% and 3.0% respectively, followed by Norway, which has a rate of 3.4% and 4.6% for the same years, along with other countries such as Switzerland and Great Britain within a 1~2% margin.
___
**Age Range 55-64Y**
* In 2006, Spain and Greece had a low rate of unemployment amongst the older age category, with a rate of 8.5% and 9% respectively, while in 2016, it goes up to 17% and 19.2%.

___
* In general, by alternating between the age categories for different years, we can see that the unemployment rate is relatively lower in the early years starting from 2005 compared to 2016. The increase in the rate is influenced by many factors such as global recessions, financial crises, technological advances or seasonal fluctuations.


In [1]:
# Draws a stamped Christmas tree with turtle graphics: a red circle on top,
# 16 rows of green squares with circles at the row ends, a brown trunk and a
# "Merry Christmas!" greeting. The window closes on a mouse click.
import turtle

# 800 x 600 drawing window
screen = turtle.Screen()
screen.setup(800,600)

# Stamp used for the ornaments; the pen is lifted so moving leaves no line
circle = turtle.Turtle()
circle.shape('circle')
circle.color('red')
circle.speed('fastest')
circle.up()

# Stamp used for the foliage and the trunk; pen lifted as well
square = turtle.Turtle()
square.shape('square')
square.color('green')
square.speed('fastest')
square.up()

# Red circle at the top of the tree
circle.goto(0,280)
circle.stamp()

# k reduces the row width; it grows by 2 after every 4th row
k = 0
for i in range(1, 17):
    # Each row sits 30 units below the previous one
    y = 30*i
    # Stamp squares mirrored around x = 0 (the j = 0 square is stamped twice
    # at the same spot)
    for j in range(i-k):
        x = 30*j
        square.goto(x,-y+280)
        square.stamp()
        square.goto(-x,-y+280)
        square.stamp()

    # j still holds the last value of the inner loop, so 30*(j+1) is one step
    # beyond the outermost square of this row
    if i % 4 == 0:
        x =  30*(j+1)
        circle.color('red')
        circle.goto(-x,-y+280)
        circle.stamp()
        circle.goto(x,-y+280)
        circle.stamp()        
        k += 2

    # Yellow circles at both ends of the row preceding each red-circle row
    if i % 4 == 3:
        x =  30*(j+1)
        circle.color('yellow')
        circle.goto(-x,-y+280)
        circle.stamp()
        circle.goto(x,-y+280)
        circle.stamp() 

# Trunk: rows 17 to 19, squares at x = 0, +/-30 and +/-60
square.color('brown')
for i in range(17,20):
    y = 30*i
    for j in range(3):    
        x = 30*j
        square.goto(x,-y+280)
        square.stamp()
        square.goto(-x,-y+280)
        square.stamp()        

# Greeting written in red by a separate turtle that is never moved
christmas = turtle.Turtle()
christmas.pencolor('red')
christmas.write("Merry Christmas!", move=False, align="center", font=("Arial", 35, "normal"))
# Keep the window open until it is clicked
turtle.exitonclick()