# Irish Migration

In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import folium

In [19]:
#read in multi-index data, indexed on age group and birthplace. note that csv lacks header so three columns are named explicitly
df=pd.read_csv('irish-census-2011-16-sex-age-birthplace.csv',header=None,index_col=[1,2],names=['gender','2011','2016'])
df.head(125)

Unnamed: 0,Unnamed: 1,gender,2011,2016
0 - 14 years,Ireland - county of usual residence,Both sexes,731189,761950
0 - 14 years,Ireland - county other than county of usual residence,Both sexes,148981,165421
0 - 14 years,Northern Ireland,Both sexes,7808,7105
0 - 14 years,England and Wales,Both sexes,22045,12772
0 - 14 years,Scotland,Both sexes,1250,820
0 - 14 years,...,...,...,...
0 - 14 years,New Zealand,Both sexes,299,306
15 - 24 years,Ireland - county of usual residence,Both sexes,367727,353306
15 - 24 years,Ireland - county other than county of usual residence,Both sexes,93369,103362
15 - 24 years,Northern Ireland,Both sexes,4840,4656


In [20]:
# clean up the dataset to remove unnecessary columns (eg. 'gender') 
df=df.drop(['gender'], axis=1)

# label the two indices
df.index.names=['age group','birthplace']

df.head(125)

Unnamed: 0_level_0,Unnamed: 1_level_0,2011,2016
age group,birthplace,Unnamed: 2_level_1,Unnamed: 3_level_1
0 - 14 years,Ireland - county of usual residence,731189,761950
0 - 14 years,Ireland - county other than county of usual residence,148981,165421
0 - 14 years,Northern Ireland,7808,7105
0 - 14 years,England and Wales,22045,12772
0 - 14 years,Scotland,1250,820
0 - 14 years,...,...,...
0 - 14 years,New Zealand,299,306
15 - 24 years,Ireland - county of usual residence,367727,353306
15 - 24 years,Ireland - county other than county of usual residence,93369,103362
15 - 24 years,Northern Ireland,4840,4656


In [43]:
# data types: 
df.dtypes

2011    int64
2016    int64
dtype: object

In [42]:
#size:
df.shape

(605, 2)

There are 605 rows and 2 columns in this birthplace of Irish residents database.

In [89]:
# Age groupings:
print("There are "+str(len(df.index.levels[0]))+" age groups.\n")
age_groups=df.index.levels[0].tolist()
print(age_groups)

There are 5 age groups.

['0 - 14 years', '15 - 24 years', '25 - 44 years', '45 - 64 years', '65 years and over']


In [117]:
# Birthplaces:
print("There are "+str(len(df.index.levels[1]))+" different birthplace locations.\n")

all_birthplaces=df.index.levels[1].unique().tolist()

print(all_birthplaces)

There are 121 different birthplace locations.

['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina', 'Australia', 'Austria', 'Bahrain', 'Bangladesh', 'Belarus', 'Belgium', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Bulgaria', 'Cameroon', 'Canada', 'Chile', 'China', 'Colombia', 'Congo', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Denmark', 'Egypt', 'England and Wales', 'Estonia', 'Ethiopia', 'Finland', 'France', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala', 'Hong Kong', 'Hungary', 'India', 'Indonesia', 'Iran, Islamic Republic of', 'Iraq', 'Ireland - county of usual residence', 'Ireland - county other than county of usual residence', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Korea, Republic of', 'Kosovo', 'Kuwait', "Lao People's Democratic Republic", 'Latvia', 'Lebanon', 'Liberia', 'Libya', 'Lithuania', 'Luxembourg', 'Malawi', 'Malaysia', 'Malta', 'Mauritius', 'Mexico', 'Moldova, Republic of', 'Mongolia', 'Morocco',

We can see that this database includes people born in Ireland, and residing in the the same county as birth, and people born in Ireland and residing in a different county than birth.

People born in the UK are also distinguished by nation of birth: England and Wales, Scotland, Northern Ireland. 

Let's make separate lists for international birthplaces and UK birthplaces in order to filter the data accordingly.

In [166]:
# create a list of international birthplaces, with the UK listed as one country (not separate nations)
ireland=['Ireland - county of usual residence','Ireland - county other than county of usual residence']

#create list without those born in Ireland, including individual UK nations
int_w_UKnations=[c for c in all_birthplaces if c not in ireland]

#create international list, replacing UK nations with 'UK'
UK_nations=['Northern Ireland','England and Wales','Scotland']
international=['United Kingdom' if c in UK_nations else c for c in int_w_UKnations]

#remove duplicate 'United Kingdoms' using set(), and converting back to list
international=list(set(international))
international.sort()
print(international)

['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina', 'Australia', 'Austria', 'Bahrain', 'Bangladesh', 'Belarus', 'Belgium', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Bulgaria', 'Cameroon', 'Canada', 'Chile', 'China', 'Colombia', 'Congo', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Denmark', 'Egypt', 'Estonia', 'Ethiopia', 'Finland', 'France', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala', 'Hong Kong', 'Hungary', 'India', 'Indonesia', 'Iran, Islamic Republic of', 'Iraq', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Korea, Republic of', 'Kosovo', 'Kuwait', "Lao People's Democratic Republic", 'Latvia', 'Lebanon', 'Liberia', 'Libya', 'Lithuania', 'Luxembourg', 'Malawi', 'Malaysia', 'Malta', 'Mauritius', 'Mexico', 'Moldova, Republic of', 'Mongolia', 'Morocco', 'Myanmar', 'Nepal', 'Netherlands', 'New Zealand', 'Nigeria', 'Norway', 'Oman', 'Other Africa', 'Other America', 'Other Asia', 'Other Europe', 'Pakistan', 'Peru', '

In [125]:
#check that list of international countries is the right length. 
# Should be 121-2(irish born)-3(UK nations)+1(United Kingdom)=117
len(international)

117

Note that the census data includes a number of country names that will need reformatting to match standard country names in geo json files, such as 'United States', 'Tanzania, United Republic of', 'Russian Federation', etc. This is done in the section on total immigration below.

In [126]:
df.describe()
#From this we can see that the average population has gone up since 2011, 
# but as this database still includes those who were born in Ireland, it doesn't necessarily 
# represent migration going up.

Unnamed: 0,2011,2016
count,605.0,605.0
mean,7479.510744,7751.01157
std,56577.958426,57704.627657
min,0.0,0.0
25%,43.0,57.0
50%,143.0,172.0
75%,565.0,684.0
max,736424.0,761950.0


In [181]:
#examples of df slicing on multiple indices
df.index.levels[0]

Index(['0 - 14 years', '15 - 24 years', '25 - 44 years', '45 - 64 years',
       '65 years and over'],
      dtype='object', name='age group')

In [182]:
#examples of df slicing on multiple indices
df.xs("2011",axis=1)

age group          birthplace                                           
0 - 14 years       Ireland - county of usual residence                      731189
                   Ireland - county other than county of usual residence    148981
                   Northern Ireland                                           7808
                   England and Wales                                         22045
                   Scotland                                                   1250
                                                                             ...  
65 years and over  Trinidad and Tobago                                          32
                   Venezuela                                                     2
                   Other America                                                48
                   Australia                                                   163
                   New Zealand                                                  74
Name: 2011, Le

In [185]:
#examples of df slicing on multiple indices
df.xs("Afghanistan",level=1)

Unnamed: 0_level_0,2011,2016
age group,Unnamed: 1_level_1,Unnamed: 2_level_1
0 - 14 years,44,394
15 - 24 years,141,411
25 - 44 years,323,765
45 - 64 years,43,141
65 years and over,4,18


In [186]:
#examples of df slicing on multiple indices
df.xs("Zimbabwe",level=1)

Unnamed: 0_level_0,2011,2016
age group,Unnamed: 1_level_1,Unnamed: 2_level_1
0 - 14 years,350,153
15 - 24 years,435,491
25 - 44 years,1412,1331
45 - 64 years,525,737
65 years and over,68,99


In [187]:
#examples of df slicing on multiple indices
df.xs("United Arab Emirates",level=1)

Unnamed: 0_level_0,2011,2016
age group,Unnamed: 1_level_1,Unnamed: 2_level_1
0 - 14 years,196,256
15 - 24 years,179,243
25 - 44 years,123,184
45 - 64 years,1,6
65 years and over,0,3


In [21]:
#examples of df slicing on multiple indices
age_2544=pd.DataFrame(df.xs("25 - 44 years",level=0)['2016'])
age_2544=age_2544.reset_index()
age_2544

Unnamed: 0,birthplace,2016
0,Ireland - county of usual residence,707020
1,Ireland - county other than county of usual re...,298769
2,Northern Ireland,16610
3,England and Wales,54266
4,Scotland,4518
...,...,...
116,Trinidad and Tobago,93
117,Venezuela,1390
118,Other America,806
119,Australia,2445


## Data exploration - total immigration

1. Group all ages groups together and calculate total number of people residing in Ireland, based on birthplace. Investigate stats.  
2. Map country names so that they match the standard in countries.geo.json file.


In [4]:
#reset index to make it easier to group and map choropleth
df=df.reset_index()
df

Unnamed: 0,age group,birthplace,2011,2016
0,0 - 14 years,Ireland - county of usual residence,731189,761950
1,0 - 14 years,Ireland - county other than county of usual re...,148981,165421
2,0 - 14 years,Northern Ireland,7808,7105
3,0 - 14 years,England and Wales,22045,12772
4,0 - 14 years,Scotland,1250,820
...,...,...,...,...
600,65 years and over,Trinidad and Tobago,32,44
601,65 years and over,Venezuela,2,7
602,65 years and over,Other America,48,73
603,65 years and over,Australia,163,196


In [5]:
#group by birthplace and sum over all ages to get total people in ireland by birth
df_total=df.groupby("birthplace").sum()
df_total

Unnamed: 0_level_0,2011,2016
birthplace,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,555,1729
Albania,823,1081
Algeria,1372,1412
Angola,1089,910
Argentina,756,1193
...,...,...
Uzbekistan,242,242
Venezuela,482,1729
Vietnam,1711,1932
Zambia,689,673


In [6]:
# total number of people in Ireland in 2016, who indicated their birthplace:
total_by_birthplace=df_total["2016"].sum()
CSO_official_pop_total=4761865
percent=(total_by_birthplace/CSO_official_pop_total)*100
print("Total residents: "+str(total_by_birthplace))
print("This is %.2f percent of the official total population in 2016 from the CSO" % percent)

Total residents: 4689362
This is 98.48 percent of the official total population in 2016 from the CSO


This total is slightly less than the official total population of Ireland reported by the CSO in 2016: "4,761,865 persons in April 2016", from the [2016 Irish census summary](https://www.cso.ie/en/media/csoie/newsevents/documents/census2016summaryresultspart1/Census2016SummaryPart1.pdf).

This might be due to some people not indicating their birthplace on the census form.

In [7]:
# percentage of population born in Ireland in 2016
df_irish=df_total.loc[["Ireland - county of usual residence","Ireland - county other than county of usual residence"]]

born_ire_11=df_irish["2011"].sum()
percent_irish_11=(born_ire_11/total_by_birthplace)*100
percent_abroad_11=((total_by_birthplace-born_ire_11)/total_by_birthplace)*100

print("In 2011, %.2f percent of the population were born in Ireland." % percent_irish_11)
print("    %.2f percent were born abroad." % percent_abroad_11)

born_ire_16=df_irish["2016"].sum()
percent_irish_16=(born_ire_16/total_by_birthplace)*100
percent_abroad_16=((total_by_birthplace-born_ire_16)/total_by_birthplace)*100

print("In 2016, %.2f percent of the population were born in Ireland" % percent_irish_16)
print("    %.2f percent were born abroad." % percent_abroad_16)

In 2011, 80.15 percent of the population were born in Ireland.
    19.85 percent were born abroad.
In 2016, 82.73 percent of the population were born in Ireland
    17.27 percent were born abroad.


Note that there were more people born abroad in Ireland in 2011 compared to 2016.  
This will be due to a number of international and economic factors, and may also be due to an increase in second-generation children born to parents who immigrated from abroad.

A dictionary of correct mappings is made by manually comparing the 121 birthplaces listed below with the countries listed in the the 'countries.geo.json' file that's used for choropleth mapping.

In [8]:
# replace birthplace names using dictionary of names that match countries.geo.json file
df_total=df_total.reset_index() #make birthplace a column for mapping
df_total

Unnamed: 0,birthplace,2011,2016
0,Afghanistan,555,1729
1,Albania,823,1081
2,Algeria,1372,1412
3,Angola,1089,910
4,Argentina,756,1193
...,...,...,...
116,Uzbekistan,242,242
117,Venezuela,482,1729
118,Vietnam,1711,1932
119,Zambia,689,673


In [9]:
# dict of countries that need to be renamed to match json file:
# Note this won't include the people born in the following categories: 'Other Africa', 'Other America', 'Other Asia', 'Other Europe'

census_to_json_names={"United States":"United States of America","Ireland - county of usual residence":"Ireland","Ireland - county other than county of usual residence":"Ireland","Northern Ireland":"United Kingdom","England and Wales":"United Kingdom","Scotland":"United Kingdom","Russian Federation":"Russia","Korea, Republic of":"South Korea","Cote d'Ivoire":"Ivory Coast","Hong Kong":"China","Iran, Islamic Republic of":"Iran","Lao People's Democratic Republic":"Laos","Moldova, Republic of":"Moldova","Syrian Arab Republic":"Syria","Taiwan, Province of China":"Taiwan","Tanzania, United Republic of":"United Republic of Tanzania"}

df_total['birthplace']=df_total['birthplace'].map(census_to_json_names).fillna(df_total['birthplace'])

In [10]:
# check that these country name subsitutions worked
df_total[df_total['birthplace']=='Ireland']

Unnamed: 0,birthplace,2011,2016
44,Ireland,2825408,2891726
45,Ireland,933103,987789


In [11]:
# check that these country name subsitutions worked
df_total[df_total['birthplace']=='United Kingdom']

Unnamed: 0,birthplace,2011,2016
28,United Kingdom,212286,203173
76,United Kingdom,58470,57389
91,United Kingdom,17871,16644


In [12]:
# Ireland and United Kingdom will have duplicates due to replacing multiple rows
# For this dataset of total immigration, combine and sum these duplicates

df_total=df_total.groupby('birthplace').sum().reset_index()

In [13]:
#check this worked:
print(len(df_total))
df_total[df_total['birthplace']=='United Kingdom']

117


Unnamed: 0,birthplace,2011,2016
109,United Kingdom,288627,277206


## Choropleth world map of Irish immigration

In [22]:
# define the world map
m = folium.Map(location=[25, -7], zoom_start=2)

# display world map
m

In [37]:
# geo json file is required to map out country boundaries. 
# I've downloaded the file from https://github.com/johan/world.geo.json
# Included addition of Singapore, and edit of Malaysian boundaries, as detailed here: https://github.com/johan/world.geo.json/pull/40
import json
# file name - file is located in the working directory:
world_geo=r'countries.geo.json'

# open the json file - json.load() methods returns a python dictionary

with open(world_geo) as file:
    world_json = json.load(file)


# we loop through the dictionary to obtain the name of the communities in the json file
names_json = []
for index in range(len(world_json['features'])):
    names_json.append(world_json['features'][index]['properties']) #['properties']['name']
    
names_json
        
# key_on='object.geometry.properties.COMUNE',

[{'name': 'Afghanistan'},
 {'name': 'Angola'},
 {'name': 'Albania'},
 {'name': 'United Arab Emirates'},
 {'name': 'Argentina'},
 {'name': 'Armenia'},
 {'name': 'Antarctica'},
 {'name': 'French Southern and Antarctic Lands'},
 {'name': 'Australia'},
 {'name': 'Austria'},
 {'name': 'Azerbaijan'},
 {'name': 'Burundi'},
 {'name': 'Belgium'},
 {'name': 'Benin'},
 {'name': 'Burkina Faso'},
 {'name': 'Bangladesh'},
 {'name': 'Bulgaria'},
 {'name': 'The Bahamas'},
 {'name': 'Bosnia and Herzegovina'},
 {'name': 'Belarus'},
 {'name': 'Belize'},
 {'name': 'Bermuda'},
 {'name': 'Bolivia'},
 {'name': 'Brazil'},
 {'name': 'Brunei'},
 {'name': 'Bhutan'},
 {'name': 'Botswana'},
 {'name': 'Central African Republic'},
 {'name': 'Canada'},
 {'name': 'Switzerland'},
 {'name': 'Chile'},
 {'name': 'China'},
 {'name': 'Ivory Coast'},
 {'name': 'Cameroon'},
 {'name': 'Democratic Republic of the Congo'},
 {'name': 'Republic of the Congo'},
 {'name': 'Colombia'},
 {'name': 'Costa Rica'},
 {'name': 'Cuba'},
 {'n

In [30]:
# generate choropleth map using the total immigration of each country to Canada from 1980 to 2013
folium.Choropleth(
    geo_data=world_geo,
    data=age_2544,
    columns=['birthplace', '2016'],
    key_on='features.properties.name',
    fill_color='YlOrRd', 
    fill_opacity=1, 
    line_opacity=1,
    legend_name='Immigration to Ireland').add_to(m)

# display map
m

AttributeError: 'NoneType' object has no attribute 'get'