In [5]:
# Initialisations

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.stats as stats
from scipy.stats import poisson

import plotly.offline as py
import cufflinks as cf
import plotly.figure_factory as ff
from plotly.graph_objs import *
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [10]:
# Read the preprocessed migration records from disk (path is relative to the
# notebook's working directory) and preview the first five rows.
migratory_patterns_compiled = pd.read_csv(
    filepath_or_buffer="migratory_patterns_compiled.csv"
)
migratory_patterns_compiled.head(n=5)

Unnamed: 0,Year,Origin,origin_lat,origin_lon,Destination,dest_lat,dest_lon,Value
0,1999,Afghanistan,33.0,66.0,Australia,-25.0,135.0,609.0
1,1999,Afghanistan,33.0,66.0,Austria,47.333333,13.333333,2209.0
2,1999,Afghanistan,33.0,66.0,Belgium,50.833333,4.0,401.0
3,1999,Afghanistan,33.0,66.0,Bulgaria,43.0,25.0,277.0
4,1999,Afghanistan,33.0,66.0,Canada,60.0,-96.0,539.0


In [11]:
# Read the country centroid table and rename its columns for plotting.
# NOTE(review): assumes the CSV provides 'Destination', 'dest_lat' and 'dest_lon'
# columns; rename() silently ignores missing ones — confirm against the file.
df_countries = pd.read_csv("country_centroids_use.csv").rename(
    columns={
        'Destination': 'Country',
        'dest_lat': 'lat',
        'dest_lon': 'long',
    }
)

# One scattergeo trace, wrapped in a list so it can be concatenated with the
# path traces: a small marker per country with the country name shown on hover.
countries = [
    {
        'type': 'scattergeo',
        'locationmode': 'country names',
        'lon': df_countries['long'],
        'lat': df_countries['lat'],
        'hoverinfo': 'text',
        'text': df_countries['Country'],
        'mode': 'markers',
        'marker': {
            'size': 2,
            'color': 'rgb(156, 81, 182)',
            # Outline colour has alpha 0, i.e. it is fully transparent
            'line': {'width': 3, 'color': 'rgba(68, 68, 68, 0)'},
        },
    }
]

# Migration Patterns by Destination

In [12]:
# Take user input (int() raises ValueError if the reply is not a whole number)
num_countries_dest = int(input('Please enter the number of countries you want to analyse.\nIt must fall between 1-257.\n \n '))
year_dest = int(input('Please enter the year you want to analyse.\nIt must fall between 1999-2017.\n \n '))

# Find the defined number of most common destinations in the defined year
df_dest = migratory_patterns_compiled[migratory_patterns_compiled['Year'] == year_dest]
df_new = df_dest[['Year', 'Destination', 'Value']]
# Sum Value per destination. `axis` is not passed: 0 is the default and the
# keyword is deprecated in recent pandas releases.
df_new = df_new.groupby(['Destination', 'Year'], as_index=False).sum()
df_new = df_new.sort_values(by=['Value'], ascending = False)
df_new = df_new.head(num_countries_dest)
df_dest_list = df_new['Destination'].tolist()

# Filter to only include the defined number
df_dest = df_dest[df_dest['Destination'].isin(df_dest_list)]
df_dest = df_dest.reset_index(drop = True)

# Print statement
# NOTE: unique() lists the destinations in the row order of the filtered data,
# not in ranked order.
print('\n \nThe ' + str(num_countries_dest) + ' most common destination in ' + str(year_dest) + ' were:', df_dest['Destination'].unique())

paths_dest = [] # Empty array for migration paths

# Largest Value among the plotted journeys, computed once instead of on every
# loop iteration (it is NaN when the filtered frame is empty, in which case the
# loop below does not run).
max_value_dest = float(df_dest['Value'].max())

# Iterate through the filtered dataset, one line trace per origin -> destination row
for origin_lon, origin_lat, dest_lon, dest_lat, value in zip(
        df_dest['origin_lon'], df_dest['origin_lat'],
        df_dest['dest_lon'], df_dest['dest_lat'], df_dest['Value']):

    # Normalise opacity against the largest Value; fall back to fully opaque
    # when the maximum is 0 so the division cannot raise ZeroDivisionError.
    opacity = float(value) / max_value_dest if max_value_dest != 0 else 1.0

    paths_dest.append(dict(type = 'scattergeo',
                           lon = [origin_lon, dest_lon], # Add longitude
                           lat = [origin_lat, dest_lat], # Add latitude
                           mode = 'lines', line = dict(width = 2, color = 'hotpink'), # Add colour and width information
                           opacity = opacity))

layout = dict(title = str(year_dest) + ' Migration Journeys (by Destination)', showlegend = False,
              geo = dict(showframe = False, showcoastlines = True,
                         projection = dict(type = 'equirectangular'), showland = True,
                         landcolor = 'rgb(243, 243, 243)', countrycolor = 'rgb(204, 204, 204)'))

# Plot migration patterns: path traces first, then the country markers
fig = dict(data = paths_dest + countries, layout = layout )
py.iplot(fig)

Please enter the number of countries you want to analyse.
It must fall between 1-257.
 
 2
Please enter the year you want to analyse.
It must fall between 1999-2017.
 
 2003

 
The 2 most common destination in 2003 were: ['France' 'United Kingdom']


# Migration Patterns by Origin

In [13]:
# Take user input (int() raises ValueError if the reply is not a whole number)
num_countries_origin = int(input('Please enter the number of countries you want to analyse.\nIt must fall between 1-257.\n \n '))
year_origin = int(input('Please enter the year you want to analyse.\nIt must fall between 1999-2017.\n \n '))

# Find the defined number of most common origins in the defined year
df_origin = migratory_patterns_compiled[migratory_patterns_compiled['Year'] == year_origin]
df_new = df_origin[['Year', 'Origin', 'Value']]
# Sum Value per origin. `axis` is not passed: 0 is the default and the keyword
# is deprecated in recent pandas releases.
df_new = df_new.groupby(['Origin', 'Year'], as_index=False).sum()
df_new = df_new.sort_values(by=['Value'], ascending = False)
df_new = df_new.head(num_countries_origin)
df_origin_list = df_new['Origin'].tolist()

# Filter to only include the defined number
df_origin = df_origin[df_origin['Origin'].isin(df_origin_list)]
df_origin = df_origin.reset_index(drop = True)

# Print statement
# NOTE: unique() lists the origins in the row order of the filtered data,
# not in ranked order.
print('\n \nThe ' + str(num_countries_origin) + ' most common origins in ' + str(year_origin) + ' were:', df_origin['Origin'].unique())

paths_origin = [] # Empty array for migration paths

# Largest Value among the plotted journeys, computed once instead of on every
# loop iteration (it is NaN when the filtered frame is empty, in which case the
# loop below does not run).
max_value_origin = float(df_origin['Value'].max())

# Iterate through the filtered dataset, one line trace per origin -> destination row
for origin_lon, origin_lat, dest_lon, dest_lat, value in zip(
        df_origin['origin_lon'], df_origin['origin_lat'],
        df_origin['dest_lon'], df_origin['dest_lat'], df_origin['Value']):

    # Normalise opacity against the largest Value; fall back to fully opaque
    # when the maximum is 0 so the division cannot raise ZeroDivisionError.
    opacity = float(value) / max_value_origin if max_value_origin != 0 else 1.0

    paths_origin.append(dict(type = 'scattergeo',
                             lon = [origin_lon, dest_lon], # Add longitude
                             lat = [origin_lat, dest_lat], # Add latitude
                             mode = 'lines', line = dict(width = 2, color = 'hotpink'), # Add colour and width information
                             opacity = opacity))

layout = dict(title = str(year_origin) + ' Migration Journeys (by Origin)', showlegend = False,
              geo = dict(showframe = False, showcoastlines = True,
                         projection = dict(type = 'equirectangular'), showland = True,
                         landcolor = 'rgb(243, 243, 243)', countrycolor = 'rgb(204, 204, 204)'))

# Plot migration patterns: path traces first, then the country markers
fig = dict(data = paths_origin + countries, layout = layout )
py.iplot(fig)

Please enter the number of countries you want to analyse.
It must fall between 1-257.
 
 2
Please enter the year you want to analyse.
It must fall between 1999-2017.
 
 2004

 
The 2 most common origins in 2004 were: ['Russia' 'Serbia']
