



# Exploring the Immigration to Canada Dataset with *Python* <a id="0"></a>




Dataset: Immigration to Canada from 2015 to 2021 - [International migration flows to and from selected countries - The 2015 revision](http://www.un.org/en/development/desa/population/migration/data/empirical2/migrationflows.shtml?utm_medium=Exinfluencer&utm_source=Exinfluencer&utm_content=000026UJ&utm_term=10006555&utm_id=NA-SkillsNetwork-Channel-SkillsNetworkCoursesIBMDeveloperSkillsNetworkDV0101ENSkillsNetwork20297740-2021-01-01) from the United Nations website.

The dataset contains annual data on the flows of international migrants as recorded by the countries of destination. The data presents both inflows and outflows according to the place of birth, citizenship or place of previous / next residence both for foreigners and nationals. 

<center>
    <img src="https://scontent.fgzt3-1.fna.fbcdn.net/v/t39.30808-6/269769316_6581410261929355_1313272683140605885_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=730e14&_nc_ohc=-Ff9AtrzPb4AX9hSZKc&_nc_ht=scontent.fgzt3-1.fna&oh=00_AT8AzZ2BUs0Hwe2Od-4SP7uMTY2xsG22RCxXZj4AnFJZqQ&oe=61C76CC0" width="300" alt="cognitiveclass.ai logo"  />
</center>



In [1]:
# Import required libraries
import plotly.express as px
import pandas as pd
import numpy as np

In [2]:
# Load the admissions table; the relative path is resolved against the
# current working directory.
data = pd.read_csv(filepath_or_buffer='Canada Admissions of Permanent Residents.csv')

In [3]:
# Preview the first five rows to check that the file loaded as expected.
data.head(n=5)

Unnamed: 0,Country,Continent,iso_alpha,1/1/2015,2/1/2015,3/1/2015,4/1/2015,5/1/2015,6/1/2015,7/1/2015,...,1/1/2021,2/1/2021,3/1/2021,4/1/2021,5/1/2021,6/1/2021,7/1/2021,8/1/2021,9/1/2021,10/1/2021
0,Afghanistan,Asia,AFG,90,125,220,135,220,195,250,...,200,210,135,140,150,180,180,1005,1535,1130
1,Albania,Europe,ALB,15,25,50,45,45,75,45,...,45,45,25,35,35,45,60,55,75,45
2,Algeria,Africa,DZA,80,125,235,255,340,280,310,...,175,165,125,90,110,245,585,280,280,365
3,Andorra,Europe,AND,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Angola,Africa,AGO,0,0,0,10,0,0,0,...,5,10,20,5,0,0,15,15,10,25


In [4]:
# Convenience arrays describing the wide-format table.

# Country name of every row. The row index is only a positional counter
# (0, 1, 2, ...), so the names have to come from the 'Country' column.
country_list = data['Country'].values
# Continent of every row (one entry per country row, so values repeat).
continent_list = data['Continent'].values
# Everything after the three leading identifier columns
# (Country, Continent, iso_alpha) is a date-labelled count column.
date_col = data.columns.values[3:]

In [5]:
# Reshape from wide (one column per date) to long (one row per country and
# date): the old column labels land in 'Date', the counts in 'Number'.
unpivot = data.melt(
    id_vars=['Continent', 'Country', 'iso_alpha'],
    value_vars=date_col,
    var_name='Date',
    value_name='Number',
)
# Index the long table by its date labels.
df = unpivot.set_index(keys='Date')

In [6]:
# Convert the month/day/year labels (e.g. '10/1/2021') into a DatetimeIndex.
# Stating the format explicitly keeps labels such as '2/1/2015' from ever
# being read day-first and avoids relying on format inference.
df.index = pd.to_datetime(df.index, format='%m/%d/%Y')

In [7]:
# Split the datetime index into calendar month and year columns so the
# data can be grouped by either one.
df['Month'], df['Year'] = df.index.month, df.index.year

In [8]:
# Show the long-format table (date index plus Continent, Country, iso_alpha,
# Number, Month and Year columns).
df

Unnamed: 0_level_0,Continent,Country,iso_alpha,Number,Month,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,Asia,Afghanistan,AFG,90,1,2015
2015-01-01,Europe,Albania,ALB,15,1,2015
2015-01-01,Africa,Algeria,DZA,80,1,2015
2015-01-01,Europe,Andorra,AND,0,1,2015
2015-01-01,Africa,Angola,AGO,0,1,2015
...,...,...,...,...,...,...
2021-10-01,Europe,"Virgin Islands, British",VGB,0,10,2021
2021-10-01,Africa,Western Sahara,ESH,0,10,2021
2021-10-01,Asia,Yemen,YEM,155,10,2021
2021-10-01,Africa,Zambia,ZMB,10,10,2021


In [73]:
# Sunburst of admissions with the hierarchy Continent -> Country.
# Only rows with a positive count are kept for the chart.
d = df[df['Number'] > 0]
fig = px.sunburst(
    d,
    path=['Continent', 'Country'],
    values='Number',
    color='Number',
)
fig.update_layout(
    title=dict(
        text="Exploring Immigration to Canada Dataset",
        y=0.95,
        x=0.45,
        xanchor='center',
        yanchor='top',
    ),
)

fig.show()
# Peek at the filtered rows that feed the chart.
d.head(10)

Unnamed: 0_level_0,Continent,Country,iso_alpha,Number,Month,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,Asia,Afghanistan,AFG,90,1,2015
2015-01-01,Europe,Albania,ALB,15,1,2015
2015-01-01,Africa,Algeria,DZA,80,1,2015
2015-01-01,South America,Argentina,ARG,10,1,2015
2015-01-01,Europe,Armenia,ARM,10,1,2015
2015-01-01,Oceania,Australia,AUS,95,1,2015
2015-01-01,Europe,Austria,AUT,10,1,2015
2015-01-01,Europe,Azerbaijan,AZE,5,1,2015
2015-01-01,Asia,Bangladesh,BGD,110,1,2015
2015-01-01,Europe,Belarus,BLR,10,1,2015


In [74]:
# Total admissions by year.
# NOTE: the date columns stop at 10/1/2021, so the 2021 total does not cover
# a full calendar year.
Total_admissions_by_year = (
    df.groupby('Year')['Number']
    .sum()
    .to_frame('Total_admissions')
)

# Line chart with the year (the frame's index) on the x axis.
fig = px.line(
    Total_admissions_by_year,
    x=Total_admissions_by_year.index,
    y="Total_admissions",
    markers=True,
)

fig.update_layout(
    title=dict(
        text="Total admissions year by year ",
        y=0.95,
        x=0.45,
        xanchor='center',
        yanchor='top',
    ),
)

fig.show()
Total_admissions_by_year

Unnamed: 0_level_0,Total_admissions
Year,Unnamed: 1_level_1
2015,271155
2016,295670
2017,285775
2018,320295
2019,340460
2020,183600
2021,313105


In [77]:
# Total admissions by calendar month, summed over all years.
# NOTE: the date columns stop at 10/1/2021, so November and December
# include one year fewer than the other months.
Total_admissions_by_month = (
    df.groupby('Month', as_index=True)
    .agg({'Number': 'sum'})
    .rename(columns={'Number': 'Total_admissions'})
)

# Bars are coloured by their own height. The chart title is set in a single
# place (update_layout) so that its text and position stay together.
fig = px.bar(
    Total_admissions_by_month,
    x=Total_admissions_by_month.index,
    y="Total_admissions",
    color='Total_admissions',
)

fig.update_layout(
    title={'text': "Total admissions month by month ",
           'y': 0.95, 'x': 0.45, 'xanchor': 'center', 'yanchor': 'top'},
)

fig.show()
Total_admissions_by_month

Unnamed: 0_level_0,Total_admissions
Month,Unnamed: 1_level_1
1,153140
2,166020
3,175205
4,154860
5,168575
6,204050
7,198680
8,173525
9,192490
10,182015


In [87]:
# Total admissions by country: the ten countries with the largest totals
# over the whole period, in descending order.
Total_admissions_by_country = (
    df.groupby('Country', as_index=True)
    .agg({'Number': 'sum'})
    .rename(columns={'Number': 'Total_admissions'})
    .sort_values(by='Total_admissions', ascending=False)
    .head(10)
)

# One bar (and one colour) per country. The chart title is set in a single
# place (update_layout) so that its text and position stay together.
fig = px.bar(
    Total_admissions_by_country,
    x=Total_admissions_by_country.index,
    y='Total_admissions',
    color=Total_admissions_by_country.index,
)

fig.update_layout(
    title={'text': "Total admissions by country (Top 10) ",
           'y': 0.95, 'x': 0.45, 'xanchor': 'center', 'yanchor': 'top'},
    yaxis_title="Number of admissions",
)

fig.show()
Total_admissions_by_country

Unnamed: 0_level_0,Total_admissions
Country,Unnamed: 1_level_1
India,425745
Philippines,220700
China,177955
Syria,88365
Pakistan,63450
USA,62860
Nigeria,56315
Iran,47130
France,45105
United Kingdom,36570


In [86]:
# Total admissions by country and year, restricted to the top-10 countries.

# Names of the ten countries with the largest totals over the whole period.
Top_10 = (
    df.groupby('Country')['Number']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .index.tolist()
)
# Yearly totals for every country, then only the top-10 rows.
Total = df.groupby(['Country', 'Year']).agg({'Number': 'sum'}).reset_index()
df_filtered = Total[Total['Country'].isin(Top_10)]

# Animated bubble chart with one frame per year. Marker size follows the
# number of admissions; sizing by 'Year' would give every marker in a frame
# the same size and convey nothing. The y range is fixed across frames
# (with some padding) so the axis does not rescale during the animation.
fig = px.scatter(
    df_filtered,
    x="Country",
    y="Number",
    animation_frame="Year",
    size="Number",
    color="Country",
    range_y=[df_filtered['Number'].min() - 4000, df_filtered['Number'].max() + 1000],
)
# The chart title is set in a single place so text and position stay together.
fig.update_layout(
    title={'text': "Total admissions by country Top 10 ",
           'y': 0.95, 'x': 0.45, 'xanchor': 'center', 'yanchor': 'top'},
    yaxis_title="Number of admissions",
)

fig.show()
df_filtered

Unnamed: 0,Country,Year,Number
252,China,2015,19455
253,China,2016,26785
254,China,2017,30250
255,China,2018,29710
256,China,2019,30245
...,...,...,...
1339,United Kingdom,2017,5310
1340,United Kingdom,2018,5660
1341,United Kingdom,2019,5635
1342,United Kingdom,2020,3410


In [85]:
# Box plot of the yearly totals in df_filtered, one box per top-10 country.
fig = px.box(df_filtered, x="Country", y="Number", color="Country")
# Compute quartiles with the "inclusive" method.
fig.update_traces(quartilemethod="inclusive")
fig.update_layout(
    title={'text': "Total admissions by country (Top 10)",
           'y': 0.95, 'x': 0.45, 'xanchor': 'center', 'yanchor': 'top'},
    yaxis_title="Number of admissions",
)
fig.show()

In [84]:
# Yearly admissions for the top-10 countries, one coloured line per country.
fig = px.line(
    df_filtered,
    x='Year',
    y='Number',
    color='Country',
    markers=True,
)

fig.update_layout(
    title=dict(
        text="Total admissions by country and year ",
        y=0.95,
        x=0.45,
        xanchor='center',
        yanchor='top',
    ),
    yaxis_title="Number of admissions",
    xaxis_title="Year",
)

fig.show()

In [83]:

# Yearly admissions per country, keeping the continent and ISO code
# alongside so the rows can be placed on a map.
df_map = (
    df.groupby(['Country', 'Continent', 'Year', 'iso_alpha'])['Number']
    .sum()
    .reset_index()
)
# Same table restricted to the top-10 countries.
# NOTE(review): this filtered frame is built but the map below plots df_map
# (all countries) - confirm which of the two is intended.
df_filtered_map = df_map.loc[df_map['Country'].isin(Top_10)]


# Animated world map with one frame per year; markers are located by ISO
# code and sized by the number of admissions.
fig = px.scatter_geo(
    df_map,
    locations="iso_alpha",
    color="Country",
    hover_name="Country",
    size="Number",
    animation_frame="Year",
    projection="natural earth",
)


fig.show()