In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Fetch every HTML table found on the canada.ca Express Entry rounds page.
EXPRESS_ENTRY_ROUNDS_URL = (
    'https://www.canada.ca/en/immigration-refugees-citizenship/corporate/mandate/'
    'policies-operational-instructions-agreements/ministerial-instructions/'
    'express-entry-rounds.html'
)
data = pd.read_html(EXPRESS_ENTRY_ROUNDS_URL)

# The rest of the notebook works on the first table only
# (presumably the rounds-of-invitations table -- confirm if the page layout changes).
df = data[0]

In [None]:
# Preview the first four rows of the table selected from the read_html result.
df.head(4)

In [None]:
# Show column names, non-null counts and dtypes before any conversion is applied.
df.info()

In [None]:
# Convert both date columns to pandas datetimes (in place on df) so the
# .dt accessor can be used on them in later cells.
for date_col in ('Date', 'Date (hidden)'):
    df[date_col] = pd.to_datetime(df[date_col])

In [None]:
# Derive calendar helper columns from the parsed draw date.
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year

# First day of each draw's month, as a real timestamp. Truncating through a
# monthly period avoids formatting the date to text ('Jan-2015') and parsing
# it back, which depends on locale-specific month abbreviations and on pandas
# guessing the string format.
df['month_year'] = df['Date'].dt.to_period('M').dt.to_timestamp()

# Human-readable day-month-year label (a string column, not a datetime).
df['Date Full'] = df['Date'].dt.strftime("%d-%b-%Y")

In [None]:
# Re-inspect the frame after the datetime conversion and the derived date columns.
df.info()

In [None]:
# Fold the 'Provincial Nominee Class' label into 'Provincial Nominee Program'
# so both spellings are grouped as one program in the plots below.
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: that form is chained assignment, which warns on pandas 2.x
# and leaves df unchanged once Copy-on-Write is enabled.
df['Immigration program'] = df['Immigration program'].replace(
    {'Provincial Nominee Class': 'Provincial Nominee Program'}
)

In [None]:
# Timeline of invitations issued per month, one colour per immigration program.
sns.set_style('darkgrid')  # set before the axes are created so the style applies
fig, ax = plt.subplots(figsize=(26, 10))

# The line follows the per-month trend; the markers show the underlying rows.
sns.lineplot(x='month_year', y='Invitations issued', hue='Immigration program',
             lw=3, data=df, ax=ax)
# legend=False: the line plot already registers one legend entry per program.
# Letting the scatter plot add its own would list every program twice.
sns.scatterplot(x='month_year', y='Invitations issued', hue='Immigration program',
                s=140, data=df, legend=False, ax=ax)

ax.set_xlabel('Month and Years', fontsize=25)
ax.set_ylabel('Number of Invitations', fontsize=25)
ax.set_title('Timeline since 2015 for Number of Invites', fontsize=35)

# tick_params styles the axis itself, so the settings survive a re-tick;
# text properties passed to plt.xticks() only affect the ticks that exist now.
ax.tick_params(axis='x', labelrotation=60, labelsize=20)
ax.tick_params(axis='y', labelsize=20)

# Place the legend outside the plot area on the right. Rebuilding the legend
# discards seaborn's title, so it is passed again explicitly here.
legend = ax.legend(title='Immigration program', bbox_to_anchor=(1.05, 1), loc=2,
                   borderaxespad=0., fontsize=32)
legend.get_title().set_fontsize(32)

In [None]:
# Timeline of the lowest invited CRS score per month, one colour per immigration program.
sns.set_style('darkgrid')  # set before the axes are created so the style applies
fig, ax = plt.subplots(figsize=(26, 10))

# The line follows the per-month trend; the markers show the underlying rows.
sns.lineplot(x='month_year', y='CRS score of lowest-ranked candidate invited',
             hue='Immigration program', lw=3, data=df, ax=ax)
# legend=False: the line plot already registers one legend entry per program.
# Letting the scatter plot add its own would list every program twice.
sns.scatterplot(x='month_year', y='CRS score of lowest-ranked candidate invited',
                hue='Immigration program', s=140, data=df, legend=False, ax=ax)

ax.set_xlabel('Month and Years', fontsize=25)
ax.set_ylabel('CRS Scores', fontsize=25)
ax.set_title('Timeline since 2015 for CRS Scores', fontsize=35)

# tick_params styles the axis itself, so the settings survive a re-tick;
# text properties passed to plt.xticks() only affect the ticks that exist now.
ax.tick_params(axis='x', labelrotation=0, labelsize=20)
ax.tick_params(axis='y', labelsize=20)

# Place the legend outside the plot area on the right. Rebuilding the legend
# discards seaborn's title, so it is passed again explicitly here.
legend = ax.legend(title='Immigration program', bbox_to_anchor=(1.05, 1), loc=2,
                   borderaxespad=0., fontsize=32)
legend.get_title().set_fontsize(32)


In [None]:
# Lowest invited CRS score against the number of invitations issued,
# drawn as one line per immigration program.
sns.set_style('darkgrid')
plt.figure(figsize=(26, 10))
score_axes = sns.lineplot(
    data=df,
    x='Invitations issued',
    y='CRS score of lowest-ranked candidate invited',
    hue='Immigration program',
    lw=3,
)

score_axes.set_title('CRS Scores vs Invitations', fontsize=35)
score_axes.set_xlabel('Invitations', fontsize=25)
score_axes.set_ylabel('CRS Scores', fontsize=25)

# Enlarge the legend that seaborn created (entry labels first, then the title).
program_legend = score_axes.get_legend()
plt.setp(program_legend.get_texts(), fontsize='32')
plt.setp(program_legend.get_title(), fontsize='32')

# Tick labels: horizontal on x, size 20 on both axes.
plt.xticks(rotation=0, fontsize=20)
plt.yticks(fontsize=20);

In [None]:
# Keep only the draws labelled 'No program specified' whose year is after 2017.
is_general_draw = df['Immigration program'] == 'No program specified'
is_after_2017 = df['Year'] > 2017
df_all = df[is_general_draw & is_after_2017]

In [None]:
# Per-calendar-month totals over the filtered draws.
# numeric_only=True is required on pandas >= 2.0: df_all also carries datetime
# columns ('Date', 'Date (hidden)', 'month_year'), which cannot be summed and
# would otherwise raise TypeError. Older pandas dropped such columns silently,
# so the resulting frame has the same numeric columns as before.
df_m = df_all.groupby('Month').sum(numeric_only=True).reset_index()
df_m.head()

In [None]:
# Bar chart of total invitations per calendar month for the filtered draws in df_m.
sns.set_style('darkgrid')  # set before the axes are created so the style applies
fig, ax = plt.subplots(figsize=(26, 10))
sns.barplot(x='Month', y='Invitations issued', data=df_m, palette='magma', ax=ax)

# Write each bar's value just above its top edge.
for bar in ax.patches:
    ax.annotate(format(bar.get_height(), '.1f'),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center',
                xytext=(0, 9), size=20,
                textcoords='offset points')

# Read the year range from the data rather than hard-coding "2018 to 2020":
# df_all is filtered with an open-ended Year > 2017, so a fixed end year goes stale.
years = df_all['Year'].astype(int)
ax.set_title(
    f'Invitations by Month from {years.min()} to {years.max()} (No Program Specified)',
    fontsize=35,
)
ax.set_xlabel('Months', fontsize=25)
ax.set_ylabel('Invitations', fontsize=25)
ax.tick_params(axis='both', labelsize=20)

In [None]:
# Per-calendar-month averages over the filtered draws.
# numeric_only=True is required on pandas >= 2.0: df_all also carries text and
# datetime columns, and averaging the text columns would otherwise raise
# TypeError. Older pandas dropped such columns silently, so the resulting
# frame has the same numeric columns as before.
df_r = df_all.groupby('Month').mean(numeric_only=True).reset_index()
df_r.head()

In [None]:
# Bar chart of the average lowest invited CRS score per calendar month (from df_r).
sns.set_style('darkgrid')  # set before the axes are created so the style applies
fig, ax = plt.subplots(figsize=(26, 10))
sns.barplot(x='Month', y='CRS score of lowest-ranked candidate invited',
            data=df_r, palette='magma', ax=ax)

# Write each bar's value just above its top edge.
for bar in ax.patches:
    ax.annotate(format(bar.get_height(), '.1f'),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center',
                xytext=(0, 9), size=20,
                textcoords='offset points')

# Read the year range from the data rather than hard-coding "2018 to 2020":
# df_all is filtered with an open-ended Year > 2017, so a fixed end year goes stale.
years = df_all['Year'].astype(int)
ax.set_title(
    f'CRS Scores by Month from {years.min()} to {years.max()} (No Program Specified)',
    fontsize=35,
)
ax.set_xlabel('Months', fontsize=25)
ax.set_ylabel('CRS Scores', fontsize=25)
ax.tick_params(axis='both', labelsize=20)

# Zoom the y-axis to 430-500 so month-to-month differences are visible;
# the bars start at 0, so their lower part is cut off by this window.
ax.set_ylim(430, 500);

In [None]:
# Lowest invited CRS score for the 20 most recent draws in df_all.
# Sort by date explicitly instead of relying on the scraped table being listed
# newest-first; the stable sort keeps the original order for same-day draws.
last_20_draws = df_all.sort_values('Date', ascending=False, kind='stable').head(20)

sns.set_style('darkgrid')  # set before the axes are created so the style applies
fig, ax = plt.subplots(figsize=(26, 10))

# Line for the trend across draw numbers, red markers for the individual draws.
sns.lineplot(x='#', y='CRS score of lowest-ranked candidate invited',
             data=last_20_draws, color='skyblue', lw=3.5, ax=ax)
sns.scatterplot(x='#', y='CRS score of lowest-ranked candidate invited',
                data=last_20_draws, color='red', s=200, ax=ax)

ax.set_xlabel('Draw Numbers #', fontsize=25)
ax.set_ylabel('CRS Scores', fontsize=25)
ax.set_title('CRS Scores of last 20 Draws (No Program Specific)', fontsize=35)
ax.tick_params(axis='both', labelsize=20)

In [None]:
# Row counts per calendar month for 2019, 2020 and 2021. count() tallies the
# non-null cells of every column, so '#' gives the number of draws in a month
# (assuming '#' is never missing -- confirm against the source table).
df_19, df_20, df_21 = (
    df_all[df_all['Year'] == year].groupby('Month').count().reset_index()
    for year in (2019, 2020, 2021)
)

fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(20, 10))

# One stacked panel per year: (year, monthly counts, line colour).
# None keeps seaborn's default line colour for the first panel.
panels = [
    (2019, df_19, None),
    (2020, df_20, 'red'),
    (2021, df_21, 'green'),
]
for panel_ax, (year, monthly_counts, line_color) in zip(ax, panels):
    sns.lineplot(x='Month', y='#', data=monthly_counts, ax=panel_ax,
                 color=line_color, lw=3)
    sns.scatterplot(x='Month', y='#', data=monthly_counts, ax=panel_ax,
                    color='orange', s=150)
    panel_ax.set_xlabel('Month', fontsize=15)
    panel_ax.set_ylabel('Number of Draws', fontsize=15)
    panel_ax.set_title(f'Number of Draws per Month in {year} (No Program Specific)',
                       fontsize=15)

plt.tight_layout()