# PROJECT I - The impact of Covid-19 on asylum claims (and decisions) in the EU

In [1]:
#Import libraries
import pandas as pd
import numpy as np
import math
import statistics as stats
import sys
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 

## Cleaning UNHCR application and decision dataframes

### 1. From UNHCR - Annual data
 - Type of procedure: G = Government procedure; U = UNHCR procedure; J = Joint
 - Application/ Decision type / Stage of procedure: 
       N - New applications
       R - Repeat applications
       A - Appeal applications
       NA - New and appeal applications: where the data was provided together
       NR - New and repeat applications: where the data was provided together
       FA - First and appeal applications: used by France in 2006, Israel in 2007 and Chad in 2017
       J - Judiciary: applications are at the Judicial level.
       BL - Backlog: Repeat applications processed during specific events to reduce the backlog in cases. Used by Italy in 2007 and South Africa in 2008
       SP - Subsidiary protection: used prior to the inclusion of subsidiary protection in Europe. Used by Belgium in 2008 and Ireland in 2011
       V - Various/unknown
       TP - Temporary protection
       TA - Temporary asylum. Used in the Russian Federation
       TR - Temporary leave to remain outside the asylum procedure
       CA - Cantonal regulations in Switzerland
 - Cases / Persons: P = Persons; C = Cases
 - UNHCR uses the following non-standard ISO3 country codes:
       UNK for Various/unknown
       STA for Stateless

### 1.1. Asylum applications from 2000 to mid-2021 per country of origin in country of asylum.

In [2]:
# Read the raw UNHCR asylum-applications table and normalise its headers:
# lower-case, with spaces turned into underscores.
applications = pd.read_csv('data/UNHCR/asylum-applications.csv')
applications.columns = [header.lower().replace(' ', '_') for header in applications.columns]

In [3]:
# (rows, columns) of the raw applications table.
applications.shape

(94883, 10)

In [4]:
# Fraction of missing values per column (mean of the boolean NaN mask).
applications.isna().mean()

year                       0.000000
country_of_origin          0.000000
country_of_origin_(iso)    0.009991
country_of_asylum          0.000000
country_of_asylum_(iso)    0.000000
authority                  0.000000
application_type           0.004469
stage_of_procedure         0.024009
cases_/_persons            0.000000
applied                    0.000000
dtype: float64

In [5]:
# The NaN values in country_of_origin_(iso) are expected to line up with the rows
# whose country_of_origin is 'Unknown ' (note the trailing space).
applications['country_of_origin'].eq('Unknown ').value_counts()
# Strip the trailing space from the label and give those rows UNHCR's 'UNK' code.
applications['country_of_origin'] = applications['country_of_origin'].map(
    lambda name: name.replace('Unknown ', 'Unknown')
)
applications['country_of_origin_(iso)'] = applications['country_of_origin_(iso)'].fillna('UNK')

In [6]:
# Missing application types are a very small share of the rows, so label them 'UNK'.
# 'V' (various/unknown) is interchangeable with 'UNK' and is folded into it.
applications['application_type'] = (
    applications['application_type']
    .fillna('UNK')
    .replace('V', 'UNK')
)

In [7]:
# Missing stage_of_procedure values are a small share of the rows. The column has
# no "unknown" code, so they are filled with the mode, 'FI'.
applications['stage_of_procedure'].value_counts()
# Lower-case spellings are mapped onto their upper-case codes.
applications['stage_of_procedure'] = (
    applications['stage_of_procedure']
    .fillna('FI')
    .replace({'fi': 'FI', 'ar': 'AR'})
)

In [8]:
# Distinct values present in each column, as a sanity check after the cleaning steps.
applications.apply(pd.Series.unique)

year                       [2000, 2001, 2002, 2003, 2004, 2005, 2006, 200...
country_of_origin          [Afghanistan, Albania, Algeria, Angola, Antigu...
country_of_origin_(iso)    [AFG, ALB, DZA, AGO, ATG, EGY, ARG, ARM, AUS, ...
country_of_asylum          [Albania, Egypt, Australia, Austria, Azerbaija...
country_of_asylum_(iso)    [ALB, EGY, AUS, AUT, AZE, BEL, BLR, BGR, KHM, ...
authority                                                          [G, U, J]
application_type                               [UNK, A, N, NR, R, J, FA, SP]
stage_of_procedure                      [FI, AR, FA, RA, EO, IN, JR, SP, TA]
cases_/_persons                                                       [C, P]
applied                    [5, 93, 90, 1326, 4205, 902, 861, 229, 675, 48...
dtype: object

In [9]:
# Keep only the ISO codes; the full country names are redundant.
applications = applications.drop(columns=['country_of_origin', 'country_of_asylum'])

In [10]:
# Preview the first rows of the cleaned applications table.
applications.head()

Unnamed: 0,year,country_of_origin_(iso),country_of_asylum_(iso),authority,application_type,stage_of_procedure,cases_/_persons,applied
0,2000,AFG,ALB,G,UNK,FI,C,5
1,2000,AFG,EGY,U,UNK,FI,P,93
2,2000,AFG,AUS,G,UNK,AR,C,90
3,2000,AFG,AUS,G,UNK,FI,C,1326
4,2000,AFG,AUT,G,UNK,FA,P,4205


In [11]:
# 'new_applications' carries the applied count only for rows typed 'N' (new), else 0.
applications['new_applications'] = applications['applied'].where(
    applications['application_type'] == 'N', 0
)

# Yearly sums of all applications and of new applications, plus a 'Total' margin row.
total_yearly_app = (
    applications
    .pivot_table(
        index=['year'],
        values=['applied', 'new_applications'],
        aggfunc={'applied': 'sum', 'new_applications': 'sum'},
        margins=True,
        margins_name='Total',
    )
    .rename(columns={'applied': 'total_applications'})
)
display(total_yearly_app)

Unnamed: 0_level_0,total_applications,new_applications
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,1092173,0
2001,917266,0
2002,947565,0
2003,839854,0
2004,680170,0
2005,672759,0
2006,608900,430229
2007,642137,449307
2008,863688,646269
2009,937247,734192


### 1.2 Asylum applications from 2000 to mid-2021 per country of origin in EU countries.

In [12]:
# ISO3 codes of the EU-28 member states used for the comparison. GBR is kept
# even though the United Kingdom left the EU on 31 January 2020.
eu28_countries_iso3 = ['AUT', 'BEL', 'BGR', 'HRV', 'CYP', 'CZE', 'DNK',
                       'EST', 'FIN', 'FRA', 'DEU', 'GRC', 'HUN', 'IRL',
                       'ITA', 'LVA', 'LTU', 'LUX', 'MLT', 'NLD', 'POL',
                       'PRT', 'ROU', 'SVK', 'SVN', 'ESP', 'SWE', 'GBR']

# Keep applications lodged in an EU-28 country of asylum by applicants whose
# country of origin is outside the EU-28.
# .copy() makes EU_applications an independent frame: a column is assigned to it
# later, which on a plain boolean-indexed slice triggers SettingWithCopyWarning.
EU_applications = applications[
    applications['country_of_asylum_(iso)'].isin(eu28_countries_iso3)
    & ~applications['country_of_origin_(iso)'].isin(eu28_countries_iso3)
].copy()

In [13]:
# 'new_applications' carries the applied count only for rows typed 'N' (new), else 0.
EU_applications['new_applications'] = EU_applications['applied'].where(
    EU_applications['application_type'] == 'N', 0
)

# Yearly sums for the EU subset, plus a 'Total' margin row.
EU_total_yearly_app = (
    EU_applications
    .pivot_table(
        index=['year'],
        values=['applied', 'new_applications'],
        aggfunc={'applied': 'sum', 'new_applications': 'sum'},
        margins=True,
        margins_name='Total',
    )
    .rename(columns={'applied': 'total_applications'})
)
display(EU_total_yearly_app)

Unnamed: 0_level_0,total_applications,new_applications
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,495963,0
2001,538844,0
2002,504644,0
2003,459343,0
2004,394057,0
2005,338099,0
2006,282372,154334
2007,313096,215677
2008,328826,171479
2009,335839,231226


### 1.3. Asylum decisions from 2000 to mid-2021 per country of origin in country of asylum.

In [14]:
# Read the raw UNHCR asylum-decisions table and normalise its headers:
# lower-case, with spaces turned into underscores.
decisions = pd.read_csv('data/UNHCR/asylum-decisions.csv')
decisions.columns = [header.lower().replace(' ', '_') for header in decisions.columns]

In [15]:
# (rows, columns) of the raw decisions table.
decisions.shape

(86208, 13)

In [16]:
# Number of missing values per column.
decisions.isna().sum()

year                           0
country_of_origin              0
country_of_origin_(iso)      921
country_of_asylum              0
country_of_asylum_(iso)        0
authority                      0
stage_of_procedure          2170
cases_/_persons                0
recognized_decisions           0
complementary_protection       0
rejected_decisions             0
otherwise_closed               0
total_decisions                0
dtype: int64

In [17]:
# The NaN values in country_of_origin_(iso) are expected to belong to the rows
# whose country_of_origin is 'Unknown ' (note the trailing space).
decisions['country_of_origin'].eq('Unknown ').value_counts()
# Give those rows UNHCR's non-standard 'UNK' code.
decisions['country_of_origin_(iso)'] = (
    decisions['country_of_origin_(iso)'].fillna('UNK')
)

In [18]:
# stage_of_procedure has a few NaN values, which are labelled 'UNK'. The mixed-case
# spellings 'fi', 'ar' and 'Fa' are mapped onto their upper-case codes.
# All stage types are kept for now: some only apply to situations before 2010,
# so they will eventually drop out of the analysis.
decisions['stage_of_procedure'] = (
    decisions['stage_of_procedure']
    .fillna('UNK')
    .replace({'fi': 'FI', 'ar': 'AR', 'Fa': 'FA'})
)

In [19]:
# Distinct values present in each column, as a sanity check after the cleaning steps.
decisions.apply(pd.Series.unique)

year                        [2000, 2001, 2002, 2003, 2004, 2005, 2006, 200...
country_of_origin           [Afghanistan, Albania, Algeria, Angola, Egypt,...
country_of_origin_(iso)     [AFG, ALB, DZA, AGO, EGY, ARG, ARM, AUS, AUT, ...
country_of_asylum           [Albania, Egypt, Australia, Austria, Azerbaija...
country_of_asylum_(iso)     [ALB, EGY, AUS, AUT, AZE, BEL, BLR, BGR, CAN, ...
authority                                                           [G, U, J]
stage_of_procedure          [FI, AR, FA, BL, UNK, RA, CA, EO, IN, TR, JR, ...
cases_/_persons                                                        [C, P]
recognized_decisions        [0, 31, 38, 1329, 295, 239, 46, 94, 436, 5, 18...
complementary_protection    [0, 171, 303, 56, 91, 44, 10, 695, 721, 31, 17...
rejected_decisions          [0, 77, 25, 94, 217, 24, 15, 22, 10, 127, 31, ...
otherwise_closed            [5, 0, 10, 3343, 12, 207, 156, 23, 55, 1147, 1...
total_decisions             [5, 108, 63, 1433, 3855, 275, 15, 68

In [20]:
# Row count for each stage_of_procedure code.
decisions['stage_of_procedure'].value_counts()

FI     49297
AR     15153
FA      6859
RA      4004
EO      2804
IN      2799
UNK     2170
JR      2162
TA       329
SP       285
CA       177
BL        85
TP        83
TR         1
Name: stage_of_procedure, dtype: int64

In [21]:
# Keep only the ISO codes; the full country names are redundant.
decisions = decisions.drop(columns=['country_of_origin', 'country_of_asylum'])

In [22]:
# Preview the first rows of the cleaned decisions table.
decisions.head()

Unnamed: 0,year,country_of_origin_(iso),country_of_asylum_(iso),authority,stage_of_procedure,cases_/_persons,recognized_decisions,complementary_protection,rejected_decisions,otherwise_closed,total_decisions
0,2000,AFG,ALB,G,FI,C,0,0,0,5,5
1,2000,AFG,EGY,U,FI,P,31,0,77,0,108
2,2000,AFG,AUS,G,AR,C,38,0,25,0,63
3,2000,AFG,AUS,G,FI,C,1329,0,94,10,1433
4,2000,AFG,AUT,G,FA,P,295,0,217,3343,3855


In [23]:
# Yearly totals of recognized, rejected and overall decisions.
# The native groupby sum replaces passing np.sum to .agg(), which recent pandas
# versions deprecate (FutureWarning) in favour of the built-in reduction.
total_yearly_dec = decisions.groupby('year')[
    ['recognized_decisions', 'rejected_decisions', 'total_decisions']
].sum()
display(total_yearly_dec)

Unnamed: 0_level_0,recognized_decisions,rejected_decisions,total_decisions
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,202950,542843,1088196
2001,168946,451971,945074
2002,161367,524268,1008917
2003,146046,521674,1001417
2004,127731,444745,889728
2005,152585,361594,827576
2006,140769,306352,690764
2007,148835,259851,642283
2008,155665,325490,699094
2009,228470,321385,760932


In [24]:
# Attach the yearly application totals to the yearly decision totals (left join on
# 'year'), then compute the number of decisions taken per application lodged.
app_dec = total_yearly_dec.merge(total_yearly_app, how='left', on='year')
app_dec['dec_per_app'] = app_dec['total_decisions'].div(app_dec['total_applications'])
app_dec

Unnamed: 0_level_0,recognized_decisions,rejected_decisions,total_decisions,total_applications,new_applications,dec_per_app
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000,202950,542843,1088196,1092173,0,0.996359
2001,168946,451971,945074,917266,0,1.030316
2002,161367,524268,1008917,947565,0,1.064747
2003,146046,521674,1001417,839854,0,1.19237
2004,127731,444745,889728,680170,0,1.308097
2005,152585,361594,827576,672759,0,1.230123
2006,140769,306352,690764,608900,430229,1.134446
2007,148835,259851,642283,642137,449307,1.000227
2008,155665,325490,699094,863688,646269,0.809429
2009,228470,321385,760932,937247,734192,0.81188


### 1.4 Asylum decisions from 2000 to mid-2021 per country of origin in EU countries.

In [25]:
# Keep decisions taken in an EU-28 country of asylum for applicants whose country
# of origin is outside the EU-28. GBR is treated as a member even though the
# United Kingdom left the EU on 31 January 2020.
EU_decisions = decisions[
    decisions['country_of_asylum_(iso)'].isin(eu28_countries_iso3)
    & ~decisions['country_of_origin_(iso)'].isin(eu28_countries_iso3)
]

In [26]:
# Yearly totals of recognized, rejected and overall decisions for the EU subset.
# The native groupby sum replaces passing np.sum to .agg(), which recent pandas
# versions deprecate (FutureWarning) in favour of the built-in reduction.
EU_total_yearly_dec = EU_decisions.groupby('year')[
    ['recognized_decisions', 'rejected_decisions', 'total_decisions']
].sum()
display(EU_total_yearly_dec)

Unnamed: 0_level_0,recognized_decisions,rejected_decisions,total_decisions
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,42266,245862,478232
2001,65496,303015,572635
2002,49519,370989,597824
2003,46360,373850,560759
2004,43449,317630,472568
2005,46040,260597,418131
2006,29003,171408,282630
2007,41419,172729,313924
2008,42627,173774,298348
2009,42184,196728,329614


In [27]:
# Attach the EU yearly application totals to the EU yearly decision totals (left
# join on 'year'), then compute the number of decisions per application lodged.
EU_app_dec = EU_total_yearly_dec.merge(EU_total_yearly_app, how='left', on='year')
EU_app_dec['dec_per_app'] = EU_app_dec['total_decisions'].div(EU_app_dec['total_applications'])
EU_app_dec

Unnamed: 0_level_0,recognized_decisions,rejected_decisions,total_decisions,total_applications,new_applications,dec_per_app
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000,42266,245862,478232,495963,0,0.964249
2001,65496,303015,572635,538844,0,1.06271
2002,49519,370989,597824,504644,0,1.184645
2003,46360,373850,560759,459343,0,1.220785
2004,43449,317630,472568,394057,0,1.199238
2005,46040,260597,418131,338099,0,1.236712
2006,29003,171408,282630,282372,154334,1.000914
2007,41419,172729,313924,313096,215677,1.002645
2008,42627,173774,298348,328826,171479,0.907313
2009,42184,196728,329614,335839,231226,0.981464


### 1.5. Displaced population worldwide

In [28]:
# Read the UNHCR population table and normalise its headers: lower-case, with
# spaces turned into underscores.
# NOTE(review): the first 14 lines of the file are skipped - presumably a
# metadata preamble above the header row; confirm against the raw CSV.
pop = pd.read_csv('data/UNHCR/population.csv', skiprows=14)
pop.columns = [header.lower().replace(' ', '_') for header in pop.columns]

In [29]:
# (rows, columns) of the raw population table.
pop.shape

(94472, 10)

In [30]:
# Number of missing values per column.
pop.isna().sum()

year                                  0
country_of_origin                     0
country_of_origin_(iso)             995
country_of_asylum                     0
country_of_asylum_(iso)               0
refugees_under_unhcr's_mandate        0
asylum-seekers                        0
idps_of_concern_to_unhcr              0
venezuelans_displaced_abroad      94389
stateless_persons                     0
dtype: int64

In [31]:
# The NaN values in country_of_origin_(iso) are expected to belong to the rows
# whose country_of_origin is 'Unknown ' (note the trailing space).
pop['country_of_origin'].eq('Unknown ').value_counts()
# Give those rows UNHCR's non-standard 'UNK' code.
pop['country_of_origin_(iso)'] = pop['country_of_origin_(iso)'].fillna('UNK')

# Drop venezuelans_displaced_abroad (94,389 of its 94,472 rows are NaN, so only
# 83 hold a value) and the full country names (only the ISO codes are kept).
pop = pop.drop(
    columns=['venezuelans_displaced_abroad', 'country_of_origin', 'country_of_asylum']
)

In [32]:
# Display the cleaned population table (rendered as a truncated preview).
pop

Unnamed: 0,year,country_of_origin_(iso),country_of_asylum_(iso),refugees_under_unhcr's_mandate,asylum-seekers,idps_of_concern_to_unhcr,stateless_persons
0,2000,AFG,AFG,0,0,758625,0
1,2000,AFG,EGY,60,21,0,0
2,2000,AFG,AUS,4358,376,0,0
3,2000,AFG,AUT,679,0,0,0
4,2000,AFG,AZE,172,843,0,0
...,...,...,...,...,...,...,...
94467,2021,ZWE,SWZ,11,8,0,0
94468,2021,ZWE,SWE,17,13,0,0
94469,2021,ZWE,CHE,9,5,0,0
94470,2021,ZWE,THA,9,0,0,0


## Creating .csv files

In [33]:
from pathlib import Path

# Cleaned UNHCR tables (annual data, 2000 to mid-2021) paired with their
# destination files: world applications, EU applications, world decisions,
# EU decisions and displaced population.
cleaned_outputs = [
    ('data/Cleaned/UNHCR_applications.csv', applications),
    ('data/Cleaned/UNHCR_EU_applications.csv', EU_applications),
    ('data/Cleaned/UNHCR_decisions.csv', decisions),
    ('data/Cleaned/UNHCR_EU_decisions.csv', EU_decisions),
    ('data/Cleaned/UNHCR_population.csv', pop),
]

for target, frame in cleaned_outputs:
    filepath = Path(target)
    # Create the output folder on first use; no error if it already exists.
    filepath.parent.mkdir(parents=True, exist_ok=True)
    frame.to_csv(filepath)