In [1]:
import pandas as pd

# Refugees

[Data source: Refugee data finder](https://www.unhcr.org/refugee-statistics/download?data_finder%5BdataGroup%5D=displacement&data_finder%5Bdataset%5D=population&data_finder%5BdisplayType%5D=totals&data_finder%5BpopulationType%5D%5B0%5D=REF&data_finder%5BpopulationType%5D%5B1%5D=OOC&data_finder%5Byear__filterType%5D=range&data_finder%5Byear__rangeFrom%5D=2008&data_finder%5Byear__rangeTo%5D=2024&data_finder%5Bcoo__displayType%5D=doNotDisplay&data_finder%5Bcoa__displayType%5D=custom&data_finder%5Bcoa__country%5D%5B0%5D=154&data_finder%5Bcoa__country%5D%5B1%5D=30&data_finder%5Bcoa__country%5D%5B2%5D=158&data_finder%5Bcoa__country%5D%5B3%5D=85&data_finder%5Bcoa__country%5D%5B4%5D=64&data_finder%5Bcoa__country%5D%5B5%5D=94&data_finder%5Bcoa__country%5D%5B6%5D=72&data_finder%5Bcoa__country%5D%5B7%5D=183&data_finder%5Bcoa__country%5D%5B8%5D=173&data_finder%5Bcoa__country%5D%5B9%5D=76&data_finder%5Bcoa__country%5D%5B10%5D=181&data_finder%5Bcoa__country%5D%5B11%5D=12&data_finder%5Bcoa__country%5D%5B12%5D=17&data_finder%5Bcoa__country%5D%5B13%5D=84&data_finder%5Bcoa__country%5D%5B14%5D=48&data_finder%5Bcoa__country%5D%5B15%5D=49&data_finder%5Bcoa__country%5D%5B16%5D=50&data_finder%5Bcoa__country%5D%5B17%5D=57&data_finder%5Bcoa__country%5D%5B18%5D=61&data_finder%5Bcoa__country%5D%5B19%5D=90&data_finder%5Bcoa__country%5D%5B20%5D=115&data_finder%5Bcoa__country%5D%5B21%5D=113&data_finder%5Bcoa__country%5D%5B22%5D=114&data_finder%5Bcoa__country%5D%5B23%5D=132&data_finder%5Bcoa__country%5D%5B24%5D=138&data_finder%5Byear__%5D=&data_finder%5Bcoo__%5D=&data_finder%5Bcoa__%5D=&data_finder%5Badvanced__%5D=&data_finder%5Bsubmit%5D=&data-finder=on&page=19)

Data format:
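* year – year of observation
* geo – country of asylum
* Refugees – number of refugees present in the country in that year
* Refugees_change – year-to-year change in the refugee population (missing for each country's first year, which has no previous year to compare with)
* Refugees_arrived – number of newly arrived refugees, approximated by the net increase in the refugee population:
    * Equals Refugees_change if positive
    * Equals 0 if negative or zero
    * Missing when Refugees_change is missing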


In [2]:
# Load the raw export of refugee figures by year and country of asylum.
df = pd.read_csv(
    filepath_or_buffer='../../data/refugees_in_EU_plus_UK.csv',
)

In [3]:
# Peek at the first rows to check the raw column layout before trimming it.
df.head()

Unnamed: 0,Year,Country of Asylum,Country of Origin,Country of Asylum ISO,Country of Origin ISO,Refugees,Returned Refugees,Asylum Seekers,IDPs,Returned IDPs,Stateless,HST,OOC
0,2008,Austria,-,AUT,-,37546,299,36687,0,0,464,0,0
1,2008,Belgium,-,BEL,-,17025,29,14261,0,0,548,0,0
2,2008,Bulgaria,-,BGR,-,5108,0,1060,0,0,0,0,0
3,2008,Cyprus,-,CYP,-,1458,0,8003,0,0,0,0,0
4,2008,Czechia,-,CZE,-,2095,0,1422,0,0,0,0,0


In [4]:
# Keep only the three columns used in the rest of this section;
# every other column of the raw export is dropped.
df = df.loc[:, ['Year', 'Country of Asylum', 'Refugees']]

In [4]:
# Confirm that only the selected columns remain.
df.head()

Unnamed: 0,Year,Country of Asylum,Refugees
0,2008,Austria,37546
1,2008,Belgium,17025
2,2008,Bulgaria,5108
3,2008,Cyprus,1458
4,2008,Czechia,2095


In [5]:
# Order rows by country, then chronologically within each country, so that
# a per-country row-to-row difference compares consecutive years.
df = df.sort_values(
    ['Country of Asylum', 'Year'],
    ascending=True,
)

In [6]:
# Check the new ordering: rows of one country should now appear in year order.
df.head()

Unnamed: 0,Year,Country of Asylum,Refugees
0,2008,Austria,37546
25,2009,Austria,38895
50,2010,Austria,42613
75,2011,Austria,47055
100,2012,Austria,51707


In [7]:
# Year-to-year change in the refugee count, computed separately per country
# so one country's first row is never compared with another country's rows.
# The first row of each country has no predecessor and is left as NaN.
# Relies on the rows already being in year order within each country.
df['Refugees_change'] = (
    df
    .groupby(by='Country of Asylum')['Refugees']
    .diff(periods=1)
)


In [11]:
# Inspect a longer slice of the new column; the first row of a country is NaN
# because it has no previous year to diff against.
df.head(20)

Unnamed: 0,Year,Country of Asylum,Refugees,Refugees_change
0,2008,Austria,37546,
25,2009,Austria,38895,1349.0
50,2010,Austria,42613,3718.0
75,2011,Austria,47055,4442.0
100,2012,Austria,51707,4652.0
125,2013,Austria,55575,3868.0
150,2014,Austria,60724,5149.0
175,2015,Austria,72198,11474.0
200,2016,Austria,93242,21044.0
225,2017,Austria,115252,22010.0


In [12]:
# Count only net increases as arrivals: negative changes are replaced by 0,
# while positive changes, zeros and missing values (NaN) pass through as-is.
df['Refugees_arrived'] = df['Refugees_change'].mask(
    df['Refugees_change'] < 0,
    0,
)

In [13]:
# Check the derived column next to the change it was computed from.
df.head()

Unnamed: 0,Year,Country of Asylum,Refugees,Refugees_change,Refugees_arrived
0,2008,Austria,37546,,
25,2009,Austria,38895,1349.0,1349.0
50,2010,Austria,42613,3718.0,3718.0
75,2011,Austria,47055,4442.0,4442.0
100,2012,Austria,51707,4652.0,4652.0


In [18]:
# Switch to the short `year` / `geo` column names for the processed output.
df = df.rename(
    columns={
        'Year': 'year',
        'Country of Asylum': 'geo',
    }
)


In [19]:
# Verify the renamed column headers.
df.head()

Unnamed: 0,year,geo,Refugees,Refugees_change,Refugees_arrived
0,2008,Austria,37546,,
25,2009,Austria,38895,1349.0,1349.0
50,2010,Austria,42613,3718.0,3718.0
75,2011,Austria,47055,4442.0,4442.0
100,2012,Austria,51707,4652.0,4652.0


In [21]:
# Replace the long official country label with the plain short name
# (exact whole-value match only; all other values are left untouched).
df["geo"] = df["geo"].replace(
    {"Netherlands (Kingdom of the)": "Netherlands"}
)


In [22]:
# Persist the processed table; the row index is not written to the file.
df.to_csv(
    path_or_buf="../../processed_data/refugees.csv",
    index=False,
)

# Ukrainian refugees analysis

### Total

In [32]:
# Load the raw export of refugee totals with Ukraine as country of origin.
ukr_total_ref = pd.read_csv(
    filepath_or_buffer='../../data/ukraine_refugees_total.csv',
)

In [33]:
# Peek at the first rows to check the raw column layout.
ukr_total_ref.head()

Unnamed: 0,Year,Country of Asylum,Country of Origin,Country of Asylum ISO,Country of Origin ISO,Refugees,Returned Refugees,Asylum Seekers,IDPs,Returned IDPs,Stateless,HST,OOC
0,2008,-,Ukraine,-,UKR,28416,0,1900,0,0,0,0,0
1,2009,-,Ukraine,-,UKR,24512,0,1530,0,0,0,0,0
2,2010,-,Ukraine,-,UKR,25102,0,1250,0,0,0,0,0
3,2011,-,Ukraine,-,UKR,25372,0,1039,0,0,0,0,0
4,2012,-,Ukraine,-,UKR,25248,0,1209,0,0,0,0,0


In [34]:
# Keep only the year, the origin country and the refugee count.
ukr_total_ref = ukr_total_ref.loc[:, ['Year', 'Country of Origin', 'Refugees']]

In [35]:
# Check row count, dtypes and non-null counts of the trimmed table.
ukr_total_ref.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Year               17 non-null     int64 
 1   Country of Origin  17 non-null     object
 2   Refugees           17 non-null     int64 
dtypes: int64(2), object(1)
memory usage: 540.0+ bytes


In [36]:
# Rename to the lower-case `year` column name used in the processed outputs.
# Reassigning (rather than `inplace=True`) avoids modifying a column subset of
# the loaded frame in place, which can raise SettingWithCopyWarning, and keeps
# the cell safe to re-run.
ukr_total_ref = ukr_total_ref.rename(columns={'Year': 'year'})

In [37]:
# Persist the processed totals; the row index is not written to the file.
ukr_total_ref.to_csv(
    path_or_buf="../../processed_data/ukraine_refugees_total.csv",
    index=False,
)

### In EU countries

In [38]:
# Load the raw export of refugees from Ukraine by year and country of asylum.
ukr_eu = pd.read_csv(
    filepath_or_buffer='../../data/ukraine_refugees_in_EU.csv',
)

In [39]:
# Peek at the first rows to check the raw column layout.
ukr_eu.head()

Unnamed: 0,Year,Country of Asylum,Country of Origin,Country of Asylum ISO,Country of Origin ISO,Refugees,Returned Refugees,Asylum Seekers,IDPs,Returned IDPs,Stateless,HST,OOC
0,2008,Austria,Ukraine,AUT,UKR,221,0,515,0,0,0,0,0
1,2009,Austria,Ukraine,AUT,UKR,259,0,368,0,0,0,0,0
2,2010,Austria,Ukraine,AUT,UKR,282,0,238,0,0,0,0,0
3,2011,Austria,Ukraine,AUT,UKR,308,0,141,0,0,0,0,0
4,2012,Austria,Ukraine,AUT,UKR,309,0,131,0,0,0,0,0


In [40]:
# Keep the year, both country labels, the asylum-country ISO code and the
# refugee count; the remaining columns of the raw export are dropped.
ukr_eu = ukr_eu.loc[
    :,
    ['Year', 'Country of Asylum', 'Country of Origin', 'Country of Asylum ISO', 'Refugees'],
]

In [41]:
# Confirm that only the selected columns remain.
ukr_eu.head()

Unnamed: 0,Year,Country of Asylum,Country of Origin,Country of Asylum ISO,Refugees
0,2008,Austria,Ukraine,AUT,221
1,2009,Austria,Ukraine,AUT,259
2,2010,Austria,Ukraine,AUT,282
3,2011,Austria,Ukraine,AUT,308
4,2012,Austria,Ukraine,AUT,309


In [42]:
# Check row count, dtypes and non-null counts of the trimmed table.
ukr_eu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Year                   374 non-null    int64 
 1   Country of Asylum      374 non-null    object
 2   Country of Origin      374 non-null    object
 3   Country of Asylum ISO  374 non-null    object
 4   Refugees               374 non-null    int64 
dtypes: int64(2), object(3)
memory usage: 14.7+ KB


In [43]:
# Switch to the short `year` / `geo` / `iso_a3` column names for the
# processed output.
# Reassigning (rather than `inplace=True`) avoids modifying a column subset of
# the loaded frame in place, which can raise SettingWithCopyWarning, and keeps
# the cell safe to re-run.
# NOTE(review): the all-refugees table maps "Netherlands (Kingdom of the)" to
# "Netherlands" in `geo`; confirm whether this table needs the same
# normalisation before it is joined on `geo`.
ukr_eu = ukr_eu.rename(
    columns={
        'Year': 'year',
        'Country of Asylum': 'geo',
        'Country of Asylum ISO': 'iso_a3',
    }
)

In [44]:
# Verify the renamed column headers.
ukr_eu.head()

Unnamed: 0,year,geo,Country of Origin,iso_a3,Refugees
0,2008,Austria,Ukraine,AUT,221
1,2009,Austria,Ukraine,AUT,259
2,2010,Austria,Ukraine,AUT,282
3,2011,Austria,Ukraine,AUT,308
4,2012,Austria,Ukraine,AUT,309


In [45]:
# Persist the processed per-country table; the row index is not written.
ukr_eu.to_csv(
    path_or_buf="../../processed_data/ukraine_refugees_eu.csv",
    index=False,
)