In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
refugees_df = pd.read_csv('../data/refugees.csv', skipfooter=4, engine='python')

In [3]:
refugees_df.head()

Unnamed: 0,Country or territory of asylum or residence,Country or territory of origin,Year,Refugees*,Refugees assisted by UNHCR,Total refugees and people in refugee-like situations**,Total refugees and people in refugee-like situations assisted by UNHCR
0,Afghanistan,Iran (Islamic Rep. of),2021,38,38,38,38
1,Afghanistan,Pakistan,2021,72188,123,72188,123
2,Albania,China,2021,14,0,14,0
3,Albania,Egypt,2021,5,0,5,0
4,Albania,Iraq,2021,5,0,5,0


In [4]:
refugees_df = refugees_df.rename(columns={"Country or territory of asylum or residence": "Acceptor", "Country or territory of origin":"Origin", "Total refugees and people in refugee-like situations**": "Refugees"})

In [5]:
refugees_df.isnull().values.any()

False

In [6]:
refugees_df = refugees_df.drop(columns=['Refugees assisted by UNHCR','Refugees*','Total refugees and people in refugee-like situations assisted by UNHCR'])

In [7]:
refugees_df.head()

Unnamed: 0,Acceptor,Origin,Year,Refugees
0,Afghanistan,Iran (Islamic Rep. of),2021,38
1,Afghanistan,Pakistan,2021,72188
2,Albania,China,2021,14
3,Albania,Egypt,2021,5
4,Albania,Iraq,2021,5


How many countries of origin and acceptors?

In [8]:
refugees_df["Origin"].nunique()

201

In [9]:
refugees_df["Acceptor"].nunique()

183

How many years does the data span? 

In [10]:
refugees_df["Year"].nunique()

47

Total number of refugees accepted by each country over the entire period

In [11]:
total_accepted_df = refugees_df.groupby(by="Acceptor").sum()
total_accepted_df = total_accepted_df.reset_index()
total_accepted_df = total_accepted_df.drop(columns="Year")

Top five highest accepting countries

In [12]:
total_accepted_df = total_accepted_df.rename(columns={"Refugees" : "Total Refugees"})

In [13]:
total_accepted_df.sort_values(by="Total Refugees", ascending=False)

Unnamed: 0,Acceptor,Total Refugees
123,Pakistan,78221903
77,Iran (Islamic Rep. of),71483585
63,Germany,31540659
165,Turkey,26264664
155,Sudan,24056451
...,...,...
106,Micronesia (Federated States of),10
138,Saint Kitts and Nevis,10
139,Saint Lucia,10
66,Grenada,5


Top five highest departing countries

In [18]:
total_departing_df = refugees_df.groupby(by="Origin").sum()
total_departing_df = total_departing_df.reset_index()
total_departing_df = total_departing_df.drop(columns="Year")
total_departing_df.sort_values(by="Refugees", ascending=False)

Unnamed: 0,Origin,Refugees
0,Afghanistan,142234967
192,Unknown/other,54982960
174,Syrian Arab Rep.,50782628
85,Iraq,28266939
62,Ethiopia,23856215
...,...,...
141,Puerto Rico,36
122,Nauru,25
152,San Marino,20
104,Luxembourg,10


In [15]:
total_departing_df.head()

Unnamed: 0,Origin,Refugees
0,Afghanistan,142234967
1,Albania,338564
2,Algeria,195008
3,Andorra,87
4,Angola,13054928


In [16]:
total_departing_df.tail()

Unnamed: 0,Origin,Refugees
196,Viet Nam,16457922
197,Western Sahara,6157643
198,Yemen,598120
199,Zambia,15671
200,Zimbabwe,814630


In [19]:
total_departing_df[total_departing_df["Origin"]=="United States of America"]

Unnamed: 0,Origin,Refugees
191,United States of America,39369


**Data for 2022**

*Top 5 most accepting countries*

In [30]:
refugees_2021_df = refugees_df[refugees_df["Year"]==2021]
total_accepted_2021_df = refugees_2021_df.groupby(by="Acceptor").sum()
total_accepted_2021_df = total_accepted_2021_df.reset_index()
total_accepted_2021_df = total_accepted_2021_df.drop(columns="Year")
total_accepted_2021_df.sort_values(by="Refugees", ascending=False)

Unnamed: 0,Acceptor,Refugees
148,Turkey,3696831
151,Uganda,1475311
113,Pakistan,1438523
58,Germany,1235160
138,Sudan,1068339
...,...,...
149,Turkmenistan,20
139,Suriname,19
158,Uzbekistan,13
52,Fiji,13


**Making a function to produce a df that gives the origin/host and number of refugees by year**

In [31]:
def make_aggregated(country_type, yr):
    refugees_yr_df = refugees_df[refugees_df["Year"]==yr]
    total_accepted_df = refugees_yr_df.groupby(by=country_type).sum()
    total_accepted_df = total_accepted_df.reset_index()
    total_accepted_df = total_accepted_df.drop(columns="Year")
    return total_accepted_df.sort_values(by="Refugees", ascending=False)

In [53]:
df = make_aggregated("Origin", 2021)

In [54]:
df.head()

Unnamed: 0,Origin,Refugees
162,Syrian Arab Rep.,6761560
0,Afghanistan,2610067
153,South Sudan,2277919
111,Myanmar,1127586
46,Dem. Rep. of the Congo,864510


In [51]:
df = make_aggregated("Acceptor", 2021)

In [52]:
df.head()

Unnamed: 0,Acceptor,Refugees
148,Turkey,3696831
151,Uganda,1475311
113,Pakistan,1438523
58,Germany,1235160
138,Sudan,1068339


**Show the history of accepting refugees**

In [60]:
country_list = refugees_df["Acceptor"].unique()

In [61]:
country_list

array(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
       'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas',
       'Bahrain', 'Bangladesh', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bolivia (Plurinational State of)', 'Bosnia and Herzegovina',
       'Botswana', 'Brazil', 'Bulgaria', 'Burkina Faso', 'Burundi',
       'Cambodia', 'Cameroon', 'Canada', 'Cayman Islands',
       'Central African Rep.', 'Chad', 'Chile', 'China',
       'China, Hong Kong SAR', 'Colombia', 'Congo, Republic of',
       'Costa Rica', "Côte d'Ivoire", 'Croatia', 'Cuba', 'Curaçao',
       'Cyprus', 'Czechia', 'Dem. Rep. of the Congo', 'Denmark',
       'Djibouti', 'Dominican Rep.', 'Ecuador', 'Egypt', 'El Salvador',
       'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland',
       'France', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana',
       'Greece', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana',
       'Honduras', 'Hungary', 'Iceland', 'India', 'Indonesia',
 

In [64]:
refugees_df[refugees_df["Acceptor"]=="United States of America"].groupby(by="Year").sum()

Unnamed: 0_level_0,Refugees
Year,Unnamed: 1_level_1
1975,530000
1976,500000
1977,684700
1978,734000
1979,734000
1980,403684
1981,537375
1982,620257
1983,655532
1984,626909
