In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

In [2]:
# Load the UNHCR "persons of concern" export. Every column is read as text
# (dtype=str), so numeric conversion has to happen in later cells.
raw_csv_path = 'Raw Data/Persons of Concern/for_map_unhcr_popstats_export_persons_of_concern_all_data.csv'

# Map the verbose source headers to the short names used in the rest of the notebook.
short_column_names = {
    'Country / territory of asylum/residence': 'Country',
    'Refugees (incl. refugee-like situations)': 'Refugees',
    'Asylum-seekers (pending cases)': 'Asylum',
    'Internally displaced persons (IDPs)': 'Internally displaced persons',
}

migration_data = pd.read_csv(raw_csv_path, dtype=str).rename(columns=short_column_names)

In [3]:
# Preview the first five rows to confirm the renamed columns.
migration_data.head(5)

Unnamed: 0,Year,Country,Origin,Refugees,Asylum,Returned refugees,Internally displaced persons,Returned IDPs,Stateless persons,Others of concern,Total Population
0,1951,Australia,Various/Unknown,180000,,,,,,,180000
1,1951,Austria,Various/Unknown,282000,,,,,,,282000
2,1951,Belgium,Various/Unknown,55000,,,,,,,55000
3,1951,Canada,Various/Unknown,168511,,,,,,,168511
4,1951,Switzerland,Various/Unknown,10000,,,,,,,10000


In [4]:
# Columns that hold counts (plus Year) and must be converted from text to numbers.
numeric_columns = [
    'Year',
    'Refugees',
    'Asylum',
    'Returned refugees',
    'Internally displaced persons',
    'Returned IDPs',
    'Stateless persons',
    'Others of concern',
    'Total Population',
]

# Rows where Brazil is the country of asylum/residence, re-indexed from zero.
# A few cells hold '*', which marks an unclear count in that category; both
# those and missing values are replaced with zero.
refugees_in = (
    migration_data.loc[migration_data['Country'] == 'Brazil']
    .reset_index(drop=True)
    .replace(['*'], [0])
    .fillna(0)
)

# Convert the count columns (still text / mixed objects at this point) to numeric.
refugees_in[numeric_columns] = refugees_in[numeric_columns].apply(pd.to_numeric)

# Persist the cleaned inbound dataset.
refugees_in.to_csv('Processed Data/refugees_in.csv')

In [5]:
#Make a new dataset that summarizes in-migration of refugees by year.
#The columns to aggregate are selected with a list (double brackets): selecting
#with a bare tuple of names is deprecated and raises a ValueError in pandas 2.x.
refugees_in_by_year = refugees_in.groupby(['Year'])[['Refugees','Asylum','Returned refugees','Internally displaced persons','Returned IDPs','Stateless persons','Others of concern','Total Population']].sum()

#Export refugees_in_by_year to CSV
refugees_in_by_year.to_csv('Processed Data/refugees_in_by_year.csv')

#Preview the first rows; kept as the last expression so the notebook renders it
refugees_in_by_year.head()

In [6]:
# Columns that hold counts (plus Year) and must be converted from text to numbers.
numeric_columns = [
    'Year',
    'Refugees',
    'Asylum',
    'Returned refugees',
    'Internally displaced persons',
    'Returned IDPs',
    'Stateless persons',
    'Others of concern',
    'Total Population',
]

# Rows where Brazil is the country of origin, re-indexed from zero.
# A few cells hold '*', which marks an unclear count in that category; both
# those and missing values are replaced with zero.
refugees_out = (
    migration_data.loc[migration_data['Origin'] == 'Brazil']
    .reset_index(drop=True)
    .replace(['*'], [0])
    .fillna(0)
)

# Convert the count columns (still text / mixed objects at this point) to numeric.
refugees_out[numeric_columns] = refugees_out[numeric_columns].apply(pd.to_numeric)

# Persist the cleaned outbound dataset.
refugees_out.to_csv('Processed Data/refugees_out.csv')


In [7]:
#Make a new dataset that summarizes out-migration of refugees by year.
#The columns to aggregate are selected with a list (double brackets): selecting
#with a bare tuple of names is deprecated and raises a ValueError in pandas 2.x.
refugees_out_by_year = refugees_out.groupby(['Year'])[['Refugees','Asylum','Returned refugees','Internally displaced persons','Returned IDPs','Stateless persons','Others of concern','Total Population']].sum()

#Export refugees_out_by_year to CSV
refugees_out_by_year.to_csv('Processed Data/refugees_out_by_year.csv')

#Preview the first rows; kept as the last expression so the notebook renders it
refugees_out_by_year.head()


In [8]:
# Combine the yearly inbound and outbound summaries into one table indexed by Year.
# Each side gets a suffix so identically named measures stay distinguishable.
inflow_by_year = refugees_in_by_year.add_suffix('_in')
outflow_by_year = refugees_out_by_year.add_suffix('_out')

# Outer merge on the index keeps years that appear on only one side; the
# resulting gaps (NaN) are filled with zero.
refugees_net_by_year = pd.merge(
    inflow_by_year,
    outflow_by_year,
    left_index=True,
    right_index=True,
    how='outer',
).fillna(0)

# Column-name lists: suffixed outbound names, suffixed inbound names, and the
# unsuffixed measure names they share.
out_vars = outflow_by_year.columns.tolist()
in_vars = inflow_by_year.columns.tolist()
root_vars = refugees_in_by_year.columns.tolist()

# Net value per measure, computed as outbound minus inbound.
for measure in root_vars:
    net_values = refugees_net_by_year[f'{measure}_out'] - refugees_net_by_year[f'{measure}_in']
    refugees_net_by_year[f'{measure}_net'] = net_values

# Persist the combined in/out/net table.
refugees_net_by_year.to_csv('Processed Data/refugees_net_by_year.csv')

