In [1]:
#Import the packages we'll need later

import itertools
from itertools import cycle

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#Show every column when a dataframe is displayed (None removes the column limit)
pd.set_option('display.max_columns', None)

In [2]:
#Read in raw migration data. dtype=str keeps every column as text; the numeric
#conversion is done later on the Brazil-specific subsets.
migration_data = pd.read_csv('Raw Data/Persons of Concern/for_map_unhcr_popstats_export_persons_of_concern_all_data.csv', dtype=str)

#Rename the long source column headers to the short names used in the rest of the notebook
migration_data = migration_data.rename(columns={'Country / territory of asylum/residence': 'Country', 'Refugees (incl. refugee-like situations)': 'Refugees', 'Asylum-seekers (pending cases)': 'Asylum', 'Internally displaced persons (IDPs)': 'Internally displaced persons'})

#Use the spelling 'Vietnam' in both country columns.
#The result is assigned back instead of calling replace(..., inplace=True) on the
#selected column: that chained in-place form does not update the dataframe when
#pandas Copy-on-Write is active.
migration_data['Country'] = migration_data['Country'].replace(['Viet Nam'], ['Vietnam'])
migration_data['Origin'] = migration_data['Origin'].replace(['Viet Nam'], ['Vietnam'])

In [3]:
#Build the dataset of people of concern hosted by Brazil (Country == 'Brazil'),
#re-indexed from zero.
#Note - a few values are '*', which denotes an unclear count in that category.
#Those, and any missing values, are replaced with zeros.
refugees_in = (
    migration_data.loc[migration_data['Country'] == 'Brazil']
    .reset_index(drop=True)
    .replace(['*'], [0])
    .fillna(0)
)

#Convert the year and the population-count columns to numeric.
refugees_in_numeric_columns = [
    'Year',
    'Refugees',
    'Asylum',
    'Returned refugees',
    'Internally displaced persons',
    'Returned IDPs',
    'Stateless persons',
    'Others of concern',
    'Total Population',
]
refugees_in[refugees_in_numeric_columns] = refugees_in[refugees_in_numeric_columns].apply(pd.to_numeric)

#Export refugees_in to CSV
refugees_in.to_csv('Processed Data/refugees_in.csv')

In [4]:
#Build the dataset of people of concern originating from Brazil (Origin == 'Brazil'),
#re-indexed from zero.
#Note - a few values are '*', which denotes an unclear count in that category.
#Those, and any missing values, are replaced with zeros.
refugees_out = (
    migration_data.loc[migration_data['Origin'] == 'Brazil']
    .reset_index(drop=True)
    .replace(['*'], [0])
    .fillna(0)
)

#Convert the year and the population-count columns to numeric.
refugees_out_numeric_columns = [
    'Year',
    'Refugees',
    'Asylum',
    'Returned refugees',
    'Internally displaced persons',
    'Returned IDPs',
    'Stateless persons',
    'Others of concern',
    'Total Population',
]
refugees_out[refugees_out_numeric_columns] = refugees_out[refugees_out_numeric_columns].apply(pd.to_numeric)

#Export refugees_out to CSV
refugees_out.to_csv('Processed Data/refugees_out.csv')

In [5]:
#For every year keep the (up to) five rows of refugees_in with the largest
#'Total Population', i.e. the top 5 origins of people hosted by Brazil that year.
#nlargest on the grouped column yields a (Year, row label) index; level 1 holds
#the row labels of refugees_in to select.
top5_in_row_labels = (
    refugees_in
    .groupby('Year')['Total Population']
    .nlargest(5)
    .index
    .get_level_values(1)
)
refugees_in_top5_sources = refugees_in.loc[top5_in_row_labels]
refugees_in_top5_sources.to_csv('Processed Data/refugees_in_top5_sources.csv')
refugees_in_top5_sources.head()

Unnamed: 0,Year,Country,Origin,Refugees,Asylum,Returned refugees,Internally displaced persons,Returned IDPs,Stateless persons,Others of concern,Total Population
0,1969,Brazil,Various/Unknown,40000,0,0,0,0,0,0,40000
1,1970,Brazil,Various/Unknown,40000,0,0,0,0,0,0,40000
2,1971,Brazil,Cuba,37800,0,0,0,0,0,0,37800
3,1971,Brazil,Various/Unknown,200,0,0,0,0,0,0,200
5,1972,Brazil,Various/Unknown,36800,0,0,0,0,0,0,36800


In [6]:
#For every year keep the (up to) five rows of refugees_out with the largest
#'Total Population', i.e. the top 5 destinations of people originating from Brazil.
#nlargest on the grouped column yields a (Year, row label) index; level 1 holds
#the row labels of refugees_out to select.
top5_out_row_labels = (
    refugees_out
    .groupby('Year')['Total Population']
    .nlargest(5)
    .index
    .get_level_values(1)
)
refugees_out_top5_sources = refugees_out.loc[top5_out_row_labels]
refugees_out_top5_sources.to_csv('Processed Data/refugees_out_top5_sources.csv')
refugees_out_top5_sources.head()

Unnamed: 0,Year,Country,Origin,Refugees,Asylum,Returned refugees,Internally displaced persons,Returned IDPs,Stateless persons,Others of concern,Total Population
0,1971,Chile,Brazil,2000,0,0,0,0,0,0,2000
1,1972,Chile,Brazil,2000,0,0,0,0,0,0,2000
2,1980,Italy,Brazil,5,0,0,0,0,0,0,5
3,1981,Italy,Brazil,5,0,0,0,0,0,0,5
4,1982,Italy,Brazil,5,0,0,0,0,0,0,5


In [7]:
#Count how many times each origin country appears in the yearly Top 5 table.
#The two output columns are named explicitly (rename_axis for the countries,
#reset_index(name=...) for the counts) instead of renaming 'index'/'Origin' after
#reset_index(): the names value_counts() produces differ between pandas versions,
#and on pandas >= 2.0 the old rename labelled the country column as the count.
#The table is built once and reused for both the CSV export and the display.
refugees_in_top5_sources_count = (
    refugees_in_top5_sources['Origin']
    .value_counts()
    .rename_axis('Country of Origin')
    .reset_index(name='Years of Top 5 Status')
)
refugees_in_top5_sources_count.to_csv('Processed Data/refugees_in_top5_sources_count.csv')
refugees_in_top5_sources_count

Unnamed: 0,Country of Origin,Years of Top 5 Status
0,Various/Unknown,30
1,Angola,23
2,Liberia,20
3,Dem. Rep. of the Congo,20
4,Colombia,11
5,Cuba,9
6,Haiti,7
7,Sierra Leone,6
8,Chile,4
9,Romania,4


In [8]:
#Count how many times each destination country appears in the yearly Top 5 table.
#The two output columns are named explicitly (rename_axis for the countries,
#reset_index(name=...) for the counts) instead of renaming 'index'/'Country' after
#reset_index(): the names value_counts() produces differ between pandas versions,
#and on pandas >= 2.0 the old rename labelled the country column as the count.
#The table is built once and reused for both the CSV export and the display.
refugees_out_top5_sources_count = (
    refugees_out_top5_sources['Country']
    .value_counts()
    .rename_axis('Destination Country')
    .reset_index(name='Years of Top 5 Status')
)
refugees_out_top5_sources_count.to_csv('Processed Data/refugees_out_top5_sources_count.csv')
refugees_out_top5_sources_count

Unnamed: 0,Destination Country,Years of Top 5 Status
0,United States of America,24
1,Canada,23
2,Italy,15
3,Germany,15
4,Australia,14
5,Sweden,10
6,United Kingdom,6
7,Ireland,5
8,Switzerland,3
9,Mexico,3


In [9]:
#Preliminary tables for the country-by-year timelines: one row per
#(Year, country) pair found in the Top 5 tables, with a constant Count of 1.

refugees_in_top5_sources_pivot = refugees_in_top5_sources[['Year','Origin']].assign(Count=1)

refugees_out_top5_sources_pivot = refugees_out_top5_sources[['Year','Country']].assign(Count=1)

In [10]:
#Timeline of origins: one row per origin country, one column per year.
#A cell shows '*' when that origin was in the Top 5 that year, otherwise it is blank.
refugees_in_top5_by_year = (
    refugees_in_top5_sources_pivot[['Year','Origin','Count']]
    .pivot(index='Origin', columns='Year', values='Count')
    .fillna('')
    .replace([1], ['*'])
)
refugees_in_top5_by_year.to_csv('Processed Data/refugees_in_top5_by_year.csv')
refugees_in_top5_by_year

Year,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
Origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1
Angola,,,,,,,,,,,,,,,,,,,,,,,,,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,,*,*
Bangladesh,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,*,*,,
Chile,,,,,,,,,,,,,,,,,,*,,,,,,,*,*,*,,,,,,,,,,,,,,,,,,,,,
Colombia,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,*,*,*,*,*,*,*,*,*,*,*,,
Cuba,,,*,*,,,,,,,,,,,,,,,,,,,,,,,,*,*,*,*,,,,,,*,*,*,,,,,,,,,
Dem. Rep. of the Congo,,,,,,,,,,,,,,,,,,,,,,,,,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,,,,
Haiti,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,*,*,*,*,*,*,*
Iraq,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,*,*,,,,,,,
Liberia,,,,,,,,,,,,,,,,,,,,,,,,,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,,,,
Nigeria,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,*,


In [11]:
#Timeline of destinations: one row per destination country, one column per year.
#A cell shows '*' when that destination was in the Top 5 that year, otherwise it is blank.
refugees_out_top5_by_year = (
    refugees_out_top5_sources_pivot[['Year','Country','Count']]
    .pivot(index='Country', columns='Year', values='Count')
    .fillna('')
    .replace([1], ['*'])
)
refugees_out_top5_by_year.to_csv('Processed Data/refugees_out_top5_by_year.csv')
refugees_out_top5_by_year

Year,1971,1972,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
Australia,,,,,,,,,,,,,,,,,,,*,*,*,*,*,*,*,*,*,*,*,*,*,,*,,,,,,
Belgium,,,,,,,,,,,,,,,,,,,,,*,*,*,,,,,,,,,,,,,,,,
Brazil,,,,,,,,,,,,,,,,,,,,,,,,,,,,*,,,,*,,,,,,,
Canada,,,,,,,,,,,,,,,,,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*
Chile,*,*,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Cuba,,,,,,,,,,,,,,,,,*,,,,,,,,,,,,,,,,,,,,,,
Denmark,,,,,,,,,,,,*,*,,,,,,,,,,,,,,,,,,,,,,,,,,
France,,,,,,,,,,,,,,,,,,,,,,,,,,,,,*,,,,,,,,,,
Germany,,,,,,,,,,,,,,,,,,,,,,,,,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*
Ireland,,,,,,,,,,,,,,,,,,,,,,,,,,*,,,,*,*,,*,*,,,,,


In [12]:
#Scaffold for merging: five rows for every year from 1900 to 2017, with an
#'Order' column that runs 1..5 inside each year.

from itertools import cycle

years_df = pd.DataFrame({'Year': np.repeat(np.arange(1900, 2018), 5)})

#Walk the repeating 1-5 sequence alongside the row positions to label every row
years_df['Order'] = [
    order for order, _row in zip(cycle([1, 2, 3, 4, 5]), range(len(years_df)))
]

years_df.head(10)

Unnamed: 0,Year,Order
0,1900,1
1,1900,2
2,1900,3
3,1900,4
4,1900,5
5,1901,1
6,1901,2
7,1901,3
8,1901,4
9,1901,5


In [13]:
#Prepare the two Top 5 tables for merging: keep year, country and population,
#and number the rows of each year 1, 2, 3, ... in their existing order.

out_for_merge = refugees_out_top5_sources.loc[:, ['Year','Country','Total Population']].copy()
out_for_merge = out_for_merge.assign(Order=out_for_merge.groupby("Year").cumcount() + 1)

in_for_merge = refugees_in_top5_sources.loc[:, ['Year','Origin','Total Population']].copy()
in_for_merge = in_for_merge.assign(Order=in_for_merge.groupby("Year").cumcount() + 1)

In [14]:
#Left-join each Top 5 table onto the year/order scaffold so that every year has
#exactly five slots; slots with no matching row are filled with 0.

merge_keys = ["Year","Order"]

out_for_export = pd.merge(years_df, out_for_merge, on=merge_keys, how='left').fillna(0)

in_for_export = pd.merge(years_df, in_for_merge, on=merge_keys, how='left').fillna(0)

in_for_export.head()

Unnamed: 0,Year,Order,Origin,Total Population
0,1900,1,0,0.0
1,1900,2,0,0.0
2,1900,3,0,0.0
3,1900,4,0,0.0
4,1900,5,0,0.0


In [15]:
#'New Pop' is the total population scaled down by a factor of 10
in_for_export['New Pop'] = in_for_export['Total Population'].div(10)

In [16]:
#'New Pop' is the total population scaled down by a factor of 10
out_for_export['New Pop'] = out_for_export['Total Population'].div(10)

In [17]:
#Three independent copies of each export table: uncapped ('base') and the two
#that later get 'New Pop' capped at 100 and at 500.
in_for_export_base, in_for_export_100, in_for_export_500 = (
    in_for_export.copy() for _ in range(3)
)

out_for_export_base, out_for_export_100, out_for_export_500 = (
    out_for_export.copy() for _ in range(3)
)

In [18]:
#Cap 'New Pop' at 100 and at 500: values above the cap become the cap itself,
#everything else is left as it is.
a = np.array(in_for_export_base['New Pop'])
b = np.array(out_for_export_base['New Pop'])

in_for_export_100['New Pop'] = np.minimum(a, 100).tolist()
in_for_export_500['New Pop'] = np.minimum(a, 500).tolist()

out_for_export_100['New Pop'] = np.minimum(b, 100).tolist()
out_for_export_500['New Pop'] = np.minimum(b, 500).tolist()

In [19]:
#Integer lists used to draw the text histograms.
#The 'base' lists come from the unscaled 'Total Population'; the 100/500 lists
#come from the capped 'New Pop' column.

out_pop_count_base = list(map(int, out_for_export_base['Total Population'].values.tolist()))
in_pop_count_base = list(map(int, in_for_export_base['Total Population'].values.tolist()))
out_pop_count_100 = list(map(int, out_for_export_100['New Pop'].values.tolist()))
in_pop_count_100 = list(map(int, in_for_export_100['New Pop'].values.tolist()))
out_pop_count_500 = list(map(int, out_for_export_500['New Pop'].values.tolist()))
in_pop_count_500 = list(map(int, in_for_export_500['New Pop'].values.tolist()))

In [20]:
#Turn each 'New Pop' column into one line of text: the values truncated to
#integers and joined with '+', e.g. "0+0+12+100".
in_pop_count_100_txt = '+'.join(str(int(i)) for i in in_for_export_100['New Pop'].values.tolist())
out_pop_count_100_txt = '+'.join(str(int(i)) for i in out_for_export_100['New Pop'].values.tolist())
in_pop_count_base_txt = '+'.join(str(int(i)) for i in in_for_export_base['New Pop'].values.tolist())
out_pop_count_base_txt = '+'.join(str(int(i)) for i in out_for_export_base['New Pop'].values.tolist())

#Save population data to text files.
#Each string goes to the file named after it. Previously the "out ... base" file
#was written with the "in ... base" string, so it duplicated the inbound data.
#'with' closes every file even if a write fails.
pop_count_text_files = [
    ('Processed Data/Top_5_refugee_countries_in_by_year_100_text.txt', in_pop_count_100_txt),
    ('Processed Data/Top_5_refugee_countries_out_by_year_100_text.txt', out_pop_count_100_txt),
    ('Processed Data/Top_5_refugee_countries_in_by_year_base_text.txt', in_pop_count_base_txt),
    ('Processed Data/Top_5_refugee_countries_out_by_year_base_text.txt', out_pop_count_base_txt),
]
for text_path, text_content in pop_count_text_files:
    with open(text_path, 'w') as textfile:
        textfile.write(text_content)

In [23]:
#Clean up the export dataframes for refugees out and write three CSVs per variant
#(base / capped at 100 / capped at 500):
#  <name>.csv               - everything except the 'Order' helper column
#  <name>_for_print.csv     - only the country column
#  <name>_for_print_wno.csv - everything except 'Year' and 'Order'
out_export_variants = [
    ('base', out_for_export_base),
    ('100', out_for_export_100),
    ('500', out_for_export_500),
]
for out_variant, out_frame in out_export_variants:
    #Empty slots carry a 0 in 'Country'; blank them out. The result is assigned
    #back because replace(..., inplace=True) on a selected column does not
    #update the dataframe when pandas Copy-on-Write is active.
    out_frame['Country'] = out_frame['Country'].replace([0], [''])

    out_stem = 'Processed Data/refugees_out_' + out_variant + '_1900_to_2017_by_year'
    out_frame.drop('Order', axis=1).to_csv(out_stem + '.csv')
    out_frame.drop(['Year','Order','Total Population','New Pop'], axis=1).to_csv(out_stem + '_for_print.csv')
    out_frame.drop(['Year','Order'], axis=1).to_csv(out_stem + '_for_print_wno.csv')


In [24]:
#Clean up the export dataframes for refugees in and write three CSVs per variant
#(base / capped at 100 / capped at 500):
#  <name>.csv               - everything except the 'Order' helper column
#  <name>_for_print.csv     - only the origin column
#  <name>_for_print_wno.csv - everything except 'Year' and 'Order'
in_export_variants = [
    ('base', in_for_export_base),
    ('100', in_for_export_100),
    ('500', in_for_export_500),
]
for in_variant, in_frame in in_export_variants:
    #Empty slots carry a 0 in 'Origin'; blank them out. The result is assigned
    #back because replace(..., inplace=True) on a selected column does not
    #update the dataframe when pandas Copy-on-Write is active.
    in_frame['Origin'] = in_frame['Origin'].replace([0], [''])

    in_stem = 'Processed Data/refugees_in_' + in_variant + '_1900_to_2017_by_year'
    in_frame.drop('Order', axis=1).to_csv(in_stem + '.csv')
    in_frame.drop(['Year','Order','Total Population','New Pop'], axis=1).to_csv(in_stem + '_for_print.csv')
    in_frame.drop(['Year','Order'], axis=1).to_csv(in_stem + '_for_print_wno.csv')


In [None]:
#Show every row when a dataframe is displayed (None removes the row limit)
pd.set_option('display.max_rows', None)


In [58]:
#One row per calendar year 1900-2019 with the number of the 5-year and 10-year
#block it falls in (both numbered from 1) and its 1-based position in that block.
years_base = pd.DataFrame({'Year': np.arange(1900, 2020)})

#Derive the block numbers from the Year column itself. The earlier version sliced
#pre-built repeat lists with hand-written bounds ([:121] against 120 rows), which
#only worked because the list happened to hold exactly 120 entries.
years_since_start = years_base['Year'] - years_base['Year'].min()
years_base['Every_5'] = years_since_start // 5 + 1
years_base['Every_10'] = years_since_start // 10 + 1
years_base["Every_5_counter"] = years_base.groupby("Every_5").cumcount()+1
years_base['Every_10_counter'] = years_base.groupby('Every_10').cumcount()+1

years_base

Unnamed: 0,Year,Every_5,Every_10,Every_5_counter,Every_10_counter
0,1900,1,1,1,1
1,1901,1,1,2,2
2,1902,1,1,3,3
3,1903,1,1,4,4
4,1904,1,1,5,5
5,1905,2,1,1,6
6,1906,2,1,2,7
7,1907,2,1,3,8
8,1908,2,1,4,9
9,1909,2,1,5,10


In [59]:
#Attach the 5-year / 10-year block columns to every yearly record; the left join
#keeps years that have no records at all.
refugees_in_by_year_group = years_base.merge(refugees_in, on="Year", how='left')
refugees_out_by_year_group = years_base.merge(refugees_out, on="Year", how='left')


In [60]:
#Top origin countries of people hosted by Brazil, per 5-year and per 10-year block.

#Total population per origin within each 5-year / 10-year block.
sum_in_by_5_year = (
    refugees_in_by_year_group
    .groupby(by=['Every_5','Origin'])['Total Population']
    .sum()
    .reset_index()
)
sum_in_by_10_year = (
    refugees_in_by_year_group
    .groupby(by=['Every_10','Origin'])['Total Population']
    .sum()
    .reset_index()
)

#Pick the rows with the largest totals in each block, then number the kept rows
#1, 2, 3, ... inside their block.
top5_in_5yr_labels = sum_in_by_5_year.groupby('Every_5')['Total Population'].nlargest(5).index.get_level_values(1)
top5_in_by_5_year = sum_in_by_5_year.loc[top5_in_5yr_labels]
top5_in_by_5_year = top5_in_by_5_year.assign(Every_5_counter=top5_in_by_5_year.groupby("Every_5").cumcount() + 1)

top5_in_10yr_labels = sum_in_by_10_year.groupby('Every_10')['Total Population'].nlargest(5).index.get_level_values(1)
top5_in_by_10_year = sum_in_by_10_year.loc[top5_in_10yr_labels]
top5_in_by_10_year = top5_in_by_10_year.assign(Every_10_counter=top5_in_by_10_year.groupby("Every_10").cumcount() + 1)

top10_in_10yr_labels = sum_in_by_10_year.groupby('Every_10')['Total Population'].nlargest(10).index.get_level_values(1)
top10_in_by_10_year = sum_in_by_10_year.loc[top10_in_10yr_labels]
top10_in_by_10_year = top10_in_by_10_year.assign(Every_10_counter=top10_in_by_10_year.groupby("Every_10").cumcount() + 1)


In [61]:
#Top destination countries of people originating from Brazil, per 5-year and per 10-year block.

#Total population per destination within each 5-year / 10-year block.
sum_out_by_5_year = (
    refugees_out_by_year_group
    .groupby(by=['Every_5','Country'])['Total Population']
    .sum()
    .reset_index()
)
sum_out_by_10_year = (
    refugees_out_by_year_group
    .groupby(by=['Every_10','Country'])['Total Population']
    .sum()
    .reset_index()
)

#Pick the rows with the largest totals in each block, then number the kept rows
#1, 2, 3, ... inside their block.
top5_out_5yr_labels = sum_out_by_5_year.groupby('Every_5')['Total Population'].nlargest(5).index.get_level_values(1)
top5_out_by_5_year = sum_out_by_5_year.loc[top5_out_5yr_labels]
top5_out_by_5_year = top5_out_by_5_year.assign(Every_5_counter=top5_out_by_5_year.groupby("Every_5").cumcount() + 1)

top5_out_10yr_labels = sum_out_by_10_year.groupby('Every_10')['Total Population'].nlargest(5).index.get_level_values(1)
top5_out_by_10_year = sum_out_by_10_year.loc[top5_out_10yr_labels]
top5_out_by_10_year = top5_out_by_10_year.assign(Every_10_counter=top5_out_by_10_year.groupby("Every_10").cumcount() + 1)

top10_out_10yr_labels = sum_out_by_10_year.groupby('Every_10')['Total Population'].nlargest(10).index.get_level_values(1)
top10_out_by_10_year = sum_out_by_10_year.loc[top10_out_10yr_labels]
top10_out_by_10_year = top10_out_by_10_year.assign(Every_10_counter=top10_out_by_10_year.groupby("Every_10").cumcount() + 1)


In [62]:
#Slot the decade top-10 rows into the year scaffold: row k of a decade in
#years_base receives the decade's k-th ranked country (left join, so unmatched
#slots stay empty).
decade_slot_keys = ["Every_10","Every_10_counter"]

top10_out_by_10_year_for_export = years_base.merge(top10_out_by_10_year, on=decade_slot_keys, how='left')

top10_in_by_10_year_for_export = years_base.merge(top10_in_by_10_year, on=decade_slot_keys, how='left')


In [63]:

#Human-readable labels for the block numbers: 1 -> '1900-1904', ..., 24 -> '2015-2019'
five_yr_dict = {
    block: '{}-{}'.format(1900 + 5 * (block - 1), 1900 + 5 * block - 1)
    for block in range(1, 25)
}

#Same for decades: 1 -> '1900-1909', ..., 12 -> '2010-2019'
ten_yr_dict = {
    block: '{}-{}'.format(1900 + 10 * (block - 1), 1900 + 10 * block - 1)
    for block in range(1, 13)
}


In [79]:
#Decade table of the top 10 origin countries: keep only the decade, the country
#and the population, and show the decade as a label such as '1990-1999'.
try_10_for_10_in = top10_in_by_10_year_for_export.drop(['Year','Every_5','Every_5_counter','Every_10_counter'], axis=1)
try_10_for_10_in = try_10_for_10_in.replace({"Every_10": ten_yr_dict})

#Fill the empty slots. The results are assigned back because
#fillna(..., inplace=True) on a selected column does not update the dataframe
#when pandas Copy-on-Write is active.
try_10_for_10_in['Origin'] = try_10_for_10_in['Origin'].fillna('')
#An empty slot gets a population of 10, i.e. a 'New Pop' of 1 after the division below
try_10_for_10_in['Total Population'] = try_10_for_10_in['Total Population'].fillna(10)
try_10_for_10_in['New Pop'] = try_10_for_10_in['Total Population']/10

try_10_for_10_in_base = try_10_for_10_in.copy()
try_10_for_10_in_100 = try_10_for_10_in.copy()

#Cap 'New Pop' at 100 in the "_100" variant
a = np.array(try_10_for_10_in_100['New Pop'].values.tolist())
try_10_for_10_in_100['New Pop'] = np.where(a > 100, 100, a).tolist()

In [80]:
#Decade table of the top 10 destination countries: keep only the decade, the
#country and the population, and show the decade as a label such as '1990-1999'.
try_10_for_10_out = top10_out_by_10_year_for_export.drop(['Year','Every_5','Every_5_counter','Every_10_counter'], axis=1)
try_10_for_10_out = try_10_for_10_out.replace({"Every_10": ten_yr_dict})

#Fill the empty slots. The results are assigned back because
#fillna(..., inplace=True) on a selected column does not update the dataframe
#when pandas Copy-on-Write is active.
try_10_for_10_out['Country'] = try_10_for_10_out['Country'].fillna('')
#An empty slot gets a population of 10, i.e. a 'New Pop' of 1 after the division below
try_10_for_10_out['Total Population'] = try_10_for_10_out['Total Population'].fillna(10)
try_10_for_10_out['New Pop'] = try_10_for_10_out['Total Population']/10

try_10_for_10_out_base = try_10_for_10_out.copy()
try_10_for_10_out_100 = try_10_for_10_out.copy()

#Cap 'New Pop' at 100 in the "_100" variant
b = np.array(try_10_for_10_out_100['New Pop'].values.tolist())
try_10_for_10_out_100['New Pop'] = np.where(b > 100, 100, b).tolist()

In [81]:
#Display the decade table for refugees out with 'New Pop' capped at 100
try_10_for_10_out_100

Unnamed: 0,Every_10,Country,Total Population,New Pop
0,1900-1909,,10.0,1.0
1,1900-1909,,10.0,1.0
2,1900-1909,,10.0,1.0
3,1900-1909,,10.0,1.0
4,1900-1909,,10.0,1.0
5,1900-1909,,10.0,1.0
6,1900-1909,,10.0,1.0
7,1900-1909,,10.0,1.0
8,1900-1909,,10.0,1.0
9,1900-1909,,10.0,1.0


In [82]:
#Write the decade tables to CSV: the full table, plus a "_for_print" version
#without the decade label and the total population.
decade_exports = [
    (try_10_for_10_in_base, 'Origin', 'Processed Data/refugees_in_base_1900_to_2017_by_decade'),
    (try_10_for_10_in_100, 'Origin', 'Processed Data/refugees_in_100_1900_to_2017_by_decade'),
    (try_10_for_10_out_base, 'Country', 'Processed Data/refugees_out_base_1900_to_2017_by_decade'),
    (try_10_for_10_out_100, 'Country', 'Processed Data/refugees_out_100_1900_to_2017_by_decade'),
]
for decade_frame, country_column, decade_stem in decade_exports:
    #Blank out any 0 placeholder in the country column. The result is assigned
    #back because replace(..., inplace=True) on a selected column does not
    #update the dataframe when pandas Copy-on-Write is active.
    decade_frame[country_column] = decade_frame[country_column].replace([0], [''])
    decade_frame.to_csv(decade_stem + '.csv')
    decade_frame.drop(['Every_10','Total Population'], axis=1).to_csv(decade_stem + '_for_print.csv')


In [83]:
#Turn each capped 'New Pop' column into one line of text: the values truncated
#to integers and joined with '+'.
try_10_for_10_in_100_txt = '+'.join(str(int(i)) for i in try_10_for_10_in_100['New Pop'].values.tolist())
try_10_for_10_out_100_txt = '+'.join(str(int(i)) for i in try_10_for_10_out_100['New Pop'].values.tolist())

#Save population data to text files; 'with' closes each file even if the write fails
with open('Processed Data/refugees_in_100_1900_to_2017_by_decade_text.txt', 'w') as textfile:
    textfile.write(try_10_for_10_in_100_txt)

with open('Processed Data/refugees_out_100_1900_to_2017_by_decade_text.txt', 'w') as textfile:
    textfile.write(try_10_for_10_out_100_txt)


**Good Code To Save**

In [None]:
def repeat_to_length(string_to_expand, length):
    """Repeat ``string_to_expand`` until the result is exactly ``length`` characters.

    The result is appended to the module-level list ``graph_list`` (which must
    exist before the call) and is also returned, so the function can be used
    without relying on that side effect.

    Parameters
    ----------
    string_to_expand : str
        Pattern to repeat, e.g. ``'|'``.
    length : int
        Number of characters wanted.

    Returns
    -------
    str
        The repeated pattern cut to ``length`` characters. An empty string is
        produced when ``string_to_expand`` is empty or ``length`` is not positive.
    """
    if not string_to_expand or length <= 0:
        # Nothing to repeat, or nothing asked for. This also avoids dividing by
        # a zero-length pattern and slicing with a negative bound.
        cell = ''
    else:
        # One repetition more than fits, then cut back to the exact length
        repetitions = length // len(string_to_expand) + 1
        cell = (string_to_expand * repetitions)[:length]
    graph_list.append(cell)
    return cell

In [None]:
#Add a text 'Histogram' column (one '|' per unit of the capped 'New Pop' count)
#to the export dataframes that are cut off at 100 and at 500.

#Show full column contents so the bars are not truncated. None is the supported
#"no limit" value; the old -1 spelling is deprecated or rejected by newer pandas
#and is kept only as a fallback for pandas versions that do not accept None.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

histogram_targets_capped = [
    (out_for_export_100, out_pop_count_100),  #Cut off by 100, refugees out
    (in_for_export_100, in_pop_count_100),    #Cut off by 100, refugees in
    (out_for_export_500, out_pop_count_500),  #Cut off by 500, refugees out
    (in_for_export_500, in_pop_count_500),    #Cut off by 500, refugees in
]
for histogram_frame, histogram_counts in histogram_targets_capped:
    #repeat_to_length appends each bar to the module-level graph_list
    graph_list = []
    for i in histogram_counts:
        repeat_to_length('|', i)
    grapharray = np.asarray(graph_list)
    histogram_frame['Histogram'] = grapharray

in_for_export_500.head()

In [None]:
#Add a text 'Histogram' column (one '|' per person in 'Total Population') to the
#uncapped export dataframes.

#Show full column contents so the bars are not truncated. None is the supported
#"no limit" value; the old -1 spelling is deprecated or rejected by newer pandas
#and is kept only as a fallback for pandas versions that do not accept None.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

histogram_targets_base = [
    (out_for_export_base, out_pop_count_base),  #refugees out
    (in_for_export_base, in_pop_count_base),    #refugees in
]
for histogram_frame, histogram_counts in histogram_targets_base:
    #repeat_to_length appends each bar to the module-level graph_list
    graph_list = []
    for i in histogram_counts:
        repeat_to_length('|', i)
    grapharray = np.asarray(graph_list)
    histogram_frame['Histogram'] = grapharray

in_for_export_base

In [None]:
#top5_in_by_5_year_for_export = pd.merge(years_base, top5_in_by_5_year, left_on=["Every_5","Every_5_counter"], right_on=["Every_5","Every_5_counter"], how='left')
#top5_in_by_10_year_for_export = pd.merge(years_base, top5_in_by_10_year, left_on=["Every_10","Every_10_counter"], right_on=["Every_10","Every_10_counter"], how='left')

#top5_out_by_5_year_for_export = pd.merge(years_base, top5_out_by_5_year, left_on=["Every_5","Every_5_counter"], right_on=["Every_5","Every_5_counter"], how='left')
#top5_out_by_10_year_for_export = pd.merge(years_base, top5_out_by_10_year, left_on=["Every_10","Every_10_counter"], right_on=["Every_10","Every_10_counter"], how='left')
