In [None]:
!pip install pymongo

In [1]:
import pandas as pd
import numpy as np
import requests as req
import json
from pprint import pprint
from itertools import islice
#import pymongo

pd.options.display.max_rows = 400

In [2]:
full_df = pd.read_csv("All_COO_COD.csv", encoding='ISO-8859-1')
full_df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,Year,Country / territory of asylum/residence,Origin,Population type,Value
0,1951,Australia,Various/Unknown,Refugees (incl. refugee-like situations),180000
1,1951,Austria,Various/Unknown,Refugees (incl. refugee-like situations),282000
2,1951,Belgium,Various/Unknown,Refugees (incl. refugee-like situations),55000
3,1951,Canada,Various/Unknown,Refugees (incl. refugee-like situations),168511
4,1951,Switzerland,Various/Unknown,Refugees (incl. refugee-like situations),10000


In [3]:
# Read in the full dataframe of origins and destinations
full_df = full_df.rename(columns={'Country / territory of asylum/residence':'Destination', 'Population type':'Type'})
full_df.head()

Unnamed: 0,Year,Destination,Origin,Type,Value
0,1951,Australia,Various/Unknown,Refugees (incl. refugee-like situations),180000
1,1951,Austria,Various/Unknown,Refugees (incl. refugee-like situations),282000
2,1951,Belgium,Various/Unknown,Refugees (incl. refugee-like situations),55000
3,1951,Canada,Various/Unknown,Refugees (incl. refugee-like situations),168511
4,1951,Switzerland,Various/Unknown,Refugees (incl. refugee-like situations),10000


# Retrieve Resource Watch API Metadata

### To get proper country codes

In [4]:
# Base URL for getting dataset metadata from RW API
# Metadata = Data that describes Data 
url = "https://api.resourcewatch.org/v1/dataset?sort=slug,-provider,userId&status=saved&includes=metadata,vocabulary,widget,layer"

# page[size] tells the API the maximum number of results to send back
# There are currently between 200 and 300 datasets on the RW API
payload = { "application":"rw", "page[size]": 1000}

# Request all datasets, and extract the data from the response
res = req.get(url, params=payload)
data = res.json()["data"]

#############################################################

### Convert the json object returned by the API into a pandas DataFrame
# Another option: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.io.json.json_normalize.html
datasets_on_api = {}
for ix, dset in enumerate(data):
    atts = dset["attributes"]
    metadata = atts["metadata"]
    layers = atts["layer"]
    widgets = atts["widget"]
    tags = atts["vocabulary"]
    datasets_on_api[atts["name"]] = {
        "rw_id":dset["id"],
        "table_name":atts["tableName"],
        "provider":atts["provider"],
        "date_updated":atts["updatedAt"],
        "num_metadata":len(metadata),
        "metadata": metadata,
        "num_layers":len(layers),
        "layers": layers,
        "num_widgets":len(widgets),
        "widgets": widgets,
        "num_tags":len(tags),
        "tags":tags
    }

# Create the DataFrame, name the index, and sort by date_updated
# More recently updated datasets at the top
current_datasets_on_api = pd.DataFrame.from_dict(datasets_on_api, orient='index')
current_datasets_on_api.index.rename("Dataset", inplace=True)
current_datasets_on_api.sort_values(by=["date_updated"], inplace=True, ascending = False)

## Retrieve GDP Data from RW API

In [5]:
query_base = "https://api.resourcewatch.org/v1/query/{}?sql={}"

sql = "SELECT * FROM {} WHERE year = 2015"
DT_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

#[ds for ds in current_datasets_on_api.index if 'gross' in ds.lower()]

ds = [ds for ds in current_datasets_on_api.index if 'gross' in ds.lower()][0]
ds_id = current_datasets_on_api.loc[ds, 'rw_id']
table_name = current_datasets_on_api.loc[ds, 'table_name']

query_sql = sql.format(table_name)
query = query_base.format(ds_id, query_sql) 
res = req.get(query).json()

gdp = pd.DataFrame(res['data'])

In [6]:
alias = gdp[['rw_country_code', 'rw_country_name']]

#temp_mrg = full_df.merge(gdp, left_on='Destination', right_on='rw_country_name')
#temp_mrg.head()
#temp_mrg['rw_country_code'].value_counts()
full_df.head()
full_df.describe()
# full_df has 197,665 rows

Unnamed: 0,Year
count,197665.0
mean,2008.078942
std,7.734538
min,1951.0
25%,2004.0
50%,2010.0
75%,2014.0
max,2017.0


In [7]:
def match_countries(name, alias):
    if name in alias['rw_country_name'].values:
        code = alias.loc[alias['rw_country_name'].isin([name]), 'rw_country_code'].values[0]
        return code
    else:
        print(name)
        code = input("Country Code? ")
        if code:
            return code
        else:
            return None

In [8]:
# Merge on Country of Destination

uq_names = full_df['Destination'].unique()
uq_names = pd.DataFrame(uq_names)
uq_names = uq_names.rename(columns={0:'country_name'})
uq_names.head()
# You need to make a unique() list from full_df... then get all the codes there, then merge to full_df on country_name

uq_names['dcode'] = uq_names.apply(lambda row: match_countries(row['country_name'], alias), axis=1)
uq_names

China, Hong Kong SAR
Country Code? HKG
Dem. Rep. of the Congo
Country Code? COD
United Rep. of Tanzania
Country Code? TZA
Iran (Islamic Rep. of)
Country Code? IRN
Central African Rep.
Country Code? CAF
China, Macao SAR
Country Code? MAC
Serbia and Kosovo (S/RES/1244 (1999))
Country Code? SRB
CÌ«te d'Ivoire
Country Code? CIV
Congo
Country Code? COG
Venezuela (Bolivarian Republic of)
Country Code? VEN
Dominican Rep.
Country Code? DOM
Viet Nam
Country Code? VNM
Bolivia (Plurinational State of)
Country Code? BOL
Lao People's Dem. Rep.
Country Code? LAO
Rep. of Korea
Country Code? KOR
Syrian Arab Rep.
Country Code? SYR
Guinea-Bissau
Country Code? GNB
French Guiana
Country Code? GUF
Cabo Verde
Country Code? CPV
Czech Rep.
Country Code? CZE
Bahamas
Country Code? BHS
The former Yugoslav Republic of Macedonia
Country Code? MKD
Russian Federation
Country Code? RUS
Rep. of Moldova
Country Code? MDA
Micronesia (Federated States of)
Country Code? FSM
Cayman Islands
Country Code? CYM
British Virgin 

Unnamed: 0,country_name,dcode
0,Australia,AUS
1,Austria,AUT
2,Belgium,BEL
3,Canada,CAN
4,Switzerland,CHE
5,Germany,DEU
6,Denmark,DNK
7,Spain,ESP
8,France,FRA
9,United Kingdom,GBR


In [9]:
my_df = full_df.merge(uq_names, left_on="Destination", right_on="country_name")
del my_df['country_name']
my_df.describe()
#my_df has 197,665 rows

Unnamed: 0,Year
count,197665.0
mean,2008.078942
std,7.734538
min,1951.0
25%,2004.0
50%,2010.0
75%,2014.0
max,2017.0


In [11]:
# Now merge on country of origin

uq_names_org = my_df['Origin'].unique()
uq_names_org = pd.DataFrame(uq_names_org)
uq_names_org = uq_names_org.rename(columns={0: 'country_name'})
uq_names_org.head()

uq_names_org['ocode'] = uq_names_org.apply(lambda row: match_countries(row['country_name'], alias), axis=1)

Various/Unknown
Country Code? 
Dem. Rep. of the Congo
Country Code? COD
Czech Rep.
Country Code? CZE
Iran (Islamic Rep. of)
Country Code? IRN
Rep. of Korea
Country Code? KOR
Lao People's Dem. Rep.
Country Code? LAO
Rep. of Moldova
Country Code? MDA
The former Yugoslav Republic of Macedonia
Country Code? MKD
Dem. People's Rep. of Korea
Country Code? PRK
Palestinian
Country Code? PSE
Russian Federation
Country Code? RUS
Serbia and Kosovo (S/RES/1244 (1999))
Country Code? SRB
Syrian Arab Rep.
Country Code? SYR
United Rep. of Tanzania
Country Code? TZA
Viet Nam
Country Code? VNM
Stateless
Country Code? 
Congo
Country Code? COG
Bolivia (Plurinational State of)
Country Code? BOL
China, Hong Kong SAR
Country Code? HKG
CÌ«te d'Ivoire
Country Code? CIV
Dominican Rep.
Country Code? DOM
Guinea-Bissau
Country Code? GNB
Venezuela (Bolivarian Republic of)
Country Code? VEN
French Polynesia
Country Code? PYF
Tibetan
Country Code? TAR
Brunei Darussalam
Country Code? BRN
Central African Rep.
Country Co

In [12]:
complete_df = my_df.merge(uq_names_org, left_on="Origin", right_on="country_name", how="left")
del complete_df['country_name']
complete_df.to_csv('fully_prepped.csv',index=False)

# Country of Origin v Destination 2007-2017

In [15]:
# Separate the Refugees 
complete_df['Value'] = complete_df['Value'].replace('*', 0)
complete_df['Value'] = pd.to_numeric(complete_df['Value'])
fugees_df = complete_df.loc[complete_df['Type']!='Asylum-seekers']
fugees_df.reset_index(drop=True, inplace=True)
fugees_df.describe()
fugees_df.to_csv('fugees_df.csv')
# 109,632 rows

In [16]:
# Create a df that just has COO refugee totals 2007-2017
del fugees_df['Type']
fugees_2 = fugees_df.loc[fugees_df['Year'] >= 2007,:]
origins = fugees_2.groupby('Origin')
origins = origins['Value'].sum()
origins = pd.DataFrame(origins)
origins.to_csv("origins.csv")

# There are 221 rows in origins

In [17]:
# Create a df that has COO x COD refugee totals 2007-2017
ref_dyads = fugees_2.groupby(['Origin', 'Destination'])
ref_dyads = ref_dyads['Value'].sum()
ref_dyads = pd.DataFrame(ref_dyads)
ref_dyads.head()
ref_dyads.to_csv('ref_dyads.csv')

# There should be 9,279 rows here!

In [19]:
# Merge uq_names AND uq_names_org with ref_dyads 
ref_dyads.reset_index(drop=False, inplace=True)
ref_dyads_2 = ref_dyads.merge(uq_names, left_on="Destination", right_on="country_name", how="left")
del ref_dyads_2['country_name']

ref_dyads_3 = ref_dyads_2.merge(uq_names_org, left_on="Origin", right_on="country_name", how="left")
del ref_dyads_3['country_name']
ref_dyads_3.head()

refugees_4 = ref_dyads_3.merge(origins, left_on="Origin", right_on="Origin", how="left")
refugees_4 = refugees_4.rename(columns={'Value_x': 'COO_COD', 'Value_y': 'COO_total'})
refugees_4.to_csv('refugees_4.csv')

# Execute a double_sort on Value and country of origin
refugees_4['Pct'] = refugees_4['COO_COD'] / refugees_4['COO_total'] 

refugees_4 = refugees_4.sort_values(['Origin', 'Pct'], ascending=[True, False])
refugees_4.reset_index(drop=True, inplace=True)
refugees_4.to_csv('refugees_4.csv')
refugees_4['Pct'] = round(refugees_4['Pct'], 3)
refugees_4.head()

Unnamed: 0,index,Origin,Destination,COO_COD,dcode,ocode,COO_total,Pct
0,77,Afghanistan,Pakistan,18220178,PAK,AFG,30027956,0.607
1,46,Afghanistan,Iran (Islamic Rep. of),10173716,IRN,AFG,30027956,0.339
2,39,Afghanistan,Germany,404959,DEU,AFG,30027956,0.013
3,109,Afghanistan,United Kingdom,165161,GBR,AFG,30027956,0.006
4,7,Afghanistan,Austria,133448,AUT,AFG,30027956,0.004


In [20]:
del refugees_4['index']
refugees_4.head()

Unnamed: 0,Origin,Destination,COO_COD,dcode,ocode,COO_total,Pct
0,Afghanistan,Pakistan,18220178,PAK,AFG,30027956,0.607
1,Afghanistan,Iran (Islamic Rep. of),10173716,IRN,AFG,30027956,0.339
2,Afghanistan,Germany,404959,DEU,AFG,30027956,0.013
3,Afghanistan,United Kingdom,165161,GBR,AFG,30027956,0.006
4,Afghanistan,Austria,133448,AUT,AFG,30027956,0.004


In [21]:
# Take the dataframe 'refugees_4' and jsonify
temp_df = pd.DataFrame()
coo_cod_array = {}

for country in refugees_4['Origin'].unique():
    temp_df = refugees_4.loc[refugees_4['Origin']==country,:]
    temp_df = temp_df.loc[temp_df['Pct'] >= 0.01,:]
    temp_array = {}
    for x in range(len(temp_df)):
        row = temp_df.iloc[x,:]
        temp_array[row[3]] = {row[1]: {'Number': row[2], 'Percentage': row[6]}}
        ocode = row[4]
    coo_cod_array[country] = {ocode: temp_array}
    
pprint(coo_cod_array)

{'Afghanistan': {'AFG': {'DEU': {'Germany': {'Number': 404959,
                                             'Percentage': 0.013}},
                         'IRN': {'Iran (Islamic Rep. of)': {'Number': 10173716,
                                                            'Percentage': 0.339}},
                         'PAK': {'Pakistan': {'Number': 18220178,
                                              'Percentage': 0.607}}}},
 'Albania': {'ALB': {'AUT': {'Austria': {'Number': 1554, 'Percentage': 0.011}},
                     'BEL': {'Belgium': {'Number': 3683, 'Percentage': 0.026}},
                     'CAN': {'Canada': {'Number': 19370, 'Percentage': 0.138}},
                     'DEU': {'Germany': {'Number': 13896, 'Percentage': 0.099}},
                     'FRA': {'France': {'Number': 24429, 'Percentage': 0.173}},
                     'GBR': {'United Kingdom': {'Number': 17471,
                                                'Percentage': 0.124}},
                     'IRL': {'Ir

 'Ecuador': {'ECU': {'ARG': {'Argentina': {'Number': 136, 'Percentage': 0.013}},
                     'AUS': {'Australia': {'Number': 293, 'Percentage': 0.027}},
                     'BRA': {'Brazil': {'Number': 403, 'Percentage': 0.037}},
                     'CAN': {'Canada': {'Number': 2172, 'Percentage': 0.201}},
                     'CHL': {'Chile': {'Number': 168, 'Percentage': 0.016}},
                     'DEU': {'Germany': {'Number': 915, 'Percentage': 0.084}},
                     'FRA': {'France': {'Number': 302, 'Percentage': 0.028}},
                     'GBR': {'United Kingdom': {'Number': 1600,
                                                'Percentage': 0.148}},
                     'ITA': {'Italy': {'Number': 131, 'Percentage': 0.012}},
                     'SWE': {'Sweden': {'Number': 418, 'Percentage': 0.039}},
                     'USA': {'United States of America': {'Number': 3723,
                                                          'Percentage': 0.344}}}},


                   'SWE': {'Sweden': {'Number': 3449, 'Percentage': 0.067}},
                   'TCD': {'Chad': {'Number': 1009, 'Percentage': 0.019}},
                   'TUN': {'Tunisia': {'Number': 1052, 'Percentage': 0.02}},
                   'USA': {'United States of America': {'Number': 1534,
                                                        'Percentage': 0.03}}}},
 'Liechtenstein': {'LBY': {}},
 'Lithuania': {'LTU': {'BEL': {'Belgium': {'Number': 137, 'Percentage': 0.037}},
                       'CAN': {'Canada': {'Number': 974, 'Percentage': 0.265}},
                       'DEU': {'Germany': {'Number': 1302,
                                           'Percentage': 0.354}},
                       'FRA': {'France': {'Number': 122, 'Percentage': 0.033}},
                       'GBR': {'United Kingdom': {'Number': 43,
                                                  'Percentage': 0.012}},
                       'IRL': {'Ireland': {'Number': 168, 'Percentage': 0.046}},
    

 'Sierra Leone': {'SLE': {'AGO': {'Angola': {'Number': 3329,
                                             'Percentage': 0.026}},
                          'BRA': {'Brazil': {'Number': 1515,
                                             'Percentage': 0.012}},
                          'CAN': {'Canada': {'Number': 2031,
                                             'Percentage': 0.016}},
                          'DEU': {'Germany': {'Number': 5689,
                                              'Percentage': 0.044}},
                          'FRA': {'France': {'Number': 4775,
                                             'Percentage': 0.037}},
                          'GBR': {'United Kingdom': {'Number': 17841,
                                                     'Percentage': 0.137}},
                          'GIN': {'Guinea': {'Number': 14523,
                                             'Percentage': 0.112}},
                          'GMB': {'Gambia': {'Number': 15736,
               

                    'NLD': {'Netherlands': {'Number': 39, 'Percentage': 0.014}},
                    'SWE': {'Sweden': {'Number': 52, 'Percentage': 0.019}},
                    'USA': {'United States of America': {'Number': 913,
                                                         'Percentage': 0.336}},
                    'ZAF': {'South Africa': {'Number': 415,
                                             'Percentage': 0.153}}}},
 'Zimbabwe': {'ZWE': {'AUS': {'Australia': {'Number': 8649,
                                            'Percentage': 0.039}},
                      'BWA': {'Botswana': {'Number': 8406,
                                           'Percentage': 0.038}},
                      'CAN': {'Canada': {'Number': 34332, 'Percentage': 0.153}},
                      'GBR': {'United Kingdom': {'Number': 120397,
                                                 'Percentage': 0.537}},
                      'IRL': {'Ireland': {'Number': 3263, 'Percentage': 0.015}},
        

# Country of Origin vs Destination Asylum Seekers 2007-2017

In [22]:
#Separate the seekers
seekers_df = complete_df.loc[complete_df['Type']=='Asylum-seekers',:]
seekers_df.reset_index(drop=True, inplace=True)
seekers_df.describe()
#88,033 rows

Unnamed: 0,Year,Value
count,88033.0,88033.0
mean,2010.306419,268.591028
std,4.790444,4545.877383
min,2000.0,-1.0
25%,2007.0,1.0
50%,2011.0,3.0
75%,2014.0,23.0
max,2017.0,940668.0


In [23]:
# Create a df that just has COO asylum seeker totals 2007-2017
del seekers_df['Type']
seekers_2 = seekers_df.loc[seekers_df['Year'] >= 2007,:]
origins_as = seekers_2.groupby('Origin')
origins_as = origins_as['Value'].sum()
origins_as = pd.DataFrame(origins_as)
origins_as.to_csv("origins_as.csv")

# There are 222 rows in origins

In [24]:
# Create a df that has COO x COD asylum seeker totals 2007-2017
ref_dyads_as = seekers_2.groupby(['Origin', 'Destination'])
ref_dyads_as = ref_dyads_as['Value'].sum()
ref_dyads_as = pd.DataFrame(ref_dyads_as)
ref_dyads_as.head()
ref_dyads_as.to_csv('ref_dyads_as.csv')

# There should be 10,384 rows here!

In [25]:
# Merge uq_names AND uq_names_org with ref_dyads_as 
ref_dyads_as.reset_index(drop=False, inplace=True)
ref_dyads_as_2 = ref_dyads_as.merge(uq_names, left_on="Destination", right_on="country_name", how="left")
del ref_dyads_as_2['country_name']

ref_dyads_as_3 = ref_dyads_as_2.merge(uq_names_org, left_on="Origin", right_on="country_name", how="left")
ref_dyads_as_3
del ref_dyads_as_3['country_name']
#ref_dyads_as_3.head()

seekers_4 = ref_dyads_as_3.merge(origins_as, left_on="Origin", right_on="Origin", how="left")
seekers_4 = seekers_4.rename(columns={'Value_x': 'COO_COD', 'Value_y': 'COO_total'})
seekers_4.to_csv('seekers_4.csv')

# Execute a double_sort on Value and country of origin
seekers_4['Pct'] = seekers_4['COO_COD'] / seekers_4['COO_total'] 

seekers_4 = seekers_4.sort_values(['Origin', 'Pct'], ascending=[True, False])
seekers_4.reset_index(drop=True, inplace=True)
seekers_4.to_csv('seekers_4.csv')
seekers_4['Pct'] = round(seekers_4['Pct'], 3)


In [26]:
del seekers_4['level_0']
del seekers_4['index']
seekers_4.head()

KeyError: 'level_0'

In [28]:
# Take the dataframe 'seekers_4' and jsonify
temp_df = pd.DataFrame()
coo_cod_as_array = {}

for country in seekers_4['Origin'].unique():
    temp_df = seekers_4.loc[seekers_4['Origin']==country,:]
    temp_df = temp_df.loc[temp_df['Pct'] >= 0.01,:]
    temp_array = {}
    for x in range(len(temp_df)):
        row = temp_df.iloc[x,:]
        temp_array[row[3]] = {row[1]: {'Number': row[2], 'Percentage': row[6]}}
        ocode = row[4]
    coo_cod_as_array[country] = {ocode: temp_array}
    
pprint(coo_cod_as_array)

{'Afghanistan': {'AFG': {'AUT': {'Austria': {'Number': 111338,
                                             'Percentage': 0.083}},
                         'BEL': {'Belgium': {'Number': 17036,
                                             'Percentage': 0.013}},
                         'BGR': {'Bulgaria': {'Number': 16740,
                                              'Percentage': 0.013}},
                         'CHE': {'Switzerland': {'Number': 29923,
                                                 'Percentage': 0.022}},
                         'DEU': {'Germany': {'Number': 331927,
                                             'Percentage': 0.248}},
                         'GBR': {'United Kingdom': {'Number': 13664,
                                                    'Percentage': 0.01}},
                         'GRC': {'Greece': {'Number': 23028,
                                            'Percentage': 0.017}},
                         'HUN': {'Hungary': {'Number': 13747,
     

                                               'Percentage': 0.013}},
                            'ISR': {'Israel': {'Number': 3756,
                                               'Percentage': 0.027}},
                            'ITA': {'Italy': {'Number': 20936,
                                              'Percentage': 0.152}},
                            'MAR': {'Morocco': {'Number': 2401,
                                                'Percentage': 0.017}},
                            'MLI': {'Mali': {'Number': 7364,
                                             'Percentage': 0.053}},
                            'MRT': {'Mauritania': {'Number': 1688,
                                                   'Percentage': 0.012}},
                            'SEN': {'Senegal': {'Number': 3037,
                                                'Percentage': 0.022}},
                            'TGO': {'Togo': {'Number': 2156,
                                             'Percentage': 0.016

                        'CZE': {'Czech Rep.': {'Number': 695,
                                               'Percentage': 0.05}},
                        'DEU': {'Germany': {'Number': 1193,
                                            'Percentage': 0.086}},
                        'FRA': {'France': {'Number': 474, 'Percentage': 0.034}},
                        'KOR': {'Rep. of Korea': {'Number': 1592,
                                                  'Percentage': 0.115}},
                        'NOR': {'Norway': {'Number': 134, 'Percentage': 0.01}},
                        'POL': {'Poland': {'Number': 197, 'Percentage': 0.014}},
                        'SWE': {'Sweden': {'Number': 2318,
                                           'Percentage': 0.167}},
                        'TUR': {'Turkey': {'Number': 274, 'Percentage': 0.02}},
                        'USA': {'United States of America': {'Number': 4279,
                                                             'Percentage': 0.30

                                            'Percentage': 0.031}},
                         'HKG': {'China, Hong Kong SAR': {'Number': 883,
                                                          'Percentage': 0.046}},
                         'ISR': {'Israel': {'Number': 349,
                                            'Percentage': 0.018}},
                         'ITA': {'Italy': {'Number': 303, 'Percentage': 0.016}},
                         'JPN': {'Japan': {'Number': 6954,
                                           'Percentage': 0.366}},
                         'KOR': {'Rep. of Korea': {'Number': 653,
                                                   'Percentage': 0.034}},
                         'USA': {'United States of America': {'Number': 4900,
                                                              'Percentage': 0.258}}}},
 'Poland': {'POL': {'CAN': {'Canada': {'Number': 1372, 'Percentage': 0.362}},
                    'DEU': {'Germany': {'Number': 105, 'Percenta

                  'CAN': {'Canada': {'Number': 311, 'Percentage': 0.014}},
                  'CHE': {'Switzerland': {'Number': 820, 'Percentage': 0.037}},
                  'CIV': {"CÌ«te d'Ivoire": {'Number': 402,
                                             'Percentage': 0.018}},
                  'DEU': {'Germany': {'Number': 4097, 'Percentage': 0.184}},
                  'FRA': {'France': {'Number': 949, 'Percentage': 0.043}},
                  'GHA': {'Ghana': {'Number': 5125, 'Percentage': 0.23}},
                  'IRL': {'Ireland': {'Number': 288, 'Percentage': 0.013}},
                  'ITA': {'Italy': {'Number': 2037, 'Percentage': 0.091}},
                  'MLI': {'Mali': {'Number': 252, 'Percentage': 0.011}},
                  'SEN': {'Senegal': {'Number': 799, 'Percentage': 0.036}},
                  'USA': {'United States of America': {'Number': 1836,
                                                       'Percentage': 0.082}},
                  'ZAF': {'South Africa': 

# Countries of Origin over Time

In [29]:
# Manipulate the Country of Origin over time

#complete_df['Value'] = complete_df['Value'].replace('*', 0)
#complete_df['Value'] = pd.to_numeric(complete_df['Value'])
country_over_time = complete_df.groupby(["Origin","Year","Type"])
country_over_time = country_over_time['Value'].sum()
country_over_time = pd.DataFrame(country_over_time)
country_over_time.head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Value
Origin,Year,Type,Unnamed: 3_level_1
Afghanistan,1979,Refugees (incl. refugee-like situations),500000
Afghanistan,1980,Refugees (incl. refugee-like situations),1734921
Afghanistan,1981,Refugees (incl. refugee-like situations),3879984
Afghanistan,1982,Refugees (incl. refugee-like situations),4488214
Afghanistan,1983,Refugees (incl. refugee-like situations),4712735
Afghanistan,1984,Refugees (incl. refugee-like situations),4417651
Afghanistan,1985,Refugees (incl. refugee-like situations),4653193
Afghanistan,1986,Refugees (incl. refugee-like situations),5094283
Afghanistan,1987,Refugees (incl. refugee-like situations),5511740
Afghanistan,1988,Refugees (incl. refugee-like situations),5622982


In [30]:
# Countries over time (to be compared with conflict data)
cot=country_over_time.reset_index(drop=False)
cot = cot.merge(uq_names_org, left_on="Origin", right_on="country_name", how="left")
del cot['country_name']
## It gets really sparse before 1964
cot.to_csv("countries_over_time.csv", index=False)

In [None]:
#cot.head(50)
cot['Year'].value_counts()

In [31]:
temp_df = pd.DataFrame()
cot_array = {}

for country in cot['Origin'].unique():
    temp_df = cot.loc[cot['Origin']==country,:]
    temp_array = {}
    counter = iter(range(len(temp_df)))
    for x in counter:
        row = temp_df.iloc[x,:]
        if (x == len(temp_df)-1):
            temp_array[row[1]] = {row[2]: row[3]}
        else:
            row2 = temp_df.iloc[(x+1),:]
            if row[1] == row2[1]:
                temp_array[row[1]] = {row[2]: row[3], row2[2]: row2[3]}
                next(islice(counter,1,1), None)
            else:
                temp_array[row[1]] = {row[2]: row[3]}
        ccode = row[4]
    cot_array[country] = {ccode: temp_array}
    
pprint(cot_array)

# This is now in a json array... that I might host somewhere? 
#(mlab or mongoDB?)

{'Afghanistan': {'AFG': {1979: {'Refugees (incl. refugee-like situations)': 500000},
                         1980: {'Refugees (incl. refugee-like situations)': 1734921},
                         1981: {'Refugees (incl. refugee-like situations)': 3879984},
                         1982: {'Refugees (incl. refugee-like situations)': 4488214},
                         1983: {'Refugees (incl. refugee-like situations)': 4712735},
                         1984: {'Refugees (incl. refugee-like situations)': 4417651},
                         1985: {'Refugees (incl. refugee-like situations)': 4653193},
                         1986: {'Refugees (incl. refugee-like situations)': 5094283},
                         1987: {'Refugees (incl. refugee-like situations)': 5511740},
                         1988: {'Refugees (incl. refugee-like situations)': 5622982},
                         1989: {'Refugees (incl. refugee-like situations)': 5643989},
                         1990: {'Refugees (incl. refuge

                     2003: {'Asylum-seekers': 2098,
                            'Refugees (incl. refugee-like situations)': 7815},
                     2004: {'Asylum-seekers': 1410,
                            'Refugees (incl. refugee-like situations)': 8231},
                     2005: {'Asylum-seekers': 1431,
                            'Refugees (incl. refugee-like situations)': 8857},
                     2006: {'Asylum-seekers': 1709,
                            'Refugees (incl. refugee-like situations)': 9371},
                     2007: {'Asylum-seekers': 1207,
                            'Refugees (incl. refugee-like situations)': 4970},
                     2008: {'Asylum-seekers': 1079,
                            'Refugees (incl. refugee-like situations)': 5384},
                     2009: {'Asylum-seekers': 981,
                            'Refugees (incl. refugee-like situations)': 5525},
                     2010: {'Asylum-seekers': 739,
                            'Refu

                        2010: {'Asylum-seekers': 12,
                               'Refugees (incl. refugee-like situations)': 25},
                        2011: {'Asylum-seekers': 9,
                               'Refugees (incl. refugee-like situations)': 27},
                        2012: {'Asylum-seekers': 13,
                               'Refugees (incl. refugee-like situations)': 25},
                        2013: {'Asylum-seekers': 22,
                               'Refugees (incl. refugee-like situations)': 28},
                        2014: {'Asylum-seekers': 42,
                               'Refugees (incl. refugee-like situations)': 27},
                        2015: {'Asylum-seekers': 102,
                               'Refugees (incl. refugee-like situations)': 28},
                        2016: {'Asylum-seekers': 103,
                               'Refugees (incl. refugee-like situations)': 13},
                        2017: {'Asylum-seekers': 175,
              

                               'Refugees (incl. refugee-like situations)': 344},
                        2010: {'Asylum-seekers': 76,
                               'Refugees (incl. refugee-like situations)': 352},
                        2011: {'Asylum-seekers': 40,
                               'Refugees (incl. refugee-like situations)': 331},
                        2012: {'Asylum-seekers': 27,
                               'Refugees (incl. refugee-like situations)': 325},
                        2013: {'Asylum-seekers': 461,
                               'Refugees (incl. refugee-like situations)': 463},
                        2014: {'Asylum-seekers': 127,
                               'Refugees (incl. refugee-like situations)': 417},
                        2015: {'Asylum-seekers': 218,
                               'Refugees (incl. refugee-like situations)': 379},
                        2016: {'Asylum-seekers': 308,
                               'Refugees (incl. refugee-li

                               2000: {'Asylum-seekers': 42,
                                      'Refugees (incl. refugee-like situations)': 509},
                               2001: {'Asylum-seekers': 42,
                                      'Refugees (incl. refugee-like situations)': 529},
                               2002: {'Asylum-seekers': 67,
                                      'Refugees (incl. refugee-like situations)': 552},
                               2003: {'Asylum-seekers': 61,
                                      'Refugees (incl. refugee-like situations)': 591},
                               2004: {'Asylum-seekers': 59,
                                      'Refugees (incl. refugee-like situations)': 546},
                               2005: {'Asylum-seekers': 59,
                                      'Refugees (incl. refugee-like situations)': 477},
                               2006: {'Asylum-seekers': 32,
                                      'Refugees (inc

                           1980: {'Refugees (incl. refugee-like situations)': 5002},
                           1981: {'Refugees (incl. refugee-like situations)': 4002},
                           1982: {'Refugees (incl. refugee-like situations)': 5002},
                           1983: {'Refugees (incl. refugee-like situations)': 5002},
                           1984: {'Refugees (incl. refugee-like situations)': 5012},
                           1985: {'Refugees (incl. refugee-like situations)': 5352},
                           1986: {'Refugees (incl. refugee-like situations)': 5354},
                           1987: {'Refugees (incl. refugee-like situations)': 5002},
                           1988: {'Refugees (incl. refugee-like situations)': 5001},
                           1989: {'Refugees (incl. refugee-like situations)': 5004},
                           1990: {'Refugees (incl. refugee-like situations)': 5003},
                           1991: {'Refugees (incl. refugee-like s

                        2016: {'Asylum-seekers': 2237,
                               'Refugees (incl. refugee-like situations)': 2364},
                        2017: {'Asylum-seekers': 3505,
                               'Refugees (incl. refugee-like situations)': 2386}}},
 'Kenya': {'KEN': {1975: {'Refugees (incl. refugee-like situations)': 90},
                   1977: {'Refugees (incl. refugee-like situations)': 1200},
                   1978: {'Refugees (incl. refugee-like situations)': 80},
                   1979: {'Refugees (incl. refugee-like situations)': 80},
                   1988: {'Refugees (incl. refugee-like situations)': 2},
                   1989: {'Refugees (incl. refugee-like situations)': 2},
                   1990: {'Refugees (incl. refugee-like situations)': 10},
                   1991: {'Refugees (incl. refugee-like situations)': 33},
                   1992: {'Refugees (incl. refugee-like situations)': 63},
                   1993: {'Refugees (incl. refuge

                      2013: {'Asylum-seekers': 283,
                             'Refugees (incl. refugee-like situations)': 485},
                      2014: {'Asylum-seekers': 957,
                             'Refugees (incl. refugee-like situations)': 468},
                      2015: {'Asylum-seekers': 2833,
                             'Refugees (incl. refugee-like situations)': 439},
                      2016: {'Asylum-seekers': 6165,
                             'Refugees (incl. refugee-like situations)': 456},
                      2017: {'Asylum-seekers': 9453,
                             'Refugees (incl. refugee-like situations)': 530}}},
 'Maldives': {'MDV': {1988: {'Refugees (incl. refugee-like situations)': 1},
                      1989: {'Refugees (incl. refugee-like situations)': 1},
                      1990: {'Refugees (incl. refugee-like situations)': 1},
                      1991: {'Refugees (incl. refugee-like situations)': 1},
                      1996: {'Re

                              'Refugees (incl. refugee-like situations)': 1468},
                       2012: {'Asylum-seekers': 123,
                              'Refugees (incl. refugee-like situations)': 1531},
                       2013: {'Asylum-seekers': 515,
                              'Refugees (incl. refugee-like situations)': 1538},
                       2014: {'Asylum-seekers': 727,
                              'Refugees (incl. refugee-like situations)': 1587},
                       2015: {'Asylum-seekers': 1248,
                              'Refugees (incl. refugee-like situations)': 1476},
                       2016: {'Asylum-seekers': 2126,
                              'Refugees (incl. refugee-like situations)': 1419},
                       2017: {'Asylum-seekers': 2719,
                              'Refugees (incl. refugee-like situations)': 1467}}},
 'Niger': {'NER': {1991: {'Refugees (incl. refugee-like situations)': 2},
                   1992: {'Refugees 

                           2007: {'Asylum-seekers': 389,
                                  'Refugees (incl. refugee-like situations)': 1188},
                           2008: {'Asylum-seekers': 511,
                                  'Refugees (incl. refugee-like situations)': 1104},
                           2009: {'Asylum-seekers': 319,
                                  'Refugees (incl. refugee-like situations)': 573},
                           2010: {'Asylum-seekers': 240,
                                  'Refugees (incl. refugee-like situations)': 585},
                           2011: {'Asylum-seekers': 184,
                                  'Refugees (incl. refugee-like situations)': 514},
                           2012: {'Asylum-seekers': 190,
                                  'Refugees (incl. refugee-like situations)': 558},
                           2013: {'Asylum-seekers': 216,
                                  'Refugees (incl. refugee-like situations)': 500},
           

                          'Refugees (incl. refugee-like situations)': 2},
                   2008: {'Asylum-seekers': 0,
                          'Refugees (incl. refugee-like situations)': 4},
                   2009: {'Asylum-seekers': 1},
                   2010: {'Asylum-seekers': 2,
                          'Refugees (incl. refugee-like situations)': 1},
                   2011: {'Asylum-seekers': 0,
                          'Refugees (incl. refugee-like situations)': 1},
                   2012: {'Asylum-seekers': 12,
                          'Refugees (incl. refugee-like situations)': 1},
                   2013: {'Asylum-seekers': 9,
                          'Refugees (incl. refugee-like situations)': 1},
                   2014: {'Asylum-seekers': 10,
                          'Refugees (incl. refugee-like situations)': 1},
                   2015: {'Asylum-seekers': 13,
                          'Refugees (incl. refugee-like situations)': 1},
                   2016: {'A

                             1997: {'Refugees (incl. refugee-like situations)': 34},
                             1998: {'Refugees (incl. refugee-like situations)': 34},
                             1999: {'Refugees (incl. refugee-like situations)': 34},
                             2000: {'Asylum-seekers': 23,
                                    'Refugees (incl. refugee-like situations)': 34},
                             2001: {'Asylum-seekers': 13,
                                    'Refugees (incl. refugee-like situations)': 42},
                             2002: {'Asylum-seekers': 4,
                                    'Refugees (incl. refugee-like situations)': 54},
                             2003: {'Asylum-seekers': 11,
                                    'Refugees (incl. refugee-like situations)': 60},
                             2004: {'Asylum-seekers': 10,
                                    'Refugees (incl. refugee-like situations)': 61},
                             20

                              'Refugees (incl. refugee-like situations)': 221}}},
 'Sweden': {'SWE': {1994: {'Refugees (incl. refugee-like situations)': 11},
                    1995: {'Refugees (incl. refugee-like situations)': 25},
                    1996: {'Refugees (incl. refugee-like situations)': 38},
                    1997: {'Refugees (incl. refugee-like situations)': 40},
                    1998: {'Refugees (incl. refugee-like situations)': 41},
                    1999: {'Refugees (incl. refugee-like situations)': 31},
                    2000: {'Asylum-seekers': 5,
                           'Refugees (incl. refugee-like situations)': 18},
                    2001: {'Asylum-seekers': 9,
                           'Refugees (incl. refugee-like situations)': 5},
                    2002: {'Asylum-seekers': 16,
                           'Refugees (incl. refugee-like situations)': 28},
                    2003: {'Asylum-seekers': 24,
                           'Refugees (inc

                                        'Refugees (incl. refugee-like situations)': 63},
                                 2006: {'Asylum-seekers': 158,
                                        'Refugees (incl. refugee-like situations)': 153},
                                 2007: {'Asylum-seekers': 175,
                                        'Refugees (incl. refugee-like situations)': 211},
                                 2008: {'Asylum-seekers': 239,
                                        'Refugees (incl. refugee-like situations)': 231},
                                 2009: {'Asylum-seekers': 272,
                                        'Refugees (incl. refugee-like situations)': 240},
                                 2010: {'Asylum-seekers': 146,
                                        'Refugees (incl. refugee-like situations)': 255},
                                 2011: {'Asylum-seekers': 105,
                                        'Refugees (incl. refugee-like situations)':

                                      2000: {'Asylum-seekers': 92,
                                             'Refugees (incl. refugee-like situations)': 176},
                                      2001: {'Asylum-seekers': 111,
                                             'Refugees (incl. refugee-like situations)': 219},
                                      2002: {'Asylum-seekers': 235,
                                             'Refugees (incl. refugee-like situations)': 383},
                                      2003: {'Asylum-seekers': 352,
                                             'Refugees (incl. refugee-like situations)': 395},
                                      2004: {'Asylum-seekers': 273,
                                             'Refugees (incl. refugee-like situations)': 451},
                                      2005: {'Asylum-seekers': 263,
                                             'Refugees (incl. refugee-like situations)': 683},
                       

                            2007: {'Asylum-seekers': 29,
                                   'Refugees (incl. refugee-like situations)': 116594},
                            2008: {'Asylum-seekers': 31,
                                   'Refugees (incl. refugee-like situations)': 116530},
                            2009: {'Asylum-seekers': 21,
                                   'Refugees (incl. refugee-like situations)': 116474},
                            2010: {'Asylum-seekers': 34,
                                   'Refugees (incl. refugee-like situations)': 116415},
                            2011: {'Asylum-seekers': 14,
                                   'Refugees (incl. refugee-like situations)': 116413},
                            2012: {'Asylum-seekers': 126,
                                   'Refugees (incl. refugee-like situations)': 116452},
                            2013: {'Asylum-seekers': 411,
                                   'Refugees (incl. refugee-like situat

In [32]:
refugees_4.head()

Unnamed: 0,Origin,Destination,COO_COD,dcode,ocode,COO_total,Pct
0,Afghanistan,Pakistan,18220178,PAK,AFG,30027956,0.607
1,Afghanistan,Iran (Islamic Rep. of),10173716,IRN,AFG,30027956,0.339
2,Afghanistan,Germany,404959,DEU,AFG,30027956,0.013
3,Afghanistan,United Kingdom,165161,GBR,AFG,30027956,0.006
4,Afghanistan,Austria,133448,AUT,AFG,30027956,0.004


In [37]:
# Country Codes for Kaori
cc_articles = refugees_4[["Origin", "ocode"]]
cc_articles = cc_articles.drop_duplicates()
cc_articles.reset_index(drop=True, inplace=True)
cc_articles.head()

Unnamed: 0,Origin,ocode
0,Afghanistan,AFG
1,Albania,ALB
2,Algeria,DZA
3,American Samoa,ASM
4,Andorra,AND


In [38]:
cc_articles = cc_articles.rename(columns={'Origin':'Country Name', 'ocode':'Country Code'})
cc_articles.head()

Unnamed: 0,Country Name,Country Code
0,Afghanistan,AFG
1,Albania,ALB
2,Algeria,DZA
3,American Samoa,ASM
4,Andorra,AND


In [41]:
cc_dict = {}
for x in range(len(cc_articles)):
    row = cc_articles.iloc[x,:]
    cc_dict[row[1]] = row[0]
pprint(cc_dict)

{None: 'Various/Unknown',
 'ABW': 'Aruba',
 'AFG': 'Afghanistan',
 'AGO': 'Angola',
 'AIA': 'Anguilla',
 'ALB': 'Albania',
 'AND': 'Andorra',
 'ARE': 'United Arab Emirates',
 'ARG': 'Argentina',
 'ARM': 'Armenia',
 'ASM': 'American Samoa',
 'ATG': 'Antigua and Barbuda',
 'AUS': 'Australia',
 'AUT': 'Austria',
 'AZE': 'Azerbaijan',
 'BDI': 'Burundi',
 'BEL': 'Belgium',
 'BEN': 'Benin',
 'BFA': 'Burkina Faso',
 'BGD': 'Bangladesh',
 'BGR': 'Bulgaria',
 'BHR': 'Bahrain',
 'BHS': 'Bahamas',
 'BIH': 'Bosnia and Herzegovina',
 'BLR': 'Belarus',
 'BLZ': 'Belize',
 'BMU': 'Bermuda',
 'BOL': 'Bolivia (Plurinational State of)',
 'BRA': 'Brazil',
 'BRB': 'Barbados',
 'BRN': 'Brunei Darussalam',
 'BTN': 'Bhutan',
 'BWA': 'Botswana',
 'CAF': 'Central African Rep.',
 'CAN': 'Canada',
 'CHE': 'Switzerland',
 'CHL': 'Chile',
 'CHN': 'China',
 'CIV': "CÌ«te d'Ivoire",
 'CMR': 'Cameroon',
 'COD': 'Dem. Rep. of the Congo',
 'COG': 'Congo',
 'COK': 'Cook Islands',
 'COL': 'Colombia',
 'COM': 'Comoros',
 '