In [1]:
# Import and initialize
import pandas as pd
import json
from math import isnan


In [2]:
# Read datasets
migration_flow = pd.read_csv('migration_flows_bilateral_2015_clean.csv')

# pandas' default NA parsing turns the literal string "NA" into NaN, which would
# wipe out an alpha-2 code spelled "NA". Only treat empty cells as missing here
# so that such a code survives the load.
iso_codes = pd.read_csv('all.csv', keep_default_na=False, na_values=[''])

In [3]:
# Split the bilateral table into an immigrant view and an emigrant view.
# Every step below returns a new frame, so `migration_flow` itself is never modified.

# Immigrants: discard every column whose name matches 'EMIGRANTS',
# then cut off the trailing 8 columns.
emigrant_columns = migration_flow.filter(regex='EMIGRANTS').columns
immigrants = migration_flow.drop(columns=emigrant_columns).iloc[:, :-8]

# Emigrants: discard every column whose name matches 'IMMIGRANTS',
# then cut off the trailing 8 columns (they are micro nations).
immigrant_columns = migration_flow.filter(regex='IMMIGRANTS').columns
emigrants = migration_flow.drop(columns=immigrant_columns).iloc[:, :-8]

# Strip the common prefix so the remaining headers are the bare country names.
emigrants.columns = emigrants.columns.str.replace("EMIGRANTS_FROM_", "")

In [4]:
# Country columns are everything after the first two columns
countries = emigrants.columns[2:]

# Map each country name to its position among the country columns, as a string
country_dict = {name: str(position) for position, name in enumerate(countries)}

# Replace column headers with those position strings instead of country names
emigrants.rename(columns=country_dict, inplace=True)

# Convert the country columns to {row label: {column label: value}} and keep
# only the entries that are not NaN, so the dictionary is clean
flow = {
    row_label: {
        column: value
        for column, value in row_flows.items()
        if not isnan(value)
    }
    for row_label, row_flows in emigrants.iloc[:, 2:].to_dict(orient='index').items()
}

# JSON string version of the cleaned dictionary
flower = json.dumps(flow)

In [None]:
# Keep only the second column of `emigrants` (read below as 'COUNTRY').
# The explicit .copy() makes this an independent frame, so the column
# assignments that follow do not write into a slice of `emigrants`
# (which raises SettingWithCopyWarning on classic pandas).
emigrants_15 = emigrants.iloc[:, 1:2].copy()

# Add the flow dictionary values as a column; pd.Series(flow) is keyed by the
# row labels of `emigrants`, so each row receives its own dictionary.
emigrants_15['flows'] = pd.Series(flow)

# First four characters of the country name, used later as a join key
emigrants_15['short'] = emigrants_15['COUNTRY'].str[:4]

In [6]:
# Rearrange iso dataset for join with migration dataset.
# Names are upper-cased in place on `iso_codes` before the slice is taken.
iso_codes['name'] = iso_codes['name'].str.upper()

# Take the first two columns, rename them, derive the 4-character join key
# and finally keep only the second column plus the key.
iso = (
    iso_codes.iloc[:, :2]
    .rename(columns={'name': 'COUNTRY', 'alpha-2': 'ISO Code'})
    .assign(short=lambda frame: frame['COUNTRY'].str[:4])
    .iloc[:, 1:3]
)

# Actual join: left merge on the 4-character prefix.
# NOTE(review): names sharing their first four characters all share one key,
# so a single country may match several ISO rows -- verify the result.
iso_emigrants = emigrants_15.merge(iso, how="left", on=['short'])

In [7]:
# Manual data cleaning -> future check if values are correctly assigned!

# Cleaning ISO values: row label in `iso_emigrants` -> corrected 'ISO Code'.
iso_code_fixes = {
    39: 'CV',
    40: 'MQ',   # As of now Martinique
    44: 'JE',   # As of now Jersey
    53: 'CI',
    59: 'CD',
    74: 'FO',
    125: 'LA',
    162: 'NA',
    193: 'PF',  # As of now French Polynesia
    197: 'RE',
    323: 'VA',
}

# .loc would silently append a new row for an unknown label, so fail loudly
# instead (the same error a lookup of a missing row would give).
unknown_rows = [row for row in iso_code_fixes if row not in iso_emigrants.index]
if unknown_rows:
    raise KeyError(f"rows not present in iso_emigrants: {unknown_rows}")

# Write through .loc so the frame itself is updated. Assigning into the object
# returned by .xs() is chained assignment and is not guaranteed to reach
# `iso_emigrants`.
for row, code in iso_code_fixes.items():
    iso_emigrants.loc[row, 'ISO Code'] = code

# We do not have values for North/South Korea



In [8]:
# Export the joined table as UTF-8 CSV (row index omitted) for the join in ArcGIS
iso_emigrants.to_csv(
    path_or_buf='emi_15_flow.csv',
    encoding='utf-8',
    index=False,
)

In [9]:
# Reintroduce the ArcGIS GeoJSON for further processing.
# The context manager closes the file even if parsing raises.
with open('emigrants_15_4326.geoJson', 'r') as f:
    data = json.load(f)

In [10]:
# Attach the flows from `iso_emigrants` to the GeoJSON features as JSON-native
# objects instead of strings.
# NOTE(review): features and data-frame rows are paired purely by position --
# confirm that the ArcGIS export keeps the same order as `iso_emigrants`.

# Pass 1: store each row's flows on the matching feature as a JSON string
for position, feature in enumerate(data['features']):
    feature['properties']['flows'] = json.dumps(iso_emigrants['flows'][position])

# Pass 2: parse every stored string back into a JSON-native object
for feature in data['features']:
    properties = feature['properties']
    properties['flows'] = json.loads(properties['flows'])

In [11]:
# Give every feature's geometry a 'centroid' entry: the list [X, Y] taken
# from that feature's properties.
# X is long, Y is lat in the dataset from ArcGIS.
for feature in data['features']:
    properties = feature['properties']
    feature['geometry']['centroid'] = [properties['X'], properties['Y']]

In [16]:
# Spot-check: display the properties of the feature at index 5.
# NOTE(review): this cell's execution count (16) is out of sequence with its
# neighbours -- re-run the notebook top to bottom before trusting the output.
data['features'][5]['properties']

{'OBJECTID': 7,
 'Country': 'Samoa',
 'ISO Code': 'WS',
 'Affiliation': 'Samoa',
 'Affiliation ISO Code': 'WS',
 'X': -172.1594594644777,
 'Y': -13.758366525659419,
 'Shape_Length': 3.0196618946939964,
 'Shape_Area': 0.24551890561299944,
 'COUNTRY': 'SAMOA',
 'flows': {'5': -618288.0,
  '37': 8662.0,
  '38': 0,
  '41': 31.0,
  '47': 997.0,
  '56': 75667.0,
  '88': 9252.0,
  '141': 235.0,
  '143': 839.0,
  '167': 0,
  '168': 5867.0,
  '174': 1186.0,
  '184': 6441.0,
  '196': 5917.0,
  '235': 290.0},
 'short': ''}

In [12]:
# Write JSON file.
# Serialise before opening the target, so a failed dump cannot leave a
# truncated 'e15.json' behind.
json_object = json.dumps(data)
with open('e15.json', mode='w') as outfile:
    outfile.write(json_object)
