In [None]:
# Import and initialize
import pandas as pd
import json
from math import isnan


In [None]:
# Load the two input tables: the bilateral migration flows and the
# country-name / ISO-code lookup.
migration_flow_path = 'migration_flows_bilateral_2015_clean.csv'
iso_codes_path = 'all.csv'

migration_flow = pd.read_csv(migration_flow_path)
iso_codes = pd.read_csv(iso_codes_path)

In [None]:
# Derive two frames from the raw table without modifying `migration_flow`:
# one without the EMIGRANTS columns and one without the IMMIGRANTS columns.

# Immigrants: drop every column whose name matches 'EMIGRANTS', then cut the
# trailing 8 columns.
emigrant_columns = list(migration_flow.filter(regex='EMIGRANTS'))
immigrants = migration_flow[migration_flow.columns.drop(emigrant_columns)].iloc[:, :-8]

# Emigrants: drop every column whose name matches 'IMMIGRANTS', then cut the
# trailing 8 columns (they are micro nations).
immigrant_columns = list(migration_flow.filter(regex='IMMIGRANTS'))
emigrants = migration_flow[migration_flow.columns.drop(immigrant_columns)].iloc[:, :-8]

# Strip the prefix so each remaining header is just the country name.
emigrants.columns = emigrants.columns.str.replace("EMIGRANTS_FROM_","")

In [None]:
# Country columns start at position 2; the first two columns are skipped.
countries = emigrants.columns[2:]

# Map every country column name to its position, stored as a string
# ('0', '1', ...).
country_dict = {name: str(position) for position, name in enumerate(countries)}

# Swap the country-name headers for those numeric string ids.
emigrants = emigrants.rename(columns=country_dict)

# One entry per row: {row label: {column id: value}}.
flow_with_nan = emigrants.iloc[:, 2:].to_dict(orient='index')

# Drop the NaN entries so each row keeps only the flows that have a value.
flow = {
    row_label: {
        column_id: value
        for column_id, value in row_flows.items()
        if not isnan(value)
    }
    for row_label, row_flows in flow_with_nan.items()
}

# JSON text version of the cleaned dictionary.
flower = json.dumps(flow)

In [None]:
# Build the export frame: the column at position 1 (used below as 'COUNTRY'),
# each row's flow dictionary, and a 4-character key used for the ISO join.
#
# The explicit .copy() matters: without it `emigrants_15` is a slice of
# `emigrants`, and assigning new columns to it is chained assignment
# (SettingWithCopyWarning, ambiguous about which object gets modified).
emigrants_15 = emigrants.iloc[:, 1:2].copy()

# pd.Series(flow) is indexed by the keys of `flow` (the row labels), so the
# assignment aligns each dictionary with its row.
emigrants_15['flows'] = pd.Series(flow)

# First four characters of the country name serve as a fuzzy join key.
emigrants_15['short'] = emigrants_15['COUNTRY'].str[:4]

In [None]:
# Prepare the ISO lookup for joining with the migration data.
# Names are upper-cased in place on `iso_codes`.
iso_codes['name'] = iso_codes['name'].str.upper()

# Keep the first two columns (renamed from 'name' / 'alpha-2'), derive the same
# 4-character key used on the migration side, then keep only the columns at
# positions 1 and 2 (the ISO code and the key).
iso = (
    iso_codes.iloc[:, :2]
    .rename(columns={'name': 'COUNTRY', 'alpha-2': 'ISO Code'})
    .assign(short=lambda frame: frame['COUNTRY'].str[:4])
    .iloc[:, 1:3]
)

# Left join on the 4-character key, so every migration row is kept.
# NOTE(review): several countries can share a 4-character prefix, which would
# produce duplicate matches — verify the row count after the merge.
iso_emigrants = emigrants_15.merge(iso, how="left", on=['short'])

In [None]:
# Manual data cleaning -> future check if values are correctly assigned!
#
# Row label -> corrected ISO code. The labels are hard-coded row labels of the
# merged frame above, so they must be re-checked whenever the input data or the
# join changes.
iso_code_fixes = {
    39: 'CV',
    40: 'MQ',   # As of now Martinique
    44: 'JE',   # As of now Jersey
    53: 'CI',
    59: 'CD',
    74: 'FO',
    125: 'LA',
    162: 'NA',  # the literal string 'NA'; pandas.read_csv parses it as missing by default
    193: 'PF',  # As of now French Polynesia
    197: 'RE',
    323: 'VA',
}

# Fail loudly if a hard-coded row is absent: .loc would otherwise silently
# append a new row instead of correcting an existing one.
missing_rows = [row for row in iso_code_fixes if row not in iso_emigrants.index]
if missing_rows:
    raise KeyError(f"Rows to fix are not in iso_emigrants: {missing_rows}")

# Cleaning ISO values.
# Assign through .loc: `iso_emigrants.xs(row)['ISO Code'] = ...` writes into a
# temporary row object returned by xs, so the DataFrame itself never changes.
for row, iso_code in iso_code_fixes.items():
    iso_emigrants.loc[row, 'ISO Code'] = iso_code

# We do not have values for North/South Korea



In [None]:
# Export the joined table as CSV (without the index column) so it can be
# joined in ArcGIS.
emigrants_csv_path = 'emi_15_flow.csv'
iso_emigrants.to_csv(emigrants_csv_path, encoding='utf-8', index=False)

In [50]:
# Import and initialize
import pandas as pd
import json
from math import isnan

In [None]:
# Reload the ArcGIS GeoJSON for further processing.
# The context manager closes the file even when json.load raises, which the
# manual open()/close() pair did not guarantee.
with open('i20.geoJson', 'r') as geojson_file:
    data = json.load(geojson_file)

In [None]:
# Each feature stores its flows as a string; turn single quotes into double
# quotes so the text can be parsed as JSON, then store the parsed value back.
for feature in data['features']:
    properties = feature['properties']
    flows_text = properties['flows'].replace("'", '"')
    properties['flows'] = json.loads(flows_text)

In [None]:
# Replace every feature's flows with the entry of iso_emigrants['flows'] at the
# same position, passing it through JSON on the way.
# NOTE(review): this pairs features and DataFrame rows purely by position —
# confirm both are in the same order.

# Phase 1: store the DataFrame's flows on each feature as JSON text.
for position, feature in enumerate(data['features']):
    feature['properties']['flows'] = json.dumps(iso_emigrants['flows'][position])

# Phase 2: parse that JSON text back into Python objects.
for feature in data['features']:
    properties = feature['properties']
    properties['flows'] = json.loads(properties['flows'])

In [None]:
# Give every feature's geometry a `centroid` entry: a two-element list built
# from the feature's X and Y properties.
# In the ArcGIS dataset X is the longitude and Y is the latitude.
for feature in data['features']:
    attributes = feature['properties']
    feature['geometry']['centroid'] = [attributes['X'], attributes['Y']]

In [None]:
# Spot check: display the properties of the feature at index 5.
data['features'][5]['properties']

In [None]:
# Write the processed feature collection to disk.
# Serialisation happens before the file is opened, so a serialisation error
# leaves any existing 'i20.json' untouched.
serialized_features = json.dumps(data)
with open('i20.json', 'w') as json_file:
    json_file.write(serialized_features)


Part to calculate the difference in migration between the years:

In [51]:
def _read_json(path):
    """Return the parsed content of the JSON file at `path`.

    The file is opened in a context manager, so it is closed even when
    parsing fails.
    """
    with open(path, 'r') as json_file:
        return json.load(json_file)


# Reload the processed JSON files for further processing.
i20 = _read_json('i20.json')
i15 = _read_json('i15.json')
i10 = _read_json('i10.json')

e20 = _read_json('e20.json')
e15 = _read_json('e15.json')
e10 = _read_json('e10.json')

In [None]:
## Iterate through the flows of every country and subtract the e15 value from
## the e20 value wherever both collections contain the same flow key.
#
# Result: a flat list of single-entry dicts {flow key: e20 value - e15 value},
# appended feature by feature.
# NOTE(review): features are paired by position, which assumes e20 and e15 list
# the same countries in the same order — confirm.

e15_20 = []

# zip() stops at the shorter collection, so a length mismatch cannot raise an
# IndexError.
for feature_20, feature_15 in zip(e20['features'], e15['features']):
    flows_20 = feature_20['properties']['flows']
    flows_15 = feature_15['properties']['flows']
    for key, value_15 in flows_15.items():
        # Only keys present in both collections can be matched; a key missing
        # from flows_20 previously raised a KeyError and aborted the cell.
        if key not in flows_20:
            continue
        e15_20.append({key: flows_20[key] - value_15})



In [52]:
# Build a version of e20 whose flows no longer contain zero-valued entries.
#
# dict.copy() is shallow: the copy shares the 'features' list and every nested
# dict with e20, so assigning the filtered flows through it also overwrote the
# flows of e20. Here each dict on the path to 'flows' is rebuilt, which leaves
# e20 untouched; everything else (e.g. geometry) is shared, not duplicated.
e20_w0 = dict(e20)
e20_w0['features'] = []

for feature in e20['features']:
    properties = dict(feature['properties'])
    properties['flows'] = {key: val for key, val in properties['flows'].items() if val != 0}
    e20_w0['features'].append({**feature, 'properties': properties})

# Write Json File
json_object = json.dumps(e20_w0)
with open('e20_w0.json', 'w') as outfile:
    outfile.write(json_object)
           

In [60]:
# Build a version of i20 whose flows drop zero-valued entries and hold every
# remaining value as a negated integer.
#
# dict.copy() is shallow: the copy shares the 'features' list and every nested
# dict with i20, so the negated flows were also written into i20 and re-running
# the cell flipped the signs back. Rebuilding each dict on the path to 'flows'
# leaves i20 untouched and makes the cell safe to re-run.
i20_w0 = dict(i20)
i20_w0['features'] = []

for feature in i20['features']:
    properties = dict(feature['properties'])
    # int() truncates any fractional part before the sign is flipped.
    properties['flows'] = {key: (int(val)*(-1)) for key, val in properties['flows'].items() if val != 0}
    i20_w0['features'].append({**feature, 'properties': properties})

# Write Json File
json_object = json.dumps(i20_w0)
with open('i20_w0.json', 'w') as outfile:
    outfile.write(json_object)

In [63]:
# Keep the first feature's flows in `feature` for interactive inspection.
feature = i20_w0['features'][0]['properties']['flows']

# Number of features in the collection; as the last expression of the cell this is what gets displayed.
len(i20_w0['features'])


228