# Analysis of migration data: inflow and outflow, local and US-level

In [1]:
import pysal as ps
import numpy as np
import pandas as pd
# US county shapefile; used below to build contiguity weights keyed by its GEOID field.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
counties = '/home/anna/data/FUTURES/counties/us_county.shp'
# CSV of migration flows with 'origin' and 'destination' columns and a run of
# columns spanning '1990' to '2015' that is summed per row below.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
migration_table = '/home/anna/Projects/FUTURES/migration/migration_table.csv'

## Process migration table

In [2]:
# Load the long-format table: one row per (origin, destination) pair.
# The third CSV column becomes a temporary index that is discarded when the
# (origin, destination) index is set further down.
df_migration = pd.read_csv(
    migration_table,
    index_col=2,
    dtype={"origin": "Int64", "destination": "int64"},
)

# Collapse the columns from '1990' through '2015' into a single total per pair.
year_columns = df_migration.loc[:, '1990':'2015'].columns
df_migration['sum'] = df_migration[year_columns].sum(axis=1)
df_migration = df_migration.drop(columns=year_columns)

# Discard rows whose origin is one of the 57xxx codes.
# NOTE(review): presumably non-county (e.g. foreign) origins - confirm against the data source.
excluded_origins = [57001, 57003, 57005, 57007]
df_migration = df_migration[~df_migration['origin'].isin(excluded_origins)]

# Flows from a county to itself are zeroed; the diagonal is re-estimated later.
same_county = df_migration['origin'] == df_migration['destination']
df_migration.loc[same_county, 'sum'] = 0

# Pivot into an origin (rows) x destination (columns) matrix; missing pairs become 0.
df_migration = (
    df_migration
    .set_index(['origin', 'destination'])
    .unstack(level=-1, fill_value=0)['sum']
)
df_migration

destination,1001,1003,1005,1007,1009,1011,1013,1015,1017,1019,...,56027,56029,56031,56033,56035,56037,56039,56041,56043,56045
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,0.0,240.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1003,162.0,0.0,0.0,0.0,21.0,0.0,34.0,154.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005,1545.0,11.0,0.0,0.0,0.0,480.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,1041.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009,2248.0,77.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56037,3354.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,30.0,0.0,116.0,689.0,0.0,37.0,1277.0,11.0,0.0
56039,2554.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,90.0,0.0,10.0,228.0,25.0,0.0,0.0,0.0,0.0
56041,1845.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,12.0,37.0,1372.0,0.0,0.0,0.0,0.0
56043,754.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,417.0,0.0,90.0,0.0,0.0,0.0,0.0,0.0,0.0


## Analyse inflow and outflow

In [3]:
# Per-county totals over the whole matrix:
#   inflow  = column sum (everything arriving in the county),
#   outflow = row sum (everything leaving the county).
# The column sums are aligned to the origin index by label, so destinations
# that never appear as an origin are not represented here.
# 'percentage' is the net flow relative to the larger of the two totals;
# it is NaN when both totals are 0.
# NOTE(review): the rendered output shows county 1001 with an inflow of about
# 18 million, orders of magnitude above the other rows shown - worth checking
# the destination values in the input table.
in_out = (
    pd.DataFrame(index=df_migration.index)
    .assign(
        inflow=df_migration.sum(axis=0),
        outflow=df_migration.sum(axis=1),
    )
    .assign(max=lambda flows: flows[['inflow', 'outflow']].max(axis=1))
    .assign(
        percentage=lambda flows: 100 * (flows['inflow'] - flows['outflow']) / flows['max']
    )
)
in_out

Unnamed: 0_level_0,inflow,outflow,max,percentage
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1001,18019116.0,22801.0,18019116.0,99.873462
1003,65663.0,44231.0,65663.0,32.639386
1005,5861.0,8980.0,8980.0,-34.732739
1007,7598.0,8078.0,8078.0,-5.942065
1009,21939.0,20821.0,21939.0,5.095948
...,...,...,...,...
56037,11459.0,17056.0,17056.0,-32.815432
56039,6155.0,10656.0,10656.0,-42.239114
56041,5287.0,8188.0,8188.0,-35.429897
56043,1026.0,2607.0,2607.0,-60.644419


## Neighborhood analysis

In [4]:
# Build queen-contiguity spatial weights from the county shapefile,
# using the GEOID attribute as the observation id.
qW = ps.queen_from_shapefile(counties, idVariable='GEOID')



In [5]:
# Attribute table of the county shapefile, indexed by GEOID.
# Passing the column values (rather than the column name) keeps GEOID as a column too.
dataframe = ps.pdio.read_files(counties)
dataframe = dataframe.set_index(dataframe['GEOID'])

In [6]:
# Dense form of the weights: Wmatrix is the full matrix and ids gives the
# observation ids in its row/column order (used below as both index and columns).
Wmatrix, ids = qW.full()

In [7]:
# Label the dense weights matrix with integer county ids (the ids are cast
# with int() so they can be matched against the migration matrix labels),
# then put rows and columns in ascending id order.
matrix_order_ids = list(map(int, ids))
df_neigh = pd.DataFrame(Wmatrix, index=matrix_order_ids, columns=matrix_order_ids)
int_ids = sorted(matrix_order_ids)
df_neigh = df_neigh.reindex(index=int_ids, columns=int_ids)
df_neigh

Unnamed: 0,1001,1003,1005,1007,1009,1011,1013,1015,1017,1019,...,72141,72143,72145,72147,72149,72151,72153,78010,78020,78030
1001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
72153,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78010,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [21]:
# Bring the migration matrix onto the same county set and order as the
# neighbourhood matrix: counties missing from the migration data get zero
# flows, and counties absent from the neighbourhood matrix are dropped.
df_migration2 = df_migration.reindex(
    index=df_neigh.index,
    columns=df_neigh.index,
    fill_value=0,
)

## Combine neighborhood analysis and in/out analysis

In [9]:
# Element-wise product with the neighbourhood matrix: a flow is kept only
# where the corresponding weight is non-zero, i.e. between neighbouring counties
# (the weights displayed above are 0/1).
df_multiplied = df_migration2 * df_neigh
df_multiplied


Unnamed: 0,1001,1003,1005,1007,1009,1011,1013,1015,1017,1019,...,72141,72143,72145,72147,72149,72151,72153,78010,78020,78030
1001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005,0.0,0.0,0.0,0.0,0.0,480.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
72153,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78010,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# In/out summary restricted to flows between neighbouring counties, placed
# side by side with the all-destinations figures from `in_out`.
#   inflow / outflow       : column / row sums of the neighbour-only matrix
#   percentage             : net flow relative to the larger total
#   percentage_scaled      : percentage / 2 + 50 (0 maps to 50)
#   all_*                  : the same quantities taken from `in_out`
# Counties missing from `in_out` get NaN in the all_* columns.
df_in_out2 = (
    pd.DataFrame(index=df_multiplied.index)
    .assign(
        inflow=df_multiplied.sum(axis=0),
        outflow=df_multiplied.sum(axis=1),
    )
    .assign(
        percentage=lambda flows: 100 * (flows['inflow'] - flows['outflow'])
        / flows[['inflow', 'outflow']].max(axis=1)
    )
    .assign(percentage_scaled=lambda flows: (flows['percentage'] / 2) + 50)
    .assign(
        all_inflow=in_out['inflow'],
        all_outflow=in_out['outflow'],
        all_percentage=in_out['percentage'],
        all_percentage_scaled=(in_out['percentage'] / 2) + 50,
    )
)
df_in_out2

Unnamed: 0,inflow,outflow,percentage,percentage_scaled,all_inflow,all_outflow,all_percentage,all_percentage_scaled
1001,20905.0,18281.0,12.552021,56.276011,18019116.0,22801.0,99.873462,99.936731
1003,32683.0,23542.0,27.968669,63.984334,65663.0,44231.0,32.639386,66.319693
1005,3765.0,4353.0,-13.507926,43.246037,5861.0,8980.0,-34.732739,32.633630
1007,7576.0,7023.0,7.299366,53.649683,7598.0,8078.0,-5.942065,47.028968
1009,20409.0,16780.0,17.781371,58.890685,21939.0,20821.0,5.095948,52.547974
...,...,...,...,...,...,...,...,...
72151,0.0,0.0,,,,,,
72153,0.0,0.0,,,,,,
78010,0.0,0.0,,,,,,
78020,0.0,0.0,,,,,,


## Transform to within-county migration

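Estimate the number of migrants who stay within each county. The scaled in/out percentage is used as the share of migrants that stay, so a ratio of 0 (inflow equals outflow) corresponds to a scaled value of 50, i.e. 50% of migrants are assumed to stay within the county.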

In [11]:
# Treat the scaled percentage p as the share of migrants staying in the county:
#   within / (within + outflow) = p / 100   =>   within = outflow * p / (100 - p)
# The result is NaN wherever p is NaN (county missing from the in/out table).
within_migrants = (
    df_migration2.sum(axis=1)
    .mul(df_in_out2['all_percentage_scaled'])
    .div(100 - df_in_out2['all_percentage_scaled'])
)

Update diagonal of migration matrix with these values.

In [12]:
# Write the estimated within-county migrants onto the diagonal of the matrix.
#
# The diagonal is filled positionally, so the values are first aligned
# explicitly to the matrix row order instead of relying on the Series
# happening to be in the same order.
diagonal_values = within_migrants.reindex(df_migration2.index).to_numpy()

# to_numpy(copy=True) always returns an independent, writable array.
# `.values` may instead return a view, which is read-only when pandas
# Copy-on-Write is active and would make np.fill_diagonal raise.
array = df_migration2.to_numpy(copy=True)
np.fill_diagonal(array, diagonal_values)

df_migration3 = pd.DataFrame(
    array,
    index=df_migration2.index,
    columns=df_migration2.columns,
)

In [13]:
# Convert each row to percentages of its row total (diagonal included).
# Cells that come out as NaN (e.g. rows whose total is 0, or a NaN diagonal)
# are set to 0.
df_migration_adjusted = (
    df_migration3
    .div(df_migration3.sum(axis=1), axis='index')
    .mul(100)
    .fillna(0)
)

In [20]:
# Export results with header row and index column, without an index label.
csv_options = dict(header=True, index=True, index_label=None)

# Row-percentage migration matrix including within-county migration.
df_migration_adjusted.to_csv(
    '/home/anna/Projects/FUTURES/migration/migration_matrix_within.csv',
    **csv_options
)

# In/out ratios: neighbour-only ('percentage') and all destinations ('all_percentage').
df_in_out2.to_csv(
    '/home/anna/Projects/FUTURES/migration/migration_inoutratio.csv',
    columns=['percentage', 'all_percentage'],
    **csv_options
)