# Analysis of migration data: inflow and outflow, local and US-level

In [2]:
import pysal as ps
import numpy as np
import pandas as pd
# Input locations for this analysis. These are absolute paths on the author's
# machine and must be adjusted when the notebook is run elsewhere.
# County shapefile (not referenced by the cells shown in this notebook).
counties = '/home/anna/data/FUTURES/counties/us_county.shp'
# CSV of county-to-county migration counts, read in "Process migration table".
migration_table = '/home/anna/Projects/FUTURES/migration/migration_table.csv'
# ':'-separated lookup from county FIPS code to MSA code.
msa_table = '/home/anna/Projects/FUTURES/data/recode_counties_MSA.txt'

## Process migration table

In [3]:
# County FIPS -> MSA code lookup. The file is ':'-separated and read without a
# header row; only the first and third fields are kept and named here.
msa = pd.read_csv(
    msa_table,
    sep=':',
    usecols=[0, 2],
    names=['FIPS', 'msa'],
    dtype={'FIPS': 'int64', 'msa': 'int64'},
)
msa

Unnamed: 0,FIPS,msa
0,48253,10180
1,48059,10180
2,48441,10180
3,39133,10420
4,39153,10420
...,...,...
1170,42085,49660
1171,39099,49660
1172,6115,49700
1173,6101,49700


In [5]:
# Load the migration table; the third column of the file becomes the row index.
# 'origin' uses the nullable integer dtype, 'destination' a plain int64.
df_migration = pd.read_csv(
    migration_table,
    index_col=2,
    dtype={"origin": "Int64", "destination": "int64"},
)

# Collapse the yearly columns '1990'..'2015' into a single total per row and
# drop the yearly columns afterwards.
year_columns = df_migration.loc[:, '1990':'2015'].columns
df_migration = (
    df_migration
    .assign(sum=df_migration[year_columns].sum(axis=1))
    .drop(columns=year_columns)
)

# Discard rows coming from these origin codes.
# NOTE(review): presumably non-county pseudo-FIPS codes - confirm and document.
excluded_origins = [57001, 57003, 57005, 57007]
df_migration = df_migration[~df_migration['origin'].isin(excluded_origins)]

# Flows whose origin and destination are the same county do not count as migration.
stays_in_county = df_migration['origin'] == df_migration['destination']
df_migration.loc[stays_in_county, 'sum'] = 0
df_migration

Unnamed: 0_level_0,origin,destination,sum
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
201653033,2016,53033,1011.0
201602020,2016,2020,1138.0
201606073,2016,6073,382.0
201653035,2016,53035,170.0
201653053,2016,53053,90.0
...,...,...,...
4845337057,48453,37057,22.0
4845355009,48453,55009,22.0
4845309007,48453,9007,20.0
4845354039,48453,54039,20.0


## Aggregate to MSA

In [6]:
# Aggregate the origin column: replace each origin county FIPS by its MSA code.
# Origins without a match in the lookup keep their own code as region id.
origin_lookup = df_migration.merge(msa, left_on='origin', right_on='FIPS', how="left")
df1 = (
    origin_lookup
    .assign(msa=origin_lookup['msa'].fillna(origin_lookup['origin']))
    .drop(columns=["origin", "FIPS"])
    .rename(columns={'msa': 'origin'})
    [['origin', 'destination', 'sum']]
)
display(df1)

Unnamed: 0,origin,destination,sum
0,2016.0,53033,1011.0
1,2016.0,2020,1138.0
2,2016.0,6073,382.0
3,2016.0,53035,170.0
4,2016.0,53053,90.0
...,...,...,...
167257,12420.0,37057,22.0
167258,12420.0,55009,22.0
167259,12420.0,9007,20.0
167260,12420.0,54039,20.0


In [7]:
# Aggregate the destination column: replace each destination county FIPS by its
# MSA code. Destinations without a match in the lookup keep their own code.
destination_lookup = df1.merge(msa, left_on='destination', right_on='FIPS', how="left")
df2 = (
    destination_lookup
    .assign(msa=destination_lookup['msa'].fillna(destination_lookup['destination']))
    .drop(columns=["destination", "FIPS"])
    .rename(columns={'msa': 'destination'})
    [['origin', 'destination', 'sum']]
)
display(df2)

Unnamed: 0,origin,destination,sum
0,2016.0,42660.0,1011.0
1,2016.0,2020.0,1138.0
2,2016.0,41740.0,382.0
3,2016.0,14740.0,170.0
4,2016.0,42660.0,90.0
...,...,...,...
167257,12420.0,49180.0,22.0
167258,12420.0,24580.0,22.0
167259,12420.0,25540.0,20.0
167260,12420.0,16620.0,20.0


Set the flow to 0 wherever origin and destination fall into the same MSA, so that migration within an MSA is ignored.

In [14]:
# After the recoding above, rows whose origin and destination carry the same
# region code describe moves inside one region; zero them out.
within_same_region = df2['origin'].eq(df2['destination'])
df2.loc[within_same_region, 'sum'] = 0
df2

Unnamed: 0,origin,destination,sum
0,2016.0,42660.0,1011.0
1,2016.0,2020.0,1138.0
2,2016.0,41740.0,382.0
3,2016.0,14740.0,170.0
4,2016.0,42660.0,90.0
...,...,...,...
167257,12420.0,49180.0,22.0
167258,12420.0,24580.0,22.0
167259,12420.0,25540.0,20.0
167260,12420.0,16620.0,20.0


In [18]:
# Total flow per (origin, destination) region pair. The grouping keys stay
# regular columns, and all three columns are cast to 32-bit integers.
df3 = (
    df2
    .groupby(["origin", "destination"], as_index=False)['sum']
    .sum()
    .astype('int32')
)
df3

Unnamed: 0,origin,destination,sum
0,1005,1005,0
1,1005,1011,480
2,1005,1031,90
3,1005,1045,1100
4,1005,1087,11
...,...,...,...
80804,56045,56001,71
80805,56045,56005,653
80806,56045,56011,107
80807,56045,56033,11


## Transform to matrix

In [19]:
# Pivot the long (origin, destination, sum) table into an origin x destination
# matrix; region pairs without any recorded flow are filled with 0.
# set_index() is applied without inplace=True so that df3 keeps its columns and
# this cell can be re-run without raising a KeyError.
df_migration_MSA = (
    df3
    .set_index(['origin', 'destination'])['sum']
    .unstack(level=-1, fill_value=0)
)
df_migration_MSA

destination,1005,1011,1013,1017,1019,1023,1025,1027,1029,1031,...,56027,56029,56031,56033,56035,56037,56039,56041,56043,56045
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1005,0,480,0,0,0,0,0,0,0,90,...,0,0,0,0,0,0,0,0,0,0
1011,433,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1013,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1017,0,0,0,0,0,0,0,12,0,0,...,0,0,0,0,0,0,0,0,0,0
1019,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56037,0,0,0,0,0,0,0,0,0,0,...,0,30,0,116,689,0,37,1277,11,0
56039,0,0,0,0,0,0,0,0,0,0,...,0,90,0,10,228,25,0,0,0,0
56041,0,0,0,0,0,0,0,0,0,0,...,0,0,0,12,37,1372,0,0,0,0
56043,0,0,0,0,0,0,0,0,0,0,...,0,417,0,90,0,0,0,0,0,0


## Analyse inflow and outflow

In [22]:
# Per-region totals: column sums are the flows arriving in a region, row sums
# the flows leaving it. Both are aligned on the matrix's row (origin) labels.
df_in_out = pd.DataFrame(
    {
        'inflow': df_migration_MSA.sum(axis=0),
        'outflow': df_migration_MSA.sum(axis=1),
    },
    index=df_migration_MSA.index,
)
# 'percentage' is the net flow relative to the larger of the two totals and lies
# in [-100, 100]; 'percentage_scaled' maps that range linearly onto [0, 100].
df_in_out = df_in_out.assign(
    max=lambda flows: flows[['inflow', 'outflow']].max(axis=1),
    percentage=lambda flows: 100 * (flows['inflow'] - flows['outflow']) / flows['max'],
    percentage_scaled=lambda flows: (flows['percentage'] / 2) + 50,
)
df_in_out

Unnamed: 0_level_0,inflow,outflow,max,percentage,percentage_scaled
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1005,5861,8980,8980,-34.732739,32.633630
1011,2254,3657,3657,-38.364780,30.817610
1013,3447,4357,4357,-20.885931,39.557035
1017,11657,14953,14953,-22.042400,38.978800
1019,7063,7566,7566,-6.648163,46.675919
...,...,...,...,...,...
56037,11459,17056,17056,-32.815432,33.592284
56039,6155,10656,10656,-42.239114,28.880443
56041,5287,8188,8188,-35.429897,32.285051
56043,1026,2607,2607,-60.644419,19.677791


## Transform to within-county migration

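Estimate the number of migrants who stay within their own region (county or MSA). The scaled in/out percentage is taken as the share of migrants that stay, so a net ratio of 0 (inflow equal to outflow) means that 50% of the migrants stay.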

In [23]:
# Solve  within / (within + outflow) = percentage_scaled / 100  for `within`.
# The outflow is taken from df_in_out instead of being recomputed from
# df_migration_MSA: a later cell writes these estimates onto the matrix diagonal,
# after which the row sums no longer equal the outflow and re-running this cell
# would give different numbers.
stay_share = df_in_out['percentage_scaled']
within_migrants = (df_in_out['outflow'] * stay_share) / (100 - stay_share)
# Regions without any outflow yield 0/0 (NaN). Writing NaN into the integer
# matrix would store an arbitrary value, so these regions get 0 instead.
# NOTE(review): for regions with inflow but no outflow the stay share is 100%,
# yet the count is indeterminate; 0 is a conservative placeholder - confirm.
within_migrants = within_migrants.replace([np.inf, -np.inf], np.nan).fillna(0)

Update diagonal of migration matrix with these values.

In [24]:
# Put the within-region estimates on the matrix "diagonal" by label rather than
# by position: np.fill_diagonal is purely positional and would write to the wrong
# cells if the row (origin) and column (destination) labels were not the exact
# same sequence. When they are identical, the result equals a positional fill.
shared_regions = df_migration_MSA.index.intersection(df_migration_MSA.columns)
row_positions = df_migration_MSA.index.get_indexer(shared_regions)
column_positions = df_migration_MSA.columns.get_indexer(shared_regions)
# Work on a copy so the frame's underlying buffer is not modified in place.
array = df_migration_MSA.values.copy()
# The assignment casts the estimates to the dtype of the matrix.
array[row_positions, column_positions] = within_migrants.reindex(shared_regions).values
df_migration_MSA = pd.DataFrame(array, index=df_migration_MSA.index, columns=df_migration_MSA.columns)

In [25]:
# Inspect the matrix after its diagonal has been replaced by the within-region estimates.
df_migration_MSA

destination,1005,1011,1013,1017,1019,1023,1025,1027,1029,1031,...,56027,56029,56031,56033,56035,56037,56039,56041,56043,56045
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1005,4350,480,0,0,0,0,0,0,0,90,...,0,0,0,0,0,0,0,0,0,0
1011,433,1629,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1013,0,0,2851,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1017,0,0,0,9551,0,0,0,12,0,0,...,0,0,0,0,0,0,0,0,0,0
1019,0,0,0,0,6622,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56037,0,0,0,0,0,0,0,0,0,0,...,0,30,0,116,689,8627,37,1277,11,0
56039,0,0,0,0,0,0,0,0,0,0,...,0,90,0,10,228,25,4327,0,0,0
56041,0,0,0,0,0,0,0,0,0,0,...,0,0,0,12,37,1372,0,3903,0,0
56043,0,0,0,0,0,0,0,0,0,0,...,0,417,0,90,0,0,0,0,638,0


Compute migration probabilities by normalizing each row (origin) so that it sums to 100, i.e. expressed as percentages:

In [29]:
# Express every row as percentages of its row total. Rows whose total is 0
# produce NaN from the division and are set to 0.
row_totals = df_migration_MSA.sum(axis=1)
df_migration_adjusted = (df_migration_MSA.div(row_totals, axis=0) * 100).fillna(0)
df_migration_adjusted

destination,1005,1011,1013,1017,1019,1023,1025,1027,1029,1031,...,56027,56029,56031,56033,56035,56037,56039,56041,56043,56045
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1005,32.633158,3.600900,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.0,0.675169,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1011,8.191449,30.817253,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.0,0.000000,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1013,0.000000,0.000000,39.553274,0.00000,0.000000,0.0,0.0,0.000000,0.0,0.000000,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1017,0.000000,0.000000,0.000000,38.97731,0.000000,0.0,0.0,0.048972,0.0,0.000000,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1019,0.000000,0.000000,0.000000,0.00000,46.673245,0.0,0.0,0.000000,0.0,0.000000,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56037,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.0,0.000000,...,0.0,0.116809,0.0,0.451661,2.682708,33.590313,0.144064,4.972161,0.042830,0.000000
56039,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.0,0.000000,...,0.0,0.600681,0.0,0.066742,1.521725,0.166856,28.879397,0.000000,0.000000,0.000000
56041,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.0,0.000000,...,0.0,0.000000,0.0,0.099247,0.306013,11.347283,0.000000,32.280208,0.000000,0.000000
56043,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.0,0.000000,...,0.0,12.850539,0.0,2.773498,0.000000,0.000000,0.000000,0.000000,19.661017,0.000000


In [28]:
# Write the percentage matrix, including its header row and row labels.
df_migration_adjusted.to_csv('/home/anna/Projects/FUTURES/migration/migration_matrix_within.csv')
# Write only the net in/out 'percentage' column, keyed by the region id.
df_in_out.to_csv(
    '/home/anna/Projects/FUTURES/migration/migration_inoutratio.csv',
    columns=['percentage'],
)