# Neighbor migration analysis

In [59]:
import pysal as ps
import numpy as np
import pandas as pd
counties = '/home/anna/data/FUTURES/counties/us_county.shp'
migration_table = '/home/anna/Projects/FUTURES/migration/migration_table.csv'

## Process migration table

In [60]:
df_migration = pd.read_csv(migration_table, index_col=2,  dtype={"origin": "Int64", "destination": "int64"})
# sum over years
df_migration['sum'] = df_migration.loc[:,'1990':'2015'].sum(1)
df_migration = df_migration.drop(columns=df_migration.loc[:,'1990':'2015'])
df_migration = df_migration[~df_migration.origin.isin([57001, 57003, 57005, 57007])]
df_migration.loc[df_migration.origin == df_migration.destination, 'sum'] = 0
# transform into matrix
df_migration.set_index(['origin', 'destination'], inplace=True)
df_migration = df_migration.unstack(level=-1, fill_value=0)['sum']
df_migration = df_migration.div(df_migration.sum(axis=1), axis=0) * 100
df_migration

destination,1001,1003,1005,1007,1009,1011,1013,1015,1017,1019,...,56027,56029,56031,56033,56035,56037,56039,56041,56043,56045
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,0.000000,1.052585,0.0,0.0,0.000000,0.000000,0.048243,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
1003,0.366259,0.000000,0.0,0.0,0.047478,0.000000,0.076869,0.348172,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
1005,17.204900,0.122494,0.0,0.0,0.000000,5.345212,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
1007,12.886853,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
1009,10.796792,0.369819,0.0,0.0,0.000000,0.000000,0.000000,0.321790,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56037,19.664634,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.175891,0.0,0.680113,4.039634,0.000000,0.216932,7.487101,0.064493,0.0
56039,23.967718,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.844595,0.0,0.093844,2.139640,0.234610,0.000000,0.000000,0.000000,0.0
56041,22.532975,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.146556,0.451881,16.756229,0.000000,0.000000,0.000000,0.0
56043,28.922133,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,15.995397,0.0,3.452244,0.000000,0.000000,0.000000,0.000000,0.000000,0.0


## Neighborhood analysis

In [61]:
qW = ps.queen_from_shapefile(counties, idVariable='GEOID')



In [62]:
dataframe = ps.pdio.read_files(counties)
dataframe.set_index(dataframe.GEOID, inplace=True)

In [64]:
Wmatrix, ids = qW.full()

In [89]:
int_ids = [int(i) for i in ids]
df_neigh = pd.DataFrame(data=Wmatrix, index=int_ids, columns=int_ids)
int_ids = sorted(int_ids)
df_neigh = df_neigh.reindex(columns=int_ids)
df_neigh = df_neigh.reindex(int_ids)
df_neigh

Unnamed: 0,1001,1003,1005,1007,1009,1011,1013,1015,1017,1019,...,72141,72143,72145,72147,72149,72151,72153,78010,78020,78030
1001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
72153,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78010,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [90]:
df_migration2 = df_migration.reindex(df_neigh.index, fill_value=0)
df_migration2 = df_migration2.reindex(columns=df_neigh.index, fill_value=0)
df_migration2

Unnamed: 0,1001,1003,1005,1007,1009,1011,1013,1015,1017,1019,...,72141,72143,72145,72147,72149,72151,72153,78010,78020,78030
1001,0.000000,1.052585,0.0,0.0,0.000000,0.000000,0.048243,0.000000,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
1003,0.366259,0.000000,0.0,0.0,0.047478,0.000000,0.076869,0.348172,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
1005,17.204900,0.122494,0.0,0.0,0.000000,5.345212,0.000000,0.000000,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
1007,12.886853,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
1009,10.796792,0.369819,0.0,0.0,0.000000,0.000000,0.000000,0.321790,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72151,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
72153,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
78010,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
78020,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [91]:
df_multiplied = df_migration2.multiply(df_neigh)
df_multiplied


Unnamed: 0,1001,1003,1005,1007,1009,1011,1013,1015,1017,1019,...,72141,72143,72145,72147,72149,72151,72153,78010,78020,78030
1001,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1003,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005,0.0,0.0,0.0,0.0,0.0,5.345212,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72151,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
72153,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78010,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78020,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [95]:
df_neigh_inverted = df_neigh.copy()
df_neigh_inverted[df_neigh_inverted == 1] = -1
df_neigh_inverted[df_neigh_inverted == 0] = 1
df_neigh_inverted[df_neigh_inverted == -1] = 0
df_multiplied_inverted = df_migration2.multiply(df_neigh_inverted)

In [94]:
results = pd.DataFrame(index=df_migration2.index)
results['neigbors_max'] = df_multiplied.max(1)
results['neigbors_median'] = df_multiplied.median(1)
results['neigbors_sum'] = df_multiplied.sum(1)
results['not_neigbors_max'] = df_multiplied_inverted.max(1)
results['not_neigbors_median'] = df_multiplied_inverted.median(1)
results['not_neigbors_sum'] = df_multiplied_inverted.sum(1)
results

Unnamed: 0,neigbors_max,neigbors_median,neigbors_sum,not_neigbors_max,not_neigbors_median,not_neigbors_sum
1001,36.634358,0.0,80.176308,4.311214,0.0,19.823692
1003,34.032692,0.0,53.225114,5.756144,0.0,46.774886
1005,12.249443,0.0,48.474388,17.204900,0.0,51.525612
1007,29.920772,0.0,86.939837,12.886853,0.0,13.060163
1009,42.509966,0.0,80.591710,10.796792,0.0,19.408290
...,...,...,...,...,...,...
72151,0.000000,0.0,0.000000,0.000000,0.0,0.000000
72153,0.000000,0.0,0.000000,0.000000,0.0,0.000000
78010,0.000000,0.0,0.000000,0.000000,0.0,0.000000
78020,0.000000,0.0,0.000000,0.000000,0.0,0.000000


In [99]:
results.to_csv('/home/anna/Projects/FUTURES/migration/migration_analysis.csv', header=True, index=True, index_label=None)

In [101]:
origin = '37101'
self_and_neighbors = [origin]
self_and_neighbors.extend(qW.neighbors[origin])
print(self_and_neighbors)
df_migration2.loc[map(int, self_and_neighbors), 37000:38000]

['37101', '37191', '37183', '37163', '37069', '37127', '37195', '37085']


Unnamed: 0,37001,37003,37005,37007,37009,37011,37013,37015,37017,37019,...,37181,37183,37185,37187,37189,37191,37193,37195,37197,37199
37101,0.43964,0.0,0.0,0.0,0.0,0.0,0.036206,0.0,0.0,0.479294,...,0.0,43.351953,0.0,0.0,0.0,5.832557,0.0,4.339506,0.0,0.0
37191,0.117796,0.0,0.0,0.0,0.0,0.0,0.071122,0.0,0.0,0.213367,...,0.0,9.752628,0.0,0.0,0.0,0.0,0.0,4.289556,0.0,0.0
37183,0.779515,0.002278,0.0,0.0,0.037586,0.007517,0.188387,0.045787,0.092713,0.500694,...,0.413448,0.0,0.15809,0.018224,0.299323,0.584522,0.048065,0.600923,0.007062,0.0
37163,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.237544,0.422748,...,0.0,8.208354,0.0,0.0,0.0,5.777554,0.0,0.065425,0.0,0.0
37069,0.062174,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045594,...,9.205836,51.007212,2.163641,0.0,0.0,0.0,0.0,0.580287,0.0,0.0
37127,0.066087,0.0,0.0,0.0,0.0,0.0,0.1555,0.0,0.0,0.0,...,0.04665,13.582911,0.064144,0.0,0.019437,0.828037,0.0,13.04838,0.0,0.0
37195,0.0,0.0,0.0,0.0,0.0,0.0,0.147302,0.035927,0.0,0.0,...,0.0,14.651146,0.0,0.0,0.0,6.833369,0.0,0.0,0.0,0.0
37085,0.171961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.17728,0.391788,...,0.0,19.800383,0.0,0.0,0.0,0.489292,0.0,0.093958,0.0,0.0
