In [95]:
import geopandas as gpd
import numpy as np
import pandas as pd
from pyproj import CRS
from shapely import wkt

from setup import *

In [96]:
# shapefile of census blocks 2010 
gblk = pd.read_csv(data_dir+'data_raw/CensusBlockTIGER2010.csv')
# craete a geometric object
gblk['the_geom'] = gblk['the_geom'].apply(wkt.loads)
# create the geo dataframe
block_gdf = gpd.GeoDataFrame(gblk, geometry='the_geom')
# Calculate the area of the blocks/polygons
block_gdf.crs = CRS('epsg:4326')
block_gdf = block_gdf.to_crs("epsg:26916")

In [97]:
tract_gdf = block_gdf.dissolve(by=['STATEFP10','COUNTYFP10','TRACTCE10'], 
                               as_index=False)[['STATEFP10','COUNTYFP10','TRACTCE10','the_geom']]
tract_gdf = tract_gdf.sort_values(by=['STATEFP10','COUNTYFP10','TRACTCE10'])
tract_gdf['area'] = tract_gdf['the_geom'].map(lambda p:p.area)


In [98]:
tract_gdf['GEOID10'] = tract_gdf['STATEFP10'].astype(str)+"_"+tract_gdf['COUNTYFP10'].astype(str)+"_"+tract_gdf['TRACTCE10'].astype(str)

In [99]:
tract_gdf.head()


Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,the_geom,area,GEOID10
0,17,31,10100,"POLYGON ((444549.916 4652143.244, 444519.037 4...",380786.520453,17_31_10100
1,17,31,10201,"POLYGON ((443435.773 4651384.784, 443429.832 4...",504106.868051,17_31_10201
2,17,31,10202,"POLYGON ((444070.591 4651397.512, 444070.177 4...",351465.386243,17_31_10202
3,17,31,10300,"POLYGON ((444506.712 4651405.077, 444506.245 4...",471676.98294,17_31_10300
4,17,31,10400,"POLYGON ((445292.636 4649795.935, 445292.560 4...",548357.927006,17_31_10400


In [100]:
tract_centroid = tract_gdf['the_geom'].centroid
tract_polygon = tract_gdf['the_geom']

In [101]:
# self connections not included here. Added later in convolution layers.

# 1. Euclidean

In [102]:
euc_dist = tract_centroid.apply(lambda x: tract_centroid.distance(x)).to_numpy().flatten()

In [103]:
census_adj = pd.DataFrame(np.array([np.repeat(tract_gdf['GEOID10'].to_numpy(), len(tract_gdf)), 
                       np.tile(tract_gdf['GEOID10'].to_numpy(), len(tract_gdf)),
                       euc_dist]).T, columns=['start_tract','end_tract','euc'])

In [104]:
th = 3000
# around 5% considered connected (1), the rest decays with distance
census_adj.loc[(0<census_adj['euc'])&(census_adj['euc']<3000),'euc']  = 1
census_adj.loc[census_adj['euc']>3000,'euc']  = 3000 / census_adj.loc[census_adj['euc']>3000]['euc']

# 2. Adjacent Connection

In [105]:
touch = tract_polygon.apply(lambda x: tract_polygon.touches(x)).to_numpy(dtype=np.float32).flatten()

In [106]:
census_adj['con'] = touch

# 3. Functional Similarity

In [107]:
spatial = pd.read_csv(project_data_dir+"data_processed/census_tract/other/spatial.csv")
spatial['pct_adults'] = spatial['pct25_34yrs']+spatial['pct35_50yrs']
for i in ['tot_population', 'jobs', 'avg_tt_to_work','inc_per_capita',
          'entertainment', 'restaurant', 'school', 'shop']:
    spatial.loc[:,i] = spatial[i]/spatial[i].max()
s = spatial[['tot_population','pct_adults','pctover65yrs',
         'pctwhite_alone', 'jobs', 
         'pctPTcommute','avg_tt_to_work','inc_per_capita',
         'entertainment', 'restaurant', 'school', 'shop']]

In [108]:
n = len(spatial)

In [109]:
s1 = np.transpose(np.tile(s, (n,1,1)),(1,0,2))
s2 = np.tile(s, (n,1,1))
s3 = np.sum(np.sqrt((s1-s2)*(s1-s2)), axis=2)
s4 = s3[(s3 > 0)].min()/s3
s4 = s4.flatten()
s4[~np.isfinite(s4)] = 0

  s4 = s3[(s3 > 0)].min()/s3


In [110]:
ids=spatial['STATION_ID'].astype(int).to_numpy()
i1 = np.transpose(np.tile(ids, (n,1)), (1,0)).flatten()
i2 = np.tile(ids, (n,1)).flatten()

KeyError: 'STATION_ID'

In [None]:
func_adj = pd.DataFrame(np.array([np.repeat(spatial['GEOID10'].to_numpy(), n), 
                       np.tile(spatial['GEOID10'].to_numpy(), n),
                       s4]).T, columns=['start_tract','end_tract','func'])

In [None]:
census_adj = pd.merge(census_adj, func_adj, on=['start_tract', 'end_tract'], how='left').fillna(0)

In [None]:
import matplotlib.pyplot as plt
plt.imshow(func_adj.pivot('start_tract','end_tract','func').to_numpy(dtype=np.float32))
plt.colorbar();

In [None]:
census_adj.to_csv(data_dir+"data_processed/census_tract/other/adjlist.csv", index=False)