# Constructing local centralization functions

All the data and supplementary files can be found at: https://github.com/renanxcortes/inequality-segregation-supplementary-files

In [1]:
import pandas as pd
import pysal as ps
import geopandas as gpd
import scipy
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from scipy.ndimage.interpolation import shift

import os



In [2]:
# Move to the root of the supplementary-files checkout so that the relative
# paths used below ('data/...', 'Tracts_grouped_by_County/...') resolve.
# Set the INEQ_SEG_ROOT environment variable to point at your own checkout;
# when it is unset the original author's location is used.
os.chdir(os.environ.get('INEQ_SEG_ROOT',
                        'C:/Users/renan/Desktop/inequality-segregation-supplementary-files/'))

## Reading data

In [3]:
# Census counts: keep the Riverside County tracts and the columns used below.
census_2010 = pd.read_csv('data/std_2010_fullcount.csv', encoding="ISO-8859-1", sep=",")
df = census_2010.loc[census_2010.county == "Riverside County",
                     ['trtid10', 'tract', 'pop10', 'nhblk10']]

# Tract geometries; GEOID10 is converted to a number so it can be matched
# against the numeric 'trtid10' key of the census table.
map_gpd = gpd.read_file('Tracts_grouped_by_County/06065.json')
map_gpd['INTGEOID10'] = pd.to_numeric(map_gpd["GEOID10"])
df_map = map_gpd.merge(df, left_on='INTGEOID10', right_on='trtid10')

# Attach the centroid coordinates of every tract as plain columns.
df_mp = df_map[['GEOID10', 'tract', 'geometry', 'pop10', 'nhblk10']].pipe(
    lambda tracts: tracts.assign(
        c_lons=tracts.centroid.map(lambda pt: pt.x),
        c_lats=tracts.centroid.map(lambda pt: pt.y),
    )
)
df_mp.head()

Unnamed: 0,GEOID10,tract,geometry,pop10,nhblk10,c_lons,c_lats
0,6065042012,Census Tract 420.12,"POLYGON ((-117.319414 33.902109, -117.322528 3...",6242,677,-117.320504,33.910879
1,6065041911,Census Tract 419.11,"POLYGON ((-117.504056 33.800257, -117.502758 3...",10258,844,-117.495803,33.74289
2,6065041910,Census Tract 419.10,"POLYGON ((-117.472451 33.762031, -117.475661 3...",6342,405,-117.492951,33.786264
3,6065040816,Census Tract 408.16,"POLYGON ((-117.5285 33.89736, -117.529425 33.8...",2594,346,-117.524611,33.902457
4,6065040815,Census Tract 408.15,"POLYGON ((-117.501299 33.89424899999999, -117....",3586,429,-117.5115,33.893075


Keep only the first unit and its $k$ nearest neighbours (here $k = 3$); these rows form the local context used in the worked example below.

In [4]:
# Nearest-neighbour structure over the tract centroids (3 neighbours per tract).
points = [(lon, lat) for lon, lat in zip(df_mp.c_lons, df_mp.c_lats)]
kd = ps.cg.kdtree.KDTree(np.asarray(points))
wnnk = ps.weights.KNN(kd, k=3)

In [5]:
# Row positions of the first unit's neighbours, followed by the unit itself.
# The neighbour list is copied first: appending to the list held inside
# `wnnk.neighbors` would alter the weights object and make this cell add the
# focal id again every time it is re-run.
x = list(list(wnnk.neighbors.values())[0])
x.append(list(wnnk.neighbors.keys())[0])

In [6]:
# Keep the neighbours and, as the last row, the focal unit.
# NOTE: this overwrites `df_mp`, so the cell is meant to be run once per
# fresh load of the data.
df_mp = df_mp.iloc[x]
df_mp.head()

Unnamed: 0,GEOID10,tract,geometry,pop10,nhblk10,c_lons,c_lats
429,6065042217,Census Tract 422.17,"POLYGON ((-117.292139 33.920173, -117.292018 3...",5461,522,-117.322242,33.927844
268,6065042014,Census Tract 420.14,"POLYGON ((-117.31399 33.898499, -117.31416 33....",10516,1045,-117.322609,33.891047
430,6065042013,Census Tract 420.13,"POLYGON ((-117.313874 33.888199, -117.313816 3...",8333,743,-117.301931,33.894414
0,6065042012,Census Tract 420.12,"POLYGON ((-117.319414 33.902109, -117.322528 3...",6242,677,-117.320504,33.910879


In [7]:
# Longitude of the focal unit, which sits in the last row of the subset.
df_mp['c_lons'].iloc[-1]

-117.32050396496223

In [8]:
# xi: population of the group of interest; yi: everybody else.
# The centre of the local context is the centroid of the last row (the focal unit).
df_mp = df_mp.assign(
    xi=df_mp['nhblk10'],
    yi=df_mp['pop10'] - df_mp['nhblk10'],
    center_lon=df_mp['c_lons'].iloc[-1],
    center_lat=df_mp['c_lats'].iloc[-1],
)

# Euclidean distance of every centroid to that centre, then order by it.
df_mp['center_dist'] = np.sqrt(
    (df_mp['c_lons'] - df_mp['center_lon']) ** 2
    + (df_mp['c_lats'] - df_mp['center_lat']) ** 2
)
df_mp_sort_cent = df_mp.sort_values(by='center_dist')
df_mp_sort_cent.head()

Unnamed: 0,GEOID10,tract,geometry,pop10,nhblk10,c_lons,c_lats,xi,yi,center_lon,center_lat,center_dist
0,6065042012,Census Tract 420.12,"POLYGON ((-117.319414 33.902109, -117.322528 3...",6242,677,-117.320504,33.910879,677,5565,-117.320504,33.910879,0.0
429,6065042217,Census Tract 422.17,"POLYGON ((-117.292139 33.920173, -117.292018 3...",5461,522,-117.322242,33.927844,522,4939,-117.320504,33.910879,0.017053
268,6065042014,Census Tract 420.14,"POLYGON ((-117.31399 33.898499, -117.31416 33....",10516,1045,-117.322609,33.891047,1045,9471,-117.320504,33.910879,0.019944
430,6065042013,Census Tract 420.13,"POLYGON ((-117.313874 33.888199, -117.313816 3...",8333,743,-117.301931,33.894414,743,7590,-117.320504,33.910879,0.024821


In [9]:
# Totals of each group inside the local context.
X = df_mp_sort_cent['xi'].sum()
Y = df_mp_sort_cent['yi'].sum()

# Cumulative shares, accumulated from the centre outwards.
df_mp_sort_cent['Xi'] = df_mp_sort_cent['xi'].cumsum() / X
df_mp_sort_cent['Yi'] = df_mp_sort_cent['yi'].cumsum() / Y
df_mp_sort_cent.head()

Unnamed: 0,GEOID10,tract,geometry,pop10,nhblk10,c_lons,c_lats,xi,yi,center_lon,center_lat,center_dist,Xi,Yi
0,6065042012,Census Tract 420.12,"POLYGON ((-117.319414 33.902109, -117.322528 3...",6242,677,-117.320504,33.910879,677,5565,-117.320504,33.910879,0.0,0.226649,0.201886
429,6065042217,Census Tract 422.17,"POLYGON ((-117.292139 33.920173, -117.292018 3...",5461,522,-117.322242,33.927844,522,4939,-117.320504,33.910879,0.017053,0.401406,0.381063
268,6065042014,Census Tract 420.14,"POLYGON ((-117.31399 33.898499, -117.31416 33....",10516,1045,-117.322609,33.891047,1045,9471,-117.320504,33.910879,0.019944,0.751255,0.724651
430,6065042013,Census Tract 420.13,"POLYGON ((-117.313874 33.888199, -117.313816 3...",8333,743,-117.301931,33.894414,743,7590,-117.320504,33.910879,0.024821,1.0,1.0


In [10]:
# sum(Xi[j-1] * Yi[j]) - sum(Xi[j] * Yi[j-1]) over the distance-ordered units.
# The one-position lag is taken by slicing the underlying arrays: this needs
# neither the NaN fill value (`np.NaN` no longer exists in NumPy 2.0) nor the
# spline interpolation that `scipy.ndimage.shift` applies by default.
local_RCE = (df_mp_sort_cent.Xi.values[:-1] * df_mp_sort_cent.Yi.values[1:]).sum() - \
            (df_mp_sort_cent.Xi.values[1:] * df_mp_sort_cent.Yi.values[:-1]).sum()
local_RCE

0.036537274923301055

# Constructing the local centralization function

In [111]:
def local_centralization(data, group_pop_var, total_pop_var, k_neigh):
    '''
    Compute a local centralization value for every unit (row) of ``data``.

    For each unit, the unit itself and its ``k_neigh`` nearest neighbours
    (as returned by ``ps.weights.KNN`` built on the unit centroids) form a
    local context. The units of that context are ordered by increasing
    Euclidean distance between their centroid and the centroid of the focal
    unit, the cumulative shares ``Xi`` (group of interest) and ``Yi`` (rest of
    the population) are accumulated in that order, and the value stored is

        sum(Xi[j-1] * Yi[j]) - sum(Xi[j] * Yi[j-1])

    Parameters
    ----------
    data: a geopandas DataFrame that contains a geometry column
          (its ``centroid`` attribute is used)
    group_pop_var: the name of variable that contains the population size of the group of interest
    total_pop_var: the name of variable that contains the total population of the unit
    k_neigh: number of assumed neighbors for local context

    Returns
    -------
    numpy array with one value per row of ``data``, in row order.

    Notes
    -----
    * Distances are plain Euclidean distances in the coordinate units of the
      geometries.
    * The ids of the KNN weights are used as row positions of ``data``.
    * When a local context contains nobody of the group of interest, or nobody
      outside of it, the cumulative shares are undefined and 0.0 is stored for
      that unit.
    * Population columns are assumed to have no missing values.
    '''
    # Read the two population columns directly instead of renaming them, so a
    # frame that already owns a column with a generic name cannot collide.
    group_counts = data[group_pop_var].values
    other_counts = data[total_pop_var].values - group_counts

    # Centroids are computed once and reused for both coordinates.
    centroids = data.centroid
    coords = np.column_stack([centroids.map(lambda p: p.x).values,
                              centroids.map(lambda p: p.y).values])

    kd = ps.cg.kdtree.KDTree(coords)
    wnnk = ps.weights.KNN(kd, k = k_neigh)

    n_units = data.shape[0]
    local_RCEs = np.empty(n_units)

    # Materialise the (id, neighbours) pairs once; rebuilding these lists on
    # every iteration would make the loop quadratic in the number of units.
    neighbor_items = list(wnnk.neighbors.items())

    for i in range(n_units):
        focal_id, neighbor_ids = neighbor_items[i]

        # Neighbours first, focal unit last. A new list is built so the
        # neighbour lists stored in the weights object are left untouched.
        context = np.array(list(neighbor_ids) + [focal_id], dtype=int)

        center_lon, center_lat = coords[focal_id]
        center_dist = np.sqrt((coords[context, 0] - center_lon)**2 +
                              (coords[context, 1] - center_lat)**2)

        # Stable sort: units at equal distance keep their context order.
        ordered = context[np.argsort(center_dist, kind='mergesort')]

        xi = group_counts[ordered]
        yi = other_counts[ordered]
        X = xi.sum()
        Y = yi.sum()

        if X == 0 or Y == 0:
            # Shares would be 0/0; see the Notes section of the docstring.
            local_RCEs[i] = 0.0
            continue

        Xi = np.cumsum(xi) / X
        Yi = np.cumsum(yi) / Y

        # Lag-one cross products, taken by slicing (no NaN padding needed).
        local_RCEs[i] = (Xi[:-1] * Yi[1:]).sum() - (Xi[1:] * Yi[:-1]).sum()

    return local_RCEs

# Testing the function

In [112]:
# Reload the inputs from disk: `df_mp` was reduced to a single local context
# earlier in the notebook, and the function needs every Riverside tract.
census_2010 = pd.read_csv('data/std_2010_fullcount.csv', encoding="ISO-8859-1", sep=",")
df = census_2010.loc[census_2010.county == "Riverside County",
                     ['trtid10', 'tract', 'pop10', 'nhblk10']]

map_gpd = gpd.read_file('Tracts_grouped_by_County/06065.json')
map_gpd['INTGEOID10'] = pd.to_numeric(map_gpd["GEOID10"])
df_map = map_gpd.merge(df, left_on='INTGEOID10', right_on='trtid10')
df_mp = df_map[['GEOID10', 'tract', 'geometry', 'pop10', 'nhblk10']]

# One local centralization value per tract, using 3 neighbours.
local_centralization(df_mp, 'nhblk10', 'pop10', k_neigh=3)

array([ 3.65372749e-02,  1.41502794e-01,  1.04626258e-02,  3.53188492e-02,
        2.93904352e-01,  2.28845715e-02, -2.79010930e-01, -6.53334359e-01,
        2.17502782e-01,  1.24843810e-01,  8.69537850e-02,  1.54611854e-01,
        2.29660268e-02,  4.13547144e-02,  8.38279877e-02,  1.06812520e-01,
       -7.15215840e-02,  1.49880125e-01,  8.43092402e-02,  6.04025599e-02,
       -1.96101929e-04,  7.13599579e-02, -3.81252598e-02,  2.77017160e-01,
        5.86947927e-02,  2.03050273e-01, -1.32746498e-01,  8.79488833e-02,
        5.83509841e-02, -2.41095979e-02,  2.23828185e-02,  2.06516693e-01,
        2.22197707e-01,  1.25367861e-01,  2.78578068e-02,  5.55682557e-02,
       -3.43322463e-02, -1.49250263e-01, -5.59821817e-02, -1.60020511e-02,
       -2.80583357e-01,  1.82135083e-01,  1.07885710e-01,  2.20145674e-01,
       -1.89128946e-01, -5.37312200e-02,  1.13540309e-01,  6.55113664e-02,
       -3.01820621e-01, -2.26719886e-01, -6.13070812e-02,  4.40597619e-02,
       -7.44204361e-02,  