# Measure contextualised characters

date: 2020-03-07 Amsterdam Data

In [1]:
import geopandas as gpd
import momepy as mm
from tqdm import tqdm
from momepy import limit_range
import numpy as np
import pandas as pd
from pysal.explore.inequality.theil import Theil
import libpysal
import scipy as sp

You can install them with  `pip install urbanaccess pandana` or `conda install -c udst pandana urbanaccess`
  "You need pandana and urbanaccess to work with segregation's network module\n"
  from .sqlite import head_to_sql, start_sql


In [2]:
# Load the primary characters table; the first CSV column is used as a
# provisional index (the frame is re-indexed on 'uID' in a later cell).
gdf = pd.read_csv('files/AMS/primary.csv', index_col=0)

In [3]:
# Read the spatial weights stored in the .gal file.
spatial_weights = libpysal.io.open('files/AMS/AMSqueen3.gal', 'r').read()

# Cast both the focal ids and the neighbour ids to int so that the mapping can
# be looked up with integer labels. Only `neighbors` is rewritten here; every
# other attribute of the weights object is left exactly as it was read.
spatial_weights.neighbors = {
    int(focal): list(map(int, adjacent))
    for focal, adjacent in spatial_weights.neighbors.items()
}

 There are 148 disconnected components.


In [4]:
# Use the 'uID' column as the row index (rebinding instead of mutating in place).
gdf = gdf.set_index('uID')

In [5]:
# Display the frame after it has been re-indexed on uID.
gdf

Unnamed: 0_level_0,stcOri,sdcLAL,sdcAre,sscCCo,sscERI,sicCAR,sicFAR,mtcWNe,mdcAre,licGDe,...,ldsCDL,xcnSCl,mtdMDi,lddNDe,linWID,lddRea,lddARe,sddAre,midRea,midAre
uID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,27.510060,216.792883,28760.794790,0.779150,1.081927,0.004502,0.004502,0.001577,56629.313623,0.003862,...,39.617021,0.000000,1292.397204,0.000000,0.000000,33.0,558410.517442,56629.313623,4.0,116032.091487
1,27.569911,214.663688,27868.518833,0.770029,1.080976,0.003202,0.003202,0.001599,56629.313623,0.003862,...,39.617021,0.000000,1292.397204,0.000000,0.000000,33.0,558410.517442,56629.313623,4.0,116032.091487
2,26.780955,142.946654,4725.131782,0.294426,0.944924,0.063007,0.063007,0.008920,43845.834221,0.025565,...,3334.105306,0.000000,1566.836623,0.024562,0.049753,96.0,853610.178449,340966.253401,44.0,458284.275492
3,38.230240,184.595244,15683.586526,0.586023,1.018656,0.052321,0.052321,0.006100,35810.564680,0.042358,...,3334.105306,0.000000,1566.836623,0.024562,0.049753,96.0,853610.178449,340966.253401,44.0,458284.275492
4,9.362836,151.244275,6032.094361,0.335753,0.822057,0.082069,0.082069,0.015487,84209.214520,0.025565,...,3334.105306,0.000000,1566.836623,0.024562,0.049753,96.0,853610.178449,340966.253401,44.0,458284.275492
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19452,32.905625,122.129802,678.408432,0.057911,0.799167,0.117692,0.353076,0.011775,3549.480638,0.478379,...,36.094747,0.333333,90.941175,0.055412,0.109620,77.0,197239.194862,10262.154015,18.0,49180.208879
19453,32.907696,119.689689,1907.120365,0.169502,0.895196,0.041436,0.124307,0.015075,6763.119547,1.016754,...,36.094747,0.333333,90.941175,0.055412,0.109620,77.0,197239.194862,10262.154015,18.0,49180.208879
19454,40.925191,26.664661,178.598830,0.319828,0.983183,0.285785,0.857355,0.079462,3261.843269,0.682345,...,54.774919,0.035714,94.820452,0.066672,0.122414,120.0,246336.056677,11429.168476,28.0,53796.131602
19455,42.884230,42.515494,888.019281,0.625515,1.006693,0.332357,1.329426,0.050626,7891.125350,0.682345,...,36.094747,0.333333,90.941175,0.055412,0.109620,77.0,197239.194862,10262.154015,18.0,49180.208879


In [6]:
# Keep the current column labels as the set of characters that the later
# cells iterate over.
characters = gdf.columns

In [7]:
# One empty result list per character for each of the three statistics;
# the neighbourhood loop appends one value per row to each list.
means = {name: [] for name in characters}
ranges = {name: [] for name in characters}
theils = {name: [] for name in characters}

In [8]:
# Name of the unique-identifier column.
# NOTE(review): no other cell visible in this notebook references this
# variable; the `set_index` cell hard-codes 'uID' instead.
unique_id = 'uID'

In [9]:
# Replace every missing value with 0 before the neighbourhood statistics are
# computed.
# NOTE(review): the substituted zeros then take part in the means, ranges and
# Theil values below — confirm that this is intended.
gdf = gdf.fillna(0)

In [10]:
# Clamp `lcdMes` to be non-negative: values that are not >= 0 (negatives, and
# NaN should any remain) become 0, everything else is kept unchanged.
# Done column-wise with `Series.where` instead of a row-wise `apply`, which
# built a full row object for every record just to inspect one field.
gdf['lcdMes'] = gdf['lcdMes'].where(gdf['lcdMes'] >= 0, 0)

In [11]:
def theil(y):
    """Return the Theil inequality index of ``y``.

    The values are converted to shares of their total ``s_i = y_i / sum(y)``
    and the index is ``sum(s_i * log(n * s_i))`` with ``n = len(y)``.

    Parameters
    ----------
    y : array-like
        Values to measure. For a 2-D input the index is computed along the
        first axis, giving one value per column.

    Returns
    -------
    float or numpy.ndarray
        ``0`` when all values are equal (or the input is empty); a scalar for
        1-D input, an array for 2-D input.

    Notes
    -----
    Exact zeros are replaced by the smallest positive normal float so that the
    logarithm is defined. Negative values are not handled specially and lead
    to NaN through the logarithm.
    """
    y = np.asarray(y)
    n = len(y)
    # log(0) is undefined, so nudge exact zeros to the tiniest positive float.
    shifted = y + np.finfo(float).tiny * (y == 0)
    shares = shifted / shifted.sum(axis=0)
    # Vectorised reduction instead of the Python builtin `sum`, which walks the
    # array element by element; this function is called once per row and
    # character in the neighbourhood loop.
    return (shares * np.log(n * shares)).sum(axis=0)


# Start from empty accumulators so that re-running this cell does not append a
# second set of results to lists that already hold one value per row (which
# would make the column assignment in the next cell fail on length).
for ch in characters:
    means[ch] = []
    ranges[ch] = []
    theils[ch] = []

# Only the index label of each row is needed, so iterate over the index
# directly instead of materialising every row with `iterrows()`.
for index in tqdm(gdf.index, total=gdf.shape[0]):
    # Neighbourhood = the element's neighbours from the weights plus itself.
    neighbours = spatial_weights.neighbors[index].copy()
    neighbours.append(index)

    # Slice the neighbourhood rows once per element, not once per character.
    neighbourhood = gdf.loc[neighbours]

    for ch in characters:
        values_list = neighbourhood[ch]
        values = values_list.tolist()
        # Values limited to the 10-90 and 25-75 percentile ranges.
        idec = limit_range(values, rng=(10, 90))
        iquar = limit_range(values, rng=(25, 75))

        # Mean of the 25-75 limited values.
        means[ch].append(np.mean(iquar))
        # Interquartile range of all neighbourhood values.
        # NOTE(review): `sp.stats` needs the scipy.stats submodule to be
        # loaded; `import scipy as sp` alone may not do that on older SciPy.
        ranges[ch].append(sp.stats.iqr(values_list, rng=(25, 75)))
        # Theil index of the 10-90 limited values.
        theils[ch].append(theil(idec))

100%|██████████| 252385/252385 [7:49:41<00:00,  8.96it/s]  


In [12]:
# Attach the three neighbourhood statistics of every character as new columns,
# in the order <character>_meanIQ3, <character>_rangeIQ3, <character>_theilID3.
suffixed_results = (
    ('_meanIQ3', means),
    ('_rangeIQ3', ranges),
    ('_theilID3', theils),
)
for character in characters:
    for suffix, results in suffixed_results:
        gdf[character + suffix] = results[character]

In [13]:
# Select the columns whose name contains an underscore, i.e. the suffixed
# columns added in the previous cell (plus any other column containing '_').
pat = list(filter(lambda label: '_' in label, gdf.columns))

In [14]:
# Inspect the selected column names.
pat

['stcOri_meanIQ3',
 'stcOri_rangeIQ3',
 'stcOri_theilID3',
 'sdcLAL_meanIQ3',
 'sdcLAL_rangeIQ3',
 'sdcLAL_theilID3',
 'sdcAre_meanIQ3',
 'sdcAre_rangeIQ3',
 'sdcAre_theilID3',
 'sscCCo_meanIQ3',
 'sscCCo_rangeIQ3',
 'sscCCo_theilID3',
 'sscERI_meanIQ3',
 'sscERI_rangeIQ3',
 'sscERI_theilID3',
 'sicCAR_meanIQ3',
 'sicCAR_rangeIQ3',
 'sicCAR_theilID3',
 'sicFAR_meanIQ3',
 'sicFAR_rangeIQ3',
 'sicFAR_theilID3',
 'mtcWNe_meanIQ3',
 'mtcWNe_rangeIQ3',
 'mtcWNe_theilID3',
 'mdcAre_meanIQ3',
 'mdcAre_rangeIQ3',
 'mdcAre_theilID3',
 'licGDe_meanIQ3',
 'licGDe_rangeIQ3',
 'licGDe_theilID3',
 'ltcWRB_meanIQ3',
 'ltcWRB_rangeIQ3',
 'ltcWRB_theilID3',
 'stcSAl_meanIQ3',
 'stcSAl_rangeIQ3',
 'stcSAl_theilID3',
 'sdbHei_meanIQ3',
 'sdbHei_rangeIQ3',
 'sdbHei_theilID3',
 'sdbAre_meanIQ3',
 'sdbAre_rangeIQ3',
 'sdbAre_theilID3',
 'sdbVol_meanIQ3',
 'sdbVol_rangeIQ3',
 'sdbVol_theilID3',
 'sdbPer_meanIQ3',
 'sdbPer_rangeIQ3',
 'sdbPer_theilID3',
 'sdbCoA_meanIQ3',
 'sdbCoA_rangeIQ3',
 'sdbCoA_theilID3

In [15]:
# Intermediate export of the contextual columns. `reset_index()` turns uID
# into a regular column before writing. The same path is written again by the
# last cell of the notebook, which overwrites this file.
gdf[pat].reset_index().to_csv('files/AMS/context_data.csv')

In [16]:
# Load the table of `*_simpson` columns; the first CSV column becomes the
# index (displayed as uID in the output below).
simpson = pd.read_csv('files/AMS/simpson.csv', index_col=0)

In [17]:
# Display the loaded Simpson table.
simpson

Unnamed: 0_level_0,sdcLAL_simpson,sdcAre_simpson,sicFAR_simpson,mdcAre_simpson,licGDe_simpson,ltcWRB_simpson,stcSAl_simpson,sdbHei_simpson,sdbAre_simpson,sdbVol_simpson,...,ldsCDL_simpson,xcnSCl_simpson,mtdMDi_simpson,lddNDe_simpson,linWID_simpson,lddRea_simpson,lddARe_simpson,sddAre_simpson,midRea_simpson,midAre_simpson
uID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.000000,1.000000,1.000000,1.000000,1.000000,1.0,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
1,1.000000,1.000000,1.000000,1.000000,1.000000,1.0,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2,0.387755,0.346939,1.000000,0.551020,1.000000,1.0,0.428571,1.000000,0.510204,0.591837,...,0.591837,1.000000,1.000000,1.000000,1.000000,0.591837,0.591837,0.591837,0.591837,0.591837
3,0.256198,0.272727,1.000000,0.537190,1.000000,1.0,0.256198,1.000000,0.504132,0.834711,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
4,0.387755,0.346939,1.000000,0.551020,1.000000,1.0,0.428571,1.000000,0.510204,0.591837,...,0.591837,1.000000,1.000000,1.000000,1.000000,0.591837,0.591837,0.591837,0.591837,0.591837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19452,0.342561,0.640138,0.695502,0.508651,0.515571,1.0,0.418685,0.889273,0.792388,0.709343,...,1.000000,0.404844,0.640138,1.000000,1.000000,0.584775,0.508651,0.584775,0.889273,0.889273
19453,0.304688,0.539062,0.593750,0.468750,0.570312,1.0,0.390625,0.773438,0.679688,0.601562,...,1.000000,0.367188,0.695312,1.000000,1.000000,0.531250,0.460938,0.531250,0.781250,0.882812
19454,0.327977,0.586957,0.460302,0.380907,0.534026,1.0,0.599244,0.762760,0.639887,0.603970,...,0.547259,0.386578,0.354442,0.957467,0.916824,0.327977,0.444234,0.500945,0.414934,0.519849
19455,0.327977,0.586957,0.460302,0.380907,0.534026,1.0,0.599244,0.762760,0.639887,0.603970,...,0.547259,0.386578,0.354442,0.957467,0.916824,0.327977,0.444234,0.500945,0.414934,0.519849


In [21]:
# Combine the contextual columns with the Simpson columns by matching index
# labels; rows whose label is missing on either side are dropped (inner join).
patterned = pd.merge(
    gdf[pat],
    simpson,
    how='inner',
    left_index=True,
    right_index=True,
)

In [22]:
# Display the merged table.
patterned

Unnamed: 0_level_0,stcOri_meanIQ3,stcOri_rangeIQ3,stcOri_theilID3,sdcLAL_meanIQ3,sdcLAL_rangeIQ3,sdcLAL_theilID3,sdcAre_meanIQ3,sdcAre_rangeIQ3,sdcAre_theilID3,sscCCo_meanIQ3,...,ldsCDL_simpson,xcnSCl_simpson,mtdMDi_simpson,lddNDe_simpson,linWID_simpson,lddRea_simpson,lddARe_simpson,sddAre_simpson,midRea_simpson,midAre_simpson
uID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,27.539986,0.029926,5.903783e-07,215.728286,1.064598,0.000012,28314.656812,446.137978,0.000124,0.774589,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
1,27.539986,0.029926,5.903783e-07,215.728286,1.064598,0.000012,28314.656812,446.137978,0.000124,0.774589,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2,27.376524,15.287430,6.786512e-02,165.119895,60.349187,0.013950,10780.086406,7414.403564,0.054318,0.481731,...,0.591837,1.000000,1.000000,1.000000,1.000000,0.591837,0.591837,0.591837,0.591837,0.591837
3,31.751999,14.433147,5.272121e-02,116.010516,90.511917,0.112148,5078.970289,8830.542588,0.392929,0.495462,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
4,27.376524,15.287430,6.786512e-02,165.119895,60.349187,0.013950,10780.086406,7414.403564,0.054318,0.481731,...,0.591837,1.000000,1.000000,1.000000,1.000000,0.591837,0.591837,0.591837,0.591837,0.591837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19452,37.493494,8.431445,6.527028e-03,80.571042,79.614308,0.097360,646.907689,539.572227,0.176220,0.216770,...,1.000000,0.404844,0.640138,1.000000,1.000000,0.584775,0.508651,0.584775,0.889273,0.889273
19453,37.011002,8.552335,6.648164e-03,81.405301,81.483123,0.101711,811.882093,1471.286550,0.329740,0.276944,...,1.000000,0.367188,0.695312,1.000000,1.000000,0.531250,0.460938,0.531250,0.781250,0.882812
19454,41.082484,2.350902,3.195354e-03,62.124290,84.110704,0.138473,611.970826,1352.082203,0.496895,0.274728,...,0.547259,0.386578,0.354442,0.957467,0.916824,0.327977,0.444234,0.500945,0.414934,0.519849
19455,41.082484,2.350902,3.195354e-03,62.124290,84.110704,0.138473,611.970826,1352.082203,0.496895,0.274728,...,0.547259,0.386578,0.354442,0.957467,0.916824,0.327977,0.444234,0.500945,0.414934,0.519849


In [25]:
# List every column name of the merged table.
list(patterned.columns)

['stcOri_meanIQ3',
 'stcOri_rangeIQ3',
 'stcOri_theilID3',
 'sdcLAL_meanIQ3',
 'sdcLAL_rangeIQ3',
 'sdcLAL_theilID3',
 'sdcAre_meanIQ3',
 'sdcAre_rangeIQ3',
 'sdcAre_theilID3',
 'sscCCo_meanIQ3',
 'sscCCo_rangeIQ3',
 'sscCCo_theilID3',
 'sscERI_meanIQ3',
 'sscERI_rangeIQ3',
 'sscERI_theilID3',
 'sicCAR_meanIQ3',
 'sicCAR_rangeIQ3',
 'sicCAR_theilID3',
 'sicFAR_meanIQ3',
 'sicFAR_rangeIQ3',
 'sicFAR_theilID3',
 'mtcWNe_meanIQ3',
 'mtcWNe_rangeIQ3',
 'mtcWNe_theilID3',
 'mdcAre_meanIQ3',
 'mdcAre_rangeIQ3',
 'mdcAre_theilID3',
 'licGDe_meanIQ3',
 'licGDe_rangeIQ3',
 'licGDe_theilID3',
 'ltcWRB_meanIQ3',
 'ltcWRB_rangeIQ3',
 'ltcWRB_theilID3',
 'stcSAl_meanIQ3',
 'stcSAl_rangeIQ3',
 'stcSAl_theilID3',
 'sdbHei_meanIQ3',
 'sdbHei_rangeIQ3',
 'sdbHei_theilID3',
 'sdbAre_meanIQ3',
 'sdbAre_rangeIQ3',
 'sdbAre_theilID3',
 'sdbVol_meanIQ3',
 'sdbVol_rangeIQ3',
 'sdbVol_theilID3',
 'sdbPer_meanIQ3',
 'sdbPer_rangeIQ3',
 'sdbPer_theilID3',
 'sdbCoA_meanIQ3',
 'sdbCoA_rangeIQ3',
 'sdbCoA_theilID3

In [29]:
# Remove the 'ssbERI_simpson.1' column.
# NOTE(review): the '.1' suffix looks like pandas' renaming of a duplicated
# header in simpson.csv — confirm at the source.
# `errors='ignore'` keeps the cell re-runnable: without it a second execution
# raises KeyError because the column has already been dropped.
patterned = patterned.drop(columns=['ssbERI_simpson.1'], errors='ignore')

In [30]:
# (rows, columns) of the final table.
patterned.shape

(252385, 296)

In [31]:
# Write the final merged table (index included) to disk. This is the same path
# as the intermediate export earlier in the notebook, so that file is replaced.
patterned.to_csv('files/AMS/context_data.csv')