In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import rasterio
from rasterio import Affine, features
from shapely.geometry import shape
from shapely.ops import cascaded_union
from math import floor, ceil, sqrt
import fiona
%matplotlib inline



In [10]:
# Function to return mean neighborhood overlap
# Still slow, not using numpy enough for speed
def overlap(shapes):
    '''shapes = []

    with fiona.open("data/{0}/{0}.geojson".format(neighborhood)) as hp:
        for f in hp:
            if shape(f['geometry']).is_valid:
                shapes.append(shape(f['geometry']))
    
    # Get count shapes to return
    count_shapes = len(shapes)'''
    
    max_shape = cascaded_union([shape(s) for s in shapes])
    minx, miny, maxx, maxy = max_shape.bounds
    dx = dy = 0.00001  # grid resolution; very small because not projected coordinates
    lenx = dx * (ceil(maxx / dx) - floor(minx / dx))
    leny = dy * (ceil(maxy / dy) - floor(miny / dy))
    nx = int(lenx / dx)
    ny = int(leny / dy)
    gt = Affine(
        dx, 0.0, dx * floor(minx / dx),
        0.0, -dy, dy * ceil(maxy / dy))
    pa = np.zeros((ny, nx), 'd')
    for s in shapes:
        r = features.rasterize([s], (ny, nx), transform=gt)
        pa[r > 0] += 1
    # Mask items that are 0, get mean of those greater
    pa_mask = np.ma.masked_where(pa == 0, pa)
    return np.mean(pa_mask)

In [4]:
# List from DNAInfo
neighborhoods = [
    'bucktown',
    'garfield-park',
    'norwood-park',
    'canaryville',
    'gladstone-park',
    'mckinley-park',
    'sauganash',
    'west-beverly',
    'montclare',
    'marquette-park',
    'river-west',
    'west-pullman',
    'lakeview',
    'old-irving-park',
    'south-loop',
    'bronzeville',
    'noble-square',
    'uptown',
    'west-humboldt-park',
    'albany-park',
    'pullman',
    'burnside',
    'jefferson-park',
    'ravenswood-gardens',
    'west-lawn',
    'gold-coast',
    'mayfair',
    'chicago-lawn',
    'kenwood',
    'edison-park',
    'ukrainian-village',
    'greektown',
    'bush',
    'grand-crossing',
    'river-north',
    'oakland',
    'back-of-yards',
    'washington-park',
    'avalon-park',
    'ravenswood-manor',
    'independence-park',
    'east-garfield-park',
    'gap',
    'ashburn',
    'wrigleyville',
    'south-austin',
    'pilsen',
    'lincoln-square',
    'avondale',
    'north-lawndale',
    'auburn-gresham',
    'east-side',
    'south-deering',
    'garfield-ridge',
    'englewood',
    'tri-taylor',
    'ravenswood',
    'bridgeport',
    'logan-square',
    'chinatown',
    'old-edgebrook',
    'ford-city',
    'armour-square',
    'mt-greenwood',
    'beverly',
    'university-village',
    'rogers-park',
    'north-park',
    'morgan-park',
    'loop',
    'humboldt-park',
    'near-west-side',
    'hyde-park',
    'north-edgebrook',
    'little-italy',
    'brainerd',
    'belmont-cragin',
    'fuller-park',
    'west-town',
    'irving-park',
    'dunning',
    'buena-park',
    'edgewater',
    'west-englewood',
    'gage-park',
    'heart-of-italy',
    'west-elsdon',
    'medical-district',
    'roseland',
    'boystown',
    'clearing',
    'andersonville',
    'east-village',
    'douglas',
    'roscoe-village',
    'south-lawndale',
    'galewood',
    'forest-glen',
    'calumet-heights',
    'south-chicago',
    'altgeld-gardens',
    'hermosa',
    'west-ridge',
    'archer-heights',
    'edgebrook',
    'streeterville',
    'hegewisch',
    'west-rogers-park',
    'pill-hill',
    'portage-park',
    'heart-of-chicago',
    'grand-boulevard',
    'wicker-park',
    'new-city',
    'lincoln-park',
    'chatham',
    'south-shore',
    'downtown',
    'woodlawn',
    'little-village',
    'washington-heights',
    'brighton-park',
    'old-town',
    'north-center',
    'jackson-highlands',
    'ohare',
    'riverdale',
    'austin',
    'midway',
    'west-loop'
]

In [None]:
neighborhood_results = []

for n in neighborhoods:
    shapes = []

    with fiona.open("data/{0}/{0}.geojson".format(n)) as hp:
        for f in hp:
            if shape(f['geometry']).is_valid:
                shapes.append(shape(f['geometry']))
    
    # Get count shapes to return
    count_shapes = len(shapes)
    
    if count_shapes == 0:
        continue
    else:
        result = {
            "neighborhood": n,
            "count": count_shapes
        }
        #count_overlap = overlap_mean(n)
        result["overlap"] = overlap(shapes)
        #result["overlap"] = count_overlap[1]
        neighborhood_results.append(result)

nb_overlap = pd.DataFrame(neighborhood_results)

nb_overlap.head()

Self-intersection at or near point -87.686438916304667 41.910271695783507
Self-intersection at or near point -87.788491453218157 41.98601286335942
Self-intersection at or near point -87.729263305664062 41.983356261006165
Too few points in geometry component at or near point -87.6424241065979 41.892278647917657
Self-intersection at or near point -87.662614948420355 41.944892143035375
Self-intersection at or near point -87.630632952110147 41.866987407744745
Self-intersection at or near point -87.656278069661781 41.965673971769824
Self-intersection at or near point -87.651886333931159 41.973543484149701
Self-intersection at or near point -87.645185495158671 41.954885875733162
Self-intersection at or near point -87.696653134388384 41.975925779834952
Self-intersection at or near point -87.703229443024071 41.965477682669018
Self-intersection at or near point -87.717500791285502 41.941382570329075
Self-intersection at or near point -87.787422365928435 41.970491745681166
Self-intersection at o

In [2]:
nb_overlap.to_csv("neighborhood_overlap.csv")

NameError: name 'nb_overlap' is not defined

In [9]:
from __future__ import division

overlaps = pd.read_csv('neighborhood_overlap.csv')
overlaps["overlap_pct"] = overlaps["overlap"] / overlaps["count"]
overlaps.drop('Unnamed: 0', axis=1, inplace=True)
overlaps.to_csv('neighborhoods_overlap.csv', index=False)

In [15]:
overlaps_count = overlaps[overlaps["count"] > 5]
overlaps_count.sort_values(by=["overlap_pct"], ascending=False, inplace=True)
print len(overlaps_count)
overlaps_count[:10]
overlaps_ten = overlaps[overlaps["count"] > 10]
overlaps_ten.sort_values(by=["overlap_pct"], ascending=False, inplace=True)
print len(overlaps_ten)
overlaps_ten[:10]

85
60


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,count,neighborhood,overlap,overlap_pct
11,20,old-irving-park,9.999639,0.499982
34,13,ravenswood-manor,5.714806,0.4396
44,11,east-side,4.797347,0.436122
46,14,garfield-ridge,6.056305,0.432593
104,18,little-village,7.606654,0.422592
88,30,west-ridge,11.272721,0.375757
90,14,edgebrook,5.097225,0.364088
110,11,austin,3.920699,0.356427
70,12,buena-park,4.147292,0.345608
5,16,sauganash,4.939765,0.308735
