In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import pysal
%matplotlib inline

In [3]:
humboldt_park = gpd.read_file("data/humboldt-park/humboldt-park.geojson")
humboldt_park["area"] = humboldt_park["geometry"].area
humboldt_park.head()

Unnamed: 0,geometry,neighborhood,area
0,POLYGON ((-87.68707752227783 41.91032572790956...,humboldt-park,0.00032
1,POLYGON ((-87.74608612060547 41.90968698030152...,humboldt-park,0.001431
2,POLYGON ((-87.68677711486815 41.89582458304622...,humboldt-park,0.000345
3,"POLYGON ((-87.6873779296875 41.91415807933598,...",humboldt-park,0.00072
4,POLYGON ((-87.68694877624512 41.91805406740491...,humboldt-park,0.000574


In [4]:
hp_intersects = humboldt_park["geometry"][5].intersection(humboldt_park["geometry"][1])
print hp_intersects

POLYGON ((-87.68680779355323 41.89045345823286, -87.72600173950195 41.89001042401827, -87.72650729163038 41.90989937774629, -87.68720626831055 41.91032572790956, -87.68680779355323 41.89045345823286))


In [5]:
def get_intersections(gdf, idx):
    i_gdf = []
    for i, val in enumerate(gdf["geometry"]):
        if not idx == i and gdf["geometry"][i].is_valid:
            inter = gdf["geometry"][idx].intersection(gdf["geometry"][i])
            inter_dict = {
                "orig": idx,
                "inter": i,
                "geometry": inter,
                "area": inter.area
            }
            i_gdf.append(inter_dict)
    return pd.DataFrame(i_gdf)

In [6]:
hp_test_intersect = get_intersections(humboldt_park, 5)
hp_test_intersect[:10]

Self-intersection at or near point -87.71170156851403 41.904652090768906


Unnamed: 0,area,geometry,inter,orig
0,0.000317,POLYGON ((-87.68715523255919 41.91032510606678...,0,5
1,0.00078,POLYGON ((-87.68680779355323 41.89045345823286...,1,5
2,0.000344,"POLYGON ((-87.7054415663806 41.89024282879227,...",2,5
3,0.000719,"POLYGON ((-87.72660255432127 41.9136471124402,...",3,5
4,0.000438,POLYGON ((-87.68729209899902 41.91409420869771...,4,5
5,0.00057,POLYGON ((-87.72618416085484 41.89718707236523...,6,5
6,0.000603,POLYGON ((-87.72660137829735 41.91364712581566...,7,5
7,0.000523,POLYGON ((-87.68729209899902 41.91409420869771...,8,5
8,0.000208,POLYGON ((-87.70668983459473 41.89831620347219...,9,5
9,0.000314,POLYGON ((-87.68718001480042 41.91100757293528...,10,5


In [7]:
humboldt_park["valid"] = humboldt_park["geometry"].is_valid
#humboldt_park["valid"] == False
humboldt_park[:10]

Self-intersection at or near point -87.71170156851403 41.904652090768906


Unnamed: 0,geometry,neighborhood,area,valid
0,POLYGON ((-87.68707752227783 41.91032572790956...,humboldt-park,0.00032,True
1,POLYGON ((-87.74608612060547 41.90968698030152...,humboldt-park,0.001431,True
2,POLYGON ((-87.68677711486815 41.89582458304622...,humboldt-park,0.000345,True
3,"POLYGON ((-87.6873779296875 41.91415807933598,...",humboldt-park,0.00072,True
4,POLYGON ((-87.68694877624512 41.91805406740491...,humboldt-park,0.000574,True
5,POLYGON ((-87.68729209899902 41.91409420869771...,humboldt-park,0.000933,True
6,POLYGON ((-87.68724918365479 41.91032572790956...,humboldt-park,0.000571,True
7,POLYGON ((-87.68733501434326 41.91397445107869...,humboldt-park,0.000604,True
8,POLYGON ((-87.71556713181781 41.89570153118875...,humboldt-park,0.000649,True
9,POLYGON ((-87.70668983459473 41.89831620347219...,humboldt-park,0.000208,True


In [8]:
from __future__ import division

def get_mean_overlap(gs, idx):
    overlap_list = []
    
    if not gs["geometry"][idx].is_valid:
        return 0
    
    for i, val in gs.iterrows():
        if not idx == i and gs["geometry"][i].is_valid:
            intersection = gs["geometry"][idx].intersection(gs["geometry"][i])
            overlap_pct = intersection.area / gs["geometry"][idx].area
            overlap_list.append(overlap_pct)
    
    return sum(overlap_list) / len(overlap_list)
    

In [9]:
dna_info = gpd.read_file("dna_neighborhoods.geojson")
dna_info.head()

Unnamed: 0,geometry,neighborhood
0,POLYGON ((-87.68754959106445 41.92488743920406...,bucktown
1,POLYGON ((-87.68772125244141 41.92693092226143...,bucktown
2,POLYGON ((-87.68763542175293 41.92725021057667...,bucktown
3,POLYGON ((-87.66776561737061 41.91074091042736...,bucktown
4,"POLYGON ((-87.65356063842772 41.9112199638231,...",bucktown


In [10]:
dna_info["valid"] = dna_info["geometry"].is_valid
dna_info = dna_info[dna_info["valid"] == True]
dna_info[:25]

Self-intersection at or near point -87.686438916304667 41.910271695783507
Self-intersection at or near point -87.788491453218157 41.98601286335942
Self-intersection at or near point -87.729263305664062 41.983356261006165
Too few points in geometry component at or near point -87.6424241065979 41.892278647917657
Self-intersection at or near point -87.662614948420355 41.944892143035375
Self-intersection at or near point -87.630632952110147 41.866987407744745
Self-intersection at or near point -87.656278069661781 41.965673971769824
Self-intersection at or near point -87.651886333931159 41.973543484149701
Self-intersection at or near point -87.645185495158671 41.954885875733162
Self-intersection at or near point -87.696653134388384 41.975925779834952
Self-intersection at or near point -87.703229443024071 41.965477682669018
Self-intersection at or near point -87.717500791285502 41.941382570329075
Self-intersection at or near point -87.787422365928435 41.970491745681166
Self-intersection at o

Unnamed: 0,geometry,neighborhood,valid
0,POLYGON ((-87.68754959106445 41.92488743920406...,bucktown,True
1,POLYGON ((-87.68772125244141 41.92693092226143...,bucktown,True
2,POLYGON ((-87.68763542175293 41.92725021057667...,bucktown,True
3,POLYGON ((-87.66776561737061 41.91074091042736...,bucktown,True
4,"POLYGON ((-87.65356063842772 41.9112199638231,...",bucktown,True
5,"POLYGON ((-87.67995357513428 41.9103576651222,...",bucktown,True
6,POLYGON ((-87.67759323120116 41.91026185343638...,bucktown,True
7,POLYGON ((-87.66780853271484 41.91486065213846...,bucktown,True
8,POLYGON ((-87.68724918365479 41.91029379068096...,bucktown,True
9,POLYGON ((-87.66772270202635 41.91064509931678...,bucktown,True


In [5]:
dna_info["overlap"] = 0


for neighb in dna_info["neighborhood"].values:
    gs_neighb = dna_info[dna_info["neighborhood"] == neighb]
    for idx, row in gs_neighb.iterrows():
        dna_info["overlap"][idx] = get_mean_overlap(gs_neighb, idx)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


KeyboardInterrupt: 

In [None]:
list_neighborhoods = []

for val in dna_info["neighborhood"].values:
    list_neighborhoods.append(dna_info[dna_info["neighborhood"] == val])

for n in list_neighborhoods:
    n["overlap"] = 0
    for idx, row in n.iterrows():
        n["overlap"][idx] = get_mean_overlap(n, idx)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
