In [1]:
import intersect
import pandas as pd
import geopandas as gpd
import fiona.crs
from shapely.geometry.polygon import Polygon
from shapely.geometry.multipolygon import MultiPolygon

In [2]:
# EPSG code used to build the CRS dict below and the CRS assigned in shapes_df().
EPSG = 2263

# PROJ-style parameter dict: assigned as the CRS of the people points and
# used as the to_crs() target for the shapes.
# NOTE(review): this combines 'proj': 'latlong' with an 'init' EPSG code;
# confirm that mixing the two is intended.
CRS = {
    'proj': 'latlong',
    'init': 'epsg:{:d}'.format(EPSG)
}

In [20]:
def people_df(path):
    """Load the CSV at ``path`` as a GeoDataFrame of point geometries.

    Every row of the CSV is handed to ``intersect.to_point`` and the result is
    stored in a ``geometry`` column. The returned GeoDataFrame uses that
    column as its geometry and has its ``crs`` set to the module-level ``CRS``.
    """
    table = pd.read_csv(path)
    # axis=1 -> to_point is called once per row, receiving the row itself.
    table['geometry'] = table.apply(intersect.to_point, axis=1)
    geo_table = gpd.GeoDataFrame(table, geometry='geometry')
    geo_table.crs = CRS
    return geo_table

In [21]:
people_df("/Users/asiega/Desktop/CVHpeople.csv").head(2)

Unnamed: 0,Internal Contact ID,Latitude,Longitude,geometry
0,5,40.687482,-73.963384,POINT (-73.96338399999999 40.687482)
1,226,40.769909,-73.992111,POINT (-73.99211099999999 40.76990900000001)


In [18]:
people.head(2)

Unnamed: 0,Internal Contact ID,Latitude,Longitude,geometry
0,5,40.687482,-73.963384,POINT (-73.96338399999999 40.687482)
1,226,40.769909,-73.992111,POINT (-73.99211099999999 40.76990900000001)


In [5]:
def explode(indata):
    """Break down multipolygons in geojson to single polygons per row.

    Reads ``indata`` with ``gpd.GeoDataFrame.from_file`` and returns a new
    GeoDataFrame with the same columns, in which every row holds one polygon:

    * a row whose geometry is a ``Polygon`` is copied through as-is;
    * a row whose geometry is a ``MultiPolygon`` is repeated once per member
      polygon, with the ``geometry`` value replaced by that member;
    * a row with any other geometry value is left out of the result.

    The result gets a fresh 0..n-1 index. This function does not pass a CRS
    to the frame it builds.
    """
    indf = gpd.GeoDataFrame.from_file(indata)

    # Collect plain dict records and build the frame once at the end:
    # appending to a DataFrame inside the loop re-copies the whole frame on
    # every iteration, and DataFrame.append no longer exists in pandas 2.x.
    records = []
    for _, row in indf.iterrows():
        geom = row.geometry
        if isinstance(geom, Polygon):
            records.append(row.to_dict())
        elif isinstance(geom, MultiPolygon):
            # Iterate .geoms rather than len()/indexing on the multi-geometry
            # itself; the latter was removed in Shapely 2.0, while .geoms is
            # available on older releases as well.
            for part in geom.geoms:
                records.append(dict(row.to_dict(), geometry=part))

    # columns= pins the input column order and still yields a correctly
    # labelled (empty) frame when no row matched.
    return gpd.GeoDataFrame(records, columns=indf.columns)

# raw_shapes = explode("/Users/asiega/Desktop/NYCHA.geojson")

In [6]:
def shapes_df(path):
    """Load the shapes file at ``path`` as single-polygon rows.

    The file is run through ``explode``, the result's ``crs`` is set to the
    value built by ``fiona.crs.from_epsg(EPSG)``, and the frame returned is
    the outcome of ``to_crs(CRS)`` on it.
    """
    exploded = explode(path)
    exploded.crs = fiona.crs.from_epsg(EPSG)
    return exploded.to_crs(CRS)

In [7]:
shapes = shapes_df("/Users/asiega/Desktop/NYCHA.geojson")

In [37]:
people["geometry"].sort_values().head()

3870     POINT (-73.89386 40.82770900000001)
3871          POINT (-73.8682632 40.8273939)
3872    POINT (-73.93904000000001 40.818048)
3873            POINT (-73.854483 40.879013)
3874    POINT (-73.91856949999999 40.696593)
Name: geometry, dtype: object

In [9]:
shapes["geometry"].head()

0    POLYGON ((-73.93559183347882 40.65949014265728...
1    POLYGON ((-73.77692207729832 40.66568721649431...
2    POLYGON ((-73.79065421549551 40.66843585296066...
3    POLYGON ((-73.76820155521375 40.66907900074256...
4    POLYGON ((-73.74246999481437 40.67015238507819...
Name: geometry, dtype: object

In [None]:
len(shapes)

In [54]:
def merge_within(shapes, people):
    """Spatially join ``people`` to ``shapes`` and return the attribute table.

    Calls ``gpd.sjoin`` with ``people`` as the left frame, ``how='left'`` and
    ``op='intersects'``, then removes the ``geometry`` and ``index_right``
    columns from the joined frame in place before returning it.
    """
    joined = gpd.sjoin(people, shapes, how='left', op='intersects')
    for column in ('geometry', 'index_right'):
        del joined[column]
    return joined

In [55]:
merged_within = merge_within(shapes, people)

In [57]:
merged_within

Unnamed: 0,Internal Contact ID,Latitude,Longitude,BOROUGH,CUR_UNIT11,DEVELOPMEN,NONRES_BLD,RES_BLDG,TDS_NUM,TOT_POP11
0,5,40.687482,-73.963384,,,,,,,
1,226,40.769909,-73.992111,,,,,,,
2,228,40.746794,-73.982509,,,,,,,
3,230,40.823687,-73.868344,,,,,,,
4,232,40.760444,-73.973270,,,,,,,
5,242,40.747270,-73.980064,,,,,,,
6,244,40.732253,-73.987410,,,,,,,
7,258,40.744691,-73.996780,,,,,,,
8,260,40.692248,-73.989345,,,,,,,
9,270,40.772990,-73.982013,,,,,,,


In [None]:
merged_within.to_csv("/Users/asiega/Desktop/preliminary_merge.csv")

In [None]:
## TEST SAMPLE DATA

In [22]:
json_shapes = shapes_df("sample_data/shapes/nycha.json")

In [23]:
json_shapes.head()

Unnamed: 0,BoroCD,Shape_Area,Shape_Leng,geometry
0,311.0,103175900.0,51566.989012,POLYGON ((-73.97299433938896 40.60881414180223...
1,412.0,267333600.0,65933.851319,POLYGON ((-73.80168266553365 40.66632235257088...
2,481.0,47503130.0,53795.119097,POLYGON ((-73.83591564875908 40.74343089339249...
3,314.0,82175670.0,49291.791191,POLYGON ((-73.95630035122711 40.65504828183112...
4,313.0,88149530.0,65746.939737,POLYGON ((-73.98372152615246 40.59582107821704...


In [24]:
sample_people = people_df("sample_data/people/cvh_people.csv")

In [25]:
sample_people.head()

Unnamed: 0,Internal Contact ID,Latitude,Longitude,geometry
0,5,40.687482,-73.963384,POINT (-73.96338399999999 40.687482)
1,226,40.769909,-73.992111,POINT (-73.99211099999999 40.76990900000001)
2,228,40.746794,-73.982509,POINT (-73.9825089 40.7467938)
3,230,40.823687,-73.868344,POINT (-73.86834350000001 40.8236871)
4,232,40.760444,-73.97327,POINT (-73.97327 40.760444)


In [29]:
# Run the same inputs through the notebook's merge_within() and through
# intersect.merge() so the two results can be inspected in the cells below.
sample_results = merge_within(json_shapes, sample_people)
sample_r = intersect.merge(json_shapes, sample_people)

In [30]:
sample_results.head()

Unnamed: 0,Internal Contact ID,Latitude,Longitude,BoroCD,Shape_Area,Shape_Leng
0,5,40.687482,-73.963384,302.0,79329620.0,74177.748868
1,226,40.769909,-73.992111,104.0,49291840.0,67935.227103
2,228,40.746794,-73.982509,105.0,43790500.0,35291.343877
3,230,40.823687,-73.868344,209.0,114265500.0,62240.858104
4,232,40.760444,-73.97327,105.0,43790500.0,35291.343877


In [31]:
sample_r.head()

Unnamed: 0,Internal Contact ID,Latitude,Longitude,BoroCD,Shape_Area,Shape_Leng
0,5,40.687482,-73.963384,302.0,79329620.0,74177.748868
1,226,40.769909,-73.992111,104.0,49291840.0,67935.227103
2,228,40.746794,-73.982509,105.0,43790500.0,35291.343877
3,230,40.823687,-73.868344,209.0,114265500.0,62240.858104
4,232,40.760444,-73.97327,105.0,43790500.0,35291.343877


In [None]:
# Scratch cell: tag raw_shapes with CRS, then convert it to the CRS carried
# by `people`.
# NOTE(review): neither raw_shapes nor people is assigned at top level in the
# cells visible here (the only raw_shapes assignment is commented out), so
# this relies on earlier kernel state.
raw_shapes.crs = CRS
zones = raw_shapes.to_crs(people.crs)

In [None]:
zones.head()

In [None]:
places["geometry"].head()
# Inspecting the output here shows that, after to_crs, every polygon coordinate becomes "inf".

In [None]:
# this runs fine, but...
places.iloc[0]

In [None]:
# this kills my kernel (Py root)
places.iloc[0]["geometry"]

In [None]:
# Debug peek at the first geometry only (the loop always breaks after one pass).
# NOTE(review): `places` is not assigned in any cell visible here.
for geo in places["geometry"]:
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print(geo[0])
    print("\n")
    print(geo.bounds[0])
    break

### Compare json & geojson inputs

In [None]:
raw_json = gpd.read_file("sample_data/shapes/nycha.json")

In [None]:
raw_json["geometry"].head()

In [None]:
raw_places["geometry"].head()

In [None]:
# multipolygon vs regular polygon

raw_places["geometry"]

In [None]:
# Same EPSG/CRS values as the configuration cell near the top of the
# notebook, declared again in this cell.
EPSG = 2263

CRS = {
    'proj': 'latlong',
    'init': 'epsg:{:d}'.format(EPSG)
}

# Two-step conversion: first to the CRS built from the EPSG code, then to the
# CRS dict above.
# NOTE(review): exploded_df is not assigned in any cell visible here; confirm
# where it comes from.
zones = exploded_df.to_crs(fiona.crs.from_epsg(EPSG))
zones_crs = zones.to_crs(CRS)

In [None]:
zones_crs.head()

In [None]:
# Keep only the first 50 rows of exploded_df for the merge below.
exp_head = exploded_df.head(50)

# NOTE(review): exploded_df and people are not assigned in any cell visible
# here; this cell depends on earlier kernel state.
merged_df = intersect.merge(exp_head, people)