In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [11]:
import os
import fiona
import pyproj
from shapely import geometry
import sys

script_dir = '/Users/mattiesanseverino/Code/the-landfill/src/notebooks/'


shapeSets = {
    'sffind': ("sffind_neighborhoods/SFFind_Neighborhoods.shp",
               "name")}


def getSFNeighNames(locs, shapes=None, projection=None):
    """
    Given an array of lon, lat tuples [(lon, lat), (lon, lat)...]
    we return the neighborhood name in SF
    """
    if projection is None:
        projection = defProjection

    if shapes is None:
        shapes = shapeData

    places = []

    for locx, locy in locs:
        locx, locy = projection(locx, locy)

        point = geometry.Point(locx, locy)

        for shape, bbox, place in shapes:
            # first, check if inside bounding box
            if locx < bbox[0] or locx > bbox[2]:
                continue

            if locy < bbox[1] or locy > bbox[3]:
                continue

            # Alternative method (but slower)
            # if point.within(shape):
            if shape.contains(point):
                places.append(place)
                break
        else:
            places.append(None)

    return places


def getShapeData(dataset="sffind"):
    """
    Initialize the shape data used by main method
    to determine neighborhoods.
    Data courtesy of sfgov.
    """
    shpfile = os.path.join(script_dir,
                           shapeSets[dataset][0])
    shapes = [shape for shape in fiona.open(shpfile)]

    shapeInfo = []
    for shaperec in shapes:
        place = shaperec['properties'][shapeSets[dataset][1]]
        shape = geometry.shape(shaperec['geometry'])
        bbox = shape.bounds
        shapeInfo.append((shape, bbox, place))

    # data for projecting lon, lat to
    # realtor and planning shapefile coords
    # +proj=lcc
    # +lat_1=37.06666666666667
    # +lat_2=38.43333333333333
    # +lat_0=36.5
    # +lon_0=-120.5
    # +x_0=2000000
    # +y_0=500000.0000000002
    # +ellps=GRS80
    # +datum=NAD83
    # +to_meter=0.3048006096012192

    if dataset == 'sffind':
        projection = lambda x, y: (x, y)
    else:
        proj = pyproj.Proj(proj='lcc',
                           lat_1=37.06666666666667,
                           lat_2=38.43333333333333,
                           lat_0=36.5,
                           lon_0=-120.5,
                           x_0=2000000,
                           y_0=500000.0000000002,
                           ellps='GRS80',
                           datum='NAD83')

        # meter to feet conversion
        mtof = 3.2808333333333337
        projection = lambda x, y, proj=proj, mtof=mtof:\
            [mtof*z for z in proj(x, y)]

    return shapeInfo, projection


shapeData, defProjection = getShapeData('sffind')

if __name__ == '__main__':
    from pprint import pprint as pp
    import time

    def test_getSFNeigh():
        locs = [(-122.424612993055, 37.8014488257836),
                (-122.420120319211, 37.7877570602182),
                (-122.42025048261,  37.7800745746105),
                (-122.390718076188, 37.7385560584619),
                (-122.433084166809, 37.7851499161314),
                (-122.422727873548, 37.7503729275448),
                (-122.401786, 37.782562),
                (-122.391873, 37.783105),
                ]

        expected = [u'Marina',
                    u'Lower Nob Hill',
                    u'Civic Center',
                    u'Bayview',
                    u'Lower Pacific Heights',
                    u'Dolores Heights',
                    u'South of Market',
                    u'South Beach']

        rv = getSFNeighNames(locs)
        if expected != rv:
            print("Expected: \n%s" % str(expected))
            print("Actual output: \n%s" % str(rv))
            raise Exception("Test Fail: Did not get expected result")
        return rv

    st = time.time()

    pp(test_getSFNeigh())

    print("\n\nOK. Tests passed.\nDone in %6.4fs secs" % (time.time() - st))

['Marina',
 'Lower Nob Hill',
 'Civic Center',
 'Bayview',
 'Lower Pacific Heights',
 'Dolores Heights',
 'South of Market',
 'South Beach']


OK. Tests passed.
Done in 0.0018s secs


In [3]:
df = pd.read_csv("/Users/mattiesanseverino/Downloads/parcels_1791_1922.csv", low_memory=False)

In [15]:
# Function that operates on a row
def get_neighborhood(row):
    return getSFNeighNames([[row['POINT_X'], row['POINT_Y']]])

# Apply function to each row and store result in a new column
df['neighborhood'] = df.apply(get_neighborhood, axis=1)

In [16]:
df.head()

Unnamed: 0,OBJECTID,closed_rol,property_l,parcel_num,block,lot,volume_num,use_code,use_defini,property_c,...,supervis_2,analysis_n,row_id,date_data_,time_data_,date_dat_2,time_dat_2,POINT_X,POINT_Y,neighborhood
0,109356,2017,2731 2701 TAYLOR ST0000,12002,12,2,1,COMR,Commercial Retail,C,...,3.0,North Beach,20170012002,2023-10-02 0:00:00,12:14:02.000,2023-10-04 0:00:00,09:38:21.000,-122.41587,37.807534,[Fishermans Wharf]
1,109357,2017,0000 0408 FRANCISCO ST0000,41132,41,132,1,SRES,Single Family Residential,Z,...,3.0,North Beach,20170041132,2023-10-02 0:00:00,12:14:02.000,2023-10-04 0:00:00,09:38:21.000,-122.412052,37.805038,[North Beach]
2,109358,2017,0000 0410 FRANCISCO ST0000,41133,41,133,1,SRES,Single Family Residential,Z,...,3.0,North Beach,20170041133,2023-10-02 0:00:00,12:14:02.000,2023-10-04 0:00:00,09:38:21.000,-122.412052,37.805038,[North Beach]
3,109359,2017,0440B0434 CHESTNUT ST0000,53005,53,5,1,MRES,Multi-Family Residential,A5,...,3.0,North Beach,20170053005,2023-10-02 0:00:00,12:14:02.000,2023-10-04 0:00:00,09:38:21.000,-122.410634,37.804369,[North Beach]
4,109360,2017,0000 2115 MASON ST0000,65008,65,8,1,SRES,Single Family Residential,D,...,3.0,North Beach,20170065008,2023-10-02 0:00:00,12:14:02.000,2023-10-04 0:00:00,09:38:21.000,-122.41335,37.80308,[North Beach]


In [None]:
# pull out Multi-Family Residential
# group by neighborhood and:
#### sum number of units
#### sum number of units before 1979 and after
#### divide up by lot area?