Import libraries

In [3]:
import requests
import pandas as pd
import numpy as np


Scrape the NYC neighborhood mapping from the New York State Department of Health website. These are not quite the mappings a realtor would use, but they should be good enough.

In [13]:
# Page whose first HTML table holds the borough / neighborhood / ZIP code listing.
url = 'https://www.health.ny.gov/statistics/cancer/registry/appendix/neighborhoods.htm'

# read_html returns one DataFrame per <table> found on the page; keep the first.
html_tables = pd.read_html(url)
df = html_tables[0]

Preprocess the scraped table to get it into a format that I can use.

In [14]:
# Tidy the scraped table into three columns: Borough, Neighborhood, Zips.
# NOTE(review): the integer column labels 0 / 1 / 2 used below assume read_html
# returned a header-less table; confirm against the installed pandas version.

# Keep rows from index label 1 onward (drops the first scraped row, presumably
# the table's own header text). The explicit copy makes this an independent
# frame, so the assignments below do not raise SettingWithCopyWarning.
df = df.loc[1:].copy()

# Rows with nothing in the third column are treated as shifted one column to
# the left (presumably rows under a borough cell that spans several table
# rows; verify against the page).
is_shifted = df.iloc[:, 2].isnull()

# Move those rows one column to the right. Order matters: column 2 must be
# filled from column 1 before column 1 is overwritten from column 0.
df.loc[is_shifted, 2] = df.loc[is_shifted, 1]
df.loc[is_shifted, 1] = df.loc[is_shifted, 0]
df.loc[is_shifted, 0] = None

# Keep only the first three columns and give them meaningful names.
df = df.iloc[:, 0:3].copy()
df.columns = ['Borough', 'Neighborhood', 'Zips']

# Shifted rows now have an empty Borough; inherit it from the nearest row above.
# Series.ffill() replaces fillna(method='ffill'), which is deprecated in pandas 2.1+.
df['Borough'] = df['Borough'].ffill()
df.head()


Unnamed: 0,Borough,Neighborhood,Zips
1,Bronx,Central Bronx,"10453, 10457, 10460"
2,Bronx,Bronx Park and Fordham,"10458, 10467, 10468"
3,Bronx,High Bridge and Morrisania,"10451, 10452, 10456"
4,Bronx,Hunts Point and Mott Haven,"10454, 10455, 10459, 10474"
5,Bronx,Kingsbridge and Riverdale,"10463, 10471"


The ZIP codes are stored as comma-separated strings. Convert each one to a list.

In [15]:
# Turn each comma-separated ZIP string into a list of ZIP codes.
# The scraped text separates codes with ", ", so all whitespace is removed
# first; splitting on ',' alone would leave a leading space on every code after
# the first (' 10457'), which breaks exact matching on ZIP code later.
df['Zips'] = df['Zips'].str.replace(r'\s+', '', regex=True).str.split(',')

In [16]:
# Preview the first five rows to check the Zips column after the split.
df.head(n=5)

Unnamed: 0,Borough,Neighborhood,Zips
1,Bronx,Central Bronx,"[10453, 10457, 10460]"
2,Bronx,Bronx Park and Fordham,"[10458, 10467, 10468]"
3,Bronx,High Bridge and Morrisania,"[10451, 10452, 10456]"
4,Bronx,Hunts Point and Mott Haven,"[10454, 10455, 10459, 10474]"
5,Bronx,Kingsbridge and Riverdale,"[10463, 10471]"


Reshape the data into one row per ZIP code, with the columns Borough, Neighborhood, ZipCode.

In [20]:
# Build one row per ZIP code: explode each list in `Zips` into its own row.
# The original index label is repeated for every ZIP in the list, which is what
# the index-on-index merge below relies on. Series.explode replaces building one
# small DataFrame per neighborhood and concatenating them.
tempZipDF = df['Zips'].explode().to_frame('ZipCode')

# Alias kept so that code referring to `neighborhoodsDF` keeps working.
neighborhoodsDF = df

# Attach Borough / Neighborhood to every ZIP code by matching index labels,
# then keep only the three columns of interest.
neighborhoodMappingsDF = neighborhoodsDF.merge(
    tempZipDF, left_index=True, right_index=True
)[['Borough', 'Neighborhood', 'ZipCode']]

neighborhoodMappingsDF.head()

Unnamed: 0,Borough,Neighborhood,ZipCode
1,Bronx,Central Bronx,10453
1,Bronx,Central Bronx,10457
1,Bronx,Central Bronx,10460
2,Bronx,Bronx Park and Fordham,10458
2,Bronx,Bronx Park and Fordham,10467


In [21]:
# Write the Borough / Neighborhood / ZipCode table to a pickle file at a path
# relative to the current working directory.
# NOTE(review): the file name has no extension and looks truncated; confirm it
# matches whatever code reads this file before renaming it.
neighborhoodMappingsDF.to_pickle(path='neighborhoodMapp')

Unnamed: 0,Borough,Neighborhood,ZipCode
1,Bronx,Central Bronx,10453
1,Bronx,Central Bronx,10457
1,Bronx,Central Bronx,10460
2,Bronx,Bronx Park and Fordham,10458
2,Bronx,Bronx Park and Fordham,10467
2,Bronx,Bronx Park and Fordham,10468
3,Bronx,High Bridge and Morrisania,10451
3,Bronx,High Bridge and Morrisania,10452
3,Bronx,High Bridge and Morrisania,10456
4,Bronx,Hunts Point and Mott Haven,10454
