In [1]:
import os
import io
import zipfile
import requests

import numpy as np
import pandas as pd
import geopandas as gpd

In [2]:
data_dir = 'data'
output_file = 'benchmarks.geojson'
PLUTO_cache = 'data/{}_mappluto_16v2'
PLUTO_shpfile = '{}MapPLUTO.shp'
PLUTO_cache_gj = 'pluto.geojson'
PLUTO_url = 'https://www1.nyc.gov/assets/planning/download/zip/data-maps/open-data/{}_mappluto_16v2.zip'
borough_codes = ['mn', 'bk', 'bx', 'qn', 'si']

# Make sure directory exists
if not os.path.isdir('data'):
    os.makedirs('data')

In [5]:
# Load PLUTO shapefile
if not os.path.isfile(PLUTO_cache_gj):
    for code in borough_codes:
        if not os.path.exists(PLUTO_cache.format(code)):
            print('downloading to {}...'.format(PLUTO_cache.format(code)))
            zipfile.ZipFile(
                io.BytesIO(requests.get(PLUTO_url.format(code)).content)
            ).extractall(PLUTO_cache.format(code))

    df_pluto = gpd.GeoDataFrame(pd.concat([
        gpd.read_file( os.path.join(PLUTO_cache.format(code), PLUTO_shpfile.format(code.upper())) )[['BBL', 'geometry']]
        for code in borough_codes
    ], ignore_index=True))
    
    df_pluto.to_file(PLUTO_cache_gj, driver="GeoJSON")

df_pluto = gpd.read_file(PLUTO_cache_gj)
print(df_pluto.shape)
df_pluto.head()

(857237, 2)


Unnamed: 0,BBL,geometry
0,1000010000.0,"POLYGON ((973154.7117999941 194614.3315999955,..."
1,1000090000.0,"POLYGON ((980492.3622000068 195423.9451999962,..."
2,1000158000.0,"POLYGON ((979999.0820000023 196493.2800000012,..."
3,1000130000.0,"POLYGON ((980424.4003999978 196274.334800005, ..."
4,1000010000.0,"POLYGON ((979561.8711999953 191884.2495999932,..."


<IPython.core.display.Javascript object>

In [6]:
df = pd.read_csv('final_df.csv').drop('Unnamed: 0', axis=1)
print(df.shape)
df.head()

(9718, 66)


Unnamed: 0,BBL,EUI_2013,floorArea_2013,EUI_2014,floorArea_2014,EUI_2015,floorArea_2015,EUI_2016,floorArea_2016,Occupancy,...,NER_km,Pred_EUI_lr,EER_lr,Pred_WUI_lr,WER_lr,Pred_NUI_lr,NER_lr,avgEER,avgWER,avgNER
0,1011860091,60.9,63121.0,70.9,58266.0,68.6,58266.0,68.3,58266.0,100.0,...,0.023065,118.101796,0.578315,65.218895,0.405557,39.393148,0.029372,0.575079,0.877585,1.525277
1,1000167501,64.9,199141.0,74.2,220821.0,77.0,220821.0,76.2,220821.0,100.0,...,1.165229,157.38702,0.484157,36.699431,1.417188,49.438675,0.956415,0.450685,1.733629,1.693
2,1000167502,71.5,150667.0,74.8,150855.0,41.1,150855.0,74.2,150855.0,100.0,...,0.308266,140.92932,0.526505,31.887616,1.398662,48.740912,0.317269,0.56998,1.21318,1.500425
3,1000167504,74.6,112312.0,77.7,91774.0,73.3,91774.0,71.7,91774.0,100.0,...,0.954659,150.952137,0.474985,40.670656,0.964332,48.983661,0.97767,0.530359,1.09377,1.446322
4,1000167505,116.7,235366.0,106.2,257092.0,106.7,257092.0,101.7,257092.0,100.0,...,1.608733,151.619315,0.670759,31.528965,1.788831,49.478796,1.319371,0.611942,2.458374,2.258548


In [7]:
print(df.shape, df_pluto.shape)
gdf = gpd.GeoDataFrame(pd.merge(df, df_pluto, on='BBL', how='inner'), crs={'init':'epsg:2263'}).to_crs(epsg=4326)
df.shape

((9718, 66), (857237, 2))


(9718, 66)

In [9]:
gdf.to_file(output_file, driver="GeoJSON")

<IPython.core.display.Javascript object>