## Importing libraries

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import altair as alt



In [2]:
# Switch Altair to its 'json' data transformer for every chart in this notebook.
# NOTE(review): presumably this keeps large frames out of the embedded chart spec — confirm in the Altair docs.
alt.data_transformers.enable('json')
# Also lift Altair's row-limit check. As the last expression of the cell, the
# returned enabler object is what gets echoed as the cell output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('json')

## Importing Energy Grade Data

### Importing 2020 Data

In [3]:
# 2020 energy grades with building geometry, read from a zip archive served raw by GitHub.
LL33_2020_URL = 'https://github.com/juanfrans/building-energy-grades/blob/main/input/ll33_2020.zip?raw=true'
energyData2020 = gpd.read_file(LL33_2020_URL)

In [4]:
energyData2020.head()

Unnamed: 0,OBJECTID,ID,Required_t,Boro,Block_1,Lot_1,Esmnt,Building_C,Tax_Class,Building_1,...,BBL_Duplic,dobnyc_G_1,BBL_1,BBL_MapPLU,Shape_Leng,Shape_Area,EnergyStar,LetterGrad,DCAS_City,geometry
0,1,24369,,1,1,10,,Y4,0,124,...,,D/1,1000010010,1000010010,12277.823358,7550339.0,1,D,Y,"POLYGON ((-74.02240 40.68443, -74.02404 40.683..."
1,2,24370,,1,2,23,,T2,0,1,...,,F/-,1000020023,1000020023,2949.77917,96902.37,0,F,Y,"MULTIPOLYGON (((-74.01107 40.70151, -74.01107 ..."
2,3,1,N,1,4,7501,,R0,2,1,...,,C/61,1000047501,1000047501,1360.324896,116801.1,61,C,N,"POLYGON ((-74.01264 40.70240, -74.01256 40.702..."
3,4,3,N,1,5,7501,,R0,2,1,...,,B/76,1000057501,1000057501,979.466956,55990.25,76,B,N,"POLYGON ((-74.01120 40.70243, -74.01012 40.702..."
4,5,7,Y,1,8,7501,,R0,2,1,...,,D/13,1000087501,1000087501,415.017124,10538.33,13,D,N,"POLYGON ((-74.01278 40.70275, -74.01289 40.703..."


In [5]:
energyData2020.tail()

Unnamed: 0,OBJECTID,ID,Required_t,Boro,Block_1,Lot_1,Esmnt,Building_C,Tax_Class,Building_1,...,BBL_Duplic,dobnyc_G_1,BBL_1,BBL_MapPLU,Shape_Leng,Shape_Area,EnergyStar,LetterGrad,DCAS_City,geometry
21676,21677,26626,,5,3696,100,,W1,0,2,...,,A/97,5036960100,5036960100,2098.428554,257526.88897,97,A,Y,"POLYGON ((-74.09842 40.57677, -74.09855 40.576..."
21677,21678,24319,Y,5,3983,65,,D3,2,2,...,,B/82,5039830065,5039830065,1994.304592,192135.569684,82,B,N,"POLYGON ((-74.11093 40.56352, -74.11288 40.564..."
21678,21679,24340,N,5,5497,7,,K6,4,1,...,,D/28,5054970007,5054970007,2405.170686,221538.034369,28,D,N,"POLYGON ((-74.15969 40.54598, -74.16183 40.544..."
21679,21680,26649,,5,6544,1,,W1,0,4,...,,A/96,5065440001,5065440001,1694.87969,157236.325985,96,A,Y,"POLYGON ((-74.18614 40.52383, -74.18516 40.522..."
21680,21681,26664,,5,7971,250,,K1,0,2,...,,A/89,5079710250,5079710250,2431.620527,283879.990929,89,A,N,"POLYGON ((-74.23587 40.52091, -74.23590 40.520..."


In [6]:
energyData2020.shape

(21681, 26)

In [7]:
energyData2020.columns

Index(['OBJECTID', 'ID', 'Required_t', 'Boro', 'Block_1', 'Lot_1', 'Esmnt',
       'Building_C', 'Tax_Class', 'Building_1', 'DOF_Gross_', 'Street_Num',
       'Street_Nam', 'Zipcode_1', 'BoroughNam', 'BBL_Altere', 'BBL_Duplic',
       'dobnyc_G_1', 'BBL_1', 'BBL_MapPLU', 'Shape_Leng', 'Shape_Area',
       'EnergyStar', 'LetterGrad', 'DCAS_City', 'geometry'],
      dtype='object')

Does the `Building_C` column correspond to the values in the PLUTO dataset?

The columns that originally came with the PDF are:

* BBL
* Street number
* Street name
* DOF square footage (dept. of finance?)
* Energy Star 1-100 score
* Energy efficiency grade

In [8]:
energyData2020.sample(5)

Unnamed: 0,OBJECTID,ID,Required_t,Boro,Block_1,Lot_1,Esmnt,Building_C,Tax_Class,Building_1,...,BBL_Duplic,dobnyc_G_1,BBL_1,BBL_MapPLU,Shape_Leng,Shape_Area,EnergyStar,LetterGrad,DCAS_City,geometry
3863,3864,6160,N,1,1460,1,,D4,2,1,...,,C/60,1014600001,1014600001,432.16709,11670.869641,60,C,N,"POLYGON ((-73.95853 40.76349, -73.95850 40.763..."
9162,9163,10414,Y,2,2712,23,,D1,2,1,...,,C/64,2027120023,2027120023,421.824509,11101.749996,64,C,N,"POLYGON ((-73.89437 40.82086, -73.89437 40.820..."
5046,5047,5006,N,1,1268,7503,,R0,2,1,...,,D/15,1012687503,1012687503,568.334906,18989.75347,15,D,N,"POLYGON ((-73.97699 40.76083, -73.97718 40.760..."
9315,9316,13128,Y,2,3915,23,,W2,4,1,...,,A/100,2039150023,2039150023,448.696029,12388.09178,100,A,N,"POLYGON ((-73.86832 40.83761, -73.86867 40.837..."
15120,15121,21554,N,4,1918,65,,D1,2,1,...,,A/86,4019180065,4019180065,910.302964,51582.919181,86,A,N,"POLYGON ((-73.86196 40.73708, -73.86253 40.736..."


In [9]:
energyData2020.dtypes

OBJECTID         int64
ID               int64
Required_t      object
Boro             int64
Block_1          int64
Lot_1            int64
Esmnt           object
Building_C      object
Tax_Class        int64
Building_1       int64
DOF_Gross_       int64
Street_Num      object
Street_Nam      object
Zipcode_1        int64
BoroughNam      object
BBL_Altere      object
BBL_Duplic      object
dobnyc_G_1      object
BBL_1            int64
BBL_MapPLU       int64
Shape_Leng     float64
Shape_Area     float64
EnergyStar       int64
LetterGrad      object
DCAS_City       object
geometry      geometry
dtype: object

### Importing 2021 Data

In [10]:
# Preliminary 2021 disclosure spreadsheet (tabular only, no geometry), served raw by GitHub.
LL33_2021_URL = 'https://github.com/juanfrans/building-energy-grades/blob/main/input/Preliminary%202021%20LL33%20Data%20Disclosure.xlsx?raw=true'
energyData2021 = pd.read_excel(LL33_2021_URL)

In [11]:
energyData2021.head()

Unnamed: 0,10 Digit BBL,2021 score,2021 grade,Boro,Block,Lot,Building Count,DOF Gross Square Footage,Street Number,Street Name
0,1008567502,73.0,B,1,856,7502,1,341125,225,5 AVENUE
1,4012380040,51.0,D,4,1238,40,1,208252,39-60,54 STREET
2,1008380021,86.0,A,1,838,21,1,57636,35,WEST 36 STREET
3,1007620025,64.0,C,1,762,25,1,274209,307,WEST 38 STREET
4,2036000004,51.0,D,2,3600,4,11,1021752,1850,LAFAYETTE AVENUE


In [12]:
energyData2021.tail()

Unnamed: 0,10 Digit BBL,2021 score,2021 grade,Boro,Block,Lot,Building Count,DOF Gross Square Footage,Street Number,Street Name
20355,5076260001,100.0,A,5,7626,1,1,31300,2,ARTHUR KILL ROAD
20356,5079910100,88.0,A,5,7991,100,1,50451,99,ELLIS STREET
20357,5080080134,,F,5,8008,134,4,64167,250,PAGE AVENUE
20358,2025260090,22.0,D,2,2526,90,1,475438,1131,OGDEN AVENUE
20359,3005020038,,F,3,502,38,1,33000,133,VAN DUZER STREET


In [13]:
energyData2021.shape

(20360, 10)

In [14]:
energyData2021.columns

Index(['10 Digit BBL ', '2021 score', '2021 grade', 'Boro', 'Block', 'Lot',
       'Building Count', 'DOF Gross Square Footage ', 'Street Number',
       'Street Name'],
      dtype='object')

In [15]:
energyData2021.dtypes

10 Digit BBL                   int64
2021 score                   float64
2021 grade                    object
Boro                           int64
Block                          int64
Lot                            int64
Building Count                 int64
DOF Gross Square Footage       int64
Street Number                 object
Street Name                   object
dtype: object

In [16]:
energyData2021.sample(5)

Unnamed: 0,10 Digit BBL,2021 score,2021 grade,Boro,Block,Lot,Building Count,DOF Gross Square Footage,Street Number,Street Name
13439,3023467502,,F,3,2346,7502,1,36515,349,METROPOLITAN AVENUE
20116,5000050051,89.0,A,5,5,51,1,168722,60,BAY STREET
6208,1018950038,59.0,C,1,1895,38,1,102951,404,RIVERSIDE DRIVE
9396,2029900050,87.0,A,2,2990,50,1,46100,1665,VYSE AVENUE
2826,1010100061,37.0,D,1,1010,61,1,120738,911,7 AVENUE


## Cleaning the data

In [17]:
energyData2020['LetterGrad'].unique()

array(['D', 'F', 'C', 'B', 'A'], dtype=object)

In [18]:
energyData2020['LetterGrad'].value_counts(ascending=False)

D    9163
B    3610
C    3376
A    3364
F    2168
Name: LetterGrad, dtype: int64

In [19]:
# Keep only buildings with a grade other than 'F'; copy so later column edits
# never touch a view of the unfiltered frame.
hasPassingGrade2020 = energyData2020['LetterGrad'].ne('F')
energyData2020 = energyData2020.loc[hasPassingGrade2020].copy(deep=True)

In [20]:
energyData2020.shape

(19513, 26)

In [21]:
energyData2021['2021 grade'].value_counts(ascending=False)

D    7977
A    4046
B    3340
C    3139
F    1858
Name: 2021 grade, dtype: int64

In [22]:
# Same filter for 2021: discard 'F' rows (the previews above show them with a
# missing score) and work on an independent copy.
hasPassingGrade2021 = energyData2021['2021 grade'].ne('F')
energyData2021 = energyData2021.loc[hasPassingGrade2021].copy(deep=True)

In [23]:
energyData2021.shape

(18502, 10)

In [24]:
# Reduce the 2020 frame to the fields that also exist in the 2021 spreadsheet,
# plus the geometry needed for mapping.
keptColumns2020 = [
    'Boro', 'Block_1', 'Lot_1', 'Building_1', 'DOF_Gross_',
    'Street_Num', 'Street_Nam', 'BBL_1',
    'EnergyStar', 'LetterGrad',
    'geometry',
]
energyData2020 = energyData2020[keptColumns2020].copy(deep=True)

In [25]:
# Give both years the same column vocabulary so they can be compared and merged.
# Note the trailing spaces in two of the 2021 spreadsheet headers.
columnNames2020 = {
    'Block_1': 'Block',
    'Lot_1': 'Lot',
    'Building_1': 'BuildingCount',
    'DOF_Gross_': 'GrossSF',
    'Street_Num': 'StreetNumber',
    'Street_Nam': 'StreetName',
    'BBL_1': 'BBL',
    'EnergyStar': 'EnergyScore',
    'LetterGrad': 'EnergyGrade',
}
columnNames2021 = {
    '10 Digit BBL ': 'BBL',
    '2021 score': 'EnergyScore',
    '2021 grade': 'EnergyGrade',
    'Building Count': 'BuildingCount',
    'DOF Gross Square Footage ': 'GrossSF',
    'Street Number': 'StreetNumber',
    'Street Name': 'StreetName',
}
energyData2020 = energyData2020.rename(columns=columnNames2020)
energyData2021 = energyData2021.rename(columns=columnNames2021)

## Summary Statistics

In [26]:
energyData2020['EnergyScore'].max()

100

In [27]:
energyData2020['EnergyScore'].min()

1

In [28]:
energyData2020['EnergyScore'].describe()

count    19513.000000
mean        54.395839
std         28.729948
min          1.000000
25%         32.000000
50%         57.000000
75%         78.000000
max        100.000000
Name: EnergyScore, dtype: float64

In [29]:
energyData2021['EnergyScore'].describe()

count    18502.000000
mean        57.278835
std         28.921142
min          1.000000
25%         35.000000
50%         61.000000
75%         82.000000
max        100.000000
Name: EnergyScore, dtype: float64

In [30]:
# Number of 2020 buildings per letter grade.
gradeBars2020 = alt.Chart(energyData2020).mark_bar()
gradeBars2020.encode(x='EnergyGrade:O', y='count():Q')

In [31]:
# Share of 2020 buildings in each grade, as one bar normalised to 100%.
alt.Chart(energyData2020).mark_bar().encode(
    x=alt.X('count():Q', axis=alt.Axis(format='.0%'), stack='normalize'),
    color='EnergyGrade:O',
)

In [32]:
# Share of 2021 buildings in each grade, as one bar normalised to 100%.
alt.Chart(energyData2021).mark_bar().encode(
    x=alt.X('count():Q', axis=alt.Axis(format='.0%'), stack='normalize'),
    color='EnergyGrade:O',
)

In [33]:
# Build the same normalised grade-share bar for each year, then stack the two
# charts vertically for a direct comparison.
chart2020, chart2021 = (
    alt.Chart(yearFrame).mark_bar().encode(
        color=alt.Color('EnergyGrade:O'),
        x=alt.X('count():Q', stack='normalize', axis=alt.Axis(format='.0%'))
    )
    for yearFrame in (energyData2020, energyData2021)
)
alt.vconcat(chart2020, chart2021)

In [34]:
# Score against gross square footage for 2020, one point per building,
# coloured by letter grade.
scorePoints2020 = alt.Chart(energyData2020).mark_point()
scorePoints2020.encode(
    x='EnergyScore:Q',
    y='GrossSF:Q',
    color='EnergyGrade:N',
)

In [35]:
energyData2020['GrossSF'].describe()

count    1.951300e+04
mean     1.168086e+05
std      2.759876e+05
min      0.000000e+00
25%      4.057300e+04
50%      6.280500e+04
75%      1.139690e+05
max      1.709504e+07
Name: GrossSF, dtype: float64

In [36]:
# Same scatter on a log-10 area axis. Rows with GrossSF of 0 (the minimum
# reported by describe() above) are excluded before plotting.
positiveArea2020 = energyData2020.loc[energyData2020['GrossSF'].gt(0)]
alt.Chart(positiveArea2020).mark_point().encode(
    x='EnergyScore:Q',
    y=alt.Y('GrossSF:Q', scale=alt.Scale(type='log', base=10)),
    color='EnergyGrade:O',
)

In [37]:
# One score boxplot per year, stacked vertically.
chart2020, chart2021 = (
    alt.Chart(yearFrame).mark_boxplot().encode(x=alt.X('EnergyScore:Q'))
    for yearFrame in (energyData2020, energyData2021)
)
alt.vconcat(chart2020, chart2021)

# Outliers can be ignored by passing `extent='min-max'` to `mark_boxplot()`

## Grouping By

In [38]:
# Per-borough summary of the 2020 scores. Aggregating the single score column
# yields flat column names directly, so only the group key needs renaming.
scoreStatistics = ['count', 'max', 'min', 'mean', 'median', 'std']
boroughs2020 = (
    energyData2020[['Boro', 'EnergyScore']]
    .groupby('Boro')['EnergyScore']
    .agg(scoreStatistics)
    .reset_index()
    .rename(columns={'Boro': 'Borough'})
)

Possible aggregation functions are

* count() – Number of non-null observations
* sum() – Sum of values
* mean() – Mean of values
* median() – Arithmetic median of values
* min() – Minimum
* max() – Maximum
* mode() – Mode
* std() – Standard deviation
* var() – Variance

In [39]:
boroughs2020.head()

Unnamed: 0,Borough,count,max,min,mean,median,std
0,1,7268,100,1,53.967529,58.0,29.005787
1,2,3941,100,1,51.065466,51.0,29.195878
2,3,4794,100,1,56.171882,59.0,28.221008
3,4,3236,100,1,56.257108,59.0,27.986805
4,5,274,100,1,60.60219,64.0,27.060618


In [40]:
# Per-borough summary of the 2021 scores. Aggregating the single score column
# yields flat column names directly, so only the group key needs renaming.
scoreStatistics = ['count', 'max', 'min', 'mean', 'median', 'std']
boroughs2021 = (
    energyData2021[['Boro', 'EnergyScore']]
    .groupby('Boro')['EnergyScore']
    .agg(scoreStatistics)
    .reset_index()
    .rename(columns={'Boro': 'Borough'})
)

In [41]:
boroughs2021.head()

Unnamed: 0,Borough,count,max,min,mean,median,std
0,1,7201,100.0,1.0,60.210943,66.0,28.641987
1,2,3729,100.0,1.0,50.893001,51.0,29.637601
2,3,4441,100.0,1.0,57.124296,60.0,28.644377
3,4,2945,100.0,1.0,58.31511,61.0,27.873194
4,5,186,100.0,2.0,59.069892,63.0,28.043797


Manhattan shows the largest increase in mean score (about 54.0 in 2020 to 60.2 in 2021). Borough codes:
    1 = Manhattan
    2 = Bronx
    3 = Brooklyn
    4 = Queens
    5 = Staten Island

In [42]:
# Tag each summary with its year, then stack them into one long table.
# The original row labels are kept, so 0-4 appear once per year.
boroughs2020['year'], boroughs2021['year'] = 2020, 2021
boroughs = pd.concat([boroughs2020, boroughs2021], axis=0)

In [43]:
boroughs

Unnamed: 0,Borough,count,max,min,mean,median,std,year
0,1,7268,100.0,1.0,53.967529,58.0,29.005787,2020
1,2,3941,100.0,1.0,51.065466,51.0,29.195878,2020
2,3,4794,100.0,1.0,56.171882,59.0,28.221008,2020
3,4,3236,100.0,1.0,56.257108,59.0,27.986805,2020
4,5,274,100.0,1.0,60.60219,64.0,27.060618,2020
0,1,7201,100.0,1.0,60.210943,66.0,28.641987,2021
1,2,3729,100.0,1.0,50.893001,51.0,29.637601,2021
2,3,4441,100.0,1.0,57.124296,60.0,28.644377,2021
3,4,2945,100.0,1.0,58.31511,61.0,27.873194,2021
4,5,186,100.0,2.0,59.069892,63.0,28.043797,2021


In [44]:
# Mean score per year, one facet row per borough code.
boroughBars = alt.Chart(boroughs).mark_bar()
boroughBars.encode(
    row='Borough:O',
    y='year:O',
    x='mean:Q',
    color='year:O',
)

## Finding buildings that have improved/worsened the most

In [45]:
# Inner-join the two years on BBL so only buildings graded in both years remain.
# merge() is called from the GeoDataFrame side (instead of the stand-alone
# pd.merge) because that is the form GeoPandas recommends for keeping the result
# a GeoDataFrame, which the later to_crs / centroid / sjoin cells depend on.
# Columns present in both frames receive pandas' default suffixes:
# _x for the 2020 side and _y for the 2021 side.
buildingEnergyData = energyData2020.merge(energyData2021, on='BBL', how='inner')

In [None]:
buildingEnergyData.head()

In [None]:
buildingEnergyData.columns

In [46]:
# The descriptive fields exist in both years; keep the 2020 (_x) copies and
# discard the 2021 (_y) ones.
sharedFields = ('Boro', 'Block', 'Lot', 'BuildingCount', 'GrossSF', 'StreetNumber', 'StreetName')
buildingEnergyData = buildingEnergyData.drop(
    columns=[f'{fieldName}_y' for fieldName in sharedFields]
)

In [47]:
# Strip the merge suffixes from the descriptive fields and label the score and
# grade columns with their year.
mergedColumnNames = {
    'Boro_x': 'Borough',
    'Block_x': 'Block',
    'Lot_x': 'Lot',
    'BuildingCount_x': 'BuildingCount',
    'GrossSF_x': 'GrossSF',
    'StreetNumber_x': 'StreetNumber',
    'StreetName_x': 'StreetName',
    'BBL': 'BBL',
    'EnergyScore_x': 'EnergyScore2020',
    'EnergyGrade_x': 'EnergyGrade2020',
    'EnergyScore_y': 'EnergyScore2021',
    'EnergyGrade_y': 'EnergyGrade2021',
}
buildingEnergyData = buildingEnergyData.rename(columns=mergedColumnNames)

In [None]:
buildingEnergyData.head()

In [48]:
# Year-over-year movement: 'change' is the raw point difference (2021 - 2020),
# 'perChange' is that difference as a fraction of the 2020 score.
scoreDelta = buildingEnergyData['EnergyScore2021'].sub(buildingEnergyData['EnergyScore2020'])
buildingEnergyData['change'] = scoreDelta
buildingEnergyData['perChange'] = scoreDelta.div(buildingEnergyData['EnergyScore2020'])

In [None]:
buildingEnergyData.head()

In [None]:
# Ten buildings with the largest relative improvement.
rankedByPerChange = buildingEnergyData.sort_values(by='perChange', ascending=False)
rankedByPerChange.head(10)

In [None]:
# Same ranking restricted to buildings that already scored above 50 in 2020,
# where a large relative gain is not just an artefact of a tiny starting score.
scoredAbove50 = buildingEnergyData['EnergyScore2020'].gt(50)
buildingEnergyData.loc[scoredAbove50].sort_values(by='perChange', ascending=False).head(10)

Info on individual buildings can be found [here](https://a810-dobnow.nyc.gov/publish/#!/), but the site does not provide the documents from their submissions.

In [None]:
buildingEnergyData['perChange'].describe()

In [None]:
# Distribution of the relative change across all matched buildings.
perChangeBox = alt.Chart(buildingEnergyData).mark_boxplot()
perChangeBox.encode(x='perChange:Q')

In [None]:
# Same boxplot after trimming the tails: keep rows strictly between the 8th and
# 81st percentiles of perChange.
perChangeValues = buildingEnergyData['perChange']
lowerCut = perChangeValues.quantile(0.08)
upperCut = perChangeValues.quantile(0.81)
trimmedBuildings = buildingEnergyData[(perChangeValues < upperCut) & (perChangeValues > lowerCut)]
alt.Chart(trimmedBuildings).mark_boxplot().encode(
    x=alt.X('perChange:Q')
)

In [None]:
# Relative change per borough, trimmed to rows strictly between the 5th and
# 90th percentiles of perChange.
perChangeValues = buildingEnergyData['perChange']
lowerCut = perChangeValues.quantile(0.05)
upperCut = perChangeValues.quantile(0.9)
trimmedBuildings = buildingEnergyData[(perChangeValues < upperCut) & (perChangeValues > lowerCut)]
alt.Chart(trimmedBuildings).mark_boxplot().encode(
    x=alt.X('perChange:Q'),
    row=alt.Row('Borough:N')
)

## Mapping buildings

In [None]:
type(buildingEnergyData)

In [None]:
# Draw every matched building footprint, shaded by its 2020 score.
footprintShapes = alt.Chart(buildingEnergyData).mark_geoshape()
footprintShapes.encode(color='EnergyScore2020:Q')

In [49]:
# 2020 Neighborhood Tabulation Areas from the ArcGIS feature service:
# all rows (where=1=1), all fields, returned as GeoJSON in EPSG:4326.
NTA_2020_URL = 'https://services5.arcgis.com/GfwWNkhOj9bNBqoJ/arcgis/rest/services/NYC_Neighborhood_Tabulation_Areas_2020/FeatureServer/0/query?where=1=1&outFields=*&outSR=4326&f=pgeojson'
ntaData = gpd.read_file(NTA_2020_URL)

In [None]:
ntaData.head()

In [None]:
buildingEnergyData.shape

In [None]:
# Row count after joining footprints to NTAs with the default predicate;
# compare with the shape above to see whether footprints match several NTAs.
footprintsWithNta = gpd.sjoin(buildingEnergyData, ntaData, how='left')
footprintsWithNta.shape

In [None]:
# Footprints that are not entirely within any single NTA (no match under 'within').
withinJoin = buildingEnergyData.sjoin(ntaData, how='left', predicate='within')
buildingEnergyData[withinJoin['NTAName'].isna()]

In [None]:
# Try the opposite predicate: which footprints contain an NTA polygon?
containsJoin = gpd.sjoin(buildingEnergyData, ntaData, how='left', predicate='contains')
containsJoin.head()

In [50]:
# Reproject to EPSG:2263 (a projected CRS) before computing centroids.
buildingEnergyData = buildingEnergyData.to_crs(epsg=2263)

In [51]:
# One representative point per footprint, taken from the active geometry column.
centroids = buildingEnergyData.geometry.centroid

In [52]:
centroids

0        POINT (980917.523 195093.462)
1        POINT (981308.988 195134.684)
2        POINT (980734.263 195378.599)
3        POINT (980562.547 195314.421)
4        POINT (980936.448 195695.557)
                     ...              
16788    POINT (967156.143 156137.347)
16789    POINT (956219.411 152388.623)
16790    POINT (954038.272 151304.564)
16791    POINT (953264.158 144919.698)
16792    POINT (939476.287 138144.503)
Length: 16793, dtype: geometry

In [None]:
buildingEnergyData.head()

In [53]:
# Point version of the building table: same attributes, with the centroid
# replacing the footprint polygon as geometry. Built on a deep copy so the
# polygon frame stays untouched.
buildingAttributes = buildingEnergyData.copy(deep=True)
buildingCentroids = gpd.GeoDataFrame(
    buildingAttributes,
    geometry=centroids,
    crs='epsg:2263',
)

In [54]:
buildingCentroids.head()

Unnamed: 0,Borough,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName,BBL,EnergyScore2020,EnergyGrade2020,geometry,EnergyScore2021,EnergyGrade2021,change,perChange
0,1,4,7501,1,2542563,1,WATER STREET,1000047501,61,C,POINT (980917.523 195093.462),55.0,C,-6.0,-0.098361
1,1,5,7501,1,1354691,125,BROAD STREET,1000057501,76,B,POINT (981308.988 195134.684),84.0,B,8.0,0.105263
2,1,8,7501,1,169061,2,WATER STREET,1000087501,13,D,POINT (980734.263 195378.599),4.0,D,-9.0,-0.692308
3,1,9,1,1,692431,34,WHITEHALL STREET,1000090001,72,B,POINT (980562.547 195314.421),83.0,B,11.0,0.152778
4,1,10,16,1,336025,90,BROAD STREET,1000100016,81,B,POINT (980936.448 195695.557),81.0,B,0.0,0.0


In [55]:
buildingEnergyData.head()

Unnamed: 0,Borough,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName,BBL,EnergyScore2020,EnergyGrade2020,geometry,EnergyScore2021,EnergyGrade2021,change,perChange
0,1,4,7501,1,2542563,1,WATER STREET,1000047501,61,C,"POLYGON ((980743.918 195179.570, 980767.866 19...",55.0,C,-6.0,-0.098361
1,1,5,7501,1,1354691,125,BROAD STREET,1000057501,76,B,"POLYGON ((981143.319 195192.427, 981444.122 19...",84.0,B,8.0,0.105263
2,1,8,7501,1,169061,2,WATER STREET,1000087501,13,D,"POLYGON ((980706.216 195308.503, 980676.743 19...",4.0,D,-9.0,-0.692308
3,1,9,1,1,692431,34,WHITEHALL STREET,1000090001,72,B,"POLYGON ((980485.523 195315.698, 980495.527 19...",83.0,B,11.0,0.152778
4,1,10,16,1,336025,90,BROAD STREET,1000100016,81,B,"POLYGON ((980867.416 195630.104, 980871.694 19...",81.0,B,0.0,0.0


In [56]:
# Back to EPSG:4326 so the points share the NTA layer's CRS for the spatial join.
buildingCentroids = buildingCentroids.to_crs(epsg=4326)

In [None]:
# Plot the building centroids, shaded by 2020 score.
centroidShapes = alt.Chart(buildingCentroids).mark_geoshape()
centroidShapes.encode(color='EnergyScore2020:Q')

In [57]:
# Tag every centroid with the attributes of the NTA it falls in (default
# predicate); centroids with no match are kept because of the left join.
buildingCentroids = gpd.sjoin(buildingCentroids, ntaData, how='left')

In [None]:
buildingCentroids.head()

In [58]:
# Score x floor area per building: the numerator of an area-weighted mean.
# The same GrossSF column (the 2020 value kept after the merge) weights both years.
floorArea = buildingCentroids['GrossSF']
buildingCentroids['weightedScore2020'] = buildingCentroids['EnergyScore2020'].mul(floorArea)
buildingCentroids['weightedScore2021'] = buildingCentroids['EnergyScore2021'].mul(floorArea)

In [59]:
# Per-NTA aggregates, declared as output column -> (source column, function).
# Dict order fixes the column order of the result.
ntaAggregations = {
    'buildingCount': ('EnergyScore2020', 'count'),
    'meanScore2020': ('EnergyScore2020', 'mean'),
    'stdScore2020': ('EnergyScore2020', 'std'),
    'sumScore2020': ('EnergyScore2020', 'sum'),
    'meanScore2021': ('EnergyScore2021', 'mean'),
    'sumScore2021': ('EnergyScore2021', 'sum'),
    'stdScore2021': ('EnergyScore2021', 'std'),
    'meanWeightedScore2020': ('weightedScore2020', 'mean'),
    'sumWeightedScore2020': ('weightedScore2020', 'sum'),
    'meanWeightedScore2021': ('weightedScore2021', 'mean'),
    'sumWeightedScore2021': ('weightedScore2021', 'sum'),
    'meanGrossSF': ('GrossSF', 'mean'),
    'sumGrossSF': ('GrossSF', 'sum'),
}
ntaEnergyData = (
    buildingCentroids
    .groupby('NTA2020')
    .agg(**ntaAggregations)
    .reset_index()
)

In [None]:
ntaEnergyData.head()

In [60]:
# Area-weighted mean score per NTA: sum(score * area) / sum(area).
totalFloorArea = ntaEnergyData['sumGrossSF']
ntaEnergyData['meanScoreSF2020'] = ntaEnergyData['sumWeightedScore2020'].div(totalFloorArea)
ntaEnergyData['meanScoreSF2021'] = ntaEnergyData['sumWeightedScore2021'].div(totalFloorArea)

In [None]:
ntaEnergyData.head()

In [None]:
ntaEnergyData['meanScoreSF2020'].describe()

In [61]:
# Attach the per-NTA aggregates to the NTA polygons; the left join keeps NTAs
# that have no graded buildings.
# Any aggregate columns left over from a previous run of this cell are dropped
# first. Without that, re-running the cell makes pandas suffix the clashing
# columns with _x/_y, and the map cells below would reference fields that no
# longer exist under their plain names.
aggregateColumns = ntaEnergyData.columns.drop('NTA2020')
ntaData = (
    ntaData
    .drop(columns=aggregateColumns, errors='ignore')
    .merge(ntaEnergyData, on='NTA2020', how='left')
)

In [69]:
# Side-by-side choropleths of the area-weighted mean score, one per year.
# Both maps share the same tooltip layout; only the score field differs.
map2020, map2021 = (
    alt.Chart(ntaData).mark_geoshape().encode(
        color=alt.Color(scoreField),
        tooltip=alt.Tooltip(['NTAName:N', scoreField, 'buildingCount:Q', 'NTA2020:N'])
    )
    for scoreField in ('meanScoreSF2020:Q', 'meanScoreSF2021:Q')
)
alt.hconcat(map2020, map2021)

In [63]:
# Change in the area-weighted mean per NTA: relative to 2020 ('meanScoreChange')
# and in raw score points ('rawMeanScoreChange').
weightedMeanDelta = ntaData['meanScoreSF2021'].sub(ntaData['meanScoreSF2020'])
ntaData['meanScoreChange'] = weightedMeanDelta.div(ntaData['meanScoreSF2020'])
ntaData['rawMeanScoreChange'] = weightedMeanDelta

In [64]:
# Choropleth of the raw change in area-weighted mean score.
ntaShapes = alt.Chart(ntaData).mark_geoshape()
ntaShapes.encode(color='rawMeanScoreChange:Q')

In [65]:
# Choropleth of how many graded buildings each NTA contains.
ntaShapes = alt.Chart(ntaData).mark_geoshape()
ntaShapes.encode(color='buildingCount:Q')

In [None]:
buildingEnergyData.head()

In [67]:
# Attach each building's NTA code (found through its centroid) to the footprint frame.
# The lookup is reduced to one row per BBL so the left join can never multiply
# building rows; validate= makes pandas raise if that assumption stops holding.
# A previously attached 'NTA2020' column is dropped first so the cell can be
# re-run without producing NTA2020_x / NTA2020_y.
ntaByBBL = buildingCentroids[['BBL', 'NTA2020']].drop_duplicates(subset='BBL')
buildingEnergyData = (
    buildingEnergyData
    .drop(columns='NTA2020', errors='ignore')
    .merge(ntaByBBL, on='BBL', how='left', validate='many_to_one')
)

In [68]:
buildingEnergyData.head()

Unnamed: 0,Borough,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName,BBL,EnergyScore2020,EnergyGrade2020,geometry,EnergyScore2021,EnergyGrade2021,change,perChange,NTA2020
0,1,4,7501,1,2542563,1,WATER STREET,1000047501,61,C,"POLYGON ((980743.918 195179.570, 980767.866 19...",55.0,C,-6.0,-0.098361,MN0101
1,1,5,7501,1,1354691,125,BROAD STREET,1000057501,76,B,"POLYGON ((981143.319 195192.427, 981444.122 19...",84.0,B,8.0,0.105263,MN0101
2,1,8,7501,1,169061,2,WATER STREET,1000087501,13,D,"POLYGON ((980706.216 195308.503, 980676.743 19...",4.0,D,-9.0,-0.692308,MN0101
3,1,9,1,1,692431,34,WHITEHALL STREET,1000090001,72,B,"POLYGON ((980485.523 195315.698, 980495.527 19...",83.0,B,11.0,0.152778,MN0101
4,1,10,16,1,336025,90,BROAD STREET,1000100016,81,B,"POLYGON ((980867.416 195630.104, 980871.694 19...",81.0,B,0.0,0.0,MN0101


In [77]:
# Return the footprints to EPSG:4326 for mapping.
buildingEnergyData = buildingEnergyData.to_crs(epsg=4326)

In [84]:
# Footprints in a hand-picked set of NTAs, coloured by raw score change on a
# diverging red-blue scheme.
selectedNtaCodes = ['MN0502', 'MN0402', 'MN0401', 'MN0501', 'MN0604', 'MN0603']
selectedBuildings = buildingEnergyData.loc[buildingEnergyData['NTA2020'].isin(selectedNtaCodes)]
alt.Chart(selectedBuildings).mark_geoshape().encode(
    color=alt.Color('change:Q', scale=alt.Scale(scheme='redblue'))
)

* Age of building (join to PLUTO)
* Age of renovation
* Building class or zoning
* NTA with the highest and lowest avg grade
* NTA with the highest and lowest grade normalized by sqft
* NTA with the greatest weighted change normalized by sqft
* Compare different universities
* Look for NTA with the highest std
* Look for the NTA with the highest number of buildings and map that one

In [None]:
buildingCentroids.head()

* Bring in the NTA data
* Spatial join:
  * Do we need centroids?
  * Can we do it with text attribute?
  * Spatial join
* Visualize the NTAs
* Use the NTAs to choose areas:
  * very low grades or very high
  * most overall change
  * Greatest area
* Do we do grade multiplied by area?

In [None]:
# Map every 2020 footprint, coloured by its score. The score column was renamed
# from 'EnergyStar' to 'EnergyScore' in the cleaning section, so the chart has
# to use the new name; the old field no longer exists on this frame.
alt.Chart(energyData2020).mark_geoshape().encode(
    color=alt.Color('EnergyScore:Q')
)

In [None]:
# 2020 buildings whose BBL also appears in the 2021 data.
alsoIn2021 = energyData2020['BBL'].isin(energyData2021['BBL'])
energyData2020[alsoIn2021]

In [None]:
# 2021 buildings whose BBL also appears in the 2020 data.
alsoIn2020 = energyData2021['BBL'].isin(energyData2020['BBL'])
energyData2021[alsoIn2020]

In [None]:
# 2020 buildings with no 2021 counterpart (dropped by the inner merge).
alsoIn2021 = energyData2020['BBL'].isin(energyData2021['BBL'])
energyData2020[~alsoIn2021]

In [None]:
# 2021 buildings with no 2020 counterpart (dropped by the inner merge).
alsoIn2020 = energyData2021['BBL'].isin(energyData2020['BBL'])
energyData2021[~alsoIn2020]