## Importing libraries

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import altair as alt



In [2]:
# Switch Altair to its 'json' data transformer, then lift the default row cap
# so the ~20k-row frames loaded in this notebook can be charted.
alt.data_transformers.enable('json')
# Kept as the last statement: its return value is what the cell displays.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('json')

## Importing Energy Grade Data

### Importing 2020 Data

In [3]:
# Read the zipped 2020 LL33 dataset with geopandas; the result carries a
# `geometry` column alongside the tabular attributes.
energyData2020 = gpd.read_file('../input/ll33_2020.zip')

In [4]:
energyData2020.head()

Unnamed: 0,OBJECTID,ID,Required_t,Boro,Block_1,Lot_1,Esmnt,Building_C,Tax_Class,Building_1,...,BBL_Duplic,dobnyc_G_1,BBL_1,BBL_MapPLU,Shape_Leng,Shape_Area,EnergyStar,LetterGrad,DCAS_City,geometry
0,1,24369,,1,1,10,,Y4,0,124,...,,D/1,1000010010,1000010010,12277.823358,7550339.0,1,D,Y,"POLYGON ((-74.02240 40.68443, -74.02404 40.683..."
1,2,24370,,1,2,23,,T2,0,1,...,,F/-,1000020023,1000020023,2949.77917,96902.37,0,F,Y,"MULTIPOLYGON (((-74.01107 40.70151, -74.01107 ..."
2,3,1,N,1,4,7501,,R0,2,1,...,,C/61,1000047501,1000047501,1360.324896,116801.1,61,C,N,"POLYGON ((-74.01264 40.70240, -74.01256 40.702..."
3,4,3,N,1,5,7501,,R0,2,1,...,,B/76,1000057501,1000057501,979.466956,55990.25,76,B,N,"POLYGON ((-74.01120 40.70243, -74.01012 40.702..."
4,5,7,Y,1,8,7501,,R0,2,1,...,,D/13,1000087501,1000087501,415.017124,10538.33,13,D,N,"POLYGON ((-74.01278 40.70275, -74.01289 40.703..."


In [5]:
energyData2020.tail()

Unnamed: 0,OBJECTID,ID,Required_t,Boro,Block_1,Lot_1,Esmnt,Building_C,Tax_Class,Building_1,...,BBL_Duplic,dobnyc_G_1,BBL_1,BBL_MapPLU,Shape_Leng,Shape_Area,EnergyStar,LetterGrad,DCAS_City,geometry
21676,21677,26626,,5,3696,100,,W1,0,2,...,,A/97,5036960100,5036960100,2098.428554,257526.88897,97,A,Y,"POLYGON ((-74.09842 40.57677, -74.09855 40.576..."
21677,21678,24319,Y,5,3983,65,,D3,2,2,...,,B/82,5039830065,5039830065,1994.304592,192135.569684,82,B,N,"POLYGON ((-74.11093 40.56352, -74.11288 40.564..."
21678,21679,24340,N,5,5497,7,,K6,4,1,...,,D/28,5054970007,5054970007,2405.170686,221538.034369,28,D,N,"POLYGON ((-74.15969 40.54598, -74.16183 40.544..."
21679,21680,26649,,5,6544,1,,W1,0,4,...,,A/96,5065440001,5065440001,1694.87969,157236.325985,96,A,Y,"POLYGON ((-74.18614 40.52383, -74.18516 40.522..."
21680,21681,26664,,5,7971,250,,K1,0,2,...,,A/89,5079710250,5079710250,2431.620527,283879.990929,89,A,N,"POLYGON ((-74.23587 40.52091, -74.23590 40.520..."


In [6]:
energyData2020.shape

(21681, 26)

In [7]:
energyData2020.columns

Index(['OBJECTID', 'ID', 'Required_t', 'Boro', 'Block_1', 'Lot_1', 'Esmnt',
       'Building_C', 'Tax_Class', 'Building_1', 'DOF_Gross_', 'Street_Num',
       'Street_Nam', 'Zipcode_1', 'BoroughNam', 'BBL_Altere', 'BBL_Duplic',
       'dobnyc_G_1', 'BBL_1', 'BBL_MapPLU', 'Shape_Leng', 'Shape_Area',
       'EnergyStar', 'LetterGrad', 'DCAS_City', 'geometry'],
      dtype='object')

Does the `Building_C` column correspond to the values on the PLUTO dataset?

The columns that originally came with the PDF are:

* BBL
* Street number
* Street name
* DOF square footage (dept. of finance?)
* Energy Star 1-100 score
* Energy efficiency grade

In [8]:
# Peek at five random rows. The fixed seed keeps the displayed sample
# reproducible across kernel restarts (an unseeded sample changes every run).
energyData2020.sample(5, random_state=42)

Unnamed: 0,OBJECTID,ID,Required_t,Boro,Block_1,Lot_1,Esmnt,Building_C,Tax_Class,Building_1,...,BBL_Duplic,dobnyc_G_1,BBL_1,BBL_MapPLU,Shape_Leng,Shape_Area,EnergyStar,LetterGrad,DCAS_City,geometry
16249,16250,25901,,3,6328,1,,D1,0,1,...,,D/46,3063280001,3063280001,563.609168,18238.433173,46,D,N,"POLYGON ((-73.99885 40.60732, -73.99860 40.607..."
6336,6337,6794,Y,1,1580,38,,D1,2,1,...,,A/87,1015800038,1015800038,362.761651,7997.495749,87,A,N,"POLYGON ((-73.94722 40.77384, -73.94703 40.774..."
10952,10953,12048,N,2,3225,106,,D1,2,1,...,,C/57,2032250106,2032250106,408.97559,9785.723722,57,C,N,"POLYGON ((-73.90727 40.86228, -73.90743 40.862..."
6004,6005,9110,Y,1,2180,18,,C1,2,1,...,,C/61,1021800018,1021800018,344.624346,7044.883821,61,C,N,"POLYGON ((-73.93709 40.85162, -73.93717 40.851..."
4948,4949,4903,N,1,1253,35,,D3,2,1,...,,F/-,1012530035,1012530035,366.002247,8109.761202,0,F,N,"POLYGON ((-73.97446 40.79483, -73.97465 40.794..."


In [9]:
energyData2020.dtypes

OBJECTID         int64
ID               int64
Required_t      object
Boro             int64
Block_1          int64
Lot_1            int64
Esmnt           object
Building_C      object
Tax_Class        int64
Building_1       int64
DOF_Gross_       int64
Street_Num      object
Street_Nam      object
Zipcode_1        int64
BoroughNam      object
BBL_Altere      object
BBL_Duplic      object
dobnyc_G_1      object
BBL_1            int64
BBL_MapPLU       int64
Shape_Leng     float64
Shape_Area     float64
EnergyStar       int64
LetterGrad      object
DCAS_City       object
geometry      geometry
dtype: object

### Importing 2021 Data

In [10]:
# Read the preliminary 2021 LL33 disclosure spreadsheet. Note that some of its
# headers carry a trailing space (e.g. '10 Digit BBL '), which matters when
# the columns are renamed later.
energyData2021 = pd.read_excel('../input/Preliminary 2021 LL33 Data Disclosure.xlsx')

In [11]:
energyData2021.head()

Unnamed: 0,10 Digit BBL,2021 score,2021 grade,Boro,Block,Lot,Building Count,DOF Gross Square Footage,Street Number,Street Name
0,1008567502,73.0,B,1,856,7502,1,341125,225,5 AVENUE
1,4012380040,51.0,D,4,1238,40,1,208252,39-60,54 STREET
2,1008380021,86.0,A,1,838,21,1,57636,35,WEST 36 STREET
3,1007620025,64.0,C,1,762,25,1,274209,307,WEST 38 STREET
4,2036000004,51.0,D,2,3600,4,11,1021752,1850,LAFAYETTE AVENUE


In [12]:
energyData2021.tail()

Unnamed: 0,10 Digit BBL,2021 score,2021 grade,Boro,Block,Lot,Building Count,DOF Gross Square Footage,Street Number,Street Name
20355,5076260001,100.0,A,5,7626,1,1,31300,2,ARTHUR KILL ROAD
20356,5079910100,88.0,A,5,7991,100,1,50451,99,ELLIS STREET
20357,5080080134,,F,5,8008,134,4,64167,250,PAGE AVENUE
20358,2025260090,22.0,D,2,2526,90,1,475438,1131,OGDEN AVENUE
20359,3005020038,,F,3,502,38,1,33000,133,VAN DUZER STREET


In [13]:
energyData2021.shape

(20360, 10)

In [14]:
energyData2021.columns

Index(['10 Digit BBL ', '2021 score', '2021 grade', 'Boro', 'Block', 'Lot',
       'Building Count', 'DOF Gross Square Footage ', 'Street Number',
       'Street Name'],
      dtype='object')

In [15]:
energyData2021.dtypes

10 Digit BBL                   int64
2021 score                   float64
2021 grade                    object
Boro                           int64
Block                          int64
Lot                            int64
Building Count                 int64
DOF Gross Square Footage       int64
Street Number                 object
Street Name                   object
dtype: object

In [16]:
# Peek at five random rows. The fixed seed keeps the displayed sample
# reproducible across kernel restarts (an unseeded sample changes every run).
energyData2021.sample(5, random_state=42)

Unnamed: 0,10 Digit BBL,2021 score,2021 grade,Boro,Block,Lot,Building Count,DOF Gross Square Footage,Street Number,Street Name
8701,2027650198,42.0,D,2,2765,198,1,33200,670,BARRETTO STREET
2449,1008780069,,F,1,878,69,2,32948,110,EAST 23 STREET
20125,5000080060,70.0,B,5,8,60,1,79150,120,STUYVESANT PLACE
2628,1009087501,72.0,B,1,908,7501,1,63828,200,EAST 28 STREET
14847,3052510038,36.0,D,3,5251,38,1,62320,616,EAST 29 STREET


## Cleaning the data

In [17]:
energyData2020['LetterGrad'].unique()

array(['D', 'F', 'C', 'B', 'A'], dtype=object)

In [18]:
energyData2020['LetterGrad'].value_counts(ascending=False)

D    9163
B    3610
C    3376
A    3364
F    2168
Name: LetterGrad, dtype: int64

In [19]:
# Remove every 2020 building graded 'F' (in the rows previewed in this
# notebook those carry a score of 0) and keep an independent copy of the rest.
energyData2020 = energyData2020.loc[
    energyData2020['LetterGrad'].ne('F')
].copy(deep=True)

In [20]:
energyData2020.shape

(19513, 26)

In [21]:
energyData2021['2021 grade'].value_counts(ascending=False)

D    7977
A    4046
B    3340
C    3139
F    1858
Name: 2021 grade, dtype: int64

In [22]:
# Remove every 2021 building graded 'F' (in the rows previewed in this
# notebook those have no score) and keep an independent copy of the rest.
energyData2021 = energyData2021.loc[
    energyData2021['2021 grade'].ne('F')
].copy(deep=True)

In [23]:
energyData2021.shape

(18502, 10)

In [24]:
# Narrow the 2020 frame to the lot identifiers, building count, floor area,
# address, score, grade and geometry columns.
columns_2020 = [
    'Boro', 'Block_1', 'Lot_1', 'Building_1', 'DOF_Gross_',
    'Street_Num', 'Street_Nam', 'BBL_1', 'EnergyStar', 'LetterGrad',
    'geometry',
]
energyData2020 = energyData2020[columns_2020].copy(deep=True)

In [25]:
# Give both years the same column vocabulary so they can be compared and
# joined on 'BBL'. The 2021 keys keep the trailing spaces present in the
# spreadsheet headers.
renames_2020 = {
    'Block_1': 'Block',
    'Lot_1': 'Lot',
    'Building_1': 'BuildingCount',
    'DOF_Gross_': 'GrossSF',
    'Street_Num': 'StreetNumber',
    'Street_Nam': 'StreetName',
    'BBL_1': 'BBL',
    'EnergyStar': 'EnergyScore',
    'LetterGrad': 'EnergyGrade',
}
renames_2021 = {
    '10 Digit BBL ': 'BBL',
    '2021 score': 'EnergyScore',
    '2021 grade': 'EnergyGrade',
    'Building Count': 'BuildingCount',
    'DOF Gross Square Footage ': 'GrossSF',
    'Street Number': 'StreetNumber',
    'Street Name': 'StreetName',
}
energyData2020 = energyData2020.rename(columns=renames_2020)
energyData2021 = energyData2021.rename(columns=renames_2021)

## Summary Statistics

In [26]:
energyData2020['EnergyScore'].max()

100

In [27]:
energyData2020['EnergyScore'].min()

1

In [28]:
energyData2020['EnergyScore'].describe()

count    19513.000000
mean        54.395839
std         28.729948
min          1.000000
25%         32.000000
50%         57.000000
75%         78.000000
max        100.000000
Name: EnergyScore, dtype: float64

In [29]:
energyData2021['EnergyScore'].describe()

count    18502.000000
mean        57.278835
std         28.921142
min          1.000000
25%         35.000000
50%         61.000000
75%         82.000000
max        100.000000
Name: EnergyScore, dtype: float64

In [30]:
# Bar chart: number of 2020 buildings in each energy grade.
(
    alt.Chart(energyData2020)
    .mark_bar()
    .encode(x='EnergyGrade:O', y='count():Q')
)

In [31]:
# Single normalized stacked bar: share of 2020 buildings in each grade.
(
    alt.Chart(energyData2020)
    .mark_bar()
    .encode(
        x=alt.X('count():Q', stack='normalize', axis=alt.Axis(format='.0%')),
        color='EnergyGrade:O',
    )
)

In [32]:
# Single normalized stacked bar: share of 2021 buildings in each grade.
(
    alt.Chart(energyData2021)
    .mark_bar()
    .encode(
        x=alt.X('count():Q', stack='normalize', axis=alt.Axis(format='.0%')),
        color='EnergyGrade:O',
    )
)

In [33]:
def grade_share_bar(data, title):
    """Return one normalized stacked bar of the grade mix in `data`.

    data:  frame with an 'EnergyGrade' column.
    title: label shown above the bar so stacked charts can be told apart.
    """
    return alt.Chart(data).mark_bar().encode(
        color=alt.Color('EnergyGrade:O'),
        x=alt.X('count():Q', stack='normalize', axis=alt.Axis(format='.0%'))
    ).properties(title=title)


# Same chart for both years, stacked vertically; the titles identify which
# bar belongs to which year.
chart2020 = grade_share_bar(energyData2020, '2020')
chart2021 = grade_share_bar(energyData2021, '2021')
alt.vconcat(chart2020, chart2021)

In [34]:
# Scatter plot of 2020 energy score against gross floor area, coloured by grade.
(
    alt.Chart(energyData2020)
    .mark_point()
    .encode(
        x='EnergyScore:Q',
        y='GrossSF:Q',
        color='EnergyGrade:N',
    )
)

In [35]:
energyData2020['GrossSF'].describe()

count    1.951300e+04
mean     1.168086e+05
std      2.759876e+05
min      0.000000e+00
25%      4.057300e+04
50%      6.280500e+04
75%      1.139690e+05
max      1.709504e+07
Name: GrossSF, dtype: float64

In [36]:
# Same scatter on a log10 area axis. Rows with GrossSF == 0 (the column's
# minimum) are excluded because a log scale cannot place them.
(
    alt.Chart(energyData2020.loc[energyData2020['GrossSF'].gt(0)])
    .mark_point()
    .encode(
        x='EnergyScore:Q',
        y=alt.Y('GrossSF:Q', scale=alt.Scale(type='log', base=10)),
        color='EnergyGrade:O',
    )
)

In [37]:
# Box plots of the score distribution for each year, stacked vertically.
# Each chart is titled with its year; without titles the two plots are
# indistinguishable.
chart2020 = alt.Chart(energyData2020).mark_boxplot().encode(
    x=alt.X('EnergyScore:Q')
).properties(title='2020')
chart2021 = alt.Chart(energyData2021).mark_boxplot().encode(
    x=alt.X('EnergyScore:Q')
).properties(title='2021')
alt.vconcat(chart2020, chart2021)

# Ignore outliers by setting `extent='min-max'` in the mark properties, i.e. `mark_boxplot(extent='min-max')`

## Grouping By

In [72]:
# Per-borough summary statistics of the 2020 score. After reset_index the
# columns are two-level; dropping the top level leaves the borough key with
# an empty name, which is then relabelled 'Borough'.
boroughs2020 = (
    energyData2020[['Boro', 'EnergyScore']]
    .groupby('Boro')
    .agg(['count', 'max', 'min', 'mean', 'median', 'std'])
    .reset_index()
    .droplevel(0, axis=1)
    .rename(columns={'': 'Borough'})
)

Possible aggregation functions are

* count() – Number of non-null observations
* sum() – Sum of values
* mean() – Mean of values
* median() – Arithmetic median of values
* min() – Minimum
* max() – Maximum
* mode() – Mode
* std() – Standard deviation
* var() – Variance

In [73]:
boroughs2020.head()

Unnamed: 0,Borough,count,max,min,mean,median,std
0,1,7268,100,1,53.967529,58.0,29.005787
1,2,3941,100,1,51.065466,51.0,29.195878
2,3,4794,100,1,56.171882,59.0,28.221008
3,4,3236,100,1,56.257108,59.0,27.986805
4,5,274,100,1,60.60219,64.0,27.060618


In [74]:
# Per-borough summary statistics of the 2021 score. After reset_index the
# columns are two-level; dropping the top level leaves the borough key with
# an empty name, which is then relabelled 'Borough'.
boroughs2021 = (
    energyData2021[['Boro', 'EnergyScore']]
    .groupby('Boro')
    .agg(['count', 'max', 'min', 'mean', 'median', 'std'])
    .reset_index()
    .droplevel(0, axis=1)
    .rename(columns={'': 'Borough'})
)

In [75]:
boroughs2021.head()

Unnamed: 0,Borough,count,max,min,mean,median,std
0,1,7201,100.0,1.0,60.210943,66.0,28.641987
1,2,3729,100.0,1.0,50.893001,51.0,29.637601
2,3,4441,100.0,1.0,57.124296,60.0,28.644377
3,4,2945,100.0,1.0,58.31511,61.0,27.873194
4,5,186,100.0,2.0,59.069892,63.0,28.043797


Manhattan's mean score rose the most between 2020 and 2021 (53.97 → 60.21). Borough codes:
    1 = Manhattan
    2 = Bronx
    3 = Brooklyn
    4 = Queens
    5 = Staten Island

In [80]:
# Tag each yearly summary with its year, then stack them into one long table
# (the original 0-4 row labels of each summary are kept as-is).
boroughs2020['year'], boroughs2021['year'] = 2020, 2021
boroughs = pd.concat([boroughs2020, boroughs2021])

In [81]:
boroughs

Unnamed: 0,Borough,count,max,min,mean,median,std,year
0,1,7268,100.0,1.0,53.967529,58.0,29.005787,2020
1,2,3941,100.0,1.0,51.065466,51.0,29.195878,2020
2,3,4794,100.0,1.0,56.171882,59.0,28.221008,2020
3,4,3236,100.0,1.0,56.257108,59.0,27.986805,2020
4,5,274,100.0,1.0,60.60219,64.0,27.060618,2020
0,1,7201,100.0,1.0,60.210943,66.0,28.641987,2021
1,2,3729,100.0,1.0,50.893001,51.0,29.637601,2021
2,3,4441,100.0,1.0,57.124296,60.0,28.644377,2021
3,4,2945,100.0,1.0,58.31511,61.0,27.873194,2021
4,5,186,100.0,2.0,59.069892,63.0,28.043797,2021


In [88]:
# One row of bars per borough, comparing the mean score of each year.
(
    alt.Chart(boroughs)
    .mark_bar()
    .encode(
        row='Borough:O',
        y='year:O',
        x='mean:Q',
        color='year:O',
    )
)

## Finding buildings that have improved/worsened the most

In [91]:
# Inner join on BBL: only lots present in both years survive. Columns that
# exist in both frames get pandas' default suffixes, `_x` for the 2020 (left)
# side and `_y` for the 2021 (right) side.
buildingEnergyData = pd.merge(energyData2020, energyData2021, on='BBL', how='inner')

In [92]:
buildingEnergyData.head()

Unnamed: 0,Boro_x,Block_x,Lot_x,BuildingCount_x,GrossSF_x,StreetNumber_x,StreetName_x,BBL,EnergyScore_x,EnergyGrade_x,geometry,EnergyScore_y,EnergyGrade_y,Boro_y,Block_y,Lot_y,BuildingCount_y,GrossSF_y,StreetNumber_y,StreetName_y
0,1,4,7501,1,2542563,1,WATER STREET,1000047501,61,C,"POLYGON ((-74.01264 40.70240, -74.01256 40.702...",55.0,C,1,4,7501,1,2542563,1,WATER STREET
1,1,5,7501,1,1354691,125,BROAD STREET,1000057501,76,B,"POLYGON ((-74.01120 40.70243, -74.01012 40.702...",84.0,B,1,5,7501,1,1354691,125,BROAD STREET
2,1,8,7501,1,169061,2,WATER STREET,1000087501,13,D,"POLYGON ((-74.01278 40.70275, -74.01289 40.703...",4.0,D,1,8,7501,1,169061,2,WATER STREET
3,1,9,1,1,692431,34,WHITEHALL STREET,1000090001,72,B,"POLYGON ((-74.01358 40.70277, -74.01354 40.702...",83.0,B,1,9,1,1,692431,34,WHITEHALL STREET
4,1,10,16,1,336025,90,BROAD STREET,1000100016,81,B,"POLYGON ((-74.01220 40.70364, -74.01218 40.703...",81.0,B,1,10,16,1,336025,90,BROAD STREET


In [93]:
buildingEnergyData.columns

Index(['Boro_x', 'Block_x', 'Lot_x', 'BuildingCount_x', 'GrossSF_x',
       'StreetNumber_x', 'StreetName_x', 'BBL', 'EnergyScore_x',
       'EnergyGrade_x', 'geometry', 'EnergyScore_y', 'EnergyGrade_y', 'Boro_y',
       'Block_y', 'Lot_y', 'BuildingCount_y', 'GrossSF_y', 'StreetNumber_y',
       'StreetName_y'],
      dtype='object')

In [94]:
# Discard the 2021-side (`_y`) lot, size and address columns; the 2021 score
# and grade are kept. Reassigning with errors='ignore' instead of dropping in
# place lets the cell be re-run without a KeyError once the columns are gone.
buildingEnergyData = buildingEnergyData.drop(
    columns=[
        'Boro_y', 'Block_y', 'Lot_y', 'BuildingCount_y', 'GrossSF_y',
        'StreetNumber_y', 'StreetName_y',
    ],
    errors='ignore',
)

In [95]:
# Strip the merge suffixes from the shared descriptive columns and label the
# score/grade columns with the year they belong to. 'BBL' is already named
# correctly and needs no entry.
merged_names = {
    'Boro_x': 'Borough',
    'Block_x': 'Block',
    'Lot_x': 'Lot',
    'BuildingCount_x': 'BuildingCount',
    'GrossSF_x': 'GrossSF',
    'StreetNumber_x': 'StreetNumber',
    'StreetName_x': 'StreetName',
    'EnergyScore_x': 'EnergyScore2020',
    'EnergyGrade_x': 'EnergyGrade2020',
    'EnergyScore_y': 'EnergyScore2021',
    'EnergyGrade_y': 'EnergyGrade2021',
}
buildingEnergyData = buildingEnergyData.rename(columns=merged_names)

In [96]:
buildingEnergyData.head()

Unnamed: 0,Borough,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName,BBL,EnergyScore2020,EnergyGrade2020,geometry,EnergyScore2021,EnergyGrade2021
0,1,4,7501,1,2542563,1,WATER STREET,1000047501,61,C,"POLYGON ((-74.01264 40.70240, -74.01256 40.702...",55.0,C
1,1,5,7501,1,1354691,125,BROAD STREET,1000057501,76,B,"POLYGON ((-74.01120 40.70243, -74.01012 40.702...",84.0,B
2,1,8,7501,1,169061,2,WATER STREET,1000087501,13,D,"POLYGON ((-74.01278 40.70275, -74.01289 40.703...",4.0,D
3,1,9,1,1,692431,34,WHITEHALL STREET,1000090001,72,B,"POLYGON ((-74.01358 40.70277, -74.01354 40.702...",83.0,B
4,1,10,16,1,336025,90,BROAD STREET,1000100016,81,B,"POLYGON ((-74.01220 40.70364, -74.01218 40.703...",81.0,B


In [97]:
# change:    absolute score difference, 2021 minus 2020.
# perChange: that difference as a fraction of the 2020 score
#            (e.g. 76 -> 84 gives 8 / 76 = 0.105).
baseline_score = buildingEnergyData['EnergyScore2020']
buildingEnergyData['change'] = buildingEnergyData['EnergyScore2021'].sub(baseline_score)
buildingEnergyData['perChange'] = buildingEnergyData['change'].div(baseline_score)

In [98]:
buildingEnergyData.head()

Unnamed: 0,Borough,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName,BBL,EnergyScore2020,EnergyGrade2020,geometry,EnergyScore2021,EnergyGrade2021,change,perChange
0,1,4,7501,1,2542563,1,WATER STREET,1000047501,61,C,"POLYGON ((-74.01264 40.70240, -74.01256 40.702...",55.0,C,-6.0,-0.098361
1,1,5,7501,1,1354691,125,BROAD STREET,1000057501,76,B,"POLYGON ((-74.01120 40.70243, -74.01012 40.702...",84.0,B,8.0,0.105263
2,1,8,7501,1,169061,2,WATER STREET,1000087501,13,D,"POLYGON ((-74.01278 40.70275, -74.01289 40.703...",4.0,D,-9.0,-0.692308
3,1,9,1,1,692431,34,WHITEHALL STREET,1000090001,72,B,"POLYGON ((-74.01358 40.70277, -74.01354 40.702...",83.0,B,11.0,0.152778
4,1,10,16,1,336025,90,BROAD STREET,1000100016,81,B,"POLYGON ((-74.01220 40.70364, -74.01218 40.703...",81.0,B,0.0,0.0


In [123]:
# Ten largest relative improvements. Every row listed started from a 2020
# score of 1, so the ratio is dominated by the tiny baseline.
buildingEnergyData.sort_values(by='perChange',ascending=False).head(10)

Unnamed: 0,Borough,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName,BBL,EnergyScore2020,EnergyGrade2020,geometry,EnergyScore2021,EnergyGrade2021,change,perChange
3898,1,2109,46,1,66190,2017,AMSTERDAM AVENUE,1021090046,1,D,"POLYGON ((-73.93994 40.83517, -73.94018 40.834...",100.0,A,99.0,99.0
13122,3,1893,4,2,60210,97,GRAND STREET,3018930004,1,D,"POLYGON ((-73.96339 40.69455, -73.96376 40.694...",100.0,A,99.0,99.0
16649,4,15810,55,3,189500,125,BEACH 19 STREET,4158100055,1,D,"POLYGON ((-73.75395 40.59374, -73.75331 40.593...",97.0,A,96.0,96.0
12735,3,1158,61,1,60450,225,PARK PLACE,3011580061,1,D,"POLYGON ((-73.96990 40.67741, -73.97002 40.677...",96.0,A,95.0,95.0
4540,1,1274,7502,1,96137,57,WEST 58 STREET,1012747502,1,D,"POLYGON ((-73.97618 40.76486, -73.97651 40.765...",96.0,A,95.0,95.0
1214,1,759,26,1,63363,325,WEST 35 STREET,1007590026,1,D,"POLYGON ((-73.99370 40.75357, -73.99388 40.753...",95.0,A,94.0,94.0
12414,4,3530,39,1,50460,60-52,MADISON STREET,4035300039,1,D,"POLYGON ((-73.89835 40.70644, -73.89848 40.706...",94.0,A,93.0,93.0
5232,1,1521,69,1,174910,1175,PARK AVENUE,1015210069,1,D,"POLYGON ((-73.95331 40.78427, -73.95349 40.784...",92.0,A,91.0,91.0
16662,4,16177,1,1,293677,107-10,SHORE FRONT PARKWAY,4161770001,1,D,"POLYGON ((-73.82761 40.58240, -73.82881 40.582...",91.0,A,90.0,90.0
960,1,496,1,1,48479,79,CROSBY STREET,1004960001,1,D,"POLYGON ((-73.99734 40.72281, -73.99749 40.722...",90.0,A,89.0,89.0


In [126]:
# Same ranking restricted to buildings that scored above 50 in 2020 --
# presumably to keep very low baselines from dominating the ratio.
buildingEnergyData[buildingEnergyData['EnergyScore2020'] > 50].sort_values(by='perChange',ascending=False).head(10)

Unnamed: 0,Borough,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName,BBL,EnergyScore2020,EnergyGrade2020,geometry,EnergyScore2021,EnergyGrade2021,change,perChange
10568,3,1933,1,1,371890,21,ST. JAMES PLACE,3019330001,51,D,"POLYGON ((-73.96376 40.68857, -73.96478 40.688...",100.0,A,49.0,0.960784
7030,2,2504,36,1,114941,941,JEROME AVENUE,2025040036,52,D,"POLYGON ((-73.92756 40.83062, -73.92756 40.830...",100.0,A,48.0,0.923077
3386,1,1011,13,1,122813,145,WEST 58 STREET,1010110013,51,D,"POLYGON ((-73.97837 40.76580, -73.97823 40.766...",98.0,A,47.0,0.921569
6390,2,3304,180,1,56265,219,MIRIAM STREET,2033040180,53,D,"POLYGON ((-73.89128 40.86849, -73.89119 40.868...",100.0,A,47.0,0.886792
10965,3,140,160,1,212087,339,ADAMS STREET,3001400160,53,D,"POLYGON ((-73.98806 40.69332, -73.98807 40.693...",100.0,A,47.0,0.886792
13586,4,626,1,1,50000,34-15,31 AVENUE,4006260001,52,D,"POLYGON ((-73.92008 40.76322, -73.92029 40.762...",98.0,A,46.0,0.884615
5634,1,1756,39,1,60520,58,EAST 132 STREET,1017560039,51,D,"POLYGON ((-73.93657 40.80939, -73.93666 40.809...",96.0,A,45.0,0.882353
13743,3,4597,1,1,54868,1044,EAST NEW YORK AVENUE,3045970001,54,D,"POLYGON ((-73.92726 40.66443, -73.92758 40.664...",100.0,A,46.0,0.851852
10401,2,4999,68,1,60968,730,EAST 236 STREET,2049990068,54,D,"POLYGON ((-73.85468 40.89504, -73.85482 40.894...",100.0,A,46.0,0.851852
6587,1,1895,1,1,78701,400,RIVERSIDE DRIVE,1018950001,53,D,"POLYGON ((-73.96690 40.80635, -73.96664 40.806...",98.0,A,45.0,0.849057


Information on individual buildings can be looked up [here](https://a810-dobnow.nyc.gov/publish/#!/), but the documents from their submissions are not available there.

In [99]:
buildingEnergyData['perChange'].describe()

count    16793.000000
mean         0.489799
std          3.776643
min         -0.990000
25%         -0.060000
50%          0.032258
75%          0.228070
max         99.000000
Name: perChange, dtype: float64

In [101]:
# Box plot of the relative score change across all matched buildings.
(
    alt.Chart(buildingEnergyData)
    .mark_boxplot()
    .encode(x='perChange:Q')
)

In [115]:
# Box plot of the relative change after trimming both tails: only rows
# strictly between the 8th and 81st percentiles of perChange are kept.
relative_change = buildingEnergyData['perChange']
below_upper = relative_change < relative_change.quantile(0.81)
above_lower = relative_change > relative_change.quantile(0.08)
(
    alt.Chart(buildingEnergyData[below_upper & above_lower])
    .mark_boxplot()
    .encode(x='perChange:Q')
)

In [118]:
# Per-borough box plots of the relative change, keeping only rows strictly
# between the 5th and 90th percentiles of perChange.
relative_change = buildingEnergyData['perChange']
below_upper = relative_change < relative_change.quantile(0.9)
above_lower = relative_change > relative_change.quantile(0.05)
(
    alt.Chart(buildingEnergyData[below_upper & above_lower])
    .mark_boxplot()
    .encode(x='perChange:Q', row='Borough:N')
)

Do the same for 2021 and compare both years

In [38]:
# Map of the 2020 lots, shaded by energy score. The score column was renamed
# from 'EnergyStar' to 'EnergyScore' during cleaning, so the encoding must use
# the new name; the old one no longer exists in this frame.
alt.Chart(energyData2020).mark_geoshape().encode(
    color=alt.Color('EnergyScore:Q')
)

In [39]:
# 2020 buildings whose BBL also appears in the 2021 data.
energyData2020[energyData2020['BBL'].isin(energyData2021['BBL'])]

Unnamed: 0,Boro,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName,BBL,EnergyScore,EnergyGrade,geometry
2,1,4,7501,1,2542563,1,WATER STREET,1000047501,61,C,"POLYGON ((-74.01264 40.70240, -74.01256 40.702..."
3,1,5,7501,1,1354691,125,BROAD STREET,1000057501,76,B,"POLYGON ((-74.01120 40.70243, -74.01012 40.702..."
4,1,8,7501,1,169061,2,WATER STREET,1000087501,13,D,"POLYGON ((-74.01278 40.70275, -74.01289 40.703..."
5,1,9,1,1,692431,34,WHITEHALL STREET,1000090001,72,B,"POLYGON ((-74.01358 40.70277, -74.01354 40.702..."
6,1,10,16,1,336025,90,BROAD STREET,1000100016,81,B,"POLYGON ((-74.01220 40.70364, -74.01218 40.703..."
...,...,...,...,...,...,...,...,...,...,...,...
21671,5,3125,116,2,238368,20,FR CAPODANNO BOULEVARD,5031250116,46,D,"POLYGON ((-74.06060 40.59482, -74.06144 40.594..."
21673,5,3543,7501,1,78903,175,ZOE STREET,5035437501,74,B,"POLYGON ((-74.10170 40.58457, -74.10134 40.584..."
21674,5,3576,23,1,34580,15,LISBON PLACE,5035760023,66,C,"POLYGON ((-74.10901 40.58208, -74.10876 40.582..."
21677,5,3983,65,2,246848,26,EBBITTS AVENUE,5039830065,82,B,"POLYGON ((-74.11093 40.56352, -74.11288 40.564..."


In [40]:
# 2021 buildings whose BBL also appears in the 2020 data.
energyData2021[energyData2021['BBL'].isin(energyData2020['BBL'])]

Unnamed: 0,BBL,EnergyScore,EnergyGrade,Boro,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName
0,1008567502,73.0,B,1,856,7502,1,341125,225,5 AVENUE
1,4012380040,51.0,D,4,1238,40,1,208252,39-60,54 STREET
2,1008380021,86.0,A,1,838,21,1,57636,35,WEST 36 STREET
3,1007620025,64.0,C,1,762,25,1,274209,307,WEST 38 STREET
4,2036000004,51.0,D,2,3600,4,11,1021752,1850,LAFAYETTE AVENUE
...,...,...,...,...,...,...,...,...,...,...
20344,5066700001,15.0,D,5,6670,1,1,220240,375,SEGUINE AVENUE
20345,5071000028,100.0,A,5,7100,28,1,64117,2730,ARTHUR KILL ROAD
20348,5071870001,100.0,A,5,7187,1,45,349503,2911,ARTHUR KILL ROAD
20355,5076260001,100.0,A,5,7626,1,1,31300,2,ARTHUR KILL ROAD


In [41]:
# 2020 buildings with no matching BBL in the 2021 data.
energyData2020[~energyData2020['BBL'].isin(energyData2021['BBL'])]

Unnamed: 0,Boro,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName,BBL,EnergyScore,EnergyGrade,geometry
0,1,1,10,124,2598091,920,GRESHAM ROAD,1000010010,1,D,"POLYGON ((-74.02240 40.68443, -74.02404 40.683..."
8,1,16,40,1,134082,55,BATTERY PLACE,1000160040,43,D,"POLYGON ((-74.01715 40.70658, -74.01736 40.706..."
13,1,23,19,2,240279,11,WALL STREET,1000230019,1,D,"POLYGON ((-74.01160 40.70689, -74.01135 40.707..."
26,1,37,13,1,34613,129,FRONT STREET,1000370013,13,D,"POLYGON ((-74.00602 40.70501, -74.00629 40.705..."
31,1,52,1,1,102591,96,TRINITY PLACE,1000520001,2,D,"POLYGON ((-74.01253 40.70919, -74.01244 40.709..."
...,...,...,...,...,...,...,...,...,...,...,...
21672,5,3171,1,1,39280,1055,TARGEE STREET,5031710001,57,C,"POLYGON ((-74.08924 40.60289, -74.08925 40.602..."
21675,5,3645,7,1,86850,206,CLAWSON STREET,5036450007,61,C,"POLYGON ((-74.10755 40.57344, -74.10797 40.572..."
21676,5,3696,100,2,151024,333,MIDLAND AVENUE,5036960100,97,A,"POLYGON ((-74.09842 40.57677, -74.09855 40.576..."
21679,5,6544,1,4,100780,1270,HUGUENOT AVENUE,5065440001,96,A,"POLYGON ((-74.18614 40.52383, -74.18516 40.522..."


In [42]:
# 2021 buildings with no matching BBL in the 2020 data.
energyData2021[~energyData2021['BBL'].isin(energyData2020['BBL'])]

Unnamed: 0,BBL,EnergyScore,EnergyGrade,Boro,Block,Lot,BuildingCount,GrossSF,StreetNumber,StreetName
6,1010817501,55.0,C,1,1081,7501,1,446447,525,WEST 52 STREET
13,1000077501,5.0,D,1,7,7501,3,47754,1,COENTIES SLIP
14,1000080039,22.0,D,1,8,39,1,46724,32,PEARL STREET
70,1000177501,42.0,D,1,17,7501,1,427177,50,WEST STREET
81,1000220013,88.0,A,1,22,13,1,860889,26,BROADWAY
...,...,...,...,...,...,...,...,...,...,...
20349,5072060171,79.0,B,5,7206,171,1,29566,112,INDUSTRIAL LOOP
20351,5074690200,69.0,C,5,7469,200,1,155282,55 - 65,TYRELLAN AVENUE
20352,5074810001,76.0,B,5,7481,1,1,144760,60,TYRELLAN AVE
20353,5075770003,92.0,A,5,7577,3,1,75000,200,BOSCOMBE AVENUE
