# LL84 Processing

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

## Step 0: Load in raw data

In [2]:
# Read the two inputs used throughout the notebook: the LL84 disclosure CSV and the school points layer.
# TODO: make sure we get the columns related to on-site FF infra

ll84_csv_path = '../data/raw_data/DOB/LL84 Energy Efficiency/NYC_Building_Energy_and_Water_Data_Disclosure_for_Local_Law_84__2022-Present__20250915.csv'
school_points_path = '../data/processed_data/schools/school_points_with_lcgms.geojson'

ll_raw = pd.read_csv(ll84_csv_path)
schools = gpd.read_file(school_points_path)

  ll_raw = pd.read_csv('../data/raw_data/DOB/LL84 Energy Efficiency/NYC_Building_Energy_and_Water_Data_Disclosure_for_Local_Law_84__2022-Present__20250915.csv')


## Step 1: Match schools to LL84 data
### We first attempt to match LL84 data to schools using building codes, then BBL (Borough-Block-Lot)

In [4]:
# Step zero: keep LL84 records whose property-use list mentions an education-related use.
# NOTE: besides 'K-12 School' this also admits 'Pre-school/Daycare', 'Other - Education' and 'Worship Facility'.
education_use_types = ['K-12 School', 'Pre-school/Daycare', 'Other - Education', 'Worship Facility']
ll84_edu = ll_raw[ll_raw['List of All Property Use Types (GFA) (ft²)'].fillna('').str.contains('|'.join(education_use_types), regex=True)]

# Step one: extract building codes (one of the letters L/K/M/Q/R/X followed by three digits) from the "Property Name" field
extracted_building_codes = ll84_edu['Property Name'].str.extractall(r'([LKMQRX]\d{3})')
# extractall returns one row per regex hit, so a property name with multiple building codes yields one row per code
ll84_edu_bc = extracted_building_codes.droplevel('match').join(ll84_edu).rename(columns={0:'Building Code'})

# Keep only the most recent, and largest sqft record for each extracted building code.
# The floor area is ranked through a temporary numeric key so that "largest" is a numeric comparison
# even if the raw column was read as text (the BBL path coerces the same column with pd.to_numeric).
# na_position='first' keeps records with a missing year/area from being selected by keep='last'.
gfa_col = 'Largest Property Use Type - Gross Floor Area (ft²)'
ll84_edu_bc = (
    ll84_edu_bc
    # .to_numpy() assigns positionally; ll84_edu_bc can carry repeated index labels after the join
    .assign(_gfa_sort_key=pd.to_numeric(ll84_edu_bc[gfa_col], errors='coerce').to_numpy())
    .sort_values(['Calendar Year', '_gfa_sort_key'], na_position='first')
    .drop_duplicates(subset=['Building Code'], keep='last')
    .drop(columns='_gfa_sort_key')
)
# Left merge so every school row is kept; '_merge_bc' records whether an LL84 record was found ('both') or not ('left_only')
schools_matched_on_bc = schools[['Location Name', 'Location Code', 'Building Code', 'Borough Block Lot']].merge(ll84_edu_bc, on='Building Code', how='left', indicator=True).rename(columns={'_merge':'_merge_bc'})
# TODO: The above merge results in a small join explosion for building codes that are associated with multiple properties in the LL84 data. Need to investigate further.
# NOTE: AS: @JW: As this code is written, there are no additional rows that are added to schools_matched_on_bc compared to schools after the merge. Multiple schools do get matched to the same building code, but this is expected behavior. Let me know if I missed something!

In [5]:
# Rows the building-code merge flagged as 'left_only' have no LL84 counterpart yet.
# Most of these are DOE schools that are new, or charter schools that occupy space in larger, non K-12 buildings.
no_bc_match_mask = schools_matched_on_bc['_merge_bc'] == 'left_only'
unmatched_afterbc_schools = schools_matched_on_bc.loc[no_bc_match_mask]
print(f'{len(unmatched_afterbc_schools)} schools remain unmatched after building code joins:')
unmatched_afterbc_schools

244 schools remain unmatched after building code joins:


Unnamed: 0,Location Name,Location Code,Building Code,Borough Block Lot,Calendar Year,Property ID,Property Name,Parent Property ID,Parent Property Name,Year Ending,"NYC Borough, Block and Lot (BBL)",NYC Building Identification Number (BIN),Address 1,City,Postal Code,Primary Property Type - Self Selected,Primary Property Type - Portfolio Manager-Calculated,National Median Reference Property Type,List of All Property Use Types (GFA) (ft²),Largest Property Use Type,Largest Property Use Type - Gross Floor Area (ft²),2nd Largest Property Use Type,2nd Largest Property Use Type - Gross Floor Area (ft²),3rd Largest Property Use Type,3rd Largest Property Use Type - Gross Floor Area (ft²),Year Built,Construction Status,Number of Buildings,Occupancy,Metered Areas (Energy),Metered Areas (Water),ENERGY STAR Score,National Median ENERGY STAR Score,Target ENERGY STAR Score,Reason(s) for No Score,ENERGY STAR Certification - Year(s) Certified (Score),Eligible for Certification for Report PED (Y/N),ENERGY STAR Certification - Last Approval Date,ENERGY STAR Certification - Application Status,Site EUI (kBtu/ft²),Weather Normalized Site EUI (kBtu/ft²),National Median Site EUI (kBtu/ft²),Site Energy Use (kBtu),Weather Normalized Site Energy Use (kBtu),Electricity - Weather Normalized Site Electricity Use (Grid and Onsite Renewables) (kWh),Electricity - Weather Normalized Site Electricity Intensity (Grid and Onsite Renewables) (kWh/ft²),Percent Electricity,Natural Gas - Weather Normalized Site Natural Gas Use (therms),Natural Gas - Weather Normalized Site Natural Gas Intensity (therms/ft²),Source EUI (kBtu/ft²),Weather Normalized Source EUI (kBtu/ft²),National Median Source EUI (kBtu/ft²),Source Energy Use (kBtu),Weather Normalized Source Energy Use (kBtu),Fuel Oil #1 Use (kBtu),Fuel Oil #2 Use (kBtu),Fuel Oil #4 Use (kBtu),Fuel Oil #5 & 6 Use (kBtu),Diesel #2 Use (kBtu),Propane Use (kBtu),Kerosene Use (kBtu),District Steam Use (kBtu),District Hot Water Use (kBtu),District Chilled Water 
Use (kBtu),Natural Gas Use (kBtu),Natural Gas Use (therms),Electricity Use - Grid Purchase (kBtu),Electricity Use - Grid Purchase (kWh),Electricity Use - Grid Purchase and Generated from Onsite Renewable Systems (kWh),Electricity Use - Grid Purchase and Generated from Onsite Renewable Systems (kBtu),Electricity Use – Generated from Onsite Renewable Systems and Used Onsite (kWh),Electricity Use – Generated from Onsite Renewable Systems and Used Onsite (kBtu),Electricity Use – Generated from Onsite Renewable Systems (kWh),Electricity Use – Generated from Onsite Renewable Systems and Exported (kWh),Annual Maximum Demand (kW),Annual Maximum Demand (MM/YYYY),Annual Maximum Demand (Meter Name (Meter ID)),Green Power - Onsite (kWh),Green Power - Offsite (kWh),Green Power - Onsite and Offsite (kWh),Total (Location-Based) GHG Emissions (Metric Tons CO2e),Total (Location-Based) GHG Emissions Intensity (kgCO2e/ft²),Direct GHG Emissions (Metric Tons CO2e),Direct GHG Emissions Intensity (kgCO2e/ft²),Indirect (Location-Based) GHG Emissions (Metric Tons CO2e),Indirect (Location-Based) GHG Emissions Intensity (kgCO2e/ft²),Net Emissions (Metric Tons CO2e),National Median Total (Location-Based) GHG Emissions (Metric Tons CO2e),Avoided Emissions - Onsite Green Power (Metric Tons CO2e),Avoided Emissions - Offsite Green Power (Metric Tons CO2e),Avoided Emissions - Onsite and Offsite Green Power (Metric Tons CO2e),Percent of Electricity that is Green Power,Percent of RECs Retained,Percent of Total Electricity Generated from Onsite Renewable Systems,LEED US Project ID,Default Values,Temporary Values,Estimated Values - Energy,Estimated Values - Water,Alert - Energy Meter has less than 12 full calendar months of data,Alert - Energy Meter has gaps,Alert - Energy Meter has overlaps,Alert - Energy - No meters selected for metrics,Alert - Energy Meter has single entry more than 65 days,Alert - Property has no uses,Alert - Water Meter has less than 12 full calendar months of data,"Alert - Data 
Center Issue (with Estimates, IT Configuration, or IT Meter)",Alert - Gross Floor Area is 0 ft2,Estimated Data Flag - Electricity (Grid Purchase),Estimated Data Flag - Natural Gas,Estimated Data Flag - District Steam,Estimated Data Flag - Municipally Supplied Potable Water: Mixed Indoor/Outdoor Use,Estimated Data Flag - Fuel Oil (No. 1),Estimated Data Flag - Fuel Oil (No. 2),Estimated Data Flag - Fuel Oil (No. 4),Estimated Data Flag - Fuel Oil (No. 5 and No. 6),Estimated Data Flag - Diesel,Estimated Data Flag - Electricity (Onsite Solar),Data Center - UPS Output Meter (kWh),Data Center - PDU Input Meter (kWh),Data Center - PDU Output Meter (kWh),Data Center - IT Equipment Input Meter (kWh),Data Center - IT Site Energy (kWh),Data Center - IT Source Energy (kBtu),Data Center - PUE,Data Center - National Median PUE,Data Center - Gross Floor Area (ft²),Data Center - UPS System Redundancy,Data Center - IT Energy Configuration,Data Center - Energy Estimates Applied,Data Center - Cooling Equipment Redundancy,Adult Education - Gross Floor Area (ft²),Ambulatory Surgical Center - Gross Floor Area (ft²),Vehicle Dealership - Gross Floor Area (ft²),Bank Branch - Gross Floor Area (ft²),Barracks- Gross Floor Area (ft²),College/University - Gross Floor Area (ft²),Convention Center - Gross Floor Area (ft²),Courthouse - Gross Floor Area (ft²),Distribution Center - Gross Floor Area (ft²),Enclosed Mall - Gross Floor Area (ft²),Energy/Power Station - Gross Floor Area (ft²),Financial Office - Gross Floor Area (ft²),Financial Office - Number of Computers,Financial Office - Weekly Operating Hours,Financial Office - Number of Workers on Main Shift,Fitness Center/Health Club/Gym - Gross Floor Area (ft²),Food Sales - Gross Floor Area (ft²),Food Service - Gross Floor Area (ft²),Hospital (General Medical & Surgical) - Full-Time Equivalent (FTE) Workers,Hospital (General Medical & Surgical) - Gross Floor Area (ft²),Hospital (General Medical & Surgical) - Laboratory,Hospital (General Medical & 
Surgical) - Licensed Bed Capacity,Hospital (General Medical & Surgical) - Number of MRI Machines,"Hospital (General Medical & Surgical) - MRI Density (Number per 1,000 sq ft)",Hospital (General Medical & Surgical) - Number of Staffed Beds,"Hospital (General Medical & Surgical) - Staffed Bed Density (Number per 1,000 sq ft)","Hospital (General Medical & Surgical) - Full-Time Equivalent (FTE) Worker Density (Number per 1,000 sq ft)",Hospital (General Medical & Surgical) - Percent That Can Be Cooled,Hospital (General Medical & Surgical) - Percent That Can Be Heated,Medical Office - Gross Floor Area (ft²),Medical Office - Number of Computers,Medical Office - Number of MRI Machines,Medical Office - Number of Workers on Main Shift,Medical Office - Percent That Can Be Cooled,Medical Office - Percent That Can Be Heated,Medical Office - Weekly Operating Hours,Outpatient Rehabilitation/Physical Therapy - Gross Floor Area (ft²),Urgent Care/Clinic/Other Outpatient - Gross Floor Area (ft²),Hotel - Gross Floor Area (ft²),Hotel - Gym/fitness Center Floor Area (ft²),K-12 School - Gross Floor Area (ft²),Pre-school/Daycare - Gross Floor Area (ft²),Laboratory - Gross Floor Area (ft²),Mailing Center/Post Office - Gross Floor Area (ft²),Movie Theater - Gross Floor Area (ft²),Manufacturing/Industrial Plant - Gross Floor Area (ft²),Multifamily Housing - Government Subsidized Housing,Institutional Property? 
(Y/N),Multifamily Housing - Gross Floor Area (ft²),Multifamily Housing - Number of Bedrooms,"Multifamily Housing - Number of Bedrooms Density (Number per 1,000 sq ft)",Multifamily Housing - Number of Laundry Hookups in All Units,Multifamily Housing - Number of Laundry Hookups in Common Area(s),Multifamily Housing - Number of Residential Living Units in a High-Rise Building (10 or more Stories),Multifamily Housing - Percent That Can Be Cooled,Multifamily Housing - Percent That Can Be Heated,Multifamily Housing - Total Number of Residential Living Units,"Multifamily Housing - Total Number of Residential Living Units Density (Number per 1,000 sq ft)","Multifamily Housing - Number of Residential Living Units in a High-Rise Building Density (Number per 1,000 sq ft)",Multifamily Housing - Number of Residential Living Units in a Mid-Rise Building (5-9 Stories),"Multifamily Housing - Number of Residential Living Units in a Mid-Rise Building Density (Number per 1,000 sq ft)",Multifamily Housing - Resident Population Type,Residence Hall/Dormitory - Gross Floor Area (ft²),Refrigerated Warehouse - Gross Floor Area (ft²),Non-Refrigerated Warehouse - Gross Floor Area (ft²),Office - Gross Floor Area (ft²),Office - Number of Computers,Office - Number of Workers on Main Shift,Office - Percent That Can Be Cooled,Office - Percent That Can Be Heated,Office - Weekly Operating Hours,"Office - Worker Density (Number per 1,000 sq ft)",Other - Gross Floor Area (ft²),Museum - Gross Floor Area (ft²),Performing Arts - Gross Floor Area (ft²),Parking - Gross Floor Area (ft²),Parking - Open Parking Lot Size (ft²),Parking - Completely Enclosed Parking Garage Size (ft²),Parking - Partially Enclosed Parking Garage Size (ft²),Restaurant - Gross Floor Area (ft²),Restaurant - Weekly Operating Hours,"Restaurant - Worker Density (Number per 1,000 sq ft)",Retail Store - Gross Floor Area (ft²),Retail Store - Number of Walk-in Refrigeration/Freezer Units,Retail Store - Number of Open or Closed 
Refrigeration/Freezer Units,Self-Storage Facility - Gross Floor Area (ft²),Senior Living Community - Gross Floor Area (ft²),"Senior Living Community - Living Unit Density (Number per 1,000 sq ft)",Social/Meeting Hall - Gross Floor Area (ft²),Supermarket/Grocery - Gross Floor Area (ft²),Supermarket/Grocery - Number of Walk-in Refrigeration/Freezer Units,Supermarket/Grocery - Number of Open or Closed Refrigeration/Freezer Units,Wholesale Club/Supercenter- Gross Floor Area (ft²),Wholesale Club/Supercenter- Exterior Entrance to the Public,Wastewater Treatment Plant - Gross Floor Area (ft²),Worship Facility - Gross Floor Area (ft²),Strip Mall - Gross Floor Area (ft²),Property GFA - Self-Reported (ft²),Property GFA - Calculated (Buildings and Parking) (ft²),Property GFA - Calculated (Buildings) (ft²),Property GFA - Calculated (Parking) (ft²),Property Notes,Water Use (All Water Sources) (kgal),Indoor Water Use (All Water Sources) (kgal),Outdoor Water Use (All Water Sources) (kgal),Municipally Supplied Potable Water - Mixed Indoor/Outdoor Use (kgal),Municipally Supplied Potable Water - Total Use (All Meter Types) (kgal),Municipally Supplied Potable Water - Indoor Use (kgal),Municipally Supplied Potable Water - Outdoor Use (kgal),Third Party Certification,Third Party Certification Date Anticipated,Third Party Certification Date Achieved,Data Quality Checker Run?,Data Quality Checker - Date Run,Last Modified Date - Property,Last Modified Date - Electric Meters,Electric Distribution Utility,Last Modified Date - Gas Meters,Last Modified Date - Non-Electric Non-Gas Energy Meters,Last Modified Date - Water Meters,Last Modified Date - Property Use,Last Modified Date - Property Use Details,Number of Active Energy Meters - Total,Number of Active Energy Meters - Used to Compute Metrics,Number of Active Energy Meters - Not Used to Compute Metrics,Number of Active IT Meters,Aggregate Meter(s)-Electric – Number of Individual Meters Included,Aggregate Meter(s)-Natural Gas – Number of 
Individual Meters Included,Aggregate Meter(s)-District Steam – Number of Individual Meters Included,Report Generation Date,Report Submission Date,Borough,Latitude,Longitude,Community Board,Council District,Census Tract (2020),Neighborhood Tabulation Area (NTA) (2020),_merge_bc
252,Dock Street School for STEAM Studies,K313,K611,3000367504,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
264,The Detective WenJian Liu School of Civics and...,K331,K676,3057390002,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
286,Hellenic Classical Charter School,K362,KAWR,3006340034,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
317,"School of Technology, Arts, and Research",K407,K322,3060560015,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
322,Joanne Seminara School of Law and Medicine,K413,K053,3058530045,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1953,Icahn Charter School 1,X717,X717,2028950068,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
1957,Bronx Charter School for the Arts,X730,XAFM,2027550125,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
1959,Success Academy Charter School - Harlem 6,X780,X780,2033420029,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
1962,ALC - BRONX EDUCATIONAL CENTER,X966,X988,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only


In [6]:
# Step two: for remaining un-joined records, attempt join on BBL
unmatched_bc_schools_loc_codes = schools_matched_on_bc[schools_matched_on_bc['_merge_bc']!='both']['Location Code'].unique()
unmatched_bc_schools = schools[schools['Location Code'].isin(unmatched_bc_schools_loc_codes)].copy()

# LL84 sometimes lists multiple BBLs for a single property (separated by ';').
# Explode those into one row per BBL to increase the chances of a match on BBL.
bbl_tokens = ll84_edu['NYC Borough, Block and Lot (BBL)'].str.split(';', expand=True).stack().droplevel(1)
ll84_edu_bbl = pd.DataFrame(bbl_tokens).join(ll84_edu).rename(columns={0:'BBL'})
# Normalise each token: drop hyphens and any whitespace left around the ';' separator,
# otherwise an entry such as "A; B" produces " B", which can never equal a school BBL.
ll84_edu_bbl['BBL'] = ll84_edu_bbl['BBL'].str.replace('-', '', regex=False).str.strip()

In [12]:
# Coerce the largest-use floor area to numbers; values that cannot be parsed become NaN.
gfa_column = 'Largest Property Use Type - Gross Floor Area (ft²)'
ll84_edu_bbl[gfa_column] = pd.to_numeric(ll84_edu_bbl[gfa_column], errors='coerce')

In [13]:
#Step three: finally, choose most recent, largest sqft record per BBL
# Drop duplicated BBLs; keep most recent record
# NOTE: @JW I (AS) modified the code below to keep the most recent record per BBL, as well as the largest property. This prevents cases where smaller buildings w/ the same BBL were preserved in the filtering process
# ll84_edu_bbl_bbl = ll84_edu_bbl.sort_values('Report Submission Date').drop_duplicates(subset=['Property ID', 'NYC Borough, Block and Lot (BBL)'], keep='last')
# na_position='first': sort_values places NaN last by default, which combined with keep='last' would
# select records with a missing year or floor area as the "most recent / largest" one for a BBL.
ll84_edu_bbl = (
    ll84_edu_bbl
    .sort_values(['Calendar Year','Largest Property Use Type - Gross Floor Area (ft²)'], na_position='first')
    .drop_duplicates(subset=['BBL'], keep='last')
)
# Left merge keeps every still-unmatched school; '_merge_bbl' is 'both' where a BBL match was found
schools_matched_on_bbl = unmatched_bc_schools.merge(ll84_edu_bbl, left_on='Borough Block Lot', right_on='BBL', how='left', indicator=True).rename(columns={'_merge':'_merge_bbl'})

In [15]:
# Stack the successful matches from the building-code pass and the BBL pass into one frame
matched_via_bc = schools_matched_on_bc.loc[schools_matched_on_bc['_merge_bc'] == 'both']
matched_via_bbl = schools_matched_on_bbl.loc[schools_matched_on_bbl['_merge_bbl'] == 'both']
ll84_schools_combined = pd.concat([matched_via_bc, matched_via_bbl], ignore_index=True)

In [16]:
# Schools whose Location Code appears in neither automatic match (building code or BBL).
# Most of these are DOE schools that are new, or charter schools that occupy space in larger, non K-12 buildings.
matched_location_codes = ll84_schools_combined['Location Code'].unique()
still_unmatched_mask = ~schools['Location Code'].isin(matched_location_codes)
unmatched_afterauto_schools = schools[still_unmatched_mask]
print(f'{len(unmatched_afterauto_schools)} schools remain unmatched after Building Code and BBL joins:')
unmatched_afterauto_schools

159 schools remain unmatched after Building Code and BBL joins:


Unnamed: 0,Location Name,Managed By Name,Location Code,Building Code,ATS,Primary Address,City,State,Zip,Borough Block Lot,Census Tract,Community District,Council District,Latitude,Longitude,BEDS Number,Location Type Description,Location Category Description,Grades,Grades Final,Open Date,NTA,NTA_Name,Principal Name,Principal Title,Principal Phone Number,Fax Number,Geographical District Code,Administrative District Code,Administrative District Location Code,Administrative District Name,Community School Sup Name,HighSchool Network Location Code,HighSchool Network Name,HighSchool Network Superintendent,HighSchool Network Superintendent Email,BCO Location Code,in_LCGMS,full_address,google_lat,google_lng,google_location_type,lat,lng,geometry
264,The Detective WenJian Liu School of Civics and...,DOE,K331,K676,20K331,6312 13 AVENUE,BROOKLYN,NY,11219,3057390002,194,310,43,40.627038,-74.000890,332000010331,General Academic,Elementary,"0K,01,SE","0K,01,02,03,04,05,SE",Jul 1 2024,BK30,Dyker Heights,Lisa Lin,Principal,,,20,20,K820,COMMUNITY SCHOOL DISTRICT 20,"PRETTO, DAVID",,,,,KFSS,True,"6312 13 AVENUE, BROOKLYN, NY 11219",,,,40.627038,-74.000890,POINT (-74.00089 40.62704)
286,Hellenic Classical Charter School,Charter,K362,KAWR,84K362,646 FIFTH AVENUE,BROOKLYN,NY,11215,3006340034,14300,307,38,40.662985,-73.992254,331500860878,General Academic,K-8,"PK,0K,01,02,03,04,05,06,07,08","0K,01,02,03,04,05,06,07,08",Jul 1 2005,BK37,Park Slope-Gowanus,Christina Tettonis/Natasha Caban,Principal,718-499-0957,718-499-0959,15,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"646 FIFTH AVENUE, BROOKLYN, NY 11215",,,,40.662985,-73.992254,POINT (-73.99225 40.66298)
317,"School of Technology, Arts, and Research",DOE,K407,K322,20K407,650 86 STREET,BROOKLYN,NY,11228,3060560015,152,310,43,40.618628,-74.022073,332000010407,General Academic,Junior High-Intermediate-Middle,"06,07,SE","06,07,08,SE",Jul 1 2024,BK30,Dyker Heights,Stephen Lombardo,Principal,,,20,20,K820,COMMUNITY SCHOOL DISTRICT 20,"PRETTO, DAVID",,,,,KFSS,True,"650 86 STREET, BROOKLYN, NY 11228",,,,40.618628,-74.022073,POINT (-74.02207 40.61863)
322,Joanne Seminara School of Law and Medicine,DOE,K413,K053,20K413,280 SENATOR STREET,BROOKLN,NY,11220,3058530045,70,310,47,40.637031,-74.025888,332000010413,General Academic,Elementary,"0K,01,SE","0K,01,02,03,04,05,SE",Jul 1 2024,BK31,Bay Ridge,Saher Said,Principal,,,20,20,K820,COMMUNITY SCHOOL DISTRICT 20,"PRETTO, DAVID",,,,,KFSS,True,"280 SENATOR STREET, BROOKLN, NY 11220",,,,40.637031,-74.025888,POINT (-74.02589 40.63703)
349,Williamsburg Charter High School,Charter,K473,KBYQ,84K473,198 VARET STREET,BROOKLYN,NY,11206,0,48500,301,34,40.703703,-73.935969,331400860865,General Academic,High school,09101112,09101112,Aug 31 2004,BK78,Bushwick South,Valerie Jacobson,Principal,718-782-9830,347-464-7604,14,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"198 VARET STREET, BROOKLYN, NY 11206",,,,40.703703,-73.935969,POINT (-73.93597 40.7037)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1952,Harriet Tubman Charter School,Charter,X706,X256,84X706,3565 THIRD AVENUE,BRONX,NY,10456,2023730014,14500,203,16,40.832328,-73.905771,320900860823,General Academic,K-8,"0K,01,02,03,04,05,06,07,08","0K,01,02,03,04,05,06,07,08",Jul 1 2000,BX01,Claremont-Bathgate,Cleveland Person,Principal,718-537-9912,718-537-9858,9,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"3565 THIRD AVENUE, BRONX, NY 10456",,,,40.832328,-73.905771,POINT (-73.90577 40.83233)
1957,Bronx Charter School for the Arts,Charter,X730,XAFM,84X730,950 LONGFELLOW AVENUE,BRONX,NY,10474,2027550125,11502,202,17,40.821815,-73.886477,320800860846,General Academic,K-8,"0K,01,02,03,04,05,06,07,08","0K,01,02,03,04,05,06,07,08",Jul 1 2003,BX27,Hunts Point,Ryan Mccabe/Kathy Ortiz,Executive Director,718-893-1042,718-893-7910,8,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"950 LONGFELLOW AVENUE, BRONX, NY 10474",,,,40.821815,-73.886477,POINT (-73.88648 40.82182)
1959,Success Academy Charter School - Harlem 6,Charter,X780,X780,84X780,268 EAST 207 STREET,BRONX,NY,10467,2033420029,42300,207,11,,,321000861088,General Academic,Elementary,"0K,01,02,03,04",,Jul 1 2025,BX43,Norwood,,,,,10,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"268 EAST 207 STREET, BRONX, NY 10467",40.875385,-73.877649,ROOFTOP,40.875385,-73.877649,POINT (-73.87765 40.87538)
1962,ALC - BRONX EDUCATIONAL CENTER,DOE,X966,X988,88X966,,,,,,,,,40.816494,-73.890278,,,,,,,,,,,,,,,,,,,,,,,False,,,,,40.816494,-73.890278,POINT (-73.89028 40.81649)


### Manual matching to correct for missed matches

In [37]:
#NOTE @JW manual matches (Location Code -> row label in ll_raw) for remaining unmatched schools, that did not pass the building code or BBL extractions
#commented out matches have building code, not the location code
# NOTE(review): the values are row labels of the raw CSV as loaded, so they are only valid for this exact
# input file — re-check them whenever the LL84 extract is refreshed.
manual_matches = {
    #'K241': 36863,
    'Q687': 47929,
    'M085': 5179,
    #'X445': 36862,
    #'K811' : 52066,
    #'K092' : 38626,
    #'K490' : 52758,
    #'M705' : 49040,
    #'M204' : 46251,
    #'M284' : 34331,
    #'K535' : 52810,
    #'K312' : 36864,
    #'Q136' : 36867,
    #'X614' : 57812,
    #'X177' : 51985,
    #'X063' : 51984,
    #'X008' : 52538,
    #'X310' : 52376,
    #'X226' : 39016    
}

# Copy the selected LL84 rows so the new columns are written to an independent frame, not a view of ll_raw
manual_add = ll_raw.loc[list(manual_matches.values())].copy()
manual_add['Location Code'] = list(manual_matches.keys())

# Look the Building Code up per Location Code. Taking the codes from `schools` in `schools` row order and
# assigning them positionally would pair them with the wrong Location Code whenever the two orders differ.
building_code_by_location = (
    schools[['Location Code', 'Building Code']]
    .drop_duplicates(subset='Location Code')
    .set_index('Location Code')['Building Code']
)
manual_add['Building Code'] = manual_add['Location Code'].map(building_code_by_location)

# Fail loudly if a manual match refers to a Location Code that has no Building Code in `schools`
codes_without_building = manual_add.loc[manual_add['Building Code'].isna(), 'Location Code'].tolist()
if codes_without_building:
    raise ValueError(f'No Building Code found in schools for manual matches: {codes_without_building}')

ll84_matched_bc = pd.concat([ll84_schools_combined, manual_add])

In [38]:
# List schools that are still unmatched once the manual matches have been added to the automatic ones.
# Most of these are DOE schools that are new, or charter schools that occupy space in larger, non K-12 buildings.
unmatched_aftermanual_schools = schools[~schools['Location Code'].isin(ll84_matched_bc['Location Code'])]
# The message names this stage explicitly so it is not confused with the earlier count taken before manual matching
print(f'{len(unmatched_aftermanual_schools)} schools remain unmatched after building code, BBL, and manual joins:')
unmatched_aftermanual_schools

157 schools remain unmatched after building code and BBL joins:


Unnamed: 0,Location Name,Managed By Name,Location Code,Building Code,ATS,Primary Address,City,State,Zip,Borough Block Lot,Census Tract,Community District,Council District,Latitude,Longitude,BEDS Number,Location Type Description,Location Category Description,Grades,Grades Final,Open Date,NTA,NTA_Name,Principal Name,Principal Title,Principal Phone Number,Fax Number,Geographical District Code,Administrative District Code,Administrative District Location Code,Administrative District Name,Community School Sup Name,HighSchool Network Location Code,HighSchool Network Name,HighSchool Network Superintendent,HighSchool Network Superintendent Email,BCO Location Code,in_LCGMS,full_address,google_lat,google_lng,google_location_type,lat,lng,geometry
264,The Detective WenJian Liu School of Civics and...,DOE,K331,K676,20K331,6312 13 AVENUE,BROOKLYN,NY,11219,3057390002,194,310,43,40.627038,-74.000890,332000010331,General Academic,Elementary,"0K,01,SE","0K,01,02,03,04,05,SE",Jul 1 2024,BK30,Dyker Heights,Lisa Lin,Principal,,,20,20,K820,COMMUNITY SCHOOL DISTRICT 20,"PRETTO, DAVID",,,,,KFSS,True,"6312 13 AVENUE, BROOKLYN, NY 11219",,,,40.627038,-74.000890,POINT (-74.00089 40.62704)
286,Hellenic Classical Charter School,Charter,K362,KAWR,84K362,646 FIFTH AVENUE,BROOKLYN,NY,11215,3006340034,14300,307,38,40.662985,-73.992254,331500860878,General Academic,K-8,"PK,0K,01,02,03,04,05,06,07,08","0K,01,02,03,04,05,06,07,08",Jul 1 2005,BK37,Park Slope-Gowanus,Christina Tettonis/Natasha Caban,Principal,718-499-0957,718-499-0959,15,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"646 FIFTH AVENUE, BROOKLYN, NY 11215",,,,40.662985,-73.992254,POINT (-73.99225 40.66298)
317,"School of Technology, Arts, and Research",DOE,K407,K322,20K407,650 86 STREET,BROOKLYN,NY,11228,3060560015,152,310,43,40.618628,-74.022073,332000010407,General Academic,Junior High-Intermediate-Middle,"06,07,SE","06,07,08,SE",Jul 1 2024,BK30,Dyker Heights,Stephen Lombardo,Principal,,,20,20,K820,COMMUNITY SCHOOL DISTRICT 20,"PRETTO, DAVID",,,,,KFSS,True,"650 86 STREET, BROOKLYN, NY 11228",,,,40.618628,-74.022073,POINT (-74.02207 40.61863)
322,Joanne Seminara School of Law and Medicine,DOE,K413,K053,20K413,280 SENATOR STREET,BROOKLN,NY,11220,3058530045,70,310,47,40.637031,-74.025888,332000010413,General Academic,Elementary,"0K,01,SE","0K,01,02,03,04,05,SE",Jul 1 2024,BK31,Bay Ridge,Saher Said,Principal,,,20,20,K820,COMMUNITY SCHOOL DISTRICT 20,"PRETTO, DAVID",,,,,KFSS,True,"280 SENATOR STREET, BROOKLN, NY 11220",,,,40.637031,-74.025888,POINT (-74.02589 40.63703)
349,Williamsburg Charter High School,Charter,K473,KBYQ,84K473,198 VARET STREET,BROOKLYN,NY,11206,0,48500,301,34,40.703703,-73.935969,331400860865,General Academic,High school,09101112,09101112,Aug 31 2004,BK78,Bushwick South,Valerie Jacobson,Principal,718-782-9830,347-464-7604,14,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"198 VARET STREET, BROOKLYN, NY 11206",,,,40.703703,-73.935969,POINT (-73.93597 40.7037)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1952,Harriet Tubman Charter School,Charter,X706,X256,84X706,3565 THIRD AVENUE,BRONX,NY,10456,2023730014,14500,203,16,40.832328,-73.905771,320900860823,General Academic,K-8,"0K,01,02,03,04,05,06,07,08","0K,01,02,03,04,05,06,07,08",Jul 1 2000,BX01,Claremont-Bathgate,Cleveland Person,Principal,718-537-9912,718-537-9858,9,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"3565 THIRD AVENUE, BRONX, NY 10456",,,,40.832328,-73.905771,POINT (-73.90577 40.83233)
1957,Bronx Charter School for the Arts,Charter,X730,XAFM,84X730,950 LONGFELLOW AVENUE,BRONX,NY,10474,2027550125,11502,202,17,40.821815,-73.886477,320800860846,General Academic,K-8,"0K,01,02,03,04,05,06,07,08","0K,01,02,03,04,05,06,07,08",Jul 1 2003,BX27,Hunts Point,Ryan Mccabe/Kathy Ortiz,Executive Director,718-893-1042,718-893-7910,8,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"950 LONGFELLOW AVENUE, BRONX, NY 10474",,,,40.821815,-73.886477,POINT (-73.88648 40.82182)
1959,Success Academy Charter School - Harlem 6,Charter,X780,X780,84X780,268 EAST 207 STREET,BRONX,NY,10467,2033420029,42300,207,11,,,321000861088,General Academic,Elementary,"0K,01,02,03,04",,Jul 1 2025,BX43,Norwood,,,,,10,84,,OFFICE OF CHARTER SCHOOLS,,,,,,,True,"268 EAST 207 STREET, BRONX, NY 10467",40.875385,-73.877649,ROOFTOP,40.875385,-73.877649,POINT (-73.87765 40.87538)
1962,ALC - BRONX EDUCATIONAL CENTER,DOE,X966,X988,88X966,,,,,,,,,40.816494,-73.890278,,,,,,,,,,,,,,,,,,,,,,,False,,,,,40.816494,-73.890278,POINT (-73.89028 40.81649)


## Step 3: Clean and save LL84 data, merged with `Location Code` and `Building Code`.

In [39]:
# LL84 fields carried into the output layer
data_columns = [
           'ENERGY STAR Score',
           'Site EUI (kBtu/ft²)',
           'Site Energy Use (kBtu)',
           'Percent Electricity',
           'Direct GHG Emissions (Metric Tons CO2e)',
           'Direct GHG Emissions Intensity (kgCO2e/ft²)',
           'Water Use (All Water Sources) (kgal)',
           'Weather Normalized Site EUI (kBtu/ft²)',
           'Weather Normalized Site Energy Use (kBtu)',
           'Fuel Oil #1 Use (kBtu)',
           'Fuel Oil #2 Use (kBtu)',
           'Fuel Oil #4 Use (kBtu)',
           'Fuel Oil #5 & 6 Use (kBtu)',
           'Diesel #2 Use (kBtu)',
           'Propane Use (kBtu)',
           'Kerosene Use (kBtu)',
           'District Steam Use (kBtu)',
           'District Hot Water Use (kBtu)',
           'District Chilled Water Use (kBtu)',
           'Natural Gas Use (kBtu)',
           'Electricity Use - Grid Purchase (kBtu)',
           'Electricity Use - Grid Purchase (kWh)',
           'Electricity Use – Generated from Onsite Renewable Systems (kWh)',
           'Electricity Use – Generated from Onsite Renewable Systems and Exported (kWh)',
           'Green Power - Onsite (kWh)',
           'Avoided Emissions - Onsite Green Power (Metric Tons CO2e)',
           'Percent of Electricity that is Green Power',
           'Percent of Total Electricity Generated from Onsite Renewable Systems',
           'Report Submission Date']

# Every field except the submission date is coerced to a number; unparseable entries become NaN.
# The date column is excluded by name rather than by its position in the list.
numeric_columns = [col for col in data_columns if col != 'Report Submission Date']
ll84_matched_bc[numeric_columns] = ll84_matched_bc[numeric_columns].apply(pd.to_numeric, errors='coerce')

# One LL84 record per Building Code (first occurrence wins). Rows without a Building Code are dropped
# first: pandas matches null merge keys against each other, which would attach such a record to
# every school whose Building Code is missing.
ll84_by_building = (
    ll84_matched_bc[['Building Code'] + data_columns]
    .dropna(subset=['Building Code'])
    .drop_duplicates(subset='Building Code', keep='first')
)

# Left merge keeps every school; schools without an LL84 record get NaN in the data columns
merged_geo = schools[['Location Code', 'Building Code', 'geometry']].merge(ll84_by_building, on="Building Code", how='left')

In [41]:
# Write the merged school / LL84 layer to disk as GeoJSON
ll84_output_path = '../data/processed_data/energy_water/ll84.geojson'
merged_geo.to_file(ll84_output_path, driver='GeoJSON')