In [None]:
import pandas as pd
import geopandas as gpd

## Solar Readiness Assessment

The solar readiness of each building represents its ability to host a solar PV system in its current condition. To be considered solar-ready, a building must:

1. Be 10,000 square feet or more.
2. Have a roof that is 10 years old or newer.
3. Be in good structural condition.

Read more at: https://www.nyc.gov/assets/dcas/downloads/pdf/energy/reportsandpublication/local24_2022.pdf

In [None]:
def keep_latest_report_per_site(reports: pd.DataFrame) -> pd.DataFrame:
    """Return one row per `Site`, keeping the row with the highest `Year of Report`.

    Rows are sorted by `Year of Report` descending and the first row seen for
    each `Site` is kept. The result is copied so that columns can be assigned
    on it without touching the input frame.
    """
    return (
        reports
        .sort_values(by='Year of Report', ascending=False)
        .drop_duplicates(subset=['Site'])
        .copy()
    )


schools = gpd.read_file("../data/processed_data/school_points_with_lcgms.geojson")
solar_readiness = pd.read_csv("../data/raw_data/DCAS/City_of_New_York_Municipal_Solar-Readiness_Assessment_(Local_Law_24_of_2016)_data_20251117.csv")

# Do not truncate columns when a frame is displayed
pd.set_option('display.max_columns', None)

# Once we filter down to DOE sites and deduplicate to most recent report, only 1,094 schools remain
solar_readiness_doe = keep_latest_report_per_site(solar_readiness[solar_readiness['Agency'] == 'DOE'])
print('unique sites in solar readiness data: ', solar_readiness_doe['Site'].nunique())
print('Sites that join to schools before cleaning:', int(solar_readiness_doe['Site'].isin(schools['Building Code']).sum()))

# Clean up "Site" field to improve matches to school building codes.
# Did some manual review to find the right Building Code in the following Sites that had more than one valid-looking building code:
site_building_code_corrections = {
    'K134- K849, K839 CAMPUS: K849 (MAIN BUILDING)': 'K849',
    'K077 (K902 ANNEX)': 'K902',
    'M035 (M058)': 'M058',
    'K293 (K497)': 'K293',
    'K724 (K500)': 'K500',
    'Q825/Q125': 'Q125'
}
# Replace the sites with multiple codes with the correct building code, then extract the building code from the rest.
# expand=False makes str.extract return a Series (not a one-column DataFrame) for the single capture group.
solar_readiness_doe['Site'] = (
    solar_readiness_doe['Site']
    .replace(site_building_code_corrections)
    .str.extract(r'([LKMQRX]\d{3})', expand=False)
)

# Sites with no recognisable building code become NaN. drop_duplicates treats all NaN as equal,
# so without this step they would collapse into a single arbitrary row with an empty Site.
sites_without_code = solar_readiness_doe['Site'].isna()
print('Sites dropped because no building code could be extracted:', int(sites_without_code.sum()))
solar_readiness_doe = solar_readiness_doe[~sites_without_code]
print('Sites that join to schools after cleaning:', int(solar_readiness_doe['Site'].isin(schools['Building Code']).sum()))

# Drop duplicates again after cleaning up Site field (several raw sites can map to the same building code)
solar_readiness_doe = keep_latest_report_per_site(solar_readiness_doe)

# Clean up various cols a bit
solar_readiness_doe['Status'] = solar_readiness_doe['Status'].str.replace('-', ' ', regex=False)
for numeric_col in ('Installed or Estimated Capacity', 'Estimated Annual Production'):
    solar_readiness_doe[numeric_col] = pd.to_numeric(solar_readiness_doe[numeric_col])

In [None]:
# Trim the frame to the columns listed here (order is preserved) and renumber the rows from 0.
solar_readiness_essential_cols = [
    'Site', 'Year of Report',
    'Installed or Estimated Capacity', 'Status', 'Solar-Readiness Assessment',
    'Percentage of Max Peak Demand',
    'Estimated Annual Production', 'Percentage of Annual Electricity Consumption',
    'Estimated Annual Emissions Reduction', 'Estimated Annual Energy Savings',
    'Upfront Project Cost',
    'Total Gross Square Footage', 'Roof Condition', 'Roof Age',
    # NOTE(review): spelling presumably mirrors the source CSV header -- confirm before "fixing" it
    'Other Sustaibility Projects',
]
solar_readiness_doe = (
    solar_readiness_doe
    .loc[:, solar_readiness_essential_cols]
    .reset_index(drop=True)
)

# Export Data

In [None]:
# Write the cleaned DOE table to the processed-data folder, without the row index.
solar_readiness_output_path = "../data/processed_data/solar_readiness_assessment_doe_buildings.csv"
solar_readiness_doe.to_csv(solar_readiness_output_path, index=False)