In [2]:
import numpy as np
import pandas as pd
import geopandas as gpd

In [3]:
# Load the SGR table (presumably student generation rates by housing type and region).
# Source in Google Drive: Data+_2025/data/enrollment_projections/sgr_table_region_2324_20240710.xlsx
# NOTE(review): the local file read below has a different name than the Drive
# file mentioned above -- confirm they are the same table.
sgr_workbook_path = '/Users/leahwallihan/Durham_school_planning/sgr_htype_region.xlsx'
sgr_data = pd.read_excel(sgr_workbook_path)

In [4]:
# Clean the SGR table and reduce it to a (region, housing_type) -> rate lookup.
#   1. drop every row that has a null in ANY column
#      NOTE(review): this runs before the column selection, so a null in an
#      unrelated column also removes the row -- confirm that is intended.
#   2. rename 'sgr_dps_2324_all.1' to 'sgr_dps_avg_k12'
#      (the '.1' suffix looks like pandas de-duplicating a repeated header in
#      the workbook -- TODO confirm against the source file)
#   3. keep only the three relevant columns
#   4. round the rate to 4 decimal places
#   5. index by (region, housing_type) so rates can be looked up with .loc
sgr_data = (
    sgr_data
    .dropna()
    .rename(columns={'sgr_dps_2324_all.1': 'sgr_dps_avg_k12'})
    [['housing_type', 'region', 'sgr_dps_avg_k12']]
    .assign(sgr_dps_avg_k12=lambda frame: frame['sgr_dps_avg_k12'].round(4))
    .set_index(['region', 'housing_type'])
)

In [5]:
# HS region geometries, from Data+_2025/QGIS/DPS shapefiles from layers in Google Drive.
# Only the region label and its geometry are kept, reprojected to EPSG:4326.
hs_regions_raw = gpd.read_file(r'/Users/leahwallihan/Durham_school_planning/geospatial files/HS_regions')
regions = hs_regions_raw[['region', 'geometry']].to_crs('EPSG:4326')

# Residential developments (GeoJSON), reprojected to the same CRS as the regions.
res_dev = gpd.read_file(
    r'/Users/leahwallihan/Durham_school_planning/DPS-Planning/GIS_files/resdev_cases.geojson'
).to_crs('EPSG:4326')

In [6]:
# Maps each housing-type column of the residential developments data to the
# housing_type label used in the index of the SGR table. Defined once here
# instead of being rebuilt for every row.
_HTYPE_TO_SGR = {
    'sf_detached': 'sf_detach',
    'sf_attached': 'sf_attach',
    'duplex/triplex': 'du_tri',
    'multifamily': 'mf_apt',
    'condo': 'condo'
}


def count_students(row, sgr_table=None):
    """Count the students generated by one residential development record.

    For every housing-type column in ``_HTYPE_TO_SGR`` the unit count found in
    ``row`` is multiplied by the 'sgr_dps_avg_k12' rate stored for
    ``(row['region'], housing_type)`` and the products are summed.

    Parameters
    ----------
    row : pandas.Series (or any mapping with ``.get``)
        One development record. Must contain 'region'; housing-type columns
        that are absent count as 0 units.
    sgr_table : pandas.DataFrame, optional
        Rate table indexed by (region, housing_type) with a
        'sgr_dps_avg_k12' column. Defaults to the notebook-level ``sgr_data``
        so ``res_dev.apply(count_students, axis=1)`` keeps working.

    Returns
    -------
    number
        Sum of ``units * rate`` over all housing types; 0 when nothing
        contributes.

    Raises
    ------
    KeyError
        If ``row`` has no 'region' entry.
    """
    if sgr_table is None:
        # Resolved at call time, so the table built in an earlier cell is used.
        sgr_table = sgr_data

    region = row['region']

    total = 0
    for col_name, sgr_htype in _HTYPE_TO_SGR.items():
        units = row.get(col_name, 0)

        # A null unit count (NaN / None, e.g. a null GeoJSON property) would
        # otherwise turn the whole total into NaN or raise a TypeError.
        if pd.isna(units):
            continue

        try:
            rate = sgr_table.loc[(region, sgr_htype), 'sgr_dps_avg_k12']
        except KeyError:
            # No rate for this (region, housing type) pair: contributes nothing.
            continue

        total += units * rate

    return total

In [7]:
# Run count_students on every development (row-wise) and store the result
# in a new 'student_gen' column.
students_per_development = res_dev.apply(count_students, axis=1)
res_dev['student_gen'] = students_per_development

In [8]:
# Census tract shapefile: load it, keep only rows whose COUNTYFP is '063'
# (Durham tracts), and reproject to EPSG:4326.
nc_tracts = gpd.read_file('/Users/leahwallihan/Durham_school_planning/geospatial files/nc_censustract_shape')
is_durham = nc_tracts['COUNTYFP'] == '063'
census_shape = nc_tracts[is_durham].to_crs('EPSG:4326')

In [9]:
# Assign a census tract to each development.
# Work on a copy so res_dev itself does not gain the 'tract' column.
res_dev_tract = res_dev.copy()

# For each tract, flag the developments whose geometry lies within the tract
# polygon and stamp them with the tract code. GeoSeries.within is the
# element-wise GeoPandas predicate, so this does not depend on the scalar
# shapely `geometry.contains(...)` accepting a whole GeoSeries (which only
# works on Shapely 2.x). If a development matches several tracts, the last
# matching tract wins, as before.
for tract_id, tract_geom in zip(census_shape['TRACTCE'], census_shape['geometry']):
    in_tract = res_dev_tract['geometry'].within(tract_geom)
    res_dev_tract.loc[in_tract, 'tract'] = tract_id

In [10]:
# Add student_gen as a column of the census tract frame.
# Sum of generated students per tract code.
tract_gen = res_dev_tract.groupby('tract')['student_gen'].sum()

# Index the tracts by TRACTCE so the assignment below aligns on tract code.
# The guard keeps this cell re-runnable: after the first run TRACTCE is
# already the index and no longer a column, so set_index would raise KeyError.
if 'TRACTCE' in census_shape.columns:
    census_shape = census_shape.set_index('TRACTCE')

# Tracts with no development are missing from tract_gen; they get 0.
census_shape['student_gen'] = tract_gen.reindex(census_shape.index).fillna(0)

In [11]:
# Add a student_gen column to the regions frame.
# Sum of generated students per region.
region_gen = res_dev.groupby('region')['student_gen'].sum()

# Index the regions by 'region' so the assignment below aligns on region label.
# The guard keeps this cell re-runnable: after the first run 'region' is
# already the index and no longer a column, so set_index would raise KeyError.
if 'region' in regions.columns:
    regions = regions.set_index('region')

# Regions with no development are missing from region_gen; give them 0
# (instead of NaN) so they are treated the same way as the census tracts.
regions['student_gen'] = region_gen.reindex(regions.index).fillna(0)

In [12]:
# Write the tract frame (including the student_gen column) to a GeoJSON file
# in the current working directory.
census_out_path = 'census_gen.geojson'
census_shape.to_file(census_out_path, driver='GeoJSON')

In [13]:
# regions.to_file('region_gen.geojson', driver='GeoJSON')

In [14]:
# res_dev.to_file('resdev_student_gen.geojson', driver='GeoJSON')