In [57]:
import numpy as np
import pandas as pd
import geopandas as gpd

In [59]:
# Load the rate table (presumably student generation rates, "SGR" — confirm) from a local Excel workbook.
# Source noted by the author: Data+_2025/data/enrollment_projections/sgr_table_region_2324_20240710.xlsx in Google Drive
# NOTE(review): the file name read below differs from the Drive file name above, and the path is an
# absolute, user-specific location — confirm it is the same table and consider a configurable data directory.
sgr_data = pd.read_excel('/Users/leahwallihan/Durham_school_planning/sgr_htype_region.xlsx')

In [61]:
# Drop incomplete rows, keep only the lookup columns, round the rate to 4 decimals,
# and index the table by (region, housing_type) so rates can be looked up by that pair.
# NOTE(review): dropna() runs before the column selection, so a null in ANY workbook column removes the row.
# NOTE(review): the '.1' suffix is presumably pandas de-duplicating a repeated header rather than a typo — confirm.
sgr_data = (
    sgr_data
    .dropna()
    .rename(columns={'sgr_dps_2324_all.1': 'sgr_dps_avg_k12'})
    .loc[:, ['housing_type', 'region', 'sgr_dps_avg_k12']]
    .assign(sgr_dps_avg_k12=lambda rates: rates['sgr_dps_avg_k12'].round(4))
    .set_index(['region', 'housing_type'])
)

In [63]:
# High-school region polygons (from Data+_2025/QGIS/DPS shapefiles from layers in Google Drive),
# reduced to the region label and geometry and reprojected to EPSG:4326.
regions = gpd.read_file(
    r'/Users/leahwallihan/Durham_school_planning/geospatial files/HS_regions'
)[['region', 'geometry']].to_crs('EPSG:4326')

# Residential development cases from GeoJSON, reprojected to the same CRS as the regions.
res_dev = gpd.read_file(
    r'/Users/leahwallihan/Durham_school_planning/DPS-Planning/GIS_files/resdev_cases.geojson'
).to_crs('EPSG:4326')

In [69]:
# let's define a function that will count up number of students generated for each data point
def count_students(row):
    """Estimate the number of students generated by one residential development.

    Each housing-type unit count in ``row`` is multiplied by the matching
    ``sgr_dps_avg_k12`` rate for the row's region, and the products are summed.

    Parameters
    ----------
    row : pandas.Series or mapping-like
        Must provide ``'region'`` and the unit-count columns ``'sf_detached'``,
        ``'sf_attached'``, ``'duplex/triplex'``, ``'multifamily'`` and ``'condo'``.

    Returns
    -------
    number
        Sum over housing types of ``count * rate``. A (region, housing type)
        pair with no entry in ``sgr_data`` contributes 0.

    Raises
    ------
    KeyError
        If ``row`` lacks ``'region'`` or one of the unit-count columns.

    Notes
    -----
    The module-level ``sgr_data`` carries ``region`` and ``housing_type`` as
    index levels (they are moved there by ``set_index``), not as columns, so
    the match is done on the index levels. Filtering on
    ``sgr_data['region']`` raises ``KeyError`` on that table.

    NOTE(review): a later cell defines ``count_students`` again, which shadows
    this definition once that cell runs.
    """
    # Keys: unit-count columns in the developments data.
    # Values: housing_type labels used in the rate table.
    htype_map = {
        'sf_detached': 'sf_detach',
        'sf_attached': 'sf_attach',
        'duplex/triplex': 'du_tri',
        'multifamily': 'mf_apt',
        'condo': 'condo'
    }

    region = row['region']

    # Loop-invariant pieces of the mask, computed once per row.
    in_region = sgr_data.index.get_level_values('region') == region
    housing_types = sgr_data.index.get_level_values('housing_type')

    total = 0
    for col_name, sgr_col in htype_map.items():
        count = row[col_name]

        rates = sgr_data.loc[in_region & (housing_types == sgr_col), 'sgr_dps_avg_k12'].values
        # First matching rate wins; no match means this housing type adds nothing.
        multiplier = rates[0] if len(rates) else 0

        total += count * multiplier

    return total

In [79]:
def count_students(row):
    """Return the estimated student count for one development row.

    For every housing type, the row's unit count (0 when the column is absent)
    is multiplied by the ``sgr_dps_avg_k12`` rate stored in the module-level
    ``sgr_data`` under the index key ``(region, housing_type)``. A key that is
    not present in ``sgr_data`` is treated as a rate of 0. The products are
    summed and returned.

    ``row`` must provide ``'region'``; a missing ``'region'`` raises ``KeyError``.
    """
    # (unit-count column in the developments data, housing_type label in sgr_data)
    unit_rate_pairs = (
        ('sf_detached', 'sf_detach'),
        ('sf_attached', 'sf_attach'),
        ('duplex/triplex', 'du_tri'),
        ('multifamily', 'mf_apt'),
        ('condo', 'condo'),
    )

    dev_region = row['region']

    def rate_for(housing_type):
        # Missing (region, housing type) combinations fall back to a zero rate.
        try:
            return sgr_data.loc[(dev_region, housing_type), 'sgr_dps_avg_k12']
        except KeyError:
            return 0

    return sum(
        row.get(unit_col, 0) * rate_for(housing_type)
        for unit_col, housing_type in unit_rate_pairs
    )

In [81]:
# Apply count_students to every development row (axis=1) and store the result in a new 'student_gen' column.
res_dev['student_gen'] = res_dev.apply(count_students, axis=1)

In [13]:
# Preview the first rows of the region polygons.
# NOTE(review): the saved output below shows coordinates in the millions rather than degrees, so it
# appears to predate the EPSG:4326 reprojection of `regions` — re-run to refresh.
regions.head()

Unnamed: 0,region,geometry
0,Central,"POLYGON ((2033813.235 831343.793, 2034345.112 ..."
1,East,"POLYGON ((2076824.784 827577.02, 2076829.527 8..."
2,North,"POLYGON ((2057650.162 851182.236, 2057445.451 ..."
3,Southeast,"POLYGON ((2039564.628 806881.697, 2039758.538 ..."
4,Southwest,"POLYGON ((2010158.969 831111.207, 2010977.529 ..."


In [77]:
# Sanity check on the rate table's columns: 'region' and 'housing_type' were moved into the index,
# so only 'sgr_dps_avg_k12' is expected here.
print(sgr_data.columns)

Index(['sgr_dps_avg_k12'], dtype='object')


In [85]:
# Inspect the developments with their unit counts, region, and the computed 'student_gen' column.
res_dev.head(30)

Unnamed: 0,A_NUMBER,A_TYPE,A_DATE,A_STATUS,A_STATUS_D,A_PROJECT_,A_DESCRIPT,A_USER_ID,A_CASE_PLA,StatCode,...,Editor,match_results,sf_detached,sf_attached,duplex/triplex,multifamily,condo,region,geometry,student_gen
0,D2000291,PL_MINSP,2020-12-02,APP,2021-09-03,Umstead Grove Conservation Subdivision,"50 Single - family lots, 1 stormwater pond, ad...",JESSICADO,COURTNEYMC,APP,...,gisproc_sys,"[(50, None, 'single family', 'lots')]",50,0,0,0,0,North,POINT (-78.94561 36.07501),13.435
1,D1800378,PL_MINSP,2018-11-28,APP,2020-04-14,Elan Innovation District,Mixe of uses including retail and residential....,JOHNRA,TREYFI,APP,...,gisproc_sys,[ ],0,0,0,0,0,Central,POINT (-78.90421 35.99991),0.0
2,D2100156,PL_MINSP,2021-06-04,APP,2022-02-02,ALTA Rutherford,Multifamily Apartments,KIMRO,TREYFI,APP,...,gisproc_sys,[ ],0,0,0,0,0,Central,POINT (-78.92991 36.01005),0.0
3,D1900171,PL_MINSP,2019-05-30,APP,2020-04-03,Ellis Road Phase 3,"37 Townhome units, 102 Attached S-F units (cal...",ROBINSH,COLERE,APP,...,gisproc_sys,"[(37, None, 'townhouse', 'units'), (102, 'atta...",146,139,0,0,0,East,POINT (-78.86167 35.9519),41.5747
4,A1900003,PL_CPAA,2019-02-14,COM,2020-08-17,Cole Property,"From Rural Density Residential (RDR, 0.5DU/acr...",DCULTRA,EMILYST,COM,...,gisproc_sys,[ ],0,0,0,0,0,East,POINT (-78.77719 35.98944),0.0
5,D2200181,PL_MINSP,2022-06-07,APP,2023-05-16,GTH Owner LLC,33 new town home lots with garages and mail ki...,COLERE,COLERE,APP,...,gisproc_sys,"[(33, None, 'townhouse', 'lots')]",0,33,0,0,0,Central,POINT (-78.89255 36.00264),1.089
6,D1900363,PL_MINSP,2019-10-09,APP,2021-07-21,Olive Branch West,"108 Single - Family lots, 2 stormwater ponds, ...",LROBERTSON,COLERE,APP,...,gisproc_sys,"[(108, None, 'single family', 'lots')]",108,0,0,0,0,East,POINT (-78.78659 35.96371),23.5872
7,D2100205,PL_MINSP,2021-07-15,APP,2021-11-29,The Village at Ellis Crossing,"397,000 Total SF: Apartments (264 units), Acce...",KIMRO,ROBINSH,APP,...,gisproc_sys,[ ],0,0,0,0,0,East,POINT (-78.85819 35.94202),0.0
8,D2100111,PL_MINSP,2021-04-12,APP,2024-03-15,Vintage Hill Subdivision,This is a residential subdivision of a 20.96 a...,JESSICADO,COLERE,APP,...,gisproc_sys,"[(49, None, 'single family', None)]",49,0,0,0,0,North,POINT (-78.86672 36.10942),13.1663
9,D1900335,PL_MINSP,2019-09-18,APP,2020-02-18,308 w Corporation St,2 New townhouses on vacant infill lot,WBLALOCK,TREYFI,APP,...,gisproc_sys,"[(2, None, 'townhouse', None)]",0,2,0,0,0,Central,POINT (-78.89871 36.00221),0.066


In [89]:
# Write the developments, including 'student_gen', to a GeoJSON file (path is relative to the working directory).
res_dev.to_file('resdev_student_gen.geojson', driver='GeoJSON')