In [27]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import os
import cartopy
from shapely.geometry import point
import math

In [28]:
os.getcwd()

'/Users/leahwallihan/Durham_school_planning/DPS-Planning'

In [29]:
#loading in the current enrollment for only the 2024-25 school year
current_enrollment = pd.read_csv(r'C:\\Users\\olubl\\OneDrive\\College\\Data+\\DPS-Planning\\marketshare_output_pu_2324_848_gr_2021_2425_long.csv')
#current_enrollment_2024 = current_enrollment[current_enrollment['year_str']=='2024-2025']

current_enrollment = current_enrollment.rename(columns = {'pu_2324_848':'pu_2324_84'})
current_enrollment = current_enrollment[['pu_2324_84','grade','fall_year','count','basez']].fillna(0)
current_enrollment

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\\\Users\\\\olubl\\\\OneDrive\\\\College\\\\Data+\\\\DPS-Planning\\\\marketshare_output_pu_2324_848_gr_2021_2425_long.csv'

In [None]:
current_hs = current_enrollment[current_enrollment['grade'].isin([9,10,11,12])&current_enrollment['fall_year'].isin([2021,2022,2023])]
current_ms = current_enrollment[current_enrollment['grade'].isin([6,7,8])&current_enrollment['fall_year'].isin([2021,2022,2023])]
#average out counts over years 2021-2023
averaged_hs = current_hs.groupby(['pu_2324_84','grade'],as_index=False).mean()
averaged_ms = current_ms.groupby(['pu_2324_84','grade'],as_index=False).mean()

averaged_hs = averaged_hs.groupby(['pu_2324_84'],as_index=False).sum().drop(columns=['grade','fall_year'])
averaged_ms = averaged_ms.groupby(['pu_2324_84'],as_index=False).sum().drop(columns=['grade','fall_year'])

averaged_hs[['count', 'basez']] = averaged_hs[['count', 'basez']].map(lambda x: int(x))
averaged_ms[['count', 'basez']] = averaged_ms[['count', 'basez']].map(lambda x: int(x))

all_pus = pd.DataFrame({'pu_2324_84': range(1,852)})
hs_full = all_pus.merge(averaged_hs, on='pu_2324_84',how='left').fillna(0)
ms_full = all_pus.merge(averaged_ms, on='pu_2324_84',how='left').fillna(0)


In [None]:
#now we want to only show the 5th, 6th, 7th, and 8th grade - the grades that will be in high school in 2028-29
mask = current_enrollment['grade'].isin([9,10,11,12])
hs_2028 = current_enrollment[mask]
hs_2028 = hs_2028[['pu_2324_84','count','basez','lottery']]

In [None]:
#merging identical rows to get student counts by block and grade
columns_to_group = ['pu_2324_84']
hs_2028 = hs_2028.groupby(columns_to_group,as_index=False).sum() #as_index=False preserves the columns we are grouping by

#the planning units with zero enrollment are missing, so we must fill them with zeros
all_pus = pd.DataFrame({'pu_2324_84': range(1,852)})
hs_2028_full = all_pus.merge(hs_2028, on='pu_2324_84',how='left').fillna(0)
hs_2028_full

In [None]:
#read in the planning units
dps_pu = gpd.read_file(r'C:\\Users\\olubl\\OneDrive\\College\\Data+\\DPS-Planning\\GIS_Files\\pu_2324_SPLIT.geojson').rename(columns={'pu_2324_848':'pu_2324_84'})
dps_pu = dps_pu.to_crs(epsg = 3857).sort_values(by='pu_2324_84')
dps_pu

In [None]:
#read in the school locations
dps_base = gpd.read_file(r'C:\\Users\\olubl\\OneDrive\\College\\Data+\\DPS-Planning\\dps_base_2324.geojson')
dps_base = dps_base.to_crs(epsg = 3857)
base_hs = {'Jordan High School','Riverside High School','Northern High School','Hillside High School','Southern High School'}
dps_base_hs = dps_base[dps_base['name'].isin(base_hs)]
dps_base_hs

In [None]:
fig,ax = plt.subplots(figsize=(6,8))

dps_pu.plot(
    column = 'Region',
    legend = True,
    cmap = 'viridis',
    ax = ax
)
dps_base_hs.plot(ax=plt.gca(),zorder = 1, color = 'white')


ax.set_xticks([])
ax.set_yticks([])


plt.title('DPS Planning Units by Base HS')
plt.show()

In [None]:
hs_2028_geo = dps_pu.merge(hs_2028_full, on = 'pu_2324_84')[['pu_2324_84','X','Y','Region','Shape_Area','geometry','count','basez','lottery']]
#merge the student counts with the geometries
hs_2028_geo['centroid'] = hs_2028_geo['geometry'].centroid
hs_2028_geo
#now we have the student counts and geometries for each planning unit

In [None]:
fig,ax = plt.subplots(figsize = (5,7))

hs_2028_geo.plot(
    column = 'basez',
    cmap = 'coolwarm',
    ax = ax,
    legend = True
)

In [None]:
#goal counts:
#Southern: 1700, Hillside: 1700, Northern: 1700, Riverside: 1700, Jordan: 2000

In [None]:
dps_base_hs = dps_base_hs[['name','geometry']]
dps_base_hs.loc[5,'name']='New High School'
dps_base_hs = dps_base_hs.reset_index()

In [None]:
def score_candidate(candidate):
    school_names = ['Southern High School','Hillside High School','Northern High School','Riverside High School','Jordan High School','New High School']
    capacities = [1600,1810,1540,1540,1535,1600]

    counts = pd.DataFrame({'school':school_names,
                           'capacity':capacities,
                           'count':[0,0,0,0,0,0],
                           'coefficient':[1,1,1,1,1,1]
                            })                               #initialize counts dataframe

    
    candidate_geom = candidate['geometry']
    dps_base_hs.loc[5,'geometry']=candidate_geom.centroid
    

    for i in range(20):                                      #iterative boundary assignment (running 10x)
        assignments = []
        for pu in hs_2028_geo.itertuples(index=False):                   
            centroid = pu.geometry.centroid
            pu_scores = []
            for j in range(6):                               #5 existing schools and 1 new
                dist = centroid.distance(dps_base_hs.loc[j,'geometry'])
                score = dist * counts.loc[j,'coefficient']
                pu_scores.append(score)
            assign = school_names[pu_scores.index(min(pu_scores))]
            assignments.append(assign)                        #collect hs assignments based on min distance score
        hs_2028_geo['assign'] = assignments

        for i,school in enumerate(school_names):
            assigned_students = hs_2028_geo.loc[hs_2028_geo['assign'] == school, 'count'].sum()
            counts.loc[i, 'count'] = assigned_students                    #sum up counts for all high schools and update coefficients
        counts['coefficient'] *= (counts['count']/counts['capacity'])**0.3

    objective=hs_2028_geo.merge(dps_base_hs,left_on='assign',right_on='name',how='left')
    objective['distance']=objective['geometry_x'].distance(objective['geometry_y'])
    objective_score = (objective['count']*objective['distance']).sum()

    return objective_score,counts

    

In [None]:
candidate = dps_pu.loc[515]

In [None]:
score_candidate(candidate)

In [None]:
fig,ax = plt.subplots(figsize = (7,9))

hs_2028_geo.plot(
    column='assign',
    ax = ax,
    cmap = 'viridis',
    legend = True
)
dps_base_hs.plot(ax=plt.gca(), color = 'white')

In [None]:
hs_full_geo = dps_pu.merge(hs_full,on='pu_2324_84',how='left')[['pu_2324_84','X','Y','Region','Shape_Area','geometry','count','basez']]
ms_full_geo = dps_pu.merge(ms_full,on='pu_2324_84',how='left')[['pu_2324_84','X','Y','Region','Shape_Area','geometry','count','basez']]

In [None]:
hs_full_geo.to_file('hs_full_geo.geojson')
ms_full_geo.to_file('ms_full_geo.geojson')