In [None]:
import csv,math

# In this notebook we compare 2021 submitted groupings as provided by NKH for California
# with optimized groupings done by Meals Count
# Comparison is Like-for-Like, as in only schools that were submitted to CEP will be grouped

# First, we load ADP data into `sites`, keyed by Site_ID.
# Each meal's daily total is the sum of its three ADP columns (_F, _R, _P --
# presumably free / reduced-price / paid; confirm against the data dictionary).
sites = {}
# Use a context manager so the file handle is closed as soon as loading is done
# (the bare open() previously left it to the garbage collector). newline="" is
# what the csv module asks for so quoted fields with embedded newlines parse correctly.
with open("working/CA ADP Data.csv", newline="") as adp_file:
    for row in csv.DictReader(adp_file):
        sites[row["Site_ID"]] = {
            "district_code": row["Sponsor_ID"],
            "school_code": row["Site_ID"],
            "district_name": row["Sponsor_Name"],
            "school_name": row["Site_Name"],
            "daily_breakfast_served": float(row["Breakfast_ADP_F"]) + float(row["Breakfast_ADP_R"]) + float(row["Breakfast_ADP_P"]),
            "daily_lunch_served": float(row["Lunch_ADP_F"]) + float(row["Lunch_ADP_R"]) + float(row["Lunch_ADP_P"]),
        }

# Cell output: number of sites loaded
len(sites)

In [None]:
# Then load grouping data and enrollment/ISP data from the CEP submission file.
# Columns are addressed by position: 2 = site ID, 5 = ISP (percent string),
# 10 / 12 = grouping label (first non-empty wins), 13 = total enrollment.
# The file is opened in a context manager so the handle is closed deterministically.
with open("working/CA_CEP_2021.csv", newline="") as cep_file:
    for row in csv.reader(cep_file):
        site = sites.get(row[2])
        # Skip rows for sites we have no ADP data for, and rows with no ISP.
        # (row[5] is only inspected for known sites, as before.)
        if site is None or not row[5]:
            continue
        site["total_enrolled"] = int(row[13].replace(",", ""))
        # ISP is a percentage of enrollment; int() truncates toward zero.
        site["total_eligible"] = int(float(row[5].replace("%", "")) / 100 * site["total_enrolled"])
        site["grouping"] = row[10] or row[12] or None

# Only sites that actually carry a grouping label take part in the comparison.
schools = [s for s in sites.values() if s.get("grouping", False)]
"%i of %i Grouped" % (len(schools), len(sites))

In [None]:
import matplotlib.pyplot as plt

# Distribution of daily lunches served as a percentage of enrollment,
# one value per grouped school, drawn as a 20-bin histogram.
lunch_pct_of_enrollment = [
    site["daily_lunch_served"] / site["total_enrolled"] * 100
    for site in schools
]
plt.hist(lunch_pct_of_enrollment, bins=20, label='Lunch ADP')
plt.show()

In [None]:
# Sanity check: collect schools reporting more daily lunches than enrolled students.
upsidedown = list(
    filter(lambda site: site["daily_lunch_served"] > site["total_enrolled"], schools)
)
"%i schools have more lunches than enrolled" % len(upsidedown)

In [None]:
# Build our CEP District for San Diego as a test case
from strategies.base import CEPSchool,CEPDistrict

# Bucket every grouped school into a CEPDistrict, creating each district the
# first time its code is seen.
districts_by_code = {}
for site in schools:
    school = CEPSchool(site)
    code = site["district_code"]
    if code not in districts_by_code:
        districts_by_code[code] = CEPDistrict(site["district_name"], code)
    districts_by_code[code].add_school(school)

# Sorted list of districts (ordering is whatever CEPDistrict itself defines).
districts = sorted(districts_by_code.values())

# Take the first district whose name contains "San Diego"; raises IndexError if none match.
san_diego_matches = [d for d in districts if "San Diego" in d.name]
district = san_diego_matches[0]
district.name, len(district.schools), district.code, district.overall_isp, district.total_enrolled

In [None]:
# View San Diego school data
from IPython.display import display, HTML
import tabulate
# One header row followed by one row per school in the selected district.
# The original grouping label is looked up from `sites` by school code.
header_row = ('School', 'Total Enrolled', 'ISP%', 'Breakfast', 'Lunch', 'Orig Grouping')
school_rows = [
    (
        sch.name,
        sch.total_enrolled,
        '%0.2f%%' % (sch.isp * 100.0),
        sch.bfast_served,
        sch.lunch_served,
        sites.get(sch.code).get("grouping"),
    )
    for sch in district.schools
]
school_table = [header_row] + school_rows

# Render the table as HTML in the notebook output.
display(HTML(tabulate.tabulate(school_table, tablefmt='html')))

In [None]:
# Build two strategies: Custom Group to represent what was submitted, and NYCMODA for optimal
from strategies.naive import CustomGroupsCEPStrategy
from strategies.nyc_moda_simulated_annealing import NYCMODASimulatedAnnealingCEPStrategy

# What was submitted to USDA in 2021: each school is paired with a group key
# of the form "<district_code>-<grouping>".
submitted_groups = [
    ('%s-%s' % (site["district_code"], site["grouping"]), site["school_code"])
    for site in schools
]
orig_strategy = CustomGroupsCEPStrategy()
orig_strategy.set_groups(submitted_groups)

# Our standard MealsCount strategy for large districts.
# NOTE(review): simulated annealing is presumably stochastic, so results may
# differ between runs unless the strategy is seeded -- confirm.
mc_params = {"fresh_starts": 50, "iterations": 1000}
mc_strategy = NYCMODASimulatedAnnealingCEPStrategy(mc_params)

In [None]:
# Calculate reimbursement for groupings submitted to USDA:
# apply the submitted groups to the selected district, then show the
# strategy's resulting `reimbursement` as the cell output.
orig_strategy.create_groups(district)
orig_strategy.reimbursement

In [None]:
# Then create groupings and calculate reimbursement for a more optimal grouping
# WARNING this can take a while.
# NOTE(review): this runs against the same `district` object the original
# strategy was just applied to -- confirm create_groups leaves no state on it
# that would influence this run.
mc_strategy.create_groups(district)
mc_strategy.reimbursement

In [None]:
# What is our difference over the school year?
# The gap between the two strategies' reimbursement is scaled by 180 --
# presumably the number of school days, which would make `reimbursement`
# a per-day figure; confirm. The result is truncated to a whole number.
SCHOOL_DAYS_PER_YEAR = 180
reimbursement_gain = mc_strategy.reimbursement - orig_strategy.reimbursement
int(reimbursement_gain * SCHOOL_DAYS_PER_YEAR)

In [None]:
# Number of schools in the selected district (the set both strategies grouped).
len(district.schools)