In [None]:
# First, parse our California school data from a CSV bundled in the repo
import csv,codecs

def i(x):
    """Turn a comma-grouped numeric string such as "123,123" into the int 123123.

    x: a string of digits, optionally containing ',' thousands separators.
    Returns the integer value; raises ValueError (from int()) when the text
    left after removing commas is not a valid integer.
    """
    return int(x.replace(',', ''))
cupc_csv_file = 'data/ca/latest.csv'

# Materialise every row inside the `with` block so the file handle is closed as
# soon as parsing finishes instead of lingering for the life of the kernel.
# newline='' is how the csv module expects its input files to be opened.
with open(cupc_csv_file, newline='') as csv_file:
    schools_data = [
        r for r in csv.DictReader(csv_file)
        if i(r['total_enrolled']) > 0  # ignore 0 student schools
    ]

# Number of schools kept after dropping the zero-enrollment rows
len(schools_data)

In [None]:
# The CEPSchool and CEPDistrict classes encapsulate our data model
# We first take the raw school data and push it into CEPDistrict and CEPSchools,
# reducing it to a list of Districts, each with its list of schools
from strategies.base import CEPSchool, CEPDistrict
# Bucket schools under their district, keyed by district code; a district
# object is only created the first time its code is encountered.
districts_by_code = {}
for row in schools_data:
    school = CEPSchool(row)
    code = row['District Code']
    if code not in districts_by_code:
        districts_by_code[code] = CEPDistrict(row['District Name'], code)
    districts_by_code[code].add_school(school)

# Flatten to a sorted list; ordering is whatever CEPDistrict's comparison defines
districts = sorted(districts_by_code.values())
len(districts)

In [None]:
# The CEPDistrict class represents a School District
# it has a list of CEPSchools, and provides some aggregate information

# Let's inspect a district: take the first one named 'San Diego Unified'
# (indexing [0] raises IndexError when no district has that name)
san_diego = list(filter(lambda d: d.name == 'San Diego Unified', districts))[0]
(
    san_diego.name,
    len(san_diego.schools),
    san_diego.code,
    san_diego.overall_isp,
    san_diego.total_enrolled,
)

    

In [None]:
# We can look at all the schools
from IPython.display import display, HTML
import tabulate

# Header labels and data rows must have the same width: every row carries five
# values, so the `active` flag in the last position needs its own label.
school_table_header = ('School', 'Total Enrolled', 'School Type', 'ISP%', 'Active')
school_table = [school_table_header] + [
    (s.name, s.total_enrolled, s.school_type, '%0.2f%%' % (s.isp * 100.0), s.active)
    for s in san_diego.schools
]

# headers='firstrow' tells tabulate to render the first row as the table header
# rather than as an ordinary data row.
display(HTML(tabulate.tabulate(school_table, headers='firstrow', tablefmt='html')))


In [None]:
# Or do some charts
import matplotlib.pyplot as plt

# Histogram of per-school enrollment for the selected district, in 20 bins.
# Title and axis labels are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.hist([s.total_enrolled for s in san_diego.schools], bins=20)
ax.set(
    title='%s: enrollment per school' % san_diego.name,
    xlabel='Total enrolled',
    ylabel='Number of schools',
)
plt.show()

In [None]:
# Strategies are the algorithms we use to group schools
# Each one extends BaseStrategy and overrides the "create_groups" method
# Here we need to define which strategies we want to run
# We pass them into parse_districts as the class name, any parameters for it, and a name
# this way we can run the same strategy in multiple configurations

from strategies.naive import OneGroupCEPStrategy,OneToOneCEPStrategy

# If we create a strategy, and run its create_groups method with the district as the
# incoming parameter, then it fills its own "groups" member with the resulting CEPGroup objects

# Choose exactly one strategy class to demonstrate. OneToOneCEPStrategy is the one
# exercised here; swap in OneGroupCEPStrategy on the next line to try the other.
strategy_class = OneToOneCEPStrategy
strategy = strategy_class()
strategy.create_groups(san_diego)

# Inspect the first group produced by the chosen strategy.
# NOTE(review): OneGroupCEPStrategy is expected to yield a single group, so
# groups[0] would be the whole result in that case — confirm.
g = strategy.groups[0]
g, g.covered_students, g.est_reimbursement()


In [None]:
# We can see the recommended grouping of schools for the strategy result
from strategies.pairs import PairsCEPStrategy
strategy = PairsCEPStrategy()
strategy.create_groups(san_diego)

# One table row per (group, school) pair, preceded by a row of column labels
grouping_labels = ('Group', 'School', 'Group ISP', 'School ISP', 'School Type', 'School Total Enrolled')
school_groupings = [grouping_labels]
for g in strategy.groups:
    school_groupings.extend(
        (g.name, s.name, g.isp, s.isp, s.school_type, s.total_enrolled)
        for s in g.schools
    )

grouping_html = tabulate.tabulate(school_groupings, tablefmt='html')
display(HTML(grouping_html))


In [None]:
from strategies.nyc_moda_simulated_annealing import NYCMODASimulatedAnnealingCEPStrategy
# Run the simulated-annealing strategy on the same district.
strategy = NYCMODASimulatedAnnealingCEPStrategy()
# The return value of create_groups is kept in `groups`; what it returns is not
# visible from this notebook — TODO confirm (other cells read strategy.groups instead).
groups = strategy.create_groups(san_diego)
# Display the strategy's `reimbursement` attribute as the cell output
# (presumably the estimated reimbursement for the resulting grouping — verify in the strategy class).
strategy.reimbursement