In [1]:
# First, parse our California school data from a CSV bundled in the repo
import csv,codecs

i = lambda x: int(x.replace(',','')) # Quick function to make "123,123" into an int 123123
cupc_csv_file = 'data/calpads_school_level_1819.csv'
schools_data = [
    r for r in 
    csv.DictReader(codecs.open(cupc_csv_file)) 
    if i(r['total_enrolled']) > 0 # ignore 0 student schools
] 

len(schools_data)

10512

In [2]:
# The CEPSchool and CEPDistrict classes encapsulate our data model
# We first take the raw school data and push it into CEPDistrict and CEPSchools,
# reducing it to a list of Districts, each with its list of schools
from strategies.base import CEPSchool, CEPDistrict
districts = {}
for row in schools_data:
    school = CEPSchool(row)
    if school.district not in districts:
        district = CEPDistrict(school.district,school.district_code)
        districts[school.district] = district
    districts[school.district].schools.append(school)
districts = list(districts.values())
districts.sort()
len(districts)

1018

In [3]:
# The CEPDistrict class represents a School District
# it has a list of CEPSchools, and provides some aggregate information

# Let's inspect a district
san_diego = [d for d in districts if d.name == 'San Diego Unified'][0]
san_diego.name,len(san_diego.schools),san_diego.code,san_diego.overall_isp,san_diego.total_enrolled

    

('San Diego Unified', 223, '68338', 0.3661474742754003, 124103)

In [4]:
# We can look at all the schools
from IPython.display import display, HTML
import tabulate

school_table = [('School','Total Enrolled','ISP%')] + \
            [ (s.name,s.total_enrolled, ('%0.2f%%' % (s.isp*100.0)) ) 
              for s in san_diego.schools ]

display(HTML(tabulate.tabulate(school_table,tablefmt='html')))


0,1,2
School,Total Enrolled,ISP%
"Nonpublic, Nonsectarian Schools",310,11.29%
High Tech Middle,321,24.30%
KIPP Adelante Preparatory Academy,327,48.01%
High Tech High International,391,20.20%
Learning Choice Academy,566,29.68%
San Diego International Studies,1239,30.99%
San Diego Business/Leadership,531,57.63%
Kearny Digital Media & Design,384,40.10%
Kearny SCT,356,47.19%


In [5]:
# Or do some charts
import matplotlib.pyplot as plt

plt.hist([s.total_enrolled for s in san_diego.schools], 20, label='Label')
plt.show()

<Figure size 640x480 with 1 Axes>

In [6]:
# Strategies are the algorithms we use to group schools
# Each one extends BaseStrategy and overrides the "create_groups" method
# Here we need to define which strategies we want to run
# We pass them into parse_districts as the class name, any parameters for it, and a name
# this way we can run the sam strategy in multiple configurations

from strategies.naive import OneGroupCEPStrategy,OneToOneCEPStrategy

# If we create a strategy, and run its create_groups method with the district as the
# incoming parameter, then it fills its own "groups" member with the resulting CEPGroup objects
strategy = OneGroupCEPStrategy()
strategy = OneToOneCEPStrategy()
strategy.create_groups(san_diego)

# In the case of OneGroup, we have only 1 group in our list
g = strategy.groups[0]
g,g.covered_students,g.est_reimbursement()


(<strategies.base.CEPDistrict object at 0x1175623c8> / Nonpublic, Nonsectarian Schools ISP=11% ENROLLED=310 FREE_RATE=0.00%,
 0,
 {'low': 85.0485, 'high': 172.73975})

In [7]:
# We can see the recommended grouping of schools for the strategy result
from strategies.binning import BinCEPStrategy

strategy = BinCEPStrategy()
strategy.create_groups(san_diego)

school_groupings = [('Group','School','Group ISP','School ISP','School Total Enrolled')]
for g in strategy.groups:
    for s in g.schools:
        school_groupings.append( (g.name,s.name,g.isp,s.isp,s.total_enrolled))
display(HTML(tabulate.tabulate(school_groupings,tablefmt='html')))


0,1,2,3,4
Group,School,Group ISP,School ISP,School Total Enrolled
High-ISP,Crawford High,0.6244,0.6559,1119
High-ISP,Porter Elementary,0.6244,0.6814,835
High-ISP,Cherokee Point Elementary,0.6244,0.6429,378
High-ISP,Ibarra Elementary,0.6244,0.7743,452
High-ISP,Iftin Charter,0.6244,0.8481,349
High-ISP,Lincoln High,0.6244,0.66,1541
High-ISP,Rodriguez Elementary,0.6244,0.72,450
High-ISP,Joyner Elementary,0.6244,0.6796,568
High-ISP,Millennial Tech Middle,0.6244,0.6897,448
