# Calculating percent proficiency and its percent change in New York State education assessment data

After downloading the [2015, 2016 and 2017 data from the New York State Education Department](https://data.nysed.gov/downloads.php) and renaming the files to 3-8\_ELA\_MATH\_{{ year }}.csv, this program calculates the percent proficient, scoring at Level 3 or 4, of each school. The percent proficient is calculated by adding each raw proficient count together for available grades and then dividing the total test takers. The 2013 and 2014 files were first converted from .mdb file format.

In [1]:
import agate
import csv

Due to missing school values and changes in data columns, I needed two different typetesters to force the columns into their correct formatting.

In [2]:
tester_15 = agate.TypeTester(limit=100,force={
    'NRC_DESC': agate.Text(),
    'NRC_CODE': agate.Text(),
        'BEDSCODE': agate.Text(),
        'L1_COUNT': agate.Text(),
        'L1_PCT': agate.Text(),
        'L2_COUNT': agate.Text(),
        'L2_PCT': agate.Text(),
        'L3_COUNT': agate.Text(),
        'L3_PCT': agate.Text(),
        'L4_COUNT': agate.Text(),
        'L4_PCT': agate.Text(),
        'L2-L4_PCT': agate.Text(),
        'L3-L4_PCT': agate.Text(),
        'MEAN_SCALE_SCORE': agate.Text(),
        'COUNTY_DESC': agate.Text(),
        'TOTAL_TESTED': agate.Text(),
        'COUNTY_CODE': agate.Text(),
        'Sum_Of_SCALE_SCORE': agate.Text()
})
tester_16 = agate.TypeTester(limit=100,force={
    'NRC_DESC': agate.Text(),
    'NRC_CODE': agate.Text(),
        'BEDSCODE': agate.Text(),
        'L1_COUNT': agate.Text(),
        'L1_PCT': agate.Text(),
        'L2_COUNT': agate.Text(),
        'L2_PCT': agate.Text(),
        'L3_COUNT': agate.Text(),
        'L3_PCT': agate.Text(),
        'L4_COUNT': agate.Text(),
        'L4_PCT': agate.Text(),
        'L2-L4_PCT': agate.Text(),
        'L3-L4_PCT': agate.Text(),
        'MEAN_SCALE_SCORE': agate.Text(),
        'COUNTY_DESC': agate.Text(),
        'TOTAL_TESTED': agate.Text(),
        'COUNTY_CODE': agate.Text(),
        'SY_END_DATE': agate.Text(),
        'SUBGROUP_CODE':agate.Text()
})
tester_12 = agate.TypeTester(limit=100,force={
        'BEDSCODE': agate.Text(),
        'L1_COUNT': agate.Text(),
        'L2_COUNT': agate.Text(),
        'L3_COUNT': agate.Text(),
        'L4_COUNT': agate.Text(),
        'MEAN_SCALE_SCORE': agate.Text(),
        'COUNTY_DESC': agate.Text(),
        'TOTAL_TESTED': agate.Text(),
        'COUNTY_CODE': agate.Text()
})

This program takes in the csv of a specific year's assesment data and its tester and returns a dictionary of BEDS (state id codes) and test data.

In [3]:
def get_percent_dictionary(file_name, tester):
    schools = agate.Table.from_csv(file_name, column_types=tester)
    #Grab just Erie and Niagara schools, filtering out available subgroups and total county stats
    erie_niagara = schools.where(lambda row: row['COUNTY_DESC'] in ['ERIE', 'NIAGARA']).where(lambda row: row['SUBGROUP_NAME'] in ['All Students'])
    print(len(erie_niagara.rows))
    #Data doesn't include district information, but the first six digits of a school's BEDS codes includes that info.
    beds_code = {}
    for row in erie_niagara.rows:
        if 'SCHOOL DISTRICT' in row['NAME']:
            beds_district = row['BEDSCODE'][:6]
            #Creates a lookup table of ids for the district name
            beds_code[str(beds_district)] = row['NAME']
            
    def calculate_prof_counts(row):
        #Some schools don't have test takers for certain tests, so fill in with zeros.
        try:
            l3_counts = int(row['L3_COUNT'])
        except ValueError:
            l3_counts = 0
        try:
            l4_counts = int(row['L4_COUNT'])
        except ValueError:
            l4_counts = 0
        proficient = l3_counts + l4_counts
        return proficient
    
    erie_calcs = erie_niagara.compute([
        ('proficient_count', agate.Formula(agate.Number(), lambda row: calculate_prof_counts(row)))
    ])
    
    # Form the data structure for each school with beds code as a key
    schools_info = {}
    for row in erie_calcs.rows:
        if row['NAME'] == 'NIAGARA CHARTER SCHOOL':
            district = 'NIAGARA-WHEATFIELD CENTRAL SCHOOL DISTRICT'
        elif row['NAME'] == 'ERIE COUNTY' or row['NAME'] == 'NIAGARA COUNTY':
            district = row['NAME']
        else:
            beds_district = row['BEDSCODE'][:6]
            district = beds_code[beds_district]
            
        """name = School name
        math/ela dictionary has totals = total test takers one per applicable class; proficient = proficient_count values; classes = grades taking the test
        district = district name (looked up using the beds_code dict)
        county = county"""
        schools_info[row['BEDSCODE']] = {
            'name': row['NAME'], 
            'math': {'totals': [], 
                     'proficient': [], 
                     'classes': []
                    }, 
            'ela': {'totals': [], 
                    'proficient': [], 
                    'classes': []
                   }, 
            'district': district, 
            'county': row['COUNTY_DESC'] 
        }
        
    # After creating the correct dict format, now I run through the schools again to fill the score values.
    for row in erie_calcs.rows:
        if row['ITEM_SUBJECT_AREA'] == 'ELA':
            data_dict = schools_info[row['BEDSCODE']]['ela']
        else:
            data_dict = schools_info[row['BEDSCODE']]['math']
        try:
            data_dict['totals'].append(int(row['TOTAL_TESTED']))
        except ValueError:
            data_dict['totals'].append(0)
        try:
            data_dict['classes'].append(row['ITEM_DESC'])
        except ValueError:
            data_dict['classes'].append(0)
        try:
            data_dict['proficient'].append(int(row['proficient_count']))
        except ValueError:
            data_dict['proficient'].append(0)
            
    def calculate_prof_percent(test_area):
        total_sum = sum(values[test_area]['totals'])
        proficient_sum = sum(values[test_area]['proficient'])
        try:
            correct_value = ((proficient_sum / total_sum) * 100)
            schools_info[school][test_area]['total_percent'] = float("{0:.1f}".format(correct_value))
        except ZeroDivisionError:
            schools_info[school][test_area]['total_percent'] = '-'
    
    for school, values in schools_info.items():
        calculate_prof_percent('ela')
        calculate_prof_percent('math')
        
    return schools_info

In [4]:
year_16 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2016.csv', tester_16)

1861


In [5]:
year_17 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2017.csv', tester_16)

1839


In [6]:
year_15 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2015.csv',tester_15)

1876


In [7]:
year_14 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2014.csv',tester_15)

1883


In [8]:
year_13 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2013.csv',tester_15)

1912


In [9]:
year_12 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2012.csv',tester_12)

1820


In [10]:
year_11 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2011.csv',tester_12)

1786


In [11]:
year_10 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2010.csv',tester_12)

1780


In [12]:
year_09 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2009.csv',tester_12)

1776


In [13]:
year_08 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2008.csv',tester_12)

1772


In [14]:
year_07 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2007.csv',tester_12)

1726


In [15]:
year_06 = get_percent_dictionary('raw/3-8_ELA_AND_MATH_2006.csv',tester_12)

1722


Example of each year returning data for a specific school

In [16]:
print(year_16['140000000000'])
print('$$$$$$$$$$$$$$$$$$$$$$$$')
print(year_12['140000000000'])

{'name': 'ERIE COUNTY', 'math': {'totals': [6995, 6509, 6211, 5829, 5356, 4207], 'proficient': [2821, 2687, 2365, 2122, 1589, 708], 'classes': ['Grade 3 Math', 'Grade 4 Math', 'Grade 5 Math', 'Grade 6 Math', 'Grade 7 Math', 'Grade 8 Math'], 'total_percent': 35.0}, 'ela': {'totals': [7026, 6577, 6329, 6070, 5706, 5368], 'proficient': [2740, 2345, 1865, 1962, 1737, 1904], 'classes': ['Grade 3 ELA', 'Grade 4 ELA', 'Grade 5 ELA', 'Grade 6 ELA', 'Grade 7 ELA', 'Grade 8 ELA'], 'total_percent': 33.9}, 'district': 'ERIE COUNTY', 'county': 'ERIE'}
$$$$$$$$$$$$$$$$$$$$$$$$
{'name': 'ERIE COUNTY', 'math': {'totals': [9460, 9637, 9805, 9914, 9855, 9763], 'proficient': [5674, 6490, 6191, 6421, 6413, 6059], 'classes': ['Grade 3 Math', 'Grade 4 Math', 'Grade 5 Math', 'Grade 6 Math', 'Grade 7 Math', 'Grade 8 Math'], 'total_percent': 63.7}, 'ela': {'totals': [9403, 9583, 9738, 9862, 9798, 9725], 'proficient': [5281, 5629, 5515, 5765, 5114, 5127], 'classes': ['Grade 3 ELA', 'Grade 4 ELA', 'Grade 5 ELA',

In [17]:
print(year_16['400400010001'])
print(year_15['400400010001'])
print(year_17['400400010001'])
print('****')
print(year_17['140101060007'])

{'name': 'ANNA MERRITT ELEMENTARY SCHOOL', 'math': {'totals': [67, 54], 'proficient': [29, 21], 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'total_percent': 41.3}, 'ela': {'totals': [69, 62], 'proficient': [22, 11], 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'total_percent': 25.2}, 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'county': 'NIAGARA'}
{'name': 'ANNA MERRITT ELEMENTARY SCHOOL', 'math': {'totals': [57, 62], 'proficient': [15, 29], 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'total_percent': 37.0}, 'ela': {'totals': [60, 69], 'proficient': [11, 12], 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'total_percent': 17.8}, 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'county': 'NIAGARA'}
{'name': 'ANNA MERRITT ELEMENTARY SCHOOL', 'math': {'totals': [52, 62], 'proficient': [23, 19], 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'total_percent': 36.8}, 'ela': {'totals': [53, 65], 'proficient': [11, 16], 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'total_percent': 22.9}, 'district': 'LOC

Backfilling new 2017 schools

In [18]:
for school in year_17:
    if school not in year_16:
        print('not in 2016 {0}'.format(year_17[school]['name']))
        year_16[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_15:
        print('not in 2015 {0}'.format(year_17[school]['name']))
        year_15[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_14:
        print('not in 2014 {0}'.format(year_17[school]['name']))
        year_14[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_13:
        print('not in 2013 {0}'.format(year_17[school]['name']))
        year_13[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_12:
        print('not in 2012 {0}'.format(year_17[school]['name']))
        year_12[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_11:
        print('not in 2011 {0}'.format(year_17[school]['name']))
        year_11[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_10:
        print('not in 2010 {0}'.format(year_17[school]['name']))
        year_10[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_09:
        print('not in 2009 {0}'.format(year_17[school]['name']))
        year_09[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_08:
        print('not in 2008 {0}'.format(year_17[school]['name']))
        year_08[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_07:
        print('not in 2007 {0}'.format(year_17[school]['name']))
        year_07[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_06:
        print('not in 2006 {0}'.format(year_17[school]['name']))
        year_06[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
        
for school in year_16:
    if school not in year_17:
        print ('Not in 2017 {0} {1}'.format(school, year_16[school]['name']))
    if school not in year_15:
        print ('Not in 2015 {0} {1}'.format(school, year_16[school]['name']))
print('checking 2015')
for school in year_15:
    if school not in year_17:
        print ('Not in 2017 {0} {1}'.format(school, year_15[school]['name']))
    if school not in year_16:
        print ('Not in 2016 {0} {1}'.format(school, year_15[school]['name']))

not in 2016 ALDEN INTERMEDIATE SCHOOL
not in 2015 ALDEN INTERMEDIATE SCHOOL
not in 2014 ALDEN INTERMEDIATE SCHOOL
not in 2013 ALDEN INTERMEDIATE SCHOOL
not in 2012 ALDEN INTERMEDIATE SCHOOL
not in 2011 ALDEN INTERMEDIATE SCHOOL
not in 2010 ALDEN INTERMEDIATE SCHOOL
not in 2009 ALDEN INTERMEDIATE SCHOOL
not in 2008 ALDEN INTERMEDIATE SCHOOL
not in 2007 ALDEN INTERMEDIATE SCHOOL
not in 2006 ALDEN INTERMEDIATE SCHOOL
not in 2009 PARKDALE ELEMENTARY SCHOOL
not in 2008 PARKDALE ELEMENTARY SCHOOL
not in 2007 PARKDALE ELEMENTARY SCHOOL
not in 2006 PARKDALE ELEMENTARY SCHOOL
not in 2006 FRANK A SEDITA SCHOOL #30
not in 2006 MATH SCIENCE TECHNOLOGY PREPARATORY SCHOOL AT SENECA (THE)
not in 2011 ALTERNATIVE HIGH SCHOOL AT 4
not in 2010 ALTERNATIVE HIGH SCHOOL AT 4
not in 2009 ALTERNATIVE HIGH SCHOOL AT 4
not in 2008 ALTERNATIVE HIGH SCHOOL AT 4
not in 2007 ALTERNATIVE HIGH SCHOOL AT 4
not in 2006 ALTERNATIVE HIGH SCHOOL AT 4
not in 2007 INTERNATIONAL PREPARATORY SCHOOL (THE)
not in 2006 INTERNAT

Grab clean school/district names and which schools are charters

In [19]:
clean_names = {}
charters = []
with open('../school_name_dictionary.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        clean_names[row['BEDS']] = row['clean_school']
        if len(row['charter']) == 1:
            charters.append(row['BEDS'])
clean_names['140101060007']

'Alden Intermediate'

In [22]:
# Added by Steve
clean_names

{'140000000000': 'Erie County average',
 '8686868686868': 'Charters average',
 '400000000000': 'Niagara County average',
 '142101040000': 'AKRON SCHOOL DISTRICT',
 '142101040001': 'Akron Elementary',
 '142101040003': 'Akron Middle',
 '140101060000': 'ALDEN SCHOOL DISTRICT',
 '140101060005': 'Alden Middle',
 '140201060000': 'AMHERST',
 '140201060001': 'Amherst Middle',
 '140201060005': 'Smallwood',
 '140201060006': 'Windermere',
 '140600010000': 'Buffalo Sch. Dist. average',
 '140600860911': 'Johnson',
 '140600010133': 'Alternative High School',
 '140600010122': 'Bennett Park Montessori',
 '140600010033': 'Bilingual Center',
 '140600010097': 'Performing Arts',
 '140600860861': 'Bflo. Academy of Science',
 '140600010006': 'Bflo. Elem. Sch. of Tech.',
 '140600860851': 'Buffalo United',
 '140600010032': 'BUILD Academy',
 '140600010102': 'City Honors',
 '140600010053': 'Community School 53',
 '140600010003': "D'Youville-Porter",
 '140600010001': 'Discovery School',
 '140600010018': 'Pantoja

Separate schools/district into county lists for print, and create dictionary with included schools and codes.

In [30]:
erie_list = {}
niagara_list = {}
charter_list = {}
for key, value in year_17.items():
    if key not in charters:
        if 'SCHOOL DISTRICT' in value['name']:
            if value['county'] == 'ERIE':
                erie_list[key] = {'name': clean_names[key], 'schools': {}, 'dist_key': value['name']}
            else:
                niagara_list[key] = {'name': clean_names[key], 'schools': {}, 'dist_key': value['name']}
    else:
        charter_list[key] = clean_names[key]
        
for distict, value in erie_list.items():
    for school, info in year_17.items():
        if school not in charters:
            if value['dist_key'] == info['district'] and 'SCHOOL DISTRICT' not in info['name']:
                erie_list[distict]['schools'][school] = clean_names[school]
                
for distict, value in niagara_list.items():
    for school, info in year_17.items():
        if school not in charters:
           if value['dist_key'] == info['district'] and 'SCHOOL DISTRICT' not in info['name']:
                niagara_list[distict]['schools'][school] = clean_names[school]

print(niagara_list)

{'400800010000': {'schools': {'400800010022': 'Niagara Street', '400800010041': 'LaSalle Preparatory', '400800010021': 'Maple Avenue', '400800010020': 'Hyde Park', '400800010031': 'Abate Elementary', '400800010042': 'Cataract Elementary', '400800010040': 'Gaskill Preparatory', '400800010012': 'Mann', '400800010015': 'Kalfas Magnet', '400800010010': 'Seventy Ninth Street'}, 'name': 'NIAGARA FALLS SCHOOL DISTRICT', 'dist_key': 'NIAGARA FALLS CITY SCHOOL DISTRICT'}, '400701060000': {'schools': {'400701060005': 'Errick Road Elementary', '400701060002': 'West Street Elementary', '400701060004': 'Colonial Village Elem.', '400701060009': 'Edward Town Middle', '400701060003': 'Tuscarora Elementary'}, 'name': 'NIAGARA WHEATFIELD SCH. DIST.', 'dist_key': 'NIAGARA-WHEATFIELD CENTRAL SCHOOL DISTRICT'}, '400400010000': {'schools': {'400400010009': 'Belknap Intermediate', '400400010002': 'Upson Elementary', '400400010010': 'North Park Junior High', '400400010001': 'Merritt Elementary', '400400010007

In [31]:
def percent_change(new,old):
    try:
        calculate = (new-old)/old
        percent = calculate * 100
        one_decimal = float("{0:.1f}".format(percent))
        return one_decimal
    except (ZeroDivisionError,TypeError):
        return '-'

Exporting for print

In [23]:
erie_districts = 0
niagara_districts = 0
charter_districts = 0
def export_county_schools(county_list, county):
    print('Begging {0}'.format(county))
    county_count = 0
    if county != 'charter':
        #Sort the county of district id's by its name value.
        sorted_county = sorted(county_list, key= lambda district: county_list[district]['name'])
        for district in sorted_county:
            county_count += 1
            #print('*** starting district {0}'.format(county_list[district]['name']))
            pc_ela_15_17 = percent_change(year_17[district]['ela']['total_percent'],year_15[district]['ela']['total_percent'])
            pc_ela_16_17 = percent_change(year_17[district]['ela']['total_percent'],year_16[district]['ela']['total_percent'])
            pc_math_15_17 = percent_change(year_17[district]['math']['total_percent'],year_15[district]['math']['total_percent'])
            pc_math_16_17 = percent_change(year_17[district]['math']['total_percent'],year_16[district]['math']['total_percent'])
            district_info = [county_list[district]['name'], clean_names[district], year_15[district]['ela']['total_percent'],year_16[district]['ela']['total_percent'],year_17[district]['ela']['total_percent'],year_15[district]['math']['total_percent'],year_16[district]['math']['total_percent'],year_17[district]['math']['total_percent'], pc_ela_15_17, pc_ela_16_17, pc_math_15_17, pc_math_16_17]
            writer.writerow(district_info)
            schools_info = county_list[district]['schools']
            sorted_schools = sorted(schools_info, key = lambda school: schools_info[school])
            #Now go through each school in the district and calculate its percent change.
            for school in sorted_schools:
                    pc_school_ela_15_17 = percent_change(year_17[school]['ela']['total_percent'],year_15[school]['ela']['total_percent'])
                    pc_school_ela_16_17 = percent_change(year_17[school]['ela']['total_percent'],year_16[school]['ela']['total_percent'])
                    pc_school_math_15_17 = percent_change(year_17[school]['math']['total_percent'],year_15[school]['math']['total_percent'])
                    pc_school_math_16_17 = percent_change(year_17[school]['math']['total_percent'],year_16[school]['math']['total_percent'])
                    school_info = [county_list[district]['name'], clean_names[school], year_15[school]['ela']['total_percent'],year_16[school]['ela']['total_percent'],year_17[school]['ela']['total_percent'],year_15[school]['math']['total_percent'],year_16[school]['math']['total_percent'],year_17[school]['math']['total_percent'], pc_school_ela_15_17, pc_school_ela_16_17, pc_school_math_15_17, pc_school_math_16_17]
                    writer.writerow(school_info)
    else:
        #Charter schools only have one level aka direct to school data.
        sorted_county = sorted(county_list, key= lambda district: county_list[district])
        for district in sorted_county:
            county_count += 1
            #print('*** starting district {0}'.format(county_list[district]))
            pc_ela_15_17 = percent_change(year_17[district]['ela']['total_percent'],year_15[district]['ela']['total_percent'])
            pc_ela_16_17 = percent_change(year_17[district]['ela']['total_percent'],year_16[district]['ela']['total_percent'])
            pc_math_15_17 = percent_change(year_17[district]['math']['total_percent'],year_15[district]['math']['total_percent'])
            pc_math_16_17 = percent_change(year_17[district]['math']['total_percent'],year_16[district]['math']['total_percent'])
            district_info = [year_17[district]['district'], clean_names[district], year_15[district]['ela']['total_percent'],year_16[district]['ela']['total_percent'],year_17[district]['ela']['total_percent'],year_15[district]['math']['total_percent'],year_16[district]['math']['total_percent'],year_17[district]['math']['total_percent'], pc_ela_15_17, pc_ela_16_17, pc_math_15_17, pc_math_16_17]
            writer.writerow(district_info)
    return county_count

with open('print_out.tsv', 'w') as out_put:
    writer = csv.writer(out_put, delimiter='\t')
    writer.writerow(['district','school','ELA 2015', 'ELA 2016', 'ELA 2017', 'Math 2015', 'Math 2016', 'Math 2017', 'pc_ela_15_17', 'pc_ela_16_17', 'pc_math_15_17', 'pc_math_16_17'])
    erie_districts = export_county_schools(erie_list, 'erie')
    niagara_districts = export_county_schools(niagara_list, 'niagara')
    charter_districts = export_county_schools(charter_list, 'charter')
    print('{0} erie districts and {1} niagara districts {2} charters'.format(erie_districts,niagara_districts,charter_districts))

Begging erie
Begging niagara
Begging charter
28 erie districts and 10 niagara districts 15 charters


Online needs JSON in the clean_json groups.

In [32]:
print(erie_list['140201060000'])

{'schools': {'140201060001': 'Amherst Middle', '140201060006': 'Windermere', '140201060005': 'Smallwood'}, 'name': 'AMHERST', 'dist_key': 'AMHERST CENTRAL SCHOOL DISTRICT'}


In [33]:
erie_districts = 0
niagara_districts = 0
clean_json = {'Erie': {}, 'Niagara': {}, 'Buffalo': {}, 'Charters': {}}
def export_county_schools(county_list, county):
    ordered = []
    county_count = 0
    #Sort the county of district id's by its name value.
    if county != 'Charters':
        if county == 'Erie':
            ordered.append(['140000000000', clean_names['140000000000']])
        elif county == 'Niagara':
            ordered.append(['400000000000',  clean_names['400000000000']])
        sorted_county = sorted(county_list, key= lambda district: county_list[district]['name'])
        for district in sorted_county:
            #Buffalo gets thrown in its own group
            if district != '140600010000':
                county_count += 1
                #print('*** starting district {0}'.format(county_list[district]['name']))
                schools_info = county_list[district]['schools']
                ordered.append([district,clean_names[district]])
                sorted_schools = sorted(schools_info, key = lambda school: schools_info[school])
                #Now go through each school in the district and calculate its percent change.
                for school in sorted_schools:
                    try:
                        #print('writing the following {0}'.format(schools_info[school]))
                        ordered.append([school,clean_names[school]])
                    except KeyError:
                        #Beds code isn't found in one of the years for this school
                        print('Missing school in 2013, 2014, 2015, 2016 and/or 2017')
                        print(schools_info[school])
                        print(school)
            else:
                buffalo_list = []
                buffalo_list.append([district,clean_names[district]])
                schools_info = county_list[district]['schools']
                sorted_schools = sorted(schools_info, key = lambda school: schools_info[school])
                #Now go through each school in the district and calculate its percent change.
                for school in sorted_schools:
                    try:
                        #print('writing the following {0}'.format(schools_info[school]))
                        buffalo_list.append([school,clean_names[school]])
                    except KeyError:
                        #Beds code isn't found in one of the years for this school
                        print('Missing school in 2013, 2014, 2015, 2016 and/or 2017')
                        print(schools_info[school])
                        print(school)
                clean_json['Buffalo']['ordered_schools'] = buffalo_list
    else:
        ordered.append(['8686868686868', 'Charters Average'])
        sorted_county = sorted(county_list, key= lambda district: county_list[district])
        for district in sorted_county:
            county_count += 1
            print('*** starting district {0}'.format(county_list[district]))
            ordered.append([district,county_list[district]])
    clean_json[county]['ordered_schools'] = ordered
eried_list = export_county_schools(erie_list, 'Erie')

niagarad_list = export_county_schools(niagara_list, 'Niagara')
chartered_list = export_county_schools(charter_list, 'Charters')
print(clean_json['Niagara'])

*** starting district Bflo. Academy of Science
*** starting district Buffalo United
*** starting district Charter Sch. for App. Tech.
*** starting district Charter School of Inquiry
*** starting district Elmwood Village
*** starting district Enterprise
*** starting district Global Concepts
*** starting district Johnson
*** starting district King Center
*** starting district Niagara Charter
*** starting district South Buffalo
*** starting district Tapestry
*** starting district WNY Maritime Charter
*** starting district West Buffalo
*** starting district Westminster
{'ordered_schools': [['400000000000', 'Niagara County average'], ['401301040000', 'BARKER SCHOOL DISTRICT'], ['401301040003', 'Barker Junior/Senior High'], ['401301040002', 'Pratt Elementary'], ['400301060000', 'LEW-PORT SCHOOL DISTRICT'], ['400301060003', 'Intermediate Educ. Center'], ['400301060005', 'Lewiston Porter Middle'], ['400301060002', 'Primary Education Center'], ['400400010000', 'LOCKPORT SCHOOL DISTRICT'], ['400

In [34]:
def year_build(school_dict, year_dict, year):
    if school[0] in year_dict:
        #Only output dictionary if the school has test scores for that year.
        if len(year_dict[school[0]]['math']['classes']) != 0 or len(year_dict[school[0]]['ela']['classes']) != 0:
            year_info = year_dict[school[0]]
            school_info['district'] = year_info['district']
            school_info['name'] = clean_names[school[0]]
            math_year = {}
            math_year['total'] = str(year_info['math']['total_percent'])
            for i, item in enumerate(year_info['math']['classes']):
                clean_class = year_info['math']['classes'][i][0:7]
                try:
                    percent_profficient = (year_info['math']['proficient'][i] / year_info['math']['totals'][i]) * 100
                    math_year[clean_class] = str(float("{0:.1f}".format(percent_profficient)))
                except ZeroDivisionError:
                    math_year[clean_class] = '-'
            school_dict['math'][year] = math_year
            ela_year = {}
            ela_year['total'] = str(year_info['ela']['total_percent'])
            for i, item in enumerate(year_info['ela']['classes']):
                clean_class = year_info['ela']['classes'][i][0:7]
                try:
                    percent_profficient = (year_info['ela']['proficient'][i] / year_info['ela']['totals'][i]) * 100
                    ela_year[clean_class] = str(float("{0:.1f}".format(percent_profficient)))
                except ZeroDivisionError:
                    ela_year[clean_class] = '-'
            school_dict['ela'][year] = ela_year
    else:
        print('Missing {0} - {2} in {1}'.format(school[0], year, year_17[school[0]]['name']))

In [35]:
for group, value in clean_json.items():
    print ('**Starting {0}'.format(group))
    #print('values {0}'.format(value))
    schools = {}
    for school in value['ordered_schools']:
        school_info = {}
        school_info['math'] = {}
        school_info['ela'] = {}
        if school[0] == '8686868686868':
            #Charters
            print('hit charter average')
        else:
            year_build(school_info,year_17, '2017')
            year_build(school_info,year_16, '2016')
            year_build(school_info,year_15, '2015')
            year_build(school_info,year_14, '2014')
            year_build(school_info,year_13, '2013')
            year_build(school_info,year_12, '2012')
            year_build(school_info,year_11, '2011')
            year_build(school_info,year_10, '2010')
            year_build(school_info,year_09, '2009')
            year_build(school_info,year_08, '2008')
            year_build(school_info,year_07, '2007')
            year_build(school_info,year_06, '2006')
        schools[school[0]] = school_info
    clean_json[group]['schools'] = schools


**Starting Buffalo
**Starting Erie
**Starting Charters
hit charter average
**Starting Niagara


In [36]:
print(clean_json['Buffalo']['schools']['140600010000']['ela']['2017']['total'])
print(clean_json['Buffalo']['schools']['140600010000']['math']['2017']['total'])
print('*****')
print(clean_json['Buffalo']['schools']['140600010000']['ela']['2016']['total'])
print(clean_json['Buffalo']['schools']['140600010000']['math']['2016']['total'])
print('*****')
print('*****')
print(clean_json['Buffalo']['schools']['140600010000']['ela']['2015']['total'])
print(clean_json['Buffalo']['schools']['140600010000']['math']['2015']['total'])
print(clean_json['Buffalo']['schools']['140600010000']['ela']['2015'])
print('*****')
print('*****')
print(clean_json['Buffalo']['schools']['140600010000']['ela']['2014']['total'])
print(clean_json['Buffalo']['schools']['140600010000']['math']['2014']['total'])
print(clean_json['Buffalo']['schools']['140600010000']['ela']['2014'])
print('*****')
"""for school in clean_json['Buffalo']['schools']:
    print("{0}:{1}".format(clean_json['Buffalo']['schools'][school]['name'], school))"""
print(year_11['140600010000'])

17.8
17.2
*****
16.4
16.1
*****
*****
11.9
15.1
{'Grade 5': '10.8', 'Grade 8': '13.6', 'total': '11.9', 'Grade 6': '11.0', 'Grade 4': '13.6', 'Grade 3': '12.0', 'Grade 7': '10.2'}
*****
*****
11.9
13.1
{'Grade 5': '9.3', 'Grade 8': '14.0', 'total': '11.9', 'Grade 6': '9.9', 'Grade 4': '14.7', 'Grade 3': '13.2', 'Grade 7': '10.5'}
*****
{'math': {'total_percent': 31.1, 'proficient': [685, 872, 760, 790, 779, 664], 'classes': ['Grade 3 Math', 'Grade 4 Math', 'Grade 5 Math', 'Grade 6 Math', 'Grade 7 Math', 'Grade 8 Math'], 'totals': [2472, 2481, 2445, 2434, 2416, 2391]}, 'ela': {'total_percent': 27.0, 'proficient': [635, 680, 596, 843, 567, 534], 'classes': ['Grade 3 ELA', 'Grade 4 ELA', 'Grade 5 ELA', 'Grade 6 ELA', 'Grade 7 ELA', 'Grade 8 ELA'], 'totals': [2406, 2415, 2391, 2393, 2377, 2306]}, 'name': 'BUFFALO CITY SCHOOL DISTRICT', 'district': 'BUFFALO CITY SCHOOL DISTRICT', 'county': 'ERIE'}


In [37]:
def calculate_charter_average():
    """from statistics import mean
l = [15, 18, 2, 36, 12, 78, 5, 6, 9]
mean(l)"""
    charter_average = {'math': {}, 'ela': {}}
    check = 0
    #Get all values added to the correct subject/year/grade list
    for charter,charter_info in clean_json['Charters']['schools'].items():
        if charter != '8686868686868':
            print(charter)
            print(charter_info)
            print('*****')
            print(charter_average)
            for subject in charter_average.keys():
                subject_years = charter_info[subject].keys()
                for year in subject_years:
                    grades = charter_info[subject][year].keys()
                    for grade in grades:
                        if grade != 'total':
                            if grade == 'Grade 7' and year == '2013' and subject == 'ela':
                                check += 1
                            if year not in charter_average[subject]:
                                charter_average[subject][year] = {grade: [float(charter_info[subject][year][grade])]}
                            elif grade not in charter_average[subject][year]:
                                charter_average[subject][year][grade] = [float(charter_info[subject][year][grade])]
                            else:
                                charter_average[subject][year][grade].append(float(charter_info[subject][year][grade]))
    print(len(charter_average['ela']['2013']['Grade 7']))
    print(check)
    #Start calculating the correct mean
    from statistics import mean
    for subject in charter_average:
        for year in charter_average[subject]:
            mean_grades = []
            for grade in charter_average[subject][year]:
                print(charter_average[subject][year][grade])
                mean_score = "{0:.1f}".format(mean(charter_average[subject][year][grade]))
                print(mean_score)
                charter_average[subject][year][grade] = mean_score
                mean_grades.append(float(mean_score))
            charter_average[subject][year]['total'] = mean(mean_grades)
    
    clean_json['Charters']['schools']['8686868686868'] = charter_average
    clean_json['Charters']['schools']['8686868686868']['name'] = 'Charter Average'
                        
calculate_charter_average()

142601860031
{'math': {'2012': {'Grade 5': '65.7', 'Grade 8': '48.4', 'total': '61.6', 'Grade 6': '65.4', 'Grade 4': '85.0', 'Grade 3': '50.0', 'Grade 7': '54.3'}, '2009': {'Grade 5': '93.8', 'Grade 8': '89.0', 'total': '90.6', 'Grade 6': '92.4', 'Grade 4': '80.2', 'Grade 3': '98.4', 'Grade 7': '89.8'}, '2006': {'Grade 5': '51.5', 'Grade 8': '35.7', 'total': '44.8', 'Grade 6': '38.1', 'Grade 4': '58.4', 'Grade 3': '66.4', 'Grade 7': '14.5'}, '2007': {'Grade 5': '79.7', 'Grade 8': '57.9', 'total': '75.2', 'Grade 6': '76.6', 'Grade 4': '90.0', 'Grade 3': '89.8', 'Grade 7': '58.1'}, '2008': {'Grade 5': '84.0', 'Grade 8': '81.2', 'total': '85.3', 'Grade 6': '76.8', 'Grade 4': '94.6', 'Grade 3': '96.2', 'Grade 7': '78.9'}, '2016': {'Grade 5': '38.6', 'Grade 8': '7.4', 'total': '19.4', 'Grade 6': '13.6', 'Grade 4': '27.8', 'Grade 3': '34.8', 'Grade 7': '4.9'}, '2013': {'Grade 5': '24.6', 'Grade 8': '18.8', 'total': '22.5', 'Grade 6': '21.8', 'Grade 4': '39.1', 'Grade 3': '14.8', 'Grade 7': '

In [38]:
print(clean_json['Charters']['schools']['8686868686868'])
print(clean_json['Niagara']['schools']['400000000000'])

{'math': {'2012': {'Grade 5': '62.1', 'Grade 8': '57.6', 'Grade 6': '57.9', 'total': 59.266666666666666, 'Grade 4': '67.7', 'Grade 3': '46.7', 'Grade 7': '63.6'}, '2009': {'Grade 5': '88.2', 'Grade 8': '78.6', 'Grade 6': '87.1', 'total': 87.51666666666667, 'Grade 4': '88.6', 'Grade 3': '95.8', 'Grade 7': '86.8'}, '2006': {'Grade 5': '55.0', 'Grade 8': '27.4', 'Grade 6': '53.5', 'total': 52.4, 'Grade 4': '72.4', 'Grade 3': '74.2', 'Grade 7': '31.9'}, '2007': {'Grade 5': '67.3', 'Grade 8': '52.5', 'Grade 6': '78.7', 'total': 70.21666666666667, 'Grade 4': '71.6', 'Grade 3': '85.9', 'Grade 7': '65.3'}, '2008': {'Grade 5': '79.8', 'Grade 8': '73.4', 'Grade 6': '71.3', 'total': 79.11666666666667, 'Grade 4': '82.4', 'Grade 3': '89.6', 'Grade 7': '78.2'}, '2016': {'Grade 5': '27.8', 'Grade 8': '18.1', 'Grade 6': '30.1', 'total': 26.45, 'Grade 4': '33.0', 'Grade 3': '35.4', 'Grade 7': '14.3'}, '2013': {'Grade 5': '22.3', 'Grade 8': '19.3', 'Grade 6': '24.5', 'total': 22.116666666666667, 'Grade 

In [39]:
for group in clean_json:
    group_max = {'mathmaxValue': 0,'elamaxValue': 0}
    group_min = {'mathminValue': 100, 'elaminValue': 100}
    for school in clean_json[group]['schools']:
        #print(school)
        try:
            math_school_value = float(clean_json[group]['schools'][school]['math']['2017']['total'])
        except ValueError:
            math_school_value = '-'
        #print(math_school_value)
        #print(group_max)
        #print(group_min)
        if math_school_value != '-':
            if math_school_value > group_max['mathmaxValue']:
                group_max['mathmaxValue'] = math_school_value
                group_max['mathschool'] = school
            elif math_school_value < group_min['mathminValue']:
                group_min['mathminValue'] = math_school_value
                group_min['mathschool'] = school
        try:
            ela_school_value = float(clean_json[group]['schools'][school]['ela']['2017']['total'])
        except ValueError:
            ela_school_value = '-'
        #print(ela_school_value)
        if ela_school_value != '-':
            if ela_school_value > group_max['elamaxValue']:
                group_max['elamaxValue'] = ela_school_value
                group_max['elaschool'] = school
            elif ela_school_value < group_min['elaminValue']:
                group_min['elaminValue'] = ela_school_value
                group_min['elaschool'] = school
    print('&&&&&&group&&&&&')
    print(group)
    print('Highest schools')
    print(group_max)
    print('Lowest schools')
    print(group_min)
    clean_json[group]['max'] = group_max['elaschool']
    clean_json[group]['min'] = group_min['elaschool']

&&&&&&group&&&&&
Buffalo
Highest schools
{'elamaxValue': 83.3, 'mathschool': '140600010102', 'mathmaxValue': 79.7, 'elaschool': '140600010102'}
Lowest schools
{'elaminValue': 0.0, 'elaschool': '140600010133', 'mathschool': '140600010032', 'mathminValue': 1.6}
&&&&&&group&&&&&
Erie
Highest schools
{'elamaxValue': 72.0, 'mathschool': '140801060003', 'mathmaxValue': 87.6, 'elaschool': '140203060002'}
Lowest schools
{'elaminValue': 6.5, 'elaschool': '141800010005', 'mathschool': '141800010005', 'mathminValue': 5.1}
&&&&&&group&&&&&
Charters
Highest schools
{'elamaxValue': 46.6, 'mathschool': '400701860890', 'mathmaxValue': 52.9, 'elaschool': '140600860896'}
Lowest schools
{'elaminValue': 9.8, 'elaschool': '140600860856', 'mathschool': '140600860863', 'mathminValue': 2.6}
&&&&&&group&&&&&
Niagara
Highest schools
{'elamaxValue': 81.2, 'mathschool': '400701060005', 'mathmaxValue': 73.1, 'elaschool': '400301060002'}
Lowest schools
{'elaminValue': 14.1, 'elaschool': '400800010015', 'mathschool'

In [83]:
clean_json['Erie']['schools']['140101060000']

{'district': 'ALDEN CENTRAL SCHOOL DISTRICT',
 'ela': {'2006': {'Grade 3': '78.0',
   'Grade 4': '77.0',
   'Grade 5': '71.4',
   'Grade 6': '74.6',
   'Grade 7': '78.5',
   'Grade 8': '75.9',
   'total': '76.0'},
  '2007': {'Grade 3': '76.5',
   'Grade 4': '81.2',
   'Grade 5': '79.7',
   'Grade 6': '73.0',
   'Grade 7': '69.1',
   'Grade 8': '77.1',
   'total': '76.0'},
  '2008': {'Grade 3': '83.7',
   'Grade 4': '80.5',
   'Grade 5': '84.3',
   'Grade 6': '81.0',
   'Grade 7': '87.5',
   'Grade 8': '62.3',
   'total': '79.6'},
  '2009': {'Grade 3': '85.9',
   'Grade 4': '92.2',
   'Grade 5': '87.9',
   'Grade 6': '86.3',
   'Grade 7': '91.3',
   'Grade 8': '85.6',
   'total': '88.2'},
  '2010': {'Grade 3': '69.9',
   'Grade 4': '70.6',
   'Grade 5': '64.8',
   'Grade 6': '68.5',
   'Grade 7': '63.0',
   'Grade 8': '69.3',
   'total': '67.7'},
  '2011': {'Grade 3': '66.1',
   'Grade 4': '68.0',
   'Grade 5': '55.8',
   'Grade 6': '74.6',
   'Grade 7': '54.5',
   'Grade 8': '64.3',
  

In [81]:
clean_csv = []
clean_csv.append(['Group','District','School','GEOID','Grade','Test_type','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017'])
years = ['2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017']
grades = ['Grade 5', 'Grade 8', 'total', 'Grade 6', 'Grade 4', 'Grade 3', 'Grade 7']
for group,schools in clean_json.items():
    print('group = {0}'.format(group))
    for school,school_info in schools['schools'].items():
        print('school = {0}'.format(school))
        for grade in grades:
            foundNum = False
            for year in years:
                if year in school_info['math']:
                    if grade in school_info['math'][year]:
                        foundNum = True
                        break
                if year in school_info['ela']:
                    if grade in school_info['ela'][year]:
                        foundNum = True
                        break
            if foundNum:
                print(school_info)
                if school == '8686868686868':
                    grade_info_math = [group,'charters',school_info['name'],school,grade,'math']
                    grade_info_ela = [group,'charters',school_info['name'],school,grade,'ela']
                else:
                    grade_info_math = [group,school_info['district'],school_info['name'],school,grade,'math']
                    grade_info_ela = [group,school_info['district'],school_info['name'],school,grade,'ela']
                for year in years:
                    try:
                        grade_info_math.append(school_info['math'][year][grade])
                    except KeyError:
                        print('###############')
                        print(grade)
                        try:
                            print(school_info['math'][year])
                        except KeyError:
                            print(year)
                            print(school_info['math'])
                        print('###############')
                        grade_info_math.append('-')
                    try:
                        grade_info_ela.append(school_info['ela'][year][grade])
                    except KeyError:
                        print('###############')
                        print(grade)
                        try:
                            print(school_info['ela'][year])
                        except KeyError:
                            print(year)
                            print(school_info['ela'])
                        print('###############')
                        grade_info_ela.append('-')
                #print(grade_info_math)
                clean_csv.append(grade_info_math)
                clean_csv.append(grade_info_ela)
print(len(clean_csv))
print(clean_csv[0])
print(clean_csv[10])
print(clean_json['Buffalo']['schools']['140600010037']['ela']['2010']['Grade 4'])
print(clean_json['Buffalo']['schools']['140600010037']['ela']['2017']['Grade 4'])
for row in clean_csv:
    if len(row) != 18:
        print('row not 18')
        print(row)
        print(len(row))

group = Buffalo
school = 140600010037
{'math': {'2012': {'Grade 5': '6.2', 'Grade 8': '7.8', 'total': '6.9', 'Grade 6': '3.0', 'Grade 4': '16.0', 'Grade 3': '5.3', 'Grade 7': '4.3'}, '2009': {'Grade 5': '25.0', 'Grade 8': '56.6', 'total': '48.8', 'Grade 6': '19.6', 'Grade 4': '44.4', 'Grade 3': '80.6', 'Grade 7': '55.2'}, '2006': {'Grade 5': '20.6', 'Grade 8': '15.3', 'total': '24.1', 'Grade 6': '7.9', 'Grade 4': '57.6', 'Grade 3': '26.0', 'Grade 7': '21.5'}, '2007': {'Grade 5': '19.1', 'Grade 8': '12.9', 'total': '19.1', 'Grade 6': '19.0', 'Grade 4': '25.5', 'Grade 3': '27.9', 'Grade 7': '12.5'}, '2008': {'Grade 5': '21.8', 'Grade 8': '23.2', 'total': '27.3', 'Grade 6': '27.9', 'Grade 4': '18.6', 'Grade 3': '46.8', 'Grade 7': '28.6'}, '2016': {'Grade 5': '8.7', 'Grade 8': '0.0', 'total': '5.7', 'Grade 6': '0.0', 'Grade 4': '2.7', 'Grade 3': '27.6', 'Grade 7': '0.0'}, '2013': {'Grade 5': '0.0', 'Grade 8': '0.0', 'total': '0.3', 'Grade 6': '0.0', 'Grade 4': '0.0', 'Grade 3': '2.3', 'Gra

In [82]:
with open('output.csv', 'w') as csvfile:
    writer = csv.writer(csvfile)
    for row in clean_csv:
        writer.writerow(row)

In [75]:
print(clean_json['Charters']['schools']['8686868686868'])

{'math': {'2012': {'Grade 5': '62.1', 'Grade 8': '57.6', 'Grade 6': '57.9', 'total': 59.266666666666666, 'Grade 4': '67.7', 'Grade 3': '46.7', 'Grade 7': '63.6'}, '2009': {'Grade 5': '88.2', 'Grade 8': '78.6', 'Grade 6': '87.1', 'total': 87.51666666666667, 'Grade 4': '88.6', 'Grade 3': '95.8', 'Grade 7': '86.8'}, '2006': {'Grade 5': '55.0', 'Grade 8': '27.4', 'Grade 6': '53.5', 'total': 52.4, 'Grade 4': '72.4', 'Grade 3': '74.2', 'Grade 7': '31.9'}, '2007': {'Grade 5': '67.3', 'Grade 8': '52.5', 'Grade 6': '78.7', 'total': 70.21666666666667, 'Grade 4': '71.6', 'Grade 3': '85.9', 'Grade 7': '65.3'}, '2008': {'Grade 5': '79.8', 'Grade 8': '73.4', 'Grade 6': '71.3', 'total': 79.11666666666667, 'Grade 4': '82.4', 'Grade 3': '89.6', 'Grade 7': '78.2'}, '2016': {'Grade 5': '27.8', 'Grade 8': '18.1', 'Grade 6': '30.1', 'total': 26.45, 'Grade 4': '33.0', 'Grade 3': '35.4', 'Grade 7': '14.3'}, '2013': {'Grade 5': '22.3', 'Grade 8': '19.3', 'Grade 6': '24.5', 'total': 22.116666666666667, 'Grade 

In [58]:
print(clean_json['Erie']['schools']['140000000000'])
print(clean_json['Niagara']['ordered_schools'])

import json
with open('data.json', 'w') as output:
    json.dump(clean_json, output)

{'ela': {'2012': {'Grade 6': '58.5', 'Grade 5': '56.6', 'Grade 8': '52.7', 'Grade 7': '52.2', 'Grade 3': '56.2', 'Grade 4': '58.7', 'total': '55.8'}, '2013': {'Grade 6': '32.6', 'Grade 5': '28.4', 'Grade 8': '36.6', 'Grade 7': '32.3', 'Grade 3': '32.0', 'Grade 4': '30.2', 'total': '32.0'}, '2017': {'Grade 6': '29.3', 'Grade 5': '30.4', 'Grade 8': '36.3', 'Grade 7': '36.4', 'Grade 3': '37.4', 'Grade 4': '35.3', 'total': '34.2'}, '2014': {'Grade 6': '27.3', 'Grade 5': '26.4', 'Grade 8': '33.2', 'Grade 7': '26.7', 'Grade 3': '32.8', 'Grade 4': '32.7', 'total': '29.8'}, '2006': {'Grade 6': '61.6', 'Grade 5': '67.3', 'Grade 8': '48.5', 'Grade 7': '57.7', 'Grade 3': '67.3', 'Grade 4': '66.5', 'total': '60.8'}, '2015': {'Grade 6': '27.2', 'Grade 5': '28.1', 'Grade 8': '31.5', 'Grade 7': '26.3', 'Grade 3': '29.6', 'Grade 4': '31.8', 'total': '29.1'}, '2007': {'Grade 6': '64.8', 'Grade 5': '69.1', 'Grade 8': '62.0', 'Grade 7': '58.7', 'Grade 3': '68.0', 'Grade 4': '69.4', 'total': '65.1'}, '200