# Calculating percent proficiency and its percent change in New York State education assessment data

After downloading the [2015, 2016 and 2017 data from the New York State Education Department](https://data.nysed.gov/downloads.php) and renaming the files to 3-8\_ELA\_MATH\_{{ year }}.csv, this program calculates the percent proficient, scoring at Level 3 or 4, of each school. The percent proficient is calculated by adding each raw proficient count together for available grades and then dividing the total test takers. The 2013 and 2014 files were first converted from .mdb file format.

In [1]:
import agate
import csv

Due to missing school values and changes in data columns, I needed two different typetesters to force the columns into their correct formatting.

In [2]:
tester_15 = agate.TypeTester(limit=100,force={
    'NRC_DESC': agate.Text(),
    'NRC_CODE': agate.Number(),
        'BEDSCODE': agate.Text(),
        'L1_COUNT': agate.Text(),
        'L1_PCT': agate.Text(),
        'L2_COUNT': agate.Text(),
        'L2_PCT': agate.Text(),
        'L3_COUNT': agate.Text(),
        'L3_PCT': agate.Text(),
        'L4_COUNT': agate.Text(),
        'L4_PCT': agate.Text(),
        'L2-L4_PCT': agate.Text(),
        'L3-L4_PCT': agate.Text(),
        'MEAN_SCALE_SCORE': agate.Text(),
        'COUNTY_DESC': agate.Text(),
        'TOTAL_TESTED': agate.Text(),
        'COUNTY_CODE': agate.Text(),
        'Sum_Of_SCALE_SCORE': agate.Text()
})
tester_16 = agate.TypeTester(limit=100,force={
    'NRC_DESC': agate.Text(),
    'NRC_CODE': agate.Number(),
        'BEDSCODE': agate.Text(),
        'L1_COUNT': agate.Text(),
        'L1_PCT': agate.Text(),
        'L2_COUNT': agate.Text(),
        'L2_PCT': agate.Text(),
        'L3_COUNT': agate.Text(),
        'L3_PCT': agate.Text(),
        'L4_COUNT': agate.Text(),
        'L4_PCT': agate.Text(),
        'L2-L4_PCT': agate.Text(),
        'L3-L4_PCT': agate.Text(),
        'MEAN_SCALE_SCORE': agate.Text(),
        'COUNTY_DESC': agate.Text(),
        'TOTAL_TESTED': agate.Text(),
        'COUNTY_CODE': agate.Text()
})

This program takes in the csv of a specific year's assesment data and its tester and returns a dictionary of BEDS (state id codes) and test data.

In [3]:
def get_percent_dictionary(file_name, tester):
    schools = agate.Table.from_csv(file_name, column_types=tester)
    #Grab just Erie and Niagara schools, filtering out available subgroups and total county stats
    erie_niagara = schools.where(lambda row: row['COUNTY_DESC'] in ['ERIE', 'NIAGARA']).where(lambda row: row['SUBGROUP_NAME'] in ['All Students'])
    print(len(erie_niagara.rows))
    #Data doesn't include district information, but the first six digits of a school's BEDS codes includes that info.
    beds_code = {}
    for row in erie_niagara.rows:
        if 'SCHOOL DISTRICT' in row['NAME']:
            beds_district = row['BEDSCODE'][:6]
            #Creates a lookup table of ids for the district name
            beds_code[str(beds_district)] = row['NAME']
    def calculate_prof_counts(row):
        #Some schools don't have test takers for certain tests, so fill in with zeros.
        try:
            l3_counts = int(row['L3_COUNT'])
        except ValueError:
            l3_counts = 0
        try:
            l4_counts = int(row['L4_COUNT'])
        except ValueError:
            l4_counts = 0
        proficient = l3_counts + l4_counts
        return proficient
    erie_calcs = erie_niagara.compute([
        ('proficient_count', agate.Formula(agate.Number(), lambda row: calculate_prof_counts(row)))
    ])
    # Form the data structure for each school with beds code as a key
    schools_info = {}
    for row in erie_calcs.rows:
        if row['NAME'] == 'NIAGARA CHARTER SCHOOL':
            district = 'NIAGARA-WHEATFIELD CENTRAL SCHOOL DISTRICT'
        elif row['NAME'] == 'ERIE COUNTY' or row['NAME'] == 'NIAGARA COUNTY':
            district = row['NAME']
        else:
            beds_district = row['BEDSCODE'][:6]
            district = beds_code[beds_district]
        """name = School name
        math/ela dictionary has totals = total test takers one per applicable class; proficient = proficient_count values; classes = grades taking the test
        district = district name (looked up using the beds_code dict)
        county = county"""
        schools_info[row['BEDSCODE']] = {'name': row['NAME'], 'math': {'totals': [], 'proficient': [], 'classes': []}, 'ela': {'totals': [], 'proficient': [], 'classes': []}, 'district': district, 'county': row['COUNTY_DESC'] }
    # After creating the correct dict format, now I run through the schools again to fill the score values.
    for row in erie_calcs.rows:
        if row['ITEM_SUBJECT_AREA'] == 'ELA':
            data_dict = schools_info[row['BEDSCODE']]['ela']
        else:
            data_dict = schools_info[row['BEDSCODE']]['math']
        try:
            data_dict['totals'].append(int(row['TOTAL_TESTED']))
        except ValueError:
            data_dict['totals'].append(0)
        try:
            data_dict['classes'].append(row['ITEM_DESC'])
        except ValueError:
            data_dict['classes'].append(0)
        try:
            data_dict['proficient'].append(int(row['proficient_count']))
        except ValueError:
            data_dict['proficient'].append(0)
    def calculate_prof_percent(test_area):
        total_sum = sum(values[test_area]['totals'])
        proficient_sum = sum(values[test_area]['proficient'])
        try:
            correct_value = ((proficient_sum / total_sum) * 100)
            schools_info[school][test_area]['total_percent'] = float("{0:.1f}".format(correct_value))
        except ZeroDivisionError:
            schools_info[school][test_area]['total_percent'] = '-'
    for school, values in schools_info.items():
        calculate_prof_percent('ela')
        calculate_prof_percent('math')
    return schools_info

In [4]:
year_16 = get_percent_dictionary('3-8_ELA_AND_MATH_2016.csv', tester_16)

1861


In [5]:
year_17 = get_percent_dictionary('3-8_ELA_AND_MATH_2017.csv', tester_16)

1839


In [6]:
year_15 = get_percent_dictionary('3-8_ELA_AND_MATH_2015.csv',tester_15)

1876


In [7]:
year_14 = get_percent_dictionary('3-8_ELA_AND_MATH_2014.csv',tester_15)

1883


In [8]:
year_13 = get_percent_dictionary('3-8_ELA_AND_MATH_2013.csv',tester_15)

1912


Example of each year returning data for a specific school

In [13]:
print(year_16['140000000000'])

{'name': 'ERIE COUNTY', 'county': 'ERIE', 'math': {'proficient': [2821, 2687, 2365, 2122, 1589, 708], 'classes': ['Grade 3 Math', 'Grade 4 Math', 'Grade 5 Math', 'Grade 6 Math', 'Grade 7 Math', 'Grade 8 Math'], 'totals': [6995, 6509, 6211, 5829, 5356, 4207], 'total_percent': 35.0}, 'ela': {'proficient': [2740, 2345, 1865, 1962, 1737, 1904], 'classes': ['Grade 3 ELA', 'Grade 4 ELA', 'Grade 5 ELA', 'Grade 6 ELA', 'Grade 7 ELA', 'Grade 8 ELA'], 'totals': [7026, 6577, 6329, 6070, 5706, 5368], 'total_percent': 33.9}, 'district': 'ERIE COUNTY'}


In [9]:
print(year_16['400400010001'])
print(year_15['400400010001'])
print(year_17['400400010001'])
print('****')
print(year_17['140101060007'])

{'name': 'ANNA MERRITT ELEMENTARY SCHOOL', 'ela': {'totals': [69, 62], 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'total_percent': 25.2, 'proficient': [22, 11]}, 'math': {'totals': [67, 54], 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'total_percent': 41.3, 'proficient': [29, 21]}, 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'county': 'NIAGARA'}
{'name': 'ANNA MERRITT ELEMENTARY SCHOOL', 'ela': {'totals': [60, 69], 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'total_percent': 17.8, 'proficient': [11, 12]}, 'math': {'totals': [57, 62], 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'total_percent': 37.0, 'proficient': [15, 29]}, 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'county': 'NIAGARA'}
{'name': 'ANNA MERRITT ELEMENTARY SCHOOL', 'ela': {'totals': [53, 65], 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'total_percent': 22.9, 'proficient': [11, 16]}, 'math': {'totals': [52, 62], 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'total_percent': 36.8, 'proficient': [23, 19]}, 'district': 'LOC

Backfilling new 2017 schools

In [9]:
for school in year_17:
    if school not in year_16:
        print('not in 2016 {0}'.format(year_17[school]['name']))
        year_16[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_15:
        print('not in 2015 {0}'.format(year_17[school]['name']))
        year_15[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_14:
        print('not in 2014 {0}'.format(year_17[school]['name']))
        year_14[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
    if school not in year_13:
        print('not in 2013 {0}'.format(year_17[school]['name']))
        year_13[school] = {'name': year_17[school]['name'], 'county': year_17[school]['county'], 'district': year_17[school]['district'], 'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}, 'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'}}
for school in year_16:
    if school not in year_17:
        print ('Not in 2017 {0} {1}'.format(school, year_16[school]['name']))
    if school not in year_15:
        print ('Not in 2015 {0} {1}'.format(school, year_16[school]['name']))
print('checking 2015')
for school in year_15:
    if school not in year_17:
        print ('Not in 2017 {0} {1}'.format(school, year_15[school]['name']))
    if school not in year_16:
        print ('Not in 2016 {0} {1}'.format(school, year_15[school]['name']))

not in 2013 WEST BUFFALO CHARTER SCHOOL
not in 2016 KENMORE WEST SENIOR HIGH SCHOOL
not in 2015 KENMORE WEST SENIOR HIGH SCHOOL
not in 2014 KENMORE WEST SENIOR HIGH SCHOOL
not in 2013 KENMORE WEST SENIOR HIGH SCHOOL
not in 2016 CHARTER SCHOOL OF INQUIRY
not in 2015 CHARTER SCHOOL OF INQUIRY
not in 2014 CHARTER SCHOOL OF INQUIRY
not in 2013 CHARTER SCHOOL OF INQUIRY
not in 2016 KENMORE EAST SENIOR HIGH SCHOOL
not in 2015 KENMORE EAST SENIOR HIGH SCHOOL
not in 2014 KENMORE EAST SENIOR HIGH SCHOOL
not in 2013 KENMORE EAST SENIOR HIGH SCHOOL
not in 2016 ALDEN INTERMEDIATE SCHOOL
not in 2015 ALDEN INTERMEDIATE SCHOOL
not in 2014 ALDEN INTERMEDIATE SCHOOL
not in 2013 ALDEN INTERMEDIATE SCHOOL
not in 2016 NEWCOMER ACADEMY AT LAFAYETTE
not in 2015 NEWCOMER ACADEMY AT LAFAYETTE
not in 2014 NEWCOMER ACADEMY AT LAFAYETTE
not in 2013 NEWCOMER ACADEMY AT LAFAYETTE
not in 2016 WESTERN NEW YORK MARITIME CHARTER SCHOOL
not in 2015 WESTERN NEW YORK MARITIME CHARTER SCHOOL
not in 2014 WESTERN NEW YORK M

Grab clean school/district names and which schools are charters

In [10]:
clean_names = {}
charters = []
with open('../school_name_dictionary.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        clean_names[row['BEDS']] = row['clean_school']
        if len(row['charter']) == 1:
            charters.append(row['BEDS'])
clean_names['140101060007']

'Alden Intermediate'

Separate schools/district into county lists for print, and create dictionary with included schools and codes.

In [11]:
erie_list = {}
niagara_list = {}
charter_list = {}
for key, value in year_17.items():
    if key not in charters:
        if 'SCHOOL DISTRICT' in value['name']:
            if value['county'] == 'ERIE':
                erie_list[key] = {'name': clean_names[key], 'schools': {}, 'dist_key': value['name']}
            else:
                niagara_list[key] = {'name': clean_names[key], 'schools': {}, 'dist_key': value['name']}
    else:
        charter_list[key] = clean_names[key]
for distict, value in erie_list.items():
    for school, info in year_17.items():
        if school not in charters:
            if value['dist_key'] == info['district'] and 'SCHOOL DISTRICT' not in info['name']:
                erie_list[distict]['schools'][school] = clean_names[school]
for distict, value in niagara_list.items():
    for school, info in year_17.items():
        if school not in charters:
            if value['name'] == info['district'] and 'SCHOOL DISTRICT' not in info['name']:
                niagara_list[distict]['schools'][school] = clean_names[school]
print(niagara_list)

{'400701060000': {'schools': {}, 'name': 'NIAGARA WHEATFIELD SCH. DIST.', 'dist_key': 'NIAGARA-WHEATFIELD CENTRAL SCHOOL DISTRICT'}, '400301060000': {'schools': {}, 'name': 'LEW-PORT SCHOOL DISTRICT', 'dist_key': 'LEWISTON-PORTER CENTRAL SCHOOL DISTRICT'}, '401501060000': {'schools': {}, 'name': 'WILSON SCHOOL DISTRICT', 'dist_key': 'WILSON CENTRAL SCHOOL DISTRICT'}, '400800010000': {'schools': {}, 'name': 'NIAGARA FALLS SCHOOL DISTRICT', 'dist_key': 'NIAGARA FALLS CITY SCHOOL DISTRICT'}, '401001060000': {'schools': {}, 'name': 'STARPOINT SCHOOL DISTRICT', 'dist_key': 'STARPOINT CENTRAL SCHOOL DISTRICT'}, '400400010000': {'schools': {}, 'name': 'LOCKPORT SCHOOL DISTRICT', 'dist_key': 'LOCKPORT CITY SCHOOL DISTRICT'}, '400601060000': {'schools': {}, 'name': 'NEWFANE SCHOOL DISTRICT', 'dist_key': 'NEWFANE CENTRAL SCHOOL DISTRICT'}, '400900010000': {'schools': {}, 'name': 'NORTH TONAWANDA SCH. DIST.', 'dist_key': 'NORTH TONAWANDA CITY SCHOOL DISTRICT'}, '401201060000': {'schools': {}, 'na

In [18]:
def percent_change(new,old):
    try:
        calculate = (new-old)/old
        percent = calculate * 100
        one_decimal = float("{0:.1f}".format(percent))
        return one_decimal
    except (ZeroDivisionError,TypeError):
        return '-'

Exporting for print

In [23]:
erie_districts = 0
niagara_districts = 0
charter_districts = 0
def export_county_schools(county_list, county):
    print('Begging {0}'.format(county))
    county_count = 0
    if county != 'charter':
        #Sort the county of district id's by its name value.
        sorted_county = sorted(county_list, key= lambda district: county_list[district]['name'])
        for district in sorted_county:
            county_count += 1
            #print('*** starting district {0}'.format(county_list[district]['name']))
            pc_ela_15_17 = percent_change(year_17[district]['ela']['total_percent'],year_15[district]['ela']['total_percent'])
            pc_ela_16_17 = percent_change(year_17[district]['ela']['total_percent'],year_16[district]['ela']['total_percent'])
            pc_math_15_17 = percent_change(year_17[district]['math']['total_percent'],year_15[district]['math']['total_percent'])
            pc_math_16_17 = percent_change(year_17[district]['math']['total_percent'],year_16[district]['math']['total_percent'])
            district_info = [county_list[district]['name'], clean_names[district], year_15[district]['ela']['total_percent'],year_16[district]['ela']['total_percent'],year_17[district]['ela']['total_percent'],year_15[district]['math']['total_percent'],year_16[district]['math']['total_percent'],year_17[district]['math']['total_percent'], pc_ela_15_17, pc_ela_16_17, pc_math_15_17, pc_math_16_17]
            writer.writerow(district_info)
            schools_info = county_list[district]['schools']
            sorted_schools = sorted(schools_info, key = lambda school: schools_info[school])
            #Now go through each school in the district and calculate its percent change.
            for school in sorted_schools:
                    pc_school_ela_15_17 = percent_change(year_17[school]['ela']['total_percent'],year_15[school]['ela']['total_percent'])
                    pc_school_ela_16_17 = percent_change(year_17[school]['ela']['total_percent'],year_16[school]['ela']['total_percent'])
                    pc_school_math_15_17 = percent_change(year_17[school]['math']['total_percent'],year_15[school]['math']['total_percent'])
                    pc_school_math_16_17 = percent_change(year_17[school]['math']['total_percent'],year_16[school]['math']['total_percent'])
                    school_info = [county_list[district]['name'], clean_names[school], year_15[school]['ela']['total_percent'],year_16[school]['ela']['total_percent'],year_17[school]['ela']['total_percent'],year_15[school]['math']['total_percent'],year_16[school]['math']['total_percent'],year_17[school]['math']['total_percent'], pc_school_ela_15_17, pc_school_ela_16_17, pc_school_math_15_17, pc_school_math_16_17]
                    writer.writerow(school_info)
    else:
        #Charter schools only have one level aka direct to school data.
        sorted_county = sorted(county_list, key= lambda district: county_list[district])
        for district in sorted_county:
            county_count += 1
            #print('*** starting district {0}'.format(county_list[district]))
            pc_ela_15_17 = percent_change(year_17[district]['ela']['total_percent'],year_15[district]['ela']['total_percent'])
            pc_ela_16_17 = percent_change(year_17[district]['ela']['total_percent'],year_16[district]['ela']['total_percent'])
            pc_math_15_17 = percent_change(year_17[district]['math']['total_percent'],year_15[district]['math']['total_percent'])
            pc_math_16_17 = percent_change(year_17[district]['math']['total_percent'],year_16[district]['math']['total_percent'])
            district_info = [year_17[district]['district'], clean_names[district], year_15[district]['ela']['total_percent'],year_16[district]['ela']['total_percent'],year_17[district]['ela']['total_percent'],year_15[district]['math']['total_percent'],year_16[district]['math']['total_percent'],year_17[district]['math']['total_percent'], pc_ela_15_17, pc_ela_16_17, pc_math_15_17, pc_math_16_17]
            writer.writerow(district_info)
    return county_count

with open('print_out.tsv', 'w') as out_put:
    writer = csv.writer(out_put, delimiter='\t')
    writer.writerow(['district','school','ELA 2015', 'ELA 2016', 'ELA 2017', 'Math 2015', 'Math 2016', 'Math 2017', 'pc_ela_15_17', 'pc_ela_16_17', 'pc_math_15_17', 'pc_math_16_17'])
    erie_districts = export_county_schools(erie_list, 'erie')
    niagara_districts = export_county_schools(niagara_list, 'niagara')
    charter_districts = export_county_schools(charter_list, 'charter')
    print('{0} erie districts and {1} niagara districts {2} charters'.format(erie_districts,niagara_districts,charter_districts))

Begging erie
Begging niagara
Begging charter
28 erie districts and 10 niagara districts 15 charters


Online needs JSON in the clean_json groups.

In [33]:
print(erie_list['140201060000'])

{'name': 'AMHERST', 'dist_key': 'AMHERST CENTRAL SCHOOL DISTRICT', 'schools': {'140201060006': 'Windermere', '140201060001': 'Amherst Middle', '140201060005': 'Smallwood'}}


In [32]:
erie_districts = 0
niagara_districts = 0
clean_json = {'Erie': {}, 'Niagara': {}, 'Buffalo': {}, 'Charters': {}}
def export_county_schools(county_list, county):
    ordered = []
    county_count = 0
    #Sort the county of district id's by its name value.
    if county != 'Charters':
        if county == 'Erie':
            ordered.append(['140000000000', 'Erie County'])
        elif county == 'Niagara':
            ordered.append(['400000000000', 'Niagara County'])
        sorted_county = sorted(county_list, key= lambda district: county_list[district]['name'])
        for district in sorted_county:
            #Buffalo gets thrown in its own group
            if district != '140600010000':
                county_count += 1
                #print('*** starting district {0}'.format(county_list[district]['name']))
                schools_info = county_list[district]['schools']
                ordered.append([district,clean_names[district]])
                sorted_schools = sorted(schools_info, key = lambda school: schools_info[school])
                #Now go through each school in the district and calculate its percent change.
                for school in sorted_schools:
                    try:
                        #print('writing the following {0}'.format(schools_info[school]))
                        ordered.append([school,clean_names[school]])
                    except KeyError:
                        #Beds code isn't found in one of the years for this school
                        print('Missing school in 2013, 2014, 2015, 2016 and/or 2017')
                        print(schools_info[school])
                        print(school)
            else:
                buffalo_list = []
                buffalo_list.append([district,clean_names[district]])
                schools_info = county_list[district]['schools']
                sorted_schools = sorted(schools_info, key = lambda school: schools_info[school])
                #Now go through each school in the district and calculate its percent change.
                for school in sorted_schools:
                    try:
                        #print('writing the following {0}'.format(schools_info[school]))
                        buffalo_list.append([school,clean_names[school]])
                    except KeyError:
                        #Beds code isn't found in one of the years for this school
                        print('Missing school in 2013, 2014, 2015, 2016 and/or 2017')
                        print(schools_info[school])
                        print(school)
                clean_json['Buffalo']['ordered_schools'] = buffalo_list
    else:
        ordered.append(['8686868686868', 'Charters Average'])
        sorted_county = sorted(county_list, key= lambda district: county_list[district])
        for district in sorted_county:
            county_count += 1
            print('*** starting district {0}'.format(county_list[district]))
            ordered.append([district,county_list[district]])
    clean_json[county]['ordered_schools'] = ordered
eried_list = export_county_schools(erie_list, 'Erie')

niagarad_list = export_county_schools(niagara_list, 'Niagara')
chartered_list = export_county_schools(charter_list, 'Charters')
print(clean_json['Charters'])

*** starting district Bflo. Academy of Science
*** starting district Buffalo United
*** starting district Charter Sch. for App. Tech.
*** starting district Charter School of Inquiry
*** starting district Elmwood Village
*** starting district Enterprise
*** starting district Global Concepts
*** starting district Johnson
*** starting district King Center
*** starting district Niagara Charter
*** starting district South Buffalo
*** starting district Tapestry
*** starting district WNY Maritime Charter
*** starting district West Buffalo
*** starting district Westminster
{'ordered_schools': [['8686868686868', 'Charters Average'], ['140600860861', 'Bflo. Academy of Science'], ['140600860851', 'Buffalo United'], ['142601860031', 'Charter Sch. for App. Tech.'], ['140600861072', 'Charter School of Inquiry'], ['140600860896', 'Elmwood Village'], ['140600860856', 'Enterprise'], ['141800860044', 'Global Concepts'], ['140600860911', 'Johnson'], ['140600860814', 'King Center'], ['400701860890', 'Niag

In [13]:
def year_build(school_dict, year_dict, year):
    if school[0] in year_dict:
        #Only output dictionary if the school has test scores for that year.
        if len(year_dict[school[0]]['math']['classes']) != 0 or len(year_dict[school[0]]['ela']['classes']) != 0:
            year_info = year_dict[school[0]]
            school_info['district'] = year_info['district']
            school_info['name'] = clean_names[school[0]]
            math_year = {}
            math_year['total'] = str(year_info['math']['total_percent'])
            for i, item in enumerate(year_info['math']['classes']):
                clean_class = year_info['math']['classes'][i][0:7]
                try:
                    percent_profficient = (year_info['math']['proficient'][i] / year_info['math']['totals'][i]) * 100
                    math_year[clean_class] = str(float("{0:.1f}".format(percent_profficient)))
                except ZeroDivisionError:
                    math_year[clean_class] = '-'
            school_dict['math'][year] = math_year
            ela_year = {}
            ela_year['total'] = str(year_info['ela']['total_percent'])
            for i, item in enumerate(year_info['ela']['classes']):
                clean_class = year_info['ela']['classes'][i][0:7]
                try:
                    percent_profficient = (year_info['ela']['proficient'][i] / year_info['ela']['totals'][i]) * 100
                    ela_year[clean_class] = str(float("{0:.1f}".format(percent_profficient)))
                except ZeroDivisionError:
                    ela_year[clean_class] = '-'
            school_dict['ela'][year] = ela_year
    else:
        print('Missing {0} - {2} in {1}'.format(school[0], year, year_17[school[0]]['name']))

In [33]:
for group, value in clean_json.items():
    print ('**Starting {0}'.format(group))
    #print('values {0}'.format(value))
    schools = {}
    for school in value['ordered_schools']:
        school_info = {}
        school_info['math'] = {}
        school_info['ela'] = {}
        if school[0] == '8686868686868':
            #Charters
            print('hit charter average')
        else:
            year_build(school_info,year_17, '2017')
            year_build(school_info,year_16, '2016')
            year_build(school_info,year_15, '2015')
            year_build(school_info,year_14, '2014')
            year_build(school_info,year_13, '2013')
        schools[school[0]] = school_info
    clean_json[group]['schools'] = schools
            
print(clean_json['Charters']['schools']['8686868686868'])
print('((**(**(***()))))')
print(clean_json['Charters']['schools']['140600860861'])
print(clean_json['Charters']['ordered_schools'])
print('***************')
print(clean_json['Charters']['schools'])


**Starting Erie
**Starting Buffalo
**Starting Niagara
**Starting Charters
hit charter average
{'math': {}, 'ela': {}}
((**(**(***()))))
{'math': {'2013': {'total': '10.2', 'Grade 7': '3.3', 'Grade 8': '18.8'}, '2016': {'total': '26.7', 'Grade 7': '22.9', 'Grade 8': '29.8'}, '2015': {'total': '25.9', 'Grade 7': '12.0', 'Grade 8': '37.9'}, '2014': {'total': '20.0', 'Grade 7': '20.3', 'Grade 8': '19.7'}, '2017': {'total': '33.5', 'Grade 5': '37.5', 'Grade 7': '23.5', 'Grade 8': '38.7'}}, 'name': 'Bflo. Academy of Science', 'district': 'BUFFALO CITY SCHOOL DISTRICT', 'ela': {'2013': {'total': '13.5', 'Grade 7': '12.7', 'Grade 8': '14.6'}, '2016': {'total': '31.5', 'Grade 7': '17.3', 'Grade 8': '44.6'}, '2015': {'total': '10.9', 'Grade 7': '5.8', 'Grade 8': '15.5'}, '2014': {'total': '15.7', 'Grade 7': '16.9', 'Grade 8': '14.5'}, '2017': {'total': '35.4', 'Grade 5': '34.7', 'Grade 7': '23.1', 'Grade 8': '46.0'}}}
[['8686868686868', 'Charters Average'], ['140600860861', 'Bflo. Academy of Sci

In [67]:
def calculate_charter_average():
    """from statistics import mean
l = [15, 18, 2, 36, 12, 78, 5, 6, 9]
mean(l)"""
    charter_average = {'math': {}, 'ela': {}}
    check = 0
    #Get all values added to the correct subject/year/grade list
    for charter,charter_info in clean_json['Charters']['schools'].items():
        if charter != '8686868686868':
            print(charter)
            print(charter_info)
            print('*****')
            print(charter_average)
            for subject in charter_average.keys():
                subject_years = charter_info[subject].keys()
                for year in subject_years:
                    grades = charter_info[subject][year].keys()
                    for grade in grades:
                        if grade != 'total':
                            if grade == 'Grade 7' and year == '2013' and subject == 'ela':
                                check += 1
                            if year not in charter_average[subject]:
                                charter_average[subject][year] = {grade: [float(charter_info[subject][year][grade])]}
                            elif grade not in charter_average[subject][year]:
                                charter_average[subject][year][grade] = [float(charter_info[subject][year][grade])]
                            else:
                                charter_average[subject][year][grade].append(float(charter_info[subject][year][grade]))
    print(len(charter_average['ela']['2013']['Grade 7']))
    print(check)
    #Start calculating the correct mean
    from statistics import mean
    for subject in charter_average:
        for year in charter_average[subject]:
            mean_grades = []
            for grade in charter_average[subject][year]:
                print(charter_average[subject][year][grade])
                mean_score = "{0:.1f}".format(mean(charter_average[subject][year][grade]))
                print(mean_score)
                charter_average[subject][year][grade] = mean_score
                mean_grades.append(float(mean_score))
            charter_average[subject][year]['total'] = mean(mean_grades)
    
    clean_json['Charters']['schools']['8686868686868'] = charter_average
    clean_json['Charters']['schools']['8686868686868']['name'] = 'Charter Average'
                        
calculate_charter_average()

140600860911
{'math': {'2013': {'total': '10.9', 'Grade 3': '10.0', 'Grade 4': '11.9'}, '2016': {'total': '17.7', 'Grade 3': '20.0', 'Grade 4': '14.6'}, '2015': {'total': '26.2', 'Grade 3': '18.5', 'Grade 4': '34.7'}, '2014': {'total': '20.4', 'Grade 3': '24.5', 'Grade 4': '16.4'}, '2017': {'total': '18.9', 'Grade 3': '20.8', 'Grade 4': '17.2'}}, 'name': 'Johnson', 'district': 'BUFFALO CITY SCHOOL DISTRICT', 'ela': {'2013': {'total': '12.6', 'Grade 3': '8.3', 'Grade 4': '16.9'}, '2016': {'total': '18.6', 'Grade 3': '20.0', 'Grade 4': '16.7'}, '2015': {'total': '13.6', 'Grade 3': '11.1', 'Grade 4': '16.3'}, '2014': {'total': '7.4', 'Grade 3': '11.3', 'Grade 4': '3.6'}, '2017': {'total': '19.8', 'Grade 3': '15.4', 'Grade 4': '23.7'}}}
*****
{'math': {}, 'ela': {}}
140600860856
{'math': {'2013': {'Grade 8': '7.5', 'Grade 5': '11.4', 'Grade 7': '2.2', 'Grade 3': '0.0', 'total': '8.4', 'Grade 6': '19.1', 'Grade 4': '8.7'}, '2016': {'Grade 8': '2.4', 'Grade 5': '8.9', 'Grade 7': '5.3', 'Grad

In [68]:
print(clean_json['Charters']['schools']['8686868686868'])

{'math': {'2013': {'total': 22.116666666666667, 'Grade 7': '12.1', 'Grade 3': '23.3', 'Grade 5': '22.3', 'Grade 4': '31.2', 'Grade 6': '24.5', 'Grade 8': '19.3'}, '2016': {'total': 26.45, 'Grade 7': '14.3', 'Grade 3': '35.4', 'Grade 5': '27.8', 'Grade 4': '33.0', 'Grade 6': '30.1', 'Grade 8': '18.1'}, '2017': {'total': 27.566666666666666, 'Grade 7': '17.7', 'Grade 3': '39.1', 'Grade 5': '27.8', 'Grade 4': '32.3', 'Grade 6': '33.7', 'Grade 8': '14.8'}, '2014': {'total': 25.016666666666666, 'Grade 7': '20.8', 'Grade 3': '30.0', 'Grade 5': '24.2', 'Grade 4': '31.3', 'Grade 6': '27.8', 'Grade 8': '16.0'}, '2015': {'total': 26.266666666666666, 'Grade 7': '17.3', 'Grade 3': '34.4', 'Grade 5': '27.4', 'Grade 4': '36.0', 'Grade 6': '24.5', 'Grade 8': '18.0'}}, 'name': 'Charter Average', 'ela': {'2013': {'total': 17.95, 'Grade 7': '14.9', 'Grade 3': '18.1', 'Grade 5': '17.7', 'Grade 4': '20.7', 'Grade 6': '19.1', 'Grade 8': '17.2'}, '2016': {'total': 25.85, 'Grade 7': '20.0', 'Grade 3': '33.9',

In [69]:
print(clean_json['Erie']['schools']['140000000000'])
print(clean_json['Erie']['ordered_schools'])

import json
with open('data.json', 'w') as output:
    json.dump(clean_json, output)

{'math': {'2013': {'Grade 8': '28.2', 'Grade 5': '27.4', 'Grade 7': '26.7', 'Grade 3': '33.4', 'total': '30.2', 'Grade 6': '29.0', 'Grade 4': '36.8'}, '2016': {'Grade 8': '16.8', 'Grade 5': '38.1', 'Grade 7': '29.7', 'Grade 3': '40.3', 'total': '35.0', 'Grade 6': '36.4', 'Grade 4': '41.3'}, '2015': {'Grade 8': '16.6', 'Grade 5': '40.7', 'Grade 7': '30.8', 'Grade 3': '39.3', 'total': '35.2', 'Grade 6': '34.0', 'Grade 4': '42.7'}, '2014': {'Grade 8': '17.3', 'Grade 5': '36.0', 'Grade 7': '30.7', 'Grade 3': '40.9', 'total': '33.6', 'Grade 6': '33.3', 'Grade 4': '39.8'}, '2017': {'Grade 8': '13.3', 'Grade 5': '40.0', 'Grade 7': '33.1', 'Grade 3': '43.2', 'total': '36.2', 'Grade 6': '38.7', 'Grade 4': '39.3'}}, 'name': 'Erie County', 'district': 'ERIE COUNTY', 'ela': {'2013': {'Grade 8': '36.6', 'Grade 5': '28.4', 'Grade 7': '32.3', 'Grade 3': '32.0', 'total': '32.0', 'Grade 6': '32.6', 'Grade 4': '30.2'}, '2016': {'Grade 8': '35.5', 'Grade 5': '29.5', 'Grade 7': '30.4', 'Grade 3': '39.0', 