# Calculating percent proficiency and its percent change in New York State education assessment data

After downloading the [2015, 2016 and 2017 data from the New York State Education Department](https://data.nysed.gov/downloads.php) and renaming the files to 3-8\_ELA\_AND\_MATH\_{{ year }}.csv, this program calculates each school's percent proficient, i.e. the share of test takers scoring at Level 3 or 4. The percent proficient is calculated by adding together the raw proficient counts for all available grades and then dividing by the total number of test takers.

In [11]:
import agate
import csv

Due to missing school values and changes in the data columns between years, I needed two different agate `TypeTester`s to force the columns into their correct types.

In [12]:
# Type testers that pin column types instead of letting agate infer them.
# Every listed column is forced to text except NRC_CODE, which is forced to a
# number. tester_15 additionally forces the Sum_Of_SCALE_SCORE column.
tester_15 = agate.TypeTester(limit=100, force=dict(
    [(column, agate.Text()) for column in (
        'NRC_DESC',
        'BEDSCODE',
        'L1_COUNT',
        'L1_PCT',
        'L2_COUNT',
        'L2_PCT',
        'L3_COUNT',
        'L3_PCT',
        'L4_COUNT',
        'L4_PCT',
        'L2-L4_PCT',
        'L3-L4_PCT',
        'MEAN_SCALE_SCORE',
        'COUNTY_DESC',
        'TOTAL_TESTED',
        'COUNTY_CODE',
        'Sum_Of_SCALE_SCORE',
    )],
    NRC_CODE=agate.Number(),
))
tester_16 = agate.TypeTester(limit=100, force=dict(
    [(column, agate.Text()) for column in (
        'NRC_DESC',
        'BEDSCODE',
        'L1_COUNT',
        'L1_PCT',
        'L2_COUNT',
        'L2_PCT',
        'L3_COUNT',
        'L3_PCT',
        'L4_COUNT',
        'L4_PCT',
        'L2-L4_PCT',
        'L3-L4_PCT',
        'MEAN_SCALE_SCORE',
        'COUNTY_DESC',
        'TOTAL_TESTED',
        'COUNTY_CODE',
    )],
    NRC_CODE=agate.Number(),
))

This function takes in the CSV of a specific year's assessment data and its type tester, and returns a dictionary mapping each BEDS code (the state's ID code) to that school's test data.

In [27]:
def get_percent_dictionary(file_name, tester):
    """Build per-school proficiency figures for Erie and Niagara counties.

    Loads one year's assessment CSV, keeps the 'All Students' rows whose
    COUNTY_DESC is ERIE or NIAGARA (dropping the two county-wide summary
    rows), and collects test-taker and Level 3 + Level 4 counts per subject.
    The number of rows kept is printed as a sanity check.

    Args:
        file_name: Path of the CSV file to load.
        tester: agate.TypeTester passed as ``column_types`` when loading.

    Returns:
        dict keyed by BEDSCODE. Each value has 'name', 'district', 'county'
        and a 'math' and an 'ela' dict. Those hold the parallel lists
        'totals', 'proficient' and 'classes' (one entry per source row) and
        'total_percent': the proficient share rounded to one decimal, or
        '-' when the summed test takers are zero.

    Raises:
        KeyError: if a row's six-character BEDSCODE prefix matches no
            'SCHOOL DISTRICT' row in the filtered data.
    """
    schools = agate.Table.from_csv(file_name, column_types=tester)
    # Grab just Erie and Niagara, the all-students subgroup, and drop the
    # county-wide summary rows.
    erie_niagara = (
        schools
        .where(lambda row: row['COUNTY_DESC'] in ['ERIE', 'NIAGARA'])
        .where(lambda row: row['SUBGROUP_NAME'] in ['All Students'])
        .where(lambda row: row['NAME'] not in ['ERIE COUNTY', 'NIAGARA COUNTY'])
    )
    print(len(erie_niagara.rows))

    # The data has no district column, but the first six characters of a BEDS
    # code identify the district, so map that prefix to the district's name.
    district_names = {
        str(row['BEDSCODE'][:6]): row['NAME']
        for row in erie_niagara.rows
        if 'SCHOOL DISTRICT' in row['NAME']
    }

    def to_count(value):
        # Cells without a usable number count as zero. int(None) raises
        # TypeError rather than ValueError, so catch both in case a blank
        # cell is loaded as None instead of as text.
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    # First pass: one empty record per BEDS code.
    schools_info = {}
    for row in erie_niagara.rows:
        if row['NAME'] == 'NIAGARA CHARTER SCHOOL':
            # Assigned by hand instead of through the prefix lookup.
            district = 'NIAGARA-WHEATFIELD CENTRAL SCHOOL DISTRICT'
        else:
            district = district_names[row['BEDSCODE'][:6]]
        schools_info[row['BEDSCODE']] = {
            'name': row['NAME'],
            'math': {'totals': [], 'proficient': [], 'classes': []},
            'ela': {'totals': [], 'proficient': [], 'classes': []},
            'district': district,
            'county': row['COUNTY_DESC'],
        }

    # Second pass: append each row's figures to its school's subject lists.
    # Anything that is not ELA is treated as math.
    for row in erie_niagara.rows:
        subject = 'ela' if row['ITEM_SUBJECT_AREA'] == 'ELA' else 'math'
        data_dict = schools_info[row['BEDSCODE']][subject]
        data_dict['totals'].append(to_count(row['TOTAL_TESTED']))
        data_dict['classes'].append(row['ITEM_DESC'])
        data_dict['proficient'].append(
            to_count(row['L3_COUNT']) + to_count(row['L4_COUNT'])
        )

    # Overall percent proficient per subject across all listed grades.
    for record in schools_info.values():
        for subject in ('ela', 'math'):
            scores = record[subject]
            try:
                percent = (sum(scores['proficient']) / sum(scores['totals'])) * 100
            except ZeroDivisionError:
                scores['total_percent'] = '-'
            else:
                scores['total_percent'] = float("{0:.1f}".format(percent))
    return schools_info

In [28]:
# Load 2016; the number printed below is the count of rows kept after filtering.
year_16 = get_percent_dictionary('3-8_ELA_AND_MATH_2016.csv', tester_16)


1837


In [15]:
# Load 2017; this file is read with tester_16 as well.
year_17 = get_percent_dictionary('3-8_ELA_AND_MATH_2017.csv', tester_16)

1815


In [16]:
# Load 2015 with tester_15, which also forces the Sum_Of_SCALE_SCORE column to text.
year_15 = get_percent_dictionary('3-8_ELA_AND_MATH_2015.csv',tester_15)

1852


Example of each year returning data for a specific school

In [25]:
# Look up the same BEDS code in each year's dictionary to compare its records.
print(year_16['400400010001'])
print(year_15['400400010001'])
print(year_17['400400010001'])
print('****')
# A second BEDS code, presumably a district-level row (TODO confirm).
print(year_16['140701060000'])

{'math': {'proficient': [29, 21], 'total_percent': 41.3, 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'totals': [67, 54]}, 'ela': {'proficient': [22, 11], 'total_percent': 25.2, 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'totals': [69, 62]}, 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'county': 'NIAGARA', 'name': 'ANNA MERRITT ELEMENTARY SCHOOL'}
{'math': {'proficient': [15, 29], 'total_percent': 37.0, 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'totals': [57, 62]}, 'ela': {'proficient': [11, 12], 'total_percent': 17.8, 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'totals': [60, 69]}, 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'county': 'NIAGARA', 'name': 'ANNA MERRITT ELEMENTARY SCHOOL'}
{'math': {'proficient': [23, 19], 'total_percent': 36.8, 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'totals': [52, 62]}, 'ela': {'proficient': [11, 16], 'total_percent': 22.9, 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'totals': [53, 65]}, 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'county': 'NI

Separate each school into its county and district for print order

In [45]:
# Every 2017 school must exist in the 2015 and 2016 dictionaries; where one is
# absent, insert a placeholder record with empty lists and '-' percentages.
print('checking 2017 {0} 2016'.format(len(year_16)))
for school in year_17:
    for earlier_year, notice in ((year_16, 'not in 2016'), (year_15, 'not in 2015')):
        if school in earlier_year:
            continue
        print(notice)
        known = year_17[school]
        earlier_year[school] = {
            'name': known['name'],
            'county': known['county'],
            'district': known['district'],
            'math': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'},
            'ela': {'totals': [], 'proficient': [], 'classes': [], 'total_percent': '-'},
        }

# Report (without patching) the 2016 schools missing from the other years.
print('checking 2016 {0}'.format(len(year_16)))
for school in year_16:
    for other_year, template in ((year_17, 'Not in 2017 {0} {1}'), (year_15, 'Not in 2015 {0} {1}')):
        if school not in other_year:
            print(template.format(school, year_16[school]['name']))

# Same report for the 2015 schools.
print('checking 2015')
for school in year_15:
    for other_year, template in ((year_17, 'Not in 2017 {0} {1}'), (year_16, 'Not in 2016 {0} {1}')):
        if school not in other_year:
            print(template.format(school, year_15[school]['name']))

checking 2017 245 2016
checking 2016 245
Not in 2017 140600010107 LAFAYETTE HIGH SCHOOL
Not in 2017 142601030022 KENMORE MIDDLE SCHOOL
Not in 2017 140101060003 ALDEN PRIMARY AT TOWNLINE
Not in 2017 142601030019 THEODORE ROOSEVELT ELEMENTARY SCHOOL
Not in 2017 140600010039 DR MARTIN LUTHER KING, JR MULTICULTURAL INSTITUTE
Not in 2017 142601030002 ALEXANDER HAMILTON ELEMENTARY SCHOOL
checking 2015
Not in 2017 140600010107 LAFAYETTE HIGH SCHOOL
Not in 2017 142601030022 KENMORE MIDDLE SCHOOL
Not in 2017 140101060003 ALDEN PRIMARY AT TOWNLINE
Not in 2017 142601030019 THEODORE ROOSEVELT ELEMENTARY SCHOOL
Not in 2017 140600010039 DR MARTIN LUTHER KING, JR MULTICULTURAL INSTITUTE
Not in 2017 400301060006 LEWISTON PORTER SENIOR HIGH SCHOOL
Not in 2016 400301060006 LEWISTON PORTER SENIOR HIGH SCHOOL
Not in 2017 142601030002 ALEXANDER HAMILTON ELEMENTARY SCHOOL
Not in 2017 142801060001 POTTERS ROAD SCHOOL
Not in 2016 142801060001 POTTERS ROAD SCHOOL


In [21]:
# District lookups per county: district BEDS code -> {'name', 'schools'}.
# Anything whose county is not ERIE lands in the Niagara lookup.
erie_list = {}
niagara_list = {}
for key, value in year_17.items():
    if 'SCHOOL DISTRICT' not in value['name']:
        continue
    county_districts = erie_list if value['county'] == 'ERIE' else niagara_list
    county_districts[key] = {'name': value['name'], 'schools': {}}

# Attach every non-district 2017 record to the district whose name it carries.
for county_districts in (erie_list, niagara_list):
    for district_entry in county_districts.values():
        district_entry['schools'].update(
            (school, info['name'])
            for school, info in year_17.items()
            if info['district'] == district_entry['name']
            and 'SCHOOL DISTRICT' not in info['name']
        )
print(erie_list['140701060000'])

{'name': 'CHEEKTOWAGA CENTRAL SCHOOL DISTRICT', 'schools': {'140701060004': 'UNION EAST ELEMENTARY SCHOOL', '140701060007': 'CHEEKTOWAGA MIDDLE SCHOOL', '140701060008': 'PINE HILL EDUCATION CENTER'}}


In [22]:
def percent_change(new,old):
    """Return the percent change from old to new, rounded to one decimal.

    Returns the placeholder '-' when the change cannot be computed: old is
    zero, or either value is not numeric (e.g. it is already '-').
    """
    try:
        return float("{0:.1f}".format((new - old) / old * 100))
    except (ZeroDivisionError, TypeError):
        return '-'

Exporting for print

In [24]:
# District counters; both are overwritten with export_county_schools' return
# values inside the `with` block further down in this cell.
erie_districts = 0
niagara_districts = 0
def export_county_schools(county_list):
    """Write a TSV row for every district, each followed by its schools.

    Districts are written alphabetically by name and schools alphabetically
    within their district. Each row holds the district name, a label (the
    district name again on district rows, the school name on school rows),
    the 2015-2017 ELA and math percentages, and the 2015->2017 and 2016->2017
    percent changes.

    Relies on module-level state: the csv ``writer`` and the ``year_15``,
    ``year_16`` and ``year_17`` dictionaries.

    Args:
        county_list: dict of district BEDS code -> {'name', 'schools'}.

    Returns:
        The number of districts written.
    """
    years = (year_15, year_16, year_17)

    def tsv_row(district_name, label, code):
        # Pull the three yearly percentages per subject, then derive changes.
        ela_15, ela_16, ela_17 = [yearly[code]['ela']['total_percent'] for yearly in years]
        math_15, math_16, math_17 = [yearly[code]['math']['total_percent'] for yearly in years]
        return [
            district_name, label,
            ela_15, ela_16, ela_17,
            math_15, math_16, math_17,
            percent_change(ela_17, ela_15),
            percent_change(ela_17, ela_16),
            percent_change(math_17, math_15),
            percent_change(math_17, math_16),
        ]

    district_codes = sorted(county_list, key=lambda code: county_list[code]['name'])
    for code in district_codes:
        district_name = county_list[code]['name']
        writer.writerow(tsv_row(district_name, district_name, code))
        school_names = county_list[code]['schools']
        for school_code in sorted(school_names, key=school_names.get):
            writer.writerow(tsv_row(district_name, school_names[school_code], school_code))
    return len(district_codes)

# Write the print-ready table. The file is opened with newline='' as the csv
# module documents, so the writer alone decides the row terminator (otherwise
# some platforms emit an extra blank line after every row).
# export_county_schools writes through the module-level `writer` bound here.
with open('print_out.tsv', 'w', newline='') as out_put:
    writer = csv.writer(out_put, delimiter='\t')
    writer.writerow(['district','school','ELA 2015', 'ELA 2016', 'ELA 2017', 'Math 2015', 'Math 2016', 'Math 2017', 'pc_ela_15_17', 'pc_ela_16_17', 'pc_math_15_17', 'pc_math_16_17'])
    erie_districts = export_county_schools(erie_list)
    niagara_districts = export_county_schools(niagara_list)
    print('{0} erie districts and {1} niagara districts'.format(erie_districts,niagara_districts))

28 erie districts and 10 niagara districts


In [31]:
# Reset the district counters.
erie_districts = 0
niagara_districts = 0
# Output skeleton, one entry per group. export_county_schools stores each
# group's 'ordered_schools' list here; the Buffalo district gets its own group.
clean_json = {'Erie': {}, 'Niagara': {}, 'Buffalo': {}}
def export_county_schools(county_list, county):
    """Record a county's districts and schools, in print order, in clean_json.

    Districts are visited alphabetically by name. Each contributes a
    [BEDS code, name] pair followed by one pair per school, alphabetical by
    school name. The Buffalo district ('140600010000') is stored under
    clean_json['Buffalo'] instead of the county's own list and is not counted.

    Args:
        county_list: dict of district BEDS code -> {'name', 'schools'},
            where 'schools' maps school BEDS code -> school name.
        county: key of the module-level clean_json to fill.

    Returns:
        The number of districts recorded for the county, excluding Buffalo.
    """
    def ordered_pairs(district):
        # District pair first, then its schools sorted by name.
        schools_info = county_list[district]['schools']
        pairs = [[district, county_list[district]['name']]]
        for school in sorted(schools_info, key=lambda school: schools_info[school]):
            #TODO ADD CONDITION FOR CHARTERS
            print('writing the following {0}'.format(schools_info[school]))
            pairs.append([school, schools_info[school]])
        return pairs

    ordered = []
    county_count = 0
    # Sort the county's district ids by district name.
    for district in sorted(county_list, key=lambda district: county_list[district]['name']):
        if district == '140600010000':
            # Buffalo gets thrown in its own group.
            clean_json['Buffalo']['ordered_schools'] = ordered_pairs(district)
            continue
        county_count += 1
        print('*** starting district {0}'.format(county_list[district]['name']))
        ordered.extend(ordered_pairs(district))
    clean_json[county]['ordered_schools'] = ordered
    return county_count
# Store the results in the counters rather than in erie_list / niagara_list,
# so the district lookups survive and this cell can be run again.
erie_districts = export_county_schools(erie_list, 'Erie')
niagara_districts = export_county_schools(niagara_list, 'Niagara')

*** starting district AKRON CENTRAL SCHOOL DISTRICT
writing the following AKRON ELEMENTARY SCHOOL
writing the following AKRON MIDDLE SCHOOL
*** starting district ALDEN CENTRAL SCHOOL DISTRICT
writing the following ALDEN INTERMEDIATE SCHOOL
writing the following ALDEN MIDDLE SCHOOL
*** starting district AMHERST CENTRAL SCHOOL DISTRICT
writing the following AMHERST MIDDLE SCHOOL
writing the following SMALLWOOD DRIVE SCHOOL
writing the following WINDERMERE BLVD SCHOOL
writing the following ALOMA D JOHNSON CHARTER SCHOOL
writing the following ALTERNATIVE HIGH SCHOOL AT 4
writing the following BENNETT PARK MONTESSORI SCHOOL
writing the following BILINGUAL CENTER
writing the following BUFFALO ACADEMY FOR THE VISUAL & PERFORMING ARTS
writing the following BUFFALO ACADEMY OF SCIENCE CHARTER SCHOOL
writing the following BUFFALO ELEMENTARY SCHOOL OF TECHNOLOGY
writing the following BUFFALO UNITED CHARTER SCHOOL
writing the following BUILD ACADEMY
writing the following CHARTER SCHOOL OF INQUIRY
w

In [46]:
def year_build(school_dict, year_dict, year, school_code=None):
    """Add one year's proficiency percentages for a school to school_dict.

    When the school has at least one math or ELA class listed in year_dict,
    sets school_dict['district'] and school_dict['name'] and stores, under
    school_dict['math'][year] and school_dict['ela'][year], a dict holding
    'total' (the overall percentage as a string) plus one entry per class.
    Class keys are the first seven characters of the class description and
    values are one-decimal percentage strings, or '-' when nobody was tested.
    A school listed with no classes at all leaves school_dict untouched.

    Args:
        school_dict: output record; must already contain 'math' and 'ela'
            dicts.
        year_dict: BEDS code -> school record for the year.
        year: label used as the key for this year's figures.
        school_code: BEDS code to look up. When omitted, falls back to the
            first item of the module-level ``school`` pair, which is how
            existing callers pass it.
    """
    if school_code is None:
        # Legacy call style: the caller's loop leaves the current
        # [code, name] pair in the module-level name `school`.
        school_code = school[0]
    if school_code not in year_dict:
        print('Missing {0} - {2} in {1}'.format(school_code, year, year_17[school_code]['name']))
        return
    year_info = year_dict[school_code]
    if not year_info['math']['classes'] and not year_info['ela']['classes']:
        return
    school_dict['district'] = year_info['district']
    school_dict['name'] = year_info['name']
    for subject in ('math', 'ela'):
        scores = year_info[subject]
        subject_year = {'total': str(scores['total_percent'])}
        for class_name, proficient, tested in zip(scores['classes'], scores['proficient'], scores['totals']):
            clean_class = class_name[0:7]
            try:
                percent_proficient = (proficient / tested) * 100
            except ZeroDivisionError:
                subject_year[clean_class] = '-'
            else:
                subject_year[clean_class] = str(float("{0:.1f}".format(percent_proficient)))
        school_dict[subject][year] = subject_year

In [48]:
# Build the per-school, per-year records for every group in clean_json and
# dump the whole structure to data.json.
for group, value in clean_json.items():
    print ('**Starting {0}'.format(group))
    #print('values {0}'.format(value))
    schools = {}
    # NOTE(review): year_build appears to read `school` (and possibly
    # `school_info`) from module scope rather than only from its arguments;
    # renaming these loop variables could break it - verify before refactoring.
    for school in value['ordered_schools']:
        # Each entry is a [BEDS code, name] pair; start an empty record for it.
        school_info = {}
        school_info['math'] = {}
        school_info['ela'] = {}
        year_build(school_info,year_17, '2017')
        year_build(school_info,year_16, '2016')
        year_build(school_info,year_15, '2015')
        schools[school[0]] = school_info
    clean_json[group]['schools'] = schools
            
# Spot-check one record before writing the file.
print(clean_json['Erie']['schools']['140101060007'])
import json
with open('data.json', 'w') as output:
    json.dump(clean_json, output)

**Starting Niagara
**Starting Erie
Missing 140101060007 - ALDEN INTERMEDIATE SCHOOL in 2016 so not included in the year
Missing 140101060007 - ALDEN INTERMEDIATE SCHOOL in 2015 so not included in the year
Missing 142601030025 - KENMORE EAST SENIOR HIGH SCHOOL in 2016 so not included in the year
Missing 142601030025 - KENMORE EAST SENIOR HIGH SCHOOL in 2015 so not included in the year
Missing 142601030026 - KENMORE WEST SENIOR HIGH SCHOOL in 2016 so not included in the year
Missing 142601030026 - KENMORE WEST SENIOR HIGH SCHOOL in 2015 so not included in the year
**Starting Buffalo
Missing 140600861072 - CHARTER SCHOOL OF INQUIRY in 2016 so not included in the year
Missing 140600861072 - CHARTER SCHOOL OF INQUIRY in 2015 so not included in the year
Missing 140600010309 - NEWCOMER ACADEMY AT LAFAYETTE in 2016 so not included in the year
Missing 140600010309 - NEWCOMER ACADEMY AT LAFAYETTE in 2015 so not included in the year
Missing 140600860863 - WESTERN NEW YORK MARITIME CHARTER SCHOOL 