# Calculating percent proficiency and its percent change in New York State education assessment data

After downloading the [2015, 2016 and 2017 data from the New York State Education Department](https://data.nysed.gov/downloads.php) and renaming the files to 3-8\_ELA\_MATH\_{{ year }}.csv, this program calculates the percent proficient (students scoring at Level 3 or 4) for each school. The percent proficient is calculated by adding together the raw proficient counts for all available grades and then dividing by the total number of test takers.

In [229]:
import agate
import csv

Due to missing school values and changes in the data columns between years, I needed two different agate type testers to force the columns into their correct types.

In [113]:
def _forced_column_types(extra_text_columns=()):
    """Return a fresh mapping of column name -> forced agate data type.

    Every listed column is read as text except NRC_CODE, which is read as a
    number. ``extra_text_columns`` names additional columns to force to text.
    """
    text_columns = (
        'NRC_DESC',
        'BEDSCODE',
        'L1_COUNT',
        'L1_PCT',
        'L2_COUNT',
        'L2_PCT',
        'L3_COUNT',
        'L3_PCT',
        'L4_COUNT',
        'L4_PCT',
        'L2-L4_PCT',
        'L3-L4_PCT',
        'MEAN_SCALE_SCORE',
        'COUNTY_DESC',
        'TOTAL_TESTED',
        'COUNTY_CODE',
    ) + tuple(extra_text_columns)
    forced = {column: agate.Text() for column in text_columns}
    forced['NRC_CODE'] = agate.Number()
    return forced


# The 2015 tester additionally forces the Sum_Of_SCALE_SCORE column to text.
tester_15 = agate.TypeTester(limit=100, force=_forced_column_types(['Sum_Of_SCALE_SCORE']))
tester_16 = agate.TypeTester(limit=100, force=_forced_column_types())

This function takes the csv of a specific year's assessment data and its type tester, and returns a dictionary keyed by BEDS code (the state id code) that holds each school's test data.

In [230]:
def get_percent_dictionary(file_name, tester):
    """Build per-school proficiency data from one year's assessment csv.

    Only rows for Erie and Niagara counties and the 'All Students' subgroup
    are used; the county-wide summary rows are dropped. The number of
    remaining rows is printed.

    Args:
        file_name: Path of the year's csv file.
        tester: agate.TypeTester used to type the csv columns.

    Returns:
        A dict keyed by BEDS code. Each value holds 'name', 'district' and
        'county', plus an 'ela' and a 'math' dict with:
          * 'totals'        - test takers, one entry per row (grade),
          * 'proficient'    - Level 3 + Level 4 counts, one entry per row,
          * 'classes'       - the ITEM_DESC label of each row,
          * 'total_percent' - percent proficient rounded to one decimal, or
                              '-' when the total of test takers is zero.

    Raises:
        KeyError: If a row's six-character BEDS prefix has no matching
            'SCHOOL DISTRICT' row (NIAGARA CHARTER SCHOOL is special-cased).
    """
    schools = agate.Table.from_csv(file_name, column_types=tester)

    # Grab just Erie and Niagara schools, filtering out the other subgroups
    # and the total county stats.
    counties = ('ERIE', 'NIAGARA')
    county_summaries = ('ERIE COUNTY', 'NIAGARA COUNTY')
    erie_niagara = schools.where(
        lambda row: row['COUNTY_DESC'] in counties
        and row['SUBGROUP_NAME'] == 'All Students'
        and row['NAME'] not in county_summaries
    )
    print(len(erie_niagara.rows))

    def to_count(value):
        # Some schools don't have test takers for certain tests, so anything
        # that is not a whole number counts as zero. TypeError covers a cell
        # that was parsed as None, which int() rejects with TypeError rather
        # than ValueError.
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def percent_proficient(scores):
        # Takes the subject dict explicitly instead of reading loop variables
        # from the enclosing scope.
        total_sum = sum(scores['totals'])
        proficient_sum = sum(scores['proficient'])
        try:
            return float("{0:.1f}".format((proficient_sum / total_sum) * 100))
        except ZeroDivisionError:
            return '-'

    # The data doesn't include district information, but the first six
    # characters of a school's BEDS code identify its district, so build a
    # prefix -> district name lookup from the district rows.
    district_names = {
        row['BEDSCODE'][:6]: row['NAME']
        for row in erie_niagara.rows
        if 'SCHOOL DISTRICT' in row['NAME']
    }

    # First pass: create the empty record for every BEDS code.
    schools_info = {}
    for row in erie_niagara.rows:
        if row['NAME'] == 'NIAGARA CHARTER SCHOOL':
            district = 'NIAGARA-WHEATFIELD CENTRAL SCHOOL DISTRICT'
        else:
            district = district_names[row['BEDSCODE'][:6]]
        schools_info[row['BEDSCODE']] = {
            'name': row['NAME'],
            'math': {'totals': [], 'proficient': [], 'classes': []},
            'ela': {'totals': [], 'proficient': [], 'classes': []},
            'district': district,
            'county': row['COUNTY_DESC'],
        }

    # Second pass: fill in the score values. Any subject other than 'ELA' is
    # stored under 'math'.
    for row in erie_niagara.rows:
        subject = 'ela' if row['ITEM_SUBJECT_AREA'] == 'ELA' else 'math'
        scores = schools_info[row['BEDSCODE']][subject]
        scores['totals'].append(to_count(row['TOTAL_TESTED']))
        scores['classes'].append(row['ITEM_DESC'])
        scores['proficient'].append(
            to_count(row['L3_COUNT']) + to_count(row['L4_COUNT'])
        )

    for record in schools_info.values():
        for test_area in ('ela', 'math'):
            record[test_area]['total_percent'] = percent_proficient(record[test_area])
    return schools_info

In [231]:
# 2016 results, keyed by BEDS code.
year_16 = get_percent_dictionary(file_name='3-8_ELA_MATH_2016.csv', tester=tester_16)


  warn_duplicate_column(new_column_name, final_column_name)


1837


In [232]:
# 2017 results, keyed by BEDS code. The 2017 file is typed with tester_16;
# presumably it has the same column layout as the 2016 file - confirm.
year_17 = get_percent_dictionary(file_name='3-8_ELA_MATH_2017.csv', tester=tester_16)

1815


In [233]:
# 2015 results, keyed by BEDS code, typed with the 2015-specific tester.
year_15 = get_percent_dictionary(file_name='3-8_ELA_MATH_2015.csv', tester=tester_15)

1852


Example of each year returning data for a specific school

In [234]:
# Show the same school (BEDS code 400400010001) as recorded in each year,
# printed in the order 2016, 2015, 2017.
for yearly_data in (year_16, year_15, year_17):
    print(yearly_data['400400010001'])

{'name': 'ANNA MERRITT ELEMENTARY SCHOOL', 'ela': {'total_percent': 25.2, 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'proficient': [22, 11], 'totals': [69, 62]}, 'county': 'NIAGARA', 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'math': {'total_percent': 41.3, 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'proficient': [29, 21], 'totals': [67, 54]}}
{'name': 'ANNA MERRITT ELEMENTARY SCHOOL', 'ela': {'total_percent': 17.8, 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'proficient': [11, 12], 'totals': [60, 69]}, 'county': 'NIAGARA', 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'math': {'total_percent': 37.0, 'classes': ['Grade 3 Math', 'Grade 4 Math'], 'proficient': [15, 29], 'totals': [57, 62]}}
{'name': 'ANNA MERRITT ELEMENTARY SCHOOL', 'ela': {'total_percent': 22.9, 'classes': ['Grade 3 ELA', 'Grade 4 ELA'], 'proficient': [11, 16], 'totals': [53, 65]}, 'county': 'NIAGARA', 'district': 'LOCKPORT CITY SCHOOL DISTRICT', 'math': {'total_percent': 36.8, 'classes': ['Grade 3 Math', 'Grade 4 Mat

Separate each school into its county and district for print order

In [235]:
# Both dicts map a district's BEDS code to
# {'name': <district name>, 'schools': {<school BEDS code>: <school name>}}.
erie_list = {}
niagara_list = {}

# Pass 1: register every 2016 district row under its county. Anything that
# is not in ERIE lands in the Niagara dict.
for beds, record in year_16.items():
    if 'SCHOOL DISTRICT' not in record['name']:
        continue
    county_districts = erie_list if record['county'] == 'ERIE' else niagara_list
    county_districts[beds] = {'name': record['name'], 'schools': {}}

# Pass 2: attach each non-district 2016 row to the district whose name
# matches the row's 'district' value.
for county_districts in (erie_list, niagara_list):
    for district_entry in county_districts.values():
        for beds, record in year_16.items():
            if 'SCHOOL DISTRICT' in record['name']:
                continue
            if record['district'] == district_entry['name']:
                district_entry['schools'][beds] = record['name']

In [209]:
def percent_change(new,old):
    """Return the percent change from ``old`` to ``new``, to one decimal.

    Returns 0 when the change cannot be computed: ``old`` is zero, or either
    value does not support the arithmetic (for example the '-' placeholder
    string).
    """
    try:
        return float(format((new - old) / old * 100, '.1f'))
    except (TypeError, ZeroDivisionError):
        return 0

In [228]:
# District counters, starting at zero.
erie_districts = niagara_districts = 0
def export_county_schools(county_list):
    """Write one row per district, then one per school, for a county.

    Relies on module-level names: ``writer`` (the csv writer rows go to),
    ``year_15`` / ``year_16`` / ``year_17`` (per-year data keyed by BEDS
    code) and ``percent_change``.

    ``county_list`` maps a district BEDS code to a dict with the district's
    'name' and its 'schools' (school BEDS code -> school name). Districts are
    written in order of their name. A school whose BEDS code is missing from
    any year is reported with print() and not written.

    Returns the number of districts in ``county_list``.
    """
    def build_row(label, beds):
        # Raises KeyError when ``beds`` is absent from any of the three years.
        ela = [year[beds]['ela']['total_percent'] for year in (year_15, year_16, year_17)]
        maths = [year[beds]['math']['total_percent'] for year in (year_15, year_16, year_17)]
        changes = [
            percent_change(ela[2], ela[0]),
            percent_change(ela[2], ela[1]),
            percent_change(maths[2], maths[0]),
            percent_change(maths[2], maths[1]),
        ]
        return [label] + ela + maths + changes

    # Order the district ids by the district's name.
    ordered_districts = sorted(county_list, key=lambda beds: county_list[beds]['name'])
    for district in ordered_districts:
        entry = county_list[district]
        writer.writerow(build_row(entry['name'], district))
        # Now each school of the district, with its own percent changes.
        for school, school_name in entry['schools'].items():
            try:
                writer.writerow(build_row(school_name, school))
            except KeyError:
                # Beds code isn't found in one of the years for this school
                print('Missing school in 2015, 2016 and/or 2017')
                print(school_name)
                print(school)
    return len(ordered_districts)
# newline='' leaves line endings to the csv module, as its documentation
# requires; without it, platforms that translate '\n' on write end up with
# an extra blank line after every row.
with open('print_out.tsv', 'w', newline='') as out_put:
    # export_county_schools writes through this module-level ``writer``.
    writer = csv.writer(out_put, delimiter='\t')
    writer.writerow(['school','ELA 2015', 'ELA 2016', 'ELA 2017', 'Math 2015', 'Math 2016', 'Math 2017', 'pc_ela_15_17', 'pc_ela_16_17', 'pc_math_15_17', 'pc_math_16_17'])
    # Erie rows are written first, then Niagara; each call returns the number
    # of districts it exported.
    erie_districts = export_county_schools(erie_list)
    niagara_districts = export_county_schools(niagara_list)
    print('{0} erie districts and {1} niagara districts'.format(erie_districts,niagara_districts))

Missing school in 2015, 2016 and/or 2017
ALDEN PRIMARY AT TOWNLINE
140101060003
Missing school in 2015, 2016 and/or 2017
DR MARTIN LUTHER KING, JR MULTICULTURAL INSTITUTE
140600010039
Missing school in 2015, 2016 and/or 2017
LAFAYETTE HIGH SCHOOL
140600010107
Missing school in 2015, 2016 and/or 2017
KENMORE MIDDLE SCHOOL
142601030022
Missing school in 2015, 2016 and/or 2017
THEODORE ROOSEVELT ELEMENTARY SCHOOL
142601030019
Missing school in 2015, 2016 and/or 2017
ALEXANDER HAMILTON ELEMENTARY SCHOOL
142601030002
28 erie districts and 10 niagara districts


ALDEN PRIMARY AT TOWNLINE 140101060003 -> shifted to Alden Immediate

DR MARTIN LUTHER KING, JR MULTICULTURAL INSTITUTE 140600010039 -> closed

LAFAYETTE HIGH SCHOOL 140600010107 -> renamed to NEWCOMER ACADEMY AT LAFAYETTE 

KENMORE MIDDLE SCHOOL 142601030022 -> closed

THEODORE ROOSEVELT ELEMENTARY SCHOOL 142601030019 -> closed

ALEXANDER HAMILTON ELEMENTARY SCHOOL 142601030002 -> closed