In [1]:
import pandas as pd

In [2]:
def main(input_path='files/2016_PSSA_School_Level_Perfomance_Results.csv',
         output_path='files/Summarized_School_Scores.csv'):
    """Summarize the school scores CSV into one weighted row per school.

    Reads ``input_path``, keeps only the rows where 'Group' is
    'All Students', 'Grade' is 'School Total' and '% Below Basic' is not
    null, groups the remaining rows by 'School', and writes one summary row
    per school (built by ``sum_school_scores``) to ``output_path``.

    Parameters
    ----------
    input_path : str
        Path of the CSV file to read. Defaults to the original hard-coded
        location.
    output_path : str
        Path of the summarized CSV file to write. Defaults to the original
        hard-coded location.
    """
    # read_csv already returns a DataFrame; no extra wrapping is needed.
    ss_df = pd.read_csv(input_path, sep=',', encoding='utf-8')

    # Boolean index to pull in only total student scores that are school level
    is_school_total = (
        (ss_df['Group'] == 'All Students')
        & (ss_df['Grade'] == 'School Total')
        & (ss_df['% Below Basic'].notnull())
    )
    grouped_data = ss_df[is_school_total].groupby('School')

    summary_columns = ['County', 'School Number', 'School', '% Advanced',
                       '% Proficient', '% Basic', '% Below Basic']

    # Collect every summarized row first and build the frame once; assigning
    # rows one at a time with .loc re-enlarges the DataFrame on each school.
    summary_rows = [sum_school_scores(name, group) for name, group in grouped_data]
    summarized_data = pd.DataFrame(summary_rows, columns=summary_columns)

    summarized_data.to_csv(output_path, sep=',', encoding='utf-8')

In [3]:
def sum_school_scores(name, data):
    """Collapse one school's rows of score data into a single weighted row.

    Each percentage column is averaged across the rows of ``data``, weighted
    by that row's 'Number Scored', and rounded to 2 decimal places.

    Parameters
    ----------
    name : str
        School name; placed in the third position of the returned row.
    data : pandas.DataFrame
        Rows for one school. Must provide the columns 'County',
        'School Number', 'Number Scored', '% Advanced', '% Proficient',
        '% Basic' and '% Below Basic'.

    Returns
    -------
    list
        ``[county, school_number, name, pct_advanced, pct_proficient,
        pct_basic, pct_below_basic]``. County and school number are taken
        from the last row of ``data``. The four percentages are NaN when the
        total 'Number Scored' is 0, because a weighted average is undefined
        without any weight.

    Raises
    ------
    ValueError
        If ``data`` has no rows.
    """
    if len(data) == 0:
        raise ValueError('cannot summarize school %r: no rows of score data' % (name,))

    # The last row supplies the identifying fields.
    county = data['County'].iloc[-1]
    snum = data['School Number'].iloc[-1]

    # Pull the weights out once instead of building a Series per row with iterrows().
    weights = data['Number Scored'].tolist()
    total_scored = sum(weights)

    sum_data = [county, snum, name]
    for column in ('% Advanced', '% Proficient', '% Basic', '% Below Basic'):
        if total_scored == 0:
            # Nobody was scored: report NaN rather than dividing by zero.
            sum_data.append(float('nan'))
            continue
        weighted = sum(pct * count for pct, count in zip(data[column].tolist(), weights))
        sum_data.append(round(weighted / total_scored, 2))
    return sum_data

In [4]:
# Entry point: run the summarization only when this code is executed directly
# (as a script or in the notebook kernel), not when it is imported as a module.
if __name__ == '__main__':
    main()