In [7]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook

In [8]:
from IPython.core.display import HTML
# Read the two custom stylesheets and inject them into the notebook as a
# single <style> element. Each file is opened in a `with` block so its handle
# is closed as soon as the contents have been read.
css_parts = []
for css_path in ('/Users/Scott/Desktop/Data/style-table.css',
                 '/Users/Scott/Desktop/Data/style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Last expression of the cell: the HTML object is rendered as the cell output.
HTML('<style>{}</style>'.format(css))

## ISEE Trends by Year
#### This notebook takes 14 years of admissions data and generates graphs to visualize standardized testing trends over those years.
*Data has been altered for the purposes of confidentiality.*

In [18]:
# Reads one admissions Excel workbook per school year (04_05 through 16_17),
# converts each into a pandas DataFrame, and stores the cleaned frames, in
# year order, in the list `files`.

ADMISSIONS_DIR = '/Users/Scott/Desktop/Data/Admissions/original_files/'
FIRST_START_YEAR = 4    # two-digit start year of the first workbook (04_05)
LAST_START_YEAR = 16    # two-digit start year of the last workbook (16_17)

# Workbooks are named '<start>_<end>.xlsx' with both years zero-padded to two
# digits, so a single format string covers 04_05, 09_10 and 10_11 alike.
admissions_paths = [
    '{}{:02d}_{:02d}.xlsx'.format(ADMISSIONS_DIR, start_year, start_year + 1)
    for start_year in range(FIRST_START_YEAR, LAST_START_YEAR + 1)
]

# Paths and DataFrames are kept in separate lists so that `files` only ever
# holds DataFrames. Columns and rows that are entirely empty are dropped.
files = [
    pd.read_excel(path).dropna(axis=1, how="all").dropna(axis=0, how="all")
    for path in admissions_paths
]

## Graphs of incoming Math, Quantitative, Verbal, and Reading ISEE scores by year for 6th-grade applicants, 9th-grade applicants, and all new students

##### Creates dictionary of standardized testing scores by year admitted and subject

In [22]:
# Builds one DataFrame of ISEE scores per subject/grade combination, with one
# column per year ('2004', '2005', ...), and stores them in a dictionary keyed
# by '<subject>_<grade>', e.g. 'ISEE_Math_sixth'.
#
# Rows are selected by the grade applied for (`Grade_apply`).
# NOTE(review): the original comment described these rows as "admitted
# students", but nothing in this cell filters on admission status -- confirm
# that the source workbooks contain admitted students only.

subject_col_names = ['ISEE_Math', 'ISEE_Quantitative', 'ISEE_Verbal', 'ISEE_Reading']
grades = {'sixth': 6, 'ninth': 9}
FIRST_YEAR = 2004   # year label for files[0]; each later file is one year later


def _scores_for_year(year_frame, grade_number, subj, year_label):
    """Return one year's non-missing scores for a subject and grade.

    Parameters
    ----------
    year_frame : DataFrame holding one year's rows; must have a `Grade_apply`
        column and a column named `subj`.
    grade_number : value compared against `Grade_apply` (6 or 9 here).
    subj : name of the score column to extract.
    year_label : name given to the returned Series; it becomes the column
        label once the years are concatenated.

    Raises
    ------
    KeyError if `year_frame` has no column named `subj`.
    """
    rows_for_grade = year_frame[year_frame.Grade_apply == grade_number]
    return rows_for_grade[subj].dropna().rename(year_label)


dfs_by_subj_and_grade = {}

for subj in subject_col_names:
    for grade, grade_number in grades.items():
        # Dictionary key of SUBJECT_GRADE, e.g. "ISEE_Math_sixth".
        subj_and_grade = str(subj) + '_' + str(grade)

        # Collect every year first, then concatenate once instead of
        # re-concatenating the growing frame inside the loop.
        yearly_scores = [
            _scores_for_year(year_frame, grade_number, subj, str(FIRST_YEAR + offset))
            for offset, year_frame in enumerate(files)
        ]

        # axis=1 places each year side by side; rows are aligned on the
        # original row index, so cells with no score for that index are NaN.
        dfs_by_subj_and_grade[subj_and_grade] = pd.concat(yearly_scores, axis=1)

# The same dictionary is bound to both names so that code written against
# either `dfs_subj_and_grade` or `dfs_by_subj_and_grade` runs on a fresh kernel.
dfs_subj_and_grade = dfs_by_subj_and_grade

In [15]:
# Spot check: show the count and quartile rows for one subject/grade table.
# `subj_and_grade` is whatever key was built last by the loop that fills the
# dictionary.
print(subj_and_grade)
score_summary = dfs_by_subj_and_grade[subj_and_grade].describe()
score_summary.drop(['mean', 'std', 'min', 'max'], axis=0)


ISEE_Reading_sixth


Unnamed: 0,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
count,36.0,37.0,36.0,34.0,37.0,34.0,35.0,38.0,37.0,37.0,36.0,25.0,29.0
25%,65.0,51.0,59.5,60.25,45.0,66.25,49.0,43.25,74.0,80.0,79.0,71.0,72.0
50%,75.5,71.0,79.0,83.0,83.0,85.0,72.0,51.0,91.0,87.0,86.0,86.0,86.0
75%,88.5,83.0,90.0,92.0,94.0,96.75,85.0,70.0,98.0,93.0,93.0,92.0,92.0


In [12]:
# Rewrites each dictionary key into a chart title, e.g.
# 'ISEE_Math_sixth' -> 'Sixth Grade Math Admissions Scores'.
#
# The loop runs over a snapshot of the keys because the dictionary is modified
# inside the loop. Keys that do not split into exactly three '_'-separated
# parts (for example titles produced by an earlier run of this cell) are left
# untouched, so the cell can safely be re-run.
for old_key in list(dfs_by_subj_and_grade):
    key_list = old_key.split('_')
    if len(key_list) != 3:
        continue
    new_key = key_list[2].capitalize()+' Grade '+key_list[1].capitalize()+' Admissions Scores'
    dfs_by_subj_and_grade[new_key] = dfs_by_subj_and_grade.pop(old_key)

## Outputs graphs showing trends of the 75th, 50th, and 25th percentile standardized test scores by subject and class

##### Gives admissions and administration a sense of whether our students have grown academically stronger or weaker through the years

In [8]:
# Outputs one line chart per entry of dfs_by_subj_and_grade (8 graphs: 6th and
# 9th grade scores for the four subjects). Each chart shows the 75%, 50% and
# 25% rows of DataFrame.describe() plotted against year.

# Route Bokeh output to the notebook once, rather than on every iteration.
output_notebook()

# (describe() row label, line colour), in legend order.
percentile_styles = [("75%", "green"), ("50%", "navy"), ("25%", "firebrick")]

for graph_title in dfs_by_subj_and_grade:
    graph = dfs_by_subj_and_grade[graph_title].describe().drop(['mean', 'std','min','max'])

    # Column labels are year strings ('2004', ...); plot them as integers.
    years = [int(year_label) for year_label in graph.columns]

    # NOTE(review): newer Bokeh releases renamed `plot_width`/`plot_height` to
    # `width`/`height` and `legend=` to `legend_label=`. Update these keywords
    # if the installed Bokeh rejects them -- confirm against the Bokeh version
    # in use.
    p = figure(title=graph_title, plot_width=400, plot_height=400)
    p.xaxis.axis_label = 'Year Admitted'
    p.yaxis.axis_label = 'Percentile'

    # Select each percentile row by its label instead of by row position, so
    # the lines stay correct even if the set of summary rows changes.
    for percentile, line_color in percentile_styles:
        p.line(x=years, y=graph.loc[percentile].tolist(),
               legend = percentile, color = line_color)

    p.legend.location = "bottom_right"

    show(p)