# 2013-2017 New York State ELA (English Language Arts) and Math Test Results

In [12]:
# dependencies

import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy.stats as stats

## Data

*2013-2017 New York State Test Data by Borough and Grade:*

**ELA** https://data.cityofnewyork.us/Education/2013-2017-ELA-Test-Results-by-Grade-Borough-All/mi8r-ff2q <br>
**Math** https://data.cityofnewyork.us/Education/2013-2017-Borough-Math-Results-All/qjx7-9mep

*And further broken down by economic status:*

**ELA** https://data.cityofnewyork.us/Education/2013-2017-Borough-ELA-Results-Economic/xkkx-md5q <br>
**Math** https://data.cityofnewyork.us/Education/2013-2017-Borough-Math-Results-Economic/vpdq-ktpr

*And further broken down by race/ethnicity:*

**ELA** https://data.cityofnewyork.us/Education/2013-2017-Borough-ELA-Results-Ethnic/kjey-zuvr <br>
**Math** https://data.cityofnewyork.us/Education/2013-2017-Borough-Math-Results-Ethnic/tph5-57et

*And further broken down by gender:*

**ELA** https://data.cityofnewyork.us/Education/2013-2017-Borough-ELA-Results-Gender/5hiy-kwc2 <br>
**Math** https://data.cityofnewyork.us/Education/2013-2017-Borough-Math-Results-Gender/48ka-f6z6

### API

In [13]:
# NYC Open Data JSON endpoints, assigned as (ELA, Math) pairs -- one pair per
# breakdown. The dataset ids match the dataset pages linked in the Data section.

# all students
ela_all_url, math_all_url = (
    'https://data.cityofnewyork.us/resource/mi8r-ff2q.json',
    'https://data.cityofnewyork.us/resource/qjx7-9mep.json',
)

# economic status
ela_eco_url, math_eco_url = (
    'https://data.cityofnewyork.us/resource/xkkx-md5q.json',
    'https://data.cityofnewyork.us/resource/vpdq-ktpr.json',
)

# race/ethnicity
ela_race_url, math_race_url = (
    'https://data.cityofnewyork.us/resource/kjey-zuvr.json',
    'https://data.cityofnewyork.us/resource/tph5-57et.json',
)

# gender
ela_gender_url, math_gender_url = (
    'https://data.cityofnewyork.us/resource/5hiy-kwc2.json',
    'https://data.cityofnewyork.us/resource/48ka-f6z6.json',
)

#### All Results

In [14]:
# Retrieve the 2013-2017 test results for all students, by borough.
# Each request carries a timeout and is checked with raise_for_status(), so a
# stalled connection or an HTTP error response stops the cell here instead of
# hanging or being parsed into a DataFrame.

# ELA
ela_response = requests.get(ela_all_url, timeout=30)
ela_response.raise_for_status()
ela_results = ela_response.json()

ela_all = pd.DataFrame(ela_results)
print(f'ELA Row Count: {len(ela_all)}')
print(list(ela_all.columns))

# Math
math_response = requests.get(math_all_url, timeout=30)
math_response.raise_for_status()
math_results = math_response.json()

math_all = pd.DataFrame(math_results)
print(f'Math Row Count: {len(math_all)}')
print(list(math_all.columns))

# Last expression of the cell: preview the first Math row.
math_all.head(1)

ELA Row Count: 175
['borough', 'grade', 'year', 'category', 'number_tested', 'mean_scale_score', 'level1_n', 'level1_', 'level2_n', 'level2_', 'level3_n', 'level3_', 'level4_n', 'level4_', 'level3_4_n', 'level3_4_']
Math Row Count: 175
['borough', 'grade', 'year', 'category', 'number_tested', 'mean_scale_score', 'level1_n', 'level1_', 'level2_n', 'level2_', 'level3_n', 'level3_', 'level4_n', 'level4_', 'level3_4_n', 'level3_4_']


Unnamed: 0,borough,grade,year,category,number_tested,mean_scale_score,level1_n,level1_,level2_n,level2_,level3_n,level3_,level4_n,level4_,level3_4_n,level3_4_
0,BRONX,3,2013,All Students,15901,287,7153,45.0,5729,36.0,2216,13.9,803,5.0,3019,19.0


#### Results by Economic Status

In [15]:
# Retrieve the 2013-2017 test results broken down by economic status.
# Each request carries a timeout and is checked with raise_for_status(), so a
# stalled connection or an HTTP error response stops the cell here instead of
# hanging or being parsed into a DataFrame.

# ELA
ela_response = requests.get(ela_eco_url, timeout=30)
ela_response.raise_for_status()
ela_results = ela_response.json()

ela_eco = pd.DataFrame(ela_results)
print(f'ELA Row Count: {len(ela_eco)}')
print(list(ela_eco.columns))

# Math
math_response = requests.get(math_eco_url, timeout=30)
math_response.raise_for_status()
math_results = math_response.json()

math_eco = pd.DataFrame(math_results)
print(f'Math Row Count: {len(math_eco)}')
print(list(math_eco.columns))

# Last expression of the cell: preview the first ELA row.
ela_eco.head(1)

ELA Row Count: 350
['borough', 'grade', 'year', 'category', 'number_tested', 'mean_scale_score', 'level1_n', 'level1_', 'level2_n', 'level2_', 'level3_n', 'level3_', 'level4_n', 'level4_', 'level3_4_n', 'level3_4_']
Math Row Count: 350
['borough', 'grade', 'year', 'category', 'number_tested', 'mean_scale_score', 'level1_n', 'level1_', 'level2_n', 'level2_', 'level3_n', 'level3_', 'level4_n', 'level4_', 'level3_4_n', 'level3_4_']


Unnamed: 0,borough,grade,year,category,number_tested,mean_scale_score,level1_n,level1_,level2_n,level2_,level3_n,level3_,level4_n,level4_,level3_4_n,level3_4_
0,BRONX,3,2013,Econ Disadv,14584,286,7613,52.2,4763,32.7,2089,14.3,119,0.8,2208,15.1


#### Results by Race/Ethnicity

In [16]:
# Retrieve the 2013-2017 test results broken down by race/ethnicity.
# Each request carries a timeout and is checked with raise_for_status(), so a
# stalled connection or an HTTP error response stops the cell here instead of
# hanging or being parsed into a DataFrame.
# This cell only touches the race/ethnicity frames it builds itself.

# ELA
ela_response = requests.get(ela_race_url, timeout=30)
ela_response.raise_for_status()
ela_results = ela_response.json()

ela_race = pd.DataFrame(ela_results)
print(f'ELA Row Count: {len(ela_race)}')
print(list(ela_race.columns))

# Math
math_response = requests.get(math_race_url, timeout=30)
math_response.raise_for_status()
math_results = math_response.json()

math_race = pd.DataFrame(math_results)
print(f'Math Row Count: {len(math_race)}')
print(list(math_race.columns))

# Last expression of the cell: preview the first Math row.
math_race.head(1)

ELA Row Count: 700
['borough', 'grade', 'year', 'category', 'number_tested', 'mean_scale_score', 'level1_n', 'level1_', 'level2_n', 'level2_', 'level3_n', 'level3_', 'level4_n', 'level4_', 'level3_4_n', 'level3_4_']
Math Row Count: 700
['borough', 'grade', 'year', 'category', 'number_tested', 'mean_scale_score', 'level1_n', 'level1_', 'level2_n', 'level2_', 'level3_n', 'level3_', 'level4_n', 'level4_', 'level3_4_n', 'level3_4_']


Unnamed: 0,borough,grade,year,category,number_tested,mean_scale_score,level1_n,level1_,level2_n,level2_,level3_n,level3_,level4_n,level4_,level3_4_n,level3_4_
0,BRONX,3,2013,Asian,631,312,131,20.8,196,31.1,173,27.4,131,20.8,304,48.2


#### Results by Gender

In [17]:
# Retrieve the 2013-2017 test results broken down by gender.
# Each request carries a timeout and is checked with raise_for_status(), so a
# stalled connection or an HTTP error response stops the cell here instead of
# hanging or being parsed into a DataFrame.

# ELA
ela_response = requests.get(ela_gender_url, timeout=30)
ela_response.raise_for_status()
ela_results = ela_response.json()

ela_gender = pd.DataFrame(ela_results)
print(f'ELA Row Count: {len(ela_gender)}')
print(list(ela_gender.columns))

# Math
math_response = requests.get(math_gender_url, timeout=30)
math_response.raise_for_status()
math_results = math_response.json()

math_gender = pd.DataFrame(math_results)
print(f'Math Row Count: {len(math_gender)}')
print(list(math_gender.columns))

# Last expression of the cell: preview the first ELA row.
ela_gender.head(1)

ELA Row Count: 350
['borough', 'grade', 'year', 'category', 'number_tested', 'mean_scale_score', 'level1_n', 'level1_', 'level2_n', 'level2_', 'level3_n', 'level3_', 'level4_n', 'level4_', 'level3_4_n', 'level3_4_']
Math Row Count: 350
['borough', 'grade', 'year', 'category', 'number_tested', 'mean_scale_score', 'level1_n', 'level1_', 'level2_n', 'level2_', 'level3_n', 'level3_', 'level4_n', 'level4_', 'level3_4_n', 'level3_4_']


Unnamed: 0,borough,grade,year,category,number_tested,mean_scale_score,level1_n,level1_,level2_n,level2_,level3_n,level3_,level4_n,level4_,level3_4_n,level3_4_
0,BRONX,3,2013,Female,7553,292,3378,44.7,2681,35.5,1385,18.3,109,1.4,1494,19.8


### Data to csv files

In [22]:
# Write each raw result frame to its own CSV file under Data/.
# The directory is created first (no-op when it already exists) because
# to_csv does not create missing parent directories and would raise OSError.
# to_csv is called with its defaults, so the row index is written as well.
os.makedirs("Data", exist_ok=True)

# all students
ela_all.to_csv("Data/ela_all.csv")
math_all.to_csv("Data/math_all.csv")

# economic status
ela_eco.to_csv("Data/ela_eco.csv")
math_eco.to_csv("Data/math_eco.csv")

# race/ethnicity
ela_race.to_csv("Data/ela_race.csv")
math_race.to_csv("Data/math_race.csv")

# gender
ela_gender.to_csv("Data/ela_gender.csv")
math_gender.to_csv("Data/math_gender.csv")

## Data Cleaning and Exploration