In [6]:
# Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sts
from scipy.stats import linregress

In [7]:
# Relative path of the merged CSV that feeds this notebook
merged_dataset = "data/merged_school_zhi_value.csv"

# Read the CSV, then discard the stray "Unnamed: 0" column
df = pd.read_csv(merged_dataset)
merged_df = df.drop(columns="Unnamed: 0")

# Build the math-focused frame in a single chain:
#   1. drop the columns this analysis does not use,
#   2. rename the 2015 grade column to the "Grade YYYY" form,
#   3. discard every row that still contains a missing value.
math_df = (
    merged_df
    .drop(
        columns=[
            "English Language Arts Achievement",
            "English Language Arts Learning Gains",
            "English Language Arts Learning Gains of the Lowest 25%",
            "Mathematics Learning Gains",
            "Mathematics Learning Gains of the Lowest 25%",
            "Science Achievement",
            "Social Studies Achievement",
            "Middle School Acceleration",
            "Graduation Rate 2017-18",
            "College and Career Acceleration 2017-18",
            "Total Components",
            "Grade 2008",
            "Grade 2007",
            "Grade 2006",
            "Grade 2005",
            "Grade 2004",
            "Grade 2003",
            "Grade 2002",
            "Grade 2001",
            "Grade 2000",
            "Grade 1999",
            "2020-01",
            "2020-02",
        ]
    )
    .rename(columns={"Informational Baseline Grade 2015": "Grade 2015"})
    .dropna()
)
math_df.head()

Unnamed: 0,Zip Code,School Name,Mathematics Achievement,Total Points Earned,Percent of Total Possible Points,Grade 2019,Grade 2018,Grade 2017,Grade 2016,Grade 2015,...,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12
0,32603,J. J. FINLEY ELEMENTARY SCHOOL,56.0,377.0,54.0,B,C,B,B,A,...,279827.0,283381.0,286035.0,287334.0,287821.0,289008.0,290524.0,291696.0,291216.0,291212.0
1,32609,STEPHEN FOSTER ELEMENTARY SCHOOL,69.0,395.0,56.0,B,C,B,C,A,...,132431.0,132669.0,133012.0,133550.0,134383.0,135429.0,137031.0,138650.0,140052.0,141110.0
2,32641,LAKE FOREST ELEMENTARY SCHOOL,26.0,223.0,32.0,D,D,C,F,F,...,101256.0,101874.0,102785.0,103191.0,103928.0,104809.0,106085.0,107431.0,108637.0,109839.0
3,32605,LITTLEWOOD ELEMENTARY SCHOOL,61.0,407.0,58.0,B,B,C,B,B,...,207175.0,207517.0,207737.0,208196.0,208743.0,209731.0,211050.0,212435.0,213381.0,214302.0
4,32609,W. A. METCALFE ELEMENTARY SCHOOL,48.0,386.0,55.0,B,D,D,C,D,...,132431.0,132669.0,133012.0,133550.0,134383.0,135429.0,137031.0,138650.0,140052.0,141110.0


In [8]:
# Descriptive statistics for the "Mathematics Achievement" column.
# The column is looked up once and reused for every statistic.
math_scores = math_df["Mathematics Achievement"]
mean = math_scores.mean()
median = math_scores.median()
variance = math_scores.var()  # pandas default ddof=1 (sample variance)
stdv = math_scores.std()      # pandas default ddof=1 (sample std. dev.)
sem = math_scores.sem()       # standard error of the mean

# Gather the statistics into a one-row summary table
math_summary_table = pd.DataFrame(
    {
        "Mean": [mean],
        "Median": [median],
        "Variance": [variance],
        "Standard Deviation": [stdv],
        "SEM": [sem],
    }
)
print("Mathematics Achievement Summary Table")
math_summary_table

Mathematics Achievement Summary Table


Unnamed: 0,Mean,Median,Variance,Standard Deviation,SEM
0,60.152594,60.0,270.844509,16.457354,0.32024


In [9]:
# Group the cleaned frame by zip code. Despite its name, `zip_df` is a
# GroupBy object, not a DataFrame.
zip_df = math_df.groupby(by=["Zip Code"])

# GroupBy.head(n) returns the first n rows of *each* group, so this shows up
# to five rows per zip code rather than five rows overall.
# NOTE(review): confirm the per-group preview is what was intended here.
zip_df.head(n=5)

Unnamed: 0,Zip Code,School Name,Mathematics Achievement,Total Points Earned,Percent of Total Possible Points,Grade 2019,Grade 2018,Grade 2017,Grade 2016,Grade 2015,...,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12
0,32603,J. J. FINLEY ELEMENTARY SCHOOL,56.0,377.0,54.0,B,C,B,B,A,...,279827.0,283381.0,286035.0,287334.0,287821.0,289008.0,290524.0,291696.0,291216.0,291212.0
1,32609,STEPHEN FOSTER ELEMENTARY SCHOOL,69.0,395.0,56.0,B,C,B,C,A,...,132431.0,132669.0,133012.0,133550.0,134383.0,135429.0,137031.0,138650.0,140052.0,141110.0
2,32641,LAKE FOREST ELEMENTARY SCHOOL,26.0,223.0,32.0,D,D,C,F,F,...,101256.0,101874.0,102785.0,103191.0,103928.0,104809.0,106085.0,107431.0,108637.0,109839.0
3,32605,LITTLEWOOD ELEMENTARY SCHOOL,61.0,407.0,58.0,B,B,C,B,B,...,207175.0,207517.0,207737.0,208196.0,208743.0,209731.0,211050.0,212435.0,213381.0,214302.0
4,32609,W. A. METCALFE ELEMENTARY SCHOOL,48.0,386.0,55.0,B,D,D,C,D,...,132431.0,132669.0,133012.0,133550.0,134383.0,135429.0,137031.0,138650.0,140052.0,141110.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3317,32459,SEASIDE NEIGHBORHOOD SCHOOL,98.0,950.0,86.0,A,A,A,A,A,...,556455.0,560597.0,562283.0,562098.0,561314.0,561586.0,562173.0,564011.0,566313.0,568156.0
3327,33431,A.D. HENDERSON UNIVERSITY SCHOOL & FAU HIGH SC...,92.0,970.0,88.0,A,A,A,A,A,...,425430.0,426154.0,427255.0,427777.0,428313.0,429252.0,430083.0,431249.0,432929.0,434365.0
3328,34987,FAU/SLCSD PALM POINTE EDUCATIONAL RESEARCH SCH...,75.0,626.0,70.0,A,A,A,A,A,...,275754.0,276839.0,277004.0,276612.0,276477.0,276489.0,276601.0,276633.0,276647.0,276785.0
3329,32311,FLORIDA STATE UNIVERSITY SCHOOL,82.0,793.0,72.0,A,A,A,A,A,...,210433.0,210698.0,211335.0,212393.0,213032.0,213375.0,213602.0,213942.0,214216.0,214488.0


In [10]:
# Save monthly data into ranges
#zhvi_19 = math_df.loc[:, "2019-01":"2019-12"]
#zhvi_18 = math_df.loc[:, "2018-01":"2018-12"]
#zhvi_17 = math_df.loc[:, "2017-01":"2017-12"]
#zhvi_16 = math_df.loc[:, "2016-01":"2016-12"]
#zhvi_15 = math_df.loc[:, "2015-01":"2015-12"]
#zhvi_14 = math_df.loc[:, "2014-01":"2014-12"]
#zhvi_13 = math_df.loc[:, "2013-01":"2013-12"]
#zhvi_12 = math_df.loc[:, "2012-01":"2012-12"]
#zhvi_11 = math_df.loc[:, "2011-01":"2011-12"]
#zhvi_10 = math_df.loc[:, "2010-01":"2010-12"]
#zhvi_09 = math_df.loc[:, "2009-01":"2009-12"]