In [1]:
# Dependencies and Setup
import pandas as pd

In [2]:
# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

#school_data

In [3]:
# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
#school_data_complete.head(15)

In [4]:
#look at data types
#school_data_complete.dtypes

In [5]:
#Ensure no data is missing
#school_data_complete.count()

In [6]:
#find total number of schools using merged dataframe
total_schools = len(school_data_complete["school_name"].unique())
#total_schools 

In [7]:
#find total number of students using merged dataframe
total_students = school_data_complete["Student ID"].count()
#total_students

In [8]:
#find total budget using school dataframe
total_budget = school_data["budget"].sum()
#total_budget

In [9]:
#find average math score
avg_math = school_data_complete["math_score"].mean()
#avg_math

In [10]:
#find average reading score
avg_reading = school_data_complete["reading_score"].mean()
#avg_reading 

In [11]:
#Percent passing math
count_math = school_data_complete[(school_data_complete["math_score"]>=70)].count()["Student ID"]
percent_math = count_math / total_students
#percent_math

In [12]:
#Percent passing reading
count_reading = school_data_complete[(school_data_complete["reading_score"]>=70)].count()["Student ID"]
percent_reading = count_reading/total_students
#percent_reading

In [13]:
#Find overall number of students passing math AND reading
overall_count = school_data_complete[(school_data_complete["reading_score"]>=70) & (school_data_complete["math_score"]>=70)].count()["Student ID"]
overall_percent = overall_count/total_students
#overall_percent

## Summary of the Entire School District

* Total number of schools
* Total number of students
* Total budget
* Average math score 
* Average reading score
* Percentage of students with a passing math score
* Percentage of students with a passing reading score
* Percentage of students with both passing scores in reading and math

* (Passing is regarded as 70 and above)

In [14]:
#Create dataframe to display all summary variables
district_summary_df = pd.DataFrame ({
    "Total Schools": [total_schools],
    "Total Students": [total_students],
    "Total Budget": [total_budget],
    "Average Math Score": [avg_math],
    "Average Reading Score": [avg_reading],
    "Percent Passing Math": [percent_math],
    "Percent Passing Reading": [percent_reading],
    "Overall Passing Rate": [overall_percent]
    
    
})
#district_summary_df

In [15]:
#clean data and format
district_summary_df["Total Students"] = district_summary_df["Total Students"].map("{:,}".format)
district_summary_df["Total Budget"] = district_summary_df["Total Budget"].map("${:,.0f}".format)
district_summary_df["Percent Passing Math"] = district_summary_df["Percent Passing Math"].map("{:.2%}".format)
district_summary_df["Percent Passing Reading"] = district_summary_df["Percent Passing Reading"].map("{:.2%}".format)
district_summary_df["Overall Passing Rate"] = district_summary_df["Overall Passing Rate"].map("{:.2%}".format)

district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Passing Rate
0,15,39170,"$24,649,428",78.985371,81.87784,74.98%,85.81%,65.17%


## Summary of the Each School in the  District

  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * Percentage Passing Math
  * Percentage Passing Reading
  * Percent Overall Passing
  
  * (Passing is regarded as 70 and above)
 

In [16]:
#Find school type from school_data 
school_types =school_data.set_index("school_name")['type']
#school_types

In [17]:
#total students in each school
school_student_count = school_data_complete['school_name'].value_counts()
#school_student_count

In [18]:
#school budget for each school
school_budget_count = school_data_complete.groupby(["school_name"]).mean()['budget']
#school_budget_count

In [19]:
#budget per student
school_per_student_budget = school_budget_count / school_student_count
#school_per_student_budget

In [20]:
#Average math score
school_avg_math = school_data_complete.groupby('school_name').mean()['math_score']
#school_avg_math

In [21]:
#Average Reading score per school
school_avg_reading = school_data_complete.groupby('school_name').mean()['reading_score']
#school_avg_reading

In [22]:
#percent of students passing math per school
school_passing_math = school_data_complete[(school_data_complete['math_score']>=70)]
school_passing_math_count = school_passing_math.groupby(['school_name']).count()['Student ID']
school_passing_math_percent = (school_passing_math_count / school_student_count*100)
#school_passing_math_percent

In [23]:
#percent of students passing reading per school
school_passing_reading = school_data_complete[(school_data_complete['reading_score']>=70)]
school_passing_reading_count = school_passing_reading.groupby(['school_name']).count()['Student ID']
school_passing_reading_percent = (school_passing_reading_count / school_student_count)
#school_passing_reading_percent

In [24]:
#percent of students passing math per school
school_passing_overall = school_data_complete[(school_data_complete['math_score']>=70) & (school_data_complete['reading_score']>=70)]
school_passing_overall_count = school_passing_overall.groupby(['school_name']).count()['Student ID']
school_passing_overall_percent = (school_passing_overall_count / school_student_count)
#school_passing_overall_percent

In [25]:
#create data frame with school summary data
school_summary_df = pd.DataFrame ({
    "School Type": school_types,
    "Total Students": school_student_count,
    "Total School Budget": school_budget_count,
    "Budget per Student": school_per_student_budget,
    "Average Math Score": school_avg_math,
    "Average Reading Score": school_avg_reading,
    "Percent Passing Math": school_passing_math_percent,
    "Percent Passing Reading": school_passing_reading_percent,
    "Overall Percent Passing": school_passing_overall_percent
    
    
})


In [26]:
#Display table
school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Budget per Student,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Percent Passing
Bailey High School,District,4976,3124928.0,628.0,77.048432,81.033963,66.680064,0.819333,0.546423
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,0.970398,0.913348
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,0.807392,0.532045
Ford High School,District,2739,1763916.0,644.0,77.102592,80.746258,68.309602,0.79299,0.542899
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,0.97139,0.905995
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,0.80863,0.535275
Holden High School,Charter,427,248087.0,581.0,83.803279,83.814988,92.505855,0.962529,0.892272
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,0.813164,0.535139
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,0.812224,0.535392
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,0.959459,0.905405


## Top Performing Schools (By Percent Overall Passing)

In [27]:
#sort school summary datafram by overall percent passing and return top 5
sorted_summary = school_summary_df.sort_values('Overall Percent Passing')
sorted_summary.head(5)


Unnamed: 0,School Type,Total Students,Total School Budget,Budget per Student,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Percent Passing
Rodriguez High School,District,3999,2547363.0,637.0,76.842711,80.744686,66.366592,0.802201,0.529882
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,0.807392,0.532045
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,0.813164,0.535139
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,0.80863,0.535275
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,0.812224,0.535392


## Bottom Performing Schools (By Percent Overall Passing)

In [28]:
#use sorted data and return bottom 5
sorted_summary.tail(5)

Unnamed: 0,School Type,Total Students,Total School Budget,Budget per Student,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Percent Passing
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,0.959459,0.905405
Wilson High School,Charter,2283,1319574.0,578.0,83.274201,83.989488,93.867718,0.965396,0.905826
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,0.97139,0.905995
Thomas High School,Charter,1635,1043130.0,638.0,83.418349,83.84893,93.272171,0.973089,0.90948
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,0.970398,0.913348


## Math Scores by Grade

* Average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.



In [29]:
#find average math score for 9th graders by school
ninth_math = school_data_complete[(school_data_complete['grade']=='9th')]
ninth_math_average = ninth_math.groupby(['school_name']).mean()['math_score']
#ninth_math_average

In [30]:
#find average math score for 10th graders by school
tenth_math = school_data_complete[(school_data_complete['grade']=='10th')]
tenth_math_average = tenth_math.groupby(['school_name']).mean()['math_score']
#tenth_math_average

In [31]:
#find average math score for 11th graders by school
eleventh_math = school_data_complete[(school_data_complete['grade']=='11th')]
eleventh_math_average = eleventh_math.groupby(['school_name']).mean()['math_score']
#eleventh_math_average

In [32]:
#find average math score for 12th graders by school
twelfth_math = school_data_complete[(school_data_complete['grade']=='12th')]
twelfth_math_average = twelfth_math.groupby(['school_name']).mean()['math_score']
#twelfth_math_average

In [33]:
#create data frame for math average by grade and school
math_grade_df = pd.DataFrame ({
    "9th": ninth_math_average,
    "10th": tenth_math_average,
    "11th": eleventh_math_average,
    "12th": twelfth_math_average
    
    
})
math_grade_df

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


## Reading Score by Grade 

* Average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

In [34]:
#find average reading score for 9th graders by school
ninth_reading = school_data_complete[(school_data_complete['grade']=='9th')]
ninth_reading_average = ninth_reading.groupby(['school_name']).mean()['reading_score']
#ninth_reading_average

In [35]:
#find average reading score for 10th graders by school
tenth_reading = school_data_complete[(school_data_complete['grade']=='10th')]
tenth_reading_average = tenth_reading.groupby(['school_name']).mean()['reading_score']
#tenth_reading_average

In [36]:
#find average reading score for 11th graders by school
eleventh_reading = school_data_complete[(school_data_complete['grade']=='11th')]
eleventh_reading_average = eleventh_reading.groupby(['school_name']).mean()['reading_score']
#eleventh_reading_average

In [37]:
#find average reading score for 12th graders by school
twelfth_reading = school_data_complete[(school_data_complete['grade']=='12th')]
twelfth_reading_average = twelfth_reading.groupby(['school_name']).mean()['reading_score']
#twelfth_reading_average

In [38]:
#create dataframe for reading averages by grade and school
reading_grade_df = pd.DataFrame ({
    "9th": ninth_reading_average,
    "10th": tenth_reading_average,
    "11th": eleventh_reading_average,
    "12th": twelfth_reading_average
    
    
})
reading_grade_df

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


## School performance sorted by School Spending

In [39]:
bins = [0, 585, 630, 645, 680]
labels =['Less than $585', '$585-$630', '$630-$645', '$645-$680']


In [40]:
school_summary_df.dtypes

School Type                 object
Total Students               int64
Total School Budget        float64
Budget per Student         float64
Average Math Score         float64
Average Reading Score      float64
Percent Passing Math       float64
Percent Passing Reading    float64
Overall Percent Passing    float64
dtype: object

In [41]:
school_summary_df['Budget Summary']=pd.cut(school_summary_df["Budget per Student"], bins, labels=labels, include_lowest=True)

In [43]:
budget_summary_df=school_summary_df.groupby("Budget Summary").mean()


Unnamed: 0_level_0,Total Students,Total School Budget,Budget per Student,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Percent Passing
Budget Summary,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Less than $585,1592.0,924604.2,581.0,83.455399,83.933814,93.460096,0.966109,0.903695
$585-$630,2291.75,1421222.0,615.5,81.899826,83.155286,87.133538,0.927182,0.814186
$630-$645,2830.5,1809705.0,639.5,78.518855,81.624473,73.484209,0.843918,0.628577
$645-$680,4104.333333,2675768.0,652.333333,76.99721,81.027843,66.164813,0.81134,0.535269


In [44]:
budget_summary_df = budget_summary_df.drop('Total Students',1)
budget_summary_df = budget_summary_df.drop('Total School Budget',1)
budget_summary_df = budget_summary_df.drop('Budget per Student',1)

In [45]:
budget_summary_df["Percent Passing Math"] = budget_summary_df["Percent Passing Math"].map("{:.2%}".format)

In [46]:
budget_summary_df["Percent Passing Reading"] = budget_summary_df["Percent Passing Reading"].map("{:.2%}".format)

In [47]:
budget_summary_df["Overall Percent Passing"] = budget_summary_df["Overall Percent Passing"].map("{:.2%}".format)

In [48]:
budget_summary_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Percent Passing
Budget Summary,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Less than $585,83.455399,83.933814,9346.01%,96.61%,90.37%
$585-$630,81.899826,83.155286,8713.35%,92.72%,81.42%
$630-$645,78.518855,81.624473,7348.42%,84.39%,62.86%
$645-$680,76.99721,81.027843,6616.48%,81.13%,53.53%


## School performance sorted by School Size

In [None]:
bins = [0, 1000, 2000, 5000]
labels =['Small (Less than 1000)', 'Medium (1000-2000)', 'Large (2000-5000)']

In [None]:
school_summary_df['School Size Summary']=pd.cut(school_summary_df["Total Students"], bins, labels=labels, include_lowest=True)

In [None]:
school_size_df=school_summary_df.groupby('School Size Summary').mean()
school_size_df = school_size_df.drop('Total Students',1)
school_size_df = school_size_df.drop('Total School Budget',1)
school_size_df = school_size_df.drop('Budget per Student',1)
school_size_df

## School Performance sorted by School Type

In [None]:
school_type_df=school_summary_df.groupby('School Type').mean()
school_type_df = school_type_df.drop('Total Students',1)
school_type_df = school_type_df.drop('Total School Budget',1)
school_type_df = school_type_df.drop('Budget per Student',1)
school_type_df