# pycity Schools Analysis

Summary Analysis:

In this PyCitySchools analysis, we created several DataFrames (named per_school_summary, school_spending_df, spending_summary, size_summary and type_summary) to analyse the overall performance of the schools in the district, i.e. the average math and reading scores along with the percentage of students passing math, reading, or both, broken down by per-student spending, school size and school type.



Conclusions:

1. After analysing the spending_summary and size_summary DataFrames, we may say that overall performance decreases as the per-student budget or the school size increases.

2. Looking at the type_summary DataFrame, we may conclude that district schools have a much lower overall passing rate than charter schools. This matches the fact that, by the overall passing criterion, the top five schools are all charter schools whereas the bottom five schools are all district schools.

In [None]:
# Dependencies and Setup
import pandas as pd


In [None]:
# Load the school-level and student-level CSV exports into pandas DataFrames
schools_csv_path = "./Resources/schools_complete.csv"
students_csv_path = "./Resources/students_complete.csv"
school_data_df = pd.read_csv(schools_csv_path)
student_data_df = pd.read_csv(students_csv_path)

In [None]:
# Preview the first five rows of the school DataFrame
school_data_df.head(n=5)

In [None]:
# Preview the first five rows of the student DataFrame
student_data_df.head(n=5)

In [None]:
# Left-join the student table onto the school table by school name, so the
# combined dataset (all_data_df) carries the school columns on every student row.
all_data_df = school_data_df.merge(student_data_df, how="left", on="school_name")
all_data_df.head(n=5)

# District Summary 

In [None]:
# Count the distinct school names present in the merged dataset
distinct_school_names = all_data_df["school_name"].unique()
unique_schools_total = len(distinct_school_names)
unique_schools_total

In [None]:
# Count the student records (non-null student names) in the merged dataset
student_name_column = all_data_df["student_name"]
students_total = student_name_column.count()
students_total


In [None]:
# Sum the budgets from the school table (not the merged table, where each
# school's budget is repeated on every one of its student rows)
school_budgets = school_data_df["budget"]
total_budget = school_budgets.sum()
total_budget


In [None]:
# District-wide mean of every student's math score
all_math_scores = all_data_df["math_score"]
avg_math_score = all_math_scores.mean()
avg_math_score


In [None]:
# District-wide mean of every student's reading score
all_reading_scores = all_data_df["reading_score"]
avg_reading_score = all_reading_scores.mean()
avg_reading_score

In [None]:
# Percentage of students who passed math (math score greater than or equal to 70)
math_pass_mask = all_data_df["math_score"] >= 70
passing_math_count = all_data_df.loc[math_pass_mask].count()["student_name"]
passing_math_percentage = passing_math_count / float(students_total) * 100
passing_math_percentage

In [None]:
# Percentage of students who passed reading (reading score greater than or equal to 70)
reading_pass_mask = all_data_df["reading_score"] >= 70
passing_reading_count = all_data_df.loc[reading_pass_mask].count()["student_name"]
passing_reading_percentage = passing_reading_count / float(students_total) * 100
passing_reading_percentage



In [None]:
# Percentage of students who passed both math and reading (each score 70 or higher)
math_ok_mask = all_data_df["math_score"] >= 70
reading_ok_mask = all_data_df["reading_score"] >= 70
both_pass_mask = math_ok_mask & reading_ok_mask
passing_math_reading_count = all_data_df.loc[both_pass_mask].count()["student_name"]
overall_passing_rate = passing_math_reading_count / float(students_total) * 100
overall_passing_rate






In [None]:
# Build a one-row, high-level snapshot of the district's key metrics
# in a DataFrame named district_summary.
district_summary = pd.DataFrame(
    {
        "Total Schools": [unique_schools_total],
        "Total Students": [students_total],
        "Total Budget": [total_budget],
        "Average Math Score": [avg_math_score],
        "Average Reading Score": [avg_reading_score],
        "% Passing Math": [passing_math_percentage],
        "% Passing Reading": [passing_reading_percentage],
        "% Overall Passing": [overall_passing_rate],
    }
)

# Format the head-count with thousands separators and the budget as currency.
# The label must be spelled exactly "Total Students": column labels are
# case-sensitive, and assigning to a label that does not exist yet adds a new
# column instead of formatting the existing one.
district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)

# Display the DataFrame (district_summary)
district_summary


# School Summary

In [None]:
# Series of school types, indexed by school name
schools_by_name = school_data_df.set_index(["school_name"])
school_type = schools_by_name["type"]

In [None]:
# Number of student rows per school, keyed by school name
school_name_column = all_data_df["school_name"]
students_perschool = school_name_column.value_counts()

In [None]:
# Budget of each school: the merged data repeats the school's budget on every
# student row, so the per-school mean of that column is taken.
budget_by_school = all_data_df.groupby("school_name")["budget"]
budget_perschool = budget_by_school.mean()

In [None]:
# Per-student (per capita) spending: each school's budget divided by its
# student count; the two Series are aligned on school name.
capita_perschool = budget_perschool / students_perschool

In [None]:
# Mean math score of the students at each school
math_by_school = all_data_df.groupby("school_name")["math_score"]
avg_math_score_perschool = math_by_school.mean()

In [None]:
# Mean reading score of the students at each school
reading_by_school = all_data_df.groupby("school_name")["reading_score"]
avg_reading_score_perschool = reading_by_school.mean()

In [None]:
# Keep only the rows of students whose math score is 70 or higher
# (the per-school counting happens later, when the passing rates are computed)
scored_70_plus_math = all_data_df["math_score"] >= 70
passing_math_students_perschool = all_data_df.loc[scored_70_plus_math]

In [None]:
# Keep only the rows of students whose reading score is 70 or higher
# (the per-school counting happens later, when the passing rates are computed)
scored_70_plus_reading = all_data_df["reading_score"] >= 70
passing_reading_students_perschool = all_data_df.loc[scored_70_plus_reading]

In [None]:
# Keep only the rows of students who scored 70 or higher in both reading and math
reading_at_least_70 = all_data_df["reading_score"] >= 70
math_at_least_70 = all_data_df["math_score"] >= 70
passing_math_and_reading_students = all_data_df.loc[reading_at_least_70 & math_at_least_70]

In [None]:
# Per-school passing rates: passing students divided by enrolled students, as a percentage.
# NOTE: `overall_passing_rate` rebinds the name that earlier held the
# district-wide scalar; from here on it is a per-school Series.
math_passers_by_school = passing_math_students_perschool.groupby("school_name").count()["student_name"]
reading_passers_by_school = passing_reading_students_perschool.groupby("school_name").count()["student_name"]
both_passers_by_school = passing_math_and_reading_students.groupby("school_name").count()["student_name"]

passing_percent_math_perschool = (math_passers_by_school / students_perschool) * 100
passing_percent_reading_perschool = (reading_passers_by_school / students_perschool) * 100
overall_passing_rate = (both_passers_by_school / students_perschool) * 100



In [None]:
# Assemble the per-school metrics computed above into `per_school_summary`
# (one row per school; the Series are aligned on school name).
per_school_columns = {
    "School Type": school_type,
    "Total Students": students_perschool,
    "Total School Budget": budget_perschool,
    "Per Student Budget": capita_perschool,
    "Average Math Score": avg_math_score_perschool,
    "Average Reading Score": avg_reading_score_perschool,
    "% Passing Math": passing_percent_math_perschool,
    "% Passing Reading": passing_percent_reading_perschool,
    "% Overall Passing": overall_passing_rate,
}
per_school_summary = pd.DataFrame(per_school_columns)

# Render both money columns as currency strings (they are text from here on)
for money_column in ("Total School Budget", "Per Student Budget"):
    per_school_summary[money_column] = per_school_summary[money_column].map("${:,.2f}".format)

# Display the DataFrame
per_school_summary




# Highest-Performing Schools (by % Overall Passing)

In [None]:
# Order the schools from highest to lowest `% Overall Passing` and show the first five
top5_performing_schools = per_school_summary.sort_values(by="% Overall Passing", ascending=False)
top5_performing_schools.head()

# Bottom Performing Schools (By % Overall Passing)

In [None]:
# Order the schools from lowest to highest `% Overall Passing` and show the first five
bottom5_performing_schools = per_school_summary.sort_values(by="% Overall Passing")
bottom5_performing_schools.head()

# Math Scores by Grade

In [None]:
# Split the merged data into one DataFrame per grade level
grade_of_student = all_data_df["grade"]
Ninth_grade_students = all_data_df.loc[grade_of_student == "9th"]
Tenth_grade_students = all_data_df.loc[grade_of_student == "10th"]
Eleventh_grade_students = all_data_df.loc[grade_of_student == "11th"]
Twelth_grade_students = all_data_df.loc[grade_of_student == "12th"]

# Mean math score per school within each grade
Ninth_grade_students_avgmath_perschool = Ninth_grade_students.groupby("school_name")["math_score"].mean()
Tenth_grade_students_avgmath_perschool = Tenth_grade_students.groupby("school_name")["math_score"].mean()
Eleventh_grade_students_avgmath_perschool = Eleventh_grade_students.groupby("school_name")["math_score"].mean()
Twelth_grade_students_avgmath_perschool = Twelth_grade_students.groupby("school_name")["math_score"].mean()

# One column per grade, one row per school
math_columns_by_grade = {
    "9th": Ninth_grade_students_avgmath_perschool,
    "10th": Tenth_grade_students_avgmath_perschool,
    "11th": Eleventh_grade_students_avgmath_perschool,
    "12th": Twelth_grade_students_avgmath_perschool,
}
math_scores_by_grade = pd.DataFrame(math_columns_by_grade)

# Drop the index label so the displayed table has no "school_name" header
math_scores_by_grade.index.name = None

# Display the DataFrame
math_scores_by_grade

# Reading Score by Grade

In [None]:
# Split the merged data into one DataFrame per grade level
grade_of_student = all_data_df["grade"]
Ninth_grade_students = all_data_df.loc[grade_of_student == "9th"]
Tenth_grade_students = all_data_df.loc[grade_of_student == "10th"]
Eleventh_grade_students = all_data_df.loc[grade_of_student == "11th"]
Twelth_grade_students = all_data_df.loc[grade_of_student == "12th"]

# Mean reading score per school within each grade
Ninth_grade_students_avgreading_perschool = Ninth_grade_students.groupby("school_name")["reading_score"].mean()
Tenth_grade_students_avgreading_perschool = Tenth_grade_students.groupby("school_name")["reading_score"].mean()
Eleventh_grade_students_avgreading_perschool = Eleventh_grade_students.groupby("school_name")["reading_score"].mean()
Twelth_grade_students_avgreading_perschool = Twelth_grade_students.groupby("school_name")["reading_score"].mean()

# One column per grade, one row per school
reading_columns_by_grade = {
    "9th": Ninth_grade_students_avgreading_perschool,
    "10th": Tenth_grade_students_avgreading_perschool,
    "11th": Eleventh_grade_students_avgreading_perschool,
    "12th": Twelth_grade_students_avgreading_perschool,
}
reading_scores_by_grade = pd.DataFrame(reading_columns_by_grade)

# Pin the column order explicitly, then drop the index label so the displayed
# table has no "school_name" header
grade_order = ["9th", "10th", "11th", "12th"]
reading_scores_by_grade = reading_scores_by_grade[grade_order]
reading_scores_by_grade.index.name = None

# Display the DataFrame
reading_scores_by_grade



# Scores by School Spending

In [None]:
# Bin edges for per-student spending. pd.cut's default intervals are
# right-inclusive, so the buckets are (0, 585], (585, 630], (630, 645], (645, 680].
spending_bins = [0, 585, 630, 645, 680]

# Display label for each of the four buckets above
ranges_names = ["<$585", "$585-630", "$630-645", "$645-680"]

# Work on a copy so per_school_summary itself does not gain the new column
school_spending_df = per_school_summary.copy()

# Bucket the numeric per-capita spending (capita_perschool); the
# "Per Student Budget" column of the summary was already turned into text.
spending_bucket = pd.cut(capita_perschool, bins=spending_bins, labels=ranges_names)
school_spending_df["Spending Ranges (Per Student)"] = spending_bucket
school_spending_df




In [None]:
# Average each metric across the schools that fall in the same spending range
by_spending_range = school_spending_df.groupby(["Spending Ranges (Per Student)"])
spending_math_scores = by_spending_range["Average Math Score"].mean()
spending_reading_scores = by_spending_range["Average Reading Score"].mean()
spending_passing_math = by_spending_range["% Passing Math"].mean()
spending_passing_reading = by_spending_range["% Passing Reading"].mean()
spending_passing_overall = by_spending_range["% Overall Passing"].mean()




In [None]:
# Combine the per-spending-range averages into one table, spending_summary
spending_columns = {
    "Average Math Score": spending_math_scores,
    "Average Reading Score": spending_reading_scores,
    "% Passing Math": spending_passing_math,
    "% Passing Reading": spending_passing_reading,
    "% Overall Passing": spending_passing_overall,
}
spending_summary = pd.DataFrame(spending_columns)

# Display the DataFrame
spending_summary


# Scores by School Size

In [None]:
# Bin edges for school size, measured in number of students
size_bins = [0, 1000, 2000, 5000]

# Display label for each school-size bucket, in the same order as the bins
labels = [
    "Small (<1000)",
    "Medium (1000-2000)",
    "Large(2000-5000)",
]

In [None]:
# Bucket every school by its "Total Students" count and store the bucket label
# in a new "School Size" column (this modifies per_school_summary in place).
enrolment = per_school_summary["Total Students"]
per_school_summary["School Size"] = pd.cut(enrolment, bins=size_bins, labels=labels)
per_school_summary

In [None]:
# Average each metric across the schools that fall in the same size bucket
by_school_size = per_school_summary.groupby(["School Size"])
size_math_scores = by_school_size["Average Math Score"].mean()
size_reading_scores = by_school_size["Average Reading Score"].mean()
size_passing_math = by_school_size["% Passing Math"].mean()
size_passing_reading = by_school_size["% Passing Reading"].mean()
size_passing_overall = by_school_size["% Overall Passing"].mean()

In [None]:
# Combine the per-size averages into `size_summary`, which breaks down school
# performance by school size (small, medium, or large)
size_columns = {
    "Average Math Score": size_math_scores,
    "Average Reading Score": size_reading_scores,
    "% Passing Math": size_passing_math,
    "% Passing Reading": size_passing_reading,
    "% Overall Passing": size_passing_overall,
}
size_summary = pd.DataFrame(size_columns)

# Display the DataFrame
size_summary

# Scores by School Type

In [None]:
# Average each metric across the schools of the same "School Type"
by_school_type = per_school_summary.groupby(["School Type"])
type_avg_math_scores = by_school_type["Average Math Score"].mean()
type_avg_reading_scores = by_school_type["Average Reading Score"].mean()
type_avg_percent_passing_math_scores = by_school_type["% Passing Math"].mean()
type_avg_percent_passing_reading_scores = by_school_type["% Passing Reading"].mean()
type_avg_percent_overall_passing_scores = by_school_type["% Overall Passing"].mean()

In [None]:
# Combine the per-type averages into one table, `type_summary`
type_columns = {
    "Average Math Score": type_avg_math_scores,
    "Average Reading Score": type_avg_reading_scores,
    "% Passing Math": type_avg_percent_passing_math_scores,
    "% Passing Reading": type_avg_percent_passing_reading_scores,
    "% Overall Passing": type_avg_percent_overall_passing_scores,
}
type_summary = pd.DataFrame(type_columns)

# Display the results
type_summary