# PyCitySchools

### Adding the Pandas dependency

In [25]:
import pandas as pd
import os

# Build the paths to the two input CSV files inside the "Resources" folder
# (relative to the current working directory).
resources_dir = "Resources"
school_data_to_load = os.path.join(resources_dir, "schools_complete.csv")
student_data_to_load = os.path.join(resources_dir, "students_complete.csv")

# Read each CSV file into its own DataFrame.
student_data_df = pd.read_csv(student_data_to_load)
school_data_df = pd.read_csv(school_data_to_load)

### Add each prefix and suffix to remove to a list. Iterate through the words in the "prefixes_suffixes" list and replace them with an empty string, "".

In [26]:
# Titles and credentials to strip from the student names. Each token includes
# its adjoining space, so that space is removed together with the token.
prefixes_suffixes = ["Dr. ", "Mr. ", "Ms. ", "Mrs. ", "Miss ", " MD", " DDS", " DVM", " PhD"]

# Remove every token from the "student_name" column, one token per pass.
# regex=False makes the match literal: without it, pandas versions whose
# default is regex matching treat the "." in "Dr. ", "Mr. ", ... as "any
# character", which can strip text that is not actually a title.
for word in prefixes_suffixes:
    student_data_df["student_name"] = student_data_df["student_name"].str.replace(word, "", regex=False)

### Counting and declaring variables

In [55]:
# Combine the student rows with their school's columns; "school_name" is the
# only join key (listing it twice adds nothing).
school_data_complete_df = pd.merge(student_data_df, school_data_df, on="school_name")

# Number of rows with a non-null "Student ID".
student_count = school_data_complete_df["Student ID"].count()

# Keep one row per school so per-school values are counted exactly once.
schools_in_data_df = school_data_complete_df.drop_duplicates(subset="school_name")
school_count = len(schools_in_data_df)

# Sum one budget per school. Summing the unique budget *values* instead would
# silently drop a school whose budget happens to equal another school's.
total_budget = schools_in_data_df["budget"].sum()

### Creating function for calculating percentages

In [65]:
def percentage_calc(count, total_count):
    """Return ``count`` as a percentage of ``total_count``.

    ``total_count`` is converted to ``float`` before dividing, and the
    resulting fraction is multiplied by 100.
    """
    fraction = count / float(total_count)
    return fraction * 100

### Determining average scores and counting students with passing grades

In [56]:
# Mean score per subject across all rows of the merged DataFrame.
average_math_score = school_data_complete_df["math_score"].mean()
average_reading_score = school_data_complete_df["reading_score"].mean()

# A score of 70 or higher counts as passing.
reading_pass_mask = school_data_complete_df["reading_score"] >= 70
math_pass_mask = school_data_complete_df["math_score"] >= 70

# Rows that pass reading, pass math, and pass both subjects.
passing_reading = school_data_complete_df[reading_pass_mask]
passing_math = school_data_complete_df[math_pass_mask]
passing_both = school_data_complete_df[reading_pass_mask & math_pass_mask]

# Counts are taken over the non-null "student_name" values of each subset.
passing_reading_count = passing_reading["student_name"].count()
passing_math_count = passing_math["student_name"].count()
passing_both_count = passing_both["student_name"].count()

# Express each count as a percentage of the total student count.
passing_reading_percentage = percentage_calc(passing_reading_count, student_count)
passing_math_percentage = percentage_calc(passing_math_count, student_count)
passing_both_percentage = percentage_calc(passing_both_count, student_count)

### District Summary DataFrame / Changing the format for Budget, Average Scores, and Score Percentages

In [83]:
# Collect the district-wide metrics into a single-row DataFrame.
summary_row = {
    "Total Schools": school_count,
    "Total Students": student_count,
    "Total Budget": total_budget,
    "Average Reading Score": average_reading_score,
    "Average Math Score": average_math_score,
    "% Passing Reading": passing_reading_percentage,
    "% Passing Math": passing_math_percentage,
    "% Overall Passing": passing_both_percentage,
}
district_summary_df = pd.DataFrame([summary_row])

# Display format for each column; "Total Schools" is left unformatted.
# Formatting turns the values into strings.
column_formats = {
    "Total Students": "{:,}".format,
    "Total Budget": "${:,.2f}".format,
    "Average Reading Score": "{:.1f}".format,
    "Average Math Score": "{:.1f}".format,
    "% Passing Reading": "{:.2f}%".format,
    "% Passing Math": "{:.2f}%".format,
    "% Overall Passing": "{:.2f}%".format,
}
for column_name, formatter in column_formats.items():
    district_summary_df[column_name] = district_summary_df[column_name].map(formatter)

# Reorder the columns so each math column precedes its reading counterpart.
new_column_order = [
    "Total Schools",
    "Total Students",
    "Total Budget",
    "Average Math Score",
    "Average Reading Score",
    "% Passing Math",
    "% Passing Reading",
    "% Overall Passing",
]
district_summary_df = district_summary_df[new_column_order]
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",79.0,81.9,74.98%,85.81%,65.17%
