In [41]:
# Add Pandas and os packages
import pandas as pd
import os

# Build the paths to both input CSV files inside the local Resources folder
school_data_load, student_data_load = (
    os.path.join(".", "Resources", csv_name)
    for csv_name in ("schools_complete.csv", "students_complete.csv")
)

# Load the school-level records into a DataFrame
school_data_df = pd.read_csv(school_data_load)

# Load the student-level records into a DataFrame
student_data_df = pd.read_csv(student_data_load)

# Titles (with trailing space) and credentials (with leading space) that are
# stripped from the raw student names.
prefixes_suffixes = ['Dr. ', 'Miss ', 'Mr. ', 'Mrs. ', 'Ms. ',
                     ' DVM', ' PhD', ' MD', ' DDS']

# Remove prefixes and suffixes from student data.
# regex=False forces literal matching: pandas versions before 2.0 treat the
# pattern as a regular expression by default, in which case the "." in
# "Dr. ", "Mr. ", "Mrs. " and "Ms. " would match ANY character and could
# strip text that is not actually a title.
for part_of_name in prefixes_suffixes:
    student_data_df["student_name"] = student_data_df["student_name"].str.replace(
        part_of_name, "", regex=False
    )

# Merge student and school data frames on their shared "school_name" column.
# The key is given once (the original listed it twice, which is redundant).
# how="inner" is the pandas default, spelled out here because it means a
# student row is kept only when its school_name also exists in school_data_df.
school_data_complete_df = pd.merge(
    student_data_df,
    school_data_df,
    how="inner",
    on="school_name",
)

# Head counts: non-null student names and distinct school names in the merged frame
student_count = school_data_complete_df["student_name"].count()
unique_school_names = school_data_complete_df["school_name"].unique()
school_count = len(unique_school_names)

# District budget is summed from the per-school table (one row per school in that file)
total_budget = school_data_df["budget"].sum()

# Pull out the two score columns once; they feed both the averages and the pass masks
math_scores = school_data_complete_df["math_score"]
reading_scores = school_data_complete_df["reading_score"]

# Mean reading and math score across all students
average_reading_score = reading_scores.mean()
average_math_score = math_scores.mean()

# Boolean masks marking students who scored 70 or higher in each subject
passed_math = math_scores >= 70
passed_reading = reading_scores >= 70

# Data frames holding only the students that pass math / reading / both
passing_math_df = school_data_complete_df.loc[passed_math]
passing_reading_df = school_data_complete_df.loc[passed_reading]
passing_math_reading_df = school_data_complete_df.loc[passed_math & passed_reading]

# Number of students in each passing group
passing_math_count = passing_math_df["student_name"].count()
passing_reading_count = passing_reading_df["student_name"].count()
passing_both_count = passing_math_reading_df["student_name"].count()

# Share of the whole student body in each passing group, as a percentage
student_total = float(student_count)
passing_math_percent = (passing_math_count / student_total) * 100
passing_reading_percent = (passing_reading_count / student_total) * 100
passing_both_percent = (passing_both_count / student_total) * 100

# Gather the district-level metrics under their display labels; the key order
# here becomes the column order of the summary table.
district_metrics = {
    "Total Schools": school_count,
    "Total Students": student_count,
    "Total Budget": total_budget,
    "Average Math Score": average_math_score,
    "Average Reading Score": average_reading_score,
    "% Passing Math": passing_math_percent,
    "% Passing Reading": passing_reading_percent,
    "% Overall Passing": passing_both_percent,
}

# A one-element list of dicts yields a single-row DataFrame
district_summary_df = pd.DataFrame([district_metrics])

# Display format for each summary column. After this step the formatted
# columns hold strings rather than numbers.
#   "Total Students"        -> comma as thousands separator
#   "Total Budget"          -> "$", comma thousands separator, two decimals
#   average score columns   -> one decimal place
#   percentage columns      -> rounded to a whole number followed by "%"
summary_formats = {
    "Total Students": "{:,}",
    "Total Budget": "${:,.2f}",
    "Average Math Score": "{:.1f}",
    "Average Reading Score": "{:.1f}",
    "% Passing Math": "{:.0f}%",
    "% Passing Reading": "{:.0f}%",
    "% Overall Passing": "{:.0f}%",
}

# Apply every format in one pass. The stray mid-cell expression that only
# evaluated the "Total Budget" column (and displayed nothing) has been dropped.
for summary_column, number_format in summary_formats.items():
    district_summary_df[summary_column] = district_summary_df[summary_column].map(
        number_format.format
    )

# district_summary_short = ['Total Students','Total Budget','% Overall Passing']

# Last expression of the cell: displays the formatted summary table
district_summary_df


# Determine if there are any missing values in the school data.
# school_data_df.isnull().sum()

# Determine if there are any missing values in the student data.
# student_data_df.isnull().sum()

Unnamed: 0,Total Students,Total Budget,% Overall Passing
0,39170,"$24,649,428.00",65%
