In [None]:
# import necessary modules
import pandas as pd
import os

In [None]:
# build the paths to both input CSV files, which live in the Resources folder
students_csvPath, schools_csvPath = (
    os.path.join("Resources", csvName)
    for csvName in ("students_complete.csv", "schools_complete.csv")
)

In [None]:
# load each CSV into its initial (unmodified) DataFrame
source_students_df, source_schools_df = map(
    pd.read_csv, (students_csvPath, schools_csvPath)
)

In [None]:
# Rename the raw CSV columns to consistent Title Case labels.
# Each mapping entry must be written as "old": "new" and separated by commas.
# DataFrame.rename ignores keys that are not present (errors="ignore" is the
# default), so re-running this cell after the rename is a harmless no-op.
source_students_df = source_students_df.rename(columns={
    "Student ID": "Student ID",        # already in the target format
    "student_name": "Student Name",
    "gender": "Gender",
    "grade": "Grade",
    "school_name": "School Name",
    "reading_score": "Reading Score",
    "math_score": "Math Score",
})

source_schools_df = source_schools_df.rename(columns={
    "School ID": "School ID",          # already in the target format
    "school_name": "School Name",
    "type": "School Type",
    "size": "Total Students",
    "budget": "Total School Budget",
})

In [None]:
# define threshold values
# minimum score that counts as passing; the metric calculations below compare
# math and reading scores against it with >= (so a score equal to it passes)
passingThreshold = 70

In [None]:
# calculate the district summary metrics
# These read the frames loaded from the CSV files (source_schools_df /
# source_students_df) and the Title Case column labels that the rename cell
# maps the raw headers to.
totalSchools = len(source_schools_df)      # one row per school
totalStudents = len(source_students_df)    # one row per student
totalBudget = source_schools_df["Total School Budget"].sum()
avgMathScore = source_students_df["Math Score"].mean()
avgReadingScore = source_students_df["Reading Score"].mean()

# boolean masks: True where the student met the passing threshold
passedMath = source_students_df["Math Score"] >= passingThreshold
passedReading = source_students_df["Reading Score"] >= passingThreshold

# the mean of a boolean Series is the fraction of True values; x100 gives a percentage
passingMath = passedMath.mean() * 100
passingReading = passedReading.mean() * 100
passingBoth = (passedMath & passedReading).mean() * 100

In [None]:
# create the district summary DataFrame
# A single-row table; every value except the school count is pre-formatted
# as a display string (thousands separators, currency and percent signs).
district_df = pd.DataFrame({
    "Total Schools": [totalSchools],
    "Total Students": [f"{totalStudents:,.0f}"],
    "Total Budget": [f"${totalBudget:,.2f}"],
    "Avg Math Score": [f"{avgMathScore:,.2f}"],
    "Avg Reading Score": [f"{avgReadingScore:,.2f}"],
    "Passing Math": [f"{passingMath:,.2f}%"],
    "Passing Reading": [f"{passingReading:,.2f}%"],
    "Passing": [f"{passingBoth:,.2f}%"]})

# Display without the meaningless 0 index. Styler.hide_index() was deprecated
# in pandas 1.4 and removed in 2.0 in favour of Styler.hide(axis="index");
# fall back to the old method only when the new one is unavailable.
district_style = district_df.style
if hasattr(district_style, "hide"):
    district_style = district_style.hide(axis="index")
else:
    district_style = district_style.hide_index()
district_style

In [None]:
# create the school summary DataFrame
# Per-school totals of the numeric student columns. numeric_only=True is
# explicit because pandas 2.x no longer drops non-numeric columns from
# groupby sums automatically.
studentsbySchool_df = (
    source_students_df
    .groupby("School Name")
    .sum(numeric_only=True)
)

# Merge into a new frame built from the loaded school data rather than
# overwriting one of the merge inputs, so re-running this cell gives the same
# result. "School Name" is a column on the left and the index name on the right.
schools_df = pd.merge(source_schools_df, studentsbySchool_df, on="School Name")
schools_df.head()






# schools_df = schools_df.rename(columns={
#                         "type": "School Type",
#                         "size": "Total Students",
#                         "budget": "Total School Budget",
#                         "school_name": "School"})
#schools_df["Per Student Budget"] = schools_df["Total School Budget"] / schools_df["Total Students"]
#schools_df["Average Math Score"] = students_df.groupby("school_name").sum()
#students_df.groupby("school_name").sum()


#merged_df.groupby('school_name').sum()

#school_df = pd.merge(schools_df, students_df, on="school_name", how="left").sort_values("school_name", ascending=True).groupby(["School ID", "school_name"])

# I could not find an elegant way to combine merge/sort/groupby in order to attain these results so I am resorting to a
# brute force method

# get a list of the school names
# schools_List = schools_df["school_name"]
# schoolTypes_List = []
# totalStudents_List = []
# totalBudgets_List = []
# perStudentBudget_List = []
# avgMathScore_List = []
# avgReadingScore_List = []
# percentMathScore_List = []
# percentReadingScore_List = []
# percentOveral_List = []

# # iterate through each of the school names
# for i in range(len(schools_List)):
#     schoolTypes_List.append(schools_df["school_name"][i])
#     totalStudents_List.append(sum(students_df["school_name"] == schools_List[i]))
#     totalBudgets_List.append(sum(schools_df["budget"] == schools_List[i]))
    