# School District Analysis Deliverables 


In [1]:
# Deliverables: tables presenting each of the following metrics:
# - Top 5 and bottom 5 performing schools, based on the overall passing rate
# - The average math score received by students in each grade level at each school
# - The average reading score received by students in each grade level at each school
# - School performance based on the budget per student
# - School performance based on the school size
# - School performance based on the type of school

In [2]:
#Import dependencies 

import pandas as pd
import os

In [3]:
# Relative locations of the two input CSV files (resolved against the working directory).
school_data_to_load, student_data_to_load = (
    "Resources/schools_complete.csv",
    "Resources/students_complete.csv",
)

In [4]:
# Load the school-level CSV into a DataFrame, then preview its first five rows.
school_data_df = pd.read_csv(filepath_or_buffer=school_data_to_load)
school_data_df.head(n=5)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [5]:
# Load the student-level CSV into a DataFrame, then preview its first five rows.
student_data_df = pd.read_csv(filepath_or_buffer=student_data_to_load)
student_data_df.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [6]:
# Collect the names of every entry (files and folders) in the current working directory.
files = os.listdir(".")
files

['.git',
 '.ipynb_checkpoints',
 'cleaning_data.ipynb',
 'cleaning_student_names.ipynb',
 'Jupyter_Practice.ipynb',
 'PyCitySchools.ipynb',
 'README.md',
 'Resources']

In [7]:
# Capture the absolute path of the current working directory and display it.
cwd = os.getcwd()
cwd

'C:\\Users\\mdele\\OneDrive\\Desktop\\Bootcamp Clean\\Module 4\\School_District_Analysis_Clean'

In [8]:
# Show the full path of every directory entry, one per line.
for file in files:
    full_path = os.path.join(cwd, file)
    print(full_path)

C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean\Module 4\School_District_Analysis_Clean\.git
C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean\Module 4\School_District_Analysis_Clean\.ipynb_checkpoints
C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean\Module 4\School_District_Analysis_Clean\cleaning_data.ipynb
C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean\Module 4\School_District_Analysis_Clean\cleaning_student_names.ipynb
C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean\Module 4\School_District_Analysis_Clean\Jupyter_Practice.ipynb
C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean\Module 4\School_District_Analysis_Clean\PyCitySchools.ipynb
C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean\Module 4\School_District_Analysis_Clean\README.md
C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean\Module 4\School_District_Analysis_Clean\Resources


In [9]:
# Build the list of full paths by joining the working directory with each entry name.
File_Path_list = [os.path.join(cwd, file) for file in files]

In [10]:
# Display the list of joined paths.
File_Path_list

['C:\\Users\\mdele\\OneDrive\\Desktop\\Bootcamp Clean\\Module 4\\School_District_Analysis_Clean\\.git',
 'C:\\Users\\mdele\\OneDrive\\Desktop\\Bootcamp Clean\\Module 4\\School_District_Analysis_Clean\\.ipynb_checkpoints',
 'C:\\Users\\mdele\\OneDrive\\Desktop\\Bootcamp Clean\\Module 4\\School_District_Analysis_Clean\\cleaning_data.ipynb',
 'C:\\Users\\mdele\\OneDrive\\Desktop\\Bootcamp Clean\\Module 4\\School_District_Analysis_Clean\\cleaning_student_names.ipynb',
 'C:\\Users\\mdele\\OneDrive\\Desktop\\Bootcamp Clean\\Module 4\\School_District_Analysis_Clean\\Jupyter_Practice.ipynb',
 'C:\\Users\\mdele\\OneDrive\\Desktop\\Bootcamp Clean\\Module 4\\School_District_Analysis_Clean\\PyCitySchools.ipynb',
 'C:\\Users\\mdele\\OneDrive\\Desktop\\Bootcamp Clean\\Module 4\\School_District_Analysis_Clean\\README.md',
 'C:\\Users\\mdele\\OneDrive\\Desktop\\Bootcamp Clean\\Module 4\\School_District_Analysis_Clean\\Resources']

In [11]:
# Wrap the list of paths in a single-column DataFrame and display it.
File_Path_list_Df = pd.DataFrame(File_Path_list)
File_Path_list_Df

Unnamed: 0,0
0,C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean...
1,C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean...
2,C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean...
3,C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean...
4,C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean...
5,C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean...
6,C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean...
7,C:\Users\mdele\OneDrive\Desktop\Bootcamp Clean...


In [12]:
# Count the missing (NaN/None) values in each column of the student data.
student_data_df.isna().sum()

Student ID       0
student_name     0
gender           0
grade            0
school_name      0
reading_score    0
math_score       0
dtype: int64

In [13]:
# Count the non-missing values in each column of the student data.
student_data_df.notna().sum()


Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
dtype: int64

In [14]:
# Print a summary of the student DataFrame: index range, columns, non-null counts, and dtypes.
student_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39170 entries, 0 to 39169
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Student ID     39170 non-null  int64 
 1   student_name   39170 non-null  object
 2   gender         39170 non-null  object
 3   grade          39170 non-null  object
 4   school_name    39170 non-null  object
 5   reading_score  39170 non-null  int64 
 6   math_score     39170 non-null  int64 
dtypes: int64(3), object(4)
memory usage: 2.1+ MB


In [15]:
# Honorific prefixes (each with a trailing space) followed by professional
# suffixes (each with a leading space) that are stripped from student names.
prefixes_suffixes = ["Dr. ", "Mr. ", "Ms. ", "Mrs. ", "Miss "] + [" MD", " DDS", " DVM", " PhD"]

In [16]:
# Strip every honorific prefix and professional suffix from the student names.
# regex=False makes str.replace treat each entry as literal text. Without it,
# pandas versions that default to regex matching interpret the "." in entries
# such as "Dr. " as "any character" and emit a FutureWarning about the default
# changing.
for affix in prefixes_suffixes:
    student_data_df["student_name"] = student_data_df["student_name"].str.replace(
        affix, "", regex=False
    )

# Preview the first ten rows to confirm the names were cleaned.
student_data_df.head(10)

  student_data_df["student_name"] = student_data_df["student_name"].str.replace(i,"")


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,82,80
7,7,Nicole Baker,F,12th,Huang High School,96,69
8,8,Michael Roth,M,10th,Huang High School,95,87
9,9,Matthew Greene,M,10th,Huang High School,96,84
