# Student Scores: Predicting Writing Scores

# Load Required Python Packages

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from pprint import pprint

%matplotlib inline



**Locate and print current working directory.**

In [2]:
# Resolve the project layout relative to where the notebook is run:
#   path           - the current working directory
#   parent         - the directory that contains `path`
#   figures_folder - the `figures` directory directly under `parent`
path = os.getcwd()
parent = os.path.dirname(path)
# os.path.join inserts the platform's separator instead of a hard-coded '/'.
figures_folder = os.path.join(parent, 'figures')
print(path, parent, figures_folder, sep='\n')

/Users/tiffanyflor/Dropbox/MyProjects/Student Scores/notebooks
/Users/tiffanyflor/Dropbox/MyProjects/Student Scores
/Users/tiffanyflor/Dropbox/MyProjects/Student Scores/figures


**Print contents of data/interim.**

In [3]:
# List the entries of <parent>/data/interim. The path is assembled with
# os.path.join so the separators match the host platform.
os.listdir(os.path.join(parent, 'data', 'interim'))

['total_student_scores_without_dummies.csv',
 'cleaned_student_scores.csv',
 'student_scores_no_outliers.csv',
 'total_student_scores_dummies.csv',
 'prediction_dataframe.csv',
 '.ipynb_checkpoints']

In [4]:
# Load the prediction table from <parent>/data/interim, using the first CSV
# column as the DataFrame index. The path is assembled with os.path.join
# rather than string concatenation with a hard-coded '/'.
df = pd.read_csv(
    os.path.join(parent, 'data', 'interim', 'prediction_dataframe.csv'),
    index_col=0,
)

In [5]:
# Preview the first five rows of the prediction table.
df.head(n=5)

Unnamed: 0,education,gender_male,lunch_standard,writing score,Completed Prep Course,Recommend Prep Course,Prediction
0,1,0,0,62,0,1,1
1,1,1,1,74,0,0,1
2,1,1,1,75,1,0,0
3,0,1,0,46,1,1,1
4,0,0,1,60,0,1,0


# Examine Predictions

## What percentage of students who failed the exam were correctly identified?
1047 students failed the writing exam.<br>
770 students of the 1047 who failed the exam were correctly identified.<br>
**This means 73.5% of the students who failed the exam were correctly identified as students who might fail.**<br>
**92.7% of students identified as needing the exam prep failed or scored within 10 points of failing. This provides an extra opportunity for students who are teetering on the edge of failing the exam.**

In [6]:
# Per-column non-null counts over the rows where 'Recommend Prep Course' is 1
# (the write-up treats these rows as the students who failed the exam).
df[df['Recommend Prep Course'].eq(1)].count()

education                1047
gender_male              1047
lunch_standard           1047
writing score            1047
Completed Prep Course    1047
Recommend Prep Course    1047
Prediction               1047
dtype: int64

In [7]:
# Per-column non-null counts over the rows where both 'Recommend Prep Course'
# and 'Prediction' are 1, i.e. students who should be recommended the course
# and were also flagged by the model (regardless of prior course completion).
df[df['Recommend Prep Course'].eq(1) & df['Prediction'].eq(1)].count()

education                770
gender_male              770
lunch_standard           770
writing score            770
Completed Prep Course    770
Recommend Prep Course    770
Prediction               770
dtype: int64

In [8]:
# Per-column non-null counts over the rows flagged by the model
# ('Prediction' == 1) whose writing score is below 79.5; the write-up describes
# these as students who failed or scored within 10 points of failing.
# NOTE(review): 79.5 is presumably the pass mark plus 10 points -- confirm.
# NOTE(review): the 92.7% quoted in the write-up looks like 971 / 1047 (the
# count of failed students); if it is meant as a share of flagged students,
# the denominator should be the number of rows with 'Prediction' == 1 -- confirm.
df[df['writing score'].lt(79.5) & df['Prediction'].eq(1)].count()

education                971
gender_male              971
lunch_standard           971
writing score            971
Completed Prep Course    971
Recommend Prep Course    971
Prediction               971
dtype: int64

## What percentage of students who failed the exam and have not completed the prep course before are correctly identified?
808 students who have not taken the prep course before have failed the exam.<br>
653 of the 808 students who have not taken the prep course and have failed the exam were correctly identified.<br>
**80.8% of students who have failed and not taken the prep course were accurately identified as students who should be recommended to take the prep course.**

In [9]:
# Per-column non-null counts over the rows where 'Recommend Prep Course' is 1
# and 'Completed Prep Course' is 0: students who should be recommended the
# course and have not completed it yet.
df[df['Recommend Prep Course'].eq(1) & df['Completed Prep Course'].eq(0)].count()

education                808
gender_male              808
lunch_standard           808
writing score            808
Completed Prep Course    808
Recommend Prep Course    808
Prediction               808
dtype: int64

In [10]:
# Per-column non-null counts over the rows where 'Prediction' is 1,
# 'Recommend Prep Course' is 1 and 'Completed Prep Course' is 0: students who
# have not completed the course, should be recommended it, and were flagged
# by the model.
df[
    df['Prediction'].eq(1)
    & df['Recommend Prep Course'].eq(1)
    & df['Completed Prep Course'].eq(0)
].count()

education                653
gender_male              653
lunch_standard           653
writing score            653
Completed Prep Course    653
Recommend Prep Course    653
Prediction               653
dtype: int64

## What percentage of students who were not identified have not taken the exam prep course before?
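**14.8% of students who failed the exam (155 of 1047) had not taken the exam prep course and were missed by the model's prediction.**<br>
**11.7% of students who failed the exam (122 of 1047) had taken the exam prep course and were missed by the model's prediction.**<br>
**Of the 277 failing students the model missed, 155 (56.0%) had not taken the exam prep course before.**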

In [11]:
# Per-column non-null counts over the rows where 'Recommend Prep Course' is 1,
# 'Completed Prep Course' is 0 and 'Prediction' is 0: students who should be
# recommended the course, have not completed it, and were not flagged by the
# model.
df[
    df['Recommend Prep Course'].eq(1)
    & df['Completed Prep Course'].eq(0)
    & df['Prediction'].eq(0)
].count()

education                155
gender_male              155
lunch_standard           155
writing score            155
Completed Prep Course    155
Recommend Prep Course    155
Prediction               155
dtype: int64

In [12]:
# Per-column non-null counts over the rows where 'Recommend Prep Course' is 1,
# 'Completed Prep Course' is 1 and 'Prediction' is 0: students who should be
# recommended the course, have already completed it, and were not flagged by
# the model.
df[
    df['Recommend Prep Course'].eq(1)
    & df['Completed Prep Course'].eq(1)
    & df['Prediction'].eq(0)
].count()

education                122
gender_male              122
lunch_standard           122
writing score            122
Completed Prep Course    122
Recommend Prep Course    122
Prediction               122
dtype: int64