In [1]:
import numpy as np
import pandas as pd 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split
np.random.seed(42)

df = pd.read_csv("admissions.csv")
df.head(20)

Unnamed: 0,admit,gre,gpa,prestige
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4
5,1,760,3.0,2
6,1,560,2.98,1
7,0,400,3.08,2
8,1,540,3.39,3
9,0,700,3.92,2


In [2]:
# Change prestige to dummy variable columns that are added to df:
df[["prest_1","prest_2","prest_3","prest_4"]] = pd.get_dummies(df["prestige"])

In [3]:
df.head(6)

Unnamed: 0,admit,gre,gpa,prestige,prest_1,prest_2,prest_3,prest_4
0,0,380,3.61,3,0,0,1,0
1,1,660,3.67,3,0,0,1,0
2,1,800,4.0,1,1,0,0,0
3,1,640,3.19,4,0,0,0,1
4,0,520,2.93,4,0,0,0,1
5,1,760,3.0,2,0,1,0,0


In [4]:
# Divide my data into training and test data. 
X = df.drop(["admit", "prestige", "prest_1"], axis = 1)
y = df['admit']

In [5]:
# Create my test as 20% of the data, and use a random state of 0. 
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

In [6]:
# Now use sklearn's Logistic Regression to fit model using gre, gpa, and 3 of pretige dummy variables.
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
# Fit the logistic regression model without changing any of the hyperparameters. 
# Steps as follows:
# 1. Instantiate
# 2. Fit (on train)
# 3. Predict (on test)
# 4. Score (compare predict to test)
log_mod = LogisticRegression()
log_mod.fit(X_train, y_train)
preds = log_mod.predict(X_test)



In [7]:
# Create a confusion matrix. 
confusion_matrix(y_test, preds)

array([[56,  0],
       [22,  2]])

In [9]:
# Introduce to additional metrics: precision, recall, accuracy.
# based on the cofusion_matrix above to compute these metrics.
print (precision_score(y_test, preds))
print (recall_score(y_test, preds))
print (accuracy_score(y_test, preds))

1.0
0.08333333333333333
0.725
