## Problem 6

In [1]:
# Imports
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas
import math
import matplotlib.pyplot as plt

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [11]:
# Load data
data = pandas.read_excel('data.xls')

X1 = data.to_numpy()[:, 0].reshape(-1, 1)
X2 = data.to_numpy()[:, 1].reshape(-1, 1)
X3 = data.to_numpy()[:, 2].reshape(-1, 1)
y = data.to_numpy()[:, 3]

# For logistic regression, our "true" case is when the value of the final exam is >=160.
y = [1 if i >= 160 else 0 for i in y]

# Iterate through different X values and fit logistic predictors based off of them
for X in [X1, X2, X3]:
    model = LogisticRegression(solver="lbfgs")
    model.fit(X, y)
    y_predict = model.predict(X)
    print("Confusion matrix: ")
    print(confusion_matrix(y_predict, y))
    
    # Calculate likelihood
    proba = 0
    for p in model.predict_proba(X):
        proba += math.log(max(p)) # For each value, sum the logs of probabilities of the predicted class (class that datapoint has the most probability of being in)
    # Multiply likelihood by -2 and add dof*log(n) to get BIC. 1 degree of freedom here.
    bic = -2 * proba + 1 * math.log(len(y))
    print("BIC: ", bic)



*** No CODEPAGE record, no encoding_override: will use 'ascii'
Confusion matrix: 
[[11  1]
 [ 1 12]]
BIC:  8.758778070757323
Confusion matrix: 
[[10  2]
 [ 2 11]]
BIC:  13.87588718959119
Confusion matrix: 
[[12  0]
 [ 0 13]]
BIC:  3.6158410859756875


### Problem 6B

In [12]:
# Iterate through different X values and fit linear predictors based off of them
X1X2 = data.to_numpy()[:, 0:2]
X1X3 = data.to_numpy()[:, 0:3:2]
X2X3 = data.to_numpy()[:, 1:3]

for X in [X1X2, X1X3, X2X3]:
    model = LogisticRegression(solver='lbfgs')
    model.fit(X, y)
    y_predict = model.predict(X)
    print("Confusion matrix: ")
    print(confusion_matrix(y_predict, y))
    
    # Calculate likelihood
    proba = 0
    for p in model.predict_proba(X):
        proba += math.log(max(p)) # For each value, sum the logs of probabilities of the predicted class (class that datapoint has the most probability of being in)
    # Multiply likelihood by -2 and add dof*log(n) to get BIC. 2 degrees of freedom here.
    bic = -2 * proba + 2 * math.log(len(y))
    print("BIC: ", bic)

    

Confusion matrix: 
[[11  1]
 [ 1 12]]
BIC:  11.915751795022299
Confusion matrix: 
[[12  0]
 [ 0 13]]
BIC:  6.766577115595288
Confusion matrix: 
[[12  0]
 [ 0 13]]
BIC:  6.779364072584452


### Problem 6C

In [13]:
X = data.to_numpy()[:, :-1]
model = LogisticRegression(solver='lbfgs')
model.fit(X, y)
y_predict = model.predict(X)

print("Confusion matrix:")
print(confusion_matrix(y_predict, y))

# Calculate likelihood
proba = 0
for p in model.predict_proba(X):
    proba += math.log(max(p)) # For each value, sum the logs of probabilities of the predicted class (class that datapoint has the most probability of being in)
# Multiply likelihood by -2 and add dof*log(n) to get BIC. 3 degrees of freedom here.
bic = -2 * proba + 3 * math.log(len(y))
print("BIC: ", bic)


Confusion matrix:
[[12  0]
 [ 0 13]]
BIC:  9.979839820466438


### Problem 6D
In the case of logistic regression with the threshold for passing set at 160, all models that include X3 correctly classify all points in the data set. Based on the BIC, which is strongly affected by the degrees of freedom here, the model based ONLY on X3 is the best choice because it has the lowest BIC and correctly classifies all points in the dataset. (Other models also correctly classify all points in the dataset, but they are more complicated and have higher BICs, which makes them less desirable, so the X3 only predictor is our best choice).