In [None]:
# import library and load data
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline

In [None]:
# Load the DMV written-test dataset from a CSV file located relative to the
# current working directory.
data = pd.read_csv("DMV_Written_Tests.csv")

In [None]:
# Preview the first rows of the frame as a quick sanity check of the load.
data.head()

In [None]:
# Print a summary of the frame: columns, dtypes and non-null counts.
data.info()

In [None]:
# Split the frame into a feature matrix (the two test-score columns) and a
# label vector. `.to_numpy()` is the accessor pandas recommends over `.values`
# because it always returns a NumPy ndarray.
scores = data[["DMV_Test_1", "DMV_Test_2"]].to_numpy()
results = data["Results"].to_numpy()

In [None]:
# visualize the data
# Boolean masks shaped (n_samples, 1). Using -1 lets NumPy infer the number of
# rows instead of hard-coding 100, so the cell works for any dataset size.
passed = (results == 1).reshape(-1, 1)
failed = (results == 0).reshape(-1, 1)

# `marker` (singular) is forwarded to matplotlib's scatter. seaborn's `markers`
# argument only applies together with a `style` variable, so with `markers`
# the requested symbols were never drawn.
scp = sns.scatterplot(x = scores[passed[:, 0], 0],
                      y = scores[passed[:, 0], 1],
                      marker = "^",
                      color = "green"
                     )
# Draw the second series on the same Axes explicitly rather than relying on
# pyplot's implicit "current axes" state.
sns.scatterplot(x = scores[failed[:, 0], 0],
                y = scores[failed[:, 0], 1],
                marker = "*",
                color = "red",
                ax = scp
               )

scp.legend(["passed","failed"])
# The trailing semicolon suppresses the repr of the returned list in the output.
scp.set(xlabel = "test1", ylabel = "test2");

In [None]:
# define the logistic sigmoid function
def logistic_sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-z)))`` through
    ``np.logaddexp``, which is mathematically identical but never evaluates
    ``exp`` on a large positive number. The direct formula overflows (and emits
    a RuntimeWarning) once ``z`` drops below roughly -709.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s); lists are accepted as well as NumPy arrays.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``z`` with the same shape as the input, in [0, 1].
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0, np.negative(z)))

In [None]:
# compute cost and gradient
def cost_gradient(x, y, theta):
    """Compute the logistic-regression cost and its gradient at ``theta``.

    Parameters
    ----------
    x : numpy.ndarray
        Design matrix; it is multiplied with ``theta`` via ``np.dot``.
    y : numpy.ndarray
        Target labels, shaped like ``np.dot(x, theta)``.
    theta : numpy.ndarray
        Current parameter vector.

    Returns
    -------
    cost : numpy.ndarray or numpy.float64
        Cross-entropy cost averaged over ``len(y)`` samples. The sum runs over
        axis 0, so a column-vector ``y`` yields an array of shape ``(1,)``.
    gradient : numpy.ndarray
        Gradient of the cost with respect to ``theta``.
    """
    m = len(y)
    y_pred = logistic_sigmoid(np.dot(x, theta))

    # Keep the log arguments strictly inside (0, 1). A fully saturated
    # prediction would otherwise produce log(0) = -inf, and 0 * -inf = nan
    # would poison the cost. The gradient still uses the unclipped prediction.
    eps = np.finfo(float).eps
    y_safe = np.clip(y_pred, eps, 1 - eps)

    log_likelihood = y * np.log(y_safe) + (1 - y) * np.log(1 - y_safe)
    cost = (-1 / m) * np.sum(log_likelihood, axis=0)
    gradient = (1 / m) * np.dot(np.transpose(x), y_pred - y)
    return cost, gradient

In [None]:
# initialize cost and gradient(theta = 0)
# Standardize each score column to zero mean and unit standard deviation.
# The result is stored under its own name so that `scores` keeps the raw
# values: re-running this cell then recomputes the same mean/std instead of
# standardizing already-standardized data (which would reset mean_scores and
# std_scores to roughly 0 and 1 and invalidate any later scaling of raw scores).
mean_scores = np.mean(scores, axis = 0)
std_scores = np.std(scores, axis = 0)
scores_scaled = (scores - mean_scores) / std_scores

# number of samples (rows) and number of features (columns)
rows, cols = scores_scaled.shape

# Design matrix: a leading column of ones for the intercept term, followed by
# the standardized scores.
X = np.append(np.ones((rows, 1)), scores_scaled, axis = 1)
y = results.reshape(rows, 1)
theta_init = np.zeros((cols + 1, 1))

cost, gradient = cost_gradient(X, y, theta_init)
print("initial cost :", cost, "initial gradient",gradient)

In [None]:
# gradient descent
def gradient_descent(x, y, theta, alpha, iterations):
    """Minimize the logistic-regression cost with batch gradient descent.

    Parameters
    ----------
    x : numpy.ndarray
        Design matrix passed through to ``cost_gradient``.
    y : numpy.ndarray
        Target labels passed through to ``cost_gradient``.
    theta : array-like
        Initial parameters. The caller's object is not modified.
    alpha : float
        Learning rate (step size).
    iterations : int
        Number of update steps to perform.

    Returns
    -------
    costs : list
        Cost evaluated at the start of each iteration, i.e. before that
        iteration's parameter update.
    theta : numpy.ndarray
        Parameters after the final update.
    """
    # Work on a private float copy. An in-place `theta -= ...` on the caller's
    # array would silently overwrite their initial parameters, and it raises
    # for integer-typed arrays.
    theta = np.array(theta, dtype=float, copy=True)
    costs = []
    for _ in range(iterations):
        cost, gradient = cost_gradient(x, y, theta)
        costs.append(cost)
        theta -= alpha * gradient

    return costs, theta
    
        

In [None]:
# Train with learning rate 1 for 200 iterations. A copy of theta_init is
# passed so the initial parameters are guaranteed to stay untouched and
# re-running this cell always restarts from the same starting point.
costs, theta = gradient_descent(X, y, theta_init.copy(), alpha = 1, iterations = 200)

In [None]:
# Report the fitted parameters together with the last recorded cost.
print(
    "theta after gradient descent : ", theta,
    "cost with current theta : ", costs[-1],
)

In [None]:
# plot the convergence of J(theta)
plt.plot(costs)
plt.xlabel("iterations")
# Raw string: "\T" is not a valid escape sequence, so the non-raw literal
# triggers an invalid-escape warning. The label text itself is unchanged.
# The trailing semicolon hides the repr of the returned Text object.
plt.ylabel(r"$J(\Theta)$");

In [None]:
# plot the decision boundary
# `marker` (singular) is forwarded to matplotlib's scatter. seaborn's `markers`
# argument only applies together with a `style` variable, so with `markers`
# the requested symbols were never drawn.
new_scp = sns.scatterplot(x = X[passed[:, 0], 1],
                          y = X[passed[:, 0], 2],
                          marker = "^",
                          color = "green")
# Every further layer is pinned to the same Axes explicitly instead of relying
# on pyplot's implicit "current axes" state.
sns.scatterplot(x = X[failed[:, 0], 1],
                y = X[failed[:, 0], 2],
                marker = "*",
                color = "red",
                ax = new_scp
               )

new_scp.legend(["passed", "failed"])
new_scp.set(xlabel = "test1", ylabel = "test2")

# The boundary is where theta0 + theta1 * x1 + theta2 * x2 = 0, i.e.
# x2 = -(theta0 + theta1 * x1) / theta2. Two end points are enough for a line.
X_boundary = np.array([np.min(X[:, 1]), np.max(X[:, 1])])
y_boundary = -(theta[0] + theta[1] * X_boundary) / theta[2]
sns.lineplot(x = X_boundary, y = y_boundary, color = "blue", ax = new_scp);

In [None]:
# prediction using the optimized theta 
def predict(theta, x):
    """Classify each row of ``x`` with the given parameters.

    The sigmoid of ``np.dot(x, theta)`` is compared against 0.5; the result is
    True where that value is strictly greater than 0.5 and False otherwise.
    """
    threshold = 0.5
    probabilities = logistic_sigmoid(np.dot(x, theta))
    return probabilities > threshold

In [None]:
# calculate accuracy with training data
p = predict(theta, X)
# Share of correct predictions, scaled to percent. A raw count of matches only
# equals a percentage when the dataset has exactly 100 samples. Averaging
# along axis 0 keeps `accuracy` as a length-1 array for a column-vector `y`.
accuracy = np.mean(p == y, axis = 0) * 100
print("accuracy is :", accuracy[0],"%")

In [None]:
# Estimate the pass probability for a single applicant with raw scores 50 and 79.
# The raw scores are scaled with mean_scores / std_scores and prefixed with a
# constant 1 for the intercept term before being multiplied with theta.
test = np.concatenate(
    (np.ones(1), (np.array([50, 79]) - mean_scores) / std_scores),
    axis = 0,
)
p_test = logistic_sigmoid(test.dot(theta))
print("the probability of passing the test is " ,np.round(p_test[0], 2))