# In [None]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from IPython.display import display, Math, Latex
from sklearn.metrics import accuracy_score

# Load the exam dataset. The columns used below are exam_1, exam_2 and
# admitted (passed or not, stored as 0 or 1).
data = pd.read_csv('exams.csv')
data.head()  # only rendered when it is the last expression of a notebook cell

# Min-max normalisation: rescale each exam score to the [0, 1] range.
# 'admitted' is described as holding only 0/1 labels, so it is left untouched.
ndata = data.copy()  # normalised copy; the raw frame stays intact
for column in ('exam_1', 'exam_2'):
    column_min = ndata[column].min()
    column_range = ndata[column].max() - column_min
    if column_range == 0:
        # A constant column would otherwise become 0/0 = NaN and poison
        # every later computation; map it to 0 instead.
        ndata[column] = 0.0
    else:
        ndata[column] = (ndata[column] - column_min) / column_range
ndata.head()


#Sigmoid function.

def sigmoidFunction(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Works element-wise on scalars and NumPy arrays.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp``, which never evaluates ``exp`` of a large positive
    number. The naive formula overflows (with a RuntimeWarning) for strongly
    negative inputs such as ``x = -1000``.
    """
    return np.exp(-np.logaddexp(0, -x))
    
    
    
#cost function

def costFunction(h, y):
    """Return the mean binary cross-entropy between predictions and labels.

    Parameters
    ----------
    h : predicted probabilities (scalar or NumPy array).
    y : target labels, expected to be 0 or 1, broadcastable against ``h``.

    Predictions are clipped to ``[eps, 1 - eps]`` before taking logarithms.
    Without the clip, a prediction of exactly 0 or 1 produces ``log(0)``,
    which turns the cost into ``inf`` or ``nan`` (``0 * -inf``). Values
    already inside that interval are not altered.
    """
    eps = 1e-15
    h = np.clip(h, eps, 1 - eps)
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()


#Gradient descent implementation from scratch.

# Feature matrix (the two normalised exam scores) and the admission labels.
x = ndata[['exam_1', 'exam_2']].values
y = ndata['admitted'].values

# Hyper-parameters; picked by hand and free to tune.
alpha = 0.01  # learning rate
numOfIterations = 100000

# Prepend a column of ones so that theta[0] acts as the intercept term.
x = np.concatenate((np.ones((x.shape[0], 1)), x), axis=1)
theta = np.zeros(x.shape[1])  # start from all-zero parameters
allCosts = []  # cost recorded at every iteration

for i in range(numOfIterations):
    h = sigmoidFunction(np.dot(x, theta))
    error = h - y
    gradient = np.dot(x.T, error) / y.size
    theta -= alpha * gradient
    # Evaluate the cost once and reuse it for logging and for the history.
    # h was computed before the update above, so this is the cost of the
    # parameters the iteration started with.
    cost = costFunction(h, y)
    if i % 1000 == 0:
        print("loss:", cost)
    allCosts.append(cost)
    
    
    
# Scatter plot of the two normalised exam scores, coloured by outcome, with
# the learned decision boundary drawn on top.
plt.figure(figsize=(10, 5))
plt.title('First exam score vs Second exam score.')
plt.xlabel('First exam')
plt.ylabel('Second exam')

# Admitted students (label 1) are drawn in green first, then the students
# who were not admitted (label 0) in red.
for outcome, colour in ((1, 'green'), (0, 'red')):
    group = ndata[ndata['admitted'] == outcome]
    plt.scatter(group['exam_1'], group['exam_2'], color=colour)

# The boundary is the line theta[0] + theta[1] * x1 + theta[2] * x2 = 0;
# solve it for x2 at the two extremes of the first exam score.
first_exam = ndata['exam_1']
bx = [np.min(first_exam), np.max(first_exam)]
by = -(theta[0] + np.dot(theta[1], bx)) / theta[2]
plt.plot(bx, by)  # decision boundary
plt.show()