In [None]:
# Imports
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
import scipy.optimize as opt 

In [None]:
# Load the dataset: the file has no header row, so column names are supplied here.
# NOTE(review): absolute, machine-specific path -- adjust it to wherever ex2data1.txt lives.
path = 'D:\\Andrew NG Tasks\\Classification Python\\ex2data1.txt'
data = pd.read_csv(
    path,
    names=['Exam1', 'Exam2', 'Admitted'],
    header=None,
)

In [None]:
# Preview the first ten rows of the loaded frame
print('Data = ', data.head(10), sep='\n')

In [None]:
# Partition the rows by admission outcome (1 = admitted, 0 = not admitted)
positive = data.loc[data['Admitted'].isin([1])]
negative = data.loc[data['Admitted'].isin([0])]

# Show positional rows 1..9 of each group
print('Admitted \n', positive.iloc[1:10])
print('===========================')
print('Not Admitted \n', negative.iloc[1:10])

In [None]:
# Scatter plot of the two exam scores, one marker style per class
fig, ax = plt.subplots()
ax.scatter(
    positive['Exam1'], positive['Exam2'],
    c='b', marker='o', label='Admitted',
)
ax.scatter(
    negative['Exam1'], negative['Exam2'],
    c='r', marker='x', label='Not Admitted',
)
ax.legend()
ax.set_xlabel('Exam1 score')
ax.set_ylabel('Exam2 score')


In [None]:
# Cost function.
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)).

    The computation goes through ``np.exp``, so ``z`` may be a scalar or a
    NumPy array/matrix; array inputs are transformed element-wise.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def cost(theta, X, y):
    """Mean logistic-regression (cross-entropy) cost.

    Parameters
    ----------
    theta : array-like with n entries
        Model parameters; flattened to 1-D before use.
    X : array-like of shape (m, n)
        Design matrix, one row per sample.
    y : array-like with m entries
        Labels (0 or 1). Accepted as shape (m,) or (m, 1); both are treated
        as one label per row of ``X``.

    Returns
    -------
    float
        ``sum(-y*log(h) - (1-y)*log(1-h)) / m`` with ``h = sigmoid(X @ theta)``.

    Notes
    -----
    The log terms are evaluated with ``np.logaddexp`` rather than
    ``np.log(sigmoid(z))``. The naive form rounds the sigmoid to exactly
    0 or 1 for large |z|, which yields ``log(0) = -inf`` and then
    ``0 * -inf = nan`` in the cost.
    """
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.asarray(X, dtype=float)
    # Force a column so a 1-D y cannot broadcast against z into an (m, m) array.
    y = np.asarray(y, dtype=float).reshape(-1, 1)

    z = X.dot(theta).reshape(-1, 1)

    # -log(sigmoid(z))     == logaddexp(0, -z)
    # -log(1 - sigmoid(z)) == logaddexp(0,  z)
    positive_term = y * np.logaddexp(0, -z)
    negative_term = (1 - y) * np.logaddexp(0, z)
    return np.sum(positive_term + negative_term) / len(X)

In [None]:
# Add the intercept column of ones.
# Guarded so that re-running this cell does not fail: DataFrame.insert raises
# ValueError when the column already exists.
if 'Ones' not in data.columns:
    data.insert(0, 'Ones', 1)
print('New data \n', data[1:10])

# Set X and y: every column except the last is a feature, the last is the label
cols = data.shape[1]
X = data.iloc[:, 0:cols-1]
y = data.iloc[:, cols-1:cols]

# Convert to numpy arrays; theta starts at zero, one entry per column of X
X = np.array(X.values)
y = np.array(y.values)
theta = np.zeros(X.shape[1])

# Calculate the cost at the initial theta
this_cost = cost(theta, X, y)
print('Cost when theta (0,0,0) = \n', this_cost)

In [None]:
# Calculate the gradient
def gradient(theta, X, y):
    """Gradient of the mean logistic-regression cost with respect to theta.

    Parameters
    ----------
    theta : array-like with n entries
        Model parameters; flattened to 1-D before use.
    X : array-like of shape (m, n)
        Design matrix, one row per sample.
    y : array-like with m entries
        Labels. Accepted as shape (m,) or (m, 1); both are flattened to one
        label per row of ``X``.

    Returns
    -------
    numpy.ndarray of shape (n,)
        ``X.T @ (sigmoid(X @ theta) - y) / m``.
    """
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.asarray(X, dtype=float)
    # Flatten so the residual is element-wise, never an (m, m) broadcast.
    y = np.asarray(y, dtype=float).ravel()

    error = sigmoid(X.dot(theta)) - y

    # One matrix product replaces the per-parameter loop:
    # entry i is sum(error * X[:, i]) / m.
    return X.T.dot(error) / len(X)

In [None]:
# Fit the parameters with SciPy's truncated-Newton optimizer (fmin_tnc).
# The first element of the returned tuple is the optimized theta.
result = opt.fmin_tnc(cost, theta, fprime=gradient, args=(X, y))
cost_after_opt = cost(result[0], X, y)
print('Cost after learning = \n', cost_after_opt)

In [None]:
# Predict
def predict(theta, X):
    """Predict a 0/1 label for every row of X.

    Parameters
    ----------
    theta : array-like with n entries
        Model parameters. A 1-D array, a row vector or an ``np.matrix`` are
        all accepted; the value is flattened before use.
    X : array-like of shape (m, n)
        Design matrix, one row per sample.

    Returns
    -------
    list of int
        1 where ``sigmoid(X @ theta) >= 0.5``, otherwise 0.
    """
    theta = np.asarray(theta, dtype=float).ravel()
    # Explicit dot product: `X * theta.T` is only a matrix product when one
    # operand is an np.matrix; for plain ndarrays it is element-wise.
    probability = sigmoid(np.asarray(X, dtype=float).dot(theta))
    return [1 if p >= 0.5 else 0 for p in probability]

# Score the learned parameters on the training data
theta_learned = np.matrix(result[0])
predictions = predict(theta_learned, X)

# Flatten y so each prediction is compared with a scalar label
correct = [1 if a == b else 0 for (a, b) in zip(predictions, np.ravel(y))]

# Percentage of correct predictions. The previous `%` (modulo) only looked
# right when fewer than all of exactly 100 samples were correct.
accuracy = 100.0 * sum(correct) / len(correct)
print('Accuracy = {0}%'.format(accuracy))