In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import os
path = 'datasets/student-mat.csv'
data = pd.read_csv(path,header=None)
data.head()

Unnamed: 0,0,1,2,3,4
0,age,traveltime,studytime,failures,G1
1,18,2,2,0,5
2,17,1,2,0,5
3,15,1,2,3,7
4,15,1,3,0,15


In [3]:
data.shape

(396, 5)

In [4]:
cols = data.shape[1]
print("columns = {}".format(cols))

columns = 5


In [5]:
y = np.array(data.iloc[1:,-1:],dtype='int')
x = np.array(data.iloc[1:,0:cols-1],dtype='int')

In [6]:
x.shape, y.shape

((395, 4), (395, 1))

In [7]:
np.unique(y)

array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [8]:
#Finding number of rows
rows = x.shape[0]
#insert bias value
x = np.insert(x, 0, values=np.ones(rows),axis = 1)
x.shape

(395, 5)

In [9]:
#Find number of features 
params = x.shape[1]

Number of features including the bias terms accounts to 5

In [10]:
all_theta = np.zeros((17,params))
all_theta.shape

(17, 5)

In [11]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [12]:
def computeCost(theta,x,y):
    theta = np.matrix(theta)
    x = np.matrix(x)
    y = np.matrix(y)
    first = np.multiply(-y,np.log(sigmoid(x * theta.T)))
    second = np.multiply((1-y),np.log(1-sigmoid(x * theta.T)))
    cost = np.sum(first-second)
    cost = cost/(2*len(x))
    return cost

In [13]:
cost = computeCost(all_theta,x,y)
print("cost = {}".format(cost))

cost = 5.891751034759535


In [14]:
learning_rate = 1
def gradient(theta,x,y):
    theta = np.matrix(theta)
    x = np.matrix(x)
    y = np.matrix(y)
    error = sigmoid(x * theta.T) - y
    grad = ((x.T * error)/len(x))
    grad = grad * learning_rate
    return grad

In [15]:
from scipy.optimize import minimize 
def one_vs_all(x,y,classes,new_theta):
    params = x.shape[1]
    rows = x.shape[0]
    for i in range(0,classes):
        theta = np.zeros(params)
        theta = new_theta[i,:]
        y_i = np.array([1 if label == i else 0 for label in y ]) #one-hot encoding
        y_i = np.reshape(y_i, (rows,1)) #reshapes the array into 90*1
        fmin = minimize(fun=computeCost, x0=theta, args=(x,y_i), method='TNC',jac=gradient) #gradient-descent 
        all_theta[i,:] = fmin.x 
    return all_theta       

In [16]:
classes = 17
new_theta = one_vs_all(x,y,classes,all_theta)
computeCost(new_theta,x,y)

689.0849746849467

In [18]:
def predict_all(x,theta):
    x = np.matrix(x)
    theta = np.matrix(theta)
    #compute class probability 
    h = sigmoid(x * theta.T)
    h_argmax = np.argmax(h,axis=1)
    return h_argmax

In [19]:
y_pred = predict_all(x,new_theta)
correct = [ 1 if a == b else 0 for(a,b) in zip(y_pred,y)]
print(correct)
accuracy = (sum(map(int,correct))/float(len(correct)))
print("Training data = {} %".format(accuracy*100))

[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 