In [1]:
#Import required python modules
import numpy as np 
from sklearn import linear_model 

In [2]:
# Parse the comma-separated file into a numeric array, skipping its one header line
data = np.genfromtxt('iris.csv', delimiter=',', skip_header=1)

# Column positions used as model inputs and as the target value
feature_columns = [0, 1, 2, 3]
label_column = 5

# The first 80 rows form the training set
X_train = data[:80, feature_columns]
Y_train = data[:80, label_column]

# The last 20 rows form the test set
X_test = data[-20:, feature_columns]
Y_test = data[-20:, label_column]

In [3]:
#Define the required Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated element-wise.

    The value is computed from exp(-|z|), which always lies in [0, 1], so
    large-magnitude inputs never overflow the exponential (the naive form
    overflows for strongly negative z).

    Parameters
    ----------
    z : scalar or array-like of real numbers

    Returns
    -------
    A scalar for scalar input, otherwise an ndarray with the same shape as z.
    """
    z = np.asarray(z)
    # Integer/bool input is promoted so the exponential works in floating point
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(np.float64)

    # exp of a non-positive number cannot overflow
    decay = np.exp(-np.abs(z))

    # z >= 0: 1 / (1 + e^-z)        z < 0: e^z / (1 + e^z)   (algebraically equal)
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays as they are
    return result[()]

In [6]:
#Define the Raw implementation function to set the parameters (theta)

def fit_implementation(X_train, Y_train, learning_rate=0.0005, max_iteration=1000):
    """Fit logistic-regression parameters by gradient ascent, looping over samples.

    Each iteration visits every training sample, accumulates
    learning_rate * x * (y - sigmoid(theta^T x)) into a single update, and
    applies that update to theta once all samples have been visited.

    Parameters
    ----------
    X_train : 2-D array, one row per sample (a leading column of 1's is added here)
    Y_train : array with one target value per row of X_train
    learning_rate : step size applied to every per-sample gradient
    max_iteration : number of full passes over the data

    Returns
    -------
    theta : ndarray of shape (n_features + 1, 1); theta[0] multiplies the added 1

    Raises
    ------
    ValueError
        If X_train and Y_train hold a different number of samples (zip would
        otherwise silently ignore the extra rows).

    Side effects
    ------------
    Every 100th iteration prints the iteration number, the log-likelihood
    measured with the parameters in use during that pass, and the updated theta.
    """
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)
    if X_train.shape[0] != Y_train.shape[0]:
        raise ValueError(
            "X_train and Y_train must contain the same number of samples "
            "(got {} and {})".format(X_train.shape[0], Y_train.shape[0])
        )

    #Adding a column of 1's so that the first element of each input is always 1
    #It would be multiplied with theta_0 later
    X_train = np.insert(X_train, 0, values=1, axis=1)
    no_attributes = X_train.shape[1]

    #Initialize model parameters theta
    theta = np.zeros((no_attributes, 1))

    #Run number of iterations
    for icount in range(max_iteration):
        #delta is the quantity that will be added with theta during updating theta
        delta = np.zeros((no_attributes, 1))
        totalLogLikelihood = 0
        #Check each data point
        for instance, actualOutput in zip(X_train, Y_train):
            instance = instance.reshape(no_attributes, 1)
            #Linear score theta^T x reduced to a 0-d value
            dotResult = np.dot(theta.T, instance).squeeze()

            predictedOutput = sigmoid(dotResult)
            #Calculate the derivative value for this data point
            derivativeValue = instance * (actualOutput - predictedOutput)
            #Calculate the amount to be added with theta
            delta += learning_rate * derivativeValue

            #log(p) = -log(1 + e^-z) and log(1 - p) = -log(1 + e^z).
            #logaddexp evaluates both without taking log(0) when p saturates.
            logLikelihood = -(actualOutput * np.logaddexp(0, -dotResult)
                              + (1 - actualOutput) * np.logaddexp(0, dotResult))
            totalLogLikelihood += logLikelihood
        theta = theta + delta

        #After each 100 iteration, print the status
        if icount % 100 == 0:
            print(icount)
            print(totalLogLikelihood)
            print(theta)

    return theta

def fit_implementation2(X_train, Y_train, learning_rate=0.0005, max_iteration=1000):
    """Fit logistic-regression parameters by gradient ascent using whole-array operations.

    Each iteration computes the predictions for all samples at once and moves
    theta by learning_rate * X^T (y - sigmoid(X theta)).

    Parameters
    ----------
    X_train : 2-D array, one row per sample (a leading column of 1's is added here)
    Y_train : array with one target value per row of X_train; reshaped to a column
    learning_rate : step size applied to the summed gradient
    max_iteration : number of update steps

    Returns
    -------
    theta : ndarray of shape (n_features + 1, 1); theta[0] multiplies the added 1

    Raises
    ------
    ValueError
        If X_train and Y_train hold a different number of samples.

    Side effects
    ------------
    Every 100th iteration prints the iteration number, the log-likelihood
    measured with the parameters in use before that step, and the updated theta.
    """
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train).reshape(-1, 1)
    if X_train.shape[0] != Y_train.shape[0]:
        raise ValueError(
            "X_train and Y_train must contain the same number of samples "
            "(got {} and {})".format(X_train.shape[0], Y_train.shape[0])
        )

    #Adding a column of 1's so that the first element of each input is always 1
    #It would be multiplied with theta_0 later
    X_train = np.insert(X_train, 0, values=1, axis=1)
    no_attributes = X_train.shape[1]

    #Initialize model parameters theta
    theta = np.zeros((no_attributes, 1))

    #Run number of iterations
    for icount in range(max_iteration):
        #Linear scores and predicted probabilities, one row per sample
        dotResult = np.dot(X_train, theta)
        predictedValue = sigmoid(dotResult)
        diff = Y_train - predictedValue

        #X^T (y - p) sums the per-sample gradients directly, giving a
        #(no_attributes, 1) column without building an n-by-k intermediate
        delta = learning_rate * np.dot(X_train.T, diff)

        #log(p) = -log(1 + e^-z) and log(1 - p) = -log(1 + e^z).
        #logaddexp evaluates both without taking log(0) when p saturates.
        totalLogLikelihood = -np.sum(
            Y_train * np.logaddexp(0, -dotResult)
            + (1 - Y_train) * np.logaddexp(0, dotResult)
        )
        theta = theta + delta

        #After each 100 iteration, print the status
        if icount % 100 == 0:
            print(icount)
            print(totalLogLikelihood)
            print(theta)

    return theta


# Train on the training rows with the whole-array routine; the resulting
# parameter column is what the evaluation cell passes to prediction().
# fit_implementation (the per-sample loop variant) accepts the same arguments:
#parameters = fit_implementation(X_train, Y_train)
parameters = fit_implementation2(X_train, Y_train)

0
-55.451774444795646
[[ 0.      ]
 [-0.009725]
 [ 0.0066  ]
 [-0.02855 ]
 [-0.011175]]
100
-11.743797936695358
[[ 0.12128412]
 [ 0.18112537]
 [ 0.69729525]
 [-1.06492132]
 [-0.46220067]]
200
-6.299389109303773
[[ 0.16878723]
 [ 0.25502814]
 [ 0.95980853]
 [-1.45763631]
 [-0.63651892]]
300
-4.3043061196194445
[[ 0.1979695 ]
 [ 0.29967412]
 [ 1.12020526]
 [-1.69806549]
 [-0.74416005]]
400
-3.2751399944391686
[[ 0.21902153]
 [ 0.33158634]
 [ 1.23573927]
 [-1.8713324 ]
 [-0.82216338]]
500
-2.6474995606354796
[[ 0.23548623]
 [ 0.35639062]
 [ 1.32608088]
 [-2.00682483]
 [-0.88341048]]
600
-2.2246003867464386
[[ 0.24900609]
 [ 0.37666497]
 [ 1.40029232]
 [-2.11811169]
 [-0.93387848]]
700
-1.9201379443146736
[[ 0.26047577]
 [ 0.39380256]
 [ 1.46329086]
 [-2.21256466]
 [-0.97682677]]
800
-1.6903417276892148
[[ 0.27043611]
 [ 0.40864065]
 [ 1.5180416 ]
 [-2.29463231]
 [-1.0142282 ]]
900
-1.510654577546624
[[ 0.27923891]
 [ 0.42172131]
 [ 1.56646995]
 [-2.36720477]
 [-1.0473677 ]]
0
-55.45177444

In [5]:
def prediction(X_test, Y_test, theta):
    """Classify the test rows with theta and print how many labels were matched.

    A leading column of 1's is prepended to X_test so theta[0] acts as the
    intercept. A row is assigned class 1 when its sigmoid score is at least
    0.5, otherwise class 0, and that class is compared with Y_test.

    Parameters
    ----------
    X_test : 2-D array, one row per sample
    Y_test : array of expected labels, reshaped to a column for the comparison
    theta  : parameter array multiplied against the augmented X_test

    Returns
    -------
    None. The correct count, wrong count and accuracy percentage are printed.
    """
    # Augment the inputs with the constant-1 column matched to theta_0
    augmented = np.insert(X_test, 0, values=1, axis=1)
    expected = Y_test.reshape(-1, 1)

    # Probability per row, then thresholded at 0.5 into 0/1 integers
    probabilities = sigmoid(augmented.dot(theta))
    predictedOutput = (probabilities >= 0.5).astype(int)

    # 1 where the predicted class equals the expected label, else 0
    resultMatrix = (expected == predictedOutput).astype(int)
    correctCount = np.sum(resultMatrix)
    totalCount = len(resultMatrix)

    print("Total Correct Count: ",correctCount," Total Wrong Count: ",totalCount-correctCount," Accuracy: ",(correctCount*100)/(totalCount))

# Evaluate the fitted parameters on the test rows; the summary line is printed by prediction()
prediction(X_test, Y_test, parameters)

Total Correct Count:  20  Total Wrong Count:  0  Accuracy:  100.0
