In [1]:
#Import required python modules
import numpy as np 
from sklearn import linear_model 

In [2]:
#Read the CSV into a numeric array; skip_header=1 skips the first line of the file
data = np.genfromtxt('iris.csv', delimiter=',', skip_header=1)

#Column indices used as model inputs and as the target label
FEATURE_COLUMNS = [0, 1, 2, 3]
LABEL_COLUMN = 5

#Training set: the first 80 rows
X_train = data[:80, FEATURE_COLUMNS]
Y_train = data[:80, LABEL_COLUMN]

#Test set: the last 20 rows
X_test = data[-20:, FEATURE_COLUMNS]
Y_test = data[-20:, LABEL_COLUMN]

In [3]:
#Define the required Sigmoid function
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), evaluated element-wise.

    The result is built from exp(-|z|), which always lies in [0, 1], so
    large-magnitude inputs cannot overflow the exponential (the direct
    formula overflows in np.exp for very negative z).

    Parameters
    ----------
    z : scalar or array-like of real numbers
        Input value(s).

    Returns
    -------
    A NumPy scalar when z is a scalar, otherwise an ndarray with the
    same shape as z.
    """
    z = np.asarray(z)
    #exp(-|z|) is in [0, 1] for every finite z, so it never overflows
    decay = np.exp(-np.abs(z))
    #z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z))  -- same function
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    #Indexing with () turns a 0-d array into a scalar and leaves n-d arrays as arrays
    return result[()]

In [4]:
#Define the raw implementation function that learns the parameters (theta)

def fit_implementation(X_train, Y_train, learning_rate=0.0005, max_iteration=1000):
    """Fit logistic-regression parameters by batch gradient ascent.

    Every iteration computes the gradient of the log-likelihood over all
    training rows and adds learning_rate times that gradient to theta.
    On iterations 0, 100, 200, ... the iteration number, the total
    log-likelihood (evaluated with theta as it was before that iteration's
    update) and the updated theta are printed.

    Parameters
    ----------
    X_train : 2-D array-like, shape (n_samples, n_features)
        Input rows. A leading column of 1's is added internally so that
        theta[0] acts as the intercept.
    Y_train : array-like with n_samples values
        Target value for each row (the notebook passes 0/1 labels).
    learning_rate : float, default 0.0005
        Step size applied to the gradient.
    max_iteration : int, default 1000
        Number of full passes over the training rows.

    Returns
    -------
    theta : ndarray, shape (n_features + 1, 1)
        Learned parameters, intercept first.

    Raises
    ------
    ValueError
        If X_train and Y_train do not contain the same number of samples.
    """
    features = np.asarray(X_train, dtype=float)
    targets = np.asarray(Y_train, dtype=float).reshape(-1, 1)
    if features.shape[0] != targets.shape[0]:
        raise ValueError(
            "X_train and Y_train must contain the same number of samples "
            "(got %d and %d)" % (features.shape[0], targets.shape[0])
        )

    #Adding a column of 1's so that the first element of each input is always 1
    #It is multiplied with theta_0 (the intercept)
    design = np.insert(features, 0, values=1, axis=1)
    no_attributes = design.shape[1]

    #Initialize model parameters theta
    theta = np.zeros((no_attributes, 1))

    #Run number of iterations
    for icount in range(max_iteration):
        #Linear score and predicted probability for every row at once
        scores = np.dot(design, theta)
        predicted = sigmoid(scores)

        #y*log(p) + (1-y)*log(1-p) simplifies to y*z - log(1 + exp(z)).
        #This form stays finite when p rounds to exactly 0 or 1, where the
        #direct formula evaluates 0*log(0) and yields nan.
        totalLogLikelihood = float(np.sum(targets * scores - np.logaddexp(0, scores)))

        #Gradient of the log-likelihood is X^T (y - p); step along it
        theta = theta + learning_rate * np.dot(design.T, targets - predicted)

        #After each 100 iteration, print the status
        if icount % 100 == 0:
            print(icount)
            print(totalLogLikelihood)
            print(theta)

    return theta


#Learn theta from the training split using the default learning rate and iteration count
parameters = fit_implementation(X_train=X_train, Y_train=Y_train)

0
-55.451774444795646
[[ 0.      ]
 [-0.009725]
 [ 0.0066  ]
 [-0.02855 ]
 [-0.011175]]
100
-11.743797936695358
[[ 0.12128412]
 [ 0.18112537]
 [ 0.69729525]
 [-1.06492132]
 [-0.46220067]]
200
-6.299389109303773
[[ 0.16878723]
 [ 0.25502814]
 [ 0.95980853]
 [-1.45763631]
 [-0.63651892]]
300
-4.3043061196194445
[[ 0.1979695 ]
 [ 0.29967412]
 [ 1.12020526]
 [-1.69806549]
 [-0.74416005]]
400
-3.2751399944391686
[[ 0.21902153]
 [ 0.33158634]
 [ 1.23573927]
 [-1.8713324 ]
 [-0.82216338]]
500
-2.6474995606354796
[[ 0.23548623]
 [ 0.35639062]
 [ 1.32608088]
 [-2.00682483]
 [-0.88341048]]
600
-2.2246003867464386
[[ 0.24900609]
 [ 0.37666497]
 [ 1.40029232]
 [-2.11811169]
 [-0.93387848]]
700
-1.9201379443146736
[[ 0.26047577]
 [ 0.39380256]
 [ 1.46329086]
 [-2.21256466]
 [-0.97682677]]
800
-1.6903417276892148
[[ 0.27043611]
 [ 0.40864065]
 [ 1.5180416 ]
 [-2.29463231]
 [-1.0142282 ]]
900
-1.510654577546624
[[ 0.27923891]
 [ 0.42172131]
 [ 1.56646995]
 [-2.36720477]
 [-1.0473677 ]]


In [5]:
def prediction(X_test, Y_test, theta):
    """Classify each test row with the fitted parameters and print a report.

    For every row the sigmoid of theta^T x is computed and thresholded at
    0.5 to obtain a 0/1 prediction. The probability and the actual label
    are printed per row, followed by one summary line with the correct
    count, the wrong count and the accuracy in percent.

    Parameters
    ----------
    X_test : 2-D array-like, shape (n_samples, n_features)
        Input rows. A leading column of 1's is added internally to match
        the intercept term theta[0].
    Y_test : array-like with n_samples values
        Actual label for each row.
    theta : ndarray, shape (n_features + 1, 1)
        Parameters as returned by fit_implementation.

    Returns
    -------
    None. All results are printed.
    """
    #Adding a column of 1's so that the first element of each input is always 1
    #It is multiplied with theta_0 (the intercept)
    X_test = np.insert(X_test, 0, values=1, axis=1)
    no_attributes = X_test.shape[1]

    correctCount = 0
    totalCount = 0

    #Check each data point
    for instance, actualOutput in zip(X_test, Y_test):
        instance = instance.reshape(no_attributes, 1)
        dotResult = np.dot(theta.T, instance)
        #Calculated the probability of belonging to class 1
        predictedValue = sigmoid(dotResult).squeeze()

        predictedOutput = 1 if predictedValue >= 0.5 else 0
        print(predictedValue, actualOutput)
        if predictedOutput == actualOutput:
            correctCount += 1
        totalCount += 1

    #With no test rows the ratio is undefined; report 0.0 instead of dividing by zero
    accuracy = (correctCount*100)/(totalCount) if totalCount else 0.0
    print("Total Correct Count: ",correctCount," Total Wrong Count: ",totalCount-correctCount," Accuracy: ",accuracy)
    
#Evaluate the learned parameters on the held-out test split
prediction(X_test=X_test, Y_test=Y_test, theta=parameters)

0.005843420174785157 0.0
0.007142666796493177 0.0
0.01739664292145254 0.0
0.04993088807252141 0.0
0.010418956479142625 0.0
0.01945900413556282 0.0
0.014943426943600038 0.0
0.014558137483623515 0.0
0.1235906750910857 0.0
0.016203190234137476 0.0
0.9900735083720652 1.0
0.9208927529978683 1.0
0.9814293710741085 1.0
0.9720780831436339 1.0
0.972403312348781 1.0
0.9697635609932154 1.0
0.9890881941176695 1.0
0.9783513337192419 1.0
0.9907680978898962 1.0
0.9844066598433767 1.0
Total Correct Count:  20  Total Wrong Count:  0  Accuracy:  100.0
