# LOGISTIC REGRESSION ON IRIS DATASET

In [325]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets, preprocessing
import pandas as pd
from sklearn import cross_validation as cv

In [326]:
iris = datasets.load_iris()
df = pd.DataFrame(iris.data)
print(iris.feature_names)
print(iris.target_names)
print(df.describe())

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
['setosa' 'versicolor' 'virginica']
                0           1           2           3
count  150.000000  150.000000  150.000000  150.000000
mean     5.843333    3.054000    3.758667    1.198667
std      0.828066    0.433594    1.764420    0.763161
min      4.300000    2.000000    1.000000    0.100000
25%      5.100000    2.800000    1.600000    0.300000
50%      5.800000    3.000000    4.350000    1.300000
75%      6.400000    3.300000    5.100000    1.800000
max      7.900000    4.400000    6.900000    2.500000


Using inbuild sklearn function

In [350]:
X_train, X_test, Y_train, Y_test = cv.train_test_split(iris.data, iris.target, test_size=0.3)
clf = LogisticRegression()
clf.fit(X_train, Y_train)
clf.predict(X_test)
clf.score(X_test, Y_test)

0.9777777777777777

In [565]:
# We get poorer results on applying feature scaling for use of inbuilt classifier 

# Implementing 1 vs all technique

In [377]:
def predict(X, coef):
    z = X.dot(coef)
    hox = 1 / (1.0 + np.exp(-z))
    return hox

In [378]:
def cost_fn(X, Y, coef, r_param=1):
    M = len(coef)
    hox = predict(X,coef)
    term1 = Y.T.dot(np.log(hox)) 
    term2 = (1-Y).T.dot(np.log(1-hox))
    cost = (-1/M) * (term1 + term2)
    # adding regularisation 
    R_term = (coef**2).sum() * (r_param/(2*M))
    cost = cost+R_term      
    return cost

def gd_step(X, Y, coef, l_rate, r_param=1):
    Y = np.reshape(Y, (len(Y), 1))
    N = len(Y)
    hox = predict(X, coef)
    loss = hox - Y
    r = X.T
    term1 = r.dot(loss)
    term1 = term1/N
    term2 = coef*(r_param/N)
    grad = term1+term2
    coef = coef - l_rate*grad
    return coef

In [379]:
def gd_runner(X, Y, learning_rate=0.02, num_iter=10):
    X = np.array(X)
    Y = np.array(Y)
    arb, count = X.shape
    coef = np.random.rand(count,1)
    print("cost before gd = " + str(cost_fn(X, Y, coef)))
    for i in range(num_iter):
        coef = gd_step(X, Y, coef, learning_rate)
    print("cost after = " + str(cost_fn(X, Y, coef)))
    return coef

In [380]:
# applying feature scaling
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# appending the bias column 
arr = np.full((len(Y_train), 1), 1)
X = np.append(arr, X_train, axis=1)
arr = np.full((len(X_test), 1), 1)
X_test1 = np.append(arr, X_test, axis=1)

In [558]:
# TRAINING FOR EACH CLASS 

# class 1 
Y_new= []
for i in range(len(Y_train)):
    if(Y_train[i]==0):
        Y_new.append(1)
    else:
        Y_new.append(0)
coef_1 = gd_runner(X, Y_new, 0.01, 40)

# class 2
Y_new = []
for i in range(len(Y_train)):
    if(Y_train[i]==1):
        Y_new.append(1)
    else:
        Y_new.append(0)
coef_2 = gd_runner(X, Y_new, 0.01, 40)

# class 3
Y_new = []
for i in range(len(Y_train)):
    if(Y_train[i]==2):
        Y_new.append(1)
    else:
        Y_new.append(0)
coef_3 = gd_runner(X, Y_new, 0.01, 40)

cost before gd = [28.36911838]
cost after = [19.99788686]
cost before gd = [20.90649062]
cost after = [19.35929606]
cost before gd = [9.45748105]
cost after = [8.92042852]


In [559]:
Y_pred_1 = predict(X_test1, coef_1)
Y_pred_2 = predict(X_test1, coef_2)
Y_pred_3 = predict(X_test1, coef_3)

In [560]:
# classifying each test data point to one of the 3 classes 
Y_pred = []
for i in range(len(X_test1)):
    max_class = 0
    if(Y_pred_1[i]<=Y_pred_2[i]):
        if(Y_pred_2[i]>Y_pred_3[i]):
            max_class = 1
        else:
            max_class = 2
    else:
        if(Y_pred_1[i]<Y_pred_3[i]):
            max_class = 2
    Y_pred.append(max_class)
    

In [561]:
# testing accuracy 
count = 0
for i in range(len(Y_test)):
    if(Y_test[i]==Y_pred[i]):
        count = count+1
accuracy = count/len(Y_test)

In [562]:
print(accuracy)

0.8


In [563]:
print(Y_pred[0:10])

[0, 0, 0, 1, 2, 1, 1, 0, 2, 0]


In [564]:
print(Y_test[0:10])

[0 0 0 1 2 1 0 1 2 0]
