In [1]:
import numpy as np
import math
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import datasets

In [2]:
iris = datasets.load_iris()

In [3]:
classes = np.unique(iris.target)

In [4]:
target = [[],[],[]]
for i in range(len(classes)):
    for j in range(len(iris.target)):
        if(iris.target[j] == classes[i]):
            target[i].append(1)
        else:
            target[i].append(0)
            
x_train = []
x_test = []
y_train = []
y_test = []

for i in range(len(classes)):
    x_train.append(0)
    y_train.append(0)
    x_test.append(0)
    y_test.append(0)
    
for i in range(len(classes)):
    x_train[i], x_test[i], y_train[i], y_test[i] = train_test_split(iris.data, target[i], test_size = 0.2)

In [5]:
def sigmoid(z):
    s = float(1/((1.0 + float((1.0/math.exp(z))))))
    return s

def hypothesis(x, m):
    z = 0
    for i in range(len(m)):
        z += m[i]*x[i]
    return sigmoid(z)

In [6]:
def step_gradient(x, y, theta_current, learning_rate):
    m = len(y)
    feature_numbers = x.shape[1]
    new_theta = np.zeros(feature_numbers)
    for i in range(feature_numbers):
        slope = 0
        for j in range(m):
            slope += (1/m)*x[j][i]*(hypothesis(x[j], theta_current) - y[j])
        new_theta[i] = theta_current[i] - (learning_rate*slope)
    return new_theta

def cost_gd(x, y, theta):
    m = len(y)
    feature_numbers = x.shape[1]
    total_cost = 0
    for j in range(m):
        if(y[j] == 1):
            total_cost += -1*y[j]*math.log(hypothesis(x[j], theta))
        elif(y[j] == 0):
            total_cost += -1*(1-y[j])*math.log(1-hypothesis(x[j], theta))
    return total_cost/m

def gd_runner(x, y, learning_rate, num_iterations):
    m = len(y)
    feature_numbers = x.shape[1]
    theta_current = np.zeros(feature_numbers)
    print("Start Cost", cost_gd(x, y, theta_current))
    for i in range(num_iterations):
        theta_current = step_gradient(x, y, theta_current, learning_rate)
    print("Final Cost", cost_gd(x, y, theta_current))
    return theta_current

def predict(x, theta):
    m = x.shape[0]
    feature_numbers = x.shape[1]
    predictions = np.zeros(m)
    for i in range(m):
        predictions[i] = round(hypothesis(x[i], theta))
    return predictions

In [7]:
theta = []
for i in range(len(classes)):
    theta.append(gd_runner(x_train[i], y_train[i], 0.01, 10000))
print(theta)

Start Cost 0.6931471805599461
Final Cost 0.0075507739353354725
Start Cost 0.6931471805599461
Final Cost 0.47863121415293114
Start Cost 0.6931471805599461
Final Cost 0.13711822650065963
[array([ 0.58992731,  1.84661867, -2.83092768, -1.28859174]), array([ 0.87966143, -2.08056136,  0.86077616, -2.26514502]), array([-2.05268595, -2.30583108,  2.94675526,  3.09292903])]


In [33]:
#abhi sabki cost kam nhi aa rhi sirf pehle wale ki aa rhi hai so, har ek ke liye alag iterations and learning rate denge
theta_new = []
theta.append(gd_runner(x_train[0], y_train[0], 0.2, 1000))
theta.append(gd_runner(x_train[1], y_train[1], 0.2, 10000))

Start Cost 0.6931471805599461
Final Cost 0.004065203230328945
Start Cost 0.6931471805599461
Final Cost 0.44977578168304605


In [9]:
for i in range(len(classes)):
    print(y_test[i])
    print(predict(x_test[i], theta[i]).astype(int))

[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0]
[1 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0]
[0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0]
[1 0 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 0 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0]
[0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 1 1 0]
