In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn import linear_model
import random
random.seed(123)

In [3]:
# Using iris data as training dataset
def load_data():
    iris = load_iris()
    X = iris.data  
    y = iris.target
    delete_index = []
    # Remove the data label = 2. In other words, only keep class = 0,1 for simplicity.
    for index in range(len(y)):
        if y[index] == 2:
            delete_index.append(index)
    X = np.delete(X,delete_index,0)
    y = np.delete(y,delete_index)
    return X,y

In [10]:
def sgd_class (X,y):
    # Use for unit test
    sgd = linear_model.SGDClassifier(loss= 'log',max_iter=1000, tol=0.001)
    sgd.fit(X, y)
    return sgd

In [16]:
# Logistic Regression using SGD 
def logistic_regression(ws):
    return 1 / (1 + np.exp(-ws))

def sgd(X, y):
    #Run randomly
    w = np.ones(4)
    r,c = np.shape(X)
    # define maximum iteration
    max_iteration = 1000
    # define learning rate
    learning_rate = 0.001
    for i in range(max_iteration):
        index = random.randint(0, (r - 1))
        random_x = X[index]
        random_y = y[index]
        p = logistic_regression(np.dot(w,random_x))
        w = w - learning_rate * (p - random_y) * random_x
        print (w)
    return w

In [17]:
# driver
X,y = load_data()
w = sgd(X,y)
predicted_y=[]
for x in X:
    predicted_y.append(logistic_regression(np.dot(w,x)))
for i in range(len(predicted_y)):
    if predicted_y[i] > 0.5:
        predicted_y[i] = 1
    else:
        predicted_y[i] = 0

print ("Score of Logistic Regression using SGD: ",np.sum( y == predicted_y)/len(y))
print (predicted_y)

[0.99420008 0.99600005 0.99880002 0.9998    ]
[0.9891002  0.99220014 0.99730005 0.99950001]
[0.98910021 0.99220015 0.99730006 0.99950001]
[0.98390042 0.98880029 0.99590011 0.99930002]
[0.97880056 0.98550038 0.99420016 0.99880003]
[0.97880057 0.98550038 0.99420017 0.99880003]
[0.97440126 0.98260084 0.99280039 0.99860007]
[0.96870131 0.97820087 0.9913004  0.99820007]
[0.96870131 0.97820088 0.9913004  0.99820007]
[0.96340147 0.97450099 0.98980045 0.99800008]
[0.96340148 0.97450099 0.98980045 0.99800008]
[0.9634015  0.974501   0.98980047 0.99800008]
[0.9634015  0.974501   0.98980047 0.99800008]
[0.9584017  0.97110114 0.98820053 0.9976001 ]
[0.9584017  0.97110114 0.98820053 0.9976001 ]
[0.95330186 0.96740125 0.98670058 0.99720011]
[0.94750198 0.96340134 0.98550061 0.99700012]
[0.94250227 0.95980154 0.98410068 0.99680013]
[0.94250227 0.95980154 0.98410069 0.99680013]
[0.93790285 0.956202   0.98310081 0.99660015]
[0.93790285 0.956202   0.98310081 0.99660015]
[0.93330346 0.95260247 0.98210095 

[-0.42652709 -0.01610623  0.65467787  0.9598654 ]
[-0.42521108 -0.01551873  0.65559438  0.9601239 ]
[-0.42419897 -0.0150038   0.65634014  0.96035473]
[-0.42541489 -0.0159625   0.65598939  0.96033135]
[-0.42403803 -0.0153305   0.65689226  0.96062478]
[-0.4253726  -0.01622021  0.65649974  0.96057245]
[-0.42412298 -0.01572036  0.65737447  0.96082237]
[-0.42547178 -0.01663755  0.65696983  0.96076842]
[-0.42656302 -0.01749157  0.6567326   0.96072097]
[-0.42528163 -0.01691715  0.65761632  0.96098609]
[-0.42418437 -0.01640946  0.65838604  0.96123174]
[-0.42545157 -0.01727911  0.65803818  0.96118205]
[-0.42665523 -0.01822814  0.65769097  0.9611589 ]
[-0.42786684 -0.01898541  0.65733759  0.96113366]
[-0.42732926 -0.01869384  0.65777494  0.96129767]
[-0.42859893 -0.01958021  0.6574156   0.96124976]
[-0.42996085 -0.02063151  0.6570572   0.96115418]
[-0.42941395 -0.02033489  0.65750214  0.96132103]
[-0.42851434 -0.01990962  0.65822182  0.96151731]
[-0.42976824 -0.02073719  0.65787073  0.96146715]


In [117]:
sgdlg = sgd_class(X,y)
result = sgdlg.predict(X)
print ("Score of Logistic Regression using SGDclassifier",sgdlg.score(X,y))
print (result)

Score of Logistic Regression using SGDclassifier 1.0
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [118]:
# unittest
print(np.alltrue(result == predicted_y))

True


4
