In [4]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing

# Load the CSV (one header row, comma separated) as a plain 2-D float array.
# dtype=float is used instead of dtype=None: with dtype=None NumPy infers a type
# per column and returns a 1-D structured array as soon as the columns differ
# (e.g. float features next to integer labels), which breaks the 2-D slicing below.
iris = np.genfromtxt('iris_2D_3c.csv',
                     dtype=float,
                     delimiter=',',
                     skip_header=1)
X = iris[:, 0:2]  # first two columns: input features
y = iris[:, 2]    # third column: class label

# Labels are later used as integer column indices when one-hot encoding,
# so they must be an integer dtype.
y = y.astype('uint8')

print('X:\n', X)
print('y:\n', y)

X:
 [[1.5 0.2]
 [1.4 0.2]
 [1.6 0.2]
 [4.7 1.6]
 [3.3 1.1]
 [4.6 1.3]
 [5.6 2.2]
 [5.1 1.5]
 [5.6 1.4]]
y:
 [0 0 0 1 1 1 2 2 2]


In [5]:
# Hand-set problem sizes:
#   N - number of samples
#   d - dimensionality of the inputs fed to the model (rows of theta); this
#       counts the intercept column that is prepended to X further down
#   k - number of classes
N, d, k = 9, 3, 3

# one_hot
def convert_one_hot(y, k):
    """Encode integer class labels as one-hot rows.

    Parameters
    ----------
    y : array-like of shape (n,)
        Class labels. Every entry must be an integer value in ``[0, k)``;
        integer-valued floats (e.g. ``2.0``) are accepted and converted.
    k : int
        Number of classes, i.e. the number of columns in the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, k)`` where row ``i`` is all zeros except
        for a 1 in column ``y[i]``.

    Raises
    ------
    ValueError
        If ``y`` is not one-dimensional, contains non-integer values, or
        contains a label outside ``[0, k)``.
    """
    labels = np.asarray(y)
    if labels.ndim != 1:
        raise ValueError("y must be one-dimensional, got shape %r" % (labels.shape,))

    # Fancy indexing only accepts integer arrays; convert other dtypes, but
    # refuse to silently truncate fractional labels.
    if not np.issubdtype(labels.dtype, np.integer):
        as_int = labels.astype(np.intp)
        if not np.array_equal(as_int, labels):
            raise ValueError("y must contain integer-valued labels")
        labels = as_int

    # Without this check a negative label would wrap around and be encoded
    # as one of the last classes instead of being reported.
    if labels.size and (labels.min() < 0 or labels.max() >= k):
        raise ValueError("labels must lie in [0, %d)" % k)

    one_hot = np.zeros((labels.size, k))
    one_hot[np.arange(labels.size), labels] = 1
    return one_hot

# One-hot encode the labels so they can be compared with the softmax output.
y_one_hot = convert_one_hot(y, k)
print(y_one_hot)

# Prepend a column of ones so the first row of theta acts as the bias term.
# The column-count guard keeps this cell idempotent: X is rebound in place,
# so without it every re-run would stack one more intercept column and break
# the (N, d) x (d, k) product used later on.
if X.shape[1] != d:
    intercept = np.ones((X.shape[0], 1))
    X = np.concatenate((intercept, X), axis=1)
print(X)

[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]
[[1.  1.5 0.2]
 [1.  1.4 0.2]
 [1.  1.6 0.2]
 [1.  4.7 1.6]
 [1.  3.3 1.1]
 [1.  4.6 1.3]
 [1.  5.6 2.2]
 [1.  5.1 1.5]
 [1.  5.6 1.4]]


In [6]:
# initialize parameters: small random values, one column of weights per class.
# A dedicated, seeded generator makes the starting point (and therefore the
# whole training run) reproducible across kernel restarts.
rng = np.random.RandomState(42)
theta = 0.01 * rng.randn(d, k)
print('theta:\n', theta)

theta:
 [[ 0.00032416  0.00648754  0.02601451]
 [ 0.01630317 -0.01158649  0.02485419]
 [ 0.01635965  0.01219001  0.00736532]]


In [None]:
# some parameters
learning_rate = 0.01
losses = []
num_iter = 3000

# Full-batch gradient descent on the softmax cross-entropy loss.
for epoch in range(num_iter):
    # evaluate class scores
    z = X.dot(theta)

    # compute the class probabilities (softmax).
    # Subtracting each row's maximum does not change the softmax value but
    # keeps np.exp from overflowing to inf (and y_hat from becoming nan).
    z_shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(z_shifted)
    sum_exp_z = np.sum(exp_z, axis=1, keepdims=True)
    y_hat = exp_z / sum_exp_z

    # compute the loss: negative log-probability of the true class.
    # It is evaluated in log space so a probability that underflows to 0
    # does not turn into log(0) = -inf.
    log_y_hat = z_shifted - np.log(sum_exp_z)
    loss = -np.sum(log_y_hat * y_one_hot, axis=1)
    losses.append(np.sum(loss))  # summed (not averaged) over all samples

    # compute the gradient on scores
    dz = y_hat - y_one_hot

    # backpropagate the gradient to the parameters (theta)
    dtheta = X.T.dot(dz)

    # perform a parameter update
    theta = theta - learning_rate*dtheta

In [None]:
# Plot the loss recorded at each training iteration; labels and a title let
# the figure be read on its own.
x_axis = range(len(losses))
fig, ax = plt.subplots()
ax.plot(x_axis, losses, color="r")
ax.set_xlabel("iteration")
ax.set_ylabel("loss (summed over samples)")
ax.set_title("Training loss")
plt.show()

In [None]:
# Class probabilities of every sample under the current parameters.
z = X.dot(theta)
# Shift each row by its maximum before exponentiating: the softmax value is
# unchanged, but np.exp can no longer overflow for large scores.
exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
y_hat = exp_z / np.sum(exp_z, axis=1, keepdims=True)
print(y_hat)

In [None]:
# For each row of y_hat, take the column index holding the largest probability.
predicted_class = y_hat.argmax(axis=1)
print(predicted_class)