In [1]:
import numpy as np
from numpy import ndarray
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
df = pd.read_csv(r"./data/iris/iris.data", header=None)
df.shape

(150, 5)

In [3]:
df.columns

Int64Index([0, 1, 2, 3, 4], dtype='int64')

In [4]:
df

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


Attribute Information:
   1. sepal length in cm
   2. sepal width in cm
   3. petal length in cm
   4. petal width in cm
   5. class: 
      -- Iris Setosa
      -- Iris Versicolour
      -- Iris Virginica

In [5]:
classes = {"Iris-versicolor": 2, "Iris-setosa": 1, "Iris-virginica": 0}
y = df.replace({4: classes})[4]
X = df.iloc[:, :4]
X['b'] = 1

In [6]:
def one_hot_encoder(y: ndarray, k: int) -> ndarray:
    m = np.zeros((y.shape[0], k))
    for i, row in enumerate(m):
        row[y[i]] = 1
    return m

In [7]:
y = one_hot_encoder(y, 3)
y

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1

In [8]:
def softmax(z: ndarray) -> ndarray:
    a = np.exp(z - np.max(z))
    return a / np.sum(a)

#https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python

In [9]:
def thesis(theta: ndarray, X: ndarray) -> ndarray:
    z =  X.dot(theta)
    return softmax(z)

In [10]:
def cross_entropy(y_vec: ndarray, y: ndarray, m)-> float:
    return -np.sum(y*np.log(y_vec+1e-9))/m
#+1e-9 for avoiding log(0)

In [11]:
def gradient(loss_v: ndarray, X: ndarray) -> ndarray:
    return X.T.dot(loss_v)

In [12]:
def train(X: ndarray, W: ndarray, y: ndarray, epochs: int, lr: float) -> ndarray:
    losses = []
    for i in range(epochs):
        y_vec = thesis(W, X)
        gradient_v = gradient((y-y_vec), X)
        W = np.add(W, np.multiply(lr, gradient_v))
        print(f'Current epoch: {i}, Running Weights: {W}')
        
        loss_v = cross_entropy(y_vec, y, X.shape[0])
        losses.append(loss_v)
        if i%10==0:
            print(f'Current epoch: {i}, Running losses: {loss_v}')
        
        print('-------------')
        
    return W

In [13]:
n = X.shape[1]
m = X.shape[0]
k = 3
X = X.to_numpy()

In [14]:
random_state = 2232
rgen = np.random.RandomState(random_state)
W = rgen.normal(loc=0.0, scale=0.01, size=(n,k))

In [15]:
W.shape

(5, 3)

In [16]:
X.shape

(150, 5)

In [17]:
lr = 0.01
epochs = 100

result = train(X, W, y, epochs, lr)
result

Current epoch: 0, Running Weights: [[3.26476588 2.46861481 2.95569623]
 [1.48274306 1.69407932 1.37176117]
 [2.75371855 0.72185757 2.1197849 ]
 [1.03354365 0.12851672 0.65237789]
 [0.49136375 0.49576816 0.4848778 ]]
Current epoch: 0, Running losses: 6.1127068879576445
-------------
Current epoch: 1, Running Weights: [[6.48151382 4.97161481 5.92366347]
 [2.93495157 3.40307932 2.75674643]
 [5.46376934 1.45385757 4.24975704]
 [2.02516303 0.25051672 1.31536885]
 [0.98136799 0.99576816 0.98487356]]
-------------
Current epoch: 2, Running Weights: [[9.69795922 7.47461481 8.89166343]
 [4.38509387 5.11207932 4.14174642]
 [8.17353837 2.18585757 6.37975701]
 [3.01672553 0.37251672 1.97836883]
 [1.471368   1.49576816 1.48487355]]
-------------
Current epoch: 3, Running Weights: [[12.9144854   9.97761481 11.85966343]
 [ 5.83448077  6.82107932  5.52674642]
 [10.8832055   2.91785757  8.50975701]
 [ 4.00818842  0.49451672  2.64136883]
 [ 1.961368    1.99576816  1.98487355]]
-------------
Current epoc

array([[321.7451855 , 250.26561481, 296.78766343],
       [144.93867792, 170.88507932, 138.48674642],
       [270.94912703,  73.18985757, 212.98975701],
       [ 99.14547716,  12.20651672,  66.28936883],
       [ 49.001368  ,  49.99576816,  49.98487355]])

In [18]:
def test(X: ndarray, theta: ndarray) -> ndarray:
    return np.argmax(thesis(theta, X))


In [19]:
predict = test(X[12], result)
predict

0