In [598]:
from pathlib import Path

import random
import math

import numpy as np
import pandas as pd

from matplotlib import pyplot as plt

In [599]:
def F(x: np.ndarray) -> np.ndarray:
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise.

    The naive formula overflows in ``exp(-x)`` for large negative ``x``
    (emitting a RuntimeWarning). Here the exponential is only ever taken of
    a non-positive number, so it stays within [0, 1] and cannot overflow.

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        Sigmoid of ``x`` with the same shape as ``x`` (a NumPy scalar when
        ``x`` is a scalar).
    """
    x = np.asarray(x)
    # z = exp(-|x|) lies in (0, 1]; it may underflow to 0 but never overflows.
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    out = np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays unchanged, matching what the plain formula returned.
    return out[()]

def dF(x: np.ndarray) -> np.ndarray:
    """Derivative of the sigmoid ``F`` with respect to its input.

    Uses the identity ``F'(x) = F(x) * (1 - F(x))``; the sigmoid is
    evaluated once and reused rather than computed twice.

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        Element-wise derivative, same shape as ``F(x)``.
    """
    s = F(x)
    return s*(1-s)

class NNClassifier:
    """Single-layer sigmoid network fitted by mini-batch gradient descent.

    The model output is ``F(x @ w + b)``; training minimises the squared
    error between that output and the target vector.

    Attributes:
        w: Weight matrix of shape (nIn, nOut), drawn uniformly from [-1, 1).
        b: Bias vector of shape (nOut,), initialised to zero.
    """

    def __init__(self, nIn: int, nOut: int) -> None:
        """Create a model with ``nIn`` inputs and ``nOut`` sigmoid outputs."""
        self.w: np.ndarray = np.random.uniform(-1, 1, (nIn, nOut))
        self.b: np.ndarray = np.zeros(nOut)


    def predict(self, x: np.ndarray) -> np.ndarray:
        """Return the sigmoid activations for one sample or a batch of rows."""
        return F(np.dot(x, self.w)+self.b)


    def train(self, xTrain: np.ndarray, yTrain: np.ndarray, lr, batch_size, max_iter) -> None:
        """Run ``max_iter`` mini-batch gradient-descent updates in place.

        Each iteration draws ``batch_size`` distinct samples, sums their
        gradients (no averaging, so the effective step scales with the batch
        size) and updates ``w`` and ``b`` once.

        Args:
            xTrain: Inputs, one row per sample.
            yTrain: Targets, one row (or scalar) per sample.
            lr: Learning rate applied to the summed batch gradient.
            batch_size: Samples per update; clamped to the dataset size so a
                value larger than the dataset means full-batch descent
                instead of an error from sampling without replacement.
            max_iter: Number of update steps.

        Raises:
            ValueError: If the training set is empty or the number of inputs
                and targets differ.
        """
        xTrain = np.asarray(xTrain)
        yTrain = np.asarray(yTrain)
        n = xTrain.shape[0]

        if n == 0:
            raise ValueError('xTrain must contain at least one sample')
        if yTrain.shape[0] != n:
            raise ValueError(
                f'xTrain has {n} samples but yTrain has {yTrain.shape[0]}')

        # Sampling without replacement cannot draw more than n items.
        batch_size = min(int(batch_size), n)

        for _ in range(max_iter):
            idxs = np.random.choice(a=np.arange(n), size=batch_size, replace=False)

            # One vectorised call yields the gradient summed over the batch.
            dw, db = self.grads(xTrain[idxs], yTrain[idxs])

            self.w -= lr*dw
            self.b -= lr*db


    def grads(self, x: np.ndarray, y: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Gradient of the summed squared error w.r.t. ``w`` and ``b``.

        The error differentiated here is ``sum((F(u) - y)**2)`` with
        ``u = x @ w + b`` (no 1/m factor, unlike ``loss``).

        Args:
            x: A single sample of shape (nIn,) or a batch of shape
                (batch, nIn).
            y: Target(s) matching ``x``. For a batch, a 1-D ``y`` is read as
                one scalar target per sample.

        Returns:
            ``(dEdw, dEdb)`` with shapes (nIn, nOut) and (nOut,); for a batch
            these are summed over the samples.
        """
        X = np.atleast_2d(x)
        Y = np.asarray(y)
        if np.ndim(x) > 1 and Y.ndim == 1:
            # Per-sample scalar targets: make them a column so they line up
            # with the (batch, nOut) activations instead of broadcasting
            # across the batch axis.
            Y = Y[:, np.newaxis]

        u = np.dot(X, self.w)+self.b
        a = F(u)

        dEdu = 2*(a-Y)*dF(u)
        # X.T @ dEdu is the sum of per-sample outer products x_i (x) dEdu_i.
        dEdw = np.dot(X.T, dEdu)
        dEdb = dEdu.sum(axis=0)

        return dEdw, dEdb


    def loss(self, x: np.ndarray, y: np.ndarray) -> float:
        """Mean squared error of the predictions for ``x`` against ``y``.

        Per sample the squared errors are summed and divided by the target
        size ``m``; the result is the average of those values over samples.

        Args:
            x: Inputs, one row per sample.
            y: Targets, one row (or scalar) per sample.

        Returns:
            The mean squared error as a float.

        Raises:
            ValueError: If ``y`` is empty or ``x`` and ``y`` disagree on the
                number of samples.
        """
        target = np.asarray(y)
        n = target.shape[0]
        if n == 0:
            raise ValueError('loss requires at least one sample')
        m = target[0].size

        pred = self.predict(np.asarray(x))
        if pred.shape[0] != n:
            raise ValueError(
                f'x has {pred.shape[0]} samples but y has {n}')
        if target.ndim == 1:
            # Scalar targets per sample: align with the (n, nOut) predictions.
            target = target[:, np.newaxis]

        sq_err = np.square(pred-target).reshape(n, -1)
        d = 1/m * np.sum(sq_err, axis=1)

        return 1/n*np.sum(d)

In [600]:
# Load the iris table from ../data/iris_csv.csv (path relative to the notebook).
df = pd.read_csv(Path('..', 'data', 'iris_csv.csv'))

# Scalar encoding of the three species names.
# NOTE(review): not referenced by any cell visible here -- confirm it is still needed.
class_mapping = {
    'Iris-setosa': 0,
    'Iris-versicolor': 0.5,
    'Iris-virginica': 1,
}

# Append one 0/1 indicator column ("y_<label>") per distinct value of the
# 'class' column, in order of first appearance.
for name in df['class'].unique():
    df[f'y_{name}'] = (df['class'] == name).astype('int64')

In [601]:
# Fraction of the shuffled rows assigned to the TEST set; the remainder is
# used for training.
# NOTE(review): 0.8 leaves only 20% of the rows for training -- confirm this
# is intended and not a swapped train/test ratio.
test_size = 0.8

# Random row order (unseeded, so the split differs from run to run).
p = np.random.permutation(df.index.size)

# First int(size * test_size) shuffled positions -> test, the rest -> train.
idx_test, idx_train = np.split(p, [int(p.size*test_size)])

# Columns 0..3 are taken as inputs; columns from position 5 onward as targets.
xTrain, yTrain = np.array(df.iloc[idx_train, 0:4]), np.array(df.iloc[idx_train, 5:])
xTest, yTest = np.array(df.iloc[idx_test, 0:4]), np.array(df.iloc[idx_test, 5:])

In [604]:
# Model size: 4 inputs (the four columns selected for xTrain/xTest) and
# 3 outputs (presumably one per indicator column -- verify against yTrain).
nIn, nOut = 4, 3

# Optimiser settings: learning rate, samples per update, number of updates.
lr, batch_size, max_iter = 1e-2, 10, 1000

model = NNClassifier(nIn, nOut)

# Report the test-set loss before and after fitting on the training split.
print('untrained loss: ', model.loss(xTest, yTest))
model.train(xTrain, yTrain, lr, batch_size, max_iter)
print('trained loss: ', model.loss(xTest, yTest))

untrained loss:  0.47566985097169867
trained loss:  0.07783338340674685


In [605]:
# Show each test sample's outputs (rounded to 2 decimals) next to its target row.
for x, y in zip(xTest, yTest):
    print(np.round(model.predict(x), 2), y)

[0.97 0.07 0.  ] [1 0 0]
[0.   0.37 0.74] [0 0 1]
[0.   0.18 0.89] [0 0 1]
[0.   0.66 0.87] [0 0 1]
[0.92 0.16 0.  ] [1 0 0]
[0.03 0.56 0.43] [0 1 0]
[0.   0.33 0.9 ] [0 0 1]
[0.   0.24 0.82] [0 0 1]
[0.87 0.17 0.  ] [1 0 0]
[0.96 0.09 0.  ] [1 0 0]
[0.01 0.3  0.51] [0 1 0]
[0.02 0.39 0.4 ] [0 1 0]
[0.88 0.1  0.  ] [1 0 0]
[0.   0.74 0.98] [0 0 1]
[0.01 0.34 0.64] [0 0 1]
[0.95 0.12 0.  ] [1 0 0]
[0.   0.31 0.81] [0 0 1]
[0.   0.38 0.92] [0 0 1]
[0.01 0.7  0.82] [0 0 1]
[0.91 0.23 0.  ] [1 0 0]
[0.92 0.22 0.  ] [1 0 0]
[0.   0.66 0.89] [0 0 1]
[0.01 0.36 0.83] [0 0 1]
[0.17 0.38 0.09] [0 1 0]
[0.87 0.16 0.  ] [1 0 0]
[0.   0.73 0.95] [0 0 1]
[0.9  0.19 0.  ] [1 0 0]
[0.01 0.47 0.69] [0 0 1]
[0.94 0.25 0.  ] [1 0 0]
[0.   0.41 0.86] [0 0 1]
[0.05 0.37 0.21] [0 1 0]
[0.   0.23 0.87] [0 0 1]
[0.   0.43 0.9 ] [0 0 1]
[0.02 0.51 0.46] [0 1 0]
[0.88 0.4  0.  ] [1 0 0]
[0.04 0.32 0.23] [0 1 0]
[0.05 0.42 0.15] [0 1 0]
[0.02 0.41 0.34] [0 1 0]
[0.94 0.19 0.  ] [1 0 0]
[0.98 0.09 0.  ] [1 0 0]
