In [14]:
from pathlib import Path

import numpy as np
import pandas as pd
from numba import njit, prange

from matplotlib import pyplot as plt

In [15]:

def F(x: np.ndarray) -> float:
    return 1/(1+np.exp(-x))


@njit(fastmath=True)
def F_jit(x: np.ndarray) -> float | np.ndarray:
    return 1/(1+np.exp(-x))


@njit(fastmath=True)
def dF_jit(x: np.ndarray) -> float | np.ndarray:
    return F_jit(x)*(1-F_jit(x))


@njit(fastmath=True)
def grads(xBatch: np.ndarray, yBatch: np.ndarray, w: np.ndarray, b: np.ndarray) -> tuple[np.ndarray]:
    n = xBatch.shape[0]
    nOut = w.shape[1]

    dw = np.zeros(w.shape)
    db = np.zeros(b.shape)
    
    for i in prange(n):
        u = xBatch[i] @ w + b
        y = F_jit(u)

        dLdu = 2/nOut * (y-yBatch[i]) * dF_jit(u)
        dw += dLdu * np.atleast_2d(xBatch[i]).T
        db += dLdu
    
    return dw, db

class Perceptron:
    def __init__(self, nIn: int, nOut: int) -> None:
        self.nIn = nIn
        self.nOut = nOut
        self.w: np.ndarray = np.random.uniform(-1, 1, (nIn, nOut))
        self.b: np.ndarray = np.zeros((nOut))


    def predict(self, x:np.ndarray) -> np.ndarray:
        return F(x @ self.w + self.b)


    def train(self, xTrain: np.ndarray, yTrain: np.ndarray, lr, batch_size, max_iter) -> None:
        n = xTrain.shape[0]

        for j in range(max_iter):
            idxs = np.random.choice(a=np.arange(n), size=batch_size, replace=False)

            dw, db = grads(xTrain[idxs], yTrain[idxs], self.w, self.b)
            
            self.w -= lr*dw
            self.b -= lr*db
        
    
    def loss(self, x: np.ndarray, y: np.ndarray) -> float:        
        Yp = np.array([self.predict(xi) for xi in x])
        d = 1/self.nOut * np.sum(np.square(y-Yp), axis=1)
        return 1/y.shape[0] * np.sum(d)

In [16]:
df = pd.read_csv(Path('..', '..', 'data', 'iris_csv.csv'))

for c in df.columns[0:4]:
    df[c] = (df[c]-df[c].mean())/df[c].std()

df['synth1'] = df['petallength']*df['petalwidth']
df['synth2'] = df['sepallength']*df['petallength']
df['synth3'] = df['sepallength']*df['petalwidth']

for name in df['class'].unique():
    df[f'{name}_label'] = df['class'].map(lambda x: 1 if x == name else 0)

In [17]:
test_frac = 0.8

p = np.random.permutation(df.index.size)

test_size = int(p.size*test_frac)
train_size = int(p.size*(1-test_frac))

idx_test = p[0 : test_size]
idx_train = p[test_size: p.size]

features_columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
label_columns = ['Iris-setosa_label', 'Iris-versicolor_label', 'Iris-virginica_label']

xTest = np.array(df.iloc[idx_test][features_columns])
yTest = np.array(df.iloc[idx_test][label_columns])

xTrain = np.array(df.iloc[idx_train][features_columns])
yTrain = np.array(df.iloc[idx_train][label_columns])

In [18]:
nIn = 4
nOut = 3

learning_rate = 1e-2
batch_size = 10
max_iter = 15000

model = Perceptron(nIn, nOut)

print('untrained loss: ', model.loss(xTest, yTest).round(4))

model.train(
    xTrain,
    yTrain,
    learning_rate,
    batch_size,
    max_iter
)

print('trained loss: ', model.loss(xTest, yTest).round(4))

untrained loss:  0.3471


trained loss:  0.0768


In [19]:
print(model.w.round(3))
print(model.b.round(3))

[[-0.472  0.08   0.128]
 [ 1.658 -2.2    0.25 ]
 [-1.224  0.57   1.871]
 [-2.062 -0.898  3.481]]
[-1.627 -1.105 -3.642]


In [20]:
for x,y in zip(xTest, yTest):
    print(model.predict(x).round(2), y)

[0.05 0.22 0.13] [0 1 0]
[0.   0.22 0.96] [0 0 1]
[0.04 0.61 0.05] [0 1 0]
[0.06 0.08 0.35] [0 1 0]
[0.99 0.04 0.  ] [1 0 0]
[0.01 0.58 0.28] [0 0 1]
[0.07 0.82 0.  ] [0 1 0]
[0.07 0.79 0.01] [0 1 0]
[0.   0.85 0.21] [0 1 0]
[0.   0.04 0.99] [0 0 1]
[0.03 0.78 0.03] [0 1 0]
[0.01 0.46 0.53] [0 0 1]
[0.99 0.08 0.  ] [1 0 0]
[0.   0.09 0.98] [0 0 1]
[0.98 0.17 0.  ] [1 0 0]
[0.   0.22 0.92] [0 0 1]
[0.99 0.08 0.  ] [1 0 0]
[0.   0.15 0.92] [0 0 1]
[0.   0.07 0.96] [0 0 1]
[1. 0. 0.] [1 0 0]
[0.02 0.84 0.04] [0 1 0]
[0.1  0.3  0.05] [0 1 0]
[0.03 0.67 0.05] [0 1 0]
[0.95 0.09 0.  ] [1 0 0]
[0.95 0.49 0.  ] [1 0 0]
[0.   0.96 0.13] [0 1 0]
[0.97 0.4  0.  ] [1 0 0]
[1.   0.01 0.  ] [1 0 0]
[0.99 0.05 0.  ] [1 0 0]
[0.06 0.13 0.21] [0 1 0]
[0.1  0.37 0.03] [0 1 0]
[0.99 0.08 0.  ] [1 0 0]
[0.01 0.09 0.83] [0 0 1]
[0.   0.11 0.95] [0 0 1]
[0.   0.79 0.65] [0 0 1]
[0.94 0.35 0.  ] [1 0 0]
[0.99 0.01 0.  ] [1 0 0]
[0.   0.22 0.83] [0 0 1]
[0.01 0.29 0.73] [0 0 1]
[0.97 0.36 0.  ] [1 0 0]
[0.   