In [88]:
from pathlib import Path


import numpy as np
from numba import njit, prange

import pandas as pd

from matplotlib import pyplot as plt

In [89]:
df = pd.read_csv(Path('..', '..', 'data', 'iris_csv.csv'))

for c in df.columns[0:4]:
    df[c] = (df[c]-df[c].mean())/df[c].std()

df['synth1'] = df['petallength']*df['petalwidth']
df['synth2'] = df['sepallength']*df['petallength']
df['synth3'] = df['sepallength']*df['petalwidth']

for name in df['class'].unique():
    df[f'label-{name}'] = df['class'].map(lambda x: 1 if x == name else 0)

In [90]:
# test_frac = 0.8

# np.random.seed(0)
# p = np.random.permutation(df.index.size)

# test_size = int(p.size*test_frac)
# train_size = int(p.size*(1-test_frac))

# idx_test = p[0 : test_size]
# idx_train = p[test_size: p.size]

# features_columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
# label_columns = ['Iris-setosa_label', 'Iris-versicolor_label', 'Iris-virginica_label']

# xTest = np.array(df.iloc[idx_test][features_columns])
# yTest = np.array(df.iloc[idx_test][label_columns])

# xTrain = np.array(df.iloc[idx_train][features_columns])
# yTrain = np.array(df.iloc[idx_train][label_columns])

In [91]:
np.random.seed(0)

setosa_idxs = np.arange(0, 50)
versicolor_idxs = np.arange(50, 100)
virginica_idxs = np.arange(100, 150)

p = np.random.permutation(np.arange(50))

setosa_train_idxs = setosa_idxs[p[0:10]]
setosa_test_idxs = setosa_idxs[p[10:]]

versicolor_train_idxs = versicolor_idxs[p[0:10]]
versicolor_test_idxs = versicolor_idxs[p[10:]]

virginica_train_idxs = virginica_idxs[p[0:10]]
virginica_test_idxs = virginica_idxs[p[10:]]

feature_columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
label_columns = ['label-Iris-setosa', 'label-Iris-versicolor', 'label-Iris-virginica']

xTrain = np.vstack([
    df.iloc[setosa_train_idxs][feature_columns],
    df.iloc[versicolor_train_idxs][feature_columns],
    df.iloc[virginica_train_idxs][feature_columns]
])

yTrain = np.vstack([
    df.iloc[setosa_train_idxs][label_columns],
    df.iloc[versicolor_train_idxs][label_columns],
    df.iloc[virginica_train_idxs][label_columns]
])

xTest = np.vstack([
    df.iloc[setosa_test_idxs][feature_columns],
    df.iloc[versicolor_test_idxs][feature_columns],
    df.iloc[virginica_test_idxs][feature_columns]
])

yTest = np.vstack([
    df.iloc[setosa_test_idxs][label_columns],
    df.iloc[versicolor_test_idxs][label_columns],
    df.iloc[virginica_test_idxs][label_columns]
])

In [92]:
def F(x: np.ndarray) -> float:
    # return np.clip(x,-1, 1)
    return np.tanh(x)


@njit(fastmath=True)
def F_jit(x: np.ndarray) -> np.ndarray:
    # return np.clip(x,-1, 1)
    return np.tanh(x)


@njit(fastmath=True)
def dF_jit(x: np.ndarray) -> np.ndarray:
    # return np.array([0 if xi <= -1 or xi >= 1 else 1 for xi in x])
    return 1-np.square(np.tanh(x))


def Softmax(x: np.ndarray) -> float:
    y = np.exp(x)
    return y/np.sum(y)


@njit(fastmath=True)
def Softmax_jit(x: np.ndarray) -> float:
    y = np.exp(x)
    return y/np.sum(y)


@njit(fastmath=True)
def grads(xBatch: np.ndarray, yBatch: np.ndarray, Wy: np.ndarray, Wh: np.ndarray, Bh:np.ndarray, Bx:np.ndarray, b:np.ndarray) -> tuple[np.ndarray]:
    dWh = np.zeros(Wh.shape)
    dWy = np.zeros(Wy.shape)
    
    dBh = np.zeros(Bh.shape)
    dBx = np.zeros(Bx.shape)
    db = np.zeros(b.shape)

    Uh = np.zeros((Wh.shape[0], Wh.shape[2]))
    Yh = np.zeros((Wh.shape[0], Wh.shape[1]))
    
    for i in prange(xBatch.shape[0]):
        Uh *= 0
        Yh *= 0

        for j in prange(Wh.shape[0]):
            Uh[j] = xBatch[i] @ Wh[j] + Bh[j]
            Yh[j] = F_jit(Uh[j]) @ Wh[j].T + Bx[j]

        Xh = np.sum(Yh, axis=0)
        
        u = Xh @ Wy + b
        y = Softmax_jit(u)
                
        for j in prange(Wh.shape[0]):
            dWh[j] += (y-yBatch[i]) @ Wy.T @ Wh[j] * dF_jit(Uh[j]) * np.atleast_2d(xBatch[i]).T
            dBh[j] += (y-yBatch[i]) @ Wy.T @ Wh[j] * dF_jit(Uh[j])
            dBx[j] += (y-yBatch[i]) @ Wy.T
        
        dWy += (y-yBatch[i]) * np.atleast_2d(Xh).T
        db += y-yBatch[i]
    
    return (dWh, dWy, dBh, dBx, db)

class RBMPerceptron:
    def __init__(self, nL:int, nH:int, nIn:int, nOut:int) -> None:
        self.nL = nL
        self.nH = nH
        self.nIn = nIn
        self.nOut = nOut
                
        self.Wh: np.ndarray = np.random.uniform(-1, 1, (nL, nIn, nH))
        self.Wy: np.ndarray = np.random.uniform(-1, 1, (nIn, nOut))
        
        self.Bh: np.ndarray = np.zeros((nL, nH))
        self.Bx: np.ndarray = np.zeros((nL, nIn))
        self.b: np.ndarray = np.zeros(nOut)

    def predict(self, x:np.ndarray) -> np.ndarray:
        Yh = np.zeros(self.nIn)
        
        for i in range(self.Wh.shape[0]):
            Yh += F(x @ self.Wh[i] + self.Bh[i]) @ self.Wh[i].T + self.Bx[i]
            
        return Softmax(Yh @ self.Wy + self.b)

    def train(self, xTrain: np.ndarray, yTrain: np.ndarray, lr, batch_size, max_iter) -> None:
        n = xTrain.shape[0]

        for k in range(max_iter):
            idxs = np.random.choice(a=np.arange(n), size=batch_size, replace=False)
            
            dWh, dWy, dBh, dBx, db = grads(xTrain[idxs], yTrain[idxs], self.Wy, self.Wh, self.Bh, self.Bx, self.b)
            
            self.Wh -= lr*dWh
            self.Wy -= lr*dWy
            
            self.Bh -= lr*dBh
            self.Bx -= lr*dBx
            self.b -= lr*db
        
    def loss(self, x: np.ndarray, y: np.ndarray) -> float:
        Ypred = np.array([self.predict(xi) for xi in x])
        h = -1/self.nOut * np.sum(y*np.log(Ypred), axis=1)
        return 1/x.shape[0] * np.sum(h)

In [108]:
nL = 4
nH = 4
nIn = 4
nOut = 3

lr = 1e-2
batch_size = 30
max_iter = 1000

model = RBMPerceptron(nL, nH, nIn, nOut)

print('untrained loss: {0:.6f}'.format(model.loss(xTest, yTest)))

model.train(xTrain, yTrain, lr, batch_size, max_iter)

print('trained loss: {0:.6f}'.format(model.loss(xTest, yTest)))

yPred = np.array([model.predict(x) for x in xTest])

c = 0
for y, yp in zip(yTest, yPred):
    c += 1 if np.argmax(y) == np.argmax(yp) else 0
    
accuracy = c / xTest.shape[0]

print(accuracy)

untrained loss: 1.532463
trained loss: 0.094877
0.9416666666666667


In [109]:
i = 0
for x,y in zip(xTest, yTest):
    print("{0:-3} {1} {2}".format(i, y, model.predict(x).round(2)))
    i += 1

  0 [1 0 0] [1. 0. 0.]
  1 [1 0 0] [1. 0. 0.]
  2 [1 0 0] [1. 0. 0.]
  3 [1 0 0] [1. 0. 0.]
  4 [1 0 0] [1. 0. 0.]
  5 [1 0 0] [1. 0. 0.]
  6 [1 0 0] [1. 0. 0.]
  7 [1 0 0] [1. 0. 0.]
  8 [1 0 0] [1. 0. 0.]
  9 [1 0 0] [1. 0. 0.]
 10 [1 0 0] [1. 0. 0.]
 11 [1 0 0] [1. 0. 0.]
 12 [1 0 0] [1. 0. 0.]
 13 [1 0 0] [1. 0. 0.]
 14 [1 0 0] [1. 0. 0.]
 15 [1 0 0] [1. 0. 0.]
 16 [1 0 0] [1. 0. 0.]
 17 [1 0 0] [1. 0. 0.]
 18 [1 0 0] [1. 0. 0.]
 19 [1 0 0] [1. 0. 0.]
 20 [1 0 0] [1. 0. 0.]
 21 [1 0 0] [1. 0. 0.]
 22 [1 0 0] [1. 0. 0.]
 23 [1 0 0] [1. 0. 0.]
 24 [1 0 0] [1. 0. 0.]
 25 [1 0 0] [1. 0. 0.]
 26 [1 0 0] [1. 0. 0.]
 27 [1 0 0] [1. 0. 0.]
 28 [1 0 0] [1. 0. 0.]
 29 [1 0 0] [1. 0. 0.]
 30 [1 0 0] [1. 0. 0.]
 31 [1 0 0] [1. 0. 0.]
 32 [1 0 0] [1. 0. 0.]
 33 [1 0 0] [1. 0. 0.]
 34 [1 0 0] [1. 0. 0.]
 35 [1 0 0] [1. 0. 0.]
 36 [1 0 0] [1. 0. 0.]
 37 [1 0 0] [1. 0. 0.]
 38 [1 0 0] [1. 0. 0.]
 39 [1 0 0] [1. 0. 0.]
 40 [0 1 0] [0.   0.32 0.68]
 41 [0 1 0] [0.   0.92 0.08]
 42 [0 1 0] [0. 1. 0.]