In [29]:
from pathlib import Path

import numpy as np
from numba import njit, prange

import pandas as pd

### Load Iris dataset and prepare data

In [30]:
df = pd.read_csv(Path('..', '..', 'data', 'iris_csv.csv'))

for c in df.columns[0:4]:
    df[c] = (df[c]-df[c].mean())/df[c].std()

df['synth1'] = df['petallength']*df['petalwidth']
df['synth2'] = df['sepallength']*df['petallength']
df['synth3'] = df['sepallength']*df['petalwidth']

for name in df['class'].unique():
    df[f'label-{name}'] = df['class'].map(lambda x: 1 if x == name else 0)

### Split data to test and train

In [31]:
np.random.seed(0)

setosa_idxs = np.arange(0, 50)
versicolor_idxs = np.arange(50, 100)
virginica_idxs = np.arange(100, 150)

p = np.random.permutation(np.arange(50))

setosa_train_idxs = setosa_idxs[p[0:10]]
setosa_test_idxs = setosa_idxs[p[10:]]

versicolor_train_idxs = versicolor_idxs[p[0:10]]
versicolor_test_idxs = versicolor_idxs[p[10:]]

virginica_train_idxs = virginica_idxs[p[0:10]]
virginica_test_idxs = virginica_idxs[p[10:]]

feature_columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
label_columns = ['label-Iris-setosa', 'label-Iris-versicolor', 'label-Iris-virginica']

xTrain = np.vstack([
    df.iloc[setosa_train_idxs][feature_columns],
    df.iloc[versicolor_train_idxs][feature_columns],
    df.iloc[virginica_train_idxs][feature_columns]
])

yTrain = np.vstack([
    df.iloc[setosa_train_idxs][label_columns],
    df.iloc[versicolor_train_idxs][label_columns],
    df.iloc[virginica_train_idxs][label_columns]
])

xTest = np.vstack([
    df.iloc[setosa_test_idxs][feature_columns],
    df.iloc[versicolor_test_idxs][feature_columns],
    df.iloc[virginica_test_idxs][feature_columns]
])

yTest = np.vstack([
    df.iloc[setosa_test_idxs][label_columns],
    df.iloc[versicolor_test_idxs][label_columns],
    df.iloc[virginica_test_idxs][label_columns]
])

### Activation functions

In [32]:
@njit(fastmath=True)
def F(x: np.ndarray) -> np.ndarray:
    # return x
    # return np.maximum(np.zeros(x.shape), x)
    # return np.clip(x,-1, 1)
    return np.tanh(x)


@njit(fastmath=True)
def dF(x: np.ndarray) -> np.ndarray:
    # return np.ones(x.shape)
    # return 1 * (x > 0)
    # return np.array([0 if xi <= -1 or xi >= 1 else 1 for xi in x])
    return 1-np.square(np.tanh(x))


@njit(fastmath=True)
def sigmoid(x: np.ndarray) -> np.ndarray:
    return 1/(1+np.exp(-x))


@njit(fastmath=True)
def dSigmoid(x: np.ndarray) -> np.ndarray:
    y = 1/(1+np.exp(-x))
    return y*(1-y)


@njit(fastmath=True)
def softmax(x: np.ndarray) -> np.ndarray:
    y = np.exp(x)
    return y/np.sum(y)

### Model

In [33]:
@njit(fastmath=True)
def grads_rbm(xBatch: np.ndarray, Wh: np.ndarray, Bh: np.ndarray, Bx: np.ndarray) -> tuple[np.ndarray]:
    dWh = np.zeros(Wh.shape)
    dBh = np.zeros(Bh.shape)
    dBx = np.zeros(Bx.shape)
    
    for i in prange(xBatch.shape[0]):
        Uh = xBatch[i] @ Wh + Bh
        y = F(Uh) @ Wh.T + Bx

        dLdy = 2/Wh.shape[0] * (y-xBatch[i])

        dWh += np.outer(xBatch[i], dLdy @ Wh * dF(Uh))
        dBh += dLdy @ Wh * dF(Uh)
        dBx += dLdy

    return (dWh, dBh, dBx)


@njit(fastmath=True)
def grads_classifier(xBatch: np.ndarray, yBatch: np.ndarray, Wh: np.ndarray, Wy: np.ndarray, Bh: np.ndarray, b:np.ndarray) -> tuple[np.ndarray]:
    dWy = np.zeros(Wy.shape)
    db = np.zeros(b.shape)
    
    for i in prange(xBatch.shape[0]):
        Yh = F(xBatch[i] @ Wh + Bh)
        y = softmax(Yh @ Wy + b)
        
        dWy += (y - yBatch[i]) * np.atleast_2d(Yh).T
        db += y - yBatch[i]
    
    return (dWy, db)


class RBMPerceptron:
    def __init__(self, nIn: int, nH:int, nOut: int) -> None:
        self.nIn = nIn
        self.nH = nH
        self.nOut = nOut
        
        self.Wh: np.ndarray = np.random.uniform(-1, 1, (nIn, nH))
        self.Wy: np.ndarray = np.random.uniform(-1, 1, (nH, nOut))

        self.Bh: np.ndarray = np.zeros(nH)
        self.Bx: np.ndarray = np.zeros(nIn)
        self.b: np.ndarray = np.zeros(nOut)

    def predict(self, x:np.ndarray) -> np.ndarray:
        # Yh = (x @ self.Wh + self.Bh)
        Yh = F(x @ self.Wh + self.Bh)
        return softmax(Yh @ self.Wy + self.b)

    def train_rbm(self, xTrain: np.ndarray, lr, batch_size, max_iter):
        n = xTrain.shape[0]
        
        for j in range(max_iter):
            idxs = np.random.choice(a=np.arange(n), size=batch_size, replace=False)    
            
            dWh, dBh, dBx = grads_rbm(xTrain[idxs], self.Wh, self.Bh, self.Bx)              
            
            self.Wh -= lr*dWh
            self.Bh -= lr*dBh
            self.Bx -= lr*dBx

    def train_classifier(self, xTrain: np.ndarray, yTrain: np.ndarray, lr, batch_size, max_iter) -> None:
        n = xTrain.shape[0]

        for j in range(max_iter):
            idxs = np.random.choice(a=np.arange(n), size=batch_size, replace=False)
            
            dWy, db = grads_classifier(xTrain[idxs], yTrain[idxs], self.Wh, self.Wy, self.Bh, self.b)
            
            self.Wy -= lr*dWy
            self.b -= lr*db
        
    def loss(self, x: np.ndarray, y: np.ndarray) -> float:
        Ypred = np.array([self.predict(xi) for xi in x])
        h = - 1/self.nOut * np.sum(y*np.log(Ypred), axis=1)
        return 1/y.shape[0] * np.sum(h)

In [46]:
nIn = 4
nH = 12
nOut = 3

lr = 1e-2
batch_size = 30
max_iter = 3000

model = RBMPerceptron(nIn, nH, nOut)

print('untrained loss: {0:.6f}'.format(model.loss(xTest, yTest)))

model.train_rbm(xTrain, lr, batch_size, max_iter)
model.train_classifier(xTrain, yTrain, lr, batch_size, max_iter)

print('trained loss: {0:.6f}'.format(model.loss(xTest, yTest)))

TP_count = 0
for x, y in zip(xTest, yTest):
    yPred = model.predict(x)
    TP_count += 1 if np.argmax(y) == np.argmax(yPred) else 0

accuracy = TP_count / xTest.shape[0]
print('accuracy: ', accuracy)

untrained loss: 0.165203
trained loss: 0.057444
accuracy:  0.9416666666666667


In [47]:
for x,y in zip(xTest, yTest):
    yPred = model.predict(x)
    print(' ' if np.argmax(y) == np.argmax(yPred) else 'X', y, yPred.round(2))

  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [0.99 0.01 0.  ]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [0.98 0.02 0.  ]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [0.99 0.01 0.  ]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [0.99 0.01 0.  ]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [1 0 0] [1. 0. 0.]
  [0 1 0] [0.   0.56 0.44]
X [0 1 0] [0.   0.48 0.51]
  [0 1 0] [0. 1. 0.]
X [0 1 0] [0.01 0.47 0.52]
  [0 1 0] [0. 1. 0.]
  [0 1 0] [0.