In [126]:
from pathlib import Path

import random
import math

import numpy as np
from numba import njit, prange

import pandas as pd

from matplotlib import pyplot as plt

In [195]:
def F(x: np.ndarray) -> float:
    return 1/(1+np.exp(-x))


@njit(fastmath=True)
def F_jit(x: np.ndarray) -> float:
    return 1/(1+np.exp(-x))


@njit(fastmath=True)
def dF_jit(x: np.ndarray) -> float:
    y = 1/(1+np.exp(-x))
    return y*(1-y)


def Fh(x: np.ndarray) -> float:
    # return np.clip(x,-1, 1)
    return np.tanh(x)


@njit(fastmath=True)
def Fh_jit(x: np.ndarray) -> np.ndarray:
    # return np.clip(x,-1, 1)
    return np.tanh(x)


@njit(fastmath=True)
def dFh_jit(x: np.ndarray) -> np.ndarray:
    # return np.array([0 if xi <= -1 or xi >= 1 else 1 for xi in x])
    return 1-np.square(np.tanh(x))


@njit
def grads(xBatch: np.ndarray, yBatch: np.ndarray, Wy: np.ndarray, Wh: np.ndarray, Bu:np.ndarray, Bh: np.ndarray, b:np.ndarray) -> tuple[np.ndarray]:
    n = xBatch.shape[0]
    nIn = Wy.shape[0]

    dWy = np.zeros(Wy.shape)
    dWh = np.zeros(Wh.shape)
    dBu = np.zeros(Bu.shape)
    dBh = np.zeros(Bh.shape)
    db = np.zeros(b.shape)
    
    for i in prange(n):
        Uh = xBatch[i] @ Wh + Bu
        Yh = Fh_jit(Uh) @ Wh.T + Bh
        u = Yh @ Wy + b
        y = F_jit(u)

        dLdu = 2/nIn * (y-yBatch[i])*dF_jit(u)
        
        dWh += dLdu @ Wy.T @ Wh * dFh_jit(Uh) * np.atleast_2d(xBatch[i]).T
        dWy += dLdu * np.atleast_2d(Yh).T
        dBu += dLdu @ Wy.T @ Wh * dFh_jit(Uh)
        dBh += dLdu @ Wy.T
        db += dLdu
    
    return (dWh, dWy, dBu, dBh, db)


class RBMPerceptron:
    def __init__(self, nH:int, nIn: int, nOut: int) -> None:
        self.nH = nH
        self.nIn = nIn
        self.nOut = nOut
        
        self.Wh: np.ndarray = np.random.uniform(-1, 1, (nIn, nH))
        self.Wy: np.ndarray = np.random.uniform(-1, 1, (nIn, nOut))

        self.Bu: np.ndarray = np.zeros(nH)
        self.Bh: np.ndarray = np.zeros(nIn)
        self.b: np.ndarray = np.zeros(nOut)


    def predict(self, x:np.ndarray) -> np.ndarray:
        Uh = x @ self.Wh + self.Bu
        Yh = Fh(Uh) @ self.Wh.T + self.Bh
        return F(Yh @ self.Wy + self.b)


    def train(self, xTrain: np.ndarray, yTrain: np.ndarray, lr, batch_size, max_iter) -> None:
        n = xTrain.shape[0]

        for k in range(max_iter):
            idxs = np.random.choice(a=np.arange(n), size=batch_size, replace=False)
            
            dWh, dWy, dBu, dBh, db = grads(xTrain[idxs], yTrain[idxs], self.Wy, self.Wh, self.Bu, self.Bh, self.b)
            
            self.Wy -= lr*dWy
            self.Wh -= lr*dWh
            self.Bu -= lr*dBu
            self.Bh -= lr*dBh
            self.b -= lr*db
        
    
    def loss(self, x: np.ndarray, y: np.ndarray) -> float:
        n = x.shape[0]
        
        d = np.array([1/self.nIn*np.sum(np.square(self.predict(xi)-yi)) for xi,yi in zip(x,y)])
        
        return 1/n*np.sum(d)

In [128]:
df = pd.read_csv(Path('..', '..', 'data', 'iris_csv.csv'))

for c in df.columns[0:4]:
    df[c] = (df[c]-df[c].mean())/df[c].std()

df['synth1'] = df['petallength']*df['petalwidth']
df['synth2'] = df['sepallength']*df['petallength']
df['synth3'] = df['sepallength']*df['petalwidth']

for name in df['class'].unique():
    df[f'{name}_label'] = df['class'].map(lambda x: 1 if x == name else 0)

In [129]:
test_frac = 0.8

np.random.seed(0)
p = np.random.permutation(df.index.size)

test_size = int(p.size*test_frac)
train_size = int(p.size*(1-test_frac))

idx_test = p[0 : test_size]
idx_train = p[test_size: p.size]

features_columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
label_columns = ['Iris-setosa_label', 'Iris-versicolor_label', 'Iris-virginica_label']

xTest = np.array(df.iloc[idx_test][features_columns])
yTest = np.array(df.iloc[idx_test][label_columns])

xTrain = np.array(df.iloc[idx_train][features_columns])
yTrain = np.array(df.iloc[idx_train][label_columns])

In [199]:
nH = 4
nIn = 4
nOut = 3

lr = 1e-2
batch_size = 15
max_iter = 5000

model = RBMPerceptron(nH, nIn, nOut)

print('untrained loss: {0:.6f}'.format(model.loss(xTest, yTest)))

model.train(xTrain, yTrain, lr, batch_size, max_iter)

print('trained loss: {0:.6f}'.format(model.loss(xTest, yTest)))

untrained loss: 0.320013
trained loss: 0.022419


In [200]:
for x,y in zip(xTest, yTest):
    print(model.predict(x).round(2), y.round(2))

[0.   0.07 0.99] [0 0 1]
[0.06 0.95 0.01] [0 1 0]
[0.99 0.04 0.  ] [1 0 0]
[0.   0.17 0.96] [0 0 1]
[0.99 0.1  0.  ] [1 0 0]
[0.   0.03 0.99] [0 0 1]
[0.99 0.09 0.  ] [1 0 0]
[0.09 0.86 0.03] [0 1 0]
[0.06 0.89 0.03] [0 1 0]
[0.09 0.93 0.01] [0 1 0]
[0.   0.54 0.63] [0 0 1]
[0.12 0.85 0.02] [0 1 0]
[0.06 0.91 0.02] [0 1 0]
[0.06 0.87 0.04] [0 1 0]
[0.05 0.87 0.04] [0 1 0]
[0.98 0.13 0.  ] [1 0 0]
[0.06 0.86 0.04] [0 1 0]
[0.04 0.91 0.03] [0 1 0]
[0.98 0.13 0.  ] [1 0 0]
[0.99 0.05 0.  ] [1 0 0]
[0.   0.26 0.89] [0 0 1]
[0.06 0.84 0.04] [0 1 0]
[0.99 0.11 0.  ] [1 0 0]
[0.98 0.14 0.  ] [1 0 0]
[0.02 0.61 0.33] [0 0 1]
[0.99 0.09 0.  ] [1 0 0]
[0.99 0.06 0.  ] [1 0 0]
[0.09 0.92 0.01] [0 1 0]
[0.07 0.95 0.01] [0 1 0]
[0.99 0.09 0.  ] [1 0 0]
[0.01 0.36 0.68] [0 0 1]
[0.05 0.82 0.05] [0 1 0]
[0.99 0.09 0.  ] [1 0 0]
[0.02 0.58 0.3 ] [0 0 1]
[0.   0.08 0.99] [0 0 1]
[0.06 0.91 0.02] [0 1 0]
[0.99 0.06 0.  ] [1 0 0]
[0.01 0.61 0.44] [0 1 0]
[0.09 0.88 0.02] [0 1 0]
[0.07 0.94 0.01] [0 1 0]
