In [102]:
from pathlib import Path

import random
import math

import numpy as np
import pandas as pd
from numba import njit, prange

from matplotlib import pyplot as plt

In [160]:
@njit
def F(x: np.ndarray) -> float:
    return 1/(1+np.exp(-x))


@njit
def dF(x: np.ndarray) -> float:
    return F(x)*(1-F(x))


@njit
def Fh(x: np.ndarray) -> float:
    return x


@njit
def dFh(x: np.ndarray) -> float:
    return np.sign(x)


# @njit
def back_prop(dEdw:np.ndarray, u: np.ndarray, z:np.ndarray) -> np.ndarray:
    dEdul = np.sum(dEdw,axis=1)*dF(u)
    dEdwl = dEdul*np.atleast_2d(z).T

    return dEdwl

# @njit
# def back_prop(x: np.ndarray, y: np.ndarray, w: list[np.ndarray]) -> list[np.ndarray]:
#     nl = len(w)
#     l = len(w)
    
#     u = [x]
#     a = [x]
    
#     dEdw = []

#     uli = x.copy()
    
#     for i in range(nl):
#         uli = uli@w[i]
        
#         u.append(uli)
#         a.append(F(uli))

#     dEdul = (a[l]-y)*dF(u[l])
#     dEdwl = dEdul*np.atleast_2d(a[l-1]).T

#     dEdw.append(dEdwl)

#     l -= 1
    
#     while l > 0:
#         dEdul = np.sum(dEdwl,axis=1)*dF(u[l])
#         dEdwl = dEdul*np.atleast_2d(a[l-1]).T

#         dEdw.append(dEdwl)
        
#         l -= 1
    
#     return dEdw


class NNClassifier:
    def __init__(self, shape: tuple) -> None:
        self.shape = shape
        self.nIn = shape[0]
        self.nOut = shape[-1]
        
        self.w: list[np.ndarray] = list()
        self.b: list[np.ndarray] = list()

        for i in range(len(shape)-1):
            self.w.append(np.random.uniform(-1, 1, (shape[i], shape[i+1])))

        for i in range(len(shape)-1):
            self.b.append(np.zeros(shape[i+1]))


    def predict(self, x: np.ndarray) -> np.ndarray:
        y = x.copy()
        
        for i in range(len(self.w)):
            y = F(y@self.w[i])
            
        return y


    def train(self, xTrain: np.ndarray, yTrain: np.ndarray, lr, batch_size, max_iter) -> None:
        n = xTrain.shape[0]

        dwh = list([np.zeros(wi.shape) for wi in self.w])

        for k in range(max_iter):
            idxs = np.random.choice(a=np.arange(n), size=batch_size, replace=False)

            for i in idxs:
                u = [xTrain[i]]
                z = [xTrain[i]]
                
                uu = xTrain[i].copy()
                for j in range(len(self.w)):
                    uu = uu@self.w[j]
                    u.append(uu)
                    z.append(F(uu))
                
                dwl = np.atleast_2d(2*(z[-1]-yTrain[i]))
                for j in reversed(range(len(self.w))):
                    dwl = back_prop(dwl, u[j], z[j-1])
                    dwh[j] = dwl.copy()
                
            for j in range(len(self.w)):
                self.w[j] -= lr*dwh[j]
                dwh[j] *= 0
    

    def back_prop(self, x: np.ndarray, y: np.ndarray) -> tuple[np.ndarray]:
        u = [x]
        a = [x]
        
        dEdw = list()
        dEdb = list()

        uli = x.copy()
        
        for i in range(len(self.shape)-1):
            uli = uli@self.w[i]+self.b[i]
            u.append(uli)
            a.append(F(uli))

        dEdul = (a[-1]-y)*dF(u[-1])
        dEdwl = dEdul*np.atleast_2d(a[-2]).T
        dEdbl = dEdul

        dEdw.append(dEdwl)
        dEdb.append(dEdbl)

        l = len(self.w)-2
        
        while l >= 0:
            dEdul = np.sum(dEdwl,axis=1)*dF(u[l+1])
            dEdwl = dEdul*np.atleast_2d(a[l]).T
            dEdbl = dEdul

            dEdw.append(dEdwl)
            dEdb.append(dEdbl)
            
            l -= 1
        
        return dEdw, dEdb
        
    
    def loss(self, x: np.ndarray, y: np.ndarray) -> float:
        n = y.shape[0]

        d = np.array([1/self.nOut*np.sum(np.square(self.predict(xi)-yi)) for xi, yi in zip(x, y)])
        
        return 1/n*np.sum(d)

In [104]:
df = pd.read_csv(Path('..', '..', 'data', 'iris_csv.csv'))

for c in df.columns[0:4]:
    df[c] = (df[c]-df[c].mean())/df[c].std()

df['synth1'] = df['petallength']*df['petalwidth']
df['synth2'] = df['sepallength']*df['petallength']
df['synth3'] = df['sepallength']*df['petalwidth']

for name in df['class'].unique():
    df[f'{name}_label'] = df['class'].map(lambda x: 1 if x == name else 0)

In [105]:
test_frac = 0.8

p = np.random.permutation(df.index.size)

test_size = int(p.size*test_frac)
train_size = int(p.size*(1-test_frac))

idx_test = p[0 : test_size]
idx_train = p[test_size: p.size]

features_columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
label_columns = ['Iris-setosa_label', 'Iris-versicolor_label', 'Iris-virginica_label']

xTest = np.array(df.iloc[idx_test][features_columns])
yTest = np.array(df.iloc[idx_test][label_columns])

xTrain = np.array(df.iloc[idx_train][features_columns])
yTrain = np.array(df.iloc[idx_train][label_columns])

In [161]:
lr = 1e-3
batch_size = 10
max_iter = 100

layers_shape = (4, 3)

model = NNClassifier(layers_shape)

# print(len(model.w), [w.shape for w in model.w])
# print(len(model.b), [b.shape for b in model.b])

print('untrained loss: ', model.loss(xTest, yTest))

model.train(xTrain, yTrain, lr, batch_size, max_iter)

print('trained loss: ', model.loss(xTest, yTest))

untrained loss:  0.42361601776372343


ValueError: operands could not be broadcast together with shapes (4,3) (3,4) (4,3) 