# Own implementation

## The really important part

### Imports

In [1]:
import numpy as np
import os
import sys
import typing

### Random

In [2]:
# Fixed seed so that NumPy-based random draws in this notebook are repeatable.
SEED = 42
# NOTE: PYTHONHASHSEED is read when the interpreter starts, so assigning it
# here only influences child processes, not hashing in the running session.
os.environ["PYTHONHASHSEED"] = str(SEED)
np.random.seed(SEED)

## Compile elements

### Optimizers

In [3]:
from abc import ABC, abstractmethod

class Optimizer(ABC):
  """Interface for the weight-update rule applied by trainable layers."""

  @abstractmethod
  def compute(self, weights: np.ndarray, gradient: np.ndarray, losses_count: int) -> np.ndarray:
    """Return the new weights for one update step.

    Args:
      weights: current parameter matrix.
      gradient: gradient accumulated for ``weights``.
      losses_count: number of samples that contributed to ``gradient``.
    """

In [4]:
class SGD_W(Optimizer):
  """Stochastic gradient descent step with a weight-decay term.

  The update is ``w - lr * (gradient / losses_count + weight_decay * w)``.
  """

  def __init__(self, weight_decay: float, learning_rate: float):
    self.learning_rate = learning_rate
    self.weight_decay = weight_decay

  def compute(self, weights: np.ndarray, gradient: np.ndarray, losses_count: int) -> np.ndarray:
    """Return ``weights`` after one decayed gradient step.

    ``gradient`` is averaged over ``losses_count`` before the decay term
    (``weights * weight_decay``) is added to it.
    """
    # Average the accumulated gradient, then add the decay pull towards zero.
    step_direction = gradient / losses_count + weights * self.weight_decay
    return weights - self.learning_rate * step_direction

## Layers

In [5]:
def he_init(input_num, output_num) -> np.ndarray:
  """Sample an ``(input_num, output_num)`` weight matrix with He-normal scaling.

  Values are drawn from a zero-mean normal distribution whose standard
  deviation is ``sqrt(2 / input_num)``, using NumPy's global random state.
  """
  shape = (input_num, output_num)
  scale = np.sqrt(2 / input_num)
  return np.random.normal(loc=0, scale=scale, size=shape)

In [6]:
from abc import ABC, abstractmethod

class Layer(ABC):
  """Common interface of every element in the network pipeline.

  Two flags, both ``True`` by default, describe how a layer is treated:
    * ``is_loss_computable`` - the layer takes part in the backward pass.
    * ``is_loss_applyable`` - the layer has weights to update via ``apply_loss``.
  """

  def __init__(self):
    self.is_loss_applyable = True
    self.is_loss_computable = True

  @abstractmethod
  def forward(self, input: np.ndarray):
    """Compute the layer output for ``input``."""

  @abstractmethod
  def backward(self, input: np.ndarray):
    """Propagate the incoming gradient ``input`` back through the layer."""

  @abstractmethod
  def apply_loss(self, optimizer: Optimizer):
    """Update the layer's parameters using ``optimizer``."""

In [7]:
class Input(Layer): # i=(25,), o=(1,25)
  """Entry layer that reshapes a sample into a single-row matrix."""

  def __init__(self):
    super().__init__()
    # Nothing to back-propagate through and nothing to train.
    self.is_loss_computable = False
    self.is_loss_applyable = False

  def forward(self, input: np.ndarray) -> np.ndarray:
    """Return ``input`` viewed as shape ``(1, n)``."""
    row_shape = (1, -1)
    return input.reshape(row_shape) # (1,25)

  def backward(self, input: np.ndarray):
    """Not supported: the input layer is excluded from the backward pass."""
    raise NotImplementedError("You shouldn't")

  def apply_loss(self, optimizer: Optimizer):
    """Not supported: the input layer has no parameters."""
    raise NotImplementedError("You shouldn't")

In [8]:
class Dense(Layer):
  """Fully connected layer whose bias is folded into the weight matrix.

  The weight matrix has ``input_num + 1`` rows: the extra last row is the
  bias, matched in ``forward`` by a column of ones appended to the input.
  Gradients are accumulated across ``backward`` calls and consumed by
  ``apply_loss``.
  """

  def __init__(self, input_num: int, output_num: int): # i=(1,25) o=(1,x)
    super().__init__()
    # One additional input row holds the bias weights.
    self.weights: np.ndarray = he_init(input_num + 1, output_num)
    self.gradient: np.ndarray = np.zeros_like(self.weights)
    self.losses_count = 0
    # Input (with bias column) of the latest forward pass; set by forward().
    self.forward_input = None

  def forward(self, input: np.ndarray) -> np.ndarray:
    """Return ``[input, 1] @ weights`` for a 2-D ``input`` of shape ``(rows, input_num)``.

    Any number of rows is accepted; the original single-row case is the
    special case ``rows == 1``.
    """
    bias_column = np.ones((input.shape[0], 1))
    augmented = np.concatenate((input, bias_column), axis=1)
    self.forward_input = augmented
    return np.dot(augmented, self.weights)

  def backward(self, input: np.ndarray) -> np.ndarray:
    """Accumulate the weight gradient and return the gradient w.r.t. the layer input.

    Args:
      input: gradient of the loss w.r.t. this layer's output, one row per
        sample of the preceding ``forward`` call.

    Raises:
      RuntimeError: if called before ``forward``.
    """
    if self.forward_input is None:
      raise RuntimeError("Dense.backward() called before forward()")

    # Each row is one sample contributing to the accumulated gradient.
    self.losses_count += input.shape[0]
    self.gradient += np.dot(self.forward_input.T, input)

    upstream = np.dot(input, self.weights.T)
    # The last column belongs to the constant bias input, not to a real input.
    return upstream[:, :-1]

  def apply_loss(self, optimizer: Optimizer) -> None:
    """Apply the accumulated gradient through ``optimizer`` and reset the accumulators.

    When no sample has been accumulated the weights are left untouched,
    because averaging an empty gradient would divide by zero.
    """
    if self.losses_count > 0:
      self.weights = optimizer.compute(self.weights, self.gradient, self.losses_count)

    # A fresh zero array (rather than ``gradient * 0``) also clears NaN/inf.
    self.gradient = np.zeros_like(self.weights)
    self.losses_count = 0

In [9]:
class Sigmoid(Layer): # i=(1,x) o=(1,x)
  """Element-wise logistic activation ``1 / (1 + exp(-x))``."""

  def __init__(self):
    super().__init__()
    self.is_loss_applyable = False  # no trainable parameters

  def forward(self, input: np.ndarray) -> np.ndarray:
    """Remember ``input`` for the backward pass and return its activation."""
    self.forward_input = input
    return self.__logistic(input)

  def backward(self, input: np.ndarray):
    """Scale the incoming gradient by the derivative ``s * (1 - s)``."""
    activation = self.__logistic(self.forward_input)
    return input * (activation * (1 - activation))

  def apply_loss(self, optimizer: Optimizer):
    """Not supported: the activation has no parameters."""
    raise NotImplementedError("You shouldn't")

  @staticmethod
  def __logistic(x: np.ndarray) -> np.ndarray:
    return 1 / (1 + np.exp(-x))

In [10]:
class Softmax(Layer): # i=(1,x) o=(1,x)
  """Softmax activation normalised over the whole input array."""

  def __init__(self):
    super().__init__()
    self.is_loss_applyable = False  # no trainable parameters

  def forward(self, input: np.ndarray) -> np.ndarray:
    """Remember ``input`` for the backward pass and return its softmax."""
    self.forward_input = input
    return self.__normalise(input)

  def backward(self, input: np.ndarray):
    """Multiply the incoming gradient by the softmax Jacobian.

    The softmax of the stored forward input is recomputed, then
    ``J = diag(p) - p^T p`` is applied to ``input``.
    """
    probs = self.__normalise(self.forward_input)
    jacobian = np.diagflat(probs) - probs.T.dot(probs)
    return np.dot(input, jacobian)

  def apply_loss(self, optimizer: Optimizer):
    """Not supported: the activation has no parameters."""
    raise NotImplementedError("You shouldn't")

  @staticmethod
  def __normalise(x: np.ndarray) -> np.ndarray:
    # Subtracting the maximum keeps the exponentials from overflowing.
    exps = np.exp(x - np.max(x))
    return exps / exps.sum()

In [11]:
class CrossEntropy(Layer): # i=(1,x) o=(1,)
  """Categorical cross-entropy loss ``-sum(labels * log(input))``.

  Predictions are clipped from below by ``epsilon`` so that a probability
  that underflowed to exactly 0 cannot yield ``log(0)`` in the loss or a
  division by zero in the gradient.
  """

  def __init__(self, epsilon: float = 1e-12):
    super().__init__()
    self.is_loss_applyable = False  # no trainable parameters
    self.epsilon = epsilon

  def forward(self, input: np.ndarray, labels: np.ndarray):
    """Return the summed cross-entropy between ``input`` and ``labels``.

    Args:
      input: predicted probabilities.
      labels: target distribution with the same shape (or broadcastable).
    """
    safe_input = np.clip(input, self.epsilon, None)
    # Keep the clipped values: backward() divides by them.
    self.forward_input = safe_input
    return -np.sum(labels * np.log(safe_input))

  def backward(self, labels: np.ndarray):
    """Return the gradient of the loss w.r.t. the predictions: ``-labels / input``."""
    return -labels / self.forward_input

  def apply_loss(self, optimizer: Optimizer):
    """Not supported: the loss has no parameters."""
    raise NotImplementedError("You shouldn't")

## Model

In [12]:
class Sequential():
  """Feed-forward model that chains layers and trains them sample by sample.

  Each sample is pushed through every layer in order; the configured loss
  is evaluated on the final output; gradients flow back through the layers
  flagged ``is_loss_computable``; after each mini-batch the layers flagged
  ``is_loss_applyable`` update their weights through the optimizer.
  """

  def __init__(self, layers: typing.List["Layer"]):
    self.layers = layers
    self.optimizer = None
    self.loss_cl = None

  def compile(self, optimizer: "Optimizer", loss: "Layer"):
    """Attach the optimizer and the loss object used by ``train``."""
    self.loss_cl = loss
    self.optimizer = optimizer

  def __shuffle_x_y(self, x, y):
    # One shared permutation keeps samples and labels aligned.
    indices = np.random.permutation(len(x))
    return x[indices], y[indices]

  def __batch_train(self, i, batch_size, x, y, losses):
    # Run forward/backward for every sample of batch ``i``, then update once.
    batch_start = i * batch_size
    batch_end = batch_start + batch_size
    batch_train = x[batch_start:batch_end]
    batch_labels = y[batch_start:batch_end]

    if len(batch_train) == 0:
      # Nothing accumulated: an update would average over zero samples.
      return

    for elem_x, elem_y in zip(batch_train, batch_labels):
      target = elem_y.reshape(1, -1)
      _, cur_loss = self.__forward(elem_x, target)
      losses.append(cur_loss)
      self.__backward(target)

    self.__apply_loss()

  def train(self, data: np.ndarray, labels: np.ndarray, num_epochs, batch_size, patient=1000, shuffle=True):
    """Train the model with mini-batch updates and early stopping.

    Args:
      data: samples, indexed along the first axis.
      labels: targets aligned with ``data``.
      num_epochs: maximum number of passes over the data.
      batch_size: number of samples per weight update (must be >= 1).
      patient: training stops once the mean epoch loss has failed to
        improve for more than this many consecutive epochs.
      shuffle: reshuffle the samples at the start of every epoch.

    Raises:
      RuntimeError: if ``compile`` has not been called.
      ValueError: if ``batch_size`` < 1 or ``data`` is empty.
    """
    if self.optimizer is None or self.loss_cl is None:
      raise RuntimeError("compile() must be called before train()")
    if batch_size < 1:
      raise ValueError("batch_size must be a positive integer")
    if len(data) == 0:
      raise ValueError("data must contain at least one sample")

    x = data.copy()
    y = labels.copy()

    # Ceiling division: a trailing partial batch is kept, but no empty batch
    # is scheduled when len(x) is an exact multiple of batch_size.
    num_batches = (len(x) + batch_size - 1) // batch_size
    best_loss = sys.float_info.max
    stale_epochs = 0

    for epoch in range(num_epochs):
      if stale_epochs > patient:
        print("THE END OF MY FREAKING PATIENT!!!!!")
        break

      losses = []
      if shuffle:
        x, y = self.__shuffle_x_y(x, y)

      for batch_index in range(num_batches):
        self.__batch_train(batch_index, batch_size, x, y, losses)

      epoch_loss = np.mean(losses)

      if best_loss > epoch_loss:
        best_loss = epoch_loss
        stale_epochs = 0
      else:
        stale_epochs += 1

      print(f"Epoch {epoch}: {epoch_loss} - loss")

  def __forward(self, x: np.ndarray, y=None):
    # Returns (network output, loss); loss is None when no target is given.
    next_input = x.copy()
    for layer in self.layers:
      next_input = layer.forward(next_input)

    if y is None:
      return next_input, None

    return next_input, self.loss_cl.forward(next_input, y)

  def __backward(self, labels: np.ndarray):
    # Start from the loss gradient and walk the layers last to first.
    back = self.loss_cl.backward(labels.copy())

    for layer in reversed(self.layers):
      if layer.is_loss_computable:
        back = layer.backward(back)

  def __apply_loss(self):
    for layer in self.layers:
      if layer.is_loss_applyable:
        layer.apply_loss(self.optimizer)

  def evaluate(self, data: np.ndarray):
    """Return, for each sample in ``data``, the index of the largest network output."""
    answers = []
    for element in data.copy():
      output, _ = self.__forward(element)
      answers.append(np.argmax(output))
    return answers

## Data

In [13]:
# Ten samples of 25 binary values each; grouped by five below, the rows read
# as 5x5 bitmaps that appear to depict the digits 0-9 in order.
data = np.array([
    [1,1,1,1,1, 1,0,0,0,1, 1,0,0,0,1, 1,0,0,0,1, 1,1,1,1,1],  # 0
    [0,0,0,1,1, 0,1,0,0,1, 1,0,0,0,1, 0,0,0,0,1, 0,0,0,0,1],  # 1
    [1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,1],  # 2
    [1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1],  # 3
    [1,0,0,0,1, 1,0,0,0,1, 1,1,1,1,1, 0,0,0,0,1, 0,0,0,0,1],  # 4
    [1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1],  # 5
    [1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,1, 1,0,0,0,1, 1,1,1,1,1],  # 6
    [1,1,1,1,1, 0,0,0,0,1, 0,0,0,0,1, 0,0,0,0,1, 0,0,0,0,1],  # 7
    [1,1,1,1,1, 1,0,0,0,1, 1,1,1,1,1, 1,0,0,0,1, 1,1,1,1,1],  # 8
    [1,1,1,1,1, 1,0,0,0,1, 1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1],  # 9
])

# Class index of every row (0..9), then the matching one-hot target matrix.
lables = np.arange(10)
labels = np.eye(lables.size)[lables]

## Train

In [14]:
# 25 inputs -> 15 sigmoid units -> 10 softmax outputs.
model = Sequential(
    layers=[
        Input(),
        Dense(input_num=25, output_num=15),
        Sigmoid(),
        Dense(input_num=15, output_num=10),
        Softmax(),
    ]
)

In [15]:
# Weight decay 1e-2, learning rate 1e-1, cross-entropy loss.
model.compile(
    optimizer=SGD_W(weight_decay=1e-2, learning_rate=1e-1),
    loss=CrossEntropy(),
)
# Up to 2000 epochs, batches of 32, early-stopping patience of 20 epochs.
model.train(data, labels, num_epochs=2000, batch_size=32, patient=20)

Epoch 0: 2.798211558723291 - loss
Epoch 1: 2.726341656194429 - loss
Epoch 2: 2.665389213543812 - loss
Epoch 3: 2.613286871384058 - loss
Epoch 4: 2.5684366713235542 - loss
Epoch 5: 2.5295826050557926 - loss
Epoch 6: 2.4957236237368488 - loss
Epoch 7: 2.4660528108833484 - loss
Epoch 8: 2.4399136287138505 - loss
Epoch 9: 2.4167676295708627 - loss
Epoch 10: 2.396170131417669 - loss
Epoch 11: 2.3777515976647883 - loss
Epoch 12: 2.361203203445108 - loss
Epoch 13: 2.346265531099137 - loss
Epoch 14: 2.332719637206864 - loss
Epoch 15: 2.320379936824433 - loss
Epoch 16: 2.309088493252692 - loss
Epoch 17: 2.2987104041521835 - loss
Epoch 18: 2.2891300496229583 - loss
Epoch 19: 2.28024802307755 - loss
Epoch 20: 2.2719786068376564 - loss
Epoch 21: 2.264247685218721 - loss
Epoch 22: 2.256991011170187 - loss
Epoch 23: 2.250152760286527 - loss
Epoch 24: 2.243684319626979 - loss
Epoch 25: 2.237543269322857 - loss
Epoch 26: 2.2316925231728644 - loss
Epoch 27: 2.2260996008889027 - loss
Epoch 28: 2.2207360

## Test

In [16]:
# Predicted class index for each training sample.
model.evaluate(data=data)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# Keras

## Config

### Imports

In [17]:
import keras
import os

### Random

In [18]:
# Same seed value as the NumPy implementation above.
SEED = 42
os.environ['PYTHONHASHSEED']=str(SEED)
# NOTE(review): presumably seeds the Python, NumPy and backend RNGs in one call;
# confirm against the installed Keras version.
keras.utils.set_random_seed(SEED)

## Model

In [19]:
# Keras counterpart of the hand-written network: Dense(15, sigmoid) ->
# Dense(10) -> Softmax. Kernels and biases use He-normal initialisers seeded
# with SEED and carry an L2 regulariser of 1e-2.
# NOTE(review): Keras' L2 penalty is l2 * sum(w**2), i.e. a gradient of
# 2 * l2 * w, so l2=1e-2 may not correspond exactly to SGD_W's
# weight_decay=1e-2, and the reported loss presumably includes the penalty.
# Confirm if exact parity between the two runs is intended.
keras_model = keras.models.Sequential(
    [
        keras.layers.Dense(
            15,
            activation=keras.activations.sigmoid,
            kernel_initializer=keras.initializers.he_normal(seed=SEED),
            bias_initializer=keras.initializers.he_normal(seed=SEED),
            kernel_regularizer=keras.regularizers.l2(1e-2),
            bias_regularizer=keras.regularizers.l2(1e-2),
        ),
        keras.layers.Dense(
            10,
            kernel_initializer=keras.initializers.he_normal(seed=SEED),
            bias_initializer=keras.initializers.he_normal(seed=SEED),
            kernel_regularizer=keras.regularizers.l2(1e-2),
            bias_regularizer=keras.regularizers.l2(1e-2),
        ),
        keras.layers.Softmax(),
    ]
)

# Plain SGD with learning rate 1e-1 and categorical cross-entropy on the
# softmax output.
keras_model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-1),
    loss=keras.losses.CategoricalCrossentropy(),
)

## Data

In [20]:
# Ten samples of 25 binary values each; grouped by five below, the rows read
# as 5x5 bitmaps that appear to depict the digits 0-9 in order.
data = np.array([
    [1,1,1,1,1, 1,0,0,0,1, 1,0,0,0,1, 1,0,0,0,1, 1,1,1,1,1],  # 0
    [0,0,0,1,1, 0,1,0,0,1, 1,0,0,0,1, 0,0,0,0,1, 0,0,0,0,1],  # 1
    [1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,1],  # 2
    [1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1],  # 3
    [1,0,0,0,1, 1,0,0,0,1, 1,1,1,1,1, 0,0,0,0,1, 0,0,0,0,1],  # 4
    [1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1],  # 5
    [1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,1, 1,0,0,0,1, 1,1,1,1,1],  # 6
    [1,1,1,1,1, 0,0,0,0,1, 0,0,0,0,1, 0,0,0,0,1, 0,0,0,0,1],  # 7
    [1,1,1,1,1, 1,0,0,0,1, 1,1,1,1,1, 1,0,0,0,1, 1,1,1,1,1],  # 8
    [1,1,1,1,1, 1,0,0,0,1, 1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1],  # 9
])

# Class index of every row (0..9), then the matching one-hot target matrix.
lables = np.arange(10)
labels = np.eye(lables.size)[lables]

## Train

In [21]:
# Train on the ten one-hot labelled samples. batch_size=32 exceeds the sample
# count, so every epoch is a single step (the "1/1" in the log below).
keras_model.fit(
    data,
    labels,
    epochs=2000,
    shuffle=True,
    verbose=2,  # one log line per epoch
    batch_size=32,
)

Epoch 1/2000
1/1 - 2s - loss: 3.1927 - 2s/epoch - 2s/step
Epoch 2/2000
1/1 - 0s - loss: 3.1238 - 9ms/epoch - 9ms/step
Epoch 3/2000
1/1 - 0s - loss: 3.0713 - 8ms/epoch - 8ms/step
Epoch 4/2000
1/1 - 0s - loss: 3.0299 - 9ms/epoch - 9ms/step
Epoch 5/2000
1/1 - 0s - loss: 2.9963 - 10ms/epoch - 10ms/step
Epoch 6/2000
1/1 - 0s - loss: 2.9682 - 10ms/epoch - 10ms/step
Epoch 7/2000
1/1 - 0s - loss: 2.9443 - 9ms/epoch - 9ms/step
Epoch 8/2000
1/1 - 0s - loss: 2.9235 - 10ms/epoch - 10ms/step
Epoch 9/2000
1/1 - 0s - loss: 2.9052 - 10ms/epoch - 10ms/step
Epoch 10/2000
1/1 - 0s - loss: 2.8889 - 10ms/epoch - 10ms/step
Epoch 11/2000
1/1 - 0s - loss: 2.8742 - 10ms/epoch - 10ms/step
Epoch 12/2000
1/1 - 0s - loss: 2.8607 - 10ms/epoch - 10ms/step
Epoch 13/2000
1/1 - 0s - loss: 2.8484 - 10ms/epoch - 10ms/step
Epoch 14/2000
1/1 - 0s - loss: 2.8371 - 11ms/epoch - 11ms/step
Epoch 15/2000
1/1 - 0s - loss: 2.8265 - 19ms/epoch - 19ms/step
Epoch 16/2000
1/1 - 0s - loss: 2.8165 - 21ms/epoch - 21ms/step
Epoch 17/2000

<keras.src.callbacks.History at 0x79265e441600>

## Test

In [22]:
# Append an argmax layer so predict() returns class indices instead of
# probabilities.
# NOTE(review): add() changes keras_model itself, so re-running this cell
# presumably stacks another argmax layer; confirm before re-executing.
keras_model.add(keras.layers.Lambda(lambda x: keras.backend.argmax(x, axis=-1)))
keras_model.predict(data)



array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])