In [1]:
# Import modules
import numpy as np
import math

In [2]:
# Fungsi-fungsi aktivasi
def linear(x, derivative = False):
  if not (derivative):
    return x
  return np.ones_like(x)

def sigmoid(x, derivative = False):
  if not (derivative):
    return 1 / (1 + np.exp(-x))
  return sigmoid(x)*(1 - sigmoid(x))

def relu(x, derivative = False):
  if not (derivative):
    return np.maximum(0, x)
  return np.where(x >= 0, 1, 0)

def softmax(x, derivative = False):
    if not (derivative):
        ex = np.exp(x)
        return ex / np.sum(ex)
    
    result = np.zeros(x.shape)
    for i in range(x.shape[1]):
        temp = x[:,i].reshape(-1,1)
        resTemp = np.diagflat(temp) - np.dot(temp, temp.T)
        result[:,i] = np.sum(resTemp, axis=1)
    return result

activation_function = {
    "Linear": linear,
    "Sigmoid": sigmoid,
    "ReLU": relu,
    "Softmax": softmax,
}

In [3]:
# Fungsi loss

# Sum of squared errors (Linear, Sigmoid, ReLU)
def sum_of_squared_errors(target, output):
    return 0.5 * np.sum((target - output)**2)

# Cross Entopy (Softmax)
def cross_entropy(target, output):
    result = (-1)*math.log(target)
    return result

cost_function = {
    "Linear": sum_of_squared_errors,
    "Sigmoid": sum_of_squared_errors,
    "ReLU": sum_of_squared_errors,
    "Softmax": cross_entropy,
}

In [4]:
clip_upper_threshold = 5
clip_lower_threshold = 0.5
def clip(x):
    ret = x
    norm = np.sum(x * x)
    if norm > clip_upper_threshold ** 2:
        ret = ret * (clip_upper_threshold / np.sqrt(norm))
    return ret

In [5]:
class Layer():
  def __init__(self, activation, input, output):
    if activation not in ['Linear', 'Sigmoid', 'ReLU', 'Softmax']:
          raise NotImplementedError("Layer activation `%s` is not implemented." 
                                      % activation)
    self.weight = np.random.rand(output, input)
    self.bias = np.random.rand(output, 1)
    self.activation = activation

    self.delta = np.zeros(output)
    self.delta_weight = np.zeros((output, input))
    self.delta_bias = np.ones((output, 1))
    self.data_in = np.zeros(output)

  def net(self):
    net = np.dot(self.weight, self.data_in) + self.bias
    return net

  def output(self):
    net = self.net()
    return activation_function[self.activation](x = net)
  
  def derivative_output(self):
    net = self.net()
    return activation_function[self.activation](x = net, derivative = True)

  def calculate_error(self, target, output):
    return cost_function[self.activation](target, output)

In [6]:
class NeuralNetwork():
  def __init__(self, learning_rate, error_threshold, max_iter, batch_size):
    self.layers = []
    self.learning_rate = learning_rate
    self.error_threshold = error_threshold 
    self.max_iter = max_iter
    self.batch_size = batch_size
  
  def summary(self):
    print("Jumlah layer: ", len(self.layers))
    for i, layer in enumerate(self.layers):
      print("============================================================")
      print('Layer {} (Activation: "{}", Units: {})'.format(i+1, layer.activation, len(layer.weight)))
      print("Weight:")
      print(np.array(layer.weight))
      print("Bias:")
      print(np.array(layer.bias))
    print("============================================================")

  def add(self, layer):
    self.layers.append(layer)

  def predict(self, input):
    return self.forward_propagation(input)
        
  def load_file(self, filename):
    '''
    ### DEPRECATED ###
    File format
    <depth>
    <units> <activation function>
    <weight0> 
    <bias0>
    '''
    with open(filename, 'r') as file:
      depth = int(file.readline().strip())
      for i in range (depth):
        line = file.readline().strip().split()
        unit = int(line[0])
        activation = line[1]

        # Weight Matrix
        weight = []
        for j in range(unit):
          weight.append(list(map(float, file.readline().strip().split())))

        # Bias Matrix
        bias = list(map(float, file.readline().strip().split()))
        
        # Add layer
        layer = Layer(weight, bias, activation)
        self.add(layer)
      
      # End of file
    # Close file
    print('File loaded. Model detected')

  def forward_propagation(self, inputs):
    arr_in = np.array(inputs).T
    for layer in self.layers:
      layer.data_in = arr_in
      arr_in = layer.output()
    return arr_in

  def shuffle(self, X, y):
    arr_id = [i for i in range(len(y))]
    np.random.shuffle(arr_id)
    X_result = [] 
    y_result = []

#     try:
    for i in arr_id:
      X_result.append(list(X[i]))
      y_result.append(list(y[i]))
#     except:
#         print(type(arr_id))
#         print(arr_id)
#         print(X)
#         print(y)

    return X_result, y_result

  def create_batch(self, X, y):
    batch_x = []
    batch_y = []
    epoch = math.ceil(len(X) / self.batch_size)
    
    for i in range(0, epoch):
      head = i * self.batch_size
      tail = (i + 1) * self.batch_size
      batch_x.append(np.array(X[head : tail]))
      batch_y.append(np.array(y[head : tail]))
        
    return batch_x, batch_y

  def backward_propagation(self, X, y, output):
    for i, layer in reversed(list(enumerate(self.layers))):
      # Output Layer Chain Rule      
      if (i == len(self.layers)-1):
        if (layer.activation == "Softmax"):
          # Derivative of Cross Entropy times derivative output
          dE = y
          for j in range(y.shape[1]):
            k = np.argmax(y[:, i])
            dE[k, j] = -(1 - dE[k, j])
          layer.delta = clip(dE * layer.derivative_output())
        else:
          # Derivative of MSE times derivative output
          dE = (output - y)
          layer.delta = clip(dE * layer.derivative_output())
      
      # Hidden Layer Chain Rule
      else:
        nextl = self.layers[i + 1]
        error = np.dot(nextl.weight.T, nextl.delta)
        layer.delta = error * layer.derivative_output()
    
      layer.delta_weight = clip(np.dot(layer.delta, layer.data_in.T) * self.learning_rate)
      layer.delta_bias = clip(layer.delta * self.learning_rate)
       
  def mgd(self, X, y):
    for iteration in range(0, self.max_iter):
      # Shuffle data
      data_X, data_y = self.shuffle(X, y)

      # Divide into batch
      batch_x, batch_y = self.create_batch(data_X, data_y)
      batches = len(batch_x)
      
      error = 0  
      for batch in range(0, batches):
        X_train = batch_x[batch]
        y_train = batch_y[batch]

        # Forward propagation on input
        y_predict = self.forward_propagation(X_train)

        # Compute cost
        error += self.layers[-1].calculate_error(y_predict, y_train.T)
      
        # Backward propagation to count delta
        self.backward_propagation(X_train.T, y_train.T, y_predict)

        # Update each layer
        for layer in self.layers:
          # Update weight
          layer.weight += layer.delta_weight

          # Update bias
          delta_bias = layer.delta_bias
          layer.bias += np.sum(delta_bias, axis=1).reshape(len(delta_bias), 1)
          
          # Reset delta value
          layer.delta_weight = np.zeros(layer.weight.shape)
          layer.delta_bias = np.zeros(layer.bias.shape)
      
      error *= 1 / len(X)
      if error <= self.error_threshold:
        print("Error is lower or equal than error threshold")
        print("Ended in {} iterations".format(iteration))
        return
    
    print("Reached maximum iterations")
    print("Ended in {} iterations".format(self.max_iter))
    return

In [7]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder(handle_unknown='ignore')

data = load_iris()
X = data.data
y = data.target
y = y.reshape(-1,1)
enc.fit(y)
y = enc.transform(y).toarray()

In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)


In [9]:
from sklearn.neural_network import MLPClassifier

model_sklearn = MLPClassifier(max_iter = 2000)
model_sklearn.fit(X_train, y_train)

MLPClassifier(max_iter=2000)

In [10]:
prediction_sklearn = model_sklearn.predict(X_test)
print(prediction_sklearn)

[[0 1 0]
 [1 0 0]
 [0 0 1]
 [0 1 0]
 [0 1 0]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [0 0 1]
 [1 0 0]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 0 1]
 [0 1 0]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [0 1 0]]


In [11]:
from sklearn.metrics import accuracy_score
print(accuracy_score(prediction_sklearn, y_test))

0.9733333333333334


In [12]:
model_scratch = NeuralNetwork(learning_rate = 0.001, error_threshold = 0.01, max_iter = 2000, batch_size = 5)

# Layer 1
model_scratch.add(Layer("ReLU", 4, 10))
# Layer 2
model_scratch.add(Layer("ReLU", 10, 10))
# Layer 3
model_scratch.add(Layer("Linear", 10, 5))
# Layer 4
model_scratch.add(Layer("Sigmoid", 5, 3))

# Layer Output
model_scratch.mgd(X_train, y_train)

Reached maximum iterations
Ended in 2000 iterations


In [13]:
model_scratch.summary()

Jumlah layer:  4
Layer 1 (Activation: "ReLU", Units: 10)
Weight:
[[0.39197195 0.80487849 0.08901206 0.97277294]
 [0.17378321 0.66068846 0.02117359 0.94674468]
 [0.8316787  0.48436075 0.27544551 0.76061833]
 [0.94311292 0.29420751 0.44675564 0.99289372]
 [0.99845535 0.11453258 0.76941347 0.90086565]
 [0.24263891 0.18805312 0.34384888 0.78591562]
 [0.03984703 0.56574264 0.38302135 0.74069951]
 [0.60705419 0.35689064 0.87011459 0.15487822]
 [0.83372127 0.58202184 0.13530059 0.1721255 ]
 [0.12889095 0.0629536  0.08731491 0.41460441]]
Bias:
[[0.75775004]
 [0.67971567]
 [0.86451071]
 [0.34272421]
 [0.7040273 ]
 [0.34097599]
 [0.60497078]
 [0.27630508]
 [0.72588838]
 [0.60829054]]
Layer 2 (Activation: "ReLU", Units: 10)
Weight:
[[9.29375257e-01 8.57373305e-01 7.37899346e-02 1.91151745e-01
  2.41221580e-01 5.02277179e-01 2.39209631e-01 6.32224951e-01
  3.71605159e-01 2.77212516e-01]
 [3.19190584e-01 9.93838471e-01 2.34748444e-01 6.48095698e-02
  9.26156238e-01 5.96046603e-01 9.00956929e-01 3.9

In [14]:
prediction_scratch = model_scratch.predict(X_test)
print(prediction_scratch)

[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1.]]


In [15]:
prediction_scratch

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1.

In [16]:
y_test

array([[0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0

In [21]:
print(accuracy_score(prediction_scratch, y_test.T))

0.0
