In [None]:
# Import modules
import numpy as np
import math

In [None]:
# Activation functions
def linear(x, derivative = False):
  """Identity activation.

  Args:
    x: Value or array to pass through.
    derivative: When truthy, return the slope of the identity instead.

  Returns:
    `x` itself, or an array of ones shaped like `x` when `derivative`
    is truthy.
  """
  if derivative:
    return np.ones_like(x)
  return x

def sigmoid(x, derivative = False):
  """Logistic sigmoid, evaluated without floating-point overflow.

  Args:
    x: Scalar or array of pre-activation values.
    derivative: When truthy, return s * (1 - s), where s is the sigmoid
      of `x`, instead of s.

  Returns:
    The sigmoid of `x` (or its derivative), element-wise. A scalar input
    yields a scalar result.
  """
  x = np.asarray(x)
  # exp is only ever taken of a non-positive number, so it cannot overflow
  # no matter how large |x| gets.
  z = np.exp(-np.abs(x))
  s = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
  if x.ndim == 0:
    s = s[()]  # hand scalars back as scalars rather than 0-d arrays
  if derivative:
    # The sigmoid is computed once and reused for the derivative.
    return s * (1 - s)
  return s

def relu(x, derivative = False):
  """Rectified linear unit.

  Args:
    x: Array of pre-activation values.
    derivative: When truthy, return the slope instead of the activation.

  Returns:
    max(0, x) element-wise, or, when `derivative` is truthy, an integer
    array holding 1 where x >= 0 and 0 elsewhere.
  """
  if derivative:
    # The kink at exactly 0 is assigned slope 1.
    return np.where(x >= 0, 1, 0)
  return np.maximum(0, x)

def softmax(x, derivative = False):
    """Softmax over axis 0, so each column of a 2-D input is one sample.

    Args:
        x: Pre-activation values. A 1-D array is treated as a single
            sample; a 2-D array is normalised column by column. A scalar
            is promoted to a one-element array.
        derivative: When truthy, return the element-wise derivative
            s * (1 - s), i.e. the diagonal of the softmax Jacobian
            diag(s) - s s^T evaluated at s = softmax(x).

    Returns:
        An array shaped like `x`. Without `derivative` every column sums
        to 1.

    Note:
        The full Jacobian is a matrix per sample and cannot be returned
        in the shape of `x`; only its diagonal is. Summing the Jacobian's
        rows instead would always give zero, because softmax outputs sum
        to 1.
    """
    x = np.atleast_1d(np.asarray(x, dtype=float))
    # Subtracting the per-sample maximum leaves the result unchanged but
    # keeps exp() from overflowing on large inputs.
    shifted = x - np.max(x, axis=0, keepdims=True)
    ex = np.exp(shifted)
    probs = ex / np.sum(ex, axis=0, keepdims=True)
    if not derivative:
        return probs
    return probs * (1 - probs)

# Lookup table from a layer's activation name to the function implementing it.
activation_function = dict([
    ("Linear", linear),
    ("Sigmoid", sigmoid),
    ("ReLU", relu),
    ("Softmax", softmax),
])

In [None]:
# Loss functions

# Sum of squared errors (Linear, Sigmoid, ReLU)
def sum_of_squared_errors(target, output):
    """Half the sum of squared differences between `target` and `output`.

    Args:
        target: Expected values.
        output: Predicted values, broadcastable against `target`.

    Returns:
        0.5 * sum((target - output) ** 2) over every element.
    """
    residual = target - output
    squared = residual ** 2
    return 0.5 * np.sum(squared)

# Softmax
def cross_entropy(target, output = None):
    """Cross-entropy loss that accepts scalars as well as arrays.

    Args:
        target: With `output` omitted, the probability (or array of
            probabilities) the model assigned to the correct class.
            With `output` given, the expected distribution, e.g. one-hot
            labels.
        output: Optional predicted probabilities shaped like `target`.

    Returns:
        A Python float: -sum(log(target)) when `output` is None,
        otherwise -sum(target * log(output)).
    """
    # Probabilities are clamped away from zero so log() stays finite.
    eps = 1e-12
    if output is None:
        probs = np.maximum(np.asarray(target, dtype=float), eps)
        return float(-np.sum(np.log(probs)))
    labels = np.asarray(target, dtype=float)
    probs = np.maximum(np.asarray(output, dtype=float), eps)
    return float(-np.sum(labels * np.log(probs)))

# Lookup table from an output layer's activation name to its loss function:
# every activation uses the sum of squared errors except Softmax.
cost_function = {
    name: sum_of_squared_errors for name in ("Linear", "Sigmoid", "ReLU")
}
cost_function["Softmax"] = cross_entropy

In [None]:
class Layer():
  """Fully connected layer: an affine map followed by a named activation.

  Attributes are documented next to their assignments in `__init__`.
  """

  def __init__(self, activation, input, output):
    """Create a layer with uniformly random weights and biases in [0, 1).

    Args:
      activation: One of 'Linear', 'Sigmoid', 'ReLU', 'Softmax'.
      input: Number of values the layer receives per sample.
      output: Number of units (values produced per sample).

    Raises:
      NotImplementedError: If `activation` is not a supported name.
    """
    if activation not in ('Linear', 'Sigmoid', 'ReLU', 'Softmax'):
      raise NotImplementedError("Layer activation `%s` is not implemented."
                                % activation)
    self.weight = np.random.rand(output, input)   # one row per unit
    self.bias = np.random.rand(output, 1)         # column vector
    self.activation = activation
    # Pending updates start at zero so that applying them before any
    # backward pass is a no-op (the bias update used to start at ones).
    self.delta_weight = np.zeros((output, input))
    self.delta_bias = np.zeros((output, 1))
    self.delta = np.zeros(output)
    # Placeholder for the most recent input to this layer; the network's
    # forward pass overwrites it.
    self.data_in = np.zeros((input, 1))

  def net(self, input):
    """Return the pre-activation `weight @ input + bias`.

    Args:
      input: Either a 2-D array with one sample per column, or a 1-D
        array holding a single sample.

    Returns:
      A 2-D array with one row per unit and one column per sample.
    """
    arr = np.asarray(input)
    if arr.ndim == 1:
      # A bare vector would broadcast against the (output, 1) bias into
      # an (output, output) matrix, so treat it as one column.
      arr = arr.reshape(-1, 1)
    return np.dot(self.weight, arr) + self.bias

  def forward_propagation(self, input):
    """Apply the affine map and then the layer's activation to `input`."""
    net = self.net(input)
    return activation_function[self.activation](net)

  def calculate_error(self, target, output):
    """Evaluate this layer's loss for a batch.

    Args:
      target: Expected values.
      output: Predicted values with the same shape as `target`.

    Returns:
      The loss looked up in `cost_function` for this layer's activation.
      For 'Softmax' this is the sum over samples of the cost of the
      probability assigned to the true class.
    """
    cost = cost_function[self.activation]
    if self.activation != "Softmax":
      return cost(target, output)
    # assumes classes lie on the last axis of both arrays, which is how
    # NeuralNetwork.mgd passes them — TODO confirm for other callers
    labels = np.asarray(target, dtype=float)
    probs = np.asarray(output, dtype=float)
    # With one-hot labels this picks out the probability of the true class.
    p_true = np.atleast_1d(np.sum(labels * probs, axis=-1))
    p_true = np.maximum(p_true, 1e-12)  # keep the logarithm finite
    return sum(cost(float(p)) for p in p_true)
        
class NeuralNetwork():
  """Feed-forward network trained with mini-batch gradient descent.

  Layers are applied in the order they were added. Every layer object is
  expected to expose `weight`, `bias`, `activation`, `data_in`,
  `delta_weight`, `delta_bias`, `net(input)` and
  `calculate_error(target, output)`.
  """

  def __init__(self, learning_rate, error_threshold, max_iter, batch_size):
    """Store the training hyper-parameters; the network starts empty.

    Args:
      learning_rate: Step size applied to every gradient.
      error_threshold: Training stops once the mean error per sample of
        an iteration is at or below this value.
      max_iter: Maximum number of passes over the training data.
      batch_size: Number of samples per mini-batch.
    """
    self.layers = []
    self.learning_rate = learning_rate
    self.error_threshold = error_threshold
    self.max_iter = max_iter
    self.batch_size = batch_size

  def summary(self):
    """Print the layer count and each layer's activation, size and parameters."""
    separator = "============================================================"
    print("Jumlah layer: ", len(self.layers))
    for i, layer in enumerate(self.layers):
      print(separator)
      print('Layer {} (Activation: "{}", Units: {})'.format(i+1, layer.activation, len(layer.weight)))
      print("Weight:")
      print(np.array(layer.weight))
      print("Bias:")
      print(np.array(layer.bias))
    print(separator)

  def add(self, layer):
    """Append `layer` as the new last layer of the network."""
    self.layers.append(layer)

  def predict(self, input):
    """Run a forward pass; see `forward_propagation` for the shapes."""
    return self.forward_propagation(input)

  def load_file(self, filename):
    '''
    Append the layers described in a text file to this network.

    File format
    <depth>
    then, for each of the <depth> layers:
    <units> <activation function>
    <units> lines, each holding the input weights of one unit
    one line holding the <units> biases

    Raises:
      ValueError: If the bias line does not hold exactly <units> values.
    '''
    with open(filename, 'r') as file:
      depth = int(file.readline().strip())
      for _ in range(depth):
        header = file.readline().strip().split()
        unit = int(header[0])
        activation = header[1]

        # Weight matrix: one row per unit.
        rows = [list(map(float, file.readline().strip().split()))
                for _ in range(unit)]
        weight = np.array(rows, dtype=float)

        # Bias values, stored as a column to match Layer.bias.
        bias = np.array(list(map(float, file.readline().strip().split())),
                        dtype=float)

        # Layer takes (activation, input, output), so build it with the
        # right sizes and then replace its random parameters.
        layer = Layer(activation, weight.shape[1], unit)
        layer.weight = weight
        layer.bias = bias.reshape(unit, 1)
        self.add(layer)

    print('File loaded. Model detected')

  def shuffle(self, X, y):
    """Return X and y as lists of lists, reordered by one shared random permutation."""
    order = np.random.permutation(len(y))
    X_return = [list(X[i]) for i in order]
    y_return = [list(y[i]) for i in order]
    return X_return, y_return

  def create_batch(self, X, y):
    """Shuffle the data and cut it into consecutive mini-batches.

    Returns:
      Two lists (features, targets) with one entry per batch; the last
      batch may be shorter than `batch_size`.

    Raises:
      ValueError: If `batch_size` is not positive.
    """
    if self.batch_size <= 0:
      raise ValueError("batch_size must be a positive integer")

    n_batches = math.ceil(len(X) / self.batch_size)
    data_X, data_y = self.shuffle(X, y)

    batch_x = []
    batch_y = []
    for i in range(n_batches):
      head = i * self.batch_size
      tail = head + self.batch_size
      batch_x.append(data_X[head : tail])
      batch_y.append(data_y[head : tail])

    return batch_x, batch_y

  def forward_propagation(self, inputs):
    """Feed `inputs` through every layer in order.

    The input is transposed first, so rows of `inputs` become columns.
    Each layer's `data_in` is set to the array it received, for use by
    `backward_propagation`.

    Returns:
      The activation of the last layer.
    """
    arr = np.array(inputs).T
    for layer in self.layers:
      layer.data_in = arr
      net = layer.net(arr)
      arr = activation_function[layer.activation](net)
    return arr

  def backward_propagation(self, X, y, output):
    """Compute the pending weight and bias updates of every layer.

    Must be called after a forward pass on the same batch, because it
    reads each layer's `data_in`.

    Args:
      X: The batch inputs (unused; kept for interface compatibility).
      y: Targets, one row per sample.
      output: Predictions shaped like `y`, i.e. the transposed result of
        `forward_propagation`.

    Side effects:
      Sets `delta_weight` and `delta_bias` on every layer to
      -learning_rate * gradient, summed over the batch, so that adding
      them to the parameters performs a descent step.
    """
    # Work with one sample per column, matching the layers' own layout.
    target = np.asarray(y, dtype=float).T
    predicted = np.asarray(output, dtype=float).T

    last = len(self.layers) - 1
    gdelta = None
    for i in range(last, -1, -1):
      layer = self.layers[i]
      net = layer.net(layer.data_in)

      if i == last:
        if layer.activation == "Softmax":
          # For softmax combined with cross-entropy the gradient with
          # respect to the pre-activation reduces to (prediction - target).
          gdelta = predicted - target
        else:
          d_act = activation_function[layer.activation](x = net, derivative = True)
          # Derivative of the squared error times the activation slope.
          gdelta = (predicted - target) * d_act
      else:
        # Pull the next layer's error back through its (not yet updated)
        # weights, then through this layer's activation.
        nextl = self.layers[i + 1]
        d_act = activation_function[layer.activation](x = net, derivative = True)
        gdelta = np.dot(nextl.weight.T, gdelta) * d_act

      layer.delta_weight = -self.learning_rate * np.dot(gdelta, layer.data_in.T)
      layer.delta_bias = -self.learning_rate * np.sum(gdelta, axis=1, keepdims=True)

  def mgd(self, X, y):
    """Train the network with mini-batch gradient descent.

    Each iteration reshuffles the data, updates the parameters after
    every mini-batch, and stops early once the mean error per sample is
    at or below `error_threshold`.

    Args:
      X: Training inputs, one row per sample.
      y: Training targets, one row per sample.

    Raises:
      ValueError: If the network has no layers or `X` is empty.
    """
    if not self.layers:
      raise ValueError("Cannot train a network without layers")
    if len(X) == 0:
      raise ValueError("Cannot train on an empty data set")

    for iteration in range(self.max_iter):
      batch_x, batch_y = self.create_batch(X, y)

      error = 0
      for X_train, y_train in zip(batch_x, batch_y):
        # Forward pass; transpose so predictions line up with y_train.
        prediction = self.predict(X_train).T

        error += self.layers[-1].calculate_error(y_train, prediction)

        self.backward_propagation(X_train, y_train, prediction)

        for layer in self.layers:
          layer.weight += layer.delta_weight
          layer.bias += layer.delta_bias

          # Reset the pending updates.
          layer.delta_weight = np.zeros(layer.weight.shape)
          layer.delta_bias = np.zeros(layer.bias.shape)

      error *= 1 / len(X)
      if error <= self.error_threshold:
        print("Error is lower or equal than error threshold")
        # `iteration` is zero-based, so report the number of passes done.
        print("Ended in {} iterations".format(iteration + 1))
        return

    print("Reached maximum iterations")
    print("Ended in {} iterations".format(self.max_iter))
    return

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Iris dataset and pull out its feature matrix and targets.
data = load_iris()
X, y = data.data, data.target

print(data['feature_names'], '\n', y)

# NOTE(review): test_size=0.8 holds out 80% of the rows for testing and
# leaves 20% for training — confirm this split is intentional.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.8,
    random_state=42,
)

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder

# One-hot encoder for the class labels; it is fitted in the cell that
# trains the from-scratch network.
enc = OneHotEncoder(handle_unknown='ignore')

# Reference model. The seed fixes the random weight initialisation and
# batch sampling so the comparison is reproducible on every run.
model_sklearn = MLPClassifier(max_iter = 1000, random_state = 42)
model_sklearn.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test rows with the scikit-learn model.
prediction_sklearn = model_sklearn.predict(X_test)
print(prediction_sklearn)

In [None]:
from sklearn.metrics import accuracy_score
# accuracy_score expects (y_true, y_pred); pass the arguments in that order.
print(accuracy_score(y_test, prediction_sklearn))

In [None]:
model_scratch = NeuralNetwork(
    learning_rate = 0.001,
    error_threshold = 0.01,
    max_iter = 1000,
    batch_size = 5,
)

# (activation, inputs, units) for layers 1 to 4, in order.
layer_specs = [
    ("ReLU", 4, 10),
    ("ReLU", 10, 10),
    ("Linear", 10, 5),
    ("Sigmoid", 5, 3),
]
for activation_name, n_inputs, n_units in layer_specs:
    model_scratch.add(Layer(activation_name, n_inputs, n_units))

# A Softmax output layer, Layer("Softmax", 3, 3), is currently left out.

model_scratch.summary()

In [None]:
# One-hot encode the labels from `data.target` rather than from `y`
# itself, so re-running this cell does not encode an already encoded `y`.
y = enc.fit_transform(data.target.reshape(-1, 1)).toarray()

# NOTE(review): this trains on the full data set (X, y), not on the
# X_train split used for the scikit-learn model — confirm that is intended.
model_scratch.mgd(X, y)

In [None]:
# Scratch cell: displays a 5x3 array of zeros. Nothing is assigned, so it
# appears to be unused by the rest of the notebook — candidate for removal.
np.zeros((5,3))