In [11]:
# Import modules
import numpy as np
import math
import json

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPClassifier

In [2]:
# Fungsi-fungsi aktivasi
def linear(x, derivative = False):
  """Identity activation.

  Returns `x` unchanged, or an array of ones shaped like `x` when
  `derivative` is true.
  """
  if derivative:
    return np.ones_like(x)
  return x

def sigmoid(x, derivative = False):
  """Logistic activation 1 / (1 + exp(-x)).

  With `derivative` true, returns s * (1 - s) where s is the activation of `x`.
  """
  activated = 1 / (1 + np.exp(-x))
  if derivative:
    return activated * (1 - activated)
  return activated

def relu(x, derivative = False):
  """Rectified linear activation max(0, x).

  With `derivative` true, returns 1 where x >= 0 and 0 elsewhere
  (so the slope reported at exactly 0 is 1).
  """
  if derivative:
    return np.where(x >= 0, 1, 0)
  return np.maximum(0, x)

def softmax(x, derivative = False):
    """Softmax activation, normalised along axis 0.

    Layers in this file produce pre-activations with one row per unit, so
    every column is normalised independently and sums to 1.

    Args:
        x: Pre-activation values (array-like).
        derivative: When true, return the element-wise derivative
            d softmax_i / d x_i = s_i * (1 - s_i), i.e. the diagonal of the
            softmax Jacobian, evaluated at softmax(x).

    Returns:
        Float array with the same shape as `x`.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return np.zeros(x.shape)

    # A 0-d input has no axis 0 to reduce over.
    axis = 0 if x.ndim > 0 else None

    # Subtracting the per-column maximum leaves the result unchanged but
    # keeps exp() from overflowing on large pre-activations.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    ex = np.exp(shifted)
    probs = ex / np.sum(ex, axis=axis, keepdims=True)

    if not derivative:
        return probs
    return probs * (1 - probs)

# Lookup table from a layer's activation name to the function implementing it.
activation_function = dict(
    Linear=linear,
    Sigmoid=sigmoid,
    ReLU=relu,
    Softmax=softmax,
)

In [3]:
# Fungsi loss

# Sum of squared errors (Linear, Sigmoid, ReLU)
def sum_of_squared_errors(target, output):
    """Return half the sum of squared differences between `target` and `output`."""
    residual = target - output
    squared = residual ** 2
    return 0.5 * np.sum(squared)

# Cross entropy (Softmax)
def cross_entropy(target, output):
    """Cross-entropy loss -sum(target * log(output)).

    Args:
        target: Expected distribution (array-like), e.g. one-hot labels.
        output: Predicted probabilities with the same shape as `target`.

    Returns:
        The loss summed over every element, as a Python float.
    """
    target = np.asarray(target, dtype=float)
    output = np.asarray(output, dtype=float)

    # Clip so a predicted probability of exactly 0 gives a large finite
    # penalty instead of log(0) = -inf (and 0 * -inf = nan).
    smallest = np.finfo(float).eps
    clipped = np.clip(output, smallest, 1.0)
    return float(-np.sum(target * np.log(clipped)))

# Lookup table from the output layer's activation name to its loss function.
cost_function = dict(
    Linear=sum_of_squared_errors,
    Sigmoid=sum_of_squared_errors,
    ReLU=sum_of_squared_errors,
    Softmax=cross_entropy,
)

In [4]:
class Layer():
  """Fully connected layer: parameters plus the scratch state used by backprop.

  Attributes:
    weight: (output, input) weight matrix.
    bias: (output, 1) bias column.
    activation: Name of the activation, used as key into `activation_function`
      and `cost_function`.
    data_in: Input most recently fed to the layer, one column per sample.
    delta, delta_weight, delta_bias: Values written by backpropagation.
  """

  SUPPORTED_ACTIVATIONS = ('Linear', 'Sigmoid', 'ReLU', 'Softmax')

  def __init__(self, activation, input, output, seed = 69):
    """Create a layer with `input` incoming and `output` outgoing units.

    Args:
      activation: One of 'Linear', 'Sigmoid', 'ReLU', 'Softmax'.
      input: Number of input features.
      output: Number of units in this layer.
      seed: Passed to `np.random.seed` before the parameters are drawn.
        The default reproduces the previously hard-coded value; pass None
        to leave the global generator untouched.

    Raises:
      NotImplementedError: If `activation` is not supported.
    """
    if activation not in self.SUPPORTED_ACTIVATIONS:
      raise NotImplementedError("Layer activation `%s` is not implemented."
                                % activation)
    # NOTE: this reseeds numpy's *global* generator, so two layers of the same
    # shape created with the same seed start with identical parameters.
    if seed is not None:
      np.random.seed(seed)
    self.weight = np.random.randn(output, input)
    self.bias = np.random.randn(output, 1)
    self.activation = activation

    self.delta = np.zeros(output)
    self.delta_weight = np.zeros((output, input))
    # Start at zero so an update applied before any backward pass is a no-op.
    self.delta_bias = np.zeros((output, 1))
    # Sized by `input` (one column) so net() is well-defined before the
    # first forward pass.
    self.data_in = np.zeros((input, 1))

  def set_weight(self, weight):
    """Replace the weight matrix; array-likes (e.g. JSON lists) are converted."""
    self.weight = np.asarray(weight, dtype=float)

  def set_bias(self, bias):
    """Replace the bias; the value is converted to a float column vector."""
    self.bias = np.asarray(bias, dtype=float).reshape(-1, 1)

  def net(self):
    """Return the pre-activation weight . data_in + bias."""
    return np.dot(self.weight, self.data_in) + self.bias

  def output(self):
    """Return the activation applied to `net()`."""
    return activation_function[self.activation](x = self.net())

  def derivative_output(self):
    """Return the activation's derivative evaluated at `net()`."""
    return activation_function[self.activation](x = self.net(), derivative = True)

  def calculate_error(self, target, output):
    """Return the loss registered for this layer's activation in `cost_function`."""
    return cost_function[self.activation](target, output)

In [5]:
class NeuralNetwork():
  """Feed-forward neural network trained with mini-batch gradient descent.

  Layers are applied in the order they were added. Data enters as rows of
  samples and is transposed internally, so every layer sees one column per
  sample.
  """

  def __init__(self, learning_rate, error_threshold, max_iter, batch_size):
    """Store the training hyper-parameters; the network starts with no layers.

    Args:
      learning_rate: Step size multiplied into every parameter update.
      error_threshold: Training stops once the mean error of an iteration
        is at or below this value.
      max_iter: Maximum number of passes over the training data.
      batch_size: Number of samples per mini-batch.
    """
    self.layers = []
    self.learning_rate = learning_rate
    self.error_threshold = error_threshold
    self.max_iter = max_iter
    self.batch_size = batch_size

  def summary(self):
    """Print the activation, unit count, weights and biases of every layer."""
    separator = "============================================================"
    print("Jumlah layer: ", len(self.layers))
    for i, layer in enumerate(self.layers):
      print(separator)
      print('Layer {} (Activation: "{}", Units: {})'.format(i+1, layer.activation, len(layer.weight)))
      print("Weight:")
      print(np.array(layer.weight))
      print("Bias:")
      print(np.array(layer.bias))
    print(separator)

  def add(self, layer):
    """Append `layer` as the new last layer of the network."""
    self.layers.append(layer)

  def predict(self, input):
    """Return the network output for `input` (rows are samples).

    The result has one row per output unit and one column per sample.
    """
    return self.forward_propagation(input)

  def save_file(self, filename = None):
    """Write the layers and hyper-parameters to `filename` as JSON.

    Args:
      filename: Destination path. When omitted the user is prompted for it.
    """
    if filename is None:
      filename = input("Nama file yang ingin disimpan : ")

    # json cannot serialise ndarrays, so parameters are stored as nested lists.
    layer_arr = [
      {
        "weight" : np.asarray(layer.weight).tolist(),
        "bias" : np.asarray(layer.bias).tolist(),
        "activation" : layer.activation
      }
      for layer in self.layers
    ]

    content = {
      "n_layer" : len(self.layers),
      "layers" : layer_arr,
      "learning_rate" : self.learning_rate,
      "error_threshold" : self.error_threshold,
      "max_iter" : self.max_iter,
      "batch_size" : self.batch_size
    }

    # Serialise first so a failure cannot leave a truncated file behind.
    serialized = json.dumps(content, indent = 6)
    with open(filename, "w") as f:
      f.write(serialized)

  def load_file(self, filename = None):
    """Load layers and hyper-parameters from a JSON file written by `save_file`.

    Any layers already in the network are replaced by the loaded ones.

    Args:
      filename: Source path. When omitted the user is prompted for it.
    """
    if filename is None:
      filename = input("Nama file yang ingin dibuka : ")

    with open(filename) as json_model:
      data_model = json.load(json_model)

    loaded_layers = []
    for spec in data_model["layers"][:data_model["n_layer"]]:
      weight = np.array(spec["weight"], dtype = float)
      bias = np.array(spec["bias"], dtype = float)
      # Weights are stored as (units, inputs).
      n_out, n_in = weight.shape

      layer = Layer(spec["activation"], n_in, n_out)
      layer.set_weight(weight)
      layer.set_bias(bias)
      loaded_layers.append(layer)

    self.layers = loaded_layers
    self.learning_rate = data_model["learning_rate"]
    self.error_threshold = data_model["error_threshold"]
    self.max_iter = data_model["max_iter"]
    self.batch_size = data_model["batch_size"]
    print('File loaded. Model detected')

  def forward_propagation(self, inputs):
    """Feed `inputs` (rows are samples) through every layer.

    Each layer remembers its input in `data_in` for the backward pass.
    """
    arr_in = np.array(inputs).T
    for layer in self.layers:
      layer.data_in = arr_in
      arr_in = layer.output()
    return arr_in

  def shuffle(self, X, y):
    """Return X and y as lists of lists, reordered by one shared random permutation."""
    arr_id = list(range(len(y)))
    np.random.shuffle(arr_id)
    X_result = [list(X[i]) for i in arr_id]
    y_result = [list(y[i]) for i in arr_id]
    return X_result, y_result

  def create_batch(self, X, y):
    """Split X and y into consecutive arrays of at most `batch_size` rows."""
    n_batches = math.ceil(len(X) / self.batch_size)
    batch_x = []
    batch_y = []
    for head in range(0, n_batches * self.batch_size, self.batch_size):
      tail = head + self.batch_size
      batch_x.append(np.array(X[head : tail]))
      batch_y.append(np.array(y[head : tail]))
    return batch_x, batch_y

  def backward_propagation(self, X, y, output):
    """Compute every layer's delta and scaled parameter updates.

    Args:
      X: Unused; kept for interface compatibility.
      y: Targets, one column per sample. Not modified.
      output: Network output from the preceding forward pass, same shape as y.
    """
    last = len(self.layers) - 1
    for i in range(last, -1, -1):
      layer = self.layers[i]

      if i == last:
        # Deltas hold the *negative* gradient because mgd() adds the updates.
        if layer.activation == "Softmax":
          # For softmax combined with cross entropy the gradient with respect
          # to the pre-activation reduces to (output - y).
          layer.delta = y - output
        else:
          # Squared error: (y - output) times the activation derivative.
          layer.delta = (y - output) * layer.derivative_output()
      else:
        # Hidden layer: propagate the next layer's delta back through its weights.
        nextl = self.layers[i + 1]
        error = np.dot(nextl.weight.T, nextl.delta)
        layer.delta = error * layer.derivative_output()

      layer.delta_weight = np.dot(layer.delta, layer.data_in.T) * self.learning_rate
      layer.delta_bias = layer.delta * self.learning_rate

  def mgd(self, X, y):
    """Train with mini-batch gradient descent.

    Stops when the mean error of an iteration drops to `error_threshold` or
    after `max_iter` iterations, printing progress every 50 iterations.

    Args:
      X: Training samples, one row per sample.
      y: Targets, one row per sample.
    """
    for iteration in range(0, self.max_iter):
      # Shuffle data
      data_X, data_y = self.shuffle(X, y)

      # Divide into batches
      batch_x, batch_y = self.create_batch(data_X, data_y)

      error = 0
      for X_train, y_train in zip(batch_x, batch_y):
        # Forward propagation on input
        y_predict = self.forward_propagation(X_train)

        # Compute cost; the loss functions take (target, output) in that order
        error += self.layers[-1].calculate_error(y_train.T, y_predict)

        # Backward propagation to compute the updates
        self.backward_propagation(X_train.T, y_train.T, y_predict)

        # Update each layer
        for layer in self.layers:
          layer.weight += layer.delta_weight

          # delta_bias has one column per sample; sum them into one column
          delta_bias = layer.delta_bias
          layer.bias += np.sum(delta_bias, axis=1).reshape(len(delta_bias), 1)

          # Reset the accumulated updates
          layer.delta_weight = np.zeros(layer.weight.shape)
          layer.delta_bias = np.zeros(layer.bias.shape)

      error *= 1/len(X)
      if iteration % 50 == 0:
        print(f"Iteration {iteration}: ", error)

      if error <= self.error_threshold:
        print("Error is lower or equal than error threshold")
        print("Ended in {} iterations".format(iteration))
        return

    print("Reached maximum iterations")
    print("Ended in {} iterations".format(self.max_iter))
    return
  

In [86]:
class Metrics():
    """Classification metrics computed from true and predicted label sequences.

    Per-class results are reported over the sorted union of the labels found
    in `y_true` and `y_pred`, so the lists returned by precision, recall and
    F1 always line up. A ratio with a zero denominator is reported as 0.0.
    """

    def __init__(self):
        pass

    @staticmethod
    def _labels(y_true, y_pred):
        """Return the sorted unique labels occurring in either sequence."""
        return np.unique(np.concatenate((np.ravel(y_true), np.ravel(y_pred))))

    @staticmethod
    def _safe_ratio(numerator, denominator):
        """Return numerator / denominator, or 0.0 when the denominator is 0."""
        return numerator / denominator if denominator else 0.0

    @staticmethod
    def _harmonic_mean(prec, rec):
        """Return the F1 combination of one precision and one recall value."""
        return (2 * ((prec * rec)/(prec + rec))) if (prec + rec != 0) else 0.0

    def compute_confusion_matrix(self, y_true, y_pred):
        """Return the confusion matrix; rows are true labels, columns predictions.

        Rows and columns follow the sorted union of the labels in both inputs.
        """
        y_true = np.ravel(y_true)
        y_pred = np.ravel(y_pred)
        labels = self._labels(y_true, y_pred)

        result = np.zeros((len(labels), len(labels)))
        # Translate label values into positions in the sorted label array.
        rows = np.searchsorted(labels, y_true)
        cols = np.searchsorted(labels, y_pred)
        # add.at accumulates repeated (row, col) pairs instead of overwriting.
        np.add.at(result, (rows, cols), 1)
        return result

    def accuracy_score(self, y_true, y_pred, normalize=True):
        """Return the fraction (or, if not `normalize`, the count) of matches."""
        correct = np.sum(np.equal(y_true, y_pred))
        if normalize:
            return correct / len(y_true)
        return correct

    def precision_score(self, y_true, y_pred, binary, class_label=None):
        """Return precision = TP / (number of predictions of the class).

        Args:
            y_true: True labels.
            y_pred: Predicted labels.
            binary: With no `class_label`, true scores label 1 only and
                false scores every class.
            class_label: When given, score this single class.

        Returns:
            A single value, or a list with one value per class.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)

        def precision_for(label):
            tp = np.sum((y_true == label) & (y_pred == label))
            return self._safe_ratio(tp, np.sum(y_pred == label))

        if class_label is not None:
            return precision_for(class_label)
        if binary:
            return precision_for(1)
        return [precision_for(label) for label in self._labels(y_true, y_pred)]

    def recall_score(self, y_true, y_pred, binary, class_label=None):
        """Return recall = TP / (number of true members of the class).

        Arguments and return shape mirror `precision_score`.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)

        def recall_for(label):
            tp = np.sum((y_true == label) & (y_pred == label))
            return self._safe_ratio(tp, np.sum(y_true == label))

        if class_label is not None:
            return recall_for(class_label)
        if binary:
            return recall_for(1)
        return [recall_for(label) for label in self._labels(y_true, y_pred)]

    def f1_score(self, y_true, y_pred, binary, class_label=None):
        """Return F1 = 2 * precision * recall / (precision + recall).

        Arguments and return shape mirror `precision_score`; F1 is 0.0 when
        precision and recall are both 0.
        """
        prec = self.precision_score(y_true, y_pred, binary, class_label)
        rec = self.recall_score(y_true, y_pred, binary, class_label)

        # Precision and recall are scalars in exactly these two modes.
        if class_label is not None or binary:
            return self._harmonic_mean(prec, rec)
        return [self._harmonic_mean(p, r) for p, r in zip(prec, rec)]

# Small hand-made example exercising every metric
metrics = Metrics()
a = [0,2,1,0,2,0,1,2]
b = [1,2,1,1,0,1,1,2]

print(f'Accuracy score: {metrics.accuracy_score(a, b, normalize=True)}')
print(f'Precision score: {metrics.precision_score(a, b, binary=False)}')
print(f'Recall score: {metrics.recall_score(a, b, binary=False)}')
print(f'F1 score: {metrics.f1_score(a, b, binary=False)}')

Accuracy score: 0.5
Precision score: [0.0, 0.4, 1.0]
Recall score: [0.0, 1.0, 0.6666666666666666]
F1 score: [0.0, 0.5714285714285715, 0.8]


In [7]:
def kFold(model, X, y, n_splits=10, scratch=True):
  """Evaluate `model` with k-fold cross validation and return the mean accuracy.

  The data is split into `n_splits` consecutive folds in the given order (no
  shuffling); the last fold also takes the remainder rows. Every fold trains
  its own deep copy of `model`, so folds do not influence each other and the
  model passed in is left untouched.

  Args:
    model: A scikit-learn style estimator (`fit`/`predict`) when `scratch` is
      false, otherwise a network exposing `mgd`/`predict`.
    X: Samples, one row per sample.
    y: Targets, one row per sample.
    n_splits: Number of folds.
    scratch: Selects the training call and how predictions are scored.

  Returns:
    The average of the per-fold accuracies.

  Raises:
    Exception: If X and y differ in length.
    ValueError: If `n_splits` is not between 1 and the number of samples.
  """
  import copy

  if len(X) != len(y):
    raise Exception("Length X and y is not the same")

  data_X, data_y = np.asarray(X), np.asarray(y)
  data_size = len(data_X)
  if n_splits < 1 or n_splits > data_size:
    raise ValueError("n_splits must be between 1 and the number of samples")

  fold_size = data_size // n_splits
  total_score = 0
  for test_idx in range(n_splits):
    # The test fold is [head_test, tail_test); the last one runs to the end
    # so the remainder rows are not dropped.
    head_test = fold_size * test_idx
    tail_test = data_size if (test_idx == n_splits - 1) else head_test + fold_size

    X_train = np.concatenate((data_X[:head_test], data_X[tail_test:]))
    y_train = np.concatenate((data_y[:head_test], data_y[tail_test:]))
    X_test = data_X[head_test : tail_test]
    y_test = data_y[head_test : tail_test]

    # Train a fresh copy so this fold's test rows were not seen in earlier folds
    fold_model = copy.deepcopy(model)
    if not scratch:
      fold_model.fit(X_train, y_train)
    else:
      fold_model.mgd(X_train, y_train)

    # Get the prediction
    prediction = fold_model.predict(X_test)

    # Count the score
    accuracy = score_prediction(prediction, y_test, scratch)
    print("Fold {} score: {}".format(test_idx+1, accuracy))
    total_score += accuracy

  average_score = total_score / n_splits
  return average_score

In [8]:
# Load the iris dataset and one-hot encode its class labels
data = load_iris()
X = data.data

enc = OneHotEncoder(handle_unknown='ignore')
y = enc.fit_transform(data.target.reshape(-1,1)).toarray()

In [9]:
# Train both models on the full dataset (the scratch model learns in mini-batches)
model_sklearn_1 = MLPClassifier(max_iter = 2000)
model_sklearn_1.fit(X, y)

model_scratch_1 = NeuralNetwork(
  learning_rate = 0.001,
  error_threshold = 0.01,
  max_iter = 2000,
  batch_size = 5,
)
model_scratch_1.add(Layer(activation = "ReLU", input = 4, output = 10))    # hidden layer 1
model_scratch_1.add(Layer(activation = "ReLU", input = 10, output = 10))   # hidden layer 2
model_scratch_1.add(Layer(activation = "Linear", input = 10, output = 5))  # hidden layer 3
model_scratch_1.add(Layer(activation = "Sigmoid", input = 5, output = 3))  # output layer
model_scratch_1.mgd(X, y)

Iteration 0:  0.8344637342734189
Iteration 50:  0.8332005585350447
Iteration 100:  0.8329270120479051
Iteration 150:  0.8423381842974287
Iteration 200:  0.6666719484974449
Iteration 250:  0.6666687632169056
Iteration 300:  0.6666679679791139
Iteration 350:  0.666667608443739
Iteration 400:  0.6666674039247352
Iteration 450:  0.6666672720810244
Iteration 500:  0.6666671800667273
Iteration 550:  0.6666671122341928
Iteration 600:  0.666667060170655
Iteration 650:  0.666667018958473
Iteration 700:  0.6666669855304095
Iteration 750:  0.6666669578746494
Iteration 800:  0.6666669346171826
Iteration 850:  0.6666669147876133
Iteration 900:  0.6666668976812197
Iteration 950:  0.6666668827738993
Iteration 1000:  0.6666668696678286
Iteration 1050:  0.666666858055601
Iteration 1100:  0.6666668476959509
Iteration 1150:  0.6666668383968354
Iteration 1200:  0.6666668300035973
Iteration 1250:  0.6666668223901282
Iteration 1300:  0.6666668154527843
Iteration 1350:  0.666666809105582
Iteration 1400:  0.6

In [10]:
def score_prediction(prediction, y, scratch=True):
  """Return the accuracy of `prediction` against the targets `y`.

  For the scratch model, `prediction` holds one column per sample and `y` one
  row per sample; both are reduced to class indices with argmax before
  scoring. Otherwise the two are handed to `accuracy_score` unchanged.
  """
  if not scratch:
    return (accuracy_score(prediction, y))

  label_pred = [np.argmax(prediction[:, col]) for col in range(prediction.shape[1])]
  label_y = [np.argmax(y[row, :]) for row in range(y.shape[0])]
  return accuracy_score(label_pred, label_y)


# 2. Membandingkan kedua hasil prediksi

In [11]:
# Score both models on the data they were trained on
prediction_sklearn_1 = model_sklearn_1.predict(X)
score_sklearn_1 = score_prediction(prediction_sklearn_1, y, scratch=False)
print(f"Hasil model sklearn: {score_sklearn_1}")

prediction_scratch_1 = model_scratch_1.predict(X)
score_scratch_1 = score_prediction(prediction_scratch_1, y)
print(f"Hasil model scratch: {score_scratch_1}")

Hasil model sklearn: 0.9733333333333334
Hasil model scratch: 0.3333333333333333


# 3. Melakukan pembelajaran dengan skema split train 90% dan test 10%

In [13]:
# Split the data into 90% training and 10% test
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  test_size=0.1,
  random_state=42,
)

In [14]:
# Train both models on the 90% training split
model_sklearn_2 = MLPClassifier(max_iter = 2000)
model_sklearn_2.fit(X_train, y_train)

model_scratch_2 = NeuralNetwork(
  learning_rate = 0.001,
  error_threshold = 0.01,
  max_iter = 2000,
  batch_size = 5,
)
model_scratch_2.add(Layer(activation = "ReLU", input = 4, output = 10))    # hidden layer 1
model_scratch_2.add(Layer(activation = "ReLU", input = 10, output = 10))   # hidden layer 2
model_scratch_2.add(Layer(activation = "Linear", input = 10, output = 5))  # hidden layer 3
model_scratch_2.add(Layer(activation = "Sigmoid", input = 5, output = 3))  # output layer
model_scratch_2.mgd(X_train, y_train)

Iteration 0:  0.8301444158043019
Iteration 50:  0.82591081197782
Iteration 100:  0.8258389976544714
Iteration 150:  0.8257702190919224
Iteration 200:  0.8251163118877487
Iteration 250:  0.8259257226866845
Iteration 300:  0.8259257167984402
Iteration 350:  0.8259257105574979
Iteration 400:  0.8259257039297215
Iteration 450:  0.8259256968785782
Iteration 500:  0.825925689364013
Iteration 550:  0.8259256813334239
Iteration 600:  0.8259256727366858
Iteration 650:  0.8259256635099537
Iteration 700:  0.8259256535830756
Iteration 750:  0.8259256428669772
Iteration 800:  0.8259256312681275
Iteration 850:  0.8259256186752185
Iteration 900:  0.8259256049506337
Iteration 950:  0.825925589930705
Iteration 1000:  0.8259255734313599
Iteration 1050:  0.8259255552161039
Iteration 1100:  0.8259255350057972
Iteration 1150:  0.8259255124402604
Iteration 1200:  0.8259254871061538
Iteration 1250:  0.8259254584336465
Iteration 1300:  0.8259254257294232
Iteration 1350:  0.8259253880785669
Iteration 1400:  0.

In [15]:
# Compare both models on the held-out 10% test split
prediction_sklearn_2 = model_sklearn_2.predict(X_test)
score_sklearn_2 = score_prediction(prediction_sklearn_2, y_test, scratch=False)
print(f"Hasil model sklearn: {score_sklearn_2}")

prediction_scratch_2 = model_scratch_2.predict(X_test)
score_scratch_2 = score_prediction(prediction_scratch_2, y_test)
print(f"Hasil model scratch: {score_scratch_2}")

Hasil model sklearn: 1.0
Hasil model scratch: 0.4


# 4. Melakukan pembelajaran dengan skema 10-fold cross validation

In [16]:
# 10-fold cross validation of both models on the full dataset
avg_sklearn = kFold(model_sklearn_2, X, y, n_splits=10, scratch=False)
avg_scratch = kFold(model_scratch_2, X, y, n_splits=10, scratch=True)

print(f"Rata-rata score dari model sklearn: {avg_sklearn}")
print(f"Rata-rata score dari model scratch: {avg_scratch}")

Fold 1 score: 1.0
Fold 2 score: 1.0
Fold 3 score: 1.0
Fold 4 score: 1.0
Fold 5 score: 0.8
Fold 6 score: 0.9333333333333333
Fold 7 score: 1.0
Fold 8 score: 1.0
Fold 9 score: 0.8666666666666667
Fold 10 score: 1.0
Iteration 0:  0.8703118509550729
Iteration 50:  0.7407521709125973
Iteration 100:  0.7407436071280161
Iteration 150:  0.7407423659778378
Iteration 200:  0.740741872205911
Iteration 250:  0.7407416076175651
Iteration 300:  0.740741442902036
Iteration 350:  0.7407413305675564
Iteration 400:  0.7407412490917029
Iteration 450:  0.740741187313579
Iteration 500:  0.7407411388710583
Iteration 550:  0.7407410998737471
Iteration 600:  0.740741067807712
Iteration 650:  0.7407410409795184
Iteration 700:  0.7407410182043521
Iteration 750:  0.7407409986296041
Iteration 800:  0.7407409816259489
Iteration 850:  0.7407409667187725
Iteration 900:  0.7407409535434057
Iteration 950:  0.7407409418150405
Iteration 1000:  0.7407409313080563
Iteration 1050:  0.7407409218415654
Iteration 1100:  0.74074

0.33333333333333337

# 5. Menyimpan model hipotesis hasil pembelajaran

In [None]:
# Save model

# 6. Membaca model hipotesis dari file eksternal

In [None]:
# Load model

# 7. Membuat instance baru lalu memprediksi hasil

In [17]:
# function to create random n instances
def create_random_instance(n, n_attr=4, low=0, high=7):
    '''
    Create n random instances.

    Every instance is a list of `n_attr` values drawn uniformly between
    `low` and `high` with numpy's global random generator and rounded to
    two decimals. The defaults (4 attributes between 0 and 7) reproduce the
    previously hard-coded behaviour.

    Returns a list of n lists; an empty list when n is 0.
    '''
    return [
        [round(np.random.uniform(low, high), 2) for _ in range(n_attr)]
        for _ in range(n)
    ]

In [20]:
# Create new instances and predict them with the scratch model
new_instances = create_random_instance(300)
result = model_scratch_1.predict(new_instances)

# Print the instances and the raw network output
print("instance: ")
print(np.array(new_instances))
print("result: ")
print(result)
# `result` has one row per class and one column per instance, so the
# predicted class of each instance is the argmax down its column.
print(np.argmax(result, axis=0).tolist())

instance: 
[[3.27 5.21 3.38 0.77]
 [3.18 4.98 5.42 6.44]
 [0.89 1.05 5.72 0.14]
 ...
 [1.1  5.34 0.55 5.21]
 [0.77 6.47 3.28 0.85]
 [5.35 4.39 2.79 4.55]]
result: 
[[1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 2.40835363e-06
  1.00000000e+00 1.00000000e+00 1.00000000e+00 9.99982000e-01
  1.00000000e+00 1.00000000e+00 2.48519586e-03 1.00000000e+00
  1.00000000e+00 1.00000000e+00 9.99999996e-01 1.00000000e+00
  1.00000000e+00 9.99999961e-01 1.00000000e+00 9.79043408e-01
  1.00000000e+00 1.00000000e+00 1.00000000e+00 9.99997551e-01
  1.00000000e+00 9.99999993e-01 9.99993878e-01 1.00000000e+00
  2.33715598e-05 1.00000000e+00 1.00000000e+00 9.99999998e-01
  1.00000000e+00 9.99999998e-01 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 9.99999789e-01
  9.99992083e-01 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+0

# 8. Analisis Hasil

## Analisis hasil nomor 2.

Berdasarkan hasil perbandingan confusion matrix dan perhitungan kinerja dari sklearn yang telah dieksekusi, didapatkan bahwa kinerja dari model yang dibuat oleh sklearn (akurasi 0,973) lebih baik dibandingkan dengan model yang dibuat sendiri (akurasi 0,333).

## Analisis hasil nomor 3.

Berdasarkan hasil pembelajaran FFNN untuk dataset iris dengan skema split train 90% dan test 10% yang telah dieksekusi, didapatkan bahwa kinerja dari model yang dibuat oleh sklearn (akurasi 1,0) lebih baik dibandingkan dengan model yang dibuat sendiri (akurasi 0,4).

### Perbandingan hasil nomor 2 dan 3
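Berdasarkan hasil pembelajaran nomor 2 dan nomor 3 dapat dilihat bahwa ketika menggunakan skema 90% data training dan 10% data testing didapatkan akurasi yang lebih tinggi pada kedua model (sklearn: 0,973 menjadi 1,0; scratch: 0,333 menjadi 0,4). Perlu dicatat bahwa pada nomor 2 akurasi diukur pada data yang sama dengan data training, sedangkan pada nomor 3 diukur pada 10% data testing.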