In [1]:
# Import modules
import numpy as np
import math

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPClassifier

In [2]:
# Fungsi-fungsi aktivasi
def linear(x, derivative = False):
  if not (derivative):
    return x
  return np.ones_like(x)

def sigmoid(x, derivative = False):
  if not (derivative):
    return 1 / (1 + np.exp(-x))
  return sigmoid(x)*(1 - sigmoid(x))

def relu(x, derivative = False):
  if not (derivative):
    return np.maximum(0, x)
  return np.where(x >= 0, 1, 0)

def softmax(x, derivative = False):
    if not (derivative):
        ex = np.exp(x)
        return ex / np.sum(ex)
    
    result = np.zeros(x.shape)
    for i in range(x.shape[1]):
        temp = x[:,i].reshape(-1,1)
        resTemp = np.diagflat(temp) - np.dot(temp, temp.T)
        result[:,i] = np.sum(resTemp, axis=1)
    return result

activation_function = {
    "Linear": linear,
    "Sigmoid": sigmoid,
    "ReLU": relu,
    "Softmax": softmax,
}

In [3]:
# Fungsi loss

# Sum of squared errors (Linear, Sigmoid, ReLU)
def sum_of_squared_errors(target, output):
    return 0.5 * np.sum((target - output)**2)

# Cross Entopy (Softmax)
def cross_entropy(target, output):
    result = (-1)*math.log(target)
    return result

cost_function = {
    "Linear": sum_of_squared_errors,
    "Sigmoid": sum_of_squared_errors,
    "ReLU": sum_of_squared_errors,
    "Softmax": cross_entropy,
}

In [4]:
class Layer():
  def __init__(self, activation, input, output):
    if activation not in ['Linear', 'Sigmoid', 'ReLU', 'Softmax']:
          raise NotImplementedError("Layer activation `%s` is not implemented." 
                                      % activation)
    np.random.seed(69)
    self.weight = np.random.randn(output, input)
    self.bias = np.random.randn(output, 1)
    self.activation = activation

    self.delta = np.zeros(output)
    self.delta_weight = np.zeros((output, input))
    self.delta_bias = np.ones((output, 1))
    self.data_in = np.zeros(output)

  def set_weight(self, weight):
    self.weight = weight

  def set_bias(self, bias):
    self.bias = bias
    
  def net(self):
    net = np.dot(self.weight, self.data_in) + self.bias
    return net

  def output(self):
    net = self.net()
    return activation_function[self.activation](x = net)
  
  def derivative_output(self):
    net = self.net()
    return activation_function[self.activation](x = net, derivative = True)

  def calculate_error(self, target, output):
    return cost_function[self.activation](target, output)

In [5]:
class NeuralNetwork():
  def __init__(self, learning_rate, error_threshold, max_iter, batch_size):
    self.layers = []
    self.learning_rate = learning_rate
    self.error_threshold = error_threshold 
    self.max_iter = max_iter
    self.batch_size = batch_size
  
  def summary(self):
    print("Jumlah layer: ", len(self.layers))
    for i, layer in enumerate(self.layers):
      print("============================================================")
      print('Layer {} (Activation: "{}", Units: {})'.format(i+1, layer.activation, len(layer.weight)))
      print("Weight:")
      print(np.array(layer.weight))
      print("Bias:")
      print(np.array(layer.bias))
    print("============================================================")

  def add(self, layer):
    self.layers.append(layer)

  def predict(self, input):
    return self.forward_propagation(input)
        
  def load_file(self, filename):
    '''
    ### DEPRECATED ###
    File format
    <depth>
    <units> <activation function>
    <weight0> 
    <bias0>
    '''
    with open(filename, 'r') as file:
      depth = int(file.readline().strip())
      for i in range (depth):
        line = file.readline().strip().split()
        unit = int(line[0])
        activation = line[1]

        # Weight Matrix
        weight = []
        for j in range(unit):
          weight.append(list(map(float, file.readline().strip().split())))

        # Bias Matrix
        bias = list(map(float, file.readline().strip().split()))
        
        # Add layer
        layer = Layer(weight, bias, activation)
        self.add(layer)
      
      # End of file
    # Close file
    print('File loaded. Model detected')

  def forward_propagation(self, inputs):
    arr_in = np.array(inputs).T
    for layer in self.layers:
      layer.data_in = arr_in
      arr_in = layer.output()
    return arr_in

  def shuffle(self, X, y):
    arr_id = [i for i in range(len(y))]
    np.random.shuffle(arr_id)
    X_result = [] 
    y_result = []

    for i in arr_id:
      X_result.append(list(X[i]))
      y_result.append(list(y[i]))

    return X_result, y_result

  def create_batch(self, X, y):
    batch_x = []
    batch_y = []
    epoch = math.ceil(len(X) / self.batch_size)

    for i in range(0, epoch):
      head = i * self.batch_size
      tail = (i + 1) * self.batch_size
      batch_x.append(np.array(X[head : tail]))
      batch_y.append(np.array(y[head : tail]))
        
    return batch_x, batch_y

  def backward_propagation(self, X, y, output):
    for i, layer in reversed(list(enumerate(self.layers))):
      # Output Layer Chain Rule      
      if (i == len(self.layers)-1):
        if (layer.activation == "Softmax"):
          # Derivative of Cross Entropy times derivative output
          dE = y
          for j in range(y.shape[1]):
            k = np.argmax(y[:, i])
            dE[k, j] = -(1 - dE[k, j])
          layer.delta = dE * layer.derivative_output()
        else:
          # Derivative of MSE times derivative output
          dE = (output - y)
          layer.delta = dE * layer.derivative_output()
      
      # Hidden Layer Chain Rule
      else:
        nextl = self.layers[i + 1]
        error = np.dot(nextl.weight.T, nextl.delta)
        layer.delta = error * layer.derivative_output()
    
      layer.delta_weight = np.dot(layer.delta, layer.data_in.T) * self.learning_rate
      layer.delta_bias = layer.delta * self.learning_rate
       
  def mgd(self, X, y):
    for iteration in range(0, self.max_iter):
      # Shuffle data
      data_X, data_y = self.shuffle(X, y)

      # Divide into batch
      batch_x, batch_y = self.create_batch(data_X, data_y)
      batches = len(batch_x)
      
      error = 0  
      for batch in range(0, batches):
        X_train = batch_x[batch]
        y_train = batch_y[batch]

        # Forward propagation on input
        y_predict = self.forward_propagation(X_train)

        # Compute cost
        error += self.layers[-1].calculate_error(y_predict, y_train.T)
      
        # Backward propagation to count delta
        self.backward_propagation(X_train.T, y_train.T, y_predict)

        # Update each layer
        for layer in self.layers:
          # Update weight
          layer.weight += layer.delta_weight

          # Update bias
          delta_bias = layer.delta_bias
          layer.bias += np.sum(delta_bias, axis=1).reshape(len(delta_bias), 1)
          
          # Reset delta value
          layer.delta_weight = np.zeros(layer.weight.shape)
          layer.delta_bias = np.zeros(layer.bias.shape)
      
      error *= 1/len(X)
      if iteration % 50 == 0:
        print(f"Iteration {iteration}: ", error)

      if error <= self.error_threshold:
        print("Error is lower or equal than error threshold")
        print("Ended in {} iterations".format(iteration))
        return
    
    print("Reached maximum iterations")
    print("Ended in {} iterations".format(self.max_iter))
    return
  

In [None]:
class Metrics():
    def __init__(self):
        pass

    def compute_confusion_matrix(y_true, y_pred):
        K = len(np.unique(y_true)) # Number of classes 
        result = np.zeros((K, K))

        for i in range(len(y_true)):
            result[y_true[i]][y_pred[i]] += 1

        return result

    def accuracy(self, y_true, y_pred):
        acc = np.sum(np.equal(y_true, y_pred)) / len(y_true)
        return acc
  
    def precision(self, y_true, y_pred):
        fp = 0
        tp = 0
        for (i, val) in enumerate(y_true):
            for j in range(val):
                if (y_true[i][j] == 1 and y_pred[i][j] == 1):
                    tp += 1
                elif (y_true[i][j] == 0 and y_pred[i][j] == 1):
                    fp += 1

        prec = tp/(tp + fp)
        return prec

    def recall(self, y_true, y_pred):
        fn = 0
        tp = 0
        for (i, val) in enumerate(y_true):
            for j in range(val):
                if (y_true[i][j] == 1 and y_pred[i][j] == 1):
                    tp += 1
                elif (y_true[i][j] == 1 and y_pred[i][j] == 0):
                    fn += 1
        
        rec = tp/(tp + fn)
        return rec

    def f1_score(self, y_true, y_pred):
        prec = self.precision(y_true, y_pred)
        rec = self.recall(y_true, y_pred)
        f1 = 2 * ((prec * rec)/(prec + rec))

In [6]:
def kFold(model, X, y, n_splits=10):
  data_X, data_y = X, y
  total_score = 0
    
  if len(X) != len(y):
    raise Exception("Length X and y is not the same")

  data_size = len(X)
  fold_size = data_size // n_splits
  remainder = data_size % n_splits
  last_idx = fold_size * data_size + remainder
  for test_idx in range(n_splits):
    # Split the datasets
    head_test = fold_size * test_idx
    # If last fold, same as last_idx
    tail_test = last_idx if (test_idx == n_splits - 1) else fold_size * (test_idx + 1)
      
    X_left_train = data_X[0 : head_test]
    X_right_train = data_X[tail_test : last_idx]
    y_left_train = data_y[0 : head_test]
    y_right_train = data_y[tail_test : last_idx]

    X_train = np.concatenate((X_left_train, X_right_train))
    y_train = np.concatenate((y_left_train, y_right_train))
    X_test = data_X[head_test : tail_test]
    y_test = data_y[head_test : tail_test]

    # Train the dataset
    model.mgd(X_train, y_train)

    # Get the prediction
    prediction = model.predict(X_test)

    label_pred = []
    for i in range(prediction.shape[1]):
      label_pred.append(np.argmax(prediction[:, i]))

    y_test_label = []
    for i in range(y_test.shape[0]):
      y_test_label.append(np.argmax(y_test[i, :]))

    # Count the score
    accuracy = accuracy_score(label_pred, y_test_label)
    print("Fold {} score: {}".format(test_idx+1, accuracy))
    total_score += accuracy

  average_score = total_score / n_splits
  return average_score

In [7]:
enc = OneHotEncoder(handle_unknown='ignore')

data = load_iris()
X = data.data
y = data.target
y = y.reshape(-1,1)
enc.fit(y)
y = enc.transform(y).toarray()

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [9]:
model_sklearn = MLPClassifier(max_iter = 2000)
model_sklearn.fit(X_train, y_train)

MLPClassifier(max_iter=2000)

In [10]:
prediction_sklearn = model_sklearn.predict(X_test)
print(prediction_sklearn)

[[0 1 0]
 [1 0 0]
 [0 0 1]
 [0 1 0]
 [0 1 0]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [0 0 0]
 [1 0 0]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 0 1]
 [0 1 0]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [0 0 0]
 [0 0 1]
 [0 1 0]]


In [11]:
print(accuracy_score(prediction_sklearn, y_test))

0.96


In [12]:
model_scratch = NeuralNetwork(learning_rate = 0.001, error_threshold = 0.01, max_iter = 2000, batch_size = 5)

# Layer 1
model_scratch.add(Layer("ReLU", 4, 10))
# Layer 2
model_scratch.add(Layer("ReLU", 10, 10))
# Layer 3
model_scratch.add(Layer("Linear", 10, 5))
# Layer 4
model_scratch.add(Layer("Sigmoid", 5, 3))

# Layer Output
model_scratch.mgd(X_train, y_train)

Iteration 0:  0.85958824871429
Iteration 50:  0.8664271383877938
Iteration 100:  0.9863424895175223
Iteration 150:  0.9898387940446075
Iteration 200:  1.006676669744341
Iteration 250:  0.9989250159367618
Iteration 300:  1.0054180166741193
Iteration 350:  1.0065572591600704
Iteration 400:  1.006383200769154
Iteration 450:  1.0246537052375744
Iteration 500:  1.0146623503648629
Iteration 550:  1.0175015069949296
Iteration 600:  1.0125485572027337
Iteration 650:  1.0206306324738483
Iteration 700:  1.0226735081494525
Iteration 750:  1.0170284357168744
Iteration 800:  1.0186770128821194
Iteration 850:  1.020205990067933
Iteration 900:  1.0209520642778276
Iteration 950:  1.016458375720153
Iteration 1000:  1.0231709648937464
Iteration 1050:  1.0244521583995667
Iteration 1100:  1.0210158418005626
Iteration 1150:  1.0202548964961522
Iteration 1200:  1.024155680606967
Iteration 1250:  1.024962084734202
Iteration 1300:  1.0245036294388938
Iteration 1350:  1.024842586630811
Iteration 1400:  1.02526

In [13]:
model_scratch.summary()

Jumlah layer:  4
Layer 1 (Activation: "ReLU", Units: 10)
Weight:
[[ 0.67879435 -1.24118777  1.25135229 -0.28999701]
 [-1.59748566  0.39772612  1.20586542  1.05629911]
 [ 0.67011832  1.32639244 -0.67249428 -1.04395704]
 [-0.84007163  1.18058312 -1.32884084 -0.12439675]
 [-0.4974814  -1.94712335 -1.84865498  0.26466031]
 [ 0.75969772 -0.78863112  0.33764768 -1.48690934]
 [-1.98192696 -1.19387174  0.16862644 -2.09861293]
 [ 0.01915532  0.30218571 -0.96038242  1.59176309]
 [ 0.35233315 -0.6519459  -1.16200164 -2.28303757]
 [-0.63182666 -0.50416998  1.4516454  -0.05451633]]
Bias:
[[-0.39424211]
 [-1.38093545]
 [ 0.047078  ]
 [-0.10417045]
 [ 0.08445267]
 [ 0.93357375]
 [-1.57490495]
 [-1.41536889]
 [ 0.93064584]
 [ 0.31623553]]
Layer 2 (Activation: "ReLU", Units: 10)
Weight:
[[ 1.03366945 -0.60354197  0.95732616 -0.60140014 -1.59748566  0.41359044
   1.20586542  1.05629911  0.85254391  0.8012377 ]
 [-0.22473706 -0.52218217 -0.84007163  1.18058312 -1.32884084 -0.12439675
  -0.4974814  -1.947

In [14]:
prediction_scratch = model_scratch.predict(X_test)
print(prediction_scratch)

[[1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.0000

In [15]:
label_pred = []
for i in range(prediction_scratch.shape[1]):
    label_pred.append(np.argmax(prediction_scratch[:, i]))

y_test_label = []
for i in range(y_test.shape[0]):
    y_test_label.append(np.argmax(y_test[i, :]))

In [16]:
print(label_pred)
print(y_test_label)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 2, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 0, 2, 2, 1]


In [17]:
print(accuracy_score(label_pred, y_test_label))

0.38666666666666666


In [18]:
# 10 fold cross validation
model = NeuralNetwork(learning_rate = 0.001, error_threshold = 0.01, max_iter = 2000, batch_size = 5)

# Layer 1
model.add(Layer("ReLU", 4, 10))
# Layer 2
model.add(Layer("ReLU", 10, 10))
# Layer 3
model.add(Layer("Linear", 10, 5))
# Layer 4
model.add(Layer("Sigmoid", 5, 3))

kFold(model, X, y)

Iteration 0:  0.8734740179395201
Iteration 50:  0.8703634865615099
Iteration 100:  0.8703668670285022
Iteration 150:  0.8703680398695443
Iteration 200:  0.8703686280052914
Iteration 250:  0.8703689802741165
Iteration 300:  0.8703692144355661
Iteration 350:  0.8703693812398866
Iteration 400:  0.8703695060612097
Iteration 450:  0.8703696029440171
Iteration 500:  0.8703696803056253
Iteration 550:  0.870369743502959
Iteration 600:  0.8703697961049758
Iteration 650:  0.870369840547665
Iteration 700:  0.8703698786166616
Iteration 750:  0.8703699115711023
Iteration 800:  0.8703699403878011
Iteration 850:  0.8703699657890991
Iteration 900:  0.8703699883577967
Iteration 950:  0.8703700085364844
Iteration 1000:  0.8703700266865743
Iteration 1050:  0.8703700431001771
Iteration 1100:  0.8703700580179724
Iteration 1150:  0.8703700716317303
Iteration 1200:  0.8703700841052497
Iteration 1250:  0.8703700955789866
Iteration 1300:  0.870370106163538
Iteration 1350:  0.870370115965009
Iteration 1400:  0.

Iteration 1150:  0.7592592354911001
Iteration 1200:  0.7592592355708414
Iteration 1250:  0.7592592356500376
Iteration 1300:  0.7592592357287186
Iteration 1350:  0.75925923580686
Iteration 1400:  0.7592592358845092
Iteration 1450:  0.7592592359616345
Iteration 1500:  0.7592592360382575
Iteration 1550:  0.7592592361143367
Iteration 1600:  0.7592592361899722
Iteration 1650:  0.7592592362650659
Iteration 1700:  0.7592592363397175
Iteration 1750:  0.7592592364138514
Iteration 1800:  0.7592592364875266
Iteration 1850:  0.7592592365607109
Iteration 1900:  0.7592592366334191
Iteration 1950:  0.7592592367056593
Reached maximum iterations
Ended in 2000 iterations
Fold 6 score: 0.0
Iteration 0:  0.7962962673150666
Iteration 50:  0.7962962674362042
Iteration 100:  0.7962962675562891
Iteration 150:  0.7962962676753681
Iteration 200:  0.7962962677934865
Iteration 250:  0.7962962679106098
Iteration 300:  0.796296268026787
Iteration 350:  0.7962962681419768
Iteration 400:  0.7962962682562746
Iteration

0.33333333333333337

In [124]:
#function to create random n instances
def create_random_instance(n):
    rand_array = []
    n_attr = 4
    for i in range (n):
        rand_row = []
        for j in range (n_attr):
            rand_row.append(round(np.random.uniform(0, 7), 2))
        rand_array.append(rand_row)
    return(rand_array)

In [134]:
#create new instances & predict them using scratch model
new_instances = create_random_instance(300)
result = model_scratch.predict(new_instances)

#print instance & result of predict
print("instance: ")
print(np.array(new_instances))
print("result: ")
print(result2)

instance: 
[[6.46 6.11 3.18 5.55]
 [2.93 2.26 5.28 4.73]
 [5.43 3.5  6.95 6.01]
 ...
 [0.29 0.9  0.67 6.9 ]
 [2.63 1.06 0.02 4.05]
 [6.85 1.04 1.14 6.62]]
result: 
[[1.00000000e+00 9.99999941e-01 1.00000000e+00 1.00000000e+00
  9.99999943e-01 1.00000000e+00 1.00000000e+00 9.99999994e-01
  2.53855075e-03 5.74396136e-07 1.00000000e+00 1.00000000e+00
  1.00000000e+00 9.99754162e-01 1.00000000e+00 1.00000000e+00
  9.99757160e-01 9.61341280e-01 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 9.99999998e-01 1.00000000e+00
  1.00000000e+00 1.70809665e-05 1.00000000e+00 1.00000000e+00
  1.00000000e+00 9.93424853e-01 1.00000000e+00 8.05122501e-01
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  9.99999998e-01 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 9.99998026e-01 1.00000000e+00
  9.99999983e-01 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  6.65176520e-04 1.00000000e+0