# Tugas Besar IF3270 - Pembelajaran Mesin Bag. B

Anggota Kelompok:

1. 13519103 - Bryan Rinaldo
2. 13519135 - Naufal Alexander Suryasumirat
3. 13519141 - Naufal Yahya Kurnianto
4. 13519153 - Maximillian Lukman

In [None]:
import math
import numpy as np

import pandas as pd
from sklearn import preprocessing
from sklearn import datasets
from sklearn.model_selection import train_test_split

## Activation Functions

In [None]:
# Activation functions
# Every entry accepts a scalar or an array-like and returns a NumPy array.
## Linear
linear = lambda x: x
linear = np.vectorize(linear)
## Sigmoid
def _stable_sigmoid(x):
    """Return the logistic sigmoid of the scalar x without overflowing.

    Only exponentials of non-positive numbers are evaluated, so very
    negative inputs no longer raise OverflowError in math.exp.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)
sigmoid = np.vectorize(_stable_sigmoid)
## ReLU
relu = lambda x: float(max(0, x))
relu = np.vectorize(relu)
## Softmax
def softmax(x):
    """Return the softmax of x, normalised along axis 0.

    The per-column maximum is subtracted before exponentiating; this leaves
    the result mathematically unchanged but keeps np.exp from overflowing
    to inf (which previously produced NaN probabilities).
    """
    x = np.asarray(x)
    if x.size == 0: # nothing to normalise; np.max would reject an empty array
        return np.exp(x)
    exps = np.exp(x - x.max(axis=0))
    return exps / exps.sum(axis=0)
## Dict
# Maps the activation name stored on a layer to its implementation.
activation_functions = {
    'linear': linear,
    'sigmoid': sigmoid,
    'relu': relu,
    'softmax': softmax
}

## Loss/Cost Functions

In [None]:
# Loss functions
## Linear, Sigmoid, ReLU
def general_loss(predict, target):
    """Return the half sum of squared errors, 0.5 * sum((target - predict) ** 2).

    predict: scalar or indexable sequence/array of predictions.
    target: scalar or indexable sequence/array of expected values.

    When either argument is a scalar the difference is computed with NumPy
    broadcasting. (The original scalar check compared against ``type(int)``,
    i.e. ``type``, so it never matched and scalar calls failed on ``len``.)
    """
    if np.ndim(predict) == 0 or np.ndim(target) == 0:
        diff = np.asarray(target, dtype=float) - np.asarray(predict, dtype=float)
        return 0.5 * float(np.sum(diff ** 2))
    # Index by position over len(target), exactly as before, so a too-short
    # prediction still raises IndexError instead of being silently truncated.
    squared_errors = ((target[i] - predict[i]) ** 2 for i in range(len(target)))
    return 0.5 * sum(squared_errors)
## Softmax
def softmax_loss(predict, target):
    """Return the cross-entropy loss -ln(predict[target]) (natural log).

    predict: indexable sequence/array of class probabilities.
    target: index of the true class.

    The probability is clamped to the smallest positive normal float64 so a
    probability that underflowed to exactly 0 yields a large finite loss
    instead of a "math domain error" from math.log.
    """
    smallest_normal = 2.2250738585072014e-308
    probability = max(float(predict[target]), smallest_normal)
    return -math.log(probability) # base e
## Dict
# Loss used by an output layer, looked up by that layer's activation name:
# softmax outputs use the log loss, every other activation the squared error.
loss_functions = dict(
    linear=general_loss,
    sigmoid=general_loss,
    relu=general_loss,
    softmax=softmax_loss,
)

## Back-propagation Functions / Derivatives

In [None]:
# Back-propagation functions (derivatives)
# NOTE: every derivative below receives the layer's activation OUTPUT
# (the value stored in Layer.result), not the pre-activation net input.
## Linear
linear_backprop = lambda x: 1
## Sigmoid
# With x = sigmoid(net) the derivative with respect to net is x * (1 - x).
sigmoid_backprop = lambda x: x * (1 - x)
## ReLU
# A ReLU output is never negative, so the test must be strict: an output of
# 0 means the unit was inactive and its gradient is 0. (`x >= 0` evaluated
# to 1 for every possible output.)
relu_backprop = lambda x: float(x > 0)
relu_backprop = np.vectorize(relu_backprop)
## Softmax
def softmax_backprop(arr, targ):
    """Return a copy of arr with 1 subtracted from the entry at index targ.

    arr: softmax output of the layer; it is left unmodified.
    targ: index of the target class.
    """
    gradient = np.copy(arr)
    gradient[targ] -= 1 # p - 1, the same value as -(1 - p)
    return gradient
## Dict
# Derivative helper for each activation, looked up by activation name.
# The softmax entry takes (output, target_index); the others take the output only.
backprop_functions = dict(
    linear=linear_backprop,
    sigmoid=sigmoid_backprop,
    relu=relu_backprop,
    softmax=softmax_backprop,
)

## Layer Class

In [None]:
class Layer:
    """A single layer: a weight matrix plus the functions tied to its activation.

    The weight matrix holds one column per neuron; its last row contains the
    bias weights (see get_structure).
    """

    def __init__(self, n_neuron: int, weights: np.array, activation: str) -> None:
        # Look up the activation, its loss and its derivative by name.
        self.act_function = activation_functions[activation]
        self.loss_function = loss_functions[activation]
        self.backprop_functions = backprop_functions[activation]
        self.activation = activation # one of: linear, sigmoid, relu, softmax
        self.n_neuron = n_neuron # kept for display purposes
        self.weights = weights # weight matrix, bias row included
        self.result = [] # output of the most recent calculate() call
        self.deltas = np.zeros_like(weights) # weight changes accumulated since the last update

    def calculate(self, in_matrix: np.array) -> np.array:
        """Run the layer on in_matrix, remember the output and return it."""
        net = np.dot(self.weights.T, in_matrix)
        self.result = self.act_function(net)
        return self.result

    def calculate_loss(self, prediction: (np.array or int), target: (list or int)) -> float:
        """Return the loss of prediction against target (used on the output layer).

        For a softmax layer the target is first reduced to the index of its
        largest entry.
        """
        if self.activation != "softmax":
            return self.loss_function(prediction, target)
        return self.loss_function(prediction, np.argmax(target))

    def update_weight(self):
        """Add the accumulated deltas to the weights in place and reset the deltas."""
        self.weights += self.deltas
        self.deltas = np.zeros_like(self.deltas) # fresh accumulator for the next mini-batch
        return self.weights # returned for verbose output

    def add_deltas(self, delta_matrix: np.array) -> None:
        """Accumulate delta_matrix into the pending deltas and return them."""
        self.deltas += delta_matrix
        return self.deltas # returned for verbose output

    def get_structure(self) -> tuple((int, np.array, np.array)):
        """Return (n_neuron, weights without the bias row, flattened bias row)."""
        bias_weights = self.weights[-1:].flatten()
        neuron_weights = self.weights[:-1]
        return (self.n_neuron, neuron_weights, bias_weights)

## FFNN Class

In [None]:
class FFNN:
    """Feed-forward neural network trained with mini-batch back-propagation.

    The network is an ordered list of Layer objects; the last element is the
    output layer. Inputs are passed without a bias term: a constant 1 is
    appended before every layer, matching the extra bias row of each weight
    matrix.
    """

    def __init__(self,
            hidden_layers: list,
            input_layer = None,
            threshold = 0.5,
            learning_rate = 0.01,
            err_threshold = 0.001,
            max_iter = 5000,
            batch_size = 1) -> None:
        """Store the layers and the training hyper-parameters.

        hidden_layers: non-empty list of layers, output layer last.
        input_layer: optional input attached for feed_forward().
        threshold: cut-off used to turn non-softmax outputs into 0/1.
        learning_rate: step size applied to every gradient.
        err_threshold: fit() stops once the average loss drops below this.
        max_iter: maximum number of epochs run by fit().
        batch_size: samples per weight update (1 = incremental SGD).
        """
        self.hidden_layers = hidden_layers
        self.output_layer = hidden_layers[-1]
        self.output_activation = self.output_layer.activation
        self.input_layer = input_layer
        self.threshold = threshold
        self.learning_rate = learning_rate
        self.err_threshold = err_threshold
        self.max_iter = max_iter
        self.batch_size = batch_size # default incremental SGD

    @staticmethod
    def generate_model(input_size: int, n_neurons: list, activations: list):
        """Build a network whose weights are drawn uniformly from [-1.0, 1.0).

        input_size: number of features per input sample.
        n_neurons: number of neurons of each layer, in order.
        activations: activation name of each layer, in order.

        Returns the new FFNN, or None when the two lists differ in length.
        """
        if len(n_neurons) != len(activations): return None
        layers = []
        fan_in = input_size
        for n_out, activation in zip(n_neurons, activations):
            # One extra row holds the bias weights.
            weights = np.random.uniform(low = -1.0, high = 1.0,
                size = (fan_in + 1, n_out))
            layers.append(Layer(n_out, weights, activation))
            fan_in = n_out
        return FFNN(layers)

    def feed_forward(self):
        """Run the attached input through the network.

        Returns None when no input is attached. A 1-D input yields the result
        of forward(). A 2-D input is processed row by row and yields a list of
        output vectors for a softmax output layer, a 2-D array with one row
        per sample for any other multi-neuron output layer, and a flat 1-D
        array for a single-neuron output layer.
        """
        if self.input_layer is None: return None
        if len(self.input_layer.shape) == 1: return self.forward(self.input_layer)
        outputs = [self.forward(row) for row in self.input_layer]
        if (self.output_layer.activation == 'softmax'): return outputs
        # Keep one row per sample for multi-neuron outputs; flattening them
        # made the result impossible to compare against per-sample targets.
        if (self.output_layer.n_neuron > 1): return np.array(outputs)
        return np.array(outputs).flatten()

    def forward(self, input):
        """Propagate one sample (without bias) through every layer.

        Returns the raw output for a softmax output layer, a 0/1 array for any
        other multi-neuron output layer, and a 0/1 int for a single neuron.
        """
        output = input
        for layer in self.hidden_layers:
            output = layer.calculate(np.append(output, 1)) # append the bias input
        if (self.output_layer.activation == 'softmax'): return output # usually used for multiclass
        if (self.output_layer.n_neuron > 1): return np.where(output > self.threshold, 1, 0) # multiclass non-softmax
        # .item() extracts the single value explicitly; calling int() on a
        # one-element array is deprecated in recent NumPy releases.
        return int(np.asarray(output).item() > self.threshold) # binary

    def fit(self, x_train, y_train, randomize = False, learning_rate = None,
            batch_size = None, max_iter = None,
            err_threshold = None, update_every = 250) -> None:
        """Train the network with mini-batch gradient descent.

        x_train / y_train: indexable samples and their targets.
        randomize: visit the samples in a new random order every epoch.
        learning_rate, batch_size, max_iter, err_threshold: when given,
            replace the values stored on the model.
        update_every: print the average loss every this many epochs.

        Training stops after max_iter epochs or as soon as the average loss
        of an epoch is below err_threshold.

        Raises ValueError when y_train is empty.
        """
        if learning_rate is not None: self.learning_rate = learning_rate
        if batch_size is not None: self.batch_size = batch_size
        if max_iter is not None: self.max_iter = max_iter
        if err_threshold is not None: self.err_threshold = err_threshold
        n_samples = len(y_train)
        if n_samples == 0:
            raise ValueError("fit() requires at least one training sample")
        for epoch in range(self.max_iter):
            order = np.random.permutation(n_samples) if randomize else range(n_samples)
            error_sum = 0 # accumulated loss (compared with err_threshold)
            for position, index in enumerate(order):
                sample, target = x_train[index], y_train[index]
                self.predict(sample) # forward pass; refreshes every layer's result
                raw_output = self.output_layer.result # output before thresholding
                error_sum += self.output_layer.calculate_loss(raw_output, target)
                self.backpropagate(sample, target)
                if ((position + 1) % self.batch_size == 0 or position == n_samples - 1):
                    self.update_weights() # update weights (mini-batch)
            err_avg = error_sum / n_samples

            if (err_avg < self.err_threshold):
                print("Epoch %d, Loss: %.6f | Reason for stopping: err < err_threshold" % (epoch, err_avg))
                break # stop fitting process when avg error < threshold

            if (epoch % update_every == 0):
                print("Epoch %d, Loss: %.6f" % (1 if epoch == 0 else epoch, err_avg))
        return

    def backpropagate(self, input, target): # update deltas for every layer
        """Accumulate the weight deltas of one sample into every layer.

        Must be called after a forward pass of the same sample, because it
        reads each layer's stored result. Layers are visited from the output
        backwards; weights themselves are only changed by update_weights().
        """
        node_delta = None # dE/dnet of the layer processed in the previous step
        last = len(self.hidden_layers) - 1
        for index in range(last, -1, -1):
            layer = self.hidden_layers[index]
            source = input if index == 0 else self.hidden_layers[index - 1].result
            prev_result = np.atleast_2d(np.append(source, 1)) # layer input plus bias
            if index == last: # output layer
                pred = layer.result
                if (self.output_activation == "softmax"):
                    # The softmax helper already returns dE/dnet.
                    node_delta = layer.backprop_functions(pred, np.argmax(target))
                else:
                    node_delta = -(np.array(target) - pred) * layer.backprop_functions(pred)
            else: # hidden layer (never softmax)
                next_layer = self.hidden_layers[index + 1]
                # Chain rule: propagate the next layer's full node delta
                # (error times activation derivative) through its non-bias
                # weights, without any averaging over the layer width.
                back_error = np.dot(next_layer.weights[:-1], node_delta)
                node_delta = back_error * layer.backprop_functions(layer.result)
            gradient = np.dot(prev_result.T, np.atleast_2d(node_delta))
            layer.add_deltas(-self.learning_rate * gradient)
        return

    def update_weights(self):
        """Apply the accumulated deltas of every layer."""
        for layer in self.hidden_layers:
            layer.update_weight()
        return

    def attach_input(self, input_layer: np.array) -> None:
        """Attach the input used by the next feed_forward() call."""
        self.input_layer = input_layer
        return

    def attach_hidden_layer(self, hidden_layer: "Layer") -> None:
        """Append a layer; it becomes the new output layer."""
        self.hidden_layers.append(hidden_layer)
        # Keep the cached output-layer references in sync with the list.
        self.output_layer = hidden_layer
        self.output_activation = hidden_layer.activation
        return

    def predict(self, input_layer: np.array) -> list: # input_layer without bias
        """Attach input_layer and return the result of feed_forward()."""
        self.input_layer = input_layer
        return self.feed_forward()

    def get_structure(self) -> tuple:
        """Return (attached input, [Layer.get_structure() of every layer])."""
        return (self.input_layer, [layer.get_structure() for layer in self.hidden_layers])

## Accuracy Calculation Function

In [None]:
# Compute the accuracy of a model
def calculate_accuracy(model: "FFNN", input_set, validation_set: list, is_softmax = False):
    """Return the percentage (0..100) of samples the model predicts correctly.

    model: object whose predict(array) returns the predictions.
    input_set: samples, converted to a NumPy array before prediction.
    validation_set: expected output per sample (lists or arrays).
    is_softmax: compare the index of the largest entry instead of the
        thresholded values.

    Returns None when the number of predictions differs from the number of
    expected outputs.
    """
    def matches(predicted, expected):
        # Flatten both sides so a scalar prediction equals a one-element
        # target row, and plain lists work as well as arrays.
        return np.array_equal(np.ravel(predicted), np.ravel(expected))

    predicted_set = model.predict(np.array(input_set))
    if (not isinstance(predicted_set, (list, np.ndarray))): # single scalar prediction
        return int(matches(predicted_set, validation_set[0])) * 100
    if (len(predicted_set) != len(validation_set)): return None
    num_correct = 0
    for predicted, expected in zip(predicted_set, validation_set):
        if is_softmax:
            hit = np.argmax(predicted) == np.argmax(expected)
        else:
            hit = matches(predicted, expected)
        if hit: num_correct += 1
    return num_correct / len(validation_set) * 100

## Input Model File Function

In [None]:
# Read a model description (and optionally a data set) from a text file
def _input(filename: str, with_input = False) -> "tuple[FFNN, list, list] | FFNN":
    """Build an FFNN from the model file at filename.

    File layout, one item per line:
      - total number of layers (input layer included);
      - for every layer except the last: its neuron count n, the bias
        weights feeding the next layer, n rows of weights, and the name of
        the next layer's activation function;
      - the neuron count of the last layer;
      - only when with_input is true: the number of samples k, then k rows
        of integer inputs followed by k rows of integer expected outputs.

    Returns the FFNN alone, or (FFNN, input rows, expected rows) when
    with_input is true; the model then has the inputs attached.
    """
    # The context manager closes the file even if parsing fails later.
    with open(filename, "r") as model_file:
        lines = [line.strip() for line in model_file]

    n_layer = int(lines[0])
    cursor = 1 # index of the first unread line
    n_layer_neurons = []
    layer_specs = [] # (weight rows + bias row, activation name) per layer

    for _ in range(n_layer - 1):
        n_rows = int(lines[cursor])
        n_layer_neurons.append(n_rows)
        bias = [float(b) for b in lines[cursor + 1].split()]
        rows = [[float(w) for w in row.split()]
                for row in lines[cursor + 2:cursor + 2 + n_rows]]
        activation = lines[cursor + 2 + n_rows]
        # The bias goes last, matching the bias row expected by Layer.
        layer_specs.append((rows + [bias], activation))
        cursor += n_rows + 3

    n_layer_neurons.append(int(lines[cursor]))

    if (with_input):
        n_input = int(lines[cursor + 1])
        first_row = cursor + 2
        input_data = [[int(x) for x in lines[first_row + i].split()]
                      for i in range(n_input)]
        validation_data = [[int(y) for y in lines[first_row + n_input + i].split()]
                           for i in range(n_input)]

    model_layers = [
        Layer(n_layer_neurons[i + 1], np.array(weights), activation.lower())
        for i, (weights, activation) in enumerate(layer_specs)
    ]

    if (with_input):
        return FFNN(model_layers, np.array(input_data)), input_data, validation_data
    else:
        return FFNN(model_layers)

## Show Model Function

In [None]:
# Print the weights and biases of every layer of the model
def showModel(model: FFNN): # expects an FFNN
    """Print each layer's weight matrix and bias vector, output layer last."""
    layers = model.get_structure()[1]
    last = len(layers) - 1
    print("==============Model FFNN==============\n")
    print("------------------------------")
    for position, layer_info in enumerate(layers):
        weight = layer_info[1]
        bias = layer_info[2]

        # Only the heading and the labels differ between layer kinds.
        if position == last:
            heading = "------ Output Layer ------"
            weight_label = "Weight: "
            bias_label = "Bias: "
        else:
            number = position + 1
            heading = "--- Hidden Layer %d ---" % number
            weight_label = "H%d Weight: " % number
            bias_label = "H%d Bias: " % number

        print(heading)
        print(weight_label, weight)
        print(bias_label, bias)
        print('\n')
        print("------------------------------")

## Fungsi Formatting Input Data

In [None]:
def input_dataset(dataset_input):
    """Return the dataset's features together with one-hot encoded targets.

    dataset_input: object exposing `data` (features) and `target` (class
        labels).

    Returns a dict with "x_train" (the features, unchanged) and "y_train"
    (an array with one row per sample and one column per class, where the
    column count is the largest label plus one).
    """
    features = dataset_input.data
    labels = dataset_input.target

    n_classes = np.amax(labels) + 1
    # Each row has a 1 in the column matching the sample's label, 0 elsewhere.
    one_hot_rows = [
        [1 if class_index == label else 0 for class_index in range(n_classes)]
        for label in labels
    ]

    return {
        "x_train": features,
        "y_train": np.array(one_hot_rows),
    }

## Fit Kelas FFNN (backprop)

### Load Data

In [None]:
# Load the iris data set and show its feature table
dataIris = datasets.load_iris()
df = pd.DataFrame(data=dataIris.data, columns=dataIris.feature_names)

print("Data Iris")
print(df)

# input_dataset keeps the features and one-hot encodes the class labels
dataset = input_dataset(dataIris)

# Features and one-hot targets used by the training cells below
x_train = dataset['x_train']
y_train = dataset['y_train']

Data Iris
     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                  5.1               3.5                1.4               0.2
1                  4.9               3.0                1.4               0.2
2                  4.7               3.2                1.3               0.2
3                  4.6               3.1                1.5               0.2
4                  5.0               3.6                1.4               0.2
..                 ...               ...                ...               ...
145                6.7               3.0                5.2               2.3
146                6.3               2.5                5.0               1.9
147                6.5               3.0                5.2               2.0
148                6.2               3.4                5.4               2.3
149                5.9               3.0                5.1               1.8

[150 rows x 4 columns]


## Defining FFNN Model

### Fungsi FFNN.generate_model

Digunakan untuk mendefinisikan model awal yang akan digunakan untuk training (random weight di antara -1.0 dan 1.0)

### Parameters
input_size: jumlah fitur (panjang vektor) pada setiap baris input data / x_train

n_neurons: banyak neuron untuk tiap layer

activations: fungsi aktivasi untuk setiap layer

In [None]:
# 4 input features feeding layers of 6, 4, 5 and 3 neurons; the last entry
# (3 neurons, softmax) is the output layer. Weights are random, so every run
# of this cell produces a different starting model.
ffnn_model = FFNN.generate_model(
    4, [6, 4, 5, 3], ['sigmoid', 'relu', 'linear', 'softmax']
)

## Model Before Training

In [None]:
showModel(ffnn_model) # print the randomly initialised weights and biases (before training)


------------------------------
--- Hidden Layer 1 ---
H1 Weight:  [[ 0.66369284 -0.45270365 -0.42983886 -0.85583888  0.73228471 -0.80021572]
 [-0.79724836  0.31760963 -0.86537262 -0.3157094  -0.99836713  0.61790338]
 [ 0.25208063 -0.03818579  0.65917266  0.68604262 -0.87913485 -0.4246658 ]
 [-0.69237659  0.80651322 -0.09485453  0.55354486 -0.20639417  0.74513472]]
H1 Bias:  [-0.45344607 -0.55241345 -0.25748296  0.87481449  0.2647077  -0.60764919]


------------------------------
--- Hidden Layer 2 ---
H2 Weight:  [[-0.6878233  -0.76034618 -0.19083851  0.34942044]
 [-0.44804084 -0.07654788  0.1594647  -0.89866458]
 [ 0.26774823 -0.13340287 -0.62994191 -0.35570497]
 [ 0.93109576 -0.02493166 -0.91936429 -0.8340818 ]
 [-0.06191021 -0.01641168  0.36525006 -0.98274208]
 [ 0.24841938 -0.71940122 -0.49256429 -0.39798024]]
H2 Bias:  [-0.41645079  0.05962058  0.72659033 -0.67594457]


------------------------------
--- Hidden Layer 3 ---
H3 Weight:  [[ 0.83439762  0.80329118 -0.9011956  -0.1891

## Training Model

### Parameters
x_train: data untuk training

y_train: target untuk training

learning_rate: learning rate untuk dikalikan dengan gradien

batch_size: size mini-batch

err_threshold: threshold error, kasus berhenti jika error < error threshold

max_iter: iterasi maksimal (epoch)

update_every: memberikan update setiap x iterasi

In [None]:
# Train on the full data set; update_every keeps its default, so the average
# loss is printed every 250 epochs.
ffnn_model.fit(x_train, y_train, learning_rate = 0.01, batch_size = 4, err_threshold = 0.01, max_iter = 1000)

Epoch 1, Loss: 0.978117
Epoch 250, Loss: 0.058231
Epoch 500, Loss: 0.053985
Epoch 750, Loss: 0.051862


## Model After Training

In [None]:
showModel(ffnn_model) # print the weights and biases after training


------------------------------
--- Hidden Layer 1 ---
H1 Weight:  [[ 0.65116446 -0.57562676 -0.01249252 -0.55501137  0.87979008 -0.87055799]
 [-1.03946763  0.3993611  -1.04980528 -0.66744711 -0.71195099  0.56682225]
 [ 0.33312991 -0.17847254  0.83043653  0.79964899 -0.96828172 -0.44995993]
 [-0.44689255  0.61963042  0.46531022  1.32859817 -0.64693884  0.74321905]]
H1 Bias:  [-0.54651916 -0.51823667 -0.38754463  0.62481382  0.43956705 -0.62300903]


------------------------------
--- Hidden Layer 2 ---
H2 Weight:  [[-0.16688791 -1.09655799 -0.97738668  0.59159437]
 [-0.61223257 -0.05802843  0.25959456 -0.95884851]
 [ 0.98794726 -0.46748047 -1.54225814 -0.0189208 ]
 [ 1.7279013  -0.50425013 -2.04179827 -0.40535579]
 [-0.94482137  0.30516561  1.1579046  -1.45113032]
 [ 0.15671981 -0.68392457 -0.40545831 -0.44205489]]
H2 Bias:  [-1.0419827   0.06284684  0.87350288 -0.96249756]


------------------------------
--- Hidden Layer 3 ---
H3 Weight:  [[ 2.21253589  1.92109744 -1.72545139 -0.3422

## Predictions After Training

In [None]:
# The output layer is softmax, so each prediction is the raw output vector
# (one value per class) rather than a thresholded 0/1 result.
for data in x_train:
    print(ffnn_model.predict(data))

[9.99997117e-01 2.88311033e-06 3.47068008e-21]
[9.99965608e-01 3.43917006e-05 3.23788012e-19]
[9.99991374e-01 8.62559964e-06 2.57762305e-20]
[9.99911143e-01 8.88568519e-05 1.83885941e-18]
[9.99997521e-01 2.47857952e-06 2.63196213e-21]
[9.99991830e-01 8.17025516e-06 2.33412020e-20]
[9.99982350e-01 1.76503155e-05 9.55410852e-20]
[9.99989576e-01 1.04238386e-05 3.64494636e-20]
[9.99839693e-01 1.60307123e-04 5.41321380e-18]
[9.99968414e-01 3.15860600e-05 2.77100660e-19]
[9.99998123e-01 1.87688537e-06 1.58239974e-21]
[9.99964011e-01 3.59894934e-05 3.51841220e-19]
[9.99972791e-01 2.72092007e-05 2.10915742e-19]
[9.99993899e-01 6.10131500e-06 1.36802798e-20]
[9.99999913e-01 8.72493960e-08 5.76669900e-24]
[9.99999623e-01 3.76873431e-07 8.38640088e-23]
[9.99999524e-01 4.76378901e-07 1.28753736e-22]
[9.99996025e-01 3.97492623e-06 6.24594878e-21]
[9.99995420e-01 4.58001918e-06 8.09458832e-21]
[9.99996663e-01 3.33731535e-06 4.53592814e-21]
[9.99974886e-01 2.51137799e-05 1.82149553e-19]
[9.99993649e-

## Accuracy of Model After Training

In [None]:
# Accuracy is measured on the same samples the model was trained on (no hold-out set).
print("Accuracy of Model: %.2f" % (calculate_accuracy(ffnn_model, x_train, y_train, is_softmax = True)) + "%")

Accuracy of Model: 98.00%


In [None]:
# IRIS MLP: scikit-learn reference model trained on the same data for comparison

from sklearn.neural_network import MLPClassifier # neural network
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

x, y = load_iris(return_X_y=True, as_frame=True) # return_X_y: get (data, target) instead of a Bunch; as_frame: return pandas objects
mlpLearner = MLPClassifier(max_iter = 1000, learning_rate_init = 0.01, batch_size=4, tol=0.001, verbose=False, activation='logistic') # change arguments to customize

# Full Data: fit and evaluate on the whole data set (no train/test split is made)
mlpLearner.fit(x,y)
predFull = mlpLearner.predict(x)
probFull = mlpLearner.predict_proba(x)
scoreFull = mlpLearner.score(x,y)

print("Class Prediction")
print(predFull)
print("\nProbability Prediction")
print(probFull)
print("\nScore")
print(scoreFull)

Class Prediction
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]

Probability Prediction
[[9.99671403e-01 3.28597256e-04 8.48919426e-24]
 [9.99299698e-01 7.00301566e-04 6.53435910e-23]
 [9.99485826e-01 5.14173823e-04 2.70846705e-23]
 [9.99087652e-01 9.12348492e-04 1.51733461e-22]
 [9.99689064e-01 3.10936053e-04 7.42235442e-24]
 [9.99518586e-01 4.81414395e-04 3.89875777e-23]
 [9.99316636e-01 6.83364202e-04 7.61452759e-23]
 [9.99539206e-01 4.60794024e-04 2.28655834e-23]
 [9.98816300e-01 1.18370015e-03 2.89501726e-22]
 [9.99462025e-01 5.37975310e-04 2.95499538e-23]
 [9.99740335e-01 2.59664648e-04 4.73803205e-24]
 [9.99375332e-01 6.24667540e-04 5.76567679e-23]
 [9.99448432e-01 5.51567513e-04 2.94076084e-23]
 [9.99539126e-01 4.60874000e-