# Tugas Besar II
## Feed Forward Neural Network
### IF4071 Pembelajaran Mesin

### 1.a. Implementasi Classifier from Scratch

Dibuat Neural Network untuk melakukan klasifikasi data weather. Neural Network merupakan fully connected layer yang memiliki jumlah hidden layer maksimal 10. Jumlah node dalam setiap hidden layer dapat bervariasi. Bagian backpropagation diimplementasikan  seperti contoh algoritma pada buku Tom Mitchell hal. 98. Neural network menggunakan fungsi aktivasi sigmoid untuk semua hidden layer maupun output layer. Node output untuk klasifikasi berjumlah 1. 

Program memberikan pilihan untuk menggunakan momentum atau tidak. Program mengimplementasikan mini-batch stochastic gradient descent. Stochastic gradient descent tersebut dapat dijalankan dalam jenis incremental (batch-size=1) maupun jenis batch (batch-size=jumlah data).

Fungsi loss yang digunakan pada program yang diimplementasikan kali ini adalah MSE, yaitu:
![MSE Loss Function](img/mse.svg)

#### Program

In [None]:
"""
~~~~~~~~~~
A class that implements SGD (stochastic gradient descent) for
a feed-forward neural network.
"""

#### Libraries
# Standard library
import random

# Third-party libraries
import numpy as np

class Network(object):
    """Fully connected feed-forward neural network trained with mini-batch SGD.

    Every non-input layer uses the sigmoid activation.  Parameters are
    updated with the delta rule ``w += learning_rate * delta * input`` plus
    an optional momentum term (a fraction of the previous update step).
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised parameters.

        ``sizes`` lists the number of neurons per layer, input layer first.
        For example ``[2, 3, 1]`` builds a three-layer network with 2 input
        neurons, one hidden layer of 3 neurons and 1 output neuron.
        Biases and weights are drawn from a Gaussian distribution with
        mean 0 and variance 1.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One (n_out, 1) bias column and one (n_out, n_in) weight matrix per
        # non-input layer.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        # When True, backprop prints its intermediate values.
        self.print_status = False

    def _forward_pass(self, x):
        """Propagate ``x`` through the network.

        Returns ``(zs, activations)``: the net input of every non-input
        layer, and the activation of every layer (``activations[0]`` is
        ``x`` itself).
        """
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            # Biases are stored as columns; take them as a 1-D vector so they
            # line up with the 1-D result of the dot product.
            z = np.dot(w, activation) + b.transpose()[0]
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        return zs, activations

    def feedforward(self, a):
        """Return the output-layer activations for input ``a``."""
        return self._forward_pass(a)[1][-1]

    def SGD(self, training_data, epochs, mini_batch_size, learning_rate,
            momentum=0, test_data=None, print_status=False):
        """Train the network with mini-batch stochastic gradient descent.

        ``training_data`` is a sequence of ``(x, y)`` pairs.  It is split, in
        its given order, into consecutive slices of ``mini_batch_size``
        samples, and the parameters are updated once per slice, for
        ``epochs`` passes over the data.  ``momentum`` is the fraction of
        the previous update step that is added to each new update.

        After every epoch a progress line is printed; when ``test_data``
        (a sequence of ``(x, y)`` pairs) is given, the line reports the
        classification accuracy on it.  ``print_status`` turns on the
        debug output of ``backprop``.

        Raises ``ValueError`` when ``mini_batch_size`` is not positive.
        """
        if mini_batch_size < 1:
            raise ValueError("mini_batch_size must be a positive integer")
        self.print_status = print_status
        has_test_data = test_data is not None and len(test_data) > 0
        if has_test_data:
            n_test = len(test_data)
        n = len(training_data)
        # Parameters as they were before the most recent update.  Starting
        # from the current parameters makes the first momentum term zero;
        # afterwards the state is carried across mini-batches and epochs so
        # that momentum also works when an epoch is a single batch.
        prev_weights = self.weights
        prev_biases = self.biases
        for j in range(epochs):
            for k in range(0, n, mini_batch_size):
                mini_batch = training_data[k:k+mini_batch_size]
                prev_weights, prev_biases = self.update_mini_batch(
                    mini_batch, learning_rate, momentum,
                    prev_weights, prev_biases)
            if has_test_data:
                print("Epoch " +  str(j+1))
                print("\tAccuracy: " + str(100*self.evaluate(test_data)/n_test) + "%")
            else:
                print("Epoch " + str(j+1) + " complete")

    def update_mini_batch(self, mini_batch, learning_rate, momentum, prev_weights, prev_biases):
        """Apply one parameter update computed from a single mini-batch.

        The per-sample terms returned by ``backprop`` are summed over the
        ``(x, y)`` pairs in ``mini_batch`` and averaged.  ``prev_weights``
        and ``prev_biases`` are the parameters as they were before the
        previous update; the momentum term is ``momentum`` times the
        previous update step ``current - previous``.

        Returns ``(weights, biases)`` as they were before this update, to be
        passed as ``prev_weights`` / ``prev_biases`` on the next call.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        step = learning_rate/len(mini_batch)
        old_weights = self.weights
        old_biases = self.biases
        # backprop already returns descent directions (built from y - output),
        # so the update is added rather than subtracted.
        self.weights = [w + momentum * (w - pw) + step * nw
                        for w, nw, pw in zip(old_weights, nabla_w, prev_weights)]
        self.biases = [b + momentum * (b - pb) + step * nb
                       for b, nb, pb in zip(old_biases, nabla_b, prev_biases)]
        return (old_weights, old_biases)

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)`` for the single sample ``(x, y)``.

        ``nabla_b`` and ``nabla_w`` are per-layer lists of numpy arrays with
        the same shapes as ``self.biases`` and ``self.weights``.  They hold
        the update direction (error term times layer input) that
        ``update_mini_batch`` adds to the parameters.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        if self.print_status :
            print("FORWARD")
        zs, activations = self._forward_pass(x)

        if self.print_status :
            print("BACKWARD")
        # Error term of the output layer.
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        # Biases are stored as (n, 1) columns; store delta in that shape so a
        # later addition cannot broadcast it into an (n, n) matrix.
        nabla_b[-1] = np.reshape(delta, (-1, 1))
        # Outer product: entry [i][j] is delta[i] * input[j].
        nabla_w[-1] = np.outer(delta, activations[-2])

        if self.print_status :
            print("delta")
            print(delta)
            print("activation")
            print(activations[-2])
            print("nabla_w[-1]")
            print(nabla_w[-1])

        # Walk backwards through the hidden layers; l counts from the end,
        # so l = 2 is the last hidden layer.
        for l in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-l])
            # Propagate the error term through the next layer's weights.
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = np.reshape(delta, (-1, 1))
            nabla_w[-l] = np.outer(delta, activations[-l-1])
            if self.print_status :
                print("delta")
                print(delta)
                print("activation")
                print(activations[-l-1])
                print("nabla_w[-l]")
                print(nabla_w[-l])
        return (nabla_b, nabla_w)

    def _classify(self, x):
        """Return 1 when the network output for ``x`` is at least 0.5, else 0."""
        return 1 if 2*self.feedforward(x) >= 1 else 0

    def evaluate(self, test_data):
        """Return how many ``(x, y)`` pairs in ``test_data`` are classified correctly."""
        return sum(1 for (x, y) in test_data if self._classify(x) == y)

    def predict(self, test_data):
        """Return the list of predicted classes (0 or 1) for the inputs in ``test_data``."""
        return [self._classify(x) for x in test_data]

    def cost_derivative(self, output_activations, y):
        """Return the output error ``y - output_activations``.

        This is the negated derivative of the squared-error cost with
        respect to the output, with singleton dimensions squeezed out.
        """
        return np.squeeze(y-output_activations)

#### Miscellaneous functions
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + e**-z)``, applied element-wise to ``z``."""
    exp_neg_z = np.exp(-z)
    denominator = 1.0 + exp_neg_z
    return 1.0 / denominator

def sigmoid_prime(z):
    """Derivative of the sigmoid, ``sigmoid(z) * (1 - sigmoid(z))``."""
    s = sigmoid(z)
    complement = 1 - s
    return s * complement

def multiply(A, B):
    """Return the outer product of the sequences ``A`` and ``B``.

    The result is a list of ``len(A)`` rows, each a list of ``len(B)``
    entries, where entry ``[i][j]`` is ``A[i] * B[j]``.  An empty ``A``
    yields an empty list.
    """
    # Return every row; returning only the last one would drop all but the
    # final element of A from the product.
    return [[a * b for b in B] for a in A]
    