# <center> WSI Ćwiczenie nr.5 - Implementacja drzew decyzyjncyh tworzonych algorytmem ID3</center>

### <center>Maciej Łodziński, Adam Wróblewski</center>


### Cel eksperymentów:
Celem ćwiczenia jest implementacja perceptronu wielowarstwowego oraz wybranego algorytmu optymalizacji gradientowej z algorytmem propagacji wstecznej, a następnie wytrenowanie perceptron wielowarstwowy do klasyfikacji zbioru danych MNIST.

In [13]:
#import libraries
import pandas as pd
import numpy as np
import math
from random import uniform
from sklearn.model_selection import train_test_split
import warnings
from keras.datasets import mnist

warnings.filterwarnings('ignore')

In [15]:
def one_hot_encode(expected_outputs):
    data_size = expected_outputs.shape[0]
    labels = np.zeros((data_size, 10))
    for row in range(data_size):
        val = expected_outputs[row]
        labels[row, val] = 1
    return labels

def split_to_batches(data, mini_batch_size):
    mini_batches = []
    i = 0
    while i < data.shape[0] - mini_batch_size:
        mini_batch = data[i:(mini_batch_size+i)]
        mini_batches.append(mini_batch)
        i += mini_batch_size
        
    mini_batch = data[i::, :]
    mini_batches.append(mini_batch)
    return mini_batches

def modify_data(data, mini_batch_size):
    batches = split_to_batches(data, mini_batch_size)
    training_batches = []
    for mini_batches in batches:
        x_train = mini_batches[:, 1::]
        y_train = mini_batches[:, 0]
        y_train = one_hot_encode(y_train)
        batch = (x_train, y_train)
        training_batches.append(batch)
    return training_batches

In [14]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
train_x_reshaped = x_train.reshape(60000,784)
test_x_reshaped = x_test.reshape(10000,784)
train_y_reshaped = y_train.reshape(-1,1)
test_y_reshaped = y_test.reshape(-1,1)
train_data = np.hstack((train_y_reshaped, train_x_reshaped))
test_data = np.hstack((test_y_reshaped, test_x_reshaped))
train_data = np.vstack((train_data, test_data))

In [15]:
training_data, validation_data = train_test_split(train_data, train_size=0.9)

X_train = training_data[:, 1::]
Y_train = one_hot_encode(training_data[:, 0])

X_valid = validation_data[:, 1::]
Y_valid = one_hot_encode(validation_data[:, 0])

In [16]:
class Layer:
    def __init__(self, num_nodes_in : int, num_nodes_out : int) -> None:
        self.num_nodes_in = num_nodes_in
        self.num_nodes_out = num_nodes_out
        
        self.weights = np.random.uniform(-1, 1, (num_nodes_in, num_nodes_out))
        self.cost_gradient_w = np.zeros((num_nodes_in, num_nodes_out))
        
        self.biases = np.zeros((1, num_nodes_out))
        self.cost_gradient_b = np.zeros((1, num_nodes_out))
        
        self.weighted_input = None
        self.activations = None
        self.inputs = None
                        
    # calculates activations of a layer based on input
    def calculate_output(self, inputs) -> list[float]:
        weighted_input = np.matmul(inputs, self.weights)
        activations = sigmoid(weighted_input)
        self.inputs = inputs
        self.weighted_input = weighted_input
        self.activations = activations
        return activations
    
    # 'loop over' weights and biases to subtract corresponding value from gradients arrays
    def apply_gradients(self, learn_rate) -> None:
        self.weights -= self.cost_gradient_w * learn_rate
        self.biases -= self.cost_gradient_b * learn_rate
           
    # calculates some derivatives for output layer
    def calculate_output_layer_node_values(self, expected_output) -> list[float]:
        derivative_cost = cost_derivative(self.activations, expected_output)
        derivative_sigmoid = sigmoid_derivative(self.weighted_input)
        node_values = derivative_sigmoid * derivative_cost
        return node_values
    
    # calculates some derivatives for hidden layers
    def calculate_hidden_layer_node_values(self, old_layer, old_node_values) -> list[float]:
        old_node_values = old_node_values.reshape(-1, 1)
        node_values = np.matmul(old_layer.weights, old_node_values)
        node_values = node_values.reshape(1, -1)
        activate_derivative = sigmoid_derivative(self.weighted_input)
        node_values = node_values * activate_derivative
        return node_values
    
    # updates gradients arrays in layer object based on given node_values, which were calculated in previous methods
    def update_gradients(self, node_values) -> None:
        self.cost_gradient_b += 1 * node_values
        inputs = self.inputs.reshape(-1, 1)
        derivative_cost_wrt_weight = np.matmul(inputs, node_values)
        self.cost_gradient_w += derivative_cost_wrt_weight


In [17]:
class NeuralNetwork:
    def __init__(self, layer_sizes) -> None:
        self.layers = []
        for i in range(len(layer_sizes) - 1):
            layer = Layer(layer_sizes[i], layer_sizes[i + 1])
            self.layers.append(layer) 
            
    # calls calculate_output function threw all layers
    def forward_propagation(self, inputs) -> list[float]:
        for layer in self.layers:
            inputs = layer.calculate_output(inputs)
        return inputs
    
    # perform apply_gradients on all layers
    def apply_all_gradients(self, learn_rate) -> None:
        for layer in self.layers:
            layer.apply_gradients(learn_rate)
    
    # chooses network's weights and parameters
    def back_propagation(self, inputs, expected_output) -> None:
        self.forward_propagation(inputs)
        
        output_layer = self.layers[-1]
        node_values = output_layer.calculate_output_layer_node_values(expected_output)
        output_layer.update_gradients(node_values)
        
        for index in reversed(range(len(self.layers) - 1)):
            hidden_layer = self.layers[index]
            node_values = hidden_layer.calculate_hidden_layer_node_values(self.layers[index + 1], node_values)
            hidden_layer.update_gradients(node_values)
            
    # clears gradients arrays in layers objects
    def clear_all_gradients(self) -> None:
        for layer in self.layers:
            layer.cost_gradient_w = np.zeros((layer.num_nodes_in, layer.num_nodes_out))
            layer.cost_gradient_b = np.zeros((1, layer.num_nodes_out))
            
    # learns network's weights and biases
    def learn(self, training_batches, learn_rate, batch_size) -> None:
        for batch in training_batches:
            inputs, expected_output = batch
            for i in range(len(expected_output)):
                self.back_propagation(inputs[i].reshape(1 , -1), expected_output[i])
            self.apply_all_gradients(learn_rate / inputs.shape[0])
            self.clear_all_gradients()
              
    # learn function with epochs and progress vizualization
    def learn_with_epochs(self, training_data, learn_rate, batch_size, epochs) -> None:
        training_batches = modify_data(training_data, batch_size)
        for i in range(epochs):
            self.learn(training_batches, learn_rate, batch_size)
            
            results_train = self.clasify(X_train)
            accuracy_train = acuracy(results_train, Y_train)
            cost_train = self.total_cost(X_train, Y_train)
            
            results_valid = self.clasify(X_valid)
            accuracy_valid = acuracy(results_valid, Y_valid)
            cost_valid = self.total_cost(X_valid, Y_valid)
            
            info = f"Epoch {i+1}/{epochs}:\n\
            train set accuracy {round(accuracy_train * 100, 2)}%\n\
            cost train {round(cost_train, 2)}\n\
            valid set accuracy {round(accuracy_valid * 100, 2)}%\n\
            cost valid {round(cost_valid, 2)}\n"
            
            print(info)

    # perform forward_propagation and returns index of node with max value
    def clasify(self, data) -> list[int]:
        ans = []
        for row in range(data.shape[0]):
            output = self.forward_propagation(data[row])
            max_value = max(output)
            index = np.where(output == max_value)
            ans.append(index[0][0].item())
        return np.asarray(ans)
    
    # perform forward_propagation and returns error cost of all final nodes
    def sample_cost(self, inputs, expected_output) -> float:
        output = self.forward_propagation(inputs)
        return cost(output, expected_output)
    
    # perform sample_cost on all data samples
    def total_cost(self, data, expected_outputs) -> float:
        total_cost = 0.0
        size = len(data)
        for i in range(size):
            total_cost += self.sample_cost(data[i], expected_outputs[i])
        return total_cost / size

In [18]:
def sigmoid(z) -> list[float]:
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(z) -> list[float]:
    activation = sigmoid(z)
    return activation * (1 - activation)

In [19]:
def cost(output_activation, expected_output) -> list[float]:
    error = expected_output - output_activation
    return sum(error * error)

def cost_derivative(output_activation, expected_output) -> list[float]:
    return 2 * (output_activation - expected_output)

In [20]:
def acuracy(outputs, labels) -> float:
    diff = one_hot_encode(outputs) - labels
    wrong = 0
    for result in diff:
        if np.count_nonzero(result) != 0:
            wrong += 1
    accuracy = (len(labels) - wrong ) / len(labels)
    return accuracy

In [11]:
network = NeuralNetwork([784, 100, 20, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 37.71%
            cost train 0.79
            valid set accuracy 36.78%
            cost valid 0.79

Epoch 2/40:
            train set accuracy 55.55%
            cost train 0.64
            valid set accuracy 54.77%
            cost valid 0.65

Epoch 3/40:
            train set accuracy 65.08%
            cost train 0.54
            valid set accuracy 64.28%
            cost valid 0.55

Epoch 4/40:
            train set accuracy 71.06%
            cost train 0.47
            valid set accuracy 70.44%
            cost valid 0.47

Epoch 5/40:
            train set accuracy 74.76%
            cost train 0.42
            valid set accuracy 74.75%
            cost valid 0.42

Epoch 6/40:
            train set accuracy 77.68%
            cost train 0.38
            valid set accuracy 77.48%
            cost valid 0.38

Epoch 7/40:
            train set accuracy 79.81%
            cost train 0.35
            valid set accuracy 79.48%
            co

In [23]:
network = NeuralNetwork([784, 16, 16, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 20.36%
            cost train 0.86
            valid set accuracy 20.42%
            cost valid 0.86

Epoch 2/40:
            train set accuracy 31.61%
            cost train 0.81
            valid set accuracy 31.31%
            cost valid 0.81

Epoch 3/40:
            train set accuracy 42.24%
            cost train 0.76
            valid set accuracy 42.18%
            cost valid 0.76

Epoch 4/40:
            train set accuracy 52.94%
            cost train 0.69
            valid set accuracy 52.48%
            cost valid 0.69

Epoch 5/40:
            train set accuracy 58.54%
            cost train 0.62
            valid set accuracy 57.98%
            cost valid 0.62

Epoch 6/40:
            train set accuracy 61.78%
            cost train 0.56
            valid set accuracy 61.69%
            cost valid 0.57

Epoch 7/40:
            train set accuracy 67.15%
            cost train 0.51
            valid set accuracy 67.11%
            co

In [28]:
network = NeuralNetwork([784, 10, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 30.05%
            cost train 0.86
            valid set accuracy 30.17%
            cost valid 0.86

Epoch 2/40:
            train set accuracy 36.78%
            cost train 0.79
            valid set accuracy 36.73%
            cost valid 0.79

Epoch 3/40:
            train set accuracy 43.59%
            cost train 0.73
            valid set accuracy 43.52%
            cost valid 0.73

Epoch 4/40:
            train set accuracy 48.47%
            cost train 0.68
            valid set accuracy 48.28%
            cost valid 0.68

Epoch 5/40:
            train set accuracy 52.65%
            cost train 0.64
            valid set accuracy 52.96%
            cost valid 0.64

Epoch 6/40:
            train set accuracy 55.25%
            cost train 0.61
            valid set accuracy 55.47%
            cost valid 0.61

Epoch 7/40:
            train set accuracy 58.58%
            cost train 0.6
            valid set accuracy 59.3%
            cost

In [29]:
network = NeuralNetwork([784, 16, 16, 16, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 20.91%
            cost train 0.88
            valid set accuracy 20.94%
            cost valid 0.88

Epoch 2/40:
            train set accuracy 24.45%
            cost train 0.84
            valid set accuracy 24.4%
            cost valid 0.84

Epoch 3/40:
            train set accuracy 27.78%
            cost train 0.81
            valid set accuracy 27.59%
            cost valid 0.81

Epoch 4/40:
            train set accuracy 32.49%
            cost train 0.78
            valid set accuracy 31.98%
            cost valid 0.78

Epoch 5/40:
            train set accuracy 34.74%
            cost train 0.75
            valid set accuracy 34.72%
            cost valid 0.75

Epoch 6/40:
            train set accuracy 39.61%
            cost train 0.73
            valid set accuracy 39.38%
            cost valid 0.73

Epoch 7/40:
            train set accuracy 42.72%
            cost train 0.7
            valid set accuracy 42.58%
            cost

In [30]:
network = NeuralNetwork([784, 100, 100, 100, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 55.22%
            cost train 0.62
            valid set accuracy 55.15%
            cost valid 0.62

Epoch 2/40:
            train set accuracy 71.04%
            cost train 0.45
            valid set accuracy 70.88%
            cost valid 0.46

Epoch 3/40:
            train set accuracy 76.85%
            cost train 0.37
            valid set accuracy 76.9%
            cost valid 0.37

Epoch 4/40:
            train set accuracy 79.72%
            cost train 0.33
            valid set accuracy 79.54%
            cost valid 0.33

Epoch 5/40:
            train set accuracy 81.39%
            cost train 0.3
            valid set accuracy 81.44%
            cost valid 0.3

Epoch 6/40:
            train set accuracy 82.94%
            cost train 0.28
            valid set accuracy 82.98%
            cost valid 0.28

Epoch 7/40:
            train set accuracy 83.71%
            cost train 0.26
            valid set accuracy 83.99%
            cost 

In [31]:
network = NeuralNetwork([784, 200, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 21.6%
            cost train 0.9
            valid set accuracy 21.98%
            cost valid 0.89

Epoch 2/40:
            train set accuracy 29.9%
            cost train 0.81
            valid set accuracy 30.26%
            cost valid 0.8

Epoch 3/40:
            train set accuracy 31.81%
            cost train 0.77
            valid set accuracy 32.61%
            cost valid 0.76

Epoch 4/40:
            train set accuracy 32.98%
            cost train 0.75
            valid set accuracy 33.77%
            cost valid 0.75

Epoch 5/40:
            train set accuracy 39.96%
            cost train 0.69
            valid set accuracy 40.62%
            cost valid 0.69

Epoch 6/40:
            train set accuracy 41.03%
            cost train 0.67
            valid set accuracy 42.0%
            cost valid 0.66

Epoch 7/40:
            train set accuracy 42.26%
            cost train 0.66
            valid set accuracy 43.2%
            cost val

In [39]:
network = NeuralNetwork([784, 200, 200, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 42.73%
            cost train 0.71
            valid set accuracy 42.61%
            cost valid 0.72

Epoch 2/40:
            train set accuracy 63.43%
            cost train 0.51
            valid set accuracy 63.42%
            cost valid 0.52

Epoch 3/40:
            train set accuracy 68.86%
            cost train 0.44
            valid set accuracy 68.72%
            cost valid 0.44

Epoch 4/40:
            train set accuracy 72.31%
            cost train 0.4
            valid set accuracy 72.16%
            cost valid 0.4

Epoch 5/40:
            train set accuracy 78.65%
            cost train 0.34
            valid set accuracy 78.58%
            cost valid 0.34

Epoch 6/40:
            train set accuracy 81.87%
            cost train 0.3
            valid set accuracy 81.93%
            cost valid 0.3

Epoch 7/40:
            train set accuracy 83.46%
            cost train 0.28
            valid set accuracy 83.07%
            cost v

In [33]:
network = NeuralNetwork([784, 100, 20, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=80)

Epoch 1/80:
            train set accuracy 44.68%
            cost train 0.73
            valid set accuracy 43.99%
            cost valid 0.73

Epoch 2/80:
            train set accuracy 60.77%
            cost train 0.58
            valid set accuracy 61.58%
            cost valid 0.57

Epoch 3/80:
            train set accuracy 68.7%
            cost train 0.48
            valid set accuracy 69.1%
            cost valid 0.48

Epoch 4/80:
            train set accuracy 73.95%
            cost train 0.42
            valid set accuracy 74.52%
            cost valid 0.42

Epoch 5/80:
            train set accuracy 77.57%
            cost train 0.37
            valid set accuracy 77.79%
            cost valid 0.37

Epoch 6/80:
            train set accuracy 80.11%
            cost train 0.33
            valid set accuracy 80.34%
            cost valid 0.33

Epoch 7/80:
            train set accuracy 81.38%
            cost train 0.31
            valid set accuracy 81.71%
            cost

Epoch 58/80:
            train set accuracy 91.43%
            cost train 0.14
            valid set accuracy 90.94%
            cost valid 0.15

Epoch 59/80:
            train set accuracy 91.41%
            cost train 0.14
            valid set accuracy 90.72%
            cost valid 0.14

Epoch 60/80:
            train set accuracy 91.34%
            cost train 0.14
            valid set accuracy 90.92%
            cost valid 0.15

Epoch 61/80:
            train set accuracy 91.43%
            cost train 0.14
            valid set accuracy 91.07%
            cost valid 0.14

Epoch 62/80:
            train set accuracy 91.54%
            cost train 0.14
            valid set accuracy 91.21%
            cost valid 0.14

Epoch 63/80:
            train set accuracy 91.48%
            cost train 0.14
            valid set accuracy 90.88%
            cost valid 0.15

Epoch 64/80:
            train set accuracy 91.81%
            cost train 0.13
            valid set accuracy 91.27%
       

In [34]:
network = NeuralNetwork([784, 100, 20, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=120)

Epoch 1/120:
            train set accuracy 35.37%
            cost train 0.8
            valid set accuracy 35.01%
            cost valid 0.8

Epoch 2/120:
            train set accuracy 52.51%
            cost train 0.65
            valid set accuracy 52.87%
            cost valid 0.65

Epoch 3/120:
            train set accuracy 62.89%
            cost train 0.55
            valid set accuracy 62.7%
            cost valid 0.55

Epoch 4/120:
            train set accuracy 69.54%
            cost train 0.47
            valid set accuracy 69.62%
            cost valid 0.47

Epoch 5/120:
            train set accuracy 74.51%
            cost train 0.42
            valid set accuracy 74.34%
            cost valid 0.42

Epoch 6/120:
            train set accuracy 76.74%
            cost train 0.38
            valid set accuracy 77.0%
            cost valid 0.38

Epoch 7/120:
            train set accuracy 78.71%
            cost train 0.35
            valid set accuracy 78.88%
           

Epoch 57/120:
            train set accuracy 90.68%
            cost train 0.15
            valid set accuracy 90.08%
            cost valid 0.16

Epoch 58/120:
            train set accuracy 91.05%
            cost train 0.14
            valid set accuracy 90.4%
            cost valid 0.15

Epoch 59/120:
            train set accuracy 91.08%
            cost train 0.14
            valid set accuracy 90.43%
            cost valid 0.15

Epoch 60/120:
            train set accuracy 91.48%
            cost train 0.14
            valid set accuracy 91.27%
            cost valid 0.14

Epoch 61/120:
            train set accuracy 91.14%
            cost train 0.14
            valid set accuracy 90.83%
            cost valid 0.15

Epoch 62/120:
            train set accuracy 91.43%
            cost train 0.14
            valid set accuracy 91.08%
            cost valid 0.14

Epoch 63/120:
            train set accuracy 91.55%
            cost train 0.13
            valid set accuracy 91.22%
 

Epoch 113/120:
            train set accuracy 93.1%
            cost train 0.11
            valid set accuracy 92.47%
            cost valid 0.12

Epoch 114/120:
            train set accuracy 93.15%
            cost train 0.11
            valid set accuracy 92.51%
            cost valid 0.12

Epoch 115/120:
            train set accuracy 93.11%
            cost train 0.11
            valid set accuracy 92.56%
            cost valid 0.12

Epoch 116/120:
            train set accuracy 92.72%
            cost train 0.12
            valid set accuracy 92.17%
            cost valid 0.12

Epoch 117/120:
            train set accuracy 93.07%
            cost train 0.11
            valid set accuracy 92.23%
            cost valid 0.12

Epoch 118/120:
            train set accuracy 93.17%
            cost train 0.11
            valid set accuracy 92.48%
            cost valid 0.12

Epoch 119/120:
            train set accuracy 92.73%
            cost train 0.12
            valid set accuracy 9

In [35]:
network = NeuralNetwork([784, 100, 20, 10])
network.learn_with_epochs(training_data, learn_rate=0.2, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 67.53%
            cost train 0.5
            valid set accuracy 67.9%
            cost valid 0.5

Epoch 2/40:
            train set accuracy 75.56%
            cost train 0.39
            valid set accuracy 75.55%
            cost valid 0.39

Epoch 3/40:
            train set accuracy 79.37%
            cost train 0.34
            valid set accuracy 79.15%
            cost valid 0.34

Epoch 4/40:
            train set accuracy 81.73%
            cost train 0.3
            valid set accuracy 82.01%
            cost valid 0.3

Epoch 5/40:
            train set accuracy 81.37%
            cost train 0.3
            valid set accuracy 81.4%
            cost valid 0.3

Epoch 6/40:
            train set accuracy 81.24%
            cost train 0.3
            valid set accuracy 81.77%
            cost valid 0.29

Epoch 7/40:
            train set accuracy 81.8%
            cost train 0.29
            valid set accuracy 82.13%
            cost valid 0

In [36]:
network = NeuralNetwork([784, 100, 20, 10])
network.learn_with_epochs(training_data, learn_rate=0.5, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 60.91%
            cost train 0.53
            valid set accuracy 61.85%
            cost valid 0.52

Epoch 2/40:
            train set accuracy 61.76%
            cost train 0.53
            valid set accuracy 62.48%
            cost valid 0.53

Epoch 3/40:
            train set accuracy 67.18%
            cost train 0.47
            valid set accuracy 67.77%
            cost valid 0.46

Epoch 4/40:
            train set accuracy 66.99%
            cost train 0.46
            valid set accuracy 67.19%
            cost valid 0.46

Epoch 5/40:
            train set accuracy 65.72%
            cost train 0.48
            valid set accuracy 66.17%
            cost valid 0.47

Epoch 6/40:
            train set accuracy 68.01%
            cost train 0.44
            valid set accuracy 68.4%
            cost valid 0.43

Epoch 7/40:
            train set accuracy 68.37%
            cost train 0.44
            valid set accuracy 68.97%
            cos

In [37]:
network = NeuralNetwork([784, 100, 20, 10])
network.learn_with_epochs(training_data, learn_rate=0.01, batch_size=30, epochs=40)

Epoch 1/40:
            train set accuracy 19.62%
            cost train 0.9
            valid set accuracy 19.85%
            cost valid 0.9

Epoch 2/40:
            train set accuracy 26.83%
            cost train 0.86
            valid set accuracy 27.12%
            cost valid 0.86

Epoch 3/40:
            train set accuracy 32.85%
            cost train 0.82
            valid set accuracy 32.87%
            cost valid 0.82

Epoch 4/40:
            train set accuracy 37.49%
            cost train 0.78
            valid set accuracy 37.21%
            cost valid 0.78

Epoch 5/40:
            train set accuracy 41.55%
            cost train 0.74
            valid set accuracy 41.52%
            cost valid 0.74

Epoch 6/40:
            train set accuracy 45.28%
            cost train 0.71
            valid set accuracy 45.22%
            cost valid 0.71

Epoch 7/40:
            train set accuracy 48.69%
            cost train 0.68
            valid set accuracy 48.38%
            cost

In [38]:
network = NeuralNetwork([784, 100, 20, 10])
network.learn_with_epochs(training_data, learn_rate=0.01, batch_size=30, epochs=120)

Epoch 1/120:
            train set accuracy 23.4%
            cost train 0.87
            valid set accuracy 22.88%
            cost valid 0.87

Epoch 2/120:
            train set accuracy 29.66%
            cost train 0.83
            valid set accuracy 29.55%
            cost valid 0.83

Epoch 3/120:
            train set accuracy 34.76%
            cost train 0.8
            valid set accuracy 34.58%
            cost valid 0.8

Epoch 4/120:
            train set accuracy 39.17%
            cost train 0.77
            valid set accuracy 39.24%
            cost valid 0.77

Epoch 5/120:
            train set accuracy 43.44%
            cost train 0.74
            valid set accuracy 43.09%
            cost valid 0.74

Epoch 6/120:
            train set accuracy 47.3%
            cost train 0.71
            valid set accuracy 46.62%
            cost valid 0.71

Epoch 7/120:
            train set accuracy 50.39%
            cost train 0.68
            valid set accuracy 49.55%
           

Epoch 57/120:
            train set accuracy 86.71%
            cost train 0.22
            valid set accuracy 85.77%
            cost valid 0.24

Epoch 58/120:
            train set accuracy 86.84%
            cost train 0.22
            valid set accuracy 85.86%
            cost valid 0.23

Epoch 59/120:
            train set accuracy 86.9%
            cost train 0.22
            valid set accuracy 85.88%
            cost valid 0.23

Epoch 60/120:
            train set accuracy 87.08%
            cost train 0.22
            valid set accuracy 86.04%
            cost valid 0.23

Epoch 61/120:
            train set accuracy 87.2%
            cost train 0.21
            valid set accuracy 86.23%
            cost valid 0.23

Epoch 62/120:
            train set accuracy 87.32%
            cost train 0.21
            valid set accuracy 86.08%
            cost valid 0.22

Epoch 63/120:
            train set accuracy 87.5%
            cost train 0.21
            valid set accuracy 86.29%
   

Epoch 113/120:
            train set accuracy 90.54%
            cost train 0.16
            valid set accuracy 89.28%
            cost valid 0.17

Epoch 114/120:
            train set accuracy 90.65%
            cost train 0.16
            valid set accuracy 89.31%
            cost valid 0.17

Epoch 115/120:
            train set accuracy 90.55%
            cost train 0.16
            valid set accuracy 89.25%
            cost valid 0.17

Epoch 116/120:
            train set accuracy 90.63%
            cost train 0.15
            valid set accuracy 89.15%
            cost valid 0.17

Epoch 117/120:
            train set accuracy 90.6%
            cost train 0.15
            valid set accuracy 89.31%
            cost valid 0.17

Epoch 118/120:
            train set accuracy 90.74%
            cost train 0.15
            valid set accuracy 89.41%
            cost valid 0.17

Epoch 119/120:
            train set accuracy 90.77%
            cost train 0.15
            valid set accuracy 8

In [40]:
network = NeuralNetwork([784, 100, 100, 100, 10])
network.learn_with_epochs(training_data, learn_rate=0.05, batch_size=30, epochs=120)

Epoch 1/120:
            train set accuracy 54.83%
            cost train 0.63
            valid set accuracy 54.35%
            cost valid 0.63

Epoch 2/120:
            train set accuracy 70.07%
            cost train 0.46
            valid set accuracy 70.33%
            cost valid 0.46

Epoch 3/120:
            train set accuracy 76.05%
            cost train 0.39
            valid set accuracy 76.02%
            cost valid 0.38

Epoch 4/120:
            train set accuracy 79.05%
            cost train 0.34
            valid set accuracy 79.29%
            cost valid 0.34

Epoch 5/120:
            train set accuracy 81.4%
            cost train 0.3
            valid set accuracy 81.55%
            cost valid 0.3

Epoch 6/120:
            train set accuracy 82.16%
            cost train 0.28
            valid set accuracy 82.77%
            cost valid 0.28

Epoch 7/120:
            train set accuracy 83.45%
            cost train 0.26
            valid set accuracy 83.63%
          

Epoch 57/120:
            train set accuracy 92.04%
            cost train 0.13
            valid set accuracy 91.38%
            cost valid 0.13

Epoch 58/120:
            train set accuracy 92.3%
            cost train 0.12
            valid set accuracy 91.61%
            cost valid 0.13

Epoch 59/120:
            train set accuracy 92.04%
            cost train 0.13
            valid set accuracy 91.64%
            cost valid 0.13

Epoch 60/120:
            train set accuracy 92.03%
            cost train 0.13
            valid set accuracy 91.32%
            cost valid 0.13

Epoch 61/120:
            train set accuracy 92.25%
            cost train 0.12
            valid set accuracy 92.17%
            cost valid 0.13

Epoch 62/120:
            train set accuracy 92.25%
            cost train 0.12
            valid set accuracy 92.17%
            cost valid 0.13

Epoch 63/120:
            train set accuracy 92.62%
            cost train 0.12
            valid set accuracy 92.27%
 

Epoch 113/120:
            train set accuracy 93.88%
            cost train 0.1
            valid set accuracy 93.26%
            cost valid 0.11

Epoch 114/120:
            train set accuracy 93.86%
            cost train 0.1
            valid set accuracy 93.33%
            cost valid 0.11

Epoch 115/120:
            train set accuracy 93.97%
            cost train 0.1
            valid set accuracy 93.28%
            cost valid 0.11

Epoch 116/120:
            train set accuracy 93.71%
            cost train 0.1
            valid set accuracy 93.19%
            cost valid 0.11

Epoch 117/120:
            train set accuracy 93.79%
            cost train 0.1
            valid set accuracy 93.33%
            cost valid 0.11

Epoch 118/120:
            train set accuracy 93.86%
            cost train 0.1
            valid set accuracy 93.45%
            cost valid 0.11

Epoch 119/120:
            train set accuracy 93.75%
            cost train 0.1
            valid set accuracy 93.29%


### Wnioski:
Przetestowaliśmy działanie sieci neuronowych manipulując ich strukturą - liczbą warstw oraz neurownów w każdej z nich, a także zmienialiśmy wartość parametru `learn_rate` i liczbę epok. W rezultacie otrzymywaliśmy różną dokładność rozpoznawania obrazków ze zbioru `MNIST` dla zbioru uczącego i walidacyjnego.
Z przeprowadzonych eksperymentów możemy stwierdzić że struktura sieci znacząco wpływa na skuteczność działania seci - przykładowo dla 10 neuronów w 1 warstwie ukrytej uzyskaliśmy najgorszą precyzję wynoszącą zaledwie 79%, natomiast dla sieci o 2 warstwach uktytych i 200 neuronach w każdej z nich, lub 3 warstwach ukrytych i 100 neuronach w każdej z nich, dokładność była znacznie wyższa i wynosiła około 91%. Zwiększając rozmiar sieci - liczbę warstw i neuronów, jesteśmy w stanie zwiększyć dokładność sieci, jednak wiąże się to z dłuższym czasem potrzebnym na zbudowanie i wyuczenie sieci. <br><br>
Kolejnym parametrem który zmienia efektywnosć sieci jest parametr `learn_rate` - mówiący o tym jak szybko sieć ma się uczyć, a tak naprawdę jest to współczynnik proporcjonalności w metodzie gradientu prostego zmieniający długość kroku z jaką przemieszczamy się w kierunku minimum lokalnego. Nie może być on zbyt duży, ponieważ wtedy prawdopodobe są oscylacje wokół minimum, ale nie powienien być także zbyt mały ponieważ możmy nie "zdążyć" zbilżyć się do wartości optymalnej, jednak w połączeniu z dużą liczbą epok, możemy uzyskać bardzo dokładnie minimum lokalne, a tym samym otrzymać bardzo wysoką skuteczność sieci. <br><br>
Z przeprowadzonych przez nas eksperymentów wyniaka że z rozpoznawainem obrazków ze ziobru MNIST najlepiej radzi sobie sieć o struktorze 3 warst ukrytych i 100 neuronach w każdej z nich i parametrach `learn_rate` = 0.05, `epochs` = 120. Uzyskaliśmy dokładność ok 93,03%. Jednak bardzo dobrze z postawionym zadanie poradziała sobie także nieco mniejsza sieć - 2 warstwy ukryte 100 i 20 neuronów, learn_rate =0,05 oraz `epochs` = 120 uzyskując dokładnośc równą około 92%. Jednak ze względu na jej mniejszą strukturę eksperyment trwał znacznie krócej.
Wadami sieci neuronowych są z pewnością potrzeba dużej ilości czasu i zasobów obliczeniowych do wytrenowania sieci, a także fakt że za bardzo nie wiemy w jaki sposób sieć dokonuje swoich przewidywań - zdaje się że kolejne warstwy nie reprezentują nie reprezentują nic intuicyjnego.