### **Ćwiczenie 2** - sieć wielowarstwowa uczona metodą propagacji wstecznej

In [1]:
from keras.datasets import mnist

(train_X, train_y), (test_X, test_y) = mnist.load_data()

print('X_train: ' + str(train_X.shape))
print('Y_train: ' + str(train_y.shape))
print('X_test:  '  + str(test_X.shape))
print('Y_test:  '  + str(test_y.shape))

X_train: (60000, 28, 28)
Y_train: (60000,)
X_test:  (10000, 28, 28)
Y_test:  (10000,)


In [7]:
import time
import numpy as np
import random as random
from scipy.special import softmax
import math
import pandas as pd

class Network():
    
    # Utils
    
    def __generate_random_matrix(self, shape_x, shape_y, max_value=1):
        return np.random.rand(shape_x, shape_y) * 2 * max_value - max_value
    
    def __generate_random_number(self, max_value=0.1):
        return random.random() * 2 * max_value - max_value
    
    # Constructor
    
    def __init__(self, layer_shapes, function, max_value=1):
        self.weights = []
        self.biases = []
        self.function = function
        
        for layer_shape in layer_shapes:
            shape_x, shape_y = layer_shape
            self.weights.append(self.__generate_random_matrix(shape_x, shape_y, max_value=max_value))
            self.biases.append(self.__generate_random_number())
        
    # Code
    
    @staticmethod
    def normalize(X):
        return X / 254
    
    @staticmethod
    def get_result(prediction):
        return np.where(prediction == np.max(prediction))[0][0]
    
    def __sigmoid(self, z, deriv=False):
        if deriv:
            sigmoid = self.__sigmoid(z)
            return sigmoid * (1 - sigmoid)
        else:
            return 1.0 / (1.0 + np.exp(-z))
        
    def __tangens(self, z, deriv=False):
        if deriv:
            tangens = self.__tangens(z)
            return tangens * (1 - tangens)
        else:
            return (2 / (1 + np.exp(-2 * z))) - 1
        
    def __relu(self, z, deriv=False):
        if deriv:
            return 1 / (1 + np.exp(-z / np.max(z))) if np.mean(z) > 1000 else 1 / (1 + np.exp(-z))
        else:
            return np.log(1 + np.exp(z / np.max(z))) if np.mean(z) > 1000 else np.log(1 + np.exp(z))
        
    def __activation_function(self, z, deriv=False):
        if self.function == 'sigmoid':
            return self.__sigmoid(z, deriv)
        elif self.function == 'tangens':
            return self.__tangens(z, deriv)
        else:
            return self.__relu(z, deriv)
    
    def __softmax_derivative(self, x):
        return softmax(x) * (1 - softmax(x))
    
    def forward(self, X, predict=False):
        
        aa = [X]
        zz = [X]
        a = X
        
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = a @ w + b
            
            if i == len(self.weights) - 1:
                a = softmax(z, axis=1)
            else:
                a = self.__activation_function(z)
                
            aa.append(a)
            zz.append(z)
            
        if predict:
            return a
        else:
            return aa, zz
        
    def backpropagation(self, learning_rate, y, aa, zz):
        
#         delta = (y - aa[2]) * self.__softmax_derivative(aa[2])
#         self.weights[1] += learning_rate * zz[1].T @ delta
        
#         delta2 = ((delta @ self.weights[1].T) - aa[1]) * self.__activation_function(aa[1], True)
#         self.weights[0] = learning_rate * zz[0].T @ delta2
        
        cost = np.sum((y - aa[-1]) ** 2) / 2
    
        deltas = []
        errors = []
        output_error = y - aa[-1]
        errors.append(output_error)
        
        delta = output_error * self.__softmax_derivative(aa[-1])
        deltas.append(delta)
        
        for i in range(1, len(aa) - 1):
            hidden_error = delta @ self.weights[-i].T
            errors.append(hidden_error)
            delta = hidden_error * self.__activation_function(aa[-(i + 1)], True)
            deltas.append(delta)
        
        return cost, deltas, errors
    
    def train(self, X, y, learning_rate, batch_size, max_epochs, max_error):
    
        costs = []
        batch_index = 0
        epochs = 0
        
        for i in range(max_epochs):
            
            batch_X = X[batch_index * batch_size:(batch_index + 1) * batch_size]
            batch_y = y[batch_index * batch_size:(batch_index + 1) * batch_size]
            batch_index += 1
            
            if batch_index * batch_size >= len(X):
                batch_index = 0
            
            aa, zz = self.forward(batch_X)
            cost, deltas, errors = self.backpropagation(learning_rate, batch_y, aa, zz)

            for j in range(len(self.weights)):
                self.weights[j] += learning_rate * aa[j].T @ deltas[-(j + 1)]
                self.biases[j] += learning_rate * np.mean(deltas[-(j + 1)])

            costs.append(cost / batch_size)
            
            epochs += 1
            
            if (cost / batch_size < max_error):
                break

            if i % 1000 == 0:
                print(f'Iteration: {i}, error: {cost / batch_size}')
                
        return costs, epochs

In [8]:
np.random.seed(243)
random.seed(243)

layer_shapes=[(784, 15), (15, 10)]

network = Network(layer_shapes=layer_shapes, function='relu')

X = []
for X_matrix in train_X:
    X.append(Network.normalize(X_matrix.reshape(784)))
X = np.array(X)
    
labels = []
for y in train_y:
    label = np.zeros(10)
    label[y] = 1
    labels.append(label)
labels = np.array(labels)

costs, epochs = network.train(X, 
              labels, 
              learning_rate=0.1, 
              max_epochs=10000,
              batch_size=64,
              max_error=0.1)

print(f'Epochs: {epochs}')

Iteration: 0, error: 0.7587876471239032
Iteration: 1000, error: 0.23227497758181062
Epochs: 1859


In [9]:
X = []
for X_matrix in test_X:
    X.append(Network.normalize(X_matrix.reshape(784)))
X = np.array(X)

z = network.forward(X, predict=True)

sum = 0
for z_i, label_i in zip(z, test_y):
    if Network.get_result(z_i) == label_i:
        sum +=1
        
print(f'Accuracy: {round(sum / len(z) * 100, 2)}%')

Accuracy: 70.31%


In [11]:
X = []
for X_matrix in train_X:
    X.append(Network.normalize(X_matrix.reshape(784)))
X = np.array(X, dtype=np.float128)
    
labels = []
for y in train_y:
    label = np.zeros(10)
    label[y] = 1
    labels.append(label)
labels = np.array(labels, dtype=np.float128)

X_test = []
for X_matrix in test_X[0:-3000]:
    X_test.append(Network.normalize(X_matrix.reshape(784)))
X_test = np.array(X_test, dtype=np.float128)

In [99]:
# Test Batch Size

np.random.seed(243)
random.seed(243)

layer_shapes=[(784, 15), (15, 10)]
batches = [2, 4, 8, 16, 32, 64, 128]
learning_rate = 0.1

df = pd.DataFrame(columns=['Batch', 'Epochs', 'Accuracy'])
function = 'sigmoid'

for batch in batches:
    
    learning_times = []
    epochs = []
    errors = []
    accs = []
    
    for i in range(5):
        network = Network(layer_shapes=layer_shapes, function=function)

        costs, epoch = network.train(X, 
                      labels, 
                      learning_rate=learning_rate, 
                      max_epochs=10000,
                      batch_size=batch,
                      max_error=0.1)

        z = network.forward(X_test, predict=True)

        sum = 0
        for z_i, label_i in zip(z, test_y[0:-3000]):
            if Network.get_result(z_i) == label_i:
                sum +=1
                
        epochs.append(epoch)
        accs.append(round(sum / len(z) * 100, 2))

    print('Batch:', batch, ', Epochs:', round(np.mean(epochs), 2), ', Accuracy:', round(np.mean(accs), 2))
    df = df.append({'Batch': batch, 'Epochs': round(np.mean(epochs), 2), 'Accuracy': round(np.mean(accs), 2)}, ignore_index=True)
    
display(df)
df.to_csv(f'./Results/{function}-batch-size-testing.csv', index=False)

Batch: 2 , Epochs: 1310.8 , Accuracy: 45.06
Batch: 4 , Epochs: 1841.2 , Accuracy: 56.82
Batch: 8 , Epochs: 3315.2 , Accuracy: 61.73
Batch: 16 , Epochs: 4941.4 , Accuracy: 68.46
Batch: 32 , Epochs: 6907.2 , Accuracy: 72.24
Batch: 64 , Epochs: 6752.2 , Accuracy: 71.25
Batch: 128 , Epochs: 9780.6 , Accuracy: 73.27


Unnamed: 0,Batch,Epochs,Accuracy
0,2.0,1310.8,45.06
1,4.0,1841.2,56.82
2,8.0,3315.2,61.73
3,16.0,4941.4,68.46
4,32.0,6907.2,72.24
5,64.0,6752.2,71.25
6,128.0,9780.6,73.27


In [100]:
# Test learning rate Size

np.random.seed(243)
random.seed(243)

layer_shapes=[(784, 20), (20, 15), (15, 10)]
learning_rates = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

df = pd.DataFrame(columns=['Learning rate', 'Epochs', 'Accuracy'])
function = 'sigmoid'

for learning_rate in learning_rates:
    
    learning_times = []
    epochs = []
    errors = []
    accs = []
    
    for i in range(5):
        network = Network(layer_shapes=layer_shapes, function=function)

        costs, epoch = network.train(X, 
                      labels, 
                      learning_rate=learning_rate, 
                      max_epochs=10000,
                      batch_size=32,
                      max_error=0.1)

        z = network.forward(X_test, predict=True)

        sum = 0
        for z_i, label_i in zip(z, test_y[0:-3000]):
            if Network.get_result(z_i) == label_i:
                sum +=1
                
        epochs.append(epoch)
        accs.append(round(sum / len(z) * 100, 2))

    print('Learning rate:', learning_rate, ', Epochs:', round(np.mean(epochs), 2), ', Accuracy:', round(np.mean(accs), 2))
    df = df.append({'Learning rate': learning_rate, 'Epochs': round(np.mean(epochs), 2), 'Accuracy': round(np.mean(accs), 2)}, ignore_index=True)
    
display(df)
df.to_csv(f'./Results/{function}-learning-rate-testing.csv', index=False)

Learning rate: 0.1 , Epochs: 10000.0 , Accuracy: 66.88
Learning rate: 0.2 , Epochs: 6691.4 , Accuracy: 71.3
Learning rate: 0.3 , Epochs: 4008.8 , Accuracy: 70.61


  return 1.0 / (1.0 + np.exp(-z))


Learning rate: 0.4 , Epochs: 5157.8 , Accuracy: 68.56
Learning rate: 0.5 , Epochs: 4589.2 , Accuracy: 67.76
Learning rate: 0.6 , Epochs: 2433.4 , Accuracy: 71.53
Learning rate: 0.7 , Epochs: 5157.2 , Accuracy: 66.3
Learning rate: 0.8 , Epochs: 4966.2 , Accuracy: 67.56
Learning rate: 0.9 , Epochs: 1585.6 , Accuracy: 71.28
Learning rate: 1.0 , Epochs: 1738.0 , Accuracy: 70.71


Unnamed: 0,Learning rate,Epochs,Accuracy
0,0.1,10000.0,66.88
1,0.2,6691.4,71.3
2,0.3,4008.8,70.61
3,0.4,5157.8,68.56
4,0.5,4589.2,67.76
5,0.6,2433.4,71.53
6,0.7,5157.2,66.3
7,0.8,4966.2,67.56
8,0.9,1585.6,71.28
9,1.0,1738.0,70.71


In [101]:
# Test Hidden Layer Size

np.random.seed(243)
random.seed(243)

layer_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 600, 700]

df = pd.DataFrame(columns=['Layer size', 'Epochs', 'Accuracy'])
function = 'sigmoid'

for layer_size in layer_sizes:
    
    layer_shapes=[(784, layer_size), (layer_size, 10)]
    
    learning_times = []
    epochs = []
    errors = []
    accs = []
    
    for i in range(5):
        network = Network(layer_shapes=layer_shapes, function='sigmoid')

        costs, epoch = network.train(X, 
                      labels, 
                      learning_rate=0.1, 
                      max_epochs=10000,
                      batch_size=32,
                      max_error=0.1)

        z = network.forward(X_test, predict=True)

        sum = 0
        for z_i, label_i in zip(z, test_y[0:-3000]):
            if Network.get_result(z_i) == label_i:
                sum +=1
                
        epochs.append(epoch)
        accs.append(round(sum / len(z) * 100, 2))

    print('Layer size:', layer_size, ', Epochs:', round(np.mean(epochs), 2), ', Accuracy:', round(np.mean(accs), 2))
    df = df.append({'Layer size': layer_size, 'Epochs': round(np.mean(epochs), 2), 'Accuracy': round(np.mean(accs), 2)}, ignore_index=True)
    
display(df)
df.to_csv(f'./Results/{function}-layer-size-testing.csv', index=False)

Layer size: 1 , Epochs: 10000.0 , Accuracy: 16.97
Layer size: 2 , Epochs: 10000.0 , Accuracy: 26.33
Layer size: 3 , Epochs: 10000.0 , Accuracy: 30.52
Layer size: 4 , Epochs: 10000.0 , Accuracy: 28.88
Layer size: 5 , Epochs: 10000.0 , Accuracy: 41.66
Layer size: 6 , Epochs: 10000.0 , Accuracy: 49.38
Layer size: 7 , Epochs: 10000.0 , Accuracy: 51.03
Layer size: 8 , Epochs: 10000.0 , Accuracy: 56.15
Layer size: 9 , Epochs: 10000.0 , Accuracy: 59.17
Layer size: 10 , Epochs: 9438.2 , Accuracy: 69.37
Layer size: 20 , Epochs: 4234.8 , Accuracy: 72.72
Layer size: 30 , Epochs: 3041.2 , Accuracy: 72.95
Layer size: 40 , Epochs: 1728.4 , Accuracy: 70.02
Layer size: 50 , Epochs: 1694.2 , Accuracy: 70.5
Layer size: 100 , Epochs: 1185.0 , Accuracy: 69.07
Layer size: 200 , Epochs: 972.4 , Accuracy: 68.8
Layer size: 300 , Epochs: 819.8 , Accuracy: 65.13
Layer size: 400 , Epochs: 689.2 , Accuracy: 65.35
Layer size: 500 , Epochs: 651.4 , Accuracy: 64.35
Layer size: 600 , Epochs: 680.0 , Accuracy: 67.02
L

Unnamed: 0,Layer size,Epochs,Accuracy
0,1.0,10000.0,16.97
1,2.0,10000.0,26.33
2,3.0,10000.0,30.52
3,4.0,10000.0,28.88
4,5.0,10000.0,41.66
5,6.0,10000.0,49.38
6,7.0,10000.0,51.03
7,8.0,10000.0,56.15
8,9.0,10000.0,59.17
9,10.0,9438.2,69.37


In [15]:
# Test Initial Weight values

np.random.seed(243)
random.seed(243)

w_max_values = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

df = pd.DataFrame(columns=["Weights' range", 'Epochs', 'Accuracy'])
function = 'sigmoid'

for max_value in w_max_values:
    
    layer_shapes=[(784, 15), (15, 10)]
    
    learning_times = []
    epochs = []
    errors = []
    accs = []
    
    for i in range(5):
        network = Network(layer_shapes=layer_shapes, function='sigmoid', max_value=max_value)

        costs, epoch = network.train(X, 
                      labels, 
                      learning_rate=0.1, 
                      max_epochs=10000,
                      batch_size=32,
                      max_error=0.1)

        z = network.forward(X_test, predict=True)

        sum = 0
        for z_i, label_i in zip(z, test_y[0:-3000]):
            if Network.get_result(z_i) == label_i:
                sum +=1
                
        epochs.append(epoch)
        accs.append(round(sum / len(z) * 100, 2))

    print("Weights' range:", (-max_value, max_value), ', Epochs:', round(np.mean(epochs), 2), ', Accuracy:', round(np.mean(accs), 2))
    df = df.append({"Weights' range:": (-max_value, max_value), 'Epochs': round(np.mean(epochs), 2), 'Accuracy': round(np.mean(accs), 2)}, ignore_index=True)
    
display(df)
df.to_csv(f'./Results/{function}-weights-testing.csv', index=False)

Weights' range: (-0.01, 0.01) , Epochs: 10000.0 , Accuracy: 70.2
Weights' range: (-0.05, 0.05) , Epochs: 8436.4 , Accuracy: 75.44
Weights' range: (-0.1, 0.1) , Epochs: 7621.8 , Accuracy: 74.71
Weights' range: (-0.2, 0.2) , Epochs: 7465.0 , Accuracy: 75.0
Weights' range: (-0.3, 0.3) , Epochs: 6885.2 , Accuracy: 75.14
Weights' range: (-0.4, 0.4) , Epochs: 7222.4 , Accuracy: 74.59
Weights' range: (-0.5, 0.5) , Epochs: 7307.4 , Accuracy: 73.72
Weights' range: (-1, 1) , Epochs: 5905.0 , Accuracy: 72.42
Weights' range: (-2, 2) , Epochs: 5422.6 , Accuracy: 66.69
Weights' range: (-3, 3) , Epochs: 5596.0 , Accuracy: 63.97
Weights' range: (-4, 4) , Epochs: 8577.8 , Accuracy: 61.89


  return 1.0 / (1.0 + np.exp(-z))


Weights' range: (-5, 5) , Epochs: 10000.0 , Accuracy: 57.74
Weights' range: (-10, 10) , Epochs: 8747.0 , Accuracy: 44.34
Weights' range: (-20, 20) , Epochs: 10000.0 , Accuracy: 30.51
Weights' range: (-30, 30) , Epochs: 10000.0 , Accuracy: 35.48
Weights' range: (-40, 40) , Epochs: 10000.0 , Accuracy: 33.11
Weights' range: (-50, 50) , Epochs: 10000.0 , Accuracy: 33.06
Weights' range: (-60, 60) , Epochs: 10000.0 , Accuracy: 32.09
Weights' range: (-70, 70) , Epochs: 10000.0 , Accuracy: 33.58
Weights' range: (-80, 80) , Epochs: 10000.0 , Accuracy: 39.63
Weights' range: (-90, 90) , Epochs: 10000.0 , Accuracy: 32.9
Weights' range: (-100, 100) , Epochs: 10000.0 , Accuracy: 32.8


Unnamed: 0,Layer size,Epochs,Accuracy,Weights' range:
0,,10000.0,70.2,"(-0.01, 0.01)"
1,,8436.4,75.44,"(-0.05, 0.05)"
2,,7621.8,74.71,"(-0.1, 0.1)"
3,,7465.0,75.0,"(-0.2, 0.2)"
4,,6885.2,75.14,"(-0.3, 0.3)"
5,,7222.4,74.59,"(-0.4, 0.4)"
6,,7307.4,73.72,"(-0.5, 0.5)"
7,,5905.0,72.42,"(-1, 1)"
8,,5422.6,66.69,"(-2, 2)"
9,,5596.0,63.97,"(-3, 3)"


In [12]:
# Test Batch Size

np.random.seed(243)
random.seed(243)

layer_shapes=[(784, 15), (15, 10)]
batches = [2, 4, 8, 16, 32, 64, 128]
learning_rate = 0.1

df = pd.DataFrame(columns=['Batch', 'Epochs', 'Accuracy'])
function = 'relu'

for batch in batches:
    
    learning_times = []
    epochs = []
    errors = []
    accs = []
    
    for i in range(5):
        network = Network(layer_shapes=layer_shapes, function=function)

        costs, epoch = network.train(X, 
                      labels, 
                      learning_rate=0.1, 
                      max_epochs=5000,
                      batch_size=64,
                      max_error=0.1)

        z = network.forward(X_test, predict=True)

        sum = 0
        for z_i, label_i in zip(z, test_y[0:-3000]):
            if Network.get_result(z_i) == label_i:
                sum +=1
                
        epochs.append(epoch)
        accs.append(round(sum / len(z) * 100, 2))

    print('Batch:', batch, ', Epochs:', round(np.mean(epochs), 2), ', Accuracy:', round(np.mean(accs), 2))
    df = df.append({'Batch': batch, 'Epochs': round(np.mean(epochs), 2), 'Accuracy': round(np.mean(accs), 2)}, ignore_index=True)
    
display(df)
df.to_csv(f'./Results/{function}-batch-size-testing.csv', index=False)

Iteration: 0, error: 0.7587876471239032
Iteration: 1000, error: 0.23227497758181057
Iteration: 0, error: 0.8361567392133491
Iteration: 1000, error: 0.21374314212147152
Iteration: 0, error: 0.6916539050901342
Iteration: 1000, error: 0.24216882457404768
Iteration: 0, error: 0.7959780376256106
Iteration: 1000, error: 0.7495916906677739
Iteration: 2000, error: 0.5369314542569128
Iteration: 3000, error: 0.5447214734078016
Iteration: 4000, error: 0.5197941172824602
Iteration: 0, error: 0.8112129029570698
Iteration: 1000, error: 0.5672622030068897
Iteration: 2000, error: 0.5536046769430132
Iteration: 3000, error: 0.5560127331183259
Iteration: 4000, error: 0.5358670543441755
Batch: 2 , Epochs: 3118.6 , Accuracy: 44.46
Iteration: 0, error: 0.8203965150838686
Iteration: 1000, error: 0.6776453710662813


KeyboardInterrupt: 