### **Ćwiczenie 2** - sieć wielowarstwowa uczona metodą propagacji wstecznej

In [5]:
from keras.datasets import mnist

# Load the MNIST train/test split through Keras.
(train_X, train_y), (test_X, test_y) = mnist.load_data()

# Report the shape of every array in the split, one line per array.
for caption, array in (
    ('X_train: ', train_X),
    ('Y_train: ', train_y),
    ('X_test:  ', test_X),
    ('Y_test:  ', test_y),
):
    print(f'{caption}{array.shape}')

X_train: (60000, 28, 28)
Y_train: (60000,)
X_test:  (10000, 28, 28)
Y_test:  (10000,)


In [6]:
import time
import numpy as np
import random as random
from scipy.special import softmax
import math
import pandas as pd
from matplotlib import pyplot as plt

class Network():
    """Fully connected feed-forward network trained with mini-batch backpropagation.

    Every layer owns a weight matrix and a single scalar bias shared by all of
    its units. Hidden layers use the activation selected by ``function``
    ('sigmoid', 'tangens', anything else falls back to the smooth ReLU); the
    last layer always uses a row-wise softmax.
    """

    # Utils

    def __generate_random_matrix(self, shape_x, shape_y, max_value=1):
        """Return a (shape_x, shape_y) matrix drawn uniformly from [-max_value, max_value)."""
        return np.random.rand(shape_x, shape_y) * 2 * max_value - max_value

    def __generate_random_number(self, max_value=0.1):
        """Return one float drawn uniformly from [-max_value, max_value)."""
        return random.random() * 2 * max_value - max_value

    # Constructor

    def __init__(self, layer_shapes, function, max_value=1):
        """Create randomly initialised weights and biases.

        Args:
            layer_shapes: iterable of (inputs, outputs) pairs, one per layer.
            function: name of the hidden-layer activation.
            max_value: weights are drawn uniformly from [-max_value, max_value).
        """
        self.weights = []
        self.biases = []
        self.function = function

        # The draw order (matrix, then bias, layer by layer) is kept fixed so
        # that seeded runs stay reproducible.
        for shape_x, shape_y in layer_shapes:
            self.weights.append(self.__generate_random_matrix(shape_x, shape_y, max_value=max_value))
            self.biases.append(self.__generate_random_number())

    # Code

    @staticmethod
    def normalize(X):
        """Scale 8-bit pixel intensities (0..255) to the range [0, 1]."""
        # 255, not 254, is the largest value an 8-bit pixel can take.
        return X / 255

    @staticmethod
    def get_result(prediction):
        """Return the index of the largest entry of a 1-D prediction vector.

        Ties resolve to the first maximum. ``np.argmax`` is used instead of an
        equality search so a prediction containing NaN does not raise.
        """
        return np.argmax(prediction)

    def __sigmoid(self, z, deriv=False):
        """Logistic function of ``z``, or its derivative when ``deriv`` is true."""
        value = 1.0 / (1.0 + np.exp(-z))
        if deriv:
            return value * (1 - value)
        return value

    def __tangens(self, z, deriv=False):
        """Hyperbolic tangent of ``z``, or its derivative when ``deriv`` is true."""
        value = np.tanh(z)
        if deriv:
            # d/dz tanh(z) = 1 - tanh(z)^2
            return 1 - value ** 2
        return value

    def __relu(self, z, deriv=False):
        """Smooth ReLU (softplus) ``log(1 + exp(z))``, or its derivative.

        NOTE(review): despite the name this is softplus, not max(0, z). When
        the mean of ``z`` exceeds 1000 the input is divided by its maximum
        before exponentiation - presumably to avoid overflow in exp(); this
        rescaling changes the function value and is kept as it was.
        """
        scaled = z / np.max(z) if np.mean(z) > 1000 else z
        if deriv:
            return 1 / (1 + np.exp(-scaled))
        return np.log(1 + np.exp(scaled))

    def __activation_function(self, z, deriv=False):
        """Dispatch to the hidden-layer activation chosen at construction time."""
        if self.function == 'sigmoid':
            return self.__sigmoid(z, deriv)
        elif self.function == 'tangens':
            return self.__tangens(z, deriv)
        else:
            return self.__relu(z, deriv)

    def __softmax_derivative(self, x):
        """Element-wise softmax derivative ``s * (1 - s)`` at pre-activation ``x``.

        The softmax is taken along the last axis, i.e. per sample. Only the
        diagonal of the softmax Jacobian is used (cross terms are ignored).
        """
        s = softmax(x, axis=-1)
        return s * (1 - s)

    def forward(self, X, predict=False):
        """Propagate a batch through the network.

        Args:
            X: 2-D array with one sample per row.
            predict: when true, return only the output activations.

        Returns:
            The softmax output if ``predict`` is true, otherwise ``(aa, zz)``:
            the per-layer activations and pre-activations, both starting with
            ``X`` itself at index 0.
        """
        aa = [X]
        zz = [X]
        a = X
        last = len(self.weights) - 1

        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = a @ w + b
            # Output layer: per-sample softmax; hidden layers: chosen activation.
            a = softmax(z, axis=1) if i == last else self.__activation_function(z)
            aa.append(a)
            zz.append(z)

        if predict:
            return a
        return aa, zz

    def backpropagation(self, learning_rate, y, aa, zz):
        """Compute the squared-error cost and the per-layer deltas for one batch.

        Args:
            learning_rate: unused here; kept for interface compatibility.
            y: target outputs, same shape as ``aa[-1]``.
            aa: activations returned by ``forward``.
            zz: pre-activations returned by ``forward``.

        Returns:
            ``(cost, deltas, errors)``; ``deltas`` and ``errors`` are ordered
            from the output layer back towards the first layer.
        """
        output_error = y - aa[-1]
        cost = np.sum(output_error ** 2) / 2

        # Derivatives are evaluated at the pre-activations (zz), not at the
        # activations, so the activation is not applied a second time.
        delta = output_error * self.__softmax_derivative(zz[-1])
        deltas = [delta]
        errors = [output_error]

        for i in range(1, len(aa) - 1):
            # Push the error back through the weights that produced layer -i.
            hidden_error = delta @ self.weights[-i].T
            delta = hidden_error * self.__activation_function(zz[-(i + 1)], True)
            errors.append(hidden_error)
            deltas.append(delta)

        return cost, deltas, errors

    def train(self, X, y, learning_rate, batch_size, max_epochs, max_error):
        """Train with mini-batches until the error is small enough.

        One iteration processes one mini-batch; batches are taken in order and
        wrap around to the start of ``X`` once the data is exhausted.

        Args:
            X: training inputs, one sample per row.
            y: training targets aligned with ``X``.
            learning_rate: step size of the weight and bias updates.
            batch_size: number of samples per mini-batch.
            max_epochs: maximum number of mini-batch iterations.
            max_error: stop as soon as the per-sample batch cost drops below it.

        Returns:
            ``(costs, epochs)``: the batch cost of every iteration and the
            number of iterations performed.
        """
        costs = []
        batch_index = 0
        epochs = 0

        for _ in range(max_epochs):
            start = batch_index * batch_size
            stop = start + batch_size
            batch_X = X[start:stop]
            batch_y = y[start:stop]
            # Wrap around once this batch reached the end of the data.
            batch_index = 0 if stop >= len(X) else batch_index + 1

            aa, zz = self.forward(batch_X)
            cost, deltas, errors = self.backpropagation(learning_rate, batch_y, aa, zz)

            # deltas run output -> input, layers run input -> output.
            for j, delta in enumerate(reversed(deltas)):
                self.weights[j] += learning_rate * aa[j].T @ delta
                self.biases[j] += learning_rate * np.mean(delta)

            costs.append(cost)
            epochs += 1

            # Normalise by the real batch length: the last batch before the
            # wrap-around may be shorter than batch_size.
            if cost / max(len(batch_X), 1) < max_error:
                break

        return costs, epochs

In [9]:
# Flatten every 28x28 training image into a 784-element row and scale it with
# Network.normalize. np.longdouble is the portable spelling of the
# extended-precision dtype; np.float128 is not defined on every platform.
X = np.asarray(Network.normalize(train_X.reshape(len(train_X), 784)), dtype=np.longdouble)

# One-hot encode the digit labels: row i holds a single 1 at column train_y[i].
labels = np.zeros((len(train_y), 10), dtype=np.longdouble)
labels[np.arange(len(train_y)), train_y] = 1

# Evaluation inputs: every test image except the last 3000.
X_test = np.asarray(Network.normalize(test_X[0:-3000].reshape(-1, 784)), dtype=np.longdouble)

In [14]:
# Train the same architecture several times and report the averaged cost
# curve, iteration count and accuracy.
np.random.seed(243)
random.seed(243)

layer_shapes = [(784, 15), (15, 10)]

times = 10
epochs = 0
accs = 0

# Running per-iteration cost sum and the number of runs that reached each
# iteration. Runs may stop early at different iterations, so the cost lists
# can differ in length and cannot simply be added element-wise.
cost_sums = np.zeros(0)
cost_counts = np.zeros(0)

for _ in range(times):
    network = Network(layer_shapes=layer_shapes, function='sigmoid')

    costs, epoch = network.train(X,
                                 labels,
                                 learning_rate=0.1,
                                 max_epochs=10000,
                                 batch_size=32,
                                 max_error=0.05)

    costs = np.asarray(costs, dtype=float)
    missing = len(costs) - len(cost_sums)
    if missing > 0:
        # Grow the accumulators (zero-padded) to fit the longest run so far.
        cost_sums = np.pad(cost_sums, (0, missing))
        cost_counts = np.pad(cost_counts, (0, missing))
    cost_sums[:len(costs)] += costs
    cost_counts[:len(costs)] += 1

    epochs += epoch

    z = network.forward(X_test, predict=True)

    # Predicted digit = index of the largest output; compare with the labels
    # of the same test subset that X_test was built from.
    correct = int(np.sum(np.argmax(z, axis=1) == test_y[0:-3000]))
    accs += round(correct / len(z) * 100, 2)

# Mean cost per iteration over the runs that actually reached that iteration
# (identical to dividing by `times` when all runs have the same length).
total_costs = (cost_sums / cost_counts).tolist()

plt.plot(total_costs)
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.show()

print(f'Epochs: {epochs / times}')
print(f'Accuracy: {accs / times}')

KeyboardInterrupt: 

In [None]:
# Evaluate the most recently trained `network` on the complete test set.
# A dedicated name is used for the inputs so the training matrix `X` is not
# overwritten with test data.
X_test_full = Network.normalize(test_X.reshape(len(test_X), 784))

z = network.forward(X_test_full, predict=True)

# Predicted digit = index of the largest output of each row.
correct = int(np.sum(np.argmax(z, axis=1) == test_y))

print(f'Accuracy: {round(correct / len(z) * 100, 2)}%')

In [5]:
# Flatten every 28x28 training image into a 784-element row and scale it with
# Network.normalize. np.longdouble is the portable spelling of the
# extended-precision dtype; np.float128 is not defined on every platform.
X = np.asarray(Network.normalize(train_X.reshape(len(train_X), 784)), dtype=np.longdouble)

# One-hot encode the digit labels: row i holds a single 1 at column train_y[i].
labels = np.zeros((len(train_y), 10), dtype=np.longdouble)
labels[np.arange(len(train_y)), train_y] = 1

# Evaluation inputs: every test image except the last 3000.
X_test = np.asarray(Network.normalize(test_X[0:-3000].reshape(-1, 784)), dtype=np.longdouble)

In [6]:
# Test Batch Size
import os

np.random.seed(243)
random.seed(243)

layer_shapes = [(784, 15), (15, 10)]
batches = [2, 4, 8, 16, 32, 64, 128]
learning_rate = 0.1
function = 'sigmoid'

# One summary row per batch size; the DataFrame is built once at the end
# because DataFrame.append no longer exists in pandas 2.x.
rows = []

for batch in batches:

    epochs = []
    accs = []

    # Repeat the training 20 times and average the results.
    for _ in range(20):
        network = Network(layer_shapes=layer_shapes, function=function)

        costs, epoch = network.train(X,
                                     labels,
                                     learning_rate=learning_rate,
                                     max_epochs=1000,
                                     batch_size=batch,
                                     max_error=0.1)

        z = network.forward(X_test, predict=True)

        # Predicted digit = index of the largest output of each row.
        correct = int(np.sum(np.argmax(z, axis=1) == test_y[0:-3000]))

        epochs.append(epoch)
        accs.append(round(correct / len(z) * 100, 2))

    mean_epochs = round(np.mean(epochs), 2)
    mean_acc = round(np.mean(accs), 2)

    print('Batch:', batch, ', Epochs:', mean_epochs, ', Accuracy:', mean_acc)
    rows.append({'Batch': batch, 'Epochs': mean_epochs, 'Accuracy': mean_acc})

df = pd.DataFrame(rows, columns=['Batch', 'Epochs', 'Accuracy'])

display(df)
# Make sure the output directory exists so a long run is not lost on save.
os.makedirs('./Results', exist_ok=True)
df.to_csv(f'./Results/{function}-batch-size-testing.csv', index=False)

KeyboardInterrupt: 

In [None]:
# Test learning rate
import os

np.random.seed(243)
random.seed(243)

layer_shapes = [(784, 20), (20, 15), (15, 10)]
learning_rates = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
function = 'sigmoid'

# One summary row per learning rate; the DataFrame is built once at the end
# because DataFrame.append no longer exists in pandas 2.x.
rows = []

for learning_rate in learning_rates:

    epochs = []
    accs = []

    # Repeat the training 20 times and average the results.
    for _ in range(20):
        network = Network(layer_shapes=layer_shapes, function=function)

        costs, epoch = network.train(X,
                                     labels,
                                     learning_rate=learning_rate,
                                     max_epochs=10000,
                                     batch_size=32,
                                     max_error=0.1)

        z = network.forward(X_test, predict=True)

        # Predicted digit = index of the largest output of each row.
        correct = int(np.sum(np.argmax(z, axis=1) == test_y[0:-3000]))

        epochs.append(epoch)
        accs.append(round(correct / len(z) * 100, 2))

    mean_epochs = round(np.mean(epochs), 2)
    mean_acc = round(np.mean(accs), 2)

    print('Learning rate:', learning_rate, ', Epochs:', mean_epochs, ', Accuracy:', mean_acc)
    rows.append({'Learning rate': learning_rate, 'Epochs': mean_epochs, 'Accuracy': mean_acc})

df = pd.DataFrame(rows, columns=['Learning rate', 'Epochs', 'Accuracy'])

display(df)
# Make sure the output directory exists so a long run is not lost on save.
os.makedirs('./Results', exist_ok=True)
df.to_csv(f'./Results/{function}-learning-rate-testing.csv', index=False)

In [None]:
# Test Hidden Layer Size
import os

np.random.seed(243)
random.seed(243)

layer_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 600, 700]
function = 'sigmoid'

# One summary row per hidden-layer size; the DataFrame is built once at the
# end because DataFrame.append no longer exists in pandas 2.x.
rows = []

for layer_size in layer_sizes:

    layer_shapes = [(784, layer_size), (layer_size, 10)]

    epochs = []
    accs = []

    # Repeat the training 20 times and average the results.
    for _ in range(20):
        # Use the same `function` that names the result file below.
        network = Network(layer_shapes=layer_shapes, function=function)

        costs, epoch = network.train(X,
                                     labels,
                                     learning_rate=0.1,
                                     max_epochs=1000,
                                     batch_size=32,
                                     max_error=0.1)

        z = network.forward(X_test, predict=True)

        # Predicted digit = index of the largest output of each row.
        correct = int(np.sum(np.argmax(z, axis=1) == test_y[0:-3000]))

        epochs.append(epoch)
        accs.append(round(correct / len(z) * 100, 2))

    mean_epochs = round(np.mean(epochs), 2)
    mean_acc = round(np.mean(accs), 2)

    print('Layer size:', layer_size, ', Epochs:', mean_epochs, ', Accuracy:', mean_acc)
    rows.append({'Layer size': layer_size, 'Epochs': mean_epochs, 'Accuracy': mean_acc})

df = pd.DataFrame(rows, columns=['Layer size', 'Epochs', 'Accuracy'])

display(df)
# Make sure the output directory exists so a long run is not lost on save.
os.makedirs('./Results', exist_ok=True)
df.to_csv(f'./Results/{function}-layer-size-testing.csv', index=False)

In [None]:
# Test Initial Weight values
import os

np.random.seed(243)
random.seed(243)

w_max_values = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
function = 'sigmoid'
layer_shapes = [(784, 15), (15, 10)]

# One summary row per weight range; the DataFrame is built once at the end
# because DataFrame.append no longer exists in pandas 2.x.
rows = []

for max_value in w_max_values:

    epochs = []
    accs = []

    # Repeat the training 20 times and average the results.
    for _ in range(20):
        # Use the same `function` that names the result file below.
        network = Network(layer_shapes=layer_shapes, function=function, max_value=max_value)

        costs, epoch = network.train(X,
                                     labels,
                                     learning_rate=0.1,
                                     max_epochs=10000,
                                     batch_size=32,
                                     max_error=0.1)

        z = network.forward(X_test, predict=True)

        # Predicted digit = index of the largest output of each row.
        correct = int(np.sum(np.argmax(z, axis=1) == test_y[0:-3000]))

        epochs.append(epoch)
        accs.append(round(correct / len(z) * 100, 2))

    mean_epochs = round(np.mean(epochs), 2)
    mean_acc = round(np.mean(accs), 2)

    print("Weights' range:", (-max_value, max_value), ', Epochs:', mean_epochs, ', Accuracy:', mean_acc)
    # The key must match the column name exactly (no trailing colon),
    # otherwise the declared column stays empty.
    rows.append({"Weights' range": (-max_value, max_value), 'Epochs': mean_epochs, 'Accuracy': mean_acc})

df = pd.DataFrame(rows, columns=["Weights' range", 'Epochs', 'Accuracy'])

display(df)
# Make sure the output directory exists so a long run is not lost on save.
os.makedirs('./Results', exist_ok=True)
df.to_csv(f'./Results/{function}-weights-testing.csv', index=False)

In [None]:
# Test activation function
import os

np.random.seed(243)
random.seed(243)

layer_shapes = [(784, 15), (15, 10)]
learning_rate = 0.1
functions = ['relu', 'sigmoid']

# One summary row per activation function; the DataFrame is built once at the
# end because DataFrame.append no longer exists in pandas 2.x.
rows = []

for function in functions:

    epochs = []
    accs = []

    # Repeat the training 20 times and average the results.
    for _ in range(20):
        network = Network(layer_shapes=layer_shapes, function=function)

        costs, epoch = network.train(X,
                                     labels,
                                     learning_rate=learning_rate,
                                     max_epochs=5000,
                                     batch_size=64,
                                     max_error=0.1)

        z = network.forward(X_test, predict=True)

        # Predicted digit = index of the largest output of each row.
        correct = int(np.sum(np.argmax(z, axis=1) == test_y[0:-3000]))

        epochs.append(epoch)
        accs.append(round(correct / len(z) * 100, 2))

    mean_epochs = round(np.mean(epochs), 2)
    mean_acc = round(np.mean(accs), 2)

    print('Function:', function, ', Epochs:', mean_epochs, ', Accuracy:', mean_acc)
    rows.append({'Function': function, 'Epochs': mean_epochs, 'Accuracy': mean_acc})

df = pd.DataFrame(rows, columns=['Function', 'Epochs', 'Accuracy'])

display(df)
# Make sure the output directory exists so a long run is not lost on save.
os.makedirs('./Results', exist_ok=True)
df.to_csv('./Results/function-testing.csv', index=False)