# Project1 Training of a Fully-Connected Neural Network
### MNIST Dataset - Autonomous Learning of Neural Networks
*—————— Neural network learning and inference based on NumPy*

### 1.  Visualization of MNIST dataset
First, download `mnist.py` from the website and place it where the code below can import it as `dataset.mnist` (the code adds the parent directory to `sys.path`). You can then prepare the MNIST data in Python as follows. `mnist.py` itself is not described in detail here.

The MNIST dataset is loaded as four arrays:
- x_train : 60,000x784 NumPy array in which each row is the flattened version of one training image.
- t_train : 1x60,000 NumPy array in which each entry is the true label of the corresponding training image.
- x_test : 10,000x784 NumPy array in which each row is the flattened version of one test image.
- t_test : 1x10,000 NumPy array in which each entry is the true label of the corresponding test image.

In [None]:
# mnist dataset image display
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image
def img_show(img):
    """Display an image array using PIL.

    The pixel data is cast to unsigned 8-bit integers, wrapped in a PIL
    image with ``Image.fromarray`` and shown with its ``show()`` method.
    """
    pixels = np.uint8(img)
    Image.fromarray(pixels).show()

# Load the data set as flattened images without normalizing the pixel values.
train_set, test_set = load_mnist(flatten=True, normalize=False)
x_train, t_train = train_set
x_test, t_test = test_set

# Pick one training sample together with its label.
img, lable = x_train[5], t_train[5]
print(lable)

# Print the shape before and after restoring the 28x28 image layout.
print(img.shape)
img = img.reshape(28, 28)
print(img.shape)

img_show(img)

![](图片2.png)

### 2. Handwritten digit recognition based on NumPy

In this implementation, a 3-layer neural network performs the processing from input to output.

<img alt="三层神经网络" height="300" src="https://img-blog.csdnimg.cn/b8c6d8838a2e48b285a1bedada0eeeb0.png" width="600"/>

For inference on the MNIST dataset, the input layer of the neural network has 784 neurons and the output layer has 10 neurons. The number 784 comes from the image size, 28 × 28 = 784, and the number 10 comes from the 10-class classification (the digits 0 to 9). In addition, the network has two hidden layers: the first hidden layer has 50 neurons and the second hidden layer has 100 neurons.

The output below shows the shape of the weight matrix of each layer of the neural network:
![](图片1.png)

The final test accuracy of this run is about 94.5%.
![准确率](图片3.png)

The figure produced by this run is shown below:
![图像 ](图片4.png)

In [None]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pylab as plt
# matplotlib: data visualization library for drawing 2-D and 3-D charts;
# plt is used below to draw the accuracy graph.
from dataset.mnist import load_mnist

# The sigmoid function is used as the activation function.
# An activation function decides how the sum of the input signals is activated.
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which always lies in (0, 1],
    so inputs of large magnitude cannot overflow ``np.exp`` (the direct
    formula overflows for large negative ``x``).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: the equivalent form e^x / (1 + e^x).
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as is.
    return out[()]

def sigmoid_grad(x):
    """Return the derivative of the sigmoid at ``x``, i.e. ``(1 - s) * s``.

    ``s = sigmoid(x)`` is evaluated once and reused instead of being
    computed twice.
    """
    s = sigmoid(x)
    return (1.0 - s) * s

def softmax(x):
    """Return the softmax of ``x`` taken along its last axis.

    A 1-D input is treated as a single score vector; for inputs of higher
    rank every slice along the last axis is normalized independently, so a
    2-D batch is normalized row by row.

    Args:
        x: Array of scores.

    Returns:
        Array with the same shape as ``x`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    # Overflow countermeasure: subtracting the maximum does not change the
    # result but keeps the argument of np.exp at or below zero.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

# Mini-batch implementation of the cross-entropy error
def cross_entropy_error(y, t):
    """Return the mean cross-entropy error of predictions ``y`` against ``t``.

    Args:
        y: Predicted probabilities, either a single sample (1-D) or a batch
            (2-D, one row per sample).
        t: Supervised data, given either as one-hot rows with as many
            elements as ``y`` or as the indices of the correct labels.

    Returns:
        The negative log of the probability at the correct label, averaged
        over the batch.
    """
    # A single sample is promoted to a batch of one.
    if y.ndim == 1:
        t, y = t.reshape(1, t.size), y.reshape(1, y.size)

    # One-hot supervised data is converted to the index of the correct label.
    if t.size == y.size:
        t = t.argmax(axis=1)

    n_samples = y.shape[0]
    rows = np.arange(n_samples)
    correct_label_prob = y[rows, t]
    # The small constant keeps np.log finite when a probability is exactly 0.
    log_likelihood = np.log(correct_label_prob + 1e-7)
    return -np.sum(log_likelihood) / n_samples
#np.arange([start, ]stop, [step, ]dtype=None)

def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Every element of ``x`` is perturbed in place by ``+h`` and ``-h`` in
    turn, ``f`` is evaluated for both, and the element is then restored.
    Arrays of any shape are supported, including 2-D weight matrices.

    Args:
        f: Callable that takes ``x`` and returns a scalar.
        x: NumPy array to differentiate with respect to. It is modified
            temporarily during the call and holds its original values on
            return.

    Returns:
        Array created with ``np.zeros_like(x)`` holding the estimated
        partial derivative for each element of ``x``.
    """
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # np.ndindex yields one complete index tuple per element, so each step
    # touches a single scalar regardless of the number of dimensions.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)  # f(x+h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value

    return grad
# Read in the MNIST dataset with normalize=True and one-hot labels.
# NOTE(review): this runs at module level, and the same call is made again in
# the ``__main__`` section below -- presumably one load is enough; confirm.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
class ThreeLayerNet:
    """Fully-connected network with two sigmoid hidden layers and a softmax output.

    All parameters are stored in ``self.params`` under the keys ``W1``/``b1``,
    ``W2``/``b2`` and ``W3``/``b3`` (weights and biases of the three layers).
    """

    def __init__(self, input_size, hidden_size, mid_size, output_size, weight_init_std=0.01):
        """Initialize the weight and bias parameters.

        Args:
            input_size: Number of input features.
            hidden_size: Number of neurons in the first hidden layer.
            mid_size: Number of neurons in the second hidden layer.
            output_size: Number of output neurons.
            weight_init_std: Factor applied to the standard-normal samples
                used as initial weights.
        """
        print("Build Net")
        # Weights are scaled standard-normal samples; biases start at zero.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, mid_size)
        self.params['b2'] = np.zeros(mid_size)
        self.params['W3'] = weight_init_std * np.random.randn(mid_size, output_size)
        self.params['b3'] = np.zeros(output_size)

    def _forward(self, x):
        """Run the forward pass and return ``(a1, z1, a2, z2, y)``.

        ``a*`` are the affine outputs, ``z*`` the sigmoid activations and
        ``y`` the softmax output. Shared by ``predict`` and ``gradient``.
        """
        W1, W2, W3 = self.params['W1'], self.params['W2'], self.params['W3']
        b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        z2 = sigmoid(a2)
        a3 = np.dot(z2, W3) + b3
        y = softmax(a3)

        return a1, z1, a2, z2, y

    def predict(self, x):
        """Return the softmax output of the network for input ``x``."""
        return self._forward(x)[-1]

    # Loss function - x: input data, t: supervised data
    def loss(self, x, t):
        """Return the cross-entropy error of the prediction for ``x`` against ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    # Numerical differentiation
    def numerical_gradient(self, x, t):
        """Return the gradient of the loss for every parameter by numerical differentiation.

        Returns:
            Dict with the same keys as ``self.params``.
        """
        # The argument W is ignored on purpose: the module-level
        # numerical_gradient perturbs the parameter array in place, and
        # self.loss reads the parameters from self.params.
        loss_W = lambda W: self.loss(x, t)
        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
        }

    # Error backpropagation
    def gradient(self, x, t):
        """Return the gradient of the loss for every parameter by backpropagation.

        Args:
            x: Batch of input data, one row per sample.
            t: Supervised data, either one-hot rows (same number of elements
                as the network output) or the indices of the correct labels,
                matching what ``loss`` and ``accuracy`` accept.

        Returns:
            Dict with the same keys as ``self.params``.
        """
        W1, W2, W3 = self.params['W1'], self.params['W2'], self.params['W3']
        grads = {}
        print('W1:', W1.shape)
        print('W2:', W2.shape)
        print('W3:', W3.shape)

        batch_num = x.shape[0]

        # Forward
        a1, z1, a2, z2, y = self._forward(x)

        # Backward: start from the difference between output and target,
        # averaged over the batch.
        if t.size == y.size:
            dy = (y - t) / batch_num
        else:
            # Label indices: subtract 1 at the correct label of each row,
            # which equals subtracting the one-hot encoding.
            dy = y.copy()
            dy[np.arange(batch_num), t] -= 1
            dy = dy / batch_num
        grads['W3'] = np.dot(z2.T, dy)
        grads['b3'] = np.sum(dy, axis=0)

        da2 = np.dot(dy, W3.T)
        dz2 = sigmoid_grad(a2) * da2
        grads['W2'] = np.dot(z1.T, dz2)
        grads['b2'] = np.sum(dz2, axis=0)

        da1 = np.dot(dz2, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads

    # Accuracy
    def accuracy(self, x, t):
        """Return the fraction of samples in ``x`` whose predicted label matches ``t``.

        ``t`` is reduced with ``argmax`` along axis 1 unless it is 1-D.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

if __name__ == '__main__':
    # Train the network with mini-batch gradient descent, record the loss of
    # every iteration and the train/test accuracy once per epoch, then plot
    # the accuracy curves.
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
    network = ThreeLayerNet(input_size=784, hidden_size=50, mid_size=100, output_size=10)

    # Training and validation settings
    batch_size = 150
    iter_nums = 15000
    train_size = x_train.shape[0]
    learning_rate = 0.1
    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    # Number of iterations per epoch. Integer division keeps this a whole
    # number, so the ``i % iter_per_epoch == 0`` check below fires once per
    # epoch even when batch_size does not divide train_size evenly.
    iter_per_epoch = max(train_size // batch_size, 1)

    for i in range(iter_nums):
        # Draw a random mini-batch (np.random.choice samples with
        # replacement by default).
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Gradient by error backpropagation
        grad = network.gradient(x_batch, t_batch)

        # Update parameters - weights W and biases b
        for key in ['W1', 'b1', 'W2', 'b2', 'W3', 'b3']:
            network.params[key] -= learning_rate * grad[key]

        # Record the learning process (loss on the batch after the update)
        loss = network.loss(x_batch, t_batch)
        print('训练次数:' + str(i) + '    loss:' + str(loss))
        train_loss_list.append(loss)

        # Calculate the recognition accuracy for each epoch
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print(train_acc, test_acc)

    print(train_acc_list)
    print(test_acc_list)

    # Draw accuracy graphs: one point per recorded epoch
    x = np.arange(len(train_acc_list))
    plt.plot(x, train_acc_list, label='train acc')
    plt.plot(x, test_acc_list, label='test acc', linestyle='--')
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()

