# Intermediate Neural Network in TensorFlow

Build an intermediate neural network to classify handwritten digits.

#### Load dependencies

In [14]:
import numpy as np
import tensorflow
from keras.datasets import mnist
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.utils import to_categorical
from matplotlib import pyplot as plt

#### Load data

In [15]:
# load_data() yields a training pair and a validation pair, each unpacked
# here into its inputs (X_*) and its outputs/labels (y_*).
(X_train, y_train), (X_valid, y_valid) = mnist.load_data()  # X - inputs, y - outputs

#### Preprocess data

In [16]:
# Flatten every sample into a single row vector and convert to float32 so the
# values can be scaled to fractions afterwards. len(...) and -1 replace the
# literal (60000, 784) / (10000, 784) shapes, so the cell no longer depends on
# the exact number of samples or on repeating the feature count by hand.
X_train = X_train.reshape(len(X_train), -1).astype('float32')
X_valid = X_valid.reshape(len(X_valid), -1).astype('float32')

In [17]:
# Scale the inputs by dividing by 255 (assumes the raw intensities span
# 0-255 — TODO confirm against the dataset).
# NOTE: `/=` modifies the arrays in place, so re-running this cell without
# first reloading the data divides the already-scaled values again.
X_train /= 255
X_valid /= 255

In [18]:
# Number of target classes (one per digit).
n_classes = 10

# Encode both label arrays with to_categorical, giving each label a row of
# length n_classes.
y_train, y_valid = (
    to_categorical(labels, n_classes) for labels in (y_train, y_valid)
)

#### Design neural network architecture

In [19]:
model = Sequential()

# Hidden layers: the first layer added to the model must also define the
# input layer, which is done through its input_shape argument.
model.add(Dense(64, activation='relu', input_shape=(784,)))
model.add(Dense(64, activation='relu'))

# Output layer: one softmax unit per class. The width comes from n_classes
# (set when the labels were encoded) instead of a repeated literal 10, so the
# output size cannot drift away from the label encoding.
model.add(Dense(n_classes, activation='softmax'))

In [20]:
# Print a per-layer table of output shapes and parameter counts, followed by
# the total / trainable / non-trainable parameter totals.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 64)                50240     
                                                                 
 dense_4 (Dense)             (None, 64)                4160      
                                                                 
 dense_5 (Dense)             (None, 10)                650       
                                                                 
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


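Total number of parameters in the second hidden layer (`dense_4` in the summary above): 64 inputs × 64 units of weights, plus 64 biases.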

In [21]:
# One weight per (input, unit) pair plus one bias per unit.
n_inputs, n_units = 64, 64
n_inputs * n_units + n_units

4160

#### Compile model

In [22]:
# The optimizer is chosen at compile time: SGD (stochastic gradient descent)
# with a learning rate of 0.1. Accuracy is tracked alongside the loss.
model.compile(
    optimizer=SGD(learning_rate=0.1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

#### Train!

In [23]:
# The batch size and the number of training epochs are both hyperparameters.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=20,
    verbose=1,
    validation_data=(X_valid, y_valid),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x263e60ff4c0>

#### Evaluating model performance

In [24]:
# Score the trained model on the validation set; the result lists the loss
# followed by the accuracy metric requested at compile time.
model.evaluate(x=X_valid, y=y_valid)



[0.08337689936161041, 0.9749000072479248]

In [25]:
# Take the first validation sample as a batch of one. A length-1 slice keeps
# the leading batch axis, so the feature count (784) does not have to be
# repeated here as it was with reshape(1, 784).
valid0 = X_valid[:1]

In [28]:
# Run the single-sample batch through the trained model.
predict_0 = model.predict(x=valid0)



In [29]:
# Display the softmax output for the sample: one row holding one probability
# per class.
predict_0

array([[2.1223204e-07, 1.5965300e-09, 4.7581043e-06, 7.4996511e-05,
        2.4737741e-08, 3.3083506e-08, 3.2706406e-13, 9.9989772e-01,
        1.0645803e-06, 2.1190363e-05]], dtype=float32)

In [30]:
# Index of the largest probability in each row, i.e. the predicted class.
# numpy is imported once in the dependency cell at the top of the notebook
# rather than in this final cell.
np.argmax(predict_0, axis=1)

array([7], dtype=int64)