## <center>多层神经网络</center>

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
# Restrict this process to the GPU with index 0.
gpu_no = '0'
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_no

# GPU options for the session:
#   allow_growth                     -> allocate GPU memory on demand (important)
#   per_process_gpu_memory_fraction  -> fraction of GPU memory this process may use
gpu_options = tf.GPUOptions(allow_growth=True,
                            per_process_gpu_memory_fraction=0.1)
config = tf.ConfigProto(gpu_options=gpu_options)

# The config is passed in when the session is created.
sess = tf.Session(config=config)

### 1 使用隐藏层解决非线性问题

In [2]:
# Inputs: two features per sample; targets: one value per sample.
x = tf.placeholder(tf.float32, [None, 2])
y = tf.placeholder(tf.float32, [None, 1])

# Weights of the hidden layer (2x3) and the output layer (3x1),
# both initialised from a truncated normal distribution.
h1 = tf.Variable(tf.truncated_normal([2, 3]), dtype=tf.float32)
h2 = tf.Variable(tf.truncated_normal([3, 1]), dtype=tf.float32)
# Biases, initialised to zero.
b1 = tf.Variable(tf.zeros([3]), dtype=tf.float32)
b2 = tf.Variable(tf.zeros([1]), dtype=tf.float32)

# Model: ReLU hidden layer followed by a sigmoid output.
hidden_pre = tf.matmul(x, h1) + b1
layer_1 = tf.nn.relu(hidden_pre)
output_pre = tf.matmul(layer_1, h2) + b2
pred = tf.nn.sigmoid(output_pre)

# Mean squared error between target and prediction, minimised with Adam.
residual = y - pred
cost = tf.reduce_mean(tf.square(residual))
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

In [3]:
# XOR truth table: each row of X is an input pair, the matching row of Y its label.
# Inputs are float32; labels are int32 (the `y` placeholder they feed is float32).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
Y = np.array([[0], [1], [1], [0]], dtype=np.int32)

In [4]:
# Initialise every variable in the graph before training.
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Every step feeds all four samples; the feed never changes, so build it once.
train_feed = {x: X, y: Y}
for i in range(10000):
    sess.run(optimizer, feed_dict=train_feed)

# Show the network output for each input row.
print(sess.run(pred, feed_dict={x: X}))

[[0.00143438]
 [0.99931204]
 [0.9995128 ]
 [0.00143438]]


### 2 使用全连接网络对图片分类

In [5]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST with one-hot labels and build the model on a fresh default graph.
mnist = input_data.read_data_sets("data/mnist/",one_hot = True)
tf.reset_default_graph()

# Inputs: 784 values per image, 10-way one-hot labels.
x = tf.placeholder(dtype=tf.float32,shape=[None,784])
y = tf.placeholder(dtype=tf.float32,shape=[None,10])

# Two hidden layers of 256 units each, followed by a 10-unit output layer.
h1 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[784,256]))
b1 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

h2 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,256]))
b2 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

h_out = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,10]))
b_out = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[10]))

layer_1 = tf.nn.relu(tf.matmul(x,h1)+b1)
layer_2 = tf.nn.relu(tf.matmul(layer_1,h2)+b2)
# Raw logits: the loss below takes logits, so no softmax is applied here.
layer_out = tf.matmul(layer_2,h_out)+b_out

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_out,labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

# Evaluation ops are built once, before training starts. Creating them inside
# the epoch loop would add a new copy of these nodes to the graph every epoch.
# An entry is True where the predicted class index equals the label index.
correct_prediction = tf.equal(tf.argmax(layer_out,axis=1),tf.argmax(y,axis=1))
acc = tf.reduce_mean(tf.cast(correct_prediction,dtype=tf.float32))

train_epochs = 20
batch_size = 100
display_step = 1
# Number of mini-batches per epoch; loop-invariant, so computed once.
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(train_epochs):
        # Running mean of the mini-batch losses over this epoch.
        avg_cost = 0.0
        for j in range(total_batch):
            batch_x,batch_y = mnist.train.next_batch(batch_size)
            _,loss = sess.run([optimizer,cost],feed_dict={x:batch_x,y:batch_y})
            avg_cost = avg_cost+loss/total_batch
        if (i+1)%display_step==0:
            # Accuracy is measured on the full test split.
            accuracy = sess.run(acc,feed_dict={x:mnist.test.images,y:mnist.test.labels})
            print('epochs:',i+1,'loss:',avg_cost,'acc:',accuracy)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/mnist/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/mnist/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting data/mnist/t10k-images-idx3-ubyte.gz
Extracting data/mnist/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
epochs: 1 loss: 28.88468648585407 acc: 0.9295
epochs: 2 loss: 5.447905037635568 acc: 0.9423
epochs: 3 loss: 2.8284919566736506 acc: 0.9476
epochs: 4 loss: 1.8984966477905423 acc: 0.9447
epochs: 5 loss: 1.362716485215845 acc: 0.9533
epochs: 6 loss: 1.2237135898893499 acc: 0.9588
epochs: 7 loss: 1.0794

### 3 正则化

所谓的正则化,其实就是在神经网络计算损失值的过程中,在损失后面再加一项.随着模型复杂度增加,那么正则化损失也会增加,这样就能防止过拟合.
+ $L1$损失:所有学习参数 $w$ 的绝对值的和.
+ $L2$损失:所有学习参数 $w$ 的平方和再求平方根 (注意: `tf.nn.l2_loss(w)` 实际计算的是 $\frac{1}{2}\sum w^2$,不求平方根)

```python
# TensorFlow 中 L2 正则化函数为:
tf.nn.l2_loss(w,name=None)

# TensorFlow 中没有现成的 L1 正则化函数,需要自己组合
tf.reduce_sum(tf.abs(w))
```

In [6]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST with one-hot labels and build the model on a fresh default graph.
mnist = input_data.read_data_sets("data/mnist/",one_hot = True)
tf.reset_default_graph()

# Inputs: 784 values per image, 10-way one-hot labels.
x = tf.placeholder(dtype=tf.float32,shape=[None,784])
y = tf.placeholder(dtype=tf.float32,shape=[None,10])

# Two hidden layers of 256 units each, followed by a 10-unit output layer.
h1 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[784,256]))
b1 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

h2 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,256]))
b2 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

h_out = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,10]))
b_out = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[10]))

layer_1 = tf.nn.relu(tf.matmul(x,h1)+b1)
layer_2 = tf.nn.relu(tf.matmul(layer_1,h2)+b2)
# Raw logits: the loss below takes logits, so no softmax is applied here.
layer_out = tf.matmul(layer_2,h_out)+b_out

# Cross-entropy plus an L2 penalty on the three weight matrices (biases are
# not penalised). Over-fitting is mild on this dataset, so the penalty has
# little effect on the result.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_out,labels=y))
cost = cost + tf.nn.l2_loss(h1)*0.001+tf.nn.l2_loss(h2)*0.001+tf.nn.l2_loss(h_out)*0.001
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

# Evaluation ops are built once, before training starts. Creating them inside
# the epoch loop would add a new copy of these nodes to the graph every epoch.
# An entry is True where the predicted class index equals the label index.
correct_prediction = tf.equal(tf.argmax(layer_out,axis=1),tf.argmax(y,axis=1))
acc = tf.reduce_mean(tf.cast(correct_prediction,dtype=tf.float32))

train_epochs = 20
batch_size = 100
display_step = 1
# Number of mini-batches per epoch; loop-invariant, so computed once.
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(train_epochs):
        # Running mean of the (regularised) mini-batch losses over this epoch.
        avg_cost = 0.0
        for j in range(total_batch):
            batch_x,batch_y = mnist.train.next_batch(batch_size)
            _,loss = sess.run([optimizer,cost],feed_dict={x:batch_x,y:batch_y})
            avg_cost = avg_cost+loss/total_batch
        if (i+1)%display_step==0:
            # Accuracy is measured on the full test split.
            accuracy = sess.run(acc,feed_dict={x:mnist.test.images,y:mnist.test.labels})
            print('epochs:',i+1,'loss:',avg_cost,'acc:',accuracy)

Extracting data/mnist/train-images-idx3-ubyte.gz
Extracting data/mnist/train-labels-idx1-ubyte.gz
Extracting data/mnist/t10k-images-idx3-ubyte.gz
Extracting data/mnist/t10k-labels-idx1-ubyte.gz
epochs: 1 loss: 103.19655143737795 acc: 0.9278
epochs: 2 loss: 58.23222935763277 acc: 0.9389
epochs: 3 loss: 41.8394661088423 acc: 0.9445
epochs: 4 loss: 30.256434114629588 acc: 0.9408
epochs: 5 loss: 21.46111306277189 acc: 0.9509
epochs: 6 loss: 14.805812995217059 acc: 0.95
epochs: 7 loss: 9.72043402411721 acc: 0.9527
epochs: 8 loss: 6.158308019638061 acc: 0.952
epochs: 9 loss: 3.612032660137522 acc: 0.9595
epochs: 10 loss: 1.9632780209454623 acc: 0.9615
epochs: 11 loss: 1.0657167414101694 acc: 0.9593
epochs: 12 loss: 0.6343103901364584 acc: 0.962
epochs: 13 loss: 0.4481143177639356 acc: 0.9525
epochs: 14 loss: 0.36888129472732567 acc: 0.9554
epochs: 15 loss: 0.34787350337613737 acc: 0.9539
epochs: 16 loss: 0.33314223213629285 acc: 0.9531
epochs: 17 loss: 0.3303274585171174 acc: 0.9475
epochs: 

### 4 dropout

dropout 在每次训练过程中,随机选择一部分节点不参与学习.过拟合的问题恰恰是把异常数据当成规律来学习了,所以 dropout 会忽略一些节点:对于数量比较大的正常样本,忽略一部分不碍事,因为还有很多;对于异常样本,忽略了则有利于模型的效果.

dropout 改变了神经网络的结构,它仅仅是属于训练时的方法,所以一般在进行测试时要将 dropout 的 keep_prob 变为 1 ,代表不需要丢弃,否则影响模型的正常输出

In [8]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST with one-hot labels and build the model on a fresh default graph.
mnist = input_data.read_data_sets("data/mnist/",one_hot = True)
tf.reset_default_graph()

# Inputs: 784 values per image, 10-way one-hot labels.
x = tf.placeholder(dtype=tf.float32,shape=[None,784])
y = tf.placeholder(dtype=tf.float32,shape=[None,10])
# Keep probability for dropout; fed at run time so training and evaluation
# can use different values.
keep_prob = tf.placeholder(dtype=tf.float32)

# Two hidden layers of 256 units each, followed by a 10-unit output layer.
h1 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[784,256]))
b1 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

h2 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,256]))
b2 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

h_out = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,10]))
b_out = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[10]))

# Dropout is applied to the output of each hidden layer.
layer_1 = tf.nn.relu(tf.matmul(x,h1)+b1)
layer_1_dropout = tf.nn.dropout(x=layer_1,keep_prob=keep_prob)

layer_2 = tf.nn.relu(tf.matmul(layer_1_dropout,h2)+b2)
layer_2_dropout = tf.nn.dropout(x=layer_2,keep_prob=keep_prob)

# Raw logits: the loss below takes logits, so no softmax is applied here.
layer_out = tf.matmul(layer_2_dropout,h_out)+b_out

# Cross-entropy plus an L2 penalty on the three weight matrices (biases are
# not penalised).
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_out,labels=y))
cost = cost + tf.nn.l2_loss(h1)*0.001+tf.nn.l2_loss(h2)*0.001+tf.nn.l2_loss(h_out)*0.001
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

# Evaluation ops are built once, before training starts. Creating them inside
# the epoch loop would add a new copy of these nodes to the graph every epoch.
# An entry is True where the predicted class index equals the label index.
correct_prediction = tf.equal(tf.argmax(layer_out,axis=1),tf.argmax(y,axis=1))
acc = tf.reduce_mean(tf.cast(correct_prediction,dtype=tf.float32))

train_epochs = 20
batch_size = 100
display_step = 1
# Number of mini-batches per epoch; loop-invariant, so computed once.
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(train_epochs):
        # Running mean of the (regularised) mini-batch losses over this epoch.
        avg_cost = 0.0
        for j in range(total_batch):
            batch_x,batch_y = mnist.train.next_batch(batch_size)
            # Dropout is active during training (keep_prob 0.8).
            _,loss = sess.run([optimizer,cost],feed_dict={x:batch_x,y:batch_y,keep_prob:0.8})
            avg_cost = avg_cost+loss/total_batch
        if (i+1)%display_step==0:
            # Dropout must be disabled for evaluation (keep_prob 1.0).
            accuracy = sess.run(acc,feed_dict={x:mnist.test.images,y:mnist.test.labels,keep_prob:1.})
            print('epochs:',i+1,'loss:',avg_cost,'acc:',accuracy)

Extracting data/mnist/train-images-idx3-ubyte.gz
Extracting data/mnist/train-labels-idx1-ubyte.gz
Extracting data/mnist/t10k-images-idx3-ubyte.gz
Extracting data/mnist/t10k-labels-idx1-ubyte.gz
epochs: 1 loss: 126.09144298206687 acc: 0.9237
epochs: 2 loss: 63.4208103942871 acc: 0.926
epochs: 3 loss: 45.32155839399856 acc: 0.9109
epochs: 4 loss: 31.962404528531174 acc: 0.9107
epochs: 5 loss: 22.071054656288837 acc: 0.9328
epochs: 6 loss: 14.551033335599032 acc: 0.9456
epochs: 7 loss: 9.062510582317014 acc: 0.9534
epochs: 8 loss: 5.297731639255179 acc: 0.9562
epochs: 9 loss: 2.9327388191223145 acc: 0.9599
epochs: 10 loss: 1.5967770346728243 acc: 0.9606
epochs: 11 loss: 0.9326776024428278 acc: 0.952
epochs: 12 loss: 0.6448689064654438 acc: 0.9553
epochs: 13 loss: 0.5292343805053019 acc: 0.956
epochs: 14 loss: 0.507958949804306 acc: 0.9455
epochs: 15 loss: 0.5028879054026167 acc: 0.9542
epochs: 16 loss: 0.5305594129995872 acc: 0.9501
epochs: 17 loss: 0.533424191637473 acc: 0.9452
epochs: 1

### 5 全连接网络的深浅关系

全连接网络只要有足够多的神经元,即使只有一层隐含层,利用常用的 sigmoid 和 relu 激活函数,也可以无限逼近任何连续函数.

在实际中,如果需要使用浅层神经网络来拟合复杂的非线性函数,就需要依靠增加神经元的个数来实现. 神经元越多意味着需要训练的参数越多,训练速度比较慢,泛化能力差. <font color='red'> 一般倾向于使用更深的模型,来减少网络中所需神经元的数量,提升网络的泛化性能</font>