## <center>多层神经网络</center>

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
# Expose only one GPU to this process; CUDA reads the device id as a string.
gpu_no = '0'
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_no

# TensorFlow session configuration for GPU memory handling:
#   allow_growth                    -> grab GPU memory on demand instead of up front
#   per_process_gpu_memory_fraction -> share of GPU memory this process may use
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(
        allow_growth=True,
        per_process_gpu_memory_fraction=0.1,
    )
)

# The configuration only takes effect when it is handed to the session constructor.
sess = tf.Session(config=config)

### 1 使用隐藏层解决非线性问题

In [3]:
# Network inputs: two features per sample in, one target value per sample out.
x = tf.placeholder(tf.float32, [None, 2])
y = tf.placeholder(tf.float32, [None, 1])

# Weight matrices: 2 -> 3 for the hidden layer, 3 -> 1 for the output layer.
h1 = tf.Variable(tf.truncated_normal([2, 3]), dtype=tf.float32)
h2 = tf.Variable(tf.truncated_normal([3, 1]), dtype=tf.float32)
# Biases, one per unit of the corresponding layer, initialised to zero.
b1 = tf.Variable(tf.zeros([3]), dtype=tf.float32)
b2 = tf.Variable(tf.zeros([1]), dtype=tf.float32)

# Forward pass: ReLU hidden layer followed by a sigmoid output.
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, h1), b1))
pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, h2), b2))

# Mean squared error between target and prediction, minimised with Adam.
squared_error = tf.square(y - pred)
cost = tf.reduce_mean(squared_error)
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

In [5]:
# XOR truth table: the four input pairs and their expected outputs.
# Both arrays are float32 so they match the float32 placeholders they are fed
# into; the targets go into a squared-error cost, so an integer dtype would
# only work through an implicit cast at feed time.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
Y = np.array([[0], [1], [1], [0]], dtype=np.float32)

In [6]:
# Initialise every variable of the graph before the first training step.
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Full-batch training: every step feeds all four XOR samples.
n_steps = 10000
train_feed = {x: X, y: Y}
for step in range(n_steps):
    sess.run(optimizer, feed_dict=train_feed)

# Show the network output for each of the four inputs.
xor_pred = sess.run(pred, feed_dict={x: X})
print(xor_pred)

[[9.9104259e-04]
 [9.9940002e-01]
 [9.9880183e-01]
 [6.0765416e-04]]


### 2 使用全连接网络对图片分类

In [12]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets("data/mnist/",one_hot = True)
# Drop the graph built in the previous section before defining this model.
tf.reset_default_graph()

# Inputs: 784 values per image, 10-way one-hot label per image.
x = tf.placeholder(dtype=tf.float32,shape=[None,784])
y = tf.placeholder(dtype=tf.float32,shape=[None,10])

# NOTE(review): no stddev is passed to tf.truncated_normal; the large
# first-epoch loss may come from the initialiser scale -- confirm, and consider
# a smaller stddev.
# Hidden layer 1: 784 -> 256.
h1 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[784,256]))
b1 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

# Hidden layer 2: 256 -> 256.
h2 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,256]))
b2 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

# Output layer: 256 -> 10 class scores.
h_out = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,10]))
b_out = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[10]))

# Forward pass: two ReLU layers, then raw logits (softmax is applied inside the loss).
layer_1 = tf.nn.relu(tf.matmul(x,h1)+b1)
layer_2 = tf.nn.relu(tf.matmul(layer_1,h2)+b2)
layer_out = tf.matmul(layer_2,h_out)+b_out

# Mean softmax cross-entropy over the batch, minimised with Adam.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_out,labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy ops are built ONCE here. Creating them inside the epoch loop would
# add a fresh set of nodes to the graph on every evaluation.
# correct_prediction is True where the predicted class index equals the label
# index, and False otherwise.
correct_prediction = tf.equal(tf.argmax(layer_out,axis=1),tf.argmax(y,axis=1))
acc = tf.reduce_mean(tf.cast(correct_prediction,dtype=tf.float32))

train_epochs = 20
batch_size = 100
display_step = 1
# Number of mini-batches per epoch; it does not change between epochs.
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(train_epochs):
        # Running mean of the mini-batch losses for this epoch.
        avg_cost = 0.0
        for j in range(total_batch):
            batch_x,batch_y = mnist.train.next_batch(batch_size)
            _,loss = sess.run([optimizer,cost],feed_dict={x:batch_x,y:batch_y})
            avg_cost = avg_cost+loss/total_batch
        if (i+1)%display_step==0:
            # Evaluate on the whole test split with the pre-built accuracy op.
            accuracy = sess.run(acc,feed_dict={x:mnist.test.images,y:mnist.test.labels})
            print('epochs:',i+1,'loss:',avg_cost,'acc:',accuracy)

Extracting data/mnist/train-images-idx3-ubyte.gz
Extracting data/mnist/train-labels-idx1-ubyte.gz
Extracting data/mnist/t10k-images-idx3-ubyte.gz
Extracting data/mnist/t10k-labels-idx1-ubyte.gz
epochs: 1 loss: 31.54041914766484 acc: 0.9218
epochs: 2 loss: 5.803514355055312 acc: 0.938
epochs: 3 loss: 3.1674637403753776 acc: 0.9478
epochs: 4 loss: 2.0671564910447584 acc: 0.9488
epochs: 5 loss: 1.6037047074092095 acc: 0.9563
epochs: 6 loss: 1.4405812888084677 acc: 0.9478
epochs: 7 loss: 1.2604456870241894 acc: 0.9578
epochs: 8 loss: 1.0620960363571688 acc: 0.955
epochs: 9 loss: 0.9612375956136133 acc: 0.9543
epochs: 10 loss: 0.9075811765061731 acc: 0.9583
epochs: 11 loss: 0.7633365943931096 acc: 0.9547
epochs: 12 loss: 0.5869159922126443 acc: 0.9598
epochs: 13 loss: 0.4966547162856433 acc: 0.9587
epochs: 14 loss: 0.46215647156189715 acc: 0.9616
epochs: 15 loss: 0.4761607729830023 acc: 0.9573
epochs: 16 loss: 0.35861462562042923 acc: 0.9583
epochs: 17 loss: 0.30204927054404107 acc: 0.9596


### 3 正则化

所谓的正则化,其实就是在神经网络计算损失值的过程中,在损失后面再加一项.随着模型复杂度增加,那么正则化损失也会增加,这样就能防止过拟合.
+ $L1$损失:所有学习参数 $w$ 的绝对值的和.
+ $L2$损失:所有学习参数 $w$ 的平方和,即 $\sum_i w_i^2$(不需要再开平方根;`tf.nn.l2_loss(w)` 实际返回 $\frac{1}{2}\sum_i w_i^2$).

```python
# TensorFlow 中 L2 正则化函数为:
tf.nn.l2_loss(w,name=None)

# TensorFlow 中没有现成的 L1 正则化函数,需要自己组合:
tf.reduce_sum(tf.abs(w))
```

In [13]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets("data/mnist/",one_hot = True)
# Drop the graph built in the previous section before defining this model.
tf.reset_default_graph()

# Inputs: 784 values per image, 10-way one-hot label per image.
x = tf.placeholder(dtype=tf.float32,shape=[None,784])
y = tf.placeholder(dtype=tf.float32,shape=[None,10])

# Hidden layer 1: 784 -> 256.
h1 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[784,256]))
b1 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

# Hidden layer 2: 256 -> 256.
h2 = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,256]))
b2 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[256]))

# Output layer: 256 -> 10 class scores.
h_out = tf.Variable(dtype=tf.float32,initial_value=tf.truncated_normal(shape=[256,10]))
b_out = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[10]))

# Forward pass: two ReLU layers, then raw logits (softmax is applied inside the loss).
layer_1 = tf.nn.relu(tf.matmul(x,h1)+b1)
layer_2 = tf.nn.relu(tf.matmul(layer_1,h2)+b2)
layer_out = tf.matmul(layer_2,h_out)+b_out

# Overfitting is mild on this dataset, so adding the penalty changes the
# accuracy very little.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_out,labels=y))
# L2 penalty on the three weight matrices (biases are not penalised). The terms
# are added one by one, in the order h1, h2, h_out.
l2_scale = 0.001
for weights in (h1, h2, h_out):
    cost = cost + tf.nn.l2_loss(weights)*l2_scale
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy ops are built ONCE here. Creating them inside the epoch loop would
# add a fresh set of nodes to the graph on every evaluation.
# correct_prediction is True where the predicted class index equals the label
# index, and False otherwise.
correct_prediction = tf.equal(tf.argmax(layer_out,axis=1),tf.argmax(y,axis=1))
acc = tf.reduce_mean(tf.cast(correct_prediction,dtype=tf.float32))

train_epochs = 20
batch_size = 100
display_step = 1
# Number of mini-batches per epoch; it does not change between epochs.
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(train_epochs):
        # Running mean of the mini-batch losses (penalty included) for this epoch.
        avg_cost = 0.0
        for j in range(total_batch):
            batch_x,batch_y = mnist.train.next_batch(batch_size)
            _,loss = sess.run([optimizer,cost],feed_dict={x:batch_x,y:batch_y})
            avg_cost = avg_cost+loss/total_batch
        if (i+1)%display_step==0:
            # Evaluate on the whole test split with the pre-built accuracy op.
            accuracy = sess.run(acc,feed_dict={x:mnist.test.images,y:mnist.test.labels})
            print('epochs:',i+1,'loss:',avg_cost,'acc:',accuracy)

Extracting data/mnist/train-images-idx3-ubyte.gz
Extracting data/mnist/train-labels-idx1-ubyte.gz
Extracting data/mnist/t10k-images-idx3-ubyte.gz
Extracting data/mnist/t10k-labels-idx1-ubyte.gz
epochs: 1 loss: 107.72825730757276 acc: 0.926
epochs: 2 loss: 60.60393948641685 acc: 0.9409
epochs: 3 loss: 44.68430514248934 acc: 0.9481
epochs: 4 loss: 32.92696786360306 acc: 0.946
epochs: 5 loss: 23.92089685613461 acc: 0.9475
epochs: 6 loss: 16.8419449892911 acc: 0.9514
epochs: 7 loss: 11.463425773273814 acc: 0.952
epochs: 8 loss: 7.399854530854652 acc: 0.9484
epochs: 9 loss: 4.456128301620484 acc: 0.9447
epochs: 10 loss: 2.5142341375350945 acc: 0.9491
epochs: 11 loss: 1.3514582600376828 acc: 0.9578
epochs: 12 loss: 0.7686492352052176 acc: 0.9622
epochs: 13 loss: 0.492222175056284 acc: 0.9578
epochs: 14 loss: 0.38345455386421945 acc: 0.9491
epochs: 15 loss: 0.34847966836257427 acc: 0.9573
epochs: 16 loss: 0.33561972964893716 acc: 0.9533
epochs: 17 loss: 0.33416108854792376 acc: 0.9515
epochs:

### 4 dropout