In [1]:
# -*- coding:utf-8 -*-
"""
@author:          Bin.Chen
@date:             2019/07/08 
@software:       Jupyter Notebook
@Environment : python3.6
@Description:    CNN框架，力求网络结构完全可控
"""

'\n@author:          Bin.Chen\n@date:             2019/07/08 \n@software:       Jupyter Notebook\n@Environment : python3.6\n@Description:    CNN框架，力求网络结构完全可控\n'

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
def full_connected(input_tensor,hidden_layer,regularizer=None,dropout=None,reuse=False):
    """
    全连接层的计算
    :input_tensor:           输入数据，矩阵，形如[[1,2],[3,4]]表示矩阵第一行为[1,2]，第二行为[3,4]
    :param hidden_layer: 隐藏层结构，列表，例：[3,4]表示第一个隐藏层有3个节点，第二个隐藏层有4个节点，以此类推，可自行增减，
                                 所谓隐藏层，不包含输入层，但包含输出层，调整参数时注意最后一层节点个数
    :return:                    经计算后的输出
    """
    layer = input_tensor
    for i in range(1,len(hidden_layer)+1):
        with tf.variable_scope('layer{}'.format(i),reuse=reuse):
            weights = tf.get_variable(
                'weights',shape=[layer.shape[1],hidden_layer[i-1]],initializer=tf.truncated_normal_initializer(mean=0,stddev=0.1,seed=1)
            )
            if regularizer != None:
                tf.add_to_collection('losses',regularizer(weights))
            biases = tf.get_variable(
                'bias',shape=[hidden_layer[i-1]],initializer=tf.constant_initializer(0.1)
            )
            layer = tf.nn.tanh(
                tf.matmul(layer,weights)+biases
            )
            if dropout != None:
                layer = tf.nn.dropout(layer,dropout[0],noise_shape=dropout[1])
    return layer


In [4]:
def conv(input_tensor,structure,reuse=False):
    """
    卷积和池化层的计算
    :input_tensor:           输入数据
    :param structure:      形如
                                                        structure = {
                                                    1:{
                                                        'conv':{'filter':[5,5,3,16],'stride':[1,1,1,1],'padding':'SAME'},
                                                        'pool':{'filter':[1,3,3,1],'stride':[1,2,2,1],'padding':'SAME'}
                                                    }
                                                }
                                    直接将其复制粘贴，1表示第一层卷积层结构，如需要多层卷积，加2，3。。。照搬格式即可，
                                    conv对应的filter为卷积过滤器尺寸、当前层深度、过滤器深度，当前层深度初值为图形数据的深度，
                                    如果有多层卷积，注意structure中当前层深度等于前一层的过滤器深度,
                                    stride首尾为1，不可更改，中间俩表示长宽维度上的步长，
                                    padding表示是否使用全零填充，SAME或者VALID
                                    pool对应的filter为池化过滤器尺寸，首尾必须为1，stride意义同上，
                                    如果没有池化层，删去'pool'对应的字典即可
    :return:                    经计算后的输出
    """
    clayer = input_tensor
    for i in range(1,len(structure)+1):
        with tf.variable_scope('clayer{}'.format(i),reuse=reuse):
            filter_weight = tf.get_variable(
                'weights',structure[i]['conv']['filter'],initializer=tf.truncated_normal_initializer(mean=0,stddev=0.1,seed=1)
            )
            biases = tf.get_variable(
                'biases',[structure[i]['conv']['filter'][3]],initializer=tf.constant_initializer(0.1)
            )
            conv = tf.nn.conv2d(
                clayer,filter_weight,structure[i]['conv']['stride'],padding=structure[i]['conv']['padding']
            )
            bias = tf.nn.bias_add(conv,biases)
            activated_conv = tf.nn.tanh(bias)
            if 'pool' in structure[i]:
                pool = tf.nn.max_pool(
                    activated_conv,ksize=structure[i]['pool']['filter'],strides=structure[i]['pool']['stride'],
                    padding=structure[i]['conv']['padding']
                )
                clayer = pool
            else:
                clayer = activated_conv
    return clayer

In [5]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('./mnist/data')
data_size = len(mnist.train.images)
test_data_size = len(mnist.test.images)
image_size = 28
channel = 1
X_train = np.reshape(mnist.train.images,(data_size,image_size,image_size,channel))
Y_train_num = mnist.train.labels
Y_train = []
for i in Y_train_num:
    temp = [0,0,0,0,0,0,0,0,0,0]
    temp[i] = 1
    Y_train.append(temp)  
X_test = np.reshape(mnist.test.images,(test_data_size,image_size,image_size,channel))
Y_test = mnist.test.labels

tol = 0.0001
# 终止条件
batch_size = int(0.9*data_size)
# 随机梯度下降的一个batch大小，设为1*data_size即为不使用随机梯度下降
STEPS = 200
# 迭代轮数上限
learning_rate_base = 0.7
# 初始学习率
learning_rate_decay = 0.9
# 学习率衰退速度，设为1即为不适用指数衰减法
stair_num = 20
# 梯形衰退参数，每过stair_num轮迭代，指数衰减一次
dropout = [0.2,None]
# dropout参数设置，第一个参数表示权重变为0的概率，第二个参数可以使得矩阵的一部分全为0，是一个列表，不需要此功能则为None
# 例如：[0.5,None]，也可以是[0.5,[3,1]]
# 如果不用dropout功能，dropout = None即可
optimizer = 'Adam'
# 优化方法选择，可选：Adam, GradientDescent,Momentum
hidden_layer = [84,10]
# 全连接层的隐藏层结构，参数说明见上文
conv_structure = {
    1:{
        'conv':{'filter':[5,5,1,6],'stride':[1,1,1,1],'padding':'SAME'},
        'pool':{'filter':[1,2,2,1],'stride':[1,2,2,1],'padding':'SAME'}
    },
    2:{
        'conv':{'filter':[5,5,6,16],'stride':[1,1,1,1],'padding':'SAME'},
        'pool':{'filter':[1,2,2,1],'stride':[1,2,2,1],'padding':'SAME'}
    }
}
# 卷积层结构，参数说明见上文
regularizer = tf.contrib.layers.l2_regularizer(0.001)
# 正则化参数，如需使用L1正则化，将函数名中的2改为1即可

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./mnist/data\train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./mnist/data\train-labels-idx1-ubyte.gz
Extracting ./mnist/data\t10k-images-idx3-ubyte.gz
Extracting ./mnist/data\t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [6]:
x = tf.placeholder(tf.float32,[None,image_size,image_size,channel],name='x_input')
y_ = tf.placeholder(tf.float32,[None,10],name='y_input')
clayer = conv(x,structure=conv_structure)
pool_shape = clayer.get_shape().as_list()
nodes = pool_shape[1]*pool_shape[2]*pool_shape[3]
reshaped = tf.reshape(clayer,[-1,nodes])
# 因为要使用随机梯度下降，一个batch内数据量不固定，参数-1表示由程序确定第一个维度大小，
# 原本用None也行，但是None会报错，应当是新版本有所改变
y = full_connected(reshaped,hidden_layer,regularizer,dropout)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_,logits=y)
)
tf.add_to_collection('losses',cost)
loss = tf.add_n(tf.get_collection('losses'))
global_step = tf.Variable(0,trainable=False)
learning_rate = tf.train.exponential_decay(learning_rate_base,global_step,10,learning_rate_decay,staircase=True)
# staircase参数为True表示学习率梯形下降，每过一定轮数迭代乘以learning_rate_decay
if optimizer == 'Adam':
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss,global_step=global_step)
elif optimizer == 'GradientDescent':
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)
elif optimizer == 'Momentum':
    train_step = tf.train.MomentumOptimizer(learning_rate).minimize(loss,global_step=global_step)
total_cross_entropy = []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(STEPS):
        start = (i*batch_size)%data_size
        end = min(start+batch_size, data_size)
        sess.run(train_step,feed_dict={x:X_train[start:end], y_:Y_train[start:end]})
        total_cross_entropy.append(sess.run(loss,feed_dict={x:X_train, y_:Y_train}))
        if i%10 ==0:
            print(i, total_cross_entropy[i])
        if i > 0:
            if abs(total_cross_entropy[i]-total_cross_entropy[i-1]) <= tol:
                break
    y_prob = sess.run(tf.nn.softmax(y),feed_dict={x:X_test})
    y_label = sess.run(tf.argmax(y_prob,1))
    hit = 0
    for i in range(test_data_size):
        if y_label[i]==Y_test[i]:
            hit += 1
    accuracy = hit/test_data_size
    print(accuracy)
    

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

0 20.736486
10 15.604498
20 10.456354
30 11.808698
40 7.96723
50 6.1782546
60 6.668642
70 7.732251
80 5.8408594
90 4.603943
100 3.9713244
110 3.2840233
120 3.2707956
130 3.1290329
140 3.1094031
150 3.0778682
160 3.073609
170 3.0750597
180 3.060993
190 3.0616088
0.1014


In [20]:
i = 9
print(y_label[i],'\n',y_prob[i],'\n',Y_test[i])

0 
 [0.1247898  0.00084083 0.1247898  0.00084083 0.1247898  0.1247898
 0.1247898  0.1247898  0.1247898  0.1247898 ] 
 9
