In [2]:
import cv2
import sys
import os
import time
import random
import numpy as np
import tensorflow as tf

In [3]:
# Geometry of one training sample: each image is flattened to SIZE values and
# later reshaped to WIDTH x HEIGHT x 1 before entering the network.
WIDTH = 16
HEIGHT = 16
SIZE = WIDTH * HEIGHT

# Number of output classes (one sub-directory of training images per class).
NUM_CLASSES = 10

# Number of passes over the whole training set.
iterations = 100

# Directory where the trained checkpoint is written.
SAVER_DIR = "train_saver/digits/"
# NOTE(review): TRAIN_DIR, LETTERS_DIGITS and license_num are not used in the
# cells visible here; presumably they are consumed by a later prediction cell.
TRAIN_DIR = ""
LETTERS_DIGITS = tuple(str(digit) for digit in range(10))
license_num = ""

# Start of the wall-clock measurement for the image-loading step.
time_begin = time.time()

# Collect every training file exactly once together with its class label.
# Walking the tree a single time guarantees that the number of rows allocated
# below always matches the number of images actually loaded, and joining with
# the directory reported by os.walk keeps paths valid for nested folders.
training_files = []
for label in range(NUM_CLASSES):
    # Change this to your own image directory; the sub-directory name is the class label.
    class_dir = './train_images/train/%s/' % label
    for root, _subdirs, filenames in os.walk(class_dir):
        for filename in filenames:
            training_files.append((os.path.join(root, filename), label))

input_count = len(training_files)

# One row per image: SIZE binary pixel features and a one-hot label vector.
# np.zeros keeps the 2-D shape even when no image was found.
input_images = np.zeros((input_count, SIZE), dtype=int)
input_labels = np.zeros((input_count, NUM_CLASSES), dtype=int)

index = 0
for image_path, label in training_files:
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None.
        raise ValueError("Cannot read training image: %s" % image_path)
    if img.size != SIZE:
        raise ValueError(
            "Training image %s has shape %s; expected %d pixels"
            % (image_path, img.shape, SIZE)
        )

    # Binarise on the raw grayscale values: pixels brighter than 250 become 0,
    # everything else becomes 1. (The original author notes that this thins
    # the strokes and helps recognition accuracy.)
    # NOTE(review): the previous code also ran cv2.adaptiveThreshold but only
    # used the result for its shape, so that call was dropped as dead work.
    # The flat position of pixel (row, col) is row + col * rows, i.e.
    # column-major order, which is what order='F' produces.
    input_images[index] = (img <= 250).astype(int).flatten(order='F')
    input_labels[index][label] = 1
    index += 1

time_elapsed = time.time() - time_begin
print("读取图片文件耗费时间：%d秒" % time_elapsed)


读取图片文件耗费时间：13秒


In [4]:
# Convolution layer helper: conv2d + bias + ReLU, followed by max pooling
def conv_layer(inputs, W, b, conv_strides, kernel_size, pool_strides, padding):
    """Apply a convolution, bias + ReLU, and max pooling to ``inputs``.

    Args:
        inputs: input tensor passed to ``tf.nn.conv2d``.
        W: convolution filter tensor.
        b: bias added to the convolution output before the ReLU.
        conv_strides: strides used by the convolution.
        kernel_size: ``ksize`` of the max-pooling window.
        pool_strides: strides of the max-pooling window.
        padding: padding mode of the convolution only; the pooling step
            always uses ``'SAME'``.

    Returns:
        The max-pooled activation tensor.
    """
    activated = tf.nn.relu(
        tf.nn.conv2d(inputs, W, strides=conv_strides, padding=padding) + b
    )
    pooled = tf.nn.max_pool(
        activated,
        ksize=kernel_size,
        strides=pool_strides,
        padding='SAME',
    )
    return pooled
 
# Fully connected layer helper
def full_connect(inputs, W, b):
    """Return ``relu(inputs @ W + b)``.

    Args:
        inputs: 2-D input tensor passed to ``tf.matmul``.
        W: weight matrix.
        b: bias added to the matrix product.
    """
    pre_activation = tf.matmul(inputs, W) + b
    return tf.nn.relu(pre_activation)

# Parameter summaries
def variable_summaries(var):
    """Register mean, stddev, max, min and histogram summaries for ``var``.

    All summary ops are created inside a ``'summaries'`` name scope; the
    standard-deviation computation lives in a nested ``'stddev'`` scope.
    Nothing is returned; the ops are collected later by
    ``tf.summary.merge_all()``.
    """
    with tf.name_scope('summaries'):
        var_mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', var_mean)
        with tf.name_scope('stddev'):
            deviation = var - var_mean
            var_stddev = tf.sqrt(tf.reduce_mean(tf.square(deviation)))
        tf.summary.scalar('stddev', var_stddev)
        # Extremes are reported in the same order as before: max, then min.
        for tag, reducer in (('max', tf.reduce_max), ('min', tf.reduce_min)):
            tf.summary.scalar(tag, reducer(var))
        tf.summary.histogram('histogram', var)
# Input nodes: `x` receives batches of flattened image pixel vectors (SIZE
# values per image) and `y` receives the matching one-hot label vectors
# (NUM_CLASSES values per image). Both live under the 'input' name scope.
with tf.name_scope('input'):
    with tf.name_scope('input_x'):
        x = tf.placeholder(tf.float32, shape=[None, SIZE])
    with tf.name_scope('input_y'):
        y = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES])

# Restore the 2-D layout with a single channel so the vectors can be fed to conv2d.
x_image = tf.reshape(x, [-1, WIDTH, HEIGHT, 1])
        
# Build the network, train it on the images loaded above and save a checkpoint.
# Name scopes and variable names are kept unchanged so existing checkpoints and
# TensorBoard logs stay compatible.
with tf.Session() as sess:

    # First convolutional layer: 8x8 kernels, 1 -> 16 channels, followed by
    # 2x2 max pooling with stride 2 (16x16 input becomes 8x8).
    with tf.name_scope('conv_layer1'):
        with tf.name_scope('W_conv1'):
            W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 1, 16], stddev=0.1), name="W_conv1")
            variable_summaries(W_conv1)
        with tf.name_scope('b_conv1'):
            b_conv1 = tf.Variable(tf.constant(0.1, shape=[16]), name="b_conv1")
            variable_summaries(b_conv1)
        with tf.name_scope('L1_pool'):
            conv_strides = [1, 1, 1, 1]
            kernel_size = [1, 2, 2, 1]
            pool_strides = [1, 2, 2, 1]
            L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides, kernel_size, pool_strides, padding='SAME')

    # Second convolutional layer: 5x5 kernels, 16 -> 32 channels. The pooling
    # window and stride are both 1, so the spatial size stays 8x8.
    with tf.name_scope('conv_layer2'):
        with tf.name_scope('W_conv2'):
            W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 16, 32], stddev=0.1), name="W_conv2")
            variable_summaries(W_conv2)
        with tf.name_scope('b_conv2'):
            b_conv2 = tf.Variable(tf.constant(0.1, shape=[32]), name="b_conv2")
            variable_summaries(b_conv2)
        with tf.name_scope('L2_pool'):
            conv_strides = [1, 1, 1, 1]
            kernel_size = [1, 1, 1, 1]
            pool_strides = [1, 1, 1, 1]
            L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides, kernel_size, pool_strides, padding='SAME')

    # Fully connected layer: flattened 8*8*32 features -> 512 units.
    with tf.name_scope('fc_layer1'):
        with tf.name_scope('W_fc1'):
            W_fc1 = tf.Variable(tf.truncated_normal([8*8*32, 512], stddev=0.1), name="W_fc1")
            variable_summaries(W_fc1)
        with tf.name_scope('b_fc1'):
            b_fc1 = tf.Variable(tf.constant(0.1, shape=[512]), name="b_fc1")
            variable_summaries(b_fc1)
        with tf.name_scope('h_fc1'):
            h_pool2_flat = tf.reshape(L2_pool, [-1, 8 * 8*32])
            h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

    # Dropout: keep_prob is fed as 0.5 while training and 1.0 while evaluating.
    keep_prob = tf.placeholder(tf.float32)

    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    print(W_conv1.shape)
    print(b_conv1.shape)
    print(W_conv2.shape)
    print(b_conv2.shape)
    print(W_fc1.shape)
    print(b_fc1.shape)

    # Readout layer: 512 units -> NUM_CLASSES logits.
    with tf.name_scope('fc_layer2'):
        with tf.name_scope('W_fc2'):
            W_fc2 = tf.Variable(tf.truncated_normal([512, NUM_CLASSES], stddev=0.1), name="W_fc2")
            variable_summaries(W_fc2)
        with tf.name_scope('b_fc2'):
            b_fc2 = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]), name="b_fc2")
            variable_summaries(b_fc2)

    # Loss and training op.
    with tf.name_scope('optimizer'):
        with tf.name_scope('y_conv'):
            y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        with tf.name_scope('cross_entropy'):
            cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_conv))
            tf.summary.scalar('cross_entropy',cross_entropy)
        with tf.name_scope('train_step'):
            train_step = tf.train.AdamOptimizer((1e-4)).minimize(cross_entropy)

    # Accuracy: fraction of samples whose arg-max logit matches the label.
    with tf.name_scope('correct'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar('accuracy',accuracy)

    sess.run(tf.global_variables_initializer())

    time_begin = time.time()

    print ("一共读取了 %s 个训练图像， %s 个标签" % (input_count, input_count))

    # Mini-batch layout. The data set size need not be a multiple of
    # batch_size: e.g. 150 images with batch_size 60 gives two batches of 60
    # and a final batch of 30 (the remainder).
    batch_size = 60
    batches_count = input_count // batch_size
    remainder = input_count % batch_size
    print ("训练数据集分成 %s 批, 前面每批 %s 个数据，最后一批 %s 个数据" % (batches_count+1, batch_size, remainder))

    writer = tf.summary.FileWriter('logs/',sess.graph)

    # Merge all registered summaries into a single op.
    merged = tf.summary.merge_all()

    # Training iterations. try/finally makes sure the event file is flushed
    # and closed even when training is interrupted.
    try:
        for it in range(iterations):
            summary = None
            # Step through the data in strides of batch_size; the last slice is
            # clipped to input_count, so the remainder samples are trained on
            # too (previously the last full batch was fed a second time and
            # the tail of the data set was never used).
            for start_index in range(0, input_count, batch_size):
                end_index = min(start_index + batch_size, input_count)
                summary, _ = sess.run(
                    [merged, train_step],
                    feed_dict={
                        x: input_images[start_index:end_index],
                        y: input_labels[start_index:end_index],
                        keep_prob: 0.5,
                    },
                )

            # Log the summary of the last batch of this iteration (skipped
            # when there was no data to train on).
            if summary is not None:
                writer.add_summary(summary, it)

            # Every five iterations evaluate on the full training set and stop
            # early once accuracy has effectively reached 100%. The former
            # extra clause `it >= iterations` could never be true inside
            # range(iterations), which silently disabled early stopping.
            iterate_accuracy = 0
            if it % 5 == 0:
                iterate_accuracy = accuracy.eval(feed_dict={x: input_images, y: input_labels, keep_prob: 1.0})
                print ('第 %d 次训练迭代: 准确率 %0.5f%%' % (it, iterate_accuracy*100))
                if iterate_accuracy >= 0.9999:
                    break
    finally:
        writer.close()

    print ('完成训练!')
    time_elapsed = time.time() - time_begin
    print ("训练耗费时间：%d秒" % time_elapsed)
    time_begin = time.time()

    # Save the trained variables, creating the target directory if needed.
    if not os.path.exists(SAVER_DIR):
        print ('不存在训练数据保存目录，现在创建保存目录')
        os.makedirs(SAVER_DIR)
    # Create the saver and write the checkpoint.
    saver = tf.train.Saver()
    saver_path = saver.save(sess, "%smodel.ckpt"%(SAVER_DIR))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
(8, 8, 1, 16)
(16,)
(5, 5, 16, 32)
(32,)
(2048, 512)
(512,)
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

一共读取了 10144 个训练图像， 10144 个标签
训练数据集分成 170 批, 前面每批 60 个数据，最后一批 4 个数据
第 0 次训练迭代: 准确率 20.63289%
第 5 次训练迭代: 准确率 54.85016%
第 10 次训练迭代: 准确率 80.05717%
第 15 次训练迭代: 准确率 91.04890%
第 20 次训练迭代: 准确率 94.74566%
第 25 次训练迭代: 准确率 96.36238%
第 30 次训练迭代: 准确率 97.19046%
第 35 次训练迭代: 准确率 97.11159%
第 40 次训练迭代: 准确率 97.82137%
第 45 次训练迭代: 准确率 98.43257%
第 50 次训练迭代: 准确率 98.78746%
第 55 次训练迭代: 准确率 99.31980%
第 60 次训练迭代: 准确率 99.69440%
第 65 次训练迭代: 准确率 99.81270%
第 70 次训练迭代: 准确率 99.82256%
第 75 次训练迭代: 准确率 99.92114%
第 80 次训练迭代: 准确率 99.95071%
第 85 次训练迭代: 准确率 99.96057%
第 90 次训练迭代: 准确率 99.95071%
第 9

KeyboardInterrupt: 