In [2]:
import os
import tensorflow as tf
from PIL import Image
from nets import nets_factory
import numpy as np

In [3]:
# ---- dataset / batching constants ----
CHAR_SET_LEN = 10                     # passed as num_classes and as the one-hot depth further down
IMAGE_HEIGHT = 60                     # image height
IMAGE_WIDTH = 160                     # image width
# NOTE(review): the placeholder and the decoded records below are 224x224, not
# IMAGE_HEIGHT x IMAGE_WIDTH -- presumably the images were resized when the
# TFRecord file was written; confirm against the code that generated it.
BATCH_SIZE = 32                       # batch size
TFRECOD_FILE = " /train.tfrecords"    # tfrecord file path

# ---- graph inputs ----
# One batch of single-channel 224x224 images.
x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224])
# One label placeholder per output head; declared float32 and cast to int32
# before one-hot encoding in the training cell.
y0 = tf.placeholder(dtype=tf.float32, shape=[None])
y1 = tf.placeholder(dtype=tf.float32, shape=[None])
y2 = tf.placeholder(dtype=tf.float32, shape=[None])
y3 = tf.placeholder(dtype=tf.float32, shape=[None])

# Learning rate is a graph variable (not a Python float) so it can be
# reassigned while training is running.
lr = tf.Variable(initial_value=0.003, dtype=tf.float32)

# import image data from tfrecords
def read_and_decode(filename):
    """Build the graph ops that read a single example from a TFRecord file.

    Args:
        filename: path of the TFRecord file; it is wrapped in a one-element
            list and handed to ``tf.train.string_input_producer``.

    Returns:
        A 5-tuple ``(image, label0, label1, label2, label3)``:
        ``image`` is the raw uint8 bytes reshaped to ``[224, 224]``, cast to
        float32 and rescaled from [0, 255] to [-1, 1]; each label is the
        record's int64 feature cast to int32.
    """
    label_keys = ['label0', 'label1', 'label2', 'label3']

    # Queue of file names feeding the reader (a single file here).
    file_queue = tf.train.string_input_producer([filename])
    # read() dequeues a file name and returns (key, serialized record);
    # only the record is needed.
    _key, record = tf.TFRecordReader().read(file_queue)

    # Every feature is a scalar: the image as a byte string, the labels as int64.
    spec = {'image': tf.FixedLenFeature([], tf.string)}
    for key in label_keys:
        spec[key] = tf.FixedLenFeature([], tf.int64)
    parsed = tf.parse_single_example(record, features=spec)

    # Raw bytes -> 224x224 float image.
    pixels = tf.decode_raw(parsed['image'], tf.uint8)
    pixels = tf.reshape(pixels, [224, 224])
    # Normalise: [0, 255] -> [0, 1] -> [-0.5, 0.5] -> [-1, 1].
    pixels = tf.cast(pixels, tf.float32) / 255.0
    pixels = tf.multiply(tf.subtract(pixels, 0.5), 2.0)

    labels = tuple(tf.cast(parsed[key], tf.int32) for key in label_keys)
    return (pixels,) + labels
    

In [4]:
# Single decoded example (image + four labels) read from the TFRecord file.
image,label0,label1,label2,label3 = read_and_decode(TFRECOD_FILE)

# Shuffled mini-batches drawn from a queue of decoded examples.
image_batch,label1_batch0,label1_batch1,label1_batch2,label1_batch3 = tf.train.shuffle_batch(
    [image,label0,label1,label2,label3],batch_size = BATCH_SIZE,
    capacity = 50000, min_after_dequeue = 10000,num_threads=1)

# Network constructor; it is called below and returns four logit tensors
# (one per output head) plus the end_points collection.
train_network_fn = nets_factory.get_network_fn('alexnet_v2',
                                              num_classes=CHAR_SET_LEN,
                                              weight_decay=0.0005,
                                              is_training= True)

# Training schedule.
TRAIN_STEPS = 6000          # the loop runs steps 0..TRAIN_STEPS inclusive
LOG_EVERY = 20              # print metrics every LOG_EVERY steps
LR_DECAY_EVERY = 2000       # divide the learning rate by 3 every LR_DECAY_EVERY steps
CHECKPOINT_PREFIX = "captcha/models/crack_captcha.model"

# Create the checkpoint directory before training starts, so a missing folder
# is not first discovered by saver.save() after the whole run has finished.
checkpoint_dir = os.path.dirname(CHECKPOINT_PREFIX)
if checkpoint_dir and not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

with tf.Session() as sess:

    # Add the channel dimension expected by the network: [batch, 224, 224, 1].
    X = tf.reshape(x,[BATCH_SIZE,224,224,1])
    logits0,logits1,logits2,logits3,end_points = train_network_fn(X)

    # Convert the integer labels to one-hot vectors.
    one_hot_labels0 = tf.one_hot(indices=tf.cast(y0,tf.int32),depth=CHAR_SET_LEN)
    one_hot_labels1 = tf.one_hot(indices=tf.cast(y1,tf.int32),depth=CHAR_SET_LEN)
    one_hot_labels2 = tf.one_hot(indices=tf.cast(y2,tf.int32),depth=CHAR_SET_LEN)
    one_hot_labels3 = tf.one_hot(indices=tf.cast(y3,tf.int32),depth=CHAR_SET_LEN)

    # Per-head softmax cross-entropy, averaged over the batch.
    loss0 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits0,labels=one_hot_labels0))
    loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits1,labels=one_hot_labels1))
    loss2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits2,labels=one_hot_labels2))
    loss3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits3,labels=one_hot_labels3))

    # Total loss is the mean of the four head losses.
    total_loss = (loss0 + loss1 + loss2 + loss3) / 4.0
    # Training op minimising the total loss.
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(total_loss)

    # Per-head accuracy on the fed batch. tf.argmax replaces the deprecated
    # tf.arg_max alias (the old call emitted "Use `argmax` instead" warnings).
    correct_prediction0 = tf.equal(tf.argmax(one_hot_labels0,1),tf.argmax(logits0,1))
    accuracy0 = tf.reduce_mean(tf.cast(correct_prediction0,tf.float32))

    correct_prediction1 = tf.equal(tf.argmax(one_hot_labels1,1),tf.argmax(logits1,1))
    accuracy1 = tf.reduce_mean(tf.cast(correct_prediction1,tf.float32))

    correct_prediction2 = tf.equal(tf.argmax(one_hot_labels2,1),tf.argmax(logits2,1))
    accuracy2 = tf.reduce_mean(tf.cast(correct_prediction2,tf.float32))

    correct_prediction3 = tf.equal(tf.argmax(one_hot_labels3,1),tf.argmax(logits3,1))
    accuracy3 = tf.reduce_mean(tf.cast(correct_prediction3,tf.float32))

    # Build the learning-rate decay op once. Creating tf.assign inside the
    # training loop would add new nodes to the graph on every decay.
    decay_lr = tf.assign(lr, lr / 3)

    # Saver for writing the final checkpoint.
    saver = tf.train.Saver()
    # Initialise all variables.
    sess.run(tf.global_variables_initializer())

    # Start the queue-runner threads that feed the input pipeline.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess,coord=coord)

    try:
        for i in range(TRAIN_STEPS + 1):
            # Pull one batch out of the queue, then feed it through the placeholders.
            b_image,b_label0,b_label1,b_label2,b_label3 = sess.run([image_batch,label1_batch0,label1_batch1,label1_batch2,label1_batch3])
            feed = {x: b_image, y0: b_label0, y1: b_label1, y2: b_label2, y3: b_label3}
            sess.run(optimizer, feed_dict=feed)

            if i % LOG_EVERY == 0:
                # The decay also fires at i == 0, so only the very first
                # update uses the initial learning rate.
                if i % LR_DECAY_EVERY == 0:
                    sess.run(decay_lr)
                # Metrics are computed on the batch that was just trained on,
                # using the same is_training=True network.
                acc0,acc1,acc2,acc3,loss_ = sess.run([accuracy0,accuracy1,accuracy2,accuracy3,total_loss],feed_dict=feed)
                learning_rate = sess.run(lr)
                print("Iter:%d Loss:%.3f Accuracy:%.2f,%.2f,%.2f,%.2f Learning_rate:%.4f" % (i,loss_,acc0,acc1,acc2,acc3,learning_rate))

        # Save once, after the final step (i == TRAIN_STEPS here).
        saver.save(sess,CHECKPOINT_PREFIX,global_step=i)
    finally:
        # Always stop and join the input threads, even if training raised.
        coord.request_stop()
        coord.join(threads)
    

Instructions for updating:
Use `argmax` instead
Instructions for updating:
Use `argmax` instead
Instructions for updating:
Use `argmax` instead
Instructions for updating:
Use `argmax` instead
Iter:0 Loss:2839.936 Accuracy:0.19,0.09,0.19,0.19 Learning_rate:0.0010
Iter:20 Loss:2.300 Accuracy:0.09,0.12,0.12,0.12 Learning_rate:0.0010
Iter:40 Loss:2.312 Accuracy:0.12,0.09,0.12,0.06 Learning_rate:0.0010
Iter:60 Loss:2.309 Accuracy:0.09,0.16,0.06,0.09 Learning_rate:0.0010
Iter:80 Loss:2.310 Accuracy:0.19,0.06,0.06,0.12 Learning_rate:0.0010
Iter:100 Loss:2.308 Accuracy:0.09,0.06,0.00,0.12 Learning_rate:0.0010
Iter:120 Loss:2.306 Accuracy:0.09,0.09,0.25,0.09 Learning_rate:0.0010
Iter:140 Loss:2.313 Accuracy:0.03,0.03,0.16,0.06 Learning_rate:0.0010
Iter:160 Loss:2.309 Accuracy:0.19,0.16,0.09,0.03 Learning_rate:0.0010
Iter:180 Loss:2.316 Accuracy:0.12,0.12,0.09,0.06 Learning_rate:0.0010
Iter:200 Loss:2.292 Accuracy:0.16,0.06,0.09,0.16 Learning_rate:0.0010
Iter:220 Loss:2.307 Accuracy:0.16,0.12,0.

Iter:2100 Loss:0.243 Accuracy:1.00,0.91,0.91,0.88 Learning_rate:0.0003
Iter:2120 Loss:0.160 Accuracy:0.97,1.00,0.84,0.97 Learning_rate:0.0003
Iter:2140 Loss:0.224 Accuracy:0.94,0.91,0.94,0.94 Learning_rate:0.0003
Iter:2160 Loss:0.166 Accuracy:0.94,0.91,0.94,1.00 Learning_rate:0.0003
Iter:2180 Loss:0.218 Accuracy:0.97,0.97,0.91,0.91 Learning_rate:0.0003
Iter:2200 Loss:0.165 Accuracy:0.97,0.97,0.88,1.00 Learning_rate:0.0003
Iter:2220 Loss:0.184 Accuracy:1.00,0.97,0.94,0.88 Learning_rate:0.0003
Iter:2240 Loss:0.241 Accuracy:1.00,0.75,0.91,0.94 Learning_rate:0.0003
Iter:2260 Loss:0.270 Accuracy:1.00,0.84,0.84,0.94 Learning_rate:0.0003
Iter:2280 Loss:0.091 Accuracy:0.97,1.00,1.00,0.97 Learning_rate:0.0003
Iter:2300 Loss:0.237 Accuracy:0.97,0.91,0.81,0.94 Learning_rate:0.0003
Iter:2320 Loss:0.329 Accuracy:0.97,0.81,0.81,0.94 Learning_rate:0.0003
Iter:2340 Loss:0.140 Accuracy:1.00,0.97,0.91,1.00 Learning_rate:0.0003
Iter:2360 Loss:0.203 Accuracy:0.97,0.94,0.91,0.91 Learning_rate:0.0003
Iter:2

Iter:4420 Loss:0.078 Accuracy:0.97,0.97,0.97,0.97 Learning_rate:0.0001
Iter:4440 Loss:0.045 Accuracy:0.97,0.97,1.00,1.00 Learning_rate:0.0001
Iter:4460 Loss:0.022 Accuracy:1.00,1.00,0.97,1.00 Learning_rate:0.0001
Iter:4480 Loss:0.096 Accuracy:0.97,1.00,0.97,0.97 Learning_rate:0.0001
Iter:4500 Loss:0.028 Accuracy:1.00,1.00,0.97,1.00 Learning_rate:0.0001
Iter:4520 Loss:0.049 Accuracy:1.00,1.00,1.00,0.94 Learning_rate:0.0001
Iter:4540 Loss:0.006 Accuracy:1.00,1.00,1.00,1.00 Learning_rate:0.0001
Iter:4560 Loss:0.016 Accuracy:1.00,1.00,1.00,1.00 Learning_rate:0.0001
Iter:4580 Loss:0.029 Accuracy:1.00,0.97,1.00,1.00 Learning_rate:0.0001
Iter:4600 Loss:0.022 Accuracy:1.00,0.97,1.00,1.00 Learning_rate:0.0001
Iter:4620 Loss:0.033 Accuracy:1.00,0.97,1.00,1.00 Learning_rate:0.0001
Iter:4640 Loss:0.031 Accuracy:1.00,1.00,0.97,0.97 Learning_rate:0.0001
Iter:4660 Loss:0.024 Accuracy:0.97,1.00,1.00,0.97 Learning_rate:0.0001
Iter:4680 Loss:0.025 Accuracy:1.00,1.00,0.97,1.00 Learning_rate:0.0001
Iter:4