In [1]:
import random
import numpy as np
from PIL import Image
from captcha.image import ImageCaptcha
import tensorflow as tf

In [2]:
# Character set used for captcha text. The position of a character in this
# list is the index that `list.index` returns for it, so the order is kept
# exactly as originally written (it is not plain alphabetical order).
STRING = list(
    'abcdefgh'
    'ijklmnopqrstu' 'JKLM'
    'vwxyz' 'ABCDEFGHI'
    'NOPQRSTU'
    'VWXYZ'
)
CAPTCHA_LIST = STRING      # alias: the very same list object as STRING
CAPTCHA_LEN = 4            # number of characters in a captcha
CAPTCHA_HEIGHT = 60        # captcha image height
CAPTCHA_WIDTH = 160        # captcha image width

In [3]:
# Build a random captcha string (4 characters by default).
def randomText(charSet=STRING, captcha_size=4):
    """Return a string of `captcha_size` characters drawn from `charSet`.

    Every character is chosen independently with `random.choice`, so the same
    character may appear more than once.
    """
    picked = []
    for _ in range(captcha_size):
        picked.append(random.choice(charSet))
    return ''.join(picked)

# Render a captcha image for a freshly generated random text.
def generateCaptchaImage(width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT, save=None):
    """Create a random captcha and return it as ``(captcha_text, captcha_image)``.

    `width` and `height` are forwarded to `ImageCaptcha`. The text comes from
    `randomText()` with its default arguments. `save` is accepted but is not
    used by this function.

    Returns:
        A tuple of the captcha string and the rendered image as a numpy array.
    """
    generator = ImageCaptcha(width=width, height=height)
    # Text the captcha will show.
    captcha_text = randomText()
    rendered = generator.generate(captcha_text)
    # Open the rendered captcha with PIL and convert it to a numpy array.
    captcha_image = np.array(Image.open(rendered))
    return captcha_text, captcha_image

### 对图片进行预处理

In [4]:
# Collapse a multi-channel image into a single grey channel.
def convertToGray(img):
    """Average `img` over its last axis when it has more than two dimensions.

    Inputs with two or fewer dimensions are returned unchanged (the same
    object, not a copy).
    """
    has_channel_axis = len(img.shape) > 2
    return np.mean(img, -1) if has_channel_axis else img

# Encode captcha text as a flat one-hot vector.
def textToVector(text, captcha_len=CAPTCHA_LEN, captcha_list=CAPTCHA_LIST):
    """Convert `text` into a concatenation of one-hot segments.

    The result has `captcha_len * len(captcha_list)` entries. Character `i`
    of `text` sets the entry at `i * len(captcha_list) + captcha_list.index(char)`
    to 1; all other entries stay 0. Text shorter than `captcha_len` leaves the
    trailing segments all zero.

    Raises:
        ValueError: if `text` is longer than `captcha_len`, or (raised by
            `list.index`) if a character is not present in `captcha_list`.
    """
    if len(text) > captcha_len:
        # Report the limit actually in force rather than a hard-coded 4; the
        # message is unchanged for the default captcha_len of 4.
        raise ValueError('验证码最长%d个字符' % captcha_len)
    n_classes = len(captcha_list)
    vector = np.zeros(captcha_len * n_classes)
    for position, char in enumerate(text):
        vector[position * n_classes + captcha_list.index(char)] = 1
    return vector

# Decode a sequence of character indices back into captcha text.
def vectorToText(vec, captcha_list=CAPTCHA_LIST, captcha_len=CAPTCHA_LEN):
    """Look up each entry of `vec` (converted with `int`) in `captcha_list`
    and join the resulting characters into one string.

    `captcha_len` is accepted but not used.
    """
    return ''.join(captcha_list[int(idx)] for idx in vec)

# Generate captchas until one has the requested array shape.
def wrapImageShape(shape=(60, 160, 3)):
    """Return ``(text, image)`` for the first generated captcha whose image
    array has exactly the shape `shape`.

    Captchas come from `generateCaptchaImage()` with its default arguments;
    the loop only ends once a matching image is produced.
    """
    while True:
        text, image = generateCaptchaImage()
        if image.shape != shape:
            continue
        return text, image

# Build one batch of captcha samples.
def getImageBatch(batch_count=60, width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT):
    """Generate `batch_count` captchas and return them as ``(batch_x, batch_y)``.

    Args:
        batch_count: number of samples in the batch.
        width: width of the generated captcha images.
        height: height of the generated captcha images.

    Returns:
        batch_x: array of shape ``[batch_count, width * height]``; each row is
            a grey-scale image, flattened and divided by 255.
        batch_y: array of shape ``[batch_count, CAPTCHA_LEN * len(CAPTCHA_LIST)]``;
            each row is the `textToVector` encoding of the captcha text.
    """
    # Images are generated at the requested size. Previously they were always
    # produced at the default size, so a non-default width/height could not be
    # written into a batch_x row.
    expected_shape = (height, width, 3)
    batch_x = np.zeros([batch_count, width * height])
    batch_y = np.zeros([batch_count, CAPTCHA_LEN * len(CAPTCHA_LIST)])
    for i in range(batch_count):
        # Retry until the generated array has the expected shape.
        while True:
            text, image = generateCaptchaImage(width=width, height=height)
            if image.shape == expected_shape:
                break
        image = convertToGray(image)
        # Row i of both arrays describes the same sample.
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = textToVector(text)
    return batch_x, batch_y

### CNN算法实现

In [5]:
# Weight initialisation.
def weight(shape, w_alpha=0.01):
    """Return a `tf.Variable` of the given `shape`, initialised with
    `tf.random.normal` values multiplied by `w_alpha`."""
    return tf.Variable(w_alpha * tf.random.normal(shape))

# Bias initialisation.
def bias(shape, b_alpha=0.1):
    """Return a `tf.Variable` of the given `shape`, initialised with
    `tf.random.normal` values multiplied by `b_alpha`."""
    scaled_noise = b_alpha * tf.random.normal(shape)
    bias_var = tf.Variable(scaled_noise)
    return bias_var

# Convolution layer.
def ConvLayer(x, w):
    """Apply `tf.nn.conv2d` to `x` with filter `w`, using a stride of 1 in
    every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')


# Pooling layer (max pooling).
def MaxPool(x):
    """Apply `tf.nn.max_pool2d` to `x` with a [1, 2, 2, 1] window, the same
    strides, and 'SAME' padding."""
    window = [1, 2, 2, 1]
    strides = [1, 2, 2, 1]
    return tf.nn.max_pool2d(x, ksize=window, strides=strides, padding='SAME')

# Convolutional neural network for captcha recognition.
def CNN(x, keep_prob, size, captcha_list=CAPTCHA_LIST, captcha_len=CAPTCHA_LEN):
    """Build the network and return its output logits.

    Structure: three blocks of (3x3 convolution + bias, ReLU, max pooling,
    dropout) with 32, 64 and 64 output channels, then a 1024-unit fully
    connected ReLU layer with dropout, then a linear output layer.

    Args:
        x: input that is reshaped to ``[-1, image_height, image_width, 1]``.
        keep_prob: probability of keeping a unit; every dropout layer uses
            ``rate = 1 - keep_prob``.
        size: ``(image_height, image_width)`` of the input images.
        captcha_list: character set; with `captcha_len` it fixes the output
            width ``len(captcha_list) * captcha_len``.
        captcha_len: number of characters per captcha.

    Returns:
        The output of the last matmul plus bias (no activation applied).
    """
    # The image has to be reshaped into a 4-D tensor for conv2d.
    image_height, image_width = size
    x_image = tf.reshape(x, shape=[-1, image_height, image_width, 1])

    drop_rate = 1 - keep_prob

    # Convolutional blocks, given as (input channels, output channels).
    # Variables are created in the order weight, bias per block, which is the
    # same creation order as when each layer was written out by hand.
    features = x_image
    for in_channels, out_channels in ((1, 32), (32, 64), (64, 64)):
        w_conv = weight([3, 3, in_channels, out_channels])
        b_conv = bias([out_channels])
        features = tf.nn.relu(ConvLayer(features, w_conv) + b_conv)   # convolution
        features = MaxPool(features)                                  # pooling
        features = tf.nn.dropout(features, rate=drop_rate)

    # Fully connected layer.
    pooled_height = int(features.shape[1])
    pooled_width = int(features.shape[2])
    flat_size = pooled_height * pooled_width * 64
    w_fc = weight([flat_size, 1024])
    b_fc = bias([1024])
    flat = tf.reshape(features, [-1, flat_size])
    h_fc = tf.nn.relu(tf.matmul(flat, w_fc) + b_fc)
    # Pass `rate` by keyword like the conv blocks do. The second positional
    # argument of tf.nn.dropout is `keep_prob` in the v1 signature but `rate`
    # in the v2 signature, so a positional keep_prob would invert the dropout
    # under v2.
    h_drop_fc = tf.nn.dropout(h_fc, rate=drop_rate)

    # Output layer.
    n_outputs = len(captcha_list) * captcha_len
    w_out = weight([1024, n_outputs])
    b_out = bias([n_outputs])
    y_conv = tf.matmul(h_drop_fc, w_out) + b_out
    return y_conv


### 模型训练

In [7]:
acc_rate = 0.85   # accuracy that must be exceeded before training stops
# Placeholders sized from the captcha configuration constants instead of
# repeating the literal numbers.
x = tf.compat.v1.placeholder(tf.float32, [None, CAPTCHA_HEIGHT * CAPTCHA_WIDTH])
y = tf.compat.v1.placeholder(tf.float32, [None, CAPTCHA_LEN * len(CAPTCHA_LIST)])

keep_prob = tf.compat.v1.placeholder(tf.float32)
# CNN model
y_conv = CNN(x, keep_prob, (CAPTCHA_HEIGHT, CAPTCHA_WIDTH))

# Optimisation
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_conv))
optimizer = tf.compat.v1.train.AdamOptimizer(1e-3).minimize(loss)

# Accuracy: reshape logits and labels to [batch, characters, classes], take
# the argmax class of every character position and average the matches over
# all positions.
width = len(CAPTCHA_LIST)
height = CAPTCHA_LEN
predict = tf.reshape(y_conv, [-1, height, width])
max_predict_idx = tf.argmax(predict, 2)
label = tf.reshape(y, [-1, height, width])
max_label_idx = tf.argmax(label, 2)
correct_p = tf.equal(max_predict_idx, max_label_idx)
accuracy = tf.reduce_mean(tf.cast(correct_p, tf.float32))

# Start the session and train.
saver = tf.compat.v1.train.Saver()
sess = tf.compat.v1.Session()
try:
    sess.run(tf.compat.v1.global_variables_initializer())     # initialise variables
    step = 0    # number of completed training steps
    while True:
        batch_x, batch_y = getImageBatch(64)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.75})
        if step % 100 == 0:
            # Evaluate every 100 steps on a freshly generated batch.
            batch_x_test, batch_y_test = getImageBatch(100)
            acc = sess.run(accuracy, feed_dict={x: batch_x_test, y: batch_y_test, keep_prob: 1.0})
            # Accuracy is good enough: save the model and stop.
            if acc > acc_rate:
                model_path = "./model/captcha.model"
                saver.save(sess, model_path, global_step=step)
                break
        step += 1

    # Report the accuracy on one more fresh batch.
    batch_x_test, batch_y_test = getImageBatch(100)
    acc = sess.run(accuracy, feed_dict={x: batch_x_test, y: batch_y_test, keep_prob: 1.0})
    print("step: %d    accuracy:%.2f" % (step, acc))
finally:
    # Close the session even when training is interrupted or fails.
    sess.close()
print("train end!")

step: 15500    accuracy:0.85
train end!


In [8]:
# Generate one fresh captcha and let the saved model predict its text.
text, image = generateCaptchaImage()
img = Image.fromarray(image)     # PIL version of the generated captcha; not used below
image = convertToGray(image)
image = image.flatten() / 255
image_list = [image]     # batch containing the single sample
# Use the compat.v1 session like the training cell does; plain `tf.Session`
# is not available under the TF2 namespace.
with tf.compat.v1.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('model/'))
    # Argmax class index for each of the CAPTCHA_LEN character positions.
    predict = tf.argmax(tf.reshape(y_conv, [-1, CAPTCHA_LEN, len(CAPTCHA_LIST)]), 2)
    vector_list = sess.run(predict, feed_dict={x: image_list, keep_prob: 1})
    vector_list = vector_list.tolist()
    pre_text = [vectorToText(vector) for vector in vector_list]

print("生成的验证码:", text, ' 模型预测值:', pre_text )

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from model/captcha.model-15500
生成的验证码: dvUM  模型预测值: ['dvUv']
