In [1]:
import os

from keras.callbacks import ModelCheckpoint

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import glob
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import RepeatVector
from keras.layers import LSTM
from keras.layers import GRU
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import OneHotEncoder
from numpy import genfromtxt
from keras.utils import np_utils
import tensorflow
import numpy
import os
import cv2
import sys
from keras import backend as K
import numpy as np
K.set_image_dim_ordering('th')

cwd = os.getcwd()

Using TensorFlow backend.


In [2]:
#Part 1 -> defines
img_rows, img_cols = 128, 128
nb_filters = 32 # nr of conv filters to use
nb_pool = 2 # size of pooling area
nb_conv = 3 # convolution kernel size

In [3]:
#Part 2 -> get data
def get_im(path):
    # Load as grayscale
    img = cv2.imread(path, 0)
    # Reduce size
    resized = cv2.resize(img, (img_cols, img_rows))
    return resized


def encode(str, num_rows):
    """
    One hot encodes str
    params: num_rows for keeping the num_rows the same
    """
    
    chars = '0123456789'
    char_to_nr = dict( (ch, nr) for nr, ch in enumerate(sorted(set(chars))) )
    nr_to_char = dict( (nr, ch) for nr, ch in enumerate(sorted(set(chars))) )
    
    #Matrice of zeros with the following shape [number_of_lines][max_different_chars_per_line]
    x = np.zeros((num_rows, len(chars)))
        
    #Do the encoding
    for i, ch in enumerate(str):
        x[i, char_to_nr[ch]] = True
        
    return x    
        
def decode(x, calc_argmax = True):
    """
    Decodes x and returns it
    """
        
    chars = '0123456789'
    char_to_nr = dict( (ch, nr) for nr, ch in enumerate(sorted(set(chars))) )
    nr_to_char = dict( (nr, ch) for nr, ch in enumerate(sorted(set(chars))) )
    
    if calc_argmax:
        x = x.argmax(axis = -1)
        
    return ''.join(nr_to_char[x] for x in x)
    
def encodeAll(data):
    chars = '0123456789'
    MAX_LEN_Y = 3
    
    # [number_of_lines][max_size_of_y][total_different_chars_possible]
    y = np.zeros( (len(data), MAX_LEN_Y, len(chars)), dtype = np.bool )
    
    for i, line in enumerate(data):
        y[i] = encode(line, MAX_LEN_Y)
    
    return y

def getDataManually(path):
    X = []
    y = []
    

    for dir in os.listdir(path):
        crtPath = os.path.join(path, dir, "*.png")
        
        files = glob.glob(crtPath)
        for fl in files:
             # X
             fl = os.path.join(cwd, fl)
             img = get_im(fl)
             X.append(img)
            
             # Y
             #crtStr = []
             #for chr in str(dir):
             #       crtStr.append(int(chr))
             #y.append(crtStr)
             
             y.append(str(dir))
                
             if len(X) % 1000 == 0:
                 print("Picture " + str(len(X)) + " added from path: ", fl)

    return X, y

def getData():
    train_datagen = ImageDataGenerator()
    test_datagen = ImageDataGenerator()

    x_train, y_train = getDataManually(os.path.join("dataset", "training_set"))
    x_test, y_test = getDataManually(os.path.join("dataset", "test_set"))
    
    x_train = numpy.array(x_train)
    x_train = numpy.reshape(x_train, (len(x_train), 1, img_rows, img_cols))
    x_train = x_train.astype("float64")
    train_datagen.fit(x_train, augment = False)

    x_test = numpy.array(x_test)
    x_test = numpy.reshape(x_test, (len(x_test), 1, img_rows, img_cols))
    x_test = x_test.astype("float64")
    test_datagen.fit(x_test, augment = False)
    
    y_train = encodeAll(y_train)
    y_test = encodeAll(y_test)
    
    return x_train, y_train, x_test, y_test, train_datagen, test_datagen

In [4]:
#Part 3 -> get the model
def getModel():
    max_caption_len = 3

    model = Sequential()
    model.add(Convolution2D(32, 3, 3, input_shape=(1, img_rows, img_cols))) 
    model.add(Activation('relu'))
    model.add(Convolution2D(32, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Convolution2D(64, 3, 3)) 
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3)) 
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Convolution2D(128, 3, 3)) 
    model.add(Activation('relu'))
    model.add(Convolution2D(128, 3, 3)) 
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(2048))
    model.add(Activation('relu'))

    model.add(RepeatVector(max_caption_len)) 
    # the GRU below returns sequences of max_caption_len vectors of size 10 (our word embedding size)
    model.add(GRU(10, return_sequences=True))

    model.compile(loss='mean_squared_error', optimizer='rmsprop')

    return model

In [None]:
#Part 4 -> fit the model
def fitModel(x_train, y_train, x_test, y_test, train_datagen, test_datagen, classifier):
    filepath = sys.argv[0] + "-{epoch:02d}-{loss:.4f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [checkpoint]

    classifier.fit_generator(train_datagen.flow(x_train, y_train, batch_size= 32),
                         samples_per_epoch = 10000,
                         nb_epoch = 5,
                         validation_data = test_datagen.flow(x_test, y_test),
                         nb_val_samples = 5000,
                         callbacks = callbacks_list
                        )


In [None]:
#Part 5 -> run everything
x_train, y_train, x_test, y_test, train_datagen, test_datagen = getData()

print("Shape of y_train: ", y_train.shape)
classifier = getModel()

print(classifier.summary())

fitModel(x_train, y_train, x_test, y_test, train_datagen, test_datagen, classifier)

('Picture 1000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/512/87227.png')
('Picture 2000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/885/10590.png')
('Picture 3000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/287/38704.png')
('Picture 4000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/578/19666.png')
('Picture 5000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/730/47508.png')
('Picture 6000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/392/19334.png')
('Picture 7000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/517/45118.png')
('Picture 8000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/733/81721.png')
('Picture 9000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/873/22641.png')
('Picture 10000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/599/84021.png')
('Picture 11000 add

('Picture 84000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/678/18693.png')
('Picture 85000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/556/26068.png')
('Picture 86000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/989/90001.png')
('Picture 87000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/612/30842.png')
('Picture 88000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/248/91021.png')
('Picture 89000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/184/76528.png')
('Picture 90000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/580/51899.png')
('Picture 91000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/448/2782.png')
('Picture 92000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/559/74148.png')
('Picture 93000 added from path: ', '/docker_shared/12Captcha/dataset/training_set/637/31360.png')
('Picture 9

Epoch 1/5



Epoch 00000: loss improved from inf to 0.10084, saving model to /usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py-00-0.1008.hdf5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 1824/10000 [====>.........................] - ETA: 48s - loss: 0.1000

In [None]:
#Test an image case
x = get_im('/docker_shared/12Captcha/dataset/test_set/341/10373.png')
x = numpy.array(x)
x = numpy.reshape(x, (1, 1, img_rows, img_cols))
x = x.astype("float64")
    
y = ['341']
y = encodeAll(y)
print("y Encoded: ", y)

pred = classifier.predict(x)
predDecoded = decode(pred[0])
print("Pred  : ", predDecoded)



In [None]:
print("da")