In [24]:
import os

from keras.callbacks import ModelCheckpoint

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import glob
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import RepeatVector
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Embedding
from keras.layers import TimeDistributed
from keras.layers import Merge
from keras.callbacks import TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import OneHotEncoder
from numpy import genfromtxt
from keras.utils import np_utils
import tensorflow as tf
import numpy
import os
import cv2
import sys
from keras import backend as K
import numpy as np
import copy
K.set_image_dim_ordering('th')

cwd = os.getcwd()

In [2]:
#Part 1 -> defines
img_channels, img_rows, img_cols = 1, 128, 128
max_caption_len = 3

In [3]:
#Part 2 -> get data
def get_im(path):
    # Load as grayscale
    img = cv2.imread(path, 0)
    # Reduce size
    resized = cv2.resize(img, (img_cols, img_rows))
    return resized


def encode(str, num_rows):
    """
    One hot encodes str
    params: num_rows for keeping the num_rows the same
    """
    
    chars = '0123456789'
    char_to_nr = dict( (ch, nr) for nr, ch in enumerate(sorted(set(chars))) )
    nr_to_char = dict( (nr, ch) for nr, ch in enumerate(sorted(set(chars))) )
    
    #Matrice of zeros with the following shape [number_of_lines][max_different_chars_per_line]
    x = np.zeros((num_rows, len(chars)))
        
    #Do the encoding
    for i, ch in enumerate(str):
        x[i, char_to_nr[ch]] = True
        
    return x    
        
def decode(x, calc_argmax = True):
    """
    Decodes x and returns it
    """
        
    chars = '0123456789'
    char_to_nr = dict( (ch, nr) for nr, ch in enumerate(sorted(set(chars))) )
    nr_to_char = dict( (nr, ch) for nr, ch in enumerate(sorted(set(chars))) )
    
    if calc_argmax:
        x = x.argmax(axis = -1)
        
    return ''.join(nr_to_char[x] for x in x)
    
def encodeAll(data):
    chars = '0123456789'
    MAX_LEN_Y = 1
    
    # [number_of_lines][total_different_chars_possible]
    y = np.zeros( (len(data), len(chars)), dtype = np.bool )
    
    for i, line in enumerate(data):
        y[i] = encode(line, MAX_LEN_Y)
    
    return y

def pad(str, size):
    newStr = copy.deepcopy(str)
    while(len(newStr) < size):
        newStr.append(0)
        
    return newStr

def getDataManually(path):
    X = []
    X2 = []
    y = []
    

    for dir in os.listdir(path):
        crtPath = os.path.join(path, dir, "*.png")
        
        files = glob.glob(crtPath)
        for fl in files:
             # X
             fl = os.path.join(cwd, fl)
             img = get_im(fl)
            
             # Y
             crtSeq = ''
             crtSeqList = []
             
             X.append(img)
             X2.append( pad(crtSeqList, 3) )
             y.append( str(dir[0]) )
                
             for i in range( len(str(dir)) - 1 ):
                    crtSeq += dir[i]
                    crtSeqList.append( int(dir[i]) )
                    
                    X.append(img)
                    X2.append( pad(crtSeqList, 3) )
                    y.append( str(dir[i + 1]) )
                    
             if len(X) % 1000 == 0:
                 print("Picture " + str(len(X)) + " added from path: ", fl)
    
    return X, X2, y

def getData():
    x_train, x_train2, y_train = getDataManually(os.path.join("dataset", "training_set"))
    x_test, x_test2, y_test = getDataManually(os.path.join("dataset", "test_set"))
    
    x_train = numpy.array(x_train)
    x_train = numpy.reshape(x_train, (len(x_train), 1, img_rows, img_cols))
    x_train = x_train.astype("float32")
    x_train /= 255

    x_test = numpy.array(x_test)
    x_test = numpy.reshape(x_test, (len(x_test), 1, img_rows, img_cols))
    x_test = x_test.astype("float32")
    x_train /= 255
        
    x_train2 = numpy.array(x_train2)
    x_test2 = numpy.array(x_test2)

    y_train = encodeAll(y_train)
    y_test = encodeAll(y_test)
    
    return x_train, x_train2, y_train, x_test, x_test2, y_test

In [4]:
#Part 3 -> get the model
def getModel():
    max_caption_len = 3
    vocab_size = 10

    # first, let's define an image model that
    # will encode pictures into 128-dimensional vectors.
    # it should be initialized with pre-trained weights.
    image_model = Sequential()
    image_model.add(Conv2D(32, (3, 3), padding = 'valid', input_shape=(img_channels, img_rows, img_cols)))
    image_model.add(Activation('relu'))
    image_model.add(Conv2D(32, (3, 3), padding = 'valid'))
    image_model.add(Activation('relu'))
    image_model.add(MaxPooling2D(pool_size=(2, 2)))

    image_model.add(Conv2D(64, (3, 3), padding = 'valid'))
    image_model.add(Activation('relu'))
    image_model.add(Conv2D(64, (3, 3), padding = 'valid'))
    image_model.add(Activation('relu'))
    image_model.add(MaxPooling2D(pool_size=(2, 2)))
    
    image_model.add(Conv2D(128, (3, 3), padding='valid'))
    image_model.add(Activation('relu'))
    image_model.add(Conv2D(128, (3, 3)))
    image_model.add(Activation('relu'))

    image_model.add(Flatten())
    image_model.add(Dense(128))

    # let's load the weights from a save file.
    #image_model.load_weights('weight_file.h5')

    # next, let's define a RNN model that encodes sequences of words
    # into sequences of 128-dimensional word vectors.
    language_model = Sequential()
    language_model.add(Embedding(vocab_size, 256, input_length=max_caption_len))
    language_model.add(GRU(output_dim=128, return_sequences=True))
    language_model.add(TimeDistributed(Dense(128)))

    # let's repeat the image vector to turn it into a sequence.
    image_model.add(RepeatVector(max_caption_len))

    # the output of both models will be tensors of shape (samples, max_caption_len, 128).
    # let's concatenate these 2 vector sequences.
    model = Sequential()
    model.add(Merge([image_model, language_model], mode='concat', concat_axis=-1))
    # let's encode this vector sequence into a single vector
    model.add(GRU(512, return_sequences=False))
    model.add(Dense(1024, ))
    # which will be used to compute a probability
    # distribution over what the next word in the caption should be!
    model.add(Dense(vocab_size))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics = ['accuracy'])

    # "images" is a numpy float array of shape (num_samples, num_channels=3, width, height).
    # "captions" is a numpy integer array of shape (num_samples, max_caption_len)
    # containing word index sequences representing partial captions.
    # "next_words" is a numpy float array of shape (num_samples, vocab_size)
    # containing a categorical encoding (0s and 1s) of the next word in the corresponding
    # partial caption.
    #model.fit([images, partial_captions], next_words, batch_size=16, epochs=100)

    return model

In [None]:
#Part 4 -> fit the model
def fitModel(x_train, x_train2, y_train, x_test, x_test2, y_test, model):
    filepath = "12v4CNNLSTMModel-{epoch:02d}-{loss:.4f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
    tensorboard = TensorBoard(log_dir='./tensorboard', histogram_freq=2, batch_size=32, write_graph=True, write_grads=True, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
    callbacks_list = [tensorboard]

    model.fit([x_train, x_train2], y_train, 
              batch_size=16, 
              epochs=4, 
              validation_data = ([x_test, x_test2], y_test),
              callbacks = callbacks_list )


In [None]:
#Part 5 -> run everything
x_train, x_train2, y_train, x_test, x_test2, y_test = getData()

classifier = getModel()
classifier.summary()
fitModel(x_train, x_train2, y_train, x_test, x_test2, y_test, classifier)

Picture 3000 added from path:  /src/docker_shared/12Captcha/dataset/training_set/556/131.png




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
merge_1 (Merge)              (None, 3, 256)            0         
_________________________________________________________________
gru_2 (GRU)                  (None, 512)               1181184   
_________________________________________________________________
dense_3 (Dense)              (None, 1024)              525312    
_________________________________________________________________
dense_4 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dense_5 (Dense)              (None, 10)                10250     
_________________________________________________________________
activation_7 (Activation)    (None, 10)                0         
Total params: 5,579,114
Trainable params: 5,579,114
Non-trainable params: 0
_________________________________________________________________


In [None]:
print(x_train2[0])

In [None]:
print(y_train[0])

In [None]:
print(x_train2[1])

In [None]:
print(y_train[1])

In [None]:
print(x_train2[2])

In [None]:
print(y_train[2])

In [None]:
print(x_train2[6])

In [None]:
print(y_train.shape)

In [None]:
x_train2.shape


In [None]:
y_train.shape

In [22]:
#Test an image case
x = get_im('/src/docker_shared/12Captcha/dataset/test_set/411/377.png')
x = numpy.array(x)
x = numpy.reshape(x, (1, 1, img_rows, img_cols))
x = x.astype("float64")

x2 = numpy.array([[4,2, 3]])

y = ['341']

pred = classifier.predict([x,x2])
print("Pred  : ", pred[0])

Pred  :  [ 0.09362856  0.09199297  0.08601591  0.11848346  0.13384528  0.09528937
  0.11850605  0.09794832  0.0868976   0.07739241]


In [23]:
print("nu")

nu


In [25]:
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

In [26]:
print(sess)

<tensorflow.python.client.session.Session object at 0x7fd5bfde25f8>


In [None]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())