In [1]:
import os

from keras.callbacks import ModelCheckpoint

# Enumerate GPUs in PCI bus order and expose only device "0" to CUDA for this
# kernel, so the notebook runs on a single GPU.
# NOTE(review): these are set after the first keras import above; this assumes
# the backend reads them only when it first initialises the GPU - confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import glob
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import RepeatVector
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Embedding
from keras.layers import TimeDistributed
from keras.layers import Merge
from keras.layers import UpSampling2D
from keras.layers import Reshape
from keras.layers import Conv2DTranspose
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import OneHotEncoder
from numpy import genfromtxt
from keras.utils import np_utils
import tensorflow
import numpy
import os
import cv2
import sys
from keras import backend as K
import numpy as np
import copy
# 'th' selects channels-first image tensors, matching the
# (img_channels, img_rows, img_cols) input shape used by the model and the
# (n, 1, rows, cols) reshape applied to the data.
K.set_image_dim_ordering('th')

# Directory the notebook was started from; image paths found by glob are
# joined onto it before loading.
cwd = os.getcwd()

Using TensorFlow backend.


ImportError: cannot import name Conv2DTranspose

In [None]:
#Part 1 -> defines
# Shape of one network input: a single-channel (grayscale) 128x128 image.
img_channels, img_rows, img_cols = 1, 128, 128
# Intended length of the padded digit prefix fed alongside each image.
# NOTE(review): getDataManually and getModel hard-code 3 instead of reading
# this constant - keep them in sync if it changes.
max_caption_len = 3

In [None]:
#Part 2 -> get data
def get_im(path):
    """
    Load the image at `path` as grayscale and resize it to the network input size.

    params: path - location of the image file on disk
    returns: 2-D array with img_rows rows and img_cols columns
    raises: IOError if OpenCV cannot read or decode the file
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports failure by returning None rather than raising; without
    # this check the error would only surface as an opaque cv2.resize failure
    # that does not mention the offending file.
    if img is None:
        raise IOError("Could not read image: " + str(path))

    # cv2.resize takes the target size as (width, height), hence (cols, rows).
    return cv2.resize(img, (img_cols, img_rows))


def encode(str, num_rows):
    """
    One-hot encode a string of decimal digits.

    params: str      - digit characters ('0'-'9'); character i fills row i
            num_rows - number of rows in the result, so that every encoded
                       string has the same height; rows past len(str) stay zero
    returns: float array of shape (num_rows, 10) holding 1.0 at
             [i, digit value of character i]
    """
    alphabet = sorted(set('0123456789'))
    column_of = {symbol: col for col, symbol in enumerate(alphabet)}

    one_hot = np.zeros((num_rows, len(alphabet)))
    for row, symbol in enumerate(str):
        one_hot[row, column_of[symbol]] = 1.0

    return one_hot
        
def decode(x, calc_argmax = True):
    """
    Turn digit indices, or rows of per-digit scores, back into a digit string.

    params: x           - with calc_argmax, an array whose last axis holds one
                          score per digit; otherwise an iterable of digit indices
            calc_argmax - when True, reduce x to indices with argmax over the
                          last axis before the lookup
    returns: string with one digit character per index
    """
    digit_for = dict(enumerate(sorted(set('0123456789'))))

    indices = x.argmax(axis = -1) if calc_argmax else x

    return ''.join([digit_for[idx] for idx in indices])
    
def encodeAll(data):
    """
    One-hot encode a sequence of single-digit labels into one boolean matrix.

    params: data - sequence of one-character digit strings ('0'-'9')
    returns: boolean array of shape (len(data), 10); row i is the one-hot
             encoding of data[i]
    """
    chars = '0123456789'
    MAX_LEN_Y = 1

    # Use the builtin bool: the np.bool alias was deprecated in NumPy 1.20 and
    # removed in 1.24, where it raises AttributeError.
    # [number_of_lines][total_different_chars_possible]
    y = np.zeros( (len(data), len(chars)), dtype = bool )

    for i, line in enumerate(data):
        # encode returns a (MAX_LEN_Y, 10) matrix; with one row per label its
        # only row is the one-hot vector for this label.
        y[i] = encode(line, MAX_LEN_Y)[0]

    return y

def pad(str, size):
    """
    Return a copy of the list `str` right-padded with zeros to `size` items.

    The input is deep-copied and never modified. A list that already has
    `size` or more items is returned as an unchanged copy (no truncation).
    """
    padded = copy.deepcopy(str)
    for _ in range(size - len(padded)):
        padded.append(0)

    return padded

def getDataManually(path):
    """
    Build next-digit samples from a directory tree of labelled images.

    Every entry of `path` is treated as a label directory whose name is the
    digit string belonging to the `*.png` files inside it. Each image yields
    one sample per digit of its label: the image, the digits seen so far
    (zero-padded to length 3) and the next digit to predict.

    params: path - directory holding one sub-directory per label
    returns: (X, X2, y), three parallel lists
             X  - resized grayscale images, one reference per sample
             X2 - lists of already-seen digits, zero-padded to length 3
             y  - single-character strings, the digit that follows the prefix

    NOTE(review): pad only extends, so labels longer than 4 digits would give
    prefixes longer than 3 - confirm all labels are short enough.
    """
    X = []
    X2 = []
    y = []

    # os.listdir and glob return entries in arbitrary order; sorting both makes
    # the sample order reproducible from run to run.
    for label in sorted(os.listdir(path)):
        pattern = os.path.join(path, label, "*.png")

        for fl in sorted(glob.glob(pattern)):
            fl = os.path.join(cwd, fl)
            img = get_im(fl)

            # Digits of the label consumed so far; empty for the first sample.
            prefix = []

            for position, digit in enumerate(label):
                if position > 0:
                    prefix.append( int(label[position - 1]) )

                X.append(img)
                # pad works on a copy, so later appends to prefix do not
                # alter samples that were already stored.
                X2.append( pad(prefix, 3) )
                y.append( str(digit) )

            if len(X) % 1000 == 0:
                print("Picture " + str(len(X)) + " added from path: ", fl)

    return X, X2, y

def getData():
    """
    Load the training and test sets from dataset/training_set and
    dataset/test_set and convert them to arrays.

    returns: x_train, x_train2, y_train, x_test, x_test2, y_test
             x_*  - float64 image arrays of shape (n, 1, img_rows, img_cols)
             x_*2 - arrays built from the padded digit prefixes
             y_*  - one-hot label matrices produced by encodeAll
    """
    def as_image_batch(images):
        # Stack the images and insert a single-channel axis after the batch axis.
        stacked = numpy.array(images)
        shaped = numpy.reshape(stacked, (len(stacked), 1, img_rows, img_cols))
        return shaped.astype("float64")

    train_images, train_prefixes, train_labels = getDataManually(os.path.join("dataset", "training_set"))
    test_images, test_prefixes, test_labels = getDataManually(os.path.join("dataset", "test_set"))

    x_train = as_image_batch(train_images)
    x_test = as_image_batch(test_images)

    x_train2 = numpy.array(train_prefixes)
    x_test2 = numpy.array(test_prefixes)

    y_train = encodeAll(train_labels)
    y_test = encodeAll(test_labels)

    return x_train, x_train2, y_train, x_test, x_test2, y_test

In [None]:
#Part 3 -> get the model
def getModel():
    """
    Build and compile the convolutional encoder/decoder Sequential model.

    Encoder: two pairs of 3x3 convolutions with ReLU, each pair followed by
    2x2 max pooling, then Flatten and a 128-unit dense layer.
    Decoder: a dense layer reshaped to (64, 29, 29), 2x2 upsampling, two
    3x3 transposed convolutions with ReLU, and a final 2x2 upsampling.

    returns: the model, compiled with categorical cross-entropy loss, the
             rmsprop optimizer and an accuracy metric

    NOTE(review): the positional (filters, rows, cols) arguments and
    border_mode look like the Keras 1 call style, while the import cell's
    recorded output shows an ImportError for Conv2DTranspose - confirm which
    Keras version this notebook targets before relying on this function.
    """
    # Not used anywhere below in this function.
    max_caption_len = 3
    vocab_size = 10

    # Image model: the encoder half compresses each picture into a
    # 128-dimensional vector.
    # NOTE(review): the original note said it "should be initialized with
    # pre-trained weights"; no weights are loaded here.
    image_model = Sequential()
    
    #ENCODER PART
    # NOTE(review): assuming every convolution is unpadded, the spatial size
    # goes 128 -> 126 -> 124 -> (pool) 62 -> 60 -> 58 -> (pool) 29, which is
    # where the 29x29 used by the decoder would come from - confirm.
    image_model.add(Convolution2D(32, 3, 3, border_mode='valid', input_shape=(img_channels, img_rows, img_cols)))
    image_model.add(Activation('relu'))
    image_model.add(Convolution2D(32, 3, 3))
    image_model.add(Activation('relu'))
    image_model.add(MaxPooling2D(pool_size=(2, 2)))

    image_model.add(Convolution2D(64, 3, 3, border_mode='valid'))
    image_model.add(Activation('relu'))
    image_model.add(Convolution2D(64, 3, 3))
    image_model.add(Activation('relu'))
    image_model.add(MaxPooling2D(pool_size=(2, 2)))

    image_model.add(Flatten())
    image_model.add(Dense(128))
    image_model.add(Activation('relu'))
    #Decoder part
    
    # Expand the 128-dimensional code to 64 feature maps of 29x29.
    image_model.add( Dense(29*29*64) )
    image_model.add( Activation('relu'))
    image_model.add( Reshape( (64, 29, 29)) )
    
    # NOTE(review): two 2x2 upsamplings around size-preserving layers would
    # end at 64 x 116 x 116, not the 1 x 128 x 128 input shape - confirm the
    # intended output before training against the input images.
    image_model.add( UpSampling2D((2, 2)) )
    image_model.add( Conv2DTranspose(64, 3, 3, border_mode='same') ) 
    image_model.add( Activation('relu') )
    image_model.add( Conv2DTranspose(64, 3, 3, border_mode='same') )
    image_model.add( Activation('relu') )
    image_model.add( UpSampling2D((2, 2)) )
        
    # Disabled final decoder stage, kept for reference.
    #image_model.add( UpSampling2D((2, 2)) )
    #image_model.add( Convolution2D(32, 3, 3) )
    #image_model.add( Activation('relu') )
    #image_model.add( Convolution2D(32, 3, 3, border_mode='valid') )
    #image_model.add( Activation('relu') )
    
    
    image_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics = ['accuracy'])

    # The notes below describe a two-input (image + partial caption) setup;
    # the model built above takes only the image input.
    # "images" is a numpy float array of shape (num_samples, num_channels=3, width, height).
    # "captions" is a numpy integer array of shape (num_samples, max_caption_len)
    # containing word index sequences representing partial captions.
    # "next_words" is a numpy float array of shape (num_samples, vocab_size)
    # containing a categorical encoding (0s and 1s) of the next word in the corresponding
    # partial caption.
    #model.fit([images, partial_captions], next_words, batch_size=16, epochs=100)

    return image_model

In [None]:
#Part 4 -> fit the model
def fitModel(x_train, x_train2, y_train, x_test, x_test2, y_test, model):
    """
    Prepare the checkpoint callback for training `model`.

    Only the callback list is built: the training call itself is commented
    out, so none of the arguments are used and nothing is returned.
    """
    # Write a checkpoint whenever the training loss reaches a new minimum.
    callbacks_list = [
        ModelCheckpoint(
            "12v4CNNLSTMModel-{epoch:02d}-{loss:.4f}.hdf5",
            monitor='loss',
            verbose=1,
            save_best_only=True,
            mode='min'
        )
    ]

    # Disabled training run (images as both input and target):
    #model.fit(x_train, x_train,
    #          batch_size=16,
    #          nb_epoch=15,
    #          validation_data = (x_test, x_test),
    #          callbacks = callbacks_list )

In [None]:
#Part 5 -> run everything
# Load both datasets from disk; the resulting arrays are inspected by the
# cells further down.
x_train, x_train2, y_train, x_test, x_test2, y_test = getData()

# Build the model and print its layer-by-layer summary.
classifier = getModel()
classifier.summary()
# NOTE(review): fitModel has its model.fit call commented out, so this line
# does not train the classifier.
fitModel(x_train, x_train2, y_train, x_test, x_test2, y_test, classifier)

In [None]:
# Digit prefix of training sample 0: the first sample of an image has seen no
# digits yet, so its prefix is all padding zeros.
print(x_train2[0])

In [None]:
# One-hot label of training sample 0: the first digit of that image's
# directory name.
print(y_train[0])

In [None]:
# Digit prefix of training sample 1.
# NOTE(review): for a label with at least two digits this is the same image
# as sample 0 with its first digit filled in - confirm against the dataset.
print(x_train2[1])

In [None]:
# One-hot label of training sample 1: the digit that follows the prefix
# printed for sample 1.
print(y_train[1])

In [None]:
# Digit prefix of training sample 2.
print(x_train2[2])

In [None]:
# One-hot label of training sample 2.
print(y_train[2])

In [None]:
# Digit prefix of training sample 6.
# NOTE(review): with 3-digit labels each image yields 3 samples, so this would
# be the first (all-zero) sample of the third image - confirm.
print(x_train2[6])

In [None]:
# Label matrix shape: one row per sample, one column per digit 0-9.
print(y_train.shape)

In [None]:
# Shape of the digit-prefix array.
# NOTE(review): expected to be (num_samples, 3) as long as every prefix was
# padded to the same length - confirm.
x_train2.shape

In [None]:
# Same label-matrix shape as printed above, shown as the cell's output value.
y_train.shape

In [None]:
#Test an image case
# Load one test image and shape it like a single-sample batch, mirroring the
# (n, 1, img_rows, img_cols) float64 layout that getData produces.
# NOTE(review): the path is absolute and machine-specific; getData reads the
# same dataset through the relative "dataset/test_set" directory.
x = get_im('/docker_shared/12Captcha/dataset/test_set/672/427.png')
x = numpy.array(x)
x = numpy.reshape(x, (1, 1, img_rows, img_cols))
x = x.astype("float64")

# Hand-written digit prefix to pair with the image.
x2 = numpy.array([[1,2,3]])

# NOTE(review): y is never used below, and '341' does not match the "672"
# directory the image is loaded from - confirm the intended label.
y = ['341']

# NOTE(review): two arrays are passed, but getModel builds a model with a
# single image input - likely left over from a two-input design; confirm.
pred = classifier.predict([x,x2])
print("Pred  : ", pred[0])

In [None]:
# Scratch cell: prints a fixed marker string and touches no notebook state.
print("nu")