In [1]:
# https://github.com/NeilNie/EMNIST-Keras

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import imageio
import scipy.misc
import os
import argparse
import keras

from scipy.io import loadmat
from scipy.misc import imread, imsave, imresize
# from skimage.io import imread, imsave
from PIL import Image
from keras.models import save_model, Sequential
from keras.utils import np_utils
from keras.layers import Conv2D, MaxPooling2D, Convolution2D, Dropout, Dense, Flatten, LSTM

# Mute tensorflow debugging information console
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
def load_data(mat_file_path, width=28, height=28, max=None):

    ''' Load an EMNIST-style dataset from a MATLAB .mat file.
        Arguments:
            mat_file_path: path to the .mat file
        Optional Arguments:
            width: image width in pixels
            height: image height in pixels
            max: the max number of training samples to load. None loads every
                 training AND every testing sample. When a number is given the
                 test set is capped at int(max / 6) samples. Both caps are
                 clamped to the number of samples actually present.
        Returns:
            A tuple of training and test data, the mapping for class code to
            ascii value and the number of classes, in the following format:
                - ((training_images, training_labels),
                   (testing_images, testing_labels), mapping, nb_classes)
            Images are float32 arrays of shape (n, height, width, 1) scaled
            to [0, 1]; labels are returned exactly as stored in the file.
        Side effects:
            Pickles the mapping to bin/mapping.p (bin/ is created if missing).
    '''
    def prepare(flat_images, count):
        # Samples come out transposed on read-in (presumably because MATLAB
        # stores pixels column-major), so read each one as (width, height)
        # and swap the two image axes. For square images this is exactly the
        # fliplr + rot90 combination, done for the whole stack at once.
        stack = flat_images[:count].reshape(count, width, height, 1)
        stack = np.ascontiguousarray(stack.transpose(0, 2, 1, 3), dtype='float32')
        # Normalize to prevent issues with model
        stack /= 255
        return stack

    # Load convoluted list structure from loadmat
    mat = loadmat(mat_file_path)
    dataset = mat['dataset'][0][0]

    # Load char mapping: first column is the class code, second the ascii value
    mapping = {entry[0]: entry[1] for entry in dataset[2]}
    os.makedirs('bin', exist_ok=True)
    with open('bin/mapping.p', 'wb') as mapping_file:
        pickle.dump(mapping, mapping_file)

    train_split = dataset[0][0][0]
    test_split = dataset[1][0][0]

    # Decide both sample counts up front so that max=None really means
    # "everything" for the test split too, instead of deriving the test size
    # from the training size.
    if max is None:
        n_train = len(train_split[0])
        n_test = len(test_split[0])
    else:
        n_train = min(max, len(train_split[0]))
        n_test = min(int(max / 6), len(test_split[0]))

    training_images = prepare(train_split[0], n_train)
    training_labels = train_split[1][:n_train]

    testing_images = prepare(test_split[0], n_test)
    testing_labels = test_split[1][:n_test]

    nb_classes = len(mapping)

    return ((training_images, training_labels), (testing_images, testing_labels), mapping, nb_classes)

In [4]:
def build_model(training_data, width=28, height=28):

    ''' Assemble and compile the convolutional classifier. No training and
        no file output happens here.
        Arguments:
            training_data: the packed tuple from load_data(); only its last
                           element (the number of classes) is used
        Optional Arguments:
            width: specified width
            height: specified height
        Returns:
            The compiled Sequential model.
    '''
    # Unpack with the same structure load_data() produces; everything except
    # the class count is ignored.
    (_, _), (_, _), _, nb_classes = training_data

    # Hyperparameters
    n_filters = 32         # number of convolutional filters to use
    conv_kernel = (3, 3)   # convolution kernel size
    pool_window = (2, 2)   # size of pooling area for max pooling

    layer_stack = [
        Convolution2D(n_filters, conv_kernel, padding='valid',
                      input_shape=(height, width, 1), activation='relu'),
        Convolution2D(n_filters, conv_kernel, activation='relu'),
        MaxPooling2D(pool_size=pool_window),
        Dropout(0.25),
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(nb_classes, activation='softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
    return model

In [5]:
def train(model, training_data, callback=True, batch_size=256, epochs=10):

    ''' Fit the model, print its test loss/accuracy and save it under bin/.
        Arguments:
            model: the compiled model from build_model()
            training_data: the packed tuple from load_data()
        Optional Arguments:
            callback: any truthy value enables a TensorBoard callback that
                      logs to ./Graph
            batch_size: mini-batch size passed to fit()
            epochs: the number of epochs to train over
        Side effects:
            Writes bin/model.yaml (architecture) and bin/model.h5 (full model);
            bin/ is created if missing.
    '''
    (x_train, y_train), (x_test, y_test), _, nb_classes = training_data

    # convert class vectors to binary class matrices; the stored class codes
    # are shifted down by one first so they can be used as one-hot indices
    y_train = np_utils.to_categorical(np.subtract(y_train, 1), nb_classes)
    y_test  = np_utils.to_categorical(np.subtract(y_test, 1), nb_classes)

    # Build the callback list from a single truthiness check so that the
    # decision to create the callback and the decision to pass it can never
    # disagree (previously `callback == True` vs `if callback`).
    callbacks = None
    if callback:
        # Callback for analysis in TensorBoard
        callbacks = [keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0,
                                                 write_graph=True, write_images=True)]

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=callbacks)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    # Offload model to file
    os.makedirs('bin', exist_ok=True)
    with open("bin/model.yaml", "w") as yaml_file:
        yaml_file.write(model.to_yaml())
    save_model(model, 'bin/model.h5')

In [6]:
# Load the emnist-letters .mat file, build the network and train it.
# `training_data` and `model` are left in the notebook namespace on purpose:
# the cells below unpack `training_data`, and predict() reads `model`.
mat_file_path = "dataset/matlab/emnist-letters.mat"
training_data = load_data(mat_file_path)
model = build_model(training_data)
train(model, training_data)

Train on 124800 samples, validate on 20800 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test score: 0.1860164220572915
Test accuracy: 0.9390384615384615


In [10]:
# Unpack the tuple returned by load_data() into notebook-level names.
# predict() looks up `mapping` from here to turn a class index into a character.
(training_images, training_labels), (testing_images, testing_labels), mapping, nb_classes = training_data

In [26]:
def predict(x, best_confidences, best_responses, x_start, y_start):
    ''' Classify one image patch and record it if it is the most confident
        hit seen so far for the predicted class.

        Reads the notebook-level `model` (for inference) and `mapping`
        (class code -> ascii value); both must exist before calling.
        Arguments:
            x: image patch as a 2-D array (presumably uint8 grayscale, as the
               caller reads it with mode='L' -- TODO confirm for other callers)
            best_confidences: per-class best confidence so far, indexed by the
                              model's output index; updated in place
            best_responses: per-class list of response dicts; updated in place
            x_start: x offset of the patch, stored in the response 'box'
            y_start: y offset of the patch, stored in the response 'box'
        Returns:
            The same (best_confidences, best_responses) objects.
    '''
    patch = x

    # NOTE(review): this writes the *un-resized* patch to resized.png on every
    # call. It looks like a debugging leftover and is costly inside a scan
    # loop; kept so the function's on-disk side effect is unchanged.
    imsave('resized.png', patch)

    # Resize to the network input size, add batch and channel axes, and scale
    # to [0, 1] the same way the training images were scaled.
    net_input = imresize(patch, (28, 28)).reshape(1, 28, 28, 1).astype('float32')
    net_input /= 255

    # Predict from model: one row of class probabilities for the single patch
    probs = model.predict(net_input)[0]

    # Generate response
    argmax     = int(np.argmax(probs))
    # The model's output index is shifted up by one to get the mapping key
    # (train() shifts the stored labels down by one before one-hot encoding).
    prediction = chr(mapping[argmax + 1])
    # Percentage rounded numerically to 3 decimals, instead of slicing the
    # first 6 characters of the number's string form (which truncated and
    # gave a precision that depended on the magnitude).
    confidence = np.float64(round(float(probs[argmax]) * 100, 3))

    response = {'prediction': prediction,
                'confidence': confidence,
                'box': (x_start, y_start),
                'sample_len': patch.shape,
                'image': patch}

    if confidence > best_confidences[argmax]:
        best_confidences[argmax] = confidence
        best_responses[argmax]   = response

    return best_confidences, best_responses

In [None]:
# Scan the photo with square windows of doubling size and keep, for every
# class, the window the model is most confident about.
im_full = "locations/italy_3.jpg"
img = Image.open(im_full)

# Convert to 8-bit grayscale once; each window is then a plain array slice.
# This replaces saving every crop as a JPEG and reading it back, which cost
# two disk operations per window and added compression artifacts to the pixels.
gray = np.asarray(img.convert('L'))

# One slot per model output class (nb_classes comes from the unpacked
# training data, so the buffers always match the network's output size).
num_classes = nb_classes
best_confidences = np.zeros(num_classes)
best_responses   = [dict() for _ in range(num_classes)]

step       = 4    # stride between neighbouring windows, in pixels
sample_len = 28   # side of the first (smallest) window

width, height = img.size

# `<=` so that a window exactly as large as the shorter image side is scanned too
while sample_len <= min(width, height):
    print(sample_len)

    # `+ 1` makes the last offset that still fits inside the image reachable;
    # without it the final column/row of windows was skipped, and an axis with
    # less than `step` pixels of slack produced no windows at all.
    for step_i in range(0, width - sample_len + 1, step):
        for step_j in range(0, height - sample_len + 1, step):

            # rows are y, columns are x; copy so the stored patch does not
            # keep a view into the full image
            x = gray[step_j:step_j + sample_len, step_i:step_i + sample_len].copy()

            # Try the patch as-is and inverted
            best_confidences, best_responses = predict(x, best_confidences, best_responses, step_i, step_j)

            x = np.invert(x)
            best_confidences, best_responses = predict(x, best_confidences, best_responses, step_i, step_j)

    # 28, 56, 112, ...
    sample_len *= 2

28


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
  This is separate from the ipykernel package so we can avoid doing imports until
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  after removing the cwd from sys.path.


56
112
224


In [None]:
# Print the best hit per class and save its image patch as <prediction>.jpg.
out_dir = "attempts/attempt_9/"
# Image.save does not create directories, so make sure the target exists
os.makedirs(out_dir, exist_ok=True)

for best in best_responses:
    if not best:
        # no window was ever recorded for this class
        print("EMPTY")
        continue

    # Everything except the pixel data, so the printout stays readable
    response = {key: best[key] for key in ('prediction', 'confidence', 'sample_len', 'box')}
    print(response)

    Image.fromarray(best['image']).save(os.path.join(out_dir, best['prediction'] + ".jpg"))

In [None]:
# import scipy
# import tensorflow as tf
# from PIL import Image

# init = tf.initialize_all_variables()
# sess = tf.Session()
# sess.run(init)

# im_full  = "naples.jpg"
# im_saved = "saved.jpg"
# img = Image.open(im_full) 

# img.show()

# step = 4

# width = img.size[0]
# height = img.size[1]

# width_passes  = int(np.floor(width/step))
# height_passes = int(np.floor(height/step))

# for i in range( width_passes ): 
#     for j in range( height_passes ):
        
#         step_i = step*i;
#         step_j = step*j;
        
#         im_cropped = img.crop( (step_i, step_j, step_i+28, step_j+28) )
#         im_cropped.save(im_saved)

#         print(type(im_cropped))
        
#         region = scipy.ndimage.imread(im_saved, flatten=True)
#         data = np.vectorize(lambda x: 255 - x)(np.ndarray.flatten(region))
#         result = sess.run(tf.argmax(y,1), feed_dict={x: [data]})
        
#         prediction = ' '.join(map(str, result))
        #region = region.reshape([28,28])
        #plt.gray()
#         if (prediction == 3):
#             plt.imshow(region)
#             plt.show()
#             print (prediction)

# print ({x: [data]})

# image = Image.open(im_saved) 
# image.show()



# im_full  = "naples.jpg"
# im_saved = "saved.jpg"
# img = Image.open(im_full) 

# plt.imshow(img)
# plt.show()
# print (prediction)

# Test (28,28) image
# im_cropped = img.crop( (400,600,428,628) )
# im_cropped.save(im_saved)

# x = imread('naples.jpg', mode='L')
# x = imread('saved.jpg', mode='L')
# x = np.invert(x)

# x = imread('nums_bw/e_1.png', mode='L')
# x = imread('letters_small/l.png', mode='L')
# x = imread('letters_small/b.png', mode='L')
# x = np.invert(x)