In [1]:

import keras
import os
import numpy as np
from scipy import ndimage
import pickle
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import tensorflow as tf

Using TensorFlow backend.


In [2]:
tf.__version__

'1.0.0'

In [3]:
image_size = 90

data_folders = os.listdir('./letters/')

data_folders = [os.path.join('./letters/', d) for d in data_folders if os.path.isdir(os.path.join('./letters/', d))] 

In [4]:
def load_letter(folder):
    files = os.listdir(folder)
    dataset = np.ndarray(shape = (len(files), image_size, image_size), dtype=np.float32)
    img_num = 0
    for image in files:
        if 'DS' not in image:
            image_name = os.path.join(folder, image)
            image_data = ndimage.imread(image_name, mode = 'L')
            dataset[img_num] = image_data
            img_num += 1
    return dataset
def image_pickling(folders):
    dataset_names = []
    for folder in folders:
        set_filename = folder + '.pickle'
        dataset_names.append(set_filename)
        dataset = load_letter(folder)
        try:
            with open(set_filename, 'wb') as f:
                pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
        except Exception as e:
            print('Something is wrong', set_filename, ':', e )
    return dataset_names

In [5]:
datasets = image_pickling(data_folders)

In [6]:
set_size = 1000

In [7]:
def merge_datasets(pickle_files, set_size):
    num_classes = len(pickle_files)
    dataset = np.ndarray(shape = (set_size * num_classes, image_size, image_size), dtype = np.float32)
    labels = np.ndarray(set_size * num_classes, dtype = np.int32)
    start_pos = 0
    for label, pickle_file in enumerate(sorted(pickle_files)):
        try:
            with open(pickle_file, 'rb') as f:
                letter_set = pickle.load(f)
                np.random.shuffle(letter_set)
                dataset[start_pos:start_pos + set_size, :, :] = letter_set[:set_size, :, :]
                labels[start_pos:start_pos + set_size] = label
                start_pos += set_size
        except Exception as e:
            print('Something is wrong', pickle_file, ':', e)
    dataset /= 255
    return dataset, labels

In [8]:
dataset, labels = merge_datasets(datasets, set_size)

In [9]:
def randomize(dataset, labels):
    permutaion = np.random.permutation(dataset.shape[0])
    dataset = dataset[permutaion]
    labels = labels[permutaion]
    return dataset, labels

In [10]:
dataset, labels = randomize(dataset, labels)


In [29]:
labels.shape

(4000,)

In [11]:
batch_size = 128
num_classes = 4
epochs = 12

In [30]:
x_train, x_test, y_train, y_test = train_test_split(dataset, labels, random_state = 241)

In [31]:
y_train.shape

(3000,)

In [37]:
x_train = x_train.reshape(x_train.shape[0], image_size, image_size, 1)
x_test = x_test.reshape(x_test.shape[0],  image_size, image_size, 1)
input_shape = (image_size, image_size, 1)
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

x_train shape: (3000, 90, 90, 1)
3000 train samples
1000 test samples


In [33]:
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [34]:
y_train.shape

(3000, 4)

In [35]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

In [38]:
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 3000 samples, validate on 1000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.128609986186
Test accuracy: 0.973


In [40]:
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=3,
          verbose=1,
          validation_data=(x_test, y_test))

Train on 3000 samples, validate on 1000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1333e52e8>

In [42]:
model.shape()

AttributeError: 'Sequential' object has no attribute 'shape'

In [44]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(model).create(prog='dot', format='svg'))

ImportError: Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.

In [None]:
vv