In [2]:
from __future__ import print_function

import scipy.io as spio
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

import datetime
import keras
#from keras.datasets import mnist
from keras.models import Sequential
from keras.datasets import mnist
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import RMSprop
from keras import backend as K

def tobacco_loadData(mat_path='imdb-tobacco.mat'):
    """Load the tobacco patch dataset and return its train and test splits.

    The .mat file must contain an ``images`` struct with three fields:
    ``data`` (patch stack that squeezes to ``(rows, cols, n_samples)``),
    ``labels`` (one binary label per sample) and ``set`` (split id per
    sample: 1 = train, 2 = val, 3 = test).  Validation samples (id 2) are
    not returned.

    The patch height and width are taken from the loaded array itself, so
    this function does not depend on any notebook-level global.

    Parameters
    ----------
    mat_path : str, optional
        Path of the MATLAB file to read.  Defaults to ``'imdb-tobacco.mat'``
        in the current working directory.

    Returns
    -------
    (x_train, y_train), (x_test, y_test)
        ``x_*`` has shape ``(rows, cols, 1, n)`` and ``y_*`` has shape
        ``(1, n)``, where ``n`` is the number of samples in that split.

    Raises
    ------
    AssertionError
        If the labels are not exactly the two classes 0 and 1.
    ValueError
        If the image data does not squeeze to a 3-D stack whose last axis
        matches the number of entries in ``set``.
    """
    images = spio.loadmat(mat_path)['images']

    # Each struct field is wrapped in a 1x1 object array; unwrap it.
    x_total = np.squeeze(images['data'][0][0])
    y_total = images['labels'][0][0]
    set_ = images['set'][0, 0]  # train:1, val:2, test:3

    assert np.array_equal(np.unique(y_total), np.array([0, 1]))  # fails if only 0 or 1

    if x_total.ndim != 3 or x_total.shape[2] != set_.size:
        raise ValueError(
            'expected image data of shape (rows, cols, %d), got %r'
            % (set_.size, x_total.shape))
    rows, cols = x_total.shape[:2]
    flat_labels = np.ravel(y_total)

    def _select(split_id):
        # Pick the samples of one split by masking the sample (last) axis.
        mask = np.ravel(set_ == split_id)
        count = int(np.sum(mask))
        x_split = np.reshape(x_total[:, :, mask], (rows, cols, 1, count))
        y_split = np.reshape(flat_labels[mask], (1, count))
        return x_split, y_split

    x_train, y_train = _select(1)
    x_test, y_test = _select(3)

    print('END: tobacco_loadData')
    return (x_train, y_train), (x_test, y_test)

Using TensorFlow backend.


In [3]:
# ---- Experiment configuration ----
patch_size = 50                    # side length of the square input patches
now = datetime.datetime.now        # clock used to time the training run

# Training hyper-parameters (adapted from the Keras MNIST example)
batch_size, num_classes, epochs = 128, 2, 20

# Network geometry
img_rows = img_cols = patch_size   # input image dimensions
filters = 32                       # number of convolutional filters to use
pool_size = 2                      # size of pooling area for max pooling
kernel_size = 3                    # convolution kernel size

# The input shape is fixed to channels-last (rows, cols, 1); the backend's
# K.image_data_format() is not consulted here.
input_shape = (img_rows, img_cols, 1)


def train_model(model, x_train, y_train, x_test, y_test, num_classes):
    """Compile ``model``, fit it on the training data and print test metrics.

    Parameters
    ----------
    model : keras model
        Uncompiled model; it is compiled here with categorical cross-entropy
        loss and the Adadelta optimizer.
    x_train, x_test : ndarray
        4-D arrays with the sample axis last; they are transposed so that
        the sample axis comes first before being fed to Keras.
    y_train, y_test : array-like of int
        Integer class labels, one per sample.  Any shape is accepted (for
        example ``(1, n)``); the labels are flattened before encoding.
    num_classes : int
        Number of classes used for the one-hot encoding.

    Returns
    -------
    None
        Progress, training time, test loss and test accuracy are printed.

    Notes
    -----
    ``batch_size``, ``epochs`` and ``now`` are read from module-level
    configuration.
    """
    # Move the sample axis to the front: axes (0, 1, 2, 3) -> (3, 0, 1, 2).
    x_train = x_train.transpose(3, 0, 1, 2)
    x_test = x_test.transpose(3, 0, 1, 2)

    # Convert class vectors to binary class matrices.  The labels are
    # flattened first: Keras versions that preserve the leading dimensions
    # would turn a (1, n) label array into (1, n, num_classes), which no
    # longer lines up with the n samples in x.
    y_train = keras.utils.to_categorical(np.ravel(y_train), num_classes)
    y_test = keras.utils.to_categorical(np.ravel(y_test), num_classes)
    print('y_train:', y_train)
    print('PASSED')

    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    t = now()
    # NOTE(review): the test split is also used as validation data during
    # fitting, so the metrics printed below are measured on the same samples
    # that were monitored while training.
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))
    print('Training time: %s' % (now() - t))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    print('END: train_model')
    
    
# Load the train/test splits of the tobacco patch dataset.
(x_train, y_train), (x_test, y_test) = tobacco_loadData()

# The network is assembled from two groups of layers:
# feature extraction (convolutions) followed by classification (dense).
feature_layers = [
    Conv2D(filters=filters, kernel_size=kernel_size,
           padding='valid', input_shape=input_shape),
    Activation('relu'),
    Conv2D(filters=filters, kernel_size=kernel_size),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(0.25),
    Flatten(),
]

classification_layers = [
    Dense(units=128),
    Activation('relu'),
    Dropout(0.5),
    Dense(units=num_classes),
    Activation('softmax'),
]

# Stack both groups into one sequential model.
model = Sequential(layers=feature_layers + classification_layers)
print('LOG: Model Created')

# Train and evaluate the model on the two-class tobacco task.
train_model(model=model,
            x_train=x_train, y_train=y_train,
            x_test=x_test, y_test=y_test,
            num_classes=num_classes)

END: tobacco_loadData
LOG: Model Created
y_train: [[ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 ..., 
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]]
PASSED
Train on 23583 samples, validate on 6738 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Training time: 1:41:11.853296
Test score: 0.563362723378
Test accuracy: 0.772781240751
END: train_model
