In [80]:
import os
import subprocess
import math

from PIL import Image
from scipy import misc
import pandas as pd
import numpy as np

from sklearn.cross_validation import train_test_split

In [26]:
# Centre-crop every image of the selected class folders to 320x320, shrink it
# to 75x75 with the external `convert` command, and write the result into one
# flat directory as '<folder>__<filename>'.
source_root = '../data/food-101/images/'
target_root = '../data/food-101-modified/'

# Make sure the output directory exists before anything is written into it.
if not os.path.isdir(target_root):
    os.makedirs(target_root)

# Source files whose conversion exited with a non-zero status.
failed_conversions = []

# NOTE(review): os.listdir returns entries in arbitrary order, so which
# folders the [1:10] slice selects is platform dependent -- confirm intent.
for folder in os.listdir(source_root)[1:10]:
    source_folder_path = os.path.join(source_root, folder)

    for filename in os.listdir(source_folder_path):
        source_file = os.path.join(source_folder_path, filename)
        target_file = os.path.join(target_root, folder + '__' + filename)

        return_code = subprocess.call([
            'convert',
            source_file,
            '-gravity',
            'center',
            '-crop',
            '320x320+0+0',
            '-resize',
            '75x75',
            target_file,
        ])

        # Keep going on failure, but remember which files were affected.
        if return_code != 0:
            failed_conversions.append(source_file)

if failed_conversions:
    print('%d image(s) failed to convert' % len(failed_conversions))

### Data Prep

In [86]:
# Directory that holds the resized images.
path = '../data/food-101-modified//'

# Keep only regular files; anything else in the directory is skipped.
filenames = []
for entry in os.listdir(path):
    if os.path.isfile(os.path.join(path, entry)):
        filenames.append(entry)

In [60]:
# Read every image once, record the positions whose decoded shape is not the
# expected 75x75 RGB, and keep only the well-formed images.
#
# Positions index into filenames[1:] (the first directory entry is skipped),
# which is the same indexing the label-building cell applies with `bad_ixs`.
expected_shape = (75, 75, 3)

all_images = [misc.imread(path + name) for name in filenames[1:]]

bad_ixs = [
    ix
    for ix, img in enumerate(all_images)
    if img.shape != expected_shape
]

# Set lookup keeps the filter linear in the number of images.
bad_ix_lookup = set(bad_ixs)
image_data = [
    img
    for ix, img in enumerate(all_images)
    if ix not in bad_ix_lookup
]

# Drop the unfiltered list so the rejected arrays can be garbage collected.
del all_images

In [26]:
# swapped_image_data = np.swapaxes(image_data, 3, 1)

In [68]:
# Stack the per-image (rows, cols, channels) arrays into one batch array.
data = np.array(image_data)

# Move the channel axis to position 1: (N, rows, cols, C) -> (N, C, rows, cols).
# np.swapaxes(data, 3, 1) would instead produce (N, C, cols, rows), i.e. it
# would also transpose every image; that only goes unnoticed for square inputs.
swapped_image_data = np.transpose(data, (0, 3, 1, 2))

In [89]:
# The label of an image is the part of its file name before the first '__'.
# Positions listed in `bad_ixs` are skipped so labels line up with the images.
labels = []
for ix, f in enumerate(filenames[1:]):
    if ix in bad_ixs:
        continue
    labels.append(f.split('__')[0])

In [90]:
# Sanity check: there must be exactly one label per image.
assert swapped_image_data.shape[0] == len(labels), 'image/label count mismatch'

# Single-argument print(...) gives the same output whether `print` is the
# Python 2 statement or the print function.
print(swapped_image_data.shape)
print(len(labels))

(9507, 3, 75, 75)
9507


In [91]:
# Features: the channel-first image batch.
X = swapped_image_data

# Targets: integer code per label string, shaped as a single column.
label_codes = pd.factorize(labels)[0]
y = label_codes[:, np.newaxis]

In [92]:
# 65% of the samples go to training, the rest to testing.
# A fixed random_state makes the split (and every number derived from it)
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=.65, random_state=42)

### Keras

In [93]:
from __future__ import print_function
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils

# Mini-batch size and number of passes over the training data.
batch_size, nb_epoch = 32, 5
# True: train on randomly augmented batches; False: train on the arrays as-is.
data_augmentation = True

Using Theano backend.


In [94]:
# number of classes, derived from the integer label codes in `y`
# (pd.factorize numbers the distinct labels 0 .. k-1)
nb_classes = int(y.max()) + 1
# input image dimensions
img_rows, img_cols = 75, 75
# the images are RGB
img_channels = 3

print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

X_train shape: (6179, 3, 75, 75)
6179 train samples
3328 test samples


In [95]:
# Create the model
model = Sequential()

model.add(Convolution2D(32, 3, 3, border_mode='same',
                        input_shape=(img_channels, img_rows, img_cols)))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 3, 3, border_mode='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))



In [96]:
# Optimizer: SGD with Nesterov momentum and a small learning-rate decay.
sgd_settings = dict(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
sgd = SGD(**sgd_settings)

# Multi-class cross-entropy loss; accuracy is reported during training.
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [97]:
# Convert the pixel arrays to float32 and divide them by 255.
# Caution: this rebinds X_train / X_test, so running the cell a second time
# divides by 255 again.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [98]:
if data_augmentation:
    print('Using real-time data augmentation.')

    # Generator settings: no normalisation or whitening, no rotation; only
    # small random shifts and horizontal flips are enabled.
    augmentation_options = dict(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=0,                     # random rotation range in degrees
        width_shift_range=0.1,                # random horizontal shift (fraction of width)
        height_shift_range=0.1,               # random vertical shift (fraction of height)
        horizontal_flip=True,                 # randomly flip images horizontally
        vertical_flip=False,                  # randomly flip images vertically
    )
    datagen = ImageDataGenerator(**augmentation_options)

    # Compute the dataset statistics that featurewise normalisation / ZCA
    # whitening would need.
    datagen.fit(X_train)

    # Train on the batches produced by the generator.
    train_batches = datagen.flow(X_train, Y_train, batch_size=batch_size)
    model.fit_generator(
        train_batches,
        samples_per_epoch=X_train.shape[0],
        nb_epoch=nb_epoch,
        validation_data=(X_test, Y_test),
    )
else:
    print('Not using data augmentation.')

    # Train directly on the in-memory arrays.
    model.fit(
        X_train,
        Y_train,
        batch_size=batch_size,
        nb_epoch=nb_epoch,
        validation_data=(X_test, Y_test),
        shuffle=True,
    )

Using real-time data augmentation.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [99]:
# Predicted class index for every test image. Reuses the configured
# `batch_size` instead of repeating the literal 32.
preds = model.predict_classes(X_test, batch_size=batch_size)

