In [22]:
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers
from keras.models import Model, Sequential

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [10]:
# Root folder of the dataset and the two CSV index files stored inside it.
data_path = 'data/cifar-10/'
train_path, test_path = (
    data_path + csv_name
    for csv_name in ('trainLabels.csv', 'sampleSubmission.csv')
)

# Each dataframe lists image filenames under "id".
# Every column is read as string data so the ids stay text.
train_df, test_df = (
    pd.read_csv(csv_path, dtype='str')
    for csv_path in (train_path, test_path)
)

In [11]:
# Number of training rows per class label.
train_df.label.value_counts()

automobile    5000
horse         5000
airplane      5000
bird          5000
frog          5000
dog           5000
cat           5000
truck         5000
deer          5000
ship          5000
Name: label, dtype: int64

In [12]:
def append_ext(filename):
    '''
    Return `filename` (str) with the '.png' file extension appended.

    A name that already ends in '.png' is returned unchanged, so applying
    this function more than once to the same value (for example by
    re-running the cell that maps it over the "id" column) does not
    produce names such as '1.png.png'.
    '''
    extension = '.png'
    if filename.endswith(extension):
        return filename
    return filename + extension

In [13]:
# Turn the bare ids into image filenames by running append_ext over
# every value of the "id" column in both dataframes.
train_df['id'] = train_df['id'].map(append_ext)
test_df['id'] = test_df['id'].map(append_ext)

In [14]:
# Preview the first five rows to check the "id" values after the rename.
train_df.head(n=5)

Unnamed: 0,id,label
0,1.png,frog
1,2.png,truck
2,3.png,truck
3,4.png,deer
4,5.png,automobile


In [15]:
# Generator config shared by the training and validation flows:
# pixel values are multiplied by 1/255 and validation_split=0.2 sets the
# fraction of rows reserved for the "validation" subset.
datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.2,
)

In [19]:
# Arguments shared by the training and validation flows. The two generators
# read the same dataframe and image folder and differ only in `subset`.
_train_flow_kwargs = dict(
    dataframe=train_df,
    directory=(data_path+"train/"),
    x_col="id",
    y_col="label",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(32,32),
)

train_generator = datagen.flow_from_dataframe(subset="training", **_train_flow_kwargs)

valid_generator = datagen.flow_from_dataframe(subset="validation", **_train_flow_kwargs)

Found 40000 validated image filenames belonging to 10 classes.
Found 10000 validated image filenames belonging to 10 classes.


In [20]:
# The test flow gets its own generator: same 1/255 rescale, but no
# validation split. No label column is read (y_col=None, class_mode=None)
# and shuffle=False keeps the images in dataframe row order.
testgen = ImageDataGenerator(rescale=1./255.)

test_generator = testgen.flow_from_dataframe(
    dataframe=test_df,
    directory=(data_path+"test/"),
    x_col="id",
    y_col=None,
    batch_size=32,
    seed=42,
    shuffle=False,
    class_mode=None,
    target_size=(32,32),
)

Found 300000 validated image filenames.


In [26]:
# Small CNN for 32x32 RGB images: two conv blocks followed by a dense
# classifier with one softmax output per class.
model = Sequential()

# Conv block 1 (32 filters). The first conv pads to keep 32x32; the second
# uses the default 'valid' padding (-> 30x30) and the 2x2 pool halves it
# (-> 15x15).
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(32,32,3)))
model.add(Activation('relu'))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Conv block 2 (64 filters): 15x15 -> 13x13 after the unpadded conv,
# -> 6x6 after pooling, then flattened for the dense layers.
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())

# Classifier head: one hidden layer with heavier dropout, then a
# 10-way softmax.
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(10, activation='softmax'))

# `learning_rate` replaces the deprecated `lr` alias; the value is unchanged.
# categorical_crossentropy matches the one-hot labels produced by
# class_mode="categorical" in the generators.
model.compile(optimizers.RMSprop(learning_rate=0.0001, decay=1e-6),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

In [27]:
# Whole batches available per pass over each generator. Floor division
# means a trailing partial batch is not counted.
STEP_SIZE_TRAIN, STEP_SIZE_VALID, STEP_SIZE_TEST = (
    flow.n // flow.batch_size
    for flow in (train_generator, valid_generator, test_generator)
)

In [28]:
# Train for 10 epochs, validating on the held-out subset after each one.
# Model.fit accepts generators directly; fit_generator is deprecated.
model.fit(x=train_generator,
          steps_per_epoch=STEP_SIZE_TRAIN,
          validation_data=valid_generator,
          validation_steps=STEP_SIZE_VALID,
          epochs=10)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f88d2423208>

In [30]:
# Second training run on the same `model` object: calling fit again
# continues from the model's current weights for another 10 epochs.
# Model.fit accepts generators directly; fit_generator is deprecated.
model.fit(x=train_generator,
          steps_per_epoch=STEP_SIZE_TRAIN,
          validation_data=valid_generator,
          validation_steps=STEP_SIZE_VALID,
          epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f88bf5f56a0>