In [1]:
import os

import tensorflow as tf
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator

import keras as K
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

Using TensorFlow backend.


In [2]:
df = pd.read_csv("../data/train.csv")
df.head()

Unnamed: 0,id,label
0,1,9
1,2,0
2,3,0
3,4,3
4,5,0


In [3]:
train_data_dir = "../data/train/"

img_height = 256
img_width = 256
img_height = 32
img_width = 32

channels = 3
input_shape = (img_height, img_width, channels)
target_size = (img_height, img_width)
num_classes = 10
batch_size = 32

train_datagen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=False,
    validation_split=0.2) # set validation split

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode="categorical",
    subset='training') # set as training data

validation_generator = train_datagen.flow_from_directory(
    train_data_dir, # same directory as training data
    target_size=target_size,
    batch_size=batch_size,
    class_mode="categorical",
    subset='validation') # set as validation data

Found 48000 images belonging to 10 classes.
Found 12000 images belonging to 10 classes.


In [4]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=K.losses.categorical_crossentropy,
              optimizer=K.optimizers.Adadelta(),
              metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [5]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)
mc = ModelCheckpoint('../model/best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)


In [6]:
nb_epochs = 10

model.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples // batch_size,
    validation_data = validation_generator, 
    validation_steps = validation_generator.samples // batch_size,
    epochs = nb_epochs,
    callbacks = [es, mc])


model.save_weights('../model/my_model')

Instructions for updating:
Use tf.cast instead.
Epoch 1/10

Epoch 00001: val_acc improved from -inf to 0.84742, saving model to ../model/best_model.h5
Epoch 2/10

Epoch 00002: val_acc improved from 0.84742 to 0.88067, saving model to ../model/best_model.h5
Epoch 3/10

Epoch 00003: val_acc improved from 0.88067 to 0.88300, saving model to ../model/best_model.h5
Epoch 4/10

Epoch 00004: val_acc did not improve from 0.88300
Epoch 5/10

Epoch 00005: val_acc improved from 0.88300 to 0.89267, saving model to ../model/best_model.h5
Epoch 6/10

Epoch 00006: val_acc did not improve from 0.89267
Epoch 7/10

Epoch 00007: val_acc improved from 0.89267 to 0.89783, saving model to ../model/best_model.h5
Epoch 8/10

Epoch 00008: val_acc improved from 0.89783 to 0.90058, saving model to ../model/best_model.h5
Epoch 9/10

Epoch 00009: val_acc did not improve from 0.90058
Epoch 10/10

Epoch 00010: val_acc did not improve from 0.90058
Epoch 00010: early stopping


In [10]:
model.load_weights('../model/best_model.h5')

test_data_dir = "../data/test/"
test_datagen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=False) # set validation split

test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=target_size,
    batch_size=1,
    class_mode="categorical",
    shuffle = False)

filenames = test_generator.filenames
nb_samples = len(filenames)

predict = model.predict_generator(test_generator,steps = nb_samples)

Found 10000 images belonging to 1 classes.


In [11]:
filenames[0][5:]

'60001.png'

In [12]:
df_pred = pd.read_csv("../data/test.csv")
df_pred

Unnamed: 0,id
0,60001
1,60002
2,60003
3,60004
4,60005
5,60006
6,60007
7,60008
8,60009
9,60010


In [13]:
predict[1]

array([4.9181293e-05, 6.2052853e-13, 9.9973208e-01, 2.0242119e-07,
       3.7674174e-05, 1.9821087e-11, 1.8080290e-04, 2.9201522e-14,
       2.2562201e-08, 1.7938224e-13], dtype=float32)

In [14]:
predict.shape

(10000, 10)

In [15]:
y_pred = np.argmax(predict, axis = 1)

In [16]:
y_pred.shape

(10000,)

In [17]:
df_pred["label"] = y_pred

In [18]:
df_pred.to_csv("../data/pred.csv", index=False)