In [1]:
import os

import tensorflow as tf
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator

import keras as K
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

Using TensorFlow backend.


In [2]:
# Load the training manifest (one row per image: filename and class label)
# and preview the first rows.
train_csv_path = "../data/train.csv"
df = pd.read_csv(train_csv_path)
df.head()

Unnamed: 0,filename,label
0,0.png,4
1,1.png,9
2,2.png,1
3,3.png,7
4,4.png,3


In [12]:
train_data_dir = "../data/train/"

# Images are resized to 16x16 when loaded. An earlier 256x256 setting was
# immediately overwritten by these values (dead assignment), so only the
# effective size is kept.
img_height = 16
img_width = 16

channels = 3
input_shape = (img_height, img_width, channels)
target_size = (img_height, img_width)
num_classes = 10
batch_size = 32
validation_split = 0.2  # fraction of the training directory held out for validation

# Training pipeline: scale pixel values by 1/255 and apply random shear/zoom
# augmentation.
train_datagen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=False,
    validation_split=validation_split)

# Validation pipeline: same rescaling and the same validation_split (so the
# 'validation' subset refers to the same held-out fraction), but no random
# augmentation -- validation metrics should be measured on unmodified images.
validation_datagen = ImageDataGenerator(rescale=1./255,
    validation_split=validation_split)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode="categorical",
    subset='training') # training portion of the directory

validation_generator = validation_datagen.flow_from_directory(
    train_data_dir, # same directory as training data
    target_size=target_size,
    batch_size=batch_size,
    class_mode="categorical",
    subset='validation') # held-out portion of the directory

Found 39205 images belonging to 10 classes.
Found 9795 images belonging to 10 classes.


In [14]:
# Small CNN classifier: two 3x3 convolutions, one 2x2 max-pool, then a dense
# head with dropout, ending in a softmax over `num_classes` outputs.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels produced by the
# generators (class_mode="categorical").
model.compile(
    loss=K.losses.categorical_crossentropy,
    optimizer=K.optimizers.Adadelta(),
    metrics=['accuracy'],
)

In [13]:
checkpoint_path = "../model/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Make sure the target directory exists before training starts, so the first
# checkpoint write does not depend on the folder having been created by hand.
os.makedirs(checkpoint_dir, exist_ok=True)

# Checkpoint callback: writes the model weights (not the full model) to
# `checkpoint_path` and logs each save (verbose=1).
cp_callback = K.callbacks.ModelCheckpoint(checkpoint_path,
                                          save_weights_only=True,
                                          verbose=1)


In [15]:
nb_epochs = 10

# Use len(generator) -- the number of batches in one full pass, including the
# final partial batch -- instead of `samples // batch_size`. Floor division
# drops the remainder, so each Keras epoch / validation pass would not line up
# with exactly one pass over the data.
model.fit_generator(
    train_generator,
    steps_per_epoch = len(train_generator),
    validation_data = validation_generator,
    validation_steps = len(validation_generator),
    epochs = nb_epochs,
    callbacks = [cp_callback])


# Persist the final weights separately from the per-epoch checkpoint.
model.save_weights('../model/my_model')

Epoch 1/10

Epoch 00001: saving model to ../model/cp.ckpt
Epoch 2/10

Epoch 00002: saving model to ../model/cp.ckpt
Epoch 3/10

Epoch 00003: saving model to ../model/cp.ckpt
Epoch 4/10

Epoch 00004: saving model to ../model/cp.ckpt
Epoch 5/10

Epoch 00005: saving model to ../model/cp.ckpt
Epoch 6/10

Epoch 00006: saving model to ../model/cp.ckpt
Epoch 7/10

Epoch 00007: saving model to ../model/cp.ckpt
Epoch 8/10

Epoch 00008: saving model to ../model/cp.ckpt
Epoch 9/10

Epoch 00009: saving model to ../model/cp.ckpt
Epoch 10/10

Epoch 00010: saving model to ../model/cp.ckpt


In [19]:
test_data_dir = "../data/Images/test"

# Inference-time preprocessing: only the 1/255 rescaling used for training.
# No random shear/zoom here -- augmentation at prediction time would distort
# the inputs and make the predictions non-deterministic.
test_datagen = ImageDataGenerator(rescale=1./255)

# batch_size=1 together with shuffle=False keeps row i of the predictions
# aligned with test_generator.filenames[i]. class_mode=None because the test
# images carry no labels; the generator then yields image batches only.
test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=target_size,
    batch_size=1,
    class_mode=None,
    shuffle = False)

filenames = test_generator.filenames
nb_samples = len(filenames)

# One step per image (batch_size=1), so every test file is predicted exactly once.
predict = model.predict_generator(test_generator,steps = nb_samples)

Found 21000 images belonging to 1 classes.


In [22]:
# Generator filenames include the sub-directory prefix (e.g. "test/...");
# strip it with basename rather than a hard-coded character offset.
os.path.basename(filenames[0])

'49000.png'

In [27]:
# Load the test manifest (one row per test image filename). Preview only the
# first rows instead of displaying the whole frame.
df_pred = pd.read_csv("../data/test.csv")
df_pred.head(10)

Unnamed: 0,filename
0,49000.png
1,49001.png
2,49002.png
3,49003.png
4,49004.png
5,49005.png
6,49006.png
7,49007.png
8,49008.png
9,49009.png


In [25]:
# Inspect the softmax output (one probability per class) for the test image at index 1.
predict[1]

array([9.9997640e-01, 1.6430181e-07, 9.8705132e-06, 3.2868926e-11,
       9.4605575e-06, 7.2530306e-12, 3.5988501e-07, 1.6123521e-07,
       1.6211862e-06, 1.9946049e-06], dtype=float32)

In [30]:
# Sanity check: expect one row per test image and one column per class.
predict.shape

(21000, 10)

In [32]:
# Most probable class index for every test image (argmax across the class axis).
# NOTE(review): these are generator class *indices*; presumably the training
# class folders are named so that index == label -- confirm against
# train_generator.class_indices.
y_pred = predict.argmax(axis=1)

In [33]:
# Sanity check: expect a single predicted class index per test image.
y_pred.shape

(21000,)

In [34]:
# Attach predictions by filename rather than by row position: the generator
# lists files in its own directory order, which is not guaranteed to match the
# row order of test.csv.
pred_by_filename = pd.Series(
    y_pred,
    index=[os.path.basename(name) for name in filenames],
)

# Fail loudly if any manifest row has no prediction, instead of silently
# writing a missing label.
unmatched = df_pred.loc[~df_pred["filename"].isin(pred_by_filename.index), "filename"]
if len(unmatched) > 0:
    raise ValueError(
        "No prediction found for %d file(s) listed in test.csv, e.g. %r"
        % (len(unmatched), unmatched.iloc[0])
    )

df_pred["label"] = df_pred["filename"].map(pred_by_filename)

In [37]:
# Write the predictions next to the input data. index=False omits the pandas
# row index; every column currently in df_pred is written as-is.
pred_csv_path = "../data/pred.csv"
df_pred.to_csv(pred_csv_path, index=False)