In [1]:
# Dataset locations, relative to the notebook's working directory.
# Point these at a sample/ directory instead for quicker experiments.
train_path = 'train/'
valid_path = 'valid/'
test_path = 'testing/'  # the full test set is always used

In [6]:
# Build the VGG16 network layer by layer here instead of using the
# Vgg16 helper class (i.e. instead of `vgg = Vgg16()`).

from keras.models import Model, Sequential
from keras.layers import Flatten, Lambda
from keras.layers import Dense
from keras.layers import Input
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalAveragePooling2D
from keras.layers import GlobalMaxPooling2D
from keras.optimizers import Adam

import numpy as np

# Per-channel mean in RGB order, shaped (1, 1, 3) so that it broadcasts over
# any channels-last image array or batch.
vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((1,1,3))
def vgg_preprocess(x):
    """
        Subtracts the mean RGB value, and transposes RGB to BGR.
        The mean RGB was computed on the image set used to train the VGG model.

        The input must be channels-last. Both a single image and a batch are
        accepted: the mean broadcasts over the leading axes and only the last
        (channel) axis is reversed, so height and width are left untouched.

        Args: 
            x: Image array or tensor (... x height x width x channels), with
               the three channels in RGB order.
        Returns:
            Array or tensor of the same shape, mean-centred, with the channels
            in BGR order.
    """
    # Previously this function returned x unchanged, so the network received
    # raw RGB pixels. NOTE(review): the Keras VGG16 weights are documented as
    # expecting zero-centred BGR input - confirm for the weights file used here.
    x = x - vgg_mean
    # Reverse the channel axis only (rgb -> bgr). With channels-last input,
    # x[:, ::-1] would flip the height axis instead.
    return x[..., ::-1]

# img_input = Input(shape=(224, 224, 3))

def _add_conv_block(net, block_no, filters, n_convs):
    """Append n_convs 3x3 same-padded ReLU convolutions, then a 2x2 max-pool, to net."""
    for conv_no in range(1, n_convs + 1):
        net.add(Conv2D(filters, (3, 3), activation='relu', padding='same',
                       name='block%d_conv%d' % (block_no, conv_no)))
    net.add(MaxPooling2D((2, 2), strides=(2, 2), name='block%d_pool' % block_no))


# (filters, number of convolutions) for each of the five convolutional blocks.
_CONV_BLOCKS = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()
# The first layer applies vgg_preprocess to every 224x224x3 input batch.
model.add(Lambda(vgg_preprocess, input_shape=(224, 224, 3), output_shape=(224, 224, 3)))

for _block_no, (_filters, _n_convs) in enumerate(_CONV_BLOCKS, start=1):
    _add_conv_block(model, _block_no, _filters, _n_convs)

# Classifier head: two 4096-unit ReLU layers followed by a 1000-way softmax.
model.add(Flatten(name='flatten'))
for _fc_name in ('fc1', 'fc2'):
    model.add(Dense(4096, activation='relu', name=_fc_name))
model.add(Dense(1000, activation='softmax', name='predictions'))
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_2 (Lambda)            (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [7]:
from keras.utils.data_utils import get_file

# Name of the pre-trained weights file and the base URL it is fetched from.
file_name = 'vgg16_weights_tf_dim_ordering_tf_kernels.h5'
file_path = 'http://files.fast.ai/models/'

# Resolve the weights to a local file via get_file (using the 'models' cache
# sub-directory), then load them into the network built above.
weights_file = get_file(file_name, file_path + file_name, cache_subdir='models')
model.load_weights(weights_file)

In [8]:
# Mark every existing layer as non-trainable before the new output layer is added.
for existing_layer in model.layers:
    existing_layer.trainable = False

# Swap the 1000-way output layer for a 2-way softmax.
model.pop()
two_class_head = Dense(2, activation='softmax', name='predictions')
model.add(two_class_head)

model.summary()

model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_2 (Lambda)            (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [9]:
# Training constants. Increase no_of_epochs to train the model for longer.
no_of_epochs = 1
batch_size = 32

In [10]:
from keras.preprocessing.image import ImageDataGenerator

# Random augmentation settings, applied to the training images only.
augmentation = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
datagen = ImageDataGenerator(**augmentation)

# Options shared by the training and validation directory iterators.
flow_options = dict(target_size=(224, 224), class_mode='categorical',
                    shuffle=True, batch_size=batch_size)

train_data = datagen.flow_from_directory(train_path, **flow_options)

# Validation images are read without any augmentation.
valid_data = ImageDataGenerator().flow_from_directory(valid_path, **flow_options)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [12]:
import keras
from keras import backend as K
# K.clear_session()
    
import math

# Batches needed to cover each dataset once. Rounding up includes the final
# partial batch; flooring dropped it, so validation ran on a different random
# subset each epoch (the validation iterator is shuffled). max(1, ...) keeps
# the original "at least one step" guard. These values do not change between
# epochs, so they are computed once, outside the loop.
steps_per_epoch = max(1, int(math.ceil(train_data.samples / float(train_data.batch_size))))
validation_steps = max(1, int(math.ceil(valid_data.samples / float(valid_data.batch_size))))

# Train one epoch per fit_generator call so progress is reported per epoch.
for epoch in range(no_of_epochs):
    print("Running epoch:", epoch)
    model.fit_generator(train_data,
                        steps_per_epoch=steps_per_epoch,
                        epochs=1,
                        validation_data=valid_data,
                        validation_steps=validation_steps)

# Persist the fine-tuned weights once all epochs have finished.
model.save_weights('ft2.h5')
print("Completed", no_of_epochs, "fit operations")

Running epoch: 0
Epoch 1/1
Completed 1 fit operations


In [None]:
# NOTE(review): the training cell saves its weights to 'ft2.h5', but this cell
# loads 'ft1.h5' - confirm which checkpoint is intended before predicting.
model.load_weights('ft1.h5')

In [15]:
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd

import math
import os

# test_path comes from the configuration cell at the top of the notebook
# (it was redundantly re-assigned to the same value here).
gen = ImageDataGenerator()
# shuffle=False keeps the prediction rows in the same order as images.filenames.
images = gen.flow_from_directory(test_path, target_size=(224,224), class_mode='categorical', shuffle=False)

# ceil(samples / batch_size) batches cover every image exactly once. The old
# `samples // batch_size + 1` asked for one batch too many whenever the sample
# count was an exact multiple of the batch size.
prediction_steps = int(math.ceil(images.samples / float(images.batch_size)))
predict = model.predict_generator(images, prediction_steps)

# Image id = file name without directory or extension. os.path handles the
# platform's path separator, unlike stripping a hard-coded 'test1\\' prefix.
test_list = [os.path.splitext(os.path.basename(fn))[0] for fn in images.filenames]

submission = pd.DataFrame()
submission['label'] = predict[:, 1]  # second output column of the 2-way softmax
submission['id'] = pd.to_numeric(test_list)

submission = submission.sort_values('id')

# Keep predictions within [0.05, 0.95]. clip() replaces the chained-indexing
# assignments, which pandas may apply to a temporary copy.
submission['label'] = submission['label'].clip(0.05, 0.95)

submission.to_csv('submission.csv', columns=["id", "label"], index=False)


Found 12500 images belonging to 1 classes.


KeyboardInterrupt: 