In [None]:
import os
import numpy as np

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *

import pandas as pd

In [None]:
# Input and output folders, both resolved against the directory the notebook
# is launched from.
DATA_DIR, OUTPUT_DIR = (
    os.path.join(os.getcwd(), folder) for folder in ("data", "output")
)

# Path helpers: `Path` is a short alias for os.path.join.
Path = os.path.join


def DataPath(path):
    """Return `path` joined onto DATA_DIR."""
    return os.path.join(DATA_DIR, path)

def mkdir_p(directory):
    """Create `directory`, including any missing parents, if it is absent.

    Behaves like ``mkdir -p``: calling it on a directory that already exists
    is a no-op, and intermediate directories are created as needed.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If `directory` exists but is not a directory.
    """
    # makedirs with exist_ok=True creates parents and avoids the race between
    # a separate existence check and the creation call.
    os.makedirs(directory, exist_ok=True)

In [None]:
img_width, img_height = 150, 150

# Fraction of the images under data/verified that is held out for validation.
# Both generators below must use the same value so the subsets stay disjoint.
VALIDATION_SPLIT = 0.2

# Arguments shared by the training and validation iterators, so both read the
# same directory with the same image size, batch size and label encoding.
# NOTE(review): Keras documents target_size as (height, width). The two values
# are equal here, and the model cells build input_shape in the same order, so
# the order is left unchanged.
flow_kwargs = dict(
    directory=Path(DATA_DIR, 'verified'),
    color_mode='rgb',
    target_size=(img_width, img_height),
    batch_size=64,
    class_mode='categorical',
)

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
            rescale=1./255,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            validation_split=VALIDATION_SPLIT)

# Validation images are only rescaled. Drawing the validation subset from the
# augmenting generator would apply random shear/zoom/flip to it and make the
# validation metrics noisy. The subset membership is derived from
# validation_split and the directory listing, not from the augmentation
# settings, so the two subsets do not overlap.
valid_datagen = ImageDataGenerator(
            rescale=1./255,
            validation_split=VALIDATION_SPLIT)

train_generator = train_datagen.flow_from_directory(subset='training', **flow_kwargs)
valid_generator = valid_datagen.flow_from_directory(subset='validation', **flow_kwargs)

# Split using https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator
print("Label codes : ", train_generator.class_indices)
print("Training data   : ", train_generator.n)
print("Validation data : ", valid_generator.n)

### RESNET

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50

# ResNet50 convolutional base with ImageNet weights and no classifier on top.
model = ResNet50(include_top=False, weights="imagenet", input_shape=(img_width, img_height, 3))
# model.summary()

# Classification head. The output layer is sized from the classes found by
# the training iterator instead of a hard-coded 5, so it always matches the
# DataFrame columns built from the same mapping below.
top_model = Sequential([
    Flatten(input_shape=model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')
])
seq = Sequential([model, top_model])

# Compile with categorical cross-entropy loss and SGD with momentum.
# `learning_rate` replaces the deprecated `lr` argument.
seq.compile(loss='categorical_crossentropy',
            optimizer=SGD(learning_rate=1e-4, momentum=0.9),
            metrics=['accuracy'])

# Train for 5 epochs. Model.fit accepts generators directly; fit_generator is
# deprecated.
seq.fit(train_generator, epochs=5, validation_data=valid_generator)

# NOTE(review): test_generator is not defined in any cell visible in this
# notebook; it must exist before this cell runs. It presumably needs
# shuffle=False so the prediction rows line up with `filepaths` - confirm.
output = seq.predict(test_generator)

# File id = base name up to the first dot.
fids = [os.path.basename(path).split('.')[0] for path in test_generator.filepaths]

# One column per class label, one row per test image.
df = pd.DataFrame(output, index=fids, columns=list(train_generator.class_indices))
df.index.name = 'id'

# Reorder the rows to follow the ids listed in the submission template.
submission_format = pd.read_csv(Path(OUTPUT_DIR, 'submission_format.csv'), index_col='id')
df = df.reindex(submission_format.index)
df.to_csv(Path(OUTPUT_DIR, "resnet-submission.csv"))

In [None]:
# Run the external `dd-sub-valid` command on the submission template and the
# ResNet50 predictions written by the previous cell (presumably a format
# check - confirm against the tool's documentation). The relative paths assume
# the working directory is the one OUTPUT_DIR was built from.
!dd-sub-valid output/submission_format.csv output/resnet-submission.csv

### INCEPTION

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3

# InceptionV3 convolutional base with ImageNet weights and no classifier on top.
model = InceptionV3(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))
# model.summary()

# Classification head. The output layer is sized from the classes found by
# the training iterator instead of a hard-coded 5, so it always matches the
# DataFrame columns built from the same mapping below.
top_model = Sequential([
    Flatten(input_shape=model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')
])
seq = Sequential([model, top_model])

# Compile with categorical cross-entropy loss and SGD with momentum.
# `learning_rate` replaces the deprecated `lr` argument.
seq.compile(loss='categorical_crossentropy',
            optimizer=SGD(learning_rate=1e-4, momentum=0.9),
            metrics=['accuracy'])

# Train for 5 epochs. Model.fit accepts generators directly; fit_generator is
# deprecated.
seq.fit(train_generator, epochs=5, validation_data=valid_generator)

# NOTE(review): test_generator is not defined in any cell visible in this
# notebook; it must exist before this cell runs. It presumably needs
# shuffle=False so the prediction rows line up with `filepaths` - confirm.
output = seq.predict(test_generator)

# File id = base name up to the first dot.
fids = [os.path.basename(path).split('.')[0] for path in test_generator.filepaths]

# One column per class label, one row per test image.
df = pd.DataFrame(output, index=fids, columns=list(train_generator.class_indices))
df.index.name = 'id'

# Reorder the rows to follow the ids listed in the submission template.
submission_format = pd.read_csv(Path(OUTPUT_DIR, 'submission_format.csv'), index_col='id')
df = df.reindex(submission_format.index)
df.to_csv(Path(OUTPUT_DIR, "inception-submission.csv"))

In [None]:
# Run the external `dd-sub-valid` command on the submission template and the
# InceptionV3 predictions written by the previous cell (presumably a format
# check - confirm against the tool's documentation). The relative paths assume
# the working directory is the one OUTPUT_DIR was built from.
!dd-sub-valid output/submission_format.csv output/inception-submission.csv

### VGG19

In [None]:
from tensorflow.keras.applications.vgg19 import VGG19

# VGG19 convolutional base with ImageNet weights and no classifier on top.
model = VGG19(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))
# model.summary()

# Classification head. The output layer is sized from the classes found by
# the training iterator instead of a hard-coded 5, so it always matches the
# DataFrame columns built from the same mapping below.
top_model = Sequential([
    Flatten(input_shape=model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')
])
seq = Sequential([model, top_model])

# Compile with categorical cross-entropy loss and SGD with momentum.
# `learning_rate` replaces the deprecated `lr` argument.
seq.compile(loss='categorical_crossentropy',
            optimizer=SGD(learning_rate=1e-4, momentum=0.9),
            metrics=['accuracy'])

# Train for 5 epochs. Model.fit accepts generators directly; fit_generator is
# deprecated.
seq.fit(train_generator, epochs=5, validation_data=valid_generator)

# NOTE(review): test_generator is not defined in any cell visible in this
# notebook; it must exist before this cell runs. It presumably needs
# shuffle=False so the prediction rows line up with `filepaths` - confirm.
output = seq.predict(test_generator)

# File id = base name up to the first dot.
fids = [os.path.basename(path).split('.')[0] for path in test_generator.filepaths]

# One column per class label, one row per test image.
df = pd.DataFrame(output, index=fids, columns=list(train_generator.class_indices))
df.index.name = 'id'

# Reorder the rows to follow the ids listed in the submission template.
submission_format = pd.read_csv(Path(OUTPUT_DIR, 'submission_format.csv'), index_col='id')
df = df.reindex(submission_format.index)
df.to_csv(Path(OUTPUT_DIR, "vgg19-submission.csv"))

In [None]:
# Run the external `dd-sub-valid` command on the submission template and the
# VGG19 predictions written by the previous cell (presumably a format
# check - confirm against the tool's documentation). The relative paths assume
# the working directory is the one OUTPUT_DIR was built from.
!dd-sub-valid output/submission_format.csv output/vgg19-submission.csv

### RESNET152V2

In [None]:
from tensorflow.keras.applications.resnet_v2 import ResNet152V2

# ResNet152V2 convolutional base with ImageNet weights and no classifier on top.
model = ResNet152V2(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))
# model.summary()

# Classification head. The output layer is sized from the classes found by
# the training iterator instead of a hard-coded 5, so it always matches the
# DataFrame columns built from the same mapping below.
top_model = Sequential([
    Flatten(input_shape=model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')
])
seq = Sequential([model, top_model])

# Compile with categorical cross-entropy loss and SGD with momentum.
# `learning_rate` replaces the deprecated `lr` argument.
seq.compile(loss='categorical_crossentropy',
            optimizer=SGD(learning_rate=1e-4, momentum=0.9),
            metrics=['accuracy'])

# Train for 5 epochs. Model.fit accepts generators directly; fit_generator is
# deprecated.
seq.fit(train_generator, epochs=5, validation_data=valid_generator)

# NOTE(review): test_generator is not defined in any cell visible in this
# notebook; it must exist before this cell runs. It presumably needs
# shuffle=False so the prediction rows line up with `filepaths` - confirm.
output = seq.predict(test_generator)

# File id = base name up to the first dot.
fids = [os.path.basename(path).split('.')[0] for path in test_generator.filepaths]

# One column per class label, one row per test image.
df = pd.DataFrame(output, index=fids, columns=list(train_generator.class_indices))
df.index.name = 'id'

# Reorder the rows to follow the ids listed in the submission template.
# The output file name is kept as-is because the next cell reads it.
submission_format = pd.read_csv(Path(OUTPUT_DIR, 'submission_format.csv'), index_col='id')
df = df.reindex(submission_format.index)
df.to_csv(Path(OUTPUT_DIR, "resnet15v2-submission.csv"))

In [None]:
# Run the external `dd-sub-valid` command on the submission template and the
# ResNet152V2 predictions written by the previous cell (presumably a format
# check - confirm against the tool's documentation). The relative paths assume
# the working directory is the one OUTPUT_DIR was built from.
!dd-sub-valid output/submission_format.csv output/resnet15v2-submission.csv

### XCEPTION

In [None]:
from tensorflow.keras.applications.xception import Xception

# Xception convolutional base with ImageNet weights and no classifier on top.
model = Xception(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))
# model.summary()

# Classification head. The output layer is sized from the classes found by
# the training iterator instead of a hard-coded 5, so it always matches the
# DataFrame columns built from the same mapping below.
top_model = Sequential([
    Flatten(input_shape=model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')
])
seq = Sequential([model, top_model])

# Compile with categorical cross-entropy loss and SGD with momentum.
# `learning_rate` replaces the deprecated `lr` argument.
seq.compile(loss='categorical_crossentropy',
            optimizer=SGD(learning_rate=1e-4, momentum=0.9),
            metrics=['accuracy'])

# Train for 5 epochs. Model.fit accepts generators directly; fit_generator is
# deprecated.
seq.fit(train_generator, epochs=5, validation_data=valid_generator)

# NOTE(review): test_generator is not defined in any cell visible in this
# notebook; it must exist before this cell runs. It presumably needs
# shuffle=False so the prediction rows line up with `filepaths` - confirm.
output = seq.predict(test_generator)

# File id = base name up to the first dot.
fids = [os.path.basename(path).split('.')[0] for path in test_generator.filepaths]

# One column per class label, one row per test image.
df = pd.DataFrame(output, index=fids, columns=list(train_generator.class_indices))
df.index.name = 'id'

# Reorder the rows to follow the ids listed in the submission template.
submission_format = pd.read_csv(Path(OUTPUT_DIR, 'submission_format.csv'), index_col='id')
df = df.reindex(submission_format.index)
df.to_csv(Path(OUTPUT_DIR, "xception-submission.csv"))

In [None]:
# Run the external `dd-sub-valid` command on the submission template and the
# Xception predictions written by the previous cell (presumably a format
# check - confirm against the tool's documentation). The relative paths assume
# the working directory is the one OUTPUT_DIR was built from.
!dd-sub-valid output/submission_format.csv output/xception-submission.csv