In [9]:
import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.python.keras.utils import to_categorical
import numpy as np

print(os.listdir("../input"))

['test', 'train', 'train.csv', 'sample_submission.csv']


In [10]:
dir = "../input/train/train"
def process_picture():
    image_files = []
    data = pd.read_csv('../input/train.csv')
    images = data['id'].values
    labels = []
    for files in images:
        labels.append(data[data['id'] == files]['has_cactus'].values[0])
        files = os.path.join(dir, files)
        image_files.append(files)
    return image_files, labels


def get_images_lables():
    images_files, labels = process_picture()
    images = []
    
    for index, file in enumerate(images_files):
        image = cv2.imread(file)
        images.append(image)
    train_images, test_images, train_labels, test_labels = train_test_split(images, labels,
                                                                           test_size=0.2, random_state=7,
                                                                           shuffle=True)
    train_images = np.array(train_images) / 255
    test_images = np.array(test_images) / 255

    print(train_images.shape)
    
    return train_images, test_images, train_labels, test_labels

In [38]:
%time
train_images, test_images, train_labels, test_labels = get_images_lables()
class_weight = compute_class_weight(class_weight='balanced',
                                        classes=np.unique(train_labels),
                                        y=train_labels)
# train_labels = to_categorical(train_labels, 2)
# test_labels = to_categorical(test_labels, 2)
train_labels = np.asarray(train_labels)
test_labels = np.asarray(test_labels)
print(test_labels.shape)
print(class_weight)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 27.9 µs
(14000, 32, 32, 3)
(3500,)
[1.99430199 0.66730219]


In [27]:
from tensorflow.python.keras.applications import VGG16
from tensorflow.python.keras.models import Model, Sequential
from tensorflow.python.keras.layers import Flatten, Dropout, Dense, BatchNormalization
from tensorflow.python.keras.optimizers import Adam, SGD
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.models import load_model
from tensorflow.python.keras.layers import Conv2D
from tensorflow.python.keras import callbacks


In [41]:
def bulid_model():
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(32, 32, 3))
    add_model = Sequential()
    add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
    add_model.add(BatchNormalization())
    add_model.add(Dense(256, activation='relu', name='FC1'))
    add_model.add(BatchNormalization())
    add_model.add(Dropout(0.5))
    add_model.add(Dense(128, activation='relu', name='FC2'))
    add_model.add(BatchNormalization())
    add_model.add(Dense(1, activation='sigmoid', name='sigmoid'))
    model = Model(inputs=base_model.input, outputs=add_model(base_model.output))
    model.summary()
    for layer in model.layers:
        layer.trainable = False
    model.trainable = True
    for layer in model.layers:
        # Boolean whether this layer is trainable.
        trainable = ('block5' in layer.name or 'block4' in layer.name)
        # Set the layer's bool.
        layer.trainable = trainable
    return model


def train(batch_size=64, nb_epoch=500):
    model = bulid_model()
    optimizer = Adam(1e-5)
#     optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

    model.compile(optimizer=optimizer, metrics=['accuracy'], loss='binary_crossentropy')

    callback=[callbacks.EarlyStopping(monitor='val_acc', patience=20, mode='auto', restore_best_weights=True),
         callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, mode='auto')]

    train_datagen = ImageDataGenerator(rotation_range=180,
                                       width_shift_range=0.1,
                                       height_shift_range=0.1,
                                       shear_range=0.1,
                                       zoom_range=[0.9, 1.5],
                                       vertical_flip=True,
                                       horizontal_flip=True)
    train_datagen.fit(train_images)
    history = model.fit_generator(train_datagen.flow(train_images, train_labels, batch_size=batch_size),
                                  steps_per_epoch=train_images.shape[0] // batch_size,
                                  epochs=nb_epoch,
                                  validation_data=(test_images, test_labels),
                                  class_weight=class_weight,
                                 callbacks=callback)
    score = model.evaluate(test_images, test_labels)
    print("%s: %.2f%%" % (model.metrics_names[1], score[1] * 100))
    model.save('./test.h5')
    return model, history

In [42]:
model, history = train()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)         0         
__________

Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
acc: 99.66%


In [43]:
def get_test_images():
    images = []
    id = []
    for image in os.listdir('../input/test/test'):
        id.append(image)
        files = os.path.join('../input/test/test', image)
        img = cv2.imread(files)
        images.append(img)
    images = np.asarray(images, dtype=np.float32)
    images = images / 255
    print(images.shape)
    return images, id

In [44]:
def predict(model):
    images, id = get_test_images()
    predict = model.predict(images)
    predict = np.argmax(predict, axis=1)
    print(predict[0])
    sub_df = pd.DataFrame(id, columns=['id'])
    sub_df['has_cactus'] = predict
    sub_df.to_csv('sample_submission.csv', index=False)

In [45]:
%time
predict(model)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 8.82 µs
(4000, 32, 32, 3)
0
