In [57]:
from __future__ import division
from __future__ import print_function
import cv2, os
import numpy as np
import pandas as pd
import datetime
import keras

from keras.layers import Flatten, Dense, Conv2D ,Dropout, MaxPooling2D, AveragePooling2D
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.optimizers import Adam
from keras.models import Sequential, Model
from keras.layers.normalization import BatchNormalization
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3

from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from keras.models import load_model

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split
import csv
from PIL import Image
%matplotlib inline
from jupyterthemes import jtplot
from glob import glob
import h5py

def get_classes():
    """Return the twelve plant-species labels as a fresh list.

    The names are listed in sorted order; a new list object is built on
    every call, so callers may mutate the result freely.
    """
    species = (
        'Black-grass',
        'Charlock',
        'Cleavers',
        'Common Chickweed',
        'Common wheat',
        'Fat Hen',
        'Loose Silky-bent',
        'Maize',
        'Scentless Mayweed',
        'Shepherds Purse',
        'Small-flowered Cranesbill',
        'Sugar beet',
    )
    return list(species)

In [58]:
def create_mask_for_plant(image):
    """Build a binary mask selecting the green-ish pixels of ``image``.

    The image is converted with ``COLOR_BGR2HSV`` (so it is treated as BGR),
    thresholded to a hue window of 60 +/- 35 with saturation >= 100 and
    value >= 50, and the result is morphologically closed with an 11x11
    elliptical kernel to fill small holes.

    Returns the mask produced by ``cv2.morphologyEx``.
    """
    hue_center, hue_tolerance = 60, 35
    hsv_low = np.array([hue_center - hue_tolerance, 100, 50])
    hsv_high = np.array([hue_center + hue_tolerance, 255, 255])

    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    raw_mask = cv2.inRange(hsv, hsv_low, hsv_high)

    # Closing (dilate then erode) joins nearby blobs and removes pinholes.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    return cv2.morphologyEx(raw_mask, cv2.MORPH_CLOSE, closing_kernel)

def segment_plant(image):
    """Return ``image`` with every pixel outside the plant mask set to zero.

    The mask comes from ``create_mask_for_plant``; ``bitwise_and`` of the
    image with itself under that mask keeps masked pixels unchanged.
    """
    plant_mask = create_mask_for_plant(image)
    return cv2.bitwise_and(image, image, mask=plant_mask)

def sharpen_image(image):
    """Sharpen ``image`` by subtracting a blurred copy of itself.

    The blurred copy uses a Gaussian with sigma 3 (kernel size derived from
    sigma); the result is ``1.5 * image - 0.5 * blurred``.
    """
    blurred = cv2.GaussianBlur(image, (0, 0), 3)
    sharpened = cv2.addWeighted(image, 1.5, blurred, -0.5, 0)
    return sharpened

In [59]:
def get_sharp_image(image):
    """Segment the plant out of ``image`` and sharpen the result.

    The input is cast to ``uint8`` first, then passed through
    ``segment_plant`` (which builds the plant mask itself) and
    ``sharpen_image``.

    Args:
        image: image array to process.

    Returns:
        The segmented and sharpened image.
    """
    image = image.astype('uint8')
    # segment_plant() already computes the mask internally, so it is not
    # computed separately here (the previous extra call was discarded).
    image_segmented = segment_plant(image)
    return sharpen_image(image_segmented)

In [60]:
def normalize(x):
    """Convert ``x`` to a float32 array scaled by 1/255 and print its shape."""
    scaled = np.array(x, dtype=np.float32) / 255.
    print("Shape:", scaled.shape)
    return scaled

def get_image(path, img_width=48, img_height=48):
    """Read the image at ``path`` and resize it to ``img_width`` x ``img_height``.

    Args:
        path: file path handed to ``cv2.imread``.
        img_width: target width in pixels.
        img_height: target height in pixels.

    Returns:
        The resized image as returned by ``cv2.resize`` (area interpolation).

    Raises:
        IOError: if ``cv2.imread`` could not decode the file (it returns
            ``None`` for missing or non-image files instead of raising).
    """
    img = cv2.imread(path)
    if img is None:
        raise IOError("Could not read image file: {}".format(path))
    # An explicit dsize takes precedence over fx/fy, so the scale factors
    # that used to be passed here had no effect and are omitted.
    return cv2.resize(img, (img_width, img_height), interpolation=cv2.INTER_AREA)

def load_train(path, categories, img_width, img_height):
    """Load and preprocess every training image below ``path``.

    Each entry of ``categories`` names a sub-folder of ``path``; its position
    in ``categories`` is used as the integer label for all files in that
    folder. Files are visited in sorted name order (as ``load_test`` does) so
    that the sample order, and therefore any seeded split, is reproducible
    across runs and platforms.

    Args:
        path: root directory holding one sub-folder per category.
        categories: sequence of category (folder) names, assumed unique.
        img_width: width passed to ``get_image``.
        img_height: height passed to ``get_image``.

    Returns:
        Tuple ``(X_train, y_train, filenames)`` of parallel lists: processed
        images, integer labels, and file names.
    """
    X_train, y_train, filenames = [], [], []
    for idx, folder in enumerate(categories):
        print("ID {}: Load {}".format(idx, folder))
        fullpath = os.path.join(path, folder)
        # os.listdir() yields bare entry names in arbitrary order.
        for filename in sorted(os.listdir(fullpath)):
            img = get_image(os.path.join(fullpath, filename), img_width, img_height)
            X_train.append(get_sharp_image(img))
            filenames.append(filename)
            y_train.append(idx)
    return X_train, y_train, filenames

def load_test(path, img_width, img_height):
    """Load and preprocess every file in ``path`` in sorted name order.

    Returns a tuple ``(images, names)`` of parallel lists: the images after
    ``get_image`` and ``get_sharp_image``, and the corresponding file names.
    """
    images, names = [], []
    for name in sorted(os.listdir(path)):
        raw = get_image(os.path.join(path, name), img_width, img_height)
        images.append(get_sharp_image(raw))
        names.append(name)
    return images, names

def get_train(path, categories, img_width, img_height):
    """Load the training set and return it in array form.

    Returns ``(images, labels, filenames)`` where images have been passed
    through ``normalize`` and labels are a ``uint8`` NumPy array.
    """
    images, labels, filenames = load_train(path, categories, img_width, img_height)
    images = normalize(images)
    labels = np.array(labels, dtype=np.uint8)
    return images, labels, filenames

def get_test(path, img_width, img_height):
    """Load the test images from ``path`` and normalise them.

    Returns ``(images, filenames)`` with images passed through ``normalize``.
    """
    images, filenames = load_test(path, img_width, img_height)
    return normalize(images), filenames

def create_submission(preds, ids, output_path="./", filename="test", isSubmission=False):
    """Write a two-column (``file``, ``species``) CSV and return the frame.

    Args:
        preds: predicted species, one per entry of ``ids``.
        ids: file identifiers.
        output_path: directory to write into; it is created when missing and
            may be given with or without a trailing separator.
        filename: base name of the CSV (without extension).
        isSubmission: when true, the file is named
            ``submission_<filename>_<YYYY-MM-DD-HH-MM>.csv`` instead of
            ``<filename>.csv``.

    Returns:
        The ``pandas.DataFrame`` that was written.
    """
    df = pd.DataFrame({"file": pd.Series(ids), "species": pd.Series(preds)})
    csvfile = filename
    if isSubmission:
        now = datetime.datetime.now()
        csvfile = "submission_" + filename + "_" + now.strftime("%Y-%m-%d-%H-%M")
    # Create the target directory up front so a long run does not fail at
    # the very last step because the folder is missing.
    if output_path and not os.path.isdir(output_path):
        os.makedirs(output_path)
    # os.path.join handles a missing trailing slash; plain concatenation
    # would glue the directory name onto the file name.
    df.to_csv(os.path.join(output_path, csvfile + ".csv"), index=False)
    return df
    
def to_csv_ens(preds_true, preds, ids, classes, output_path="./", filename="test"):
    """Write predicted labels together with per-class scores to a CSV.

    The output has the columns ``file`` and ``species`` followed by one
    column per entry of ``classes`` holding the rows of ``preds``.

    Args:
        preds_true: predicted species label per sample.
        preds: 2-D per-class scores, one row per sample and one column per
            entry of ``classes``.
        ids: file identifiers.
        classes: column names for ``preds``.
        output_path: directory to write into; it is created when missing and
            may be given with or without a trailing separator.
        filename: base name of the CSV (without extension).

    Returns:
        The combined ``pandas.DataFrame`` that was written.
    """
    df1 = pd.DataFrame({"file": pd.Series(ids), "species": pd.Series(preds_true)})
    df2 = pd.DataFrame(preds, columns=classes)
    df = pd.concat([df1, df2], axis=1)
    if output_path and not os.path.isdir(output_path):
        os.makedirs(output_path)
    # os.path.join copes with an output_path that lacks a trailing slash.
    df.to_csv(os.path.join(output_path, filename + ".csv"), index=False)
    return df

In [61]:
def create_conv_model(model):
    """Return ``model`` truncated after its last ``Conv2D`` layer.

    Only layers whose type is exactly ``Conv2D`` are considered (subclasses
    do not count). The returned ``Model`` shares ``model``'s input and ends
    at that layer's output. An ``IndexError`` is raised if no such layer
    exists.
    """
    conv_layers = [layer for layer in model.layers if type(layer) is Conv2D]
    final_conv = conv_layers[-1]
    return Model(inputs=model.input, outputs=final_conv.output)

def stack_on_top(p, model):
    """Attach a dense classification head to the output of ``model``.

    The head is: max-pool -> batch-norm -> dropout(p/4) -> flatten ->
    [dense(512, relu) -> batch-norm -> dropout] twice (rates ``p`` then
    ``p/2``) -> softmax over ``len(get_classes())`` classes.

    Args:
        p: base dropout rate.
        model: convolutional base whose output is a 4-D feature map.

    Returns:
        A new ``Model`` from ``model.input`` to the softmax output.
    """
    # Normalise over the channel axis. That is axis 1 only for
    # channels_first data; with channels_last (the layout implied by an
    # input shape of (w, h, 3)) axis=1 would normalise over image rows.
    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    inp = model.output
    x = MaxPooling2D()(inp)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Dropout(p / 4)(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(p)(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(p / 2)(x)
    y = Dense(len(get_classes()), activation='softmax')(x)
    return Model(inputs=[model.input], outputs=[y])

def get_VGG16(input_shape=(128, 128, 3)):
    """Build and compile the transfer-learning classifier.

    NOTE: despite the function name, the backbone instantiated here is
    InceptionResNetV2 with ImageNet weights and no top. It is cut after its
    last Conv2D layer by ``create_conv_model`` and topped with the dense head
    from ``stack_on_top`` (base dropout 0.6).

    The model is compiled with Adam (learning rate 1e-4), categorical
    cross-entropy loss and the accuracy metric.
    """
    backbone = keras.applications.inception_resnet_v2.InceptionResNetV2(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
        pooling='avg',
    )
    conv_base = create_conv_model(backbone)
    classifier = stack_on_top(0.6, conv_base)
    classifier.compile(loss='categorical_crossentropy',
                       optimizer=Adam(1e-4),
                       metrics=['accuracy'])
    return classifier

In [62]:
# Fixed seed so NumPy-driven randomness is repeatable
SEED = 42
np.random.seed(SEED)
# Square network input: width x height x 3 colour channels
img_width = img_height = 150
input_shape = (img_width, img_height, 3)
# Name tag for the model (note: get_VGG16 builds an InceptionResNetV2 backbone)
modelname = 'VGG19'

In [63]:
def get_callbacks(path):
    """Return the training callbacks: early stopping, then checkpointing.

    Early stopping watches ``val_loss`` (minimising) with a patience of 5;
    the checkpoint writes to ``path`` with ``save_best_only=True``.
    """
    checkpoint = ModelCheckpoint(path, save_best_only=True)
    stopper = EarlyStopping(monitor='val_loss', patience=5, mode="min")
    return [stopper, checkpoint]

def image_augmetation(X, y, batch_size=32):
    """Return an augmenting batch iterator over ``(X, y)``.

    Augmentations: horizontal and vertical flips, width/height shifts and
    zoom of up to 0.1, and rotations of up to 90 degrees. The iterator is
    seeded with the module-level ``SEED``.
    """
    augment_opts = dict(
        horizontal_flip=True,
        vertical_flip=True,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        rotation_range=90,
    )
    augmenter = ImageDataGenerator(**augment_opts)
    augmenter.fit(X)
    return augmenter.flow(X, y, batch_size=batch_size, seed=SEED)

In [None]:
def train(X, y, epochs=1, batch_size=32):
    """Train the classifier with augmentation and a held-out validation set.

    The data is split 80/20 (seeded with ``SEED``). The model is fitted on
    augmented batches of the 80% part only and validated on the untouched
    20% part, with the callbacks from ``get_callbacks`` (early stopping and
    best-only checkpoints). Previously the split and the augmentation
    generator were built but ignored: the model was fitted on all samples,
    including the hold-out rows, and nothing was written to disk even though
    "Model saved." was printed.

    Args:
        X: image array, one sample per row along the first axis.
        y: integer class labels aligned with ``X``.
        epochs: maximum number of training epochs.
        batch_size: batch size used by the augmentation generator.

    Returns:
        The trained Keras model. It is also saved under ``models/`` as
        ``<modelname>.h5``, or ``<modelname>_OnExit.h5`` when training is
        interrupted from the keyboard.
    """
    # One-hot encode the integer labels
    y_true = np_utils.to_categorical(y, len(get_classes()))

    # Hold out 20% of the samples for validation
    trX, teX, trY, teY = train_test_split(X, y_true, test_size=0.20, random_state=SEED)

    # Augment only the training part, honouring the requested batch size
    gen = image_augmetation(trX, trY, batch_size=batch_size)

    # Create model
    model = get_VGG16(input_shape)

    # Make sure the checkpoint directory exists before Keras writes to it
    model_dir = 'models'
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    ckpt_path = os.path.join(model_dir, modelname + '-{epoch:02d}-{val_loss:.3f}' + '.h5')

    # One pass over the training part per epoch; must be a whole number
    steps_per_epoch = max(1, int(np.ceil(len(trX) / float(batch_size))))

    try:
        model.fit_generator(gen,
                            steps_per_epoch=steps_per_epoch,
                            epochs=epochs,
                            validation_data=(teX, teY),
                            callbacks=get_callbacks(path=ckpt_path),
                            verbose=1)
        model.save(os.path.join(model_dir, modelname + '.h5'))
    except KeyboardInterrupt:
        # Keep whatever has been learned so far when the user aborts
        model.save(os.path.join(model_dir, modelname + '_OnExit' + '.h5'))

    print("Model saved.")
    return model

In [None]:
# Fit a label binarizer on the species names (fit() returns the encoder)
lb = LabelBinarizer()
lbenc = lb.fit(get_classes())

# Load the training images; folder names come from the encoder's class list
class_names = list(lbenc.classes_)
X_train, y_train, train_filenames = get_train('train', class_names, img_width, img_height)

# Build the network and train it
model = train(X_train, y_train, epochs=30, batch_size=32)

print("+++++++++++++++++++++++++++++++++++++++++++")


ID 0: Load Black-grass
ID 1: Load Charlock
ID 2: Load Cleavers
ID 3: Load Common Chickweed
ID 4: Load Common wheat
ID 5: Load Fat Hen
ID 6: Load Loose Silky-bent
ID 7: Load Maize


In [None]:
# Load and normalise the images found in the 'test' directory
X_test, X_test_id = get_test(path='test', img_width=img_width, img_height=img_height)
# Per-class predictions from the trained model
preds = model.predict(X_test, verbose=1)

In [None]:
# Map the model outputs back to species names and write the submission file
predicted_species = lbenc.inverse_transform(preds)
create_submission(predicted_species, X_test_id,
                  output_path="submissions/", filename='8', isSubmission=True)
print('Finished.')