In [None]:
# essential imports, to use in the notebook
from fastai import *
from fastai.vision import *
import matplotlib.pyplot as plt
from sklearn import metrics
import numpy as np
import pandas as pd
import cv2
from glob import glob

In [None]:
# copy input data from Kaggle into current working directory
# a working copy is needed because later cells overwrite the image files in place;
# any previous copy is removed first so a re-run starts again from the original images
!rm -rf input
!cp -r ../input .

In [None]:
# directory holding the training images (one sub-folder per class label)
TRAIN_DATA_DIR = 'input/train'

# the same directory as a Path object, for the fastai data-bunch API;
# derived from TRAIN_DATA_DIR so the two values cannot drift apart
path = Path(TRAIN_DATA_DIR)

In [None]:
# image preprocessing: segment the plant from the background
# the functions below build a green-colour mask and apply it to an image

def create_mask_for_plant(image, sensitivity=35, kernel_size=11):
    """Build a binary mask that selects the green (plant) pixels of an image.

    The image is converted from BGR to HSV, thresholded on a hue band
    centred on 60 (green on OpenCV's hue scale), and the resulting mask is
    cleaned up with a morphological closing to fill small holes.

    Parameters
    ----------
    image
        BGR image as accepted by ``cv2.cvtColor`` (e.g. from ``cv2.imread``).
    sensitivity : int, optional
        Half-width of the accepted hue band around 60. The default (35)
        reproduces the original hard-coded behaviour.
    kernel_size : int, optional
        Side length of the elliptical structuring element used for the
        closing. The default (11) reproduces the original behaviour.

    Returns
    -------
    The mask returned by ``cv2.inRange`` after ``cv2.MORPH_CLOSE``.
    """
    green_hue = 60

    # change colour space to HSV, where "green" is a simple hue interval
    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # keep pixels inside the following (Hue, Saturation, Value) range;
    # the saturation/value floors (100, 50) reject washed-out and very dark pixels
    lower_hsv = np.array([green_hue - sensitivity, 100, 50])
    upper_hsv = np.array([green_hue + sensitivity, 255, 255])

    # threshold, then close small gaps inside the selected regions
    mask = cv2.inRange(image_hsv, lower_hsv, upper_hsv)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    return mask


def segment_plant(image):
    """Return `image` with everything outside the plant mask removed.

    The mask is computed by ``create_mask_for_plant``; the image is then
    AND-ed with itself under that mask, so only masked-in pixels survive.
    """
    return cv2.bitwise_and(image, image, mask = create_mask_for_plant(image))

In [None]:
# preprocess the training images with the segmentation functions defined earlier:
# segment the plant, convert to grayscale, and overwrite each file in place
# NOTE: do not re-run this cell on its own -- already-processed (grayscale) images
# would presumably fail the saturation threshold and be masked to black;
# re-run the copy cell first to restore the original images
for class_folder_name in os.listdir(TRAIN_DATA_DIR):

    # define class folder path, that is, the label
    class_folder_path = os.path.join(TRAIN_DATA_DIR, class_folder_name)

    # for every image in a class, apply the preprocessing and save to the same path
    for image_path in glob(os.path.join(class_folder_path, "*.png")):
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports an unreadable or corrupt file by returning None;
            # fail here with the file name instead of a cryptic error further down
            raise IOError("could not read image: {}".format(image_path))
        image = segment_plant(image)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        if not cv2.imwrite(image_path, image):
            # cv2.imwrite returns False when the file could not be written
            raise IOError("could not write image: {}".format(image_path))

In [None]:
# apply the same preprocessing (segment, grayscale, overwrite in place) to the test images
# NOTE: do not re-run this cell on its own -- already-processed (grayscale) images
# would presumably fail the saturation threshold and be masked to black;
# re-run the copy cell first to restore the original images
for image_path in glob(os.path.join("input/test", "*.png")):
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports an unreadable or corrupt file by returning None;
        # fail here with the file name instead of a cryptic error further down
        raise IOError("could not read image: {}".format(image_path))
    image = segment_plant(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if not cv2.imwrite(image_path, image):
        # cv2.imwrite returns False when the file could not be written
        raise IOError("could not write image: {}".format(image_path))

In [None]:
# seed NumPy's global random generator before the data bunch is built,
# so the random parts of the pipeline are repeatable
np.random.seed(123)

# build the image data bunch from the folders under `path`:
# 25% of the training images are held out for validation, images are resized to 299
# NOTE(review): `test` is presumably resolved relative to `path`, i.e. input/test -- confirm
image_data_bunch = ImageDataBunch.from_folder(
    path,
    test = '../test',
    ds_tfms = get_transforms(),
    valid_pct = 0.25,
    size = 299,
    bs = 16,
    num_workers = 0,
)
image_data_bunch.normalize(imagenet_stats)

In [None]:
# print classes, for troubleshooting
print(image_data_bunch.classes)
# cell output: length of the class list next to the bunch's `c` attribute
# (presumably the class count -- the two numbers are expected to agree)
len(image_data_bunch.classes),image_data_bunch.c

In [None]:
# show a batch of images (2 rows), as a visual check of the segmented, grayscale inputs
image_data_bunch.show_batch(rows = 2, figsize = (7,6))

In [None]:
# create the convolutional neural network learner on a ResNet-50 architecture,
# reporting error rate as the metric
# NOTE(review): presumably starts from pretrained weights (library default) -- confirm
# NOTE(review): later fastai v1 releases rename create_cnn to cnn_learner -- confirm against the installed version
model = create_cnn(image_data_bunch, models.resnet50, metrics = error_rate)

In [None]:
# run the learning-rate finder and plot what it recorded;
# the plot is used to pick the learning rate for the training cell that follows
model.lr_find()
model.recorder.plot()

In [None]:
# train the model for 10 cycles-worth of epochs with fit_one_cycle,
# passing slice(5e-2) as the learning rate chosen from the plot above
model.fit_one_cycle(10, slice(5e-2))

In [None]:
# save model checkpoint after the first training stage, under the name 'version-1'
model.save('version-1')

In [None]:
# find a suitable learning rate and graph it, ahead of the second training stage
# NOTE(review): this runs before model.unfreeze() in the next cell, so the curve
# presumably reflects the model as it was trained so far, not the unfrozen one --
# consider unfreezing before running the finder
model.lr_find()
model.recorder.plot()

In [None]:
# unfreeze model, train a bit more:
# another fit_one_cycle run of 10, this time with a very small learning rate (slice(9e-7))
model.unfreeze()
model.fit_one_cycle(10, slice(9e-7))

In [None]:
# generate new image data bunch, with images of new size (350 instead of 299)
# re-seed with the same value used for the first data bunch immediately before the
# random valid_pct split: without this the second split differs from the first, and
# images the model has already trained on end up in the new validation set,
# which makes the validation metrics of later training look better than they are
np.random.seed(123)
image_data_bunch_new = ImageDataBunch.from_folder(path,test = '../test', ds_tfms = get_transforms(), valid_pct = 0.25, size = 350, bs = 16, num_workers = 0)
image_data_bunch_new.normalize(imagenet_stats)

In [None]:
# define a classification interpreter, based on model trained earlier
interpreter = ClassificationInterpretation.from_learner(model)

# take the class names from the data bunch the model is attached to right now,
# since that is the bunch the interpreter's class indices refer to
class_names = model.data.classes

# print classification report
# `labels` is given explicitly so every row stays aligned with its class name,
# even if some class happens to be missing from the validation set
print(metrics.classification_report(
    interpreter.y_true.numpy(),
    interpreter.pred_class.numpy(),
    labels = list(range(len(class_names))),
    target_names = class_names,
))

In [None]:
# save model checkpoint under the name 'version-2',
# before the model is switched to the larger-image data bunch
model.save('version-2')

In [None]:
# assign new data bunch to the model, so the training and prediction cells
# that follow use the size-350 images
model.data=image_data_bunch_new

In [None]:
# unfreeze model and train further on the larger images,
# with learning rates given as slice(1e-5, 1e-4)
# (unfreeze() was already called in an earlier cell; it is repeated here
# so this cell does not rely on that)
model.unfreeze()
model.fit_one_cycle(10, max_lr = slice(1e-5,1e-4))

In [None]:
# get predictions on the test data
# `predictions` holds per-class scores for each test image (the next cell takes
# the argmax over axis 1); `y` is the second value returned by get_preds and is
# not used in the cells that follow
predictions,y = model.get_preds(ds_type = DatasetType.Test)

In [None]:
# turn the per-class scores into class names
# the indices get their own name instead of overwriting `predictions`, so this
# cell can be re-run safely (argmax over axis 1 fails on an already-reduced array)
predicted_class_indices = np.argmax(predictions, axis = 1)
prediction_classes = [image_data_bunch_new.classes[i] for i in predicted_class_indices]

In [None]:
# store the results in a file, for submission
# take the file names from the test dataset the predictions were made on, rather
# than from a separate directory listing: the listing's order is not guaranteed to
# match the dataset order, and it would also include any non-image files
test_file_names = [Path(item).name for item in model.data.test_ds.x.items]
assert len(test_file_names) == len(prediction_classes), "one prediction expected per test image"

prediction_file = pd.DataFrame({ 'file': test_file_names, 'species': prediction_classes })
prediction_file.to_csv('results.csv', index=False)