In [None]:
# importing dependencies

import pandas as pd
import numpy as np
import cv2
import pickle 
import random
import matplotlib as matplt
import matplotlib.pyplot as plt
# import plotly.express as px
import sklearn
from sklearn.model_selection import train_test_split
from math import sqrt, floor
import os
from os.path import join, dirname
from os import getcwd
from os import listdir


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras import callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image

## Set up directory

In [None]:
os.mkdir('/kaggle/working/data')
os.mkdir('/kaggle/working/data/test')
!cp /kaggle/input/plant-seedlings-classification/test/*.png /kaggle/working/data/test

## Model pipeline functions

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

def call_backs(file_name):
  '''
  Build the list of callbacks used during training.
  All three callbacks come from the tensorflow.keras `callbacks` module so they
  belong to the same Keras implementation.
  :param file_name: path the ModelCheckpoint callback saves the best model to
  :return cb: list [ModelCheckpoint, EarlyStopping, ReduceLROnPlateau]
  '''
  # Save the full model (not just weights) whenever val_loss improves; checked every epoch.
  best_cb = callbacks.ModelCheckpoint(file_name,
                                      monitor='val_loss',
                                      verbose=1,
                                      save_best_only=True,
                                      save_weights_only=False,
                                      mode='auto',
                                      save_freq='epoch')

  # Stop training after 15 epochs without a val_loss improvement.
  early = callbacks.EarlyStopping(monitor="val_loss",
                                  mode="min",
                                  patience=15)

  # Reduce LR On Plateau: 6 epochs of val_accuracy not improving -> lower learning rate by 10x.
  # Note this watches val_accuracy, unlike the two callbacks above which watch val_loss.
  lr_reduced = callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, min_delta=1e-5, patience=6, verbose=1)

  return [best_cb, early, lr_reduced]


## Load best model

In [None]:
from keras.models import load_model
# Restore the previously saved model from its .h5 file.
best_model = load_model(
    filepath='../input/best-model/InceptionResNetV2_version2_Fold_1.h5'
)

## Predict based on the model


In [None]:
def color_segment_function(img_array):
  '''
  Black out every pixel whose HSV value lies outside (24, 40, 0)-(60, 255, 255).
  Written to be used as an ImageDataGenerator preprocessing_function.
  :param img_array: RGB image array with values on a 0-255 scale
  :return result: masked image as float64, same shape as img_array
  '''
  # Round, then clip before the cast so out-of-range values saturate instead of
  # wrapping around in uint8.
  rgb_u8 = np.clip(np.rint(img_array), 0, 255).astype('uint8')
  hsv_img = cv2.cvtColor(rgb_u8, cv2.COLOR_RGB2HSV)
  mask = cv2.inRange(hsv_img, (24, 40, 0), (60, 255, 255))
  # Pixels where the mask is 0 become black; the rest keep their RGB value.
  result = cv2.bitwise_and(rgb_u8, rgb_u8, mask=mask)
  return result.astype('float64')

In [None]:
# Directory roots: labelled training images and the working folder holding the test images.
train_dir = '../input/plant-seedlings-classification/train'
test_dir = '/kaggle/working/data'

# Size passed as target_size to every flow_from_directory call.
img_size = 299, 299

In [None]:
batch_size = 32

# Training generator: colour segmentation, 1/255 rescaling and random augmentation.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        rotation_range=360,
        width_shift_range=0.3,
        height_shift_range=0.3,
        shear_range=0.3,
        zoom_range=0.5,
        vertical_flip=True,
        horizontal_flip=True,
        validation_split=0.2,
        preprocessing_function=color_segment_function,
    )

# Validation generator: same preprocessing and the same validation_split, so the
# files are partitioned identically, but NO random augmentation. Validation
# metrics (which drive checkpointing, early stopping and LR decay) are then
# measured on unaltered images.
val_image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        validation_split=0.2,
        preprocessing_function=color_segment_function,
    )

train_ds = image_generator.flow_from_directory(
  train_dir,
  shuffle=True,
  target_size=img_size,
  class_mode='categorical',
  batch_size=batch_size,
  subset="training")

# shuffle=False: validation order does not matter for the metrics and a fixed
# order makes evaluation deterministic.
val_ds = val_image_generator.flow_from_directory(
  train_dir,
  shuffle=False,
  target_size=(img_size[0], img_size[1]),
  class_mode='categorical',
  batch_size=batch_size,
  subset="validation")

# Test-time generator: same preprocessing as above, no augmentation, no split.
test_datagen = image.ImageDataGenerator(rescale=1./255, preprocessing_function=color_segment_function)

In [None]:
def predict(model, img_size, test_dir, image_generator_test,  csv_file_name="results_best.csv", class_indices=None):
    '''
    Predict a class for every image found under test_dir and write the result to a CSV file.
    :param model: object exposing predict(); it is called on the test iterator
    :param img_size: two-element size, passed as target_size
    :param test_dir: directory handed to flow_from_directory
    :param image_generator_test: generator object exposing flow_from_directory
    :param csv_file_name: output CSV path; columns are "file" and "species"
    :param class_indices: optional {class name: index} mapping used to decode the
        predictions; when None, the notebook-level train_ds.class_indices is used
    :return: None (the CSV file is the output)
    '''
    if class_indices is None:
        # Backward-compatible fallback to the training iterator defined in the notebook.
        class_indices = train_ds.class_indices

    test_generator = image_generator_test.flow_from_directory(
        test_dir,
        target_size=(img_size[0], img_size[1]),
        class_mode=None,
        batch_size=1,
        shuffle=False)  # keep predictions aligned with test_generator.filenames

    step_size_test = test_generator.n // test_generator.batch_size
    test_generator.reset()
    pred = model.predict(test_generator,
                         steps=step_size_test,
                         verbose=1)

    # Invert {name: index} into {index: name} and decode the arg-max of each row.
    index_to_label = {index: name for name, index in class_indices.items()}
    predictions = [index_to_label[k] for k in np.argmax(pred, axis=1)]

    # basename strips the sub-directory prefix whatever the nesting depth and
    # uses the platform's path separator (split('/')[1] did neither).
    filenames = [os.path.basename(path) for path in test_generator.filenames]
    results = pd.DataFrame({"file": filenames,
                            "species": predictions})
    results.to_csv(csv_file_name, index=False)


In [None]:

# Baseline predictions from the loaded model; written to predict()'s default CSV name.
predict(
    model=best_model,
    img_size=img_size,
    test_dir=test_dir,
    image_generator_test=test_datagen,
)

## Fine-tuning the model

In [None]:
# Locate the first top-level layer named 'inception_resnet_v2' (None if absent).
backbone = next(
    (candidate for candidate in best_model.layers
     if candidate.name == 'inception_resnet_v2'),
    None,
)

if backbone is not None:
    backbone.trainable = True
    print('true')

    # Sub-layers before 'conv2d_348' stay frozen; that layer and every
    # sub-layer after it become trainable.
    unfreeze = False
    for inner in backbone.layers:
        if inner.name == 'conv2d_348':
            unfreeze = True
            print('set to true')
        inner.trainable = unfreeze
    

In [None]:
# Show the model summary to inspect the model after the trainable flags were changed above.
best_model.summary()

In [None]:
# Initial learning rate for the fine-tuning run.
learning_rate = 1e-5

# Adam optimizer with the AMSGrad option enabled.
amsgrad = keras.optimizers.Adam(amsgrad=True, learning_rate=learning_rate)

# Recompile so the changed trainable flags and the new optimizer take effect.
best_model.compile(
    loss='categorical_crossentropy',
    optimizer=amsgrad,
    metrics=['accuracy'],
)

In [None]:
epochs = 20

# Floor division: a trailing partial batch is not counted as a training step.
train_steps = train_ds.samples // batch_size
# len() of the iterator is its number of batches (partial last batch included),
# so every validation image is scored each epoch instead of dropping the remainder.
val_steps = len(val_ds)

cb_ft = call_backs('model_finetune.h5')
# Model.fit accepts generators directly; fit_generator is deprecated.
history_ft = best_model.fit(
                train_ds,
                steps_per_epoch=train_steps,
                validation_data=val_ds,
                validation_steps=val_steps,
                epochs=epochs,
                callbacks=cb_ft)

In [None]:


def plot_model_hist(history, field, legend_pos, title, ylabel, xlabel):
  '''
  Plot one training metric and its validation counterpart, one point per recorded epoch.
  :param history: object whose .history dict holds the recorded values (e.g. the return value of fit)
  :param field: metric key such as 'loss'; the 'val_' + field series is drawn alongside it
  :param legend_pos: matplotlib legend location string, e.g. 'upper right'
  :param title: figure title
  :param ylabel: y-axis label
  :param xlabel: x-axis label
  :return: None (the figure is shown)
  '''
  fig, ax = plt.subplots(figsize=(15, 5))
  ax.plot(history.history[field])
  ax.plot(history.history['val_' + field])
  # The second curve is read from the 'val_' key, i.e. validation data, not the test set.
  ax.legend(['train', 'validation'], loc=legend_pos)
  ax.set_title(title)
  ax.set_ylabel(ylabel)
  ax.set_xlabel(xlabel)
  plt.show()

In [None]:
# Loss curves of the fine-tuning run.
plot_model_hist(
    history=history_ft,
    field='loss',
    legend_pos='upper right',
    title='fine tuned, cross entropy against num epochs',
    ylabel='cross entropy',
    xlabel='epochs',
)

In [None]:
# Accuracy curves of the fine-tuning run.
plot_model_hist(
    history=history_ft,
    field='accuracy',
    legend_pos='upper left',
    title='fine tuned, accuracy against num epochs',
    ylabel='accuracy',
    xlabel='epochs',
)

In [None]:
# Predictions from the in-memory model after fine-tuning, saved under a separate file name.
predict(
    model=best_model,
    img_size=img_size,
    test_dir=test_dir,
    image_generator_test=test_datagen,
    csv_file_name="results_ft.csv",
)

## Load the fine-tuned model

In [None]:
from keras.models import load_model
# Reload the checkpoint file that the ModelCheckpoint callback wrote during fine-tuning.
best_model_ft = load_model(filepath='model_finetune.h5')

In [None]:
# Predictions from the reloaded best fine-tuned checkpoint.
predict(
    model=best_model_ft,
    img_size=img_size,
    test_dir=test_dir,
    image_generator_test=test_datagen,
    csv_file_name="results_best_ft.csv",
)

In [None]:
# Stand-alone version of the predict() steps that keeps the intermediate values
# (pred, predictions, results) in the notebook namespace for inspection.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_size[0], img_size[1]),
    class_mode=None,
    batch_size=1,
    shuffle=False)  # order must stay aligned with test_generator.filenames

STEP_SIZE_TEST = test_generator.n // test_generator.batch_size
test_generator.reset()
# Model.predict accepts generators directly; predict_generator is deprecated.
pred = best_model_ft.predict(test_generator,
                             steps=STEP_SIZE_TEST,
                             verbose=1)

predicted_class_indices = np.argmax(pred, axis=1)
# Invert {class name: index} from the training iterator into {index: class name}.
labels = dict((v, k) for k, v in train_ds.class_indices.items())
predictions = [labels[k] for k in predicted_class_indices]

# basename strips the sub-directory prefix whatever the nesting depth and
# uses the platform's path separator.
filenames = [os.path.basename(i) for i in test_generator.filenames]
results = pd.DataFrame({"file": filenames,
                        "species": predictions})
results.to_csv("results_best_ft.csv", index=False)