In [None]:
#Data management
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split
import cv2
import re
import string
import os

#Model management
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,models
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
import torch
import joblib

#data visualize
import matplotlib.pyplot as plt
import seaborn as sns

#image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Prepare data

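* **Input** : paths and options from the `setting` class

* **output**: `train_ds`, `validate_ds`, `test_ds` (image generators that yield batches of images together with their one-hot labels)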

In [None]:
class setting:
    """Shared configuration for the data generators and the models."""

    # Seed handed to the generators for shuffling / random transforms
    seed = 57

    # Number of leading rows of train.csv used for training + validation;
    # the remaining rows are used as the test split further down.
    amount_data = 10000

    # Dataframe columns holding the image file name and the label string
    x_col = "image"
    y_col = "labels"

    # Image folders (train and test currently point at the same directory)
    train_folder = "../input/resized-plant2021/img_sz_256"
    test_folder = "../input/resized-plant2021/img_sz_256"

    # Size the generators resize images to, and the number of images per batch
    target_size = (128, 128)
    batch_size = 32

In [None]:
# Only the first `setting.amount_data` rows are used for training + validation.
train_df = pd.read_csv("../input/plant-pathology-2021-fgvc8/train.csv").iloc[:setting.amount_data]

# Fraction of `train_df` reserved for validation; shared by both generators below.
VALIDATION_SPLIT = 0.10

# Random augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
    rescale = 1/255.,
    rotation_range = 20,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    horizontal_flip = True,
    vertical_flip = False,
    zoom_range = 0.2,
    shear_range = 0.2,
    validation_split = VALIDATION_SPLIT,
)

# Validation images must NOT be augmented, otherwise val_loss (which drives
# EarlyStopping) is measured on randomly distorted inputs. A second generator
# that only rescales is used instead; giving it the same `validation_split`
# makes Keras carve the same positional slice out of `train_df`, so the
# training and validation subsets stay disjoint.
validate_datagen = ImageDataGenerator(
    rescale = 1/255.,
    validation_split = VALIDATION_SPLIT,
)

# Arguments common to both subsets, kept in one place so they cannot drift apart.
flow_options = dict(
    directory = setting.train_folder,
    x_col = setting.x_col,
    y_col = setting.y_col,
    target_size = setting.target_size,
    class_mode = 'categorical',
    batch_size = setting.batch_size,
    seed = setting.seed,
    validate_filenames = False,
)

train_ds = train_datagen.flow_from_dataframe(
    train_df,
    subset = "training",
    shuffle = True,
    **flow_options,
)

# Order does not affect validation metrics, so the validation subset is not shuffled.
validate_ds = validate_datagen.flow_from_dataframe(
    train_df,
    subset = "validation",
    shuffle = False,
    **flow_options,
)

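Test data: the rows of `train.csv` after the first `setting.amount_data` rows, used as a labelled hold-out set.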

In [None]:
# Rows after the first `setting.amount_data` are held out as a labelled test split.
test_df = pd.read_csv("../input/plant-pathology-2021-fgvc8/train.csv").iloc[setting.amount_data:]

# Reuse the training generator's class list (ordered by class index) so that
# output index i of the model refers to the same label in the test generator.
# Without this, classes are inferred from `test_df` alone and may be numbered
# differently.
class_names = sorted(train_ds.class_indices, key=train_ds.class_indices.get)

# Keras drops rows whose label is not in `classes`; drop them here as well so
# `test_df` stays row-aligned with `test_ds`.
test_df = test_df[test_df[setting.y_col].isin(class_names)].reset_index(drop=True)

test_datagen = ImageDataGenerator(rescale = 1/255.)
test_ds = test_datagen.flow_from_dataframe(
    test_df,
    directory = setting.test_folder,
    x_col = setting.x_col,
    y_col = setting.y_col,
    target_size = setting.target_size,
    class_mode = 'categorical',
    classes = class_names,
    batch_size = setting.batch_size,
    # Keep file order: predictions are later paired with test_df["image"] by
    # position, which is only valid when the generator does not shuffle.
    shuffle = False,
)

# Get Models

* **Input** : none (the image size is read from `setting.target_size`)

* **output**: a compiled Keras model


In [None]:
def get_VGG():
    """Build and compile a VGG-16-style network for 12-class classification.

    The convolutional part is five blocks of 3x3 "same"-padded ReLU
    convolutions (2, 2, 3, 3 and 3 layers with 64, 128, 256, 512 and 512
    filters), each block followed by max pooling. The classifier is
    Dense(4096) -> Dropout(0.2) -> Dense(2048) -> Dropout(0.2) -> softmax(12).

    Returns:
        A compiled `keras.Sequential` model (Adam with learning rate 1e-4,
        categorical cross-entropy loss, accuracy metric). The input shape is
        `setting.target_size` with 3 channels.
    """
    # (filters, number of stacked convolutions) for each block
    block_plan = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    net = keras.Sequential()
    needs_input_shape = True
    for n_filters, depth in block_plan:
        for _ in range(depth):
            conv_options = {"padding": "same", "activation": "relu"}
            if needs_input_shape:
                # Only the very first layer declares the input shape.
                conv_options["input_shape"] = (*setting.target_size, 3)
                needs_input_shape = False
            net.add(layers.Conv2D(n_filters, (3, 3), **conv_options))
        net.add(layers.MaxPool2D())

    # Classifier head
    net.add(layers.Flatten())
    for n_units in (4096, 2048):
        net.add(layers.Dense(n_units, activation="relu"))
        net.add(layers.Dropout(0.2))
    net.add(layers.Dense(12, activation="softmax"))

    net.compile(
        loss='categorical_crossentropy',
        metrics=["accuracy"],
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    )
    return net

def get_CNN():
    """Build and compile a small baseline CNN for 12-class classification.

    Four stages of a 3x3 "same"-padded ReLU convolution (64, 64, 64 and 128
    filters) followed by 2x2 max pooling, then Flatten and a 12-way softmax.

    Returns:
        A compiled `keras.Sequential` model (Adam with learning rate 1e-3,
        categorical cross-entropy loss, accuracy metric). The input shape is
        `setting.target_size` with 3 channels.
    """
    stage_filters = (64, 64, 64, 128)

    stack = []
    for stage, n_filters in enumerate(stage_filters):
        conv_options = {"activation": 'relu', "padding": 'same'}
        if stage == 0:
            # Only the very first layer declares the input shape.
            conv_options["input_shape"] = (*setting.target_size, 3)
        stack.append(layers.Conv2D(n_filters, (3, 3), **conv_options))
        stack.append(layers.MaxPooling2D(2, 2))
    stack.append(layers.Flatten())
    stack.append(layers.Dense(12, activation='softmax'))

    net = keras.Sequential(stack)

    # Compile the model
    net.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    )
    return net

In [None]:
# Build the network to train; get_CNN() is the smaller alternative defined above.
model = get_VGG()

# Train model and create submission

* **Input** : 1. train_x, 2. train_y (or the generators `train_ds` / `validate_ds`), 3. test_x, 4. model, 5. relabels_dict

* **output**: submission (also written to `submission.csv`)


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
def fit_model(train_x,train_y,model):
    """Train `model` in place on `train_x` / `train_y` with early stopping.

    `model.fit` runs for at most 30 epochs and holds out 5% of the data for
    validation (`validation_split=0.05`). Training stops once `val_loss` has
    not improved by at least 0.001 for 5 epochs, and the best weights are
    restored. Nothing is returned.
    """
    stopper = EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=5,
        restore_best_weights=True,
        verbose=1,
    )
    fit_options = {
        "epochs": 30,
        "validation_split": 0.05,
        "callbacks": [stopper],
    }
    model.fit(train_x, train_y, **fit_options)

def fit_generator_model(train_ds,validate_ds,model):
    """Train `model` in place from `train_ds`, validating on `validate_ds`.

    `model.fit` runs for at most 30 epochs. Training stops once `val_loss` has
    not improved by at least 0.001 for 10 epochs, and the best weights are
    restored. Nothing is returned.
    """
    stopper = EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=10,
        restore_best_weights=True,
        verbose=1,
    )
    model.fit(
        train_ds,
        validation_data=validate_ds,
        epochs=30,
        callbacks=[stopper],
    )


    
    
def get_submission(test_x,submission_id,model,relabels_dict=None):
    """Predict on `test_x`, write `submission.csv` and return the submission frame.

    Args:
        test_x: Input accepted by `model.predict`.
        submission_id: pandas Series (or DataFrame) with one identifier per
            prediction, in the same order as the rows produced by
            `model.predict(test_x)`.
        model: Object whose `predict` returns a 2-D array of per-class scores.
        relabels_dict: Optional mapping from predicted class index to the value
            written to the "labels" column. When None (default) the integer
            class index itself is written.

    Returns:
        DataFrame holding the identifier column(s) followed by a "labels"
        column. The same frame is saved to "submission.csv" without the index.

    Raises:
        ValueError: If the number of identifiers differs from the number of
            predictions.
    """
    predicted = model.predict(test_x).argmax(axis=1)
    labels = pd.Series(predicted, name="labels")
    if relabels_dict is not None:
        labels = labels.map(relabels_dict)

    # pd.concat(axis=1) aligns rows by index label, not by position. Give the
    # identifiers a fresh 0..n-1 index so they line up with the predictions
    # even when the caller passes a slice of a larger frame.
    ids = submission_id.reset_index(drop=True)
    if len(ids) != len(labels):
        raise ValueError(
            f"got {len(ids)} ids but {len(labels)} predictions; they must match"
        )

    submission = pd.concat([ids, labels], axis=1)
    print(submission)
    submission.to_csv("submission.csv", index=False)
    return submission

In [None]:
# Train the model from the training generator, validating on the validation generator.
fit_generator_model(train_ds,validate_ds,model)

In [None]:
# Evaluate on the test generator; reports the loss and the compiled "accuracy" metric.
model.evaluate(test_ds)

In [None]:
# Predict on the test generator and write submission.csv.
# NOTE(review): the "labels" column holds argmax class indices, not label strings.
submission = get_submission(test_ds,test_df["image"],model)