In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


import os
import cv2
import time
import random
import itertools
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.style as style
import tensorflow_hub as hub

from PIL import Image
from datetime import datetime
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session



In [None]:
# Locations of the competition images (read-only Kaggle input mount)
train_images_path = '../input/plant-pathology-2021-fgvc8/train_images/'
test_images_path = '../input/plant-pathology-2021-fgvc8/test_images/'

# Load the training table, reading every column as text
train = pd.read_csv(
    '../input/plant-pathology-2021-fgvc8/train.csv',
    dtype=str,
)
train.head()

In [None]:
# (number of rows, number of columns) of the training table
train.shape

In [None]:
# Count how often each individual label occurs, most frequent first.
# A row of `labels` can hold several whitespace-separated labels, so the string
# is split into a list before explode(); exploding the unsplit string is a
# no-op and would count whole label combinations instead of single labels.
label_freq = (
    train['labels']
    .apply(lambda s: str(s).split())
    .explode()
    .value_counts()
    .sort_values(ascending=False)
)

# Horizontal bar plot, one bar per label, ordered by frequency
style.use("fivethirtyeight")
plt.figure(figsize=(12,10))
sns.barplot(y=label_freq.index.values, x=label_freq, order=label_freq.index)
plt.title("Label frequency", fontsize=14)
plt.xlabel("")
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()

In [None]:
# Show the label counts as a table
label_freq

In [None]:
# Turn each whitespace-separated label string into a list of label names
train['new_labels'] = train['labels'].apply(lambda raw: str(raw).split())
train.head()

In [None]:
# Path of every training image, in the same row order as `train`
train_paths = [
    os.path.join(train_images_path, str(file_name))
    for file_name in train['image']
]
# Label list of every training image, in that same order
corresponding_labels = list(train['new_labels'])

In [None]:
nobs = 14  # Upper bound on the number of images shown
ncols = 4  # Images per grid row
nrows = nobs // ncols  # Only complete rows are drawn, so nrows*ncols images appear

style.use("default")
plt.figure(figsize=(12, 2 * nrows))
# Fill the grid row by row with the first training images and their labels
for i in range(nrows * ncols):
    ax = plt.subplot(nrows, ncols, i + 1)
    ax.imshow(Image.open(train_paths[i]))
    ax.set_title(corresponding_labels[i], size=10)
    ax.axis('off')

In [None]:
#Label Encoding
# Fit the multi-label binarizer on the training set
print("Labels:")
mlb = MultiLabelBinarizer()
mlb.fit(train['new_labels'])

# Loop over all labels and show them
N_LABELS = len(mlb.classes_)
for (i, label) in enumerate(mlb.classes_):
    print("{}. {}".format(i, label))

In [None]:
# Encode new_labels as an indicator matrix with one column per label
df = pd.DataFrame(
    mlb.fit_transform(train['new_labels']),
    columns=mlb.classes_,
)
# Attach the indicator columns to the right of the original table
new_df = pd.concat([train, df], axis=1)

In [None]:
# Display the training table together with its per-label indicator columns
new_df

In [None]:
# Label names as a plain Python list, in the binarizer's class order
columns = [class_name for class_name in mlb.classes_]
columns

In [None]:
# Images are resized to IMG_SIZE x IMG_SIZE pixels to match the model input
IMG_SIZE = 224
# Number of colour channels kept per image (RGB)
CHANNELS = 3

In [None]:
# Deterministic per-image preprocessing shared by training and validation, so
# both subsets are normalised identically. validation_split must stay the same
# on both generators so the 'training' and 'validation' subsets remain
# complementary slices of the dataframe.
_preprocessing = dict(rescale=1./255.,
                      validation_split=0.2,
                      samplewise_center=True,
                      samplewise_std_normalization=True)

# Training generator: shared preprocessing plus random augmentation.
base_gen = tf.keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True,
                                                           vertical_flip=False,
                                                           height_shift_range=0.05,
                                                           width_shift_range=0.1,
                                                           #rotation_range=20,
                                                           shear_range=0.1,
                                                           fill_mode='reflect',
                                                           zoom_range=0.15,
                                                           **_preprocessing)

# Validation generator: preprocessing only. Drawing the validation subset from
# the augmenting generator would randomly flip/shift/shear/zoom the validation
# images, so validation metrics would change from run to run and would not
# describe the unaugmented images the model is used on at inference time.
valid_base_gen = tf.keras.preprocessing.image.ImageDataGenerator(**_preprocessing)

# Arguments common to both subsets. With list-valued y_col entries,
# class_mode='categorical' yields one multi-hot vector per image, with its
# columns ordered as in `classes`.
_flow_options = dict(dataframe=new_df,
                     directory=train_images_path,
                     x_col='image',
                     y_col='new_labels',
                     batch_size=32,
                     seed=42,
                     shuffle=True,
                     class_mode='categorical',
                     classes=columns,
                     target_size=(IMG_SIZE,IMG_SIZE))

train_gen = base_gen.flow_from_dataframe(subset='training', **_flow_options)

valid_gen = valid_base_gen.flow_from_dataframe(subset='validation', **_flow_options)

In [None]:
# feature_extractor_url ='https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4'
# feature_extractor_layer = hub.KerasLayer(feature_extractor_url,
#                                          input_shape=(IMG_SIZE,IMG_SIZE,CHANNELS),
#                                         trainable=False)

In [None]:
# model = tf.keras.Sequential([
#     feature_extractor_layer,
#     layers.Dense(1024, activation='relu', name='hidden_layer'),
#     layers.Dense(N_LABELS, activation='sigmoid', name='output')
# ])

# model.summary()

In [None]:

# #our custom model starts here (sequential)
# model =tf.keras.Sequential(
#     [
#         layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu', 
#                       input_shape=(IMG_SIZE,IMG_SIZE,CHANNELS)),
#         layers.BatchNormalization(axis=3),
#         layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu'),
#         layers.MaxPooling2D(pool_size=(2, 2)),
#         layers.BatchNormalization(axis=3),
#         layers.Dropout(0.25),
        
#         layers.Conv2D(filters=128, kernel_size=(5, 5), activation='relu'),
#         layers.BatchNormalization(axis=3),
#         layers.Conv2D(filters=128, kernel_size=(5, 5), activation='relu'),
#         layers.MaxPooling2D(pool_size=(2, 2)),
#         layers.BatchNormalization(axis=3),
#         layers.Dropout(0.25),
        
#         layers.Conv2D(filters=256, kernel_size=(5, 5), activation='relu'),
#         layers.BatchNormalization(axis=3),
#         layers.Conv2D(filters=256, kernel_size=(5, 5), activation='relu'),
#         layers.MaxPooling2D(pool_size=(2, 2)),
#         layers.BatchNormalization(axis=3),
#         layers.Dropout(0.5),
        
#         layers.Flatten(),
        
#         layers.Dense(512), # Fully connected layer
#         layers.BatchNormalization(),
#         layers.Dropout(0.5),
        
# #         layers.Dense(60, activation="relu"),  # Fully connected layer
# #         layers.BatchNormalization(),
# #         layers.Dropout(0.5),
        
#         layers.Dense(N_LABELS, activation="sigmoid")  # Classification layer or output layer
#     ]
# )

# # model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.0005), 
# #               loss=tf.keras.metrics.binary_crossentropy,
# #               metrics=['binary_accuracy', 'mae'])

# model.summary()




In [None]:
### Toy ResNet Model

In [None]:
# Toy ResNet-style CNN built with the Keras functional API.
# The input shape comes from IMG_SIZE / CHANNELS so the network always matches
# the size the data generators resize images to.
inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, CHANNELS), name="img")

# Stem: two unpadded 3x3 convolutions followed by 3x3 max-pooling
x = layers.Conv2D(256, 3, activation="relu")(inputs)
x = layers.Conv2D(256, 3, activation="relu")(x)
block_1_output = layers.MaxPooling2D(3)(x)

# Residual block 1: padding="same" and 256 output filters keep the tensor
# shape equal to block_1_output so the two can be added
x = layers.Conv2D(128, 3, activation="relu", padding="same")(block_1_output)
x = layers.Conv2D(256, 3, activation="relu", padding="same")(x)
block_2_output = layers.add([x, block_1_output])

# Residual block 2: same pattern with a narrower bottleneck
x = layers.Conv2D(64, 3, activation="relu", padding="same")(block_2_output)
x = layers.Conv2D(256, 3, activation="relu", padding="same")(x)
block_3_output = layers.add([x, block_2_output])

# Classification head
x = layers.Conv2D(32, 3, activation="relu")(block_3_output)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.5)(x)
# Sigmoid yields one independent probability per label. The model is compiled
# with loss="binary_crossentropy", which expects probabilities rather than
# logits, and predictions are thresholded at 0.5; a linear output here would
# feed raw logits to both.
outputs = layers.Dense(N_LABELS, activation="sigmoid")(x)

model = tf.keras.Model(inputs, outputs)
model.summary()

In [None]:
# Candidate optimizers; learning rate and loss follow the base paper
optimizer = [
    tf.keras.optimizers.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999),
    tf.keras.optimizers.Adagrad(),
    tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9),
    tf.keras.optimizers.Adadelta(),
    tf.keras.optimizers.RMSprop(),
    tf.keras.optimizers.Nadam(),
]

# Compile with the first candidate in the list (Adam)
model.compile(
    optimizer=optimizer[0],
    loss="binary_crossentropy",
    metrics=['binary_accuracy'],
)

In [None]:
# Callbacks used during training
# https://keras.io/callbacks/

# Save a checkpoint file only when the monitored quantity improves
checkpointer = tf.keras.callbacks.ModelCheckpoint(
    filepath='weights.best.{epoch:02d}-{val_loss:.2f}.hdf5',
    verbose=1,
    save_best_only=True,
)

# Multiply the learning rate by 0.1 after 2 epochs without val_loss improvement
reduce = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=2,
    mode='auto',
)

# Stop training after 4 epochs in which val_loss improves by less than 1e-4
early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
    patience=4,
    mode='auto',
)

callbacks_list = [checkpointer, reduce, early]

In [None]:
# One validation batch, kept available for quick manual inspection of the
# inputs and targets the model receives.
valid_X, valid_Y = next(valid_gen)

# Validate on the whole validation generator rather than on the single batch
# above: val_loss drives checkpointing, learning-rate reduction and early
# stopping, and an estimate from one batch is too noisy for those decisions.
history = model.fit(train_gen,
                    validation_data=valid_gen,
                    callbacks=callbacks_list,
                    epochs=2)

In [None]:
def prediction(image_name, model):
    """Predict the labels of one test image.

    Relies on the notebook-level names ``test_images_path``, ``IMG_SIZE``,
    ``base_gen`` and ``mlb``.

    Args:
        image_name: File name of the image inside ``test_images_path``.
        model: Model whose ``predict`` returns one score per label, in the
            order of ``mlb.classes_``.

    Returns:
        The predicted label names joined by single spaces. Labels scoring
        above 0.5 are selected; if none does, the single highest-scoring
        label is returned so the result is never empty.
    """
    img_path = os.path.join(test_images_path, image_name)

    # Read and prepare the image. target_size is (height, width) only; the
    # channel count is not part of it.
    img = image.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
    img = image.img_to_array(img)
    # Apply the training generator's deterministic normalisation (rescale and
    # samplewise centering/std) without any random augmentation, so inference
    # inputs are on the same scale as the training inputs. This is done on the
    # single image, before the batch axis is added.
    img = base_gen.standardize(img)
    img = np.expand_dims(img, axis=0)

    # Run the model once and reuse the result for logging and thresholding
    scores = model.predict(img)
    print(scores)
    label_scores = scores[0]

    selected = mlb.classes_[label_scores > 0.5]
    if len(selected) == 0:
        # Every training image carries at least one label, so an empty
        # prediction can never be correct; fall back to the best-scoring label.
        selected = mlb.classes_[[int(np.argmax(label_scores))]]

    predicted_labels = ' '.join(selected)

    return predicted_labels
    

In [None]:
# Two side-by-side panels: accuracy on the left, loss on the right
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
t = f.suptitle('Model Perfomance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)
print("plotting started")

# One x position per recorded epoch, numbered from 1
max_epoch = len(history.history['binary_accuracy']) + 1
epoch_list = list(range(1, max_epoch))

# Accuracy panel
ax1.plot(epoch_list, history.history['binary_accuracy'], label='Train Accuracy')
ax1.plot(epoch_list, history.history['val_binary_accuracy'], label='Validation Accuracy')
ax1.set_xticks(np.arange(1, max_epoch, 5))
ax1.set_ylabel('Accuracy Value')
ax1.set_xlabel('Epoch')
ax1.set_title('Accuracy')
l1 = ax1.legend(loc="best")
# Saved before the loss panel is drawn, so this file holds the figure as it
# looks at this point
f.savefig("Accuracy.png")

print("still ploting")

# Loss panel
ax2.plot(epoch_list, history.history['loss'], label='Train Loss')
ax2.plot(epoch_list, history.history['val_loss'], label='Validation Loss')
ax2.set_xticks(np.arange(1, max_epoch, 5))
ax2.set_ylabel('Loss Value')
ax2.set_xlabel('Epoch')
ax2.set_title('Loss')
l2 = ax2.legend(loc="best")
# Saved after both panels are complete
f.savefig("plot.png")

print("plotting finishing")

In [None]:
# Collect one record per test image and build the frame once at the end.
# DataFrame.append was removed in pandas 2.0 and, when called in a loop,
# copies the whole frame on every iteration.
submission_rows = []

# sorted() makes the row order reproducible; os.listdir returns entries in
# arbitrary order.
for image_name in sorted(os.listdir(test_images_path)):
    predicted_labels = prediction(image_name, model)
    submission_rows.append({'image': image_name, 'labels': predicted_labels})

# Passing columns explicitly keeps both columns even when there are no rows
submission_df = pd.DataFrame(submission_rows, columns=['image', 'labels'])
submission_df

In [None]:
# Write the predictions to the Kaggle working directory, without the index column
submission_df.to_csv(
    '/kaggle/working/submission.csv',
    index=False,
)