In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import tensorflow.keras as keras
import tensorflow as tf
import matplotlib.pyplot as plt

from tqdm import tqdm
from keras_preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Input, Activation, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.applications import VGG16, Xception, InceptionV3
from tensorflow.keras.models import Model
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Display the GPU devices TensorFlow can see (the cell output is the list).
tf.config.list_physical_devices('GPU')

In [None]:
# Restrict CUDA to device index "0".
# NOTE(review): TensorFlow has already been imported and queried for GPUs in
# the cells above, so this assignment likely has no effect at this point;
# it would normally need to run before TensorFlow is imported - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


In [None]:
# Read the training labels. Each "labels" cell is a space-separated string;
# it is converted to a list of label names so it can be used as a multi-label
# target by `flow_from_dataframe`.
train_csv_path = '../input/plant-pathology-2021-fgvc8/train.csv'
df = pd.read_csv(train_csv_path)
df["labels"] = df["labels"].map(lambda label_string: label_string.split(" "))
df

In [None]:
# Size (two-element tuple) passed as `target_size` to every image generator.
img_size = (150, 150)

# Directory passed as `directory` for the training / validation generators.
train_path = '../input/resized-plant2021/img_sz_256/'

In [None]:
# Augmentation settings for the training stream: pixel values are scaled by
# 1/255 and images are randomly zoomed, flipped, rotated, shifted and sheared.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
)
datagen = ImageDataGenerator(**augmentation_options)

# The first 17,500 rows of `df` form the training split; the list-valued
# "labels" column is used as the multi-label target.
train_generator = datagen.flow_from_dataframe(
    dataframe=df[:17500],
    directory=train_path,
    x_col="image",
    y_col="labels",
    class_mode="categorical",
    target_size=img_size,
    batch_size=32,
    shuffle=True,
    seed=42,
)



In [None]:
# Validation split: every row of `df` from position 17,500 onwards.
#
# Validation images are only rescaled. Feeding them through the augmenting
# `datagen` (random zoom / flip / rotation / shift / shear) would make
# `val_loss` and the validation F1 vary randomly between epochs, so a separate
# rescale-only generator is used here.
val_datagen = ImageDataGenerator(rescale=1./255)

# Reuse the training class order so that output column i means the same label
# in both splits even if some label were absent from the validation rows.
train_class_order = sorted(train_generator.class_indices,
                           key=train_generator.class_indices.get)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=df[17500:],
    directory=train_path,
    x_col="image",
    y_col="labels",
    batch_size=32,
    seed=42,
    # Order is irrelevant for evaluation; a fixed order keeps runs comparable.
    shuffle=False,
    class_mode="categorical",
    classes=train_class_order,
    target_size=img_size)

In [None]:
# Deterministic (unshuffled) pass over the held-out rows of `df`
# (position 17,500 onwards), usable for evaluating against known labels.
# NOTE(review): `test_generator` is rebound further down to the unlabeled
# competition test images, and nothing in between reads this one - confirm
# whether this cell is still needed.
#
# Held-out images are only rescaled; the random augmentations configured on
# `datagen` are meant for training data and would perturb evaluation inputs.
holdout_datagen = ImageDataGenerator(rescale=1./255)

# Reuse the training class order so label columns line up with the model's
# outputs regardless of which labels occur in this slice.
holdout_class_order = sorted(train_generator.class_indices,
                             key=train_generator.class_indices.get)

test_generator = holdout_datagen.flow_from_dataframe(
    dataframe=df[17500:],
    directory=train_path,
    x_col="image",
    y_col="labels",
    batch_size=32,
    seed=42,
    shuffle=False,
    class_mode="categorical",
    classes=holdout_class_order,
    target_size=img_size)

In [None]:
# InceptionV3 backbone without its classification top, initialised from a
# local weights file and built for 150x150 RGB inputs.
inception_weights_path = '../input/inceptionv3weights/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
base_model = InceptionV3(
    weights=inception_weights_path,
    include_top=False,
    input_shape=(150, 150, 3),
)
    


In [None]:
# Classification head stacked on the output of the backbone's last layer.
# The hidden layers are applied in the order listed.
head_layers = [
    BatchNormalization(),
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(512, activation='sigmoid'),
    Dropout(0.2),
    Dense(256, activation='relu'),
]

x = base_model.layers[-1].output
for head_layer in head_layers:
    x = head_layer(x)

# Six independent sigmoid outputs: one per label (multi-label prediction).
predictions = Dense(6, activation='sigmoid')(x)

model = Model(name='Plants', inputs=base_model.input, outputs=predictions)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
import tensorflow_addons as tfa

# Macro-averaged F1 over the 6 output labels.
# `threshold` is set explicitly: with the default (None) F1Score only counts
# the arg-max output of each sample as positive, which under-reports a
# multi-label model whose samples may carry several labels. 0.5 matches the
# cut-off applied to the predictions when the submission is built.
f1 = tfa.metrics.F1Score(num_classes=6, average='macro', threshold=0.5)

In [None]:
# SGD with momentum; binary cross-entropy treats each of the sigmoid outputs
# as an independent yes/no decision. The `f1` metric is tracked during fit.
opt = keras.optimizers.SGD(momentum=0.9, learning_rate=0.01)
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=[f1],
)

In [None]:
# Training-run settings: number of epochs for the first fit, and the batch
# size used when computing the number of validation steps.
epochs = 15
batch_size = 32

In [None]:
# Early-stopping callback configured on validation loss: an improvement
# smaller than `min_delta` does not count, and `patience` is set to 3.
# NOTE(review): the training calls visible in this notebook do not pass a
# `callbacks=` argument, so this object is currently unused - confirm intent.
early_stopping_options = {
    "monitor": "val_loss",
    "mode": "auto",
    "min_delta": 0.01,
    "patience": 3,
    "baseline": None,
    "restore_best_weights": False,
    "verbose": 0,
}
earlystop = tf.keras.callbacks.EarlyStopping(**early_stopping_options)



In [None]:
# First training run: `epochs` passes over the training generator, validating
# on `val_generator` after each epoch. The returned History object holds the
# per-epoch loss / metric values.
#
# `Model.fit` accepts generators directly; `Model.fit_generator` is deprecated
# in TensorFlow 2.x and simply forwards to `fit`.
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_data=val_generator,
    # Derive the step count from the generator's own batch size so it cannot
    # drift from the notebook-level `batch_size` variable.
    validation_steps=val_generator.n // val_generator.batch_size,
    shuffle=True)

In [None]:
# Freeze every layer except the last six, so that only those remain trainable
# in the next training run.
# NOTE(review): this freeze happens after the whole network has already been
# trained once with all layers trainable - confirm that order is intended.
for layer in model.layers[:-6]:
    layer.trainable = False

# Changes to `trainable` only take effect once the model is compiled again;
# without this call the following fit would keep updating the frozen layers.
# A fresh optimizer with the same settings is used so no optimizer state tied
# to the now-frozen weights is carried over.
model.compile(loss='binary_crossentropy',
              optimizer=keras.optimizers.SGD(learning_rate=0.01, momentum=0.9),
              metrics=[f1])

In [None]:
# Plot the per-epoch training curves recorded in `history`:
# left panel = F1 score, right panel = loss (training vs. validation).
acc = history.history['f1_score']
val_acc = history.history['val_f1_score']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs actually recorded instead of a hard-coded 15, so
# the plot still works if `epochs` changes or training stops early.
epochs_range = range(len(loss))

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
# The tracked metric is the F1 score, so the curves are labelled as such.
plt.plot(epochs_range, acc, label='Training F1')
plt.plot(epochs_range, val_acc, label='Validation F1')
plt.legend(loc='lower right')
plt.title('Training and Validation F1')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# Number of epochs for the second training run below (overrides the earlier value).
epochs = 10

In [None]:
# Second training run, using the current value of `epochs`. Note that this
# rebinds `history`, replacing the History object of the first run.
#
# `Model.fit` accepts generators directly; `Model.fit_generator` is deprecated
# in TensorFlow 2.x and simply forwards to `fit`.
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_data=val_generator,
    # Derive the step count from the generator's own batch size so it cannot
    # drift from the notebook-level `batch_size` variable.
    validation_steps=val_generator.n // val_generator.batch_size,
    shuffle=True)


In [None]:
# Build a one-column frame ("Image") listing the competition test image
# file names in sorted order.
test_path = '../input/plant-pathology-2021-fgvc8/test_images'
files = sorted(os.listdir(test_path))
testdf = pd.DataFrame(files, columns=['Image'])
testdf



In [None]:
# Inference-time generator: rescaling only, no augmentation.
datagenTest = ImageDataGenerator(rescale=1./255)

img_size = (150, 150)

# Unlabeled stream over the test images (`class_mode=None` yields images
# only); `shuffle=False` keeps the batches in the order of `testdf`, which is
# what allows predictions to be matched back to file names afterwards.
test_generator = datagenTest.flow_from_dataframe(
    dataframe=testdf,
    directory=test_path,
    x_col='Image',
    y_col=None,
    class_mode=None,
    target_size=img_size,
    batch_size=64,
    shuffle=False,
    seed=42,
)

In [None]:
# Run the model over the test generator; `results` holds the raw model
# outputs (the final layer uses a sigmoid activation, one value per label).
results = model.predict(test_generator)
results

In [None]:
# Binarise the predictions in place at a 0.50 cut-off:
# values above 0.50 become 1, values at or below 0.50 become 0.
is_positive = results > 0.50
is_negative = results <= 0.50
results[is_positive] = 1
results[is_negative] = 0
results

In [None]:
# Invert the generator's {label name -> column index} mapping so that each
# output column can be translated back to its label name.
labels = {index: name for name, index in train_generator.class_indices.items()}

# For every row of binarised outputs, join the names of the active columns
# with spaces. A row with no active column yields an empty string.
# (Note: this rebinds `predictions`, which earlier named the model's output
# layer tensor.)
predictions = [
    " ".join(labels[index] for index, cls in enumerate(row) if cls)
    for row in results
]

# The generator was created with shuffle=False, so its file names are in the
# same order as the prediction rows.
filenames = test_generator.filenames
dfResults = pd.DataFrame({"image": filenames, "labels": predictions})
dfResults.to_csv("submission.csv", index=False)
dfResults