In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


# Input data files are available in the read-only "../input/" directory
# For example, a cell that walks this directory with os.walk (run by clicking run or pressing Shift+Enter) lists all files under the input directory; this cell only imports os

import os
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Data Exploration

---

In [None]:
# Read the competition's train.csv into `df` and preview its first rows.
TRAIN_CSV_PATH = "../input/plant-pathology-2021-fgvc8/train.csv"
df = pd.read_csv(TRAIN_CSV_PATH)
df.head()

In [None]:
# Route pandas plotting through plotly, then draw a histogram of the raw
# (space-separated) `labels` column.
pd.set_option("plotting.backend", "plotly")
df["labels"].hist()

In [None]:
# Split every space-separated `labels` string into a list stored in `coluna1`,
# then give each list element its own row.
# NOTE: `df` is rebound to the exploded frame while `labels` keeps the full
# string, so re-running this cell explodes the already-exploded rows again.
df = df.assign(coluna1=df["labels"].str.split(" ")).explode("coluna1")

In [None]:
# Show the exploded per-row label column.
print(df["coluna1"])

In [None]:
# Use plotly as the pandas plotting backend and draw a histogram of the
# exploded `coluna1` column (one label per row).
pd.set_option("plotting.backend", "plotly")
df["coluna1"].hist()

# Machine Learning Models

----

In [None]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

# Keras 

----

In [None]:
from keras.models import Sequential
#Import from keras_preprocessing not from keras.preprocessing
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import ModelCheckpoint
from keras import regularizers, optimizers
import pandas as pd
import numpy as np
# Training table: read every column as a string, then turn each
# space-separated `labels` string into a list of label names.
traindf = pd.read_csv(
    "../input/plant-pathology-2021-fgvc8/train.csv",
    dtype=str,
)
traindf["labels"] = traindf["labels"].map(lambda label_text: label_text.split(" "))

# Test table: the rows of the sample submission file, also read as strings.
testdf = pd.read_csv(
    "../input/plant-pathology-2021-fgvc8/sample_submission.csv",
    dtype=str,
)

# Image generator that rescales pixel values by 1/255 and holds out 25% of
# the rows as the "validation" subset.
datagen = ImageDataGenerator(rescale=1./255., validation_split=0.25)

In [None]:
def _labeled_flow(subset):
    """Return a labelled image generator over `traindf` for one split.

    `subset` is passed straight to `flow_from_dataframe` ("training" or
    "validation"); every other argument is shared by both splits so the two
    generators differ only in which rows they draw from.
    """
    return datagen.flow_from_dataframe(
        dataframe=traindf,
        directory="../input/resized-plant2021/img_sz_384",
        x_col="image",
        y_col="labels",
        subset=subset,
        batch_size=64,
        seed=98,
        shuffle=True,
        class_mode="categorical",
        target_size=(384, 384))


# Labelled generators for the two splits declared by `validation_split`.
train_generator = _labeled_flow("training")
valid_generator = _labeled_flow("validation")

# Unlabelled test images: only rescaled, read in file order (no shuffling)
# and yielded without targets.
test_datagen = ImageDataGenerator(rescale=1./255.)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=testdf,
    directory="../input/plant-pathology-2021-fgvc8/test_images",
    x_col="image",
    y_col=None,
    batch_size=3,
    seed=98,
    shuffle=False,
    class_mode=None,
    target_size=(384, 384))

In [None]:
# Convolutional classifier for 384x384 RGB images (channels-last input).
# Three convolutional stages, each followed by max-pooling and dropout, feed a
# small dense head with one output unit per class.
model = Sequential()

# Stage 1: one 32-filter convolution.
model.add(Conv2D(32, (3, 3), padding="same", activation='relu', input_shape=(384, 384, 3)))
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))

# Stage 2: two 64-filter convolutions.
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
# Normalise over the channel axis (3) like every other layer here; axis=1
# would normalise over image height for channels-last tensors.
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Stage 3: two 128-filter convolutions.
model.add(Conv2D(128, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(Conv2D(128, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Dense head.
model.add(Flatten())
model.add(Dense(64))
model.add(Activation("relu"))
model.add(Dropout(0.25))
model.add(BatchNormalization())
model.add(Dropout(0.25))
# One unit per class. NOTE(review): 6 is hard-coded and must match the number
# of classes the training generator discovers -- confirm against
# train_generator.class_indices.
model.add(Dense(6))
# Each image can carry several labels at once (the targets are lists of
# labels), so every class gets an independent sigmoid probability. A softmax
# would force the outputs to sum to 1 and could never mark two labels as
# present, which contradicts the binary cross-entropy loss used below.
model.add(Activation("sigmoid"))
# NOTE(review): `decay` is the legacy learning-rate decay argument -- confirm
# the installed TensorFlow version still accepts it.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, decay=0.01/30),
    loss='binary_crossentropy',
    metrics=['accuracy'])

In [None]:
# Keep only the best model seen so far, judged by the highest `val_accuracy`.
# The path is built with os.path.join so that "Models" is a real
# sub-directory on every platform; a literal backslash separator is only a
# path separator on Windows and is an ordinary filename character elsewhere.
CHECKPOINT_DIR = "Models"
os.makedirs(CHECKPOINT_DIR, exist_ok=True)  # make sure the target directory exists
checkpoint = ModelCheckpoint(os.path.join(CHECKPOINT_DIR, "model-x.h5"),
                             monitor='val_accuracy',
                             mode='max',
                             save_best_only=True,
                             verbose=1)
callbacks = [checkpoint]

In [None]:
# Number of whole batches per epoch for training and validation.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
# Round up for the test set so a trailing partial batch is still counted.
# Floor division would drop those images (and give 0 steps when there are
# fewer images than one batch), leaving fewer predictions than file names.
STEP_SIZE_TEST = (test_generator.n + test_generator.batch_size - 1) // test_generator.batch_size

# Train for 30 epochs, validating after each one; the checkpoint callback
# stores the best model.
model.fit(train_generator,
          steps_per_epoch=STEP_SIZE_TRAIN,
          validation_data=valid_generator,
          validation_steps=STEP_SIZE_VALID,
          callbacks=callbacks,
          epochs=30
)

In [None]:
# Evaluate on the validation split using the validation step count (the test
# step count belongs to a different generator with a different batch size).
# `model.evaluate` accepts generators directly, matching the `model.fit` call
# in this notebook, and replaces the deprecated `evaluate_generator`.
model.evaluate(valid_generator,
    steps=STEP_SIZE_VALID)

In [None]:
# Rewind the test generator so predictions line up with
# `test_generator.filenames`.
test_generator.reset()
# `len(test_generator)` is the number of batches including a final partial
# one, so every test image gets a prediction. `model.predict` accepts
# generators directly and replaces the deprecated `predict_generator`.
pred = model.predict(test_generator,
    steps=len(test_generator),
    verbose=1)

In [None]:
# For every prediction row, keep the index of the highest-scoring class.
predicted_class_indices = pred.argmax(axis=1)

In [None]:
# Invert the generator's class-name -> index mapping so that indices can be
# translated back to class names, then look up the name for each prediction.
labels = {
    class_index: class_name
    for class_name, class_index in train_generator.class_indices.items()
}
predictions = [labels[class_index] for class_index in predicted_class_indices]

In [None]:
# Pair every test file name with its predicted label and write the table to
# results.csv without the index column.
filenames = test_generator.filenames
results = pd.DataFrame({
    "Filename": filenames,
    "Predictions": predictions,
})
results.to_csv(path_or_buf="results.csv", index=False)

In [None]:
# Read the saved predictions back from disk.
resultado = pd.read_csv(filepath_or_buffer="./results.csv")

In [None]:
# Preview the first five rows of the saved predictions.
resultado.head(n=5)