# Exercício 2

**Francisco De Assis Marinho Aguiar**

**Leandro da Cruz Farias**

In [None]:
# General Libs
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.image as mpimg
import json
from tensorflow.keras.utils import to_categorical 
from sklearn.model_selection import train_test_split
import tensorflow as tf
from PIL import Image
import os
%matplotlib inline

In [None]:
# Root folder of the competition data; later cells build their paths from it.
dir_path = '../input/cassava-leaf-disease-classification'

# Load the training table (its image_id and label columns are used below)
# and preview the first five rows.
train_csv_path = f"{dir_path}/train.csv"
train_read = pd.read_csv(train_csv_path, sep=',')
train_read.head(5)

In [None]:
# Folder with the test images and the file name of one image.
test_dir_path = '../input/cassava-leaf-disease-classification/test_images'
test_file_path = '/2216849948.jpg'

# One-row frame holding that image id, previewed below.
data = dict(image_id=['2216849948.jpg'])
test_df = pd.DataFrame(data)
test_df.head()

In [None]:
# Read the label-number -> disease-name mapping shipped with the data set.
with open(f'{dir_path}/label_num_to_disease_map.json') as f:
    raw_label_map = json.load(f)

# The keys arrive as strings from JSON; store them as ints.
labelnames = {int(number): disease for number, disease in raw_label_map.items()}

In [None]:
# Cast the label column to pandas' string dtype; the generators below read
# this column with class_mode="categorical".
train_read['label'] = train_read['label'].astype(pd.StringDtype())

In [None]:
# Show a grid of randomly chosen training images whose label is 'Healthy'.
train_im_path = dir_path + '/train_images/'
fig = plt.figure(figsize=(15, 10))
npics = 6
ncols = 3
# add_subplot needs integer grid dimensions (npics/2 is a float); round up
# so every picture gets a cell.
nrows = (npics + ncols - 1) // ncols

# labelnames maps label number -> disease name. Look up the number for
# 'Healthy' and compare it as a string, since the label column was cast to
# strings above.
healthy_label = str(next(num for num, name in labelnames.items() if name == 'Healthy'))
healthy_ids = train_read.loc[train_read['label'] == healthy_label, 'image_id']
image_list = healthy_ids.sample(frac=1)[:npics].to_list()

for count, img in enumerate(image_list, start=1):
    ax = fig.add_subplot(nrows, ncols, count, xticks=[], yticks=[])
    # The context manager closes the image file once imshow has read it.
    with Image.open(os.path.join(train_im_path, img)) as sample_img:
        ax.imshow(sample_img)
fig.suptitle('Healthy')
plt.tight_layout()
plt.show()

In [None]:
# Show a grid of randomly chosen training images whose label is
# 'Cassava Bacterial Blight (CBB)'.
fig = plt.figure(figsize=(15, 10))
npics = 6
ncols = 3
# add_subplot needs integer grid dimensions (npics/2 is a float); round up
# so every picture gets a cell.
nrows = (npics + ncols - 1) // ncols

# labelnames maps label number -> disease name. Look up the number for CBB
# and compare it as a string, since the label column was cast to strings above.
cbb_label = str(next(
    num for num, name in labelnames.items()
    if name == 'Cassava Bacterial Blight (CBB)'
))
cbb_ids = train_read.loc[train_read['label'] == cbb_label, 'image_id']
image_list = cbb_ids.sample(frac=1)[:npics].to_list()

for count, img in enumerate(image_list, start=1):
    ax = fig.add_subplot(nrows, ncols, count, xticks=[], yticks=[])
    # The context manager closes the image file once imshow has read it.
    with Image.open(os.path.join(train_im_path, img)) as sample_img:
        ax.imshow(sample_img)
fig.suptitle('CBB')
plt.tight_layout()
plt.show()

In [None]:
# Settings shared by the data generators, both models and both training runs.
seed = 10        # shuffle seed handed to the generators
epochs = 3       # epochs passed to each model.fit call
batch_size = 64  # batch size of the generators

# Size the generators resize images to, and the matching model input shape
# (the same two dimensions followed by 3).
target_size = (300, 300)
input_shape = (*target_size, 3)

In [None]:
# Two generators configured with the same validation_split; one serves the
# "training" subset of train_read and the other the "validation" subset.
datagen = ImageDataGenerator(validation_split=0.2)
val_data_generator = ImageDataGenerator(validation_split=0.2)

# Every flow setting except `subset` is identical for both generators.
flow_options = dict(
    directory=train_im_path,
    x_col="image_id",
    y_col="label",
    target_size=target_size,
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
    class_mode="categorical",
)

train_generator = datagen.flow_from_dataframe(
    train_read, subset="training", **flow_options)

val_generator = val_data_generator.flow_from_dataframe(
    train_read, subset="validation", **flow_options)

# The class names are the keys of the generator's class_indices mapping.
classes = [*train_generator.class_indices]
print(f'Classes: {classes}')
num_classes = len(classes)

In [None]:
# Small convolutional network trained from scratch: two conv layers with one
# max-pooling step between them, then a dense head ending in a softmax over
# the classes.
model = Sequential([
    Conv2D(40, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(80, kernel_size=(3, 3), activation='relu'),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])
model.summary()

# Compile the model
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Callbacks: write the best model (by val_loss) to model.h5, and watch
# val_loss for early stopping with a patience of 10.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath='model.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
)
callbacks_list = [checkpoint_cb, early_stopping_cb]

# Training: whole batches only, for both the training and validation passes.
train_steps = train_generator.samples // batch_size
val_steps = val_generator.samples // batch_size

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    callbacks=callbacks_list,
    verbose=1,
)



In [None]:
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Epochs are numbered from 1 on the x axis.
epochs_range = range(1, len(acc) + 1)

# Accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(15, 5))

acc_ax.plot(epochs_range, acc, linestyle='--', label="Training acc")
acc_ax.plot(epochs_range, val_acc, linestyle='-.', label="Validation acc")
acc_ax.set_title("Training and validation acc")
acc_ax.legend()

loss_ax.plot(epochs_range, loss, linestyle='--', label="Training loss", alpha=0.8)
loss_ax.plot(epochs_range, val_loss, linestyle='-.', label="Validation loss", alpha=0.6)
loss_ax.set_title("Training and validation loss")
loss_ax.legend()

plt.show()

In [None]:
# Reload the checkpoint written during training and evaluate it on the
# validation generator.
from tensorflow.keras.models import load_model

best_model_path = "model.h5"
model = load_model(best_model_path)
score = model.evaluate(val_generator)

In [None]:
# score holds the loss first and the accuracy second.
for caption, value in zip(('Val loss:', 'Val accuracy:'), score):
    print(caption, value)

In [None]:
# Load the sample submission file and preview it.
sample_submission_path = "../input/cassava-leaf-disease-classification/sample_submission.csv"
submission_df = pd.read_csv(sample_submission_path)
submission_df.head()

In [None]:
# Predict a label for every test image and write the submission file.
test_images_dir = '/kaggle/input/cassava-leaf-disease-classification/test_images/'
# Sorted so the row order of the submission is reproducible.
test_images = sorted(os.listdir(test_images_dir))
preds = []

# Predict in chunks of batch_size: one predict call per chunk instead of one
# per image, without holding the whole test set in memory.
for start in range(0, len(test_images), batch_size):
    chunk = test_images[start:start + batch_size]
    # load_img converts each file to RGB and resizes it with Keras' default
    # interpolation, so test images are prepared with the same Keras loader
    # defaults as the training data rather than PIL's default resize filter.
    batch = np.stack([
        img_to_array(load_img(os.path.join(test_images_dir, name),
                              target_size=target_size))
        for name in chunk
    ])
    probabilities = model.predict(batch)
    # argmax yields a position in the generator's class list; translate it
    # back to the label value it stands for.
    preds.extend(int(classes[idx]) for idx in np.argmax(probabilities, axis=1))

df_sub = pd.DataFrame({'image_id': test_images, 'label': preds})
df_sub.to_csv("submission.csv", index=False)
df_sub.head()

# Transferência de aprendizagem

In [None]:
# Transfer learning: InceptionResNetV2 with ImageNet weights as a frozen
# feature extractor, followed by a small trainable classification head.
base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=input_shape)

# Freezing pretrained layers
base_model.trainable = False

inputs = keras.Input(shape=input_shape)
# The data generators are created without any rescaling, so batches arrive as
# raw 0-255 pixels, while the pretrained weights expect the scaling done by
# preprocess_input. Applying it inside the model lets the existing generators
# and any raw-pixel caller stay unchanged.
x = preprocess_input(inputs)
# training=False keeps the frozen base in inference mode during fit.
x = base_model(x, training=False)
x = Flatten()(x)
x = Dense(100, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax', kernel_initializer='random_uniform')(x)

model = Model(inputs=inputs, outputs=predictions)

optimizer = Adam()
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Callbacks: write the best model (by val_loss) to model_transfer.h5, and
# watch val_loss for early stopping with a patience of 2.
transfer_checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath='model_transfer.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)
transfer_early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    verbose=1,
)
callbacks_list = [transfer_checkpoint_cb, transfer_early_stopping_cb]

# Whole batches only, for both the training and validation passes.
transfer_train_steps = train_generator.samples // batch_size
transfer_val_steps = val_generator.samples // batch_size

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    steps_per_epoch=transfer_train_steps,
    validation_steps=transfer_val_steps,
    callbacks=callbacks_list,
    verbose=1,
)

In [None]:
# Plot the transfer-learning run: loss in the top panel, accuracy below.
history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']

# Epochs are numbered from 1 on the x axis.
epochs_x = range(1, len(loss_values) + 1)
plt.figure(figsize=(10, 10))

plt.subplot(2, 1, 1)
plt.plot(epochs_x, loss_values, 'bo', label='Training loss')
plt.plot(epochs_x, val_loss_values, 'b', label='Validation loss')
plt.title('Training and validation Loss and Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss')
# Both curves carry labels, so render the legend; otherwise the dots and the
# line cannot be told apart.
plt.legend()

plt.subplot(2, 1, 2)
acc_values = history_dict['accuracy']
val_acc_values = history_dict['val_accuracy']
plt.plot(epochs_x, acc_values, 'bo', label='Training acc')
plt.plot(epochs_x, val_acc_values, 'b', label='Validation acc')
plt.xlabel('Epochs')
plt.ylabel('Acc')
plt.legend()
plt.show()

In [None]:
# Reload the transfer-learning checkpoint and report its validation metrics.
transfer_model_path = 'model_transfer.h5'
model = load_model(transfer_model_path)
score = model.evaluate(val_generator)

# score holds the loss first and the accuracy second.
transfer_val_loss, transfer_val_accuracy = score[0], score[1]
print('Val loss:', transfer_val_loss)
print('Val accuracy:', transfer_val_accuracy)