In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the file name to get the full path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import layers

In [None]:
# Show the GPU devices TensorFlow can see; the returned list is displayed as the cell output.
tf.config.list_physical_devices('GPU')

In [None]:
# Root folder of the competition dataset. The trailing slash matters: file and
# folder names are appended to this value by plain string concatenation.
data_path = '../input/cassava-leaf-disease-classification/'

# Load the training table from train.csv inside the dataset folder.
train_df = pd.read_csv(
    filepath_or_buffer = data_path + 'train.csv'
)

In [None]:
# Preview the first five rows of the loaded training table.
train_df.head(5)

In [None]:
# flow_from_dataframe with class_mode='categorical' needs string class labels,
# so cast the label column to str (a no-op when the cell is re-run).
train_df['label'] = train_df['label'].astype(str)

# Turn bare image file names into paths under train_images/. The prefix is only
# added when it is missing, so running this cell more than once does not
# prepend the directory twice and break the paths.
train_image_dir = data_path + 'train_images/'
train_df['image_id'] = train_df['image_id'].map(
    lambda img: img if img.startswith(train_image_dir) else train_image_dir + img
)
train_df.head(5)

In [None]:
from sklearn.model_selection import train_test_split

# Split image paths and labels into training and validation parts (no test_size
# is given, so scikit-learn's default hold-out fraction applies).
# stratify keeps the class proportions the same in both parts, and random_state
# pins the shuffle so the split is reproducible across runs.
x_train, x_val, y_train, y_val = train_test_split(
    train_df['image_id'], train_df['label'],
    random_state = 0, shuffle = True, stratify = train_df['label']
)

In [None]:
# Re-attach every training image path to its label by joining the two Series
# on their shared row index, giving a two-column frame for the generator.
train_data = pd.merge(
    left = x_train,
    right = y_train,
    left_index = True,
    right_index = True,
)
train_data.head(5)

In [None]:
# Re-attach every validation image path to its label by joining the two Series
# on their shared row index, giving a two-column frame for the generator.
val_data = pd.merge(
    left = x_val,
    right = y_val,
    left_index = True,
    right_index = True,
)
val_data.head(5)

In [None]:
# Report the number of samples in each split. len() counts rows, whereas
# DataFrame.size counts cells (rows x columns) and would double the figures
# for these two-column frames.
print('Size of training set: ' + str(len(train_data)))
print('Size of validation set: ' + str(len(val_data)))

In [None]:
# Training-time image pipeline: multiply pixel values by 1/255 and apply random
# geometric augmentation to each image.
train_datagen = ImageDataGenerator(
    # Intensity scaling
    rescale = 1.0 / 255,
    # Random geometric transforms
    rotation_range = 45,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    # How pixels exposed by a transform are filled in
    fill_mode = 'nearest',
)

In [None]:
# Validation images only get the 1/255 pixel scaling; no augmentation is configured.
val_datagen = ImageDataGenerator(
    rescale = 1.0 / 255,
)

In [None]:
# Stream augmented training batches straight from the dataframe.
# directory is None because image_id holds the full path to each file.
train_generator = train_datagen.flow_from_dataframe(
    dataframe = train_data,
    directory = None,
    x_col = 'image_id',
    y_col = 'label',
    target_size = (224, 224),
    class_mode = 'categorical',
    batch_size = 32,
)

In [None]:
# Stream validation batches straight from the dataframe.
# directory is None because image_id holds the full path to each file.
val_generator = val_datagen.flow_from_dataframe(
    dataframe = val_data,
    directory = None,
    x_col = 'image_id',
    y_col = 'label',
    target_size = (224, 224),
    class_mode = 'categorical',
    batch_size = 32,
)

In [None]:
# ResNet50 backbone without its classification head, initialised from a local
# weights file and sized for 224x224 RGB inputs (matching the generators' target_size).
base_model = keras.applications.ResNet50(
    input_shape = (224, 224, 3),
    include_top = False,
    weights = '../input/pretrained-resnet50-weights/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
)

# Freezing is currently disabled, so the backbone is fine-tuned together with the new head.
#base_model.trainable = False

In [None]:
# Print the layer-by-layer summary of the ResNet50 backbone.
base_model.summary()

In [None]:
# Full classifier: ResNet50 backbone followed by a small dense head that ends
# in a 5-way softmax.
model = models.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(256, activation = 'relu'),
    layers.Dropout(0.2),
    layers.Dense(5, activation = 'softmax'),
])

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# generators use class_mode='categorical'), and accuracy as the tracked metric.
model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'],
)

In [None]:
# Multiply the learning rate by 0.2 when val_loss has not improved for 5 epochs.
rlr_callback = keras.callbacks.ReduceLROnPlateau(
    monitor = 'val_loss',
    factor = 0.2,
    patience = 5,
)

# Write the model to model.h5 only when val_accuracy reaches a new maximum.
checkpoint = keras.callbacks.ModelCheckpoint(
    'model.h5',
    monitor = 'val_accuracy',
    mode = 'max',
    save_best_only = True,
    verbose = 0,
)

In [None]:
# Train for up to 50 epochs, validating after each one.
# batch_size is deliberately NOT passed: the generators already yield batches
# (batch_size = 32 in flow_from_dataframe), and Keras documents that batch_size
# must not be specified for generator / Sequence input. Depending on the
# TensorFlow version it is either rejected with a ValueError or silently ignored.
history = model.fit(
    train_generator,
    validation_data = val_generator,
    epochs = 50,
    callbacks = [rlr_callback, checkpoint],
    #steps_per_epoch = 100,
    #validation_steps = 50,
)

In [None]:
# The ModelCheckpoint callback already stores the best-val_accuracy model in
# 'model.h5'. Save the last-epoch model under a different name so that the best
# checkpoint is not overwritten by a possibly worse final state.
model.save('./final_model.h5')

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by model.fit.
train_accuracy = history.history['accuracy']
train_loss = history.history['loss']
val_accuracy = history.history['val_accuracy']
val_loss = history.history['val_loss']

epochs = range(len(train_accuracy))

# Accuracy curves. The figure is created BEFORE plotting so that no empty
# trailing figure is produced, and legend() is called so the labels are shown.
plt.figure()
plt.plot(epochs, train_accuracy, 'r', label = 'Training Accuracy')
plt.plot(epochs, val_accuracy, 'b', label = 'Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Loss curves, drawn on their own figure.
plt.figure()
plt.plot(epochs, train_loss, 'r', label = 'Training Loss')
plt.plot(epochs, val_loss, 'b', label = 'Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
# Disabled test-set inference sketch, kept as a bare string literal so none of it executes
# (the cell merely echoes the text).
# NOTE(review): as written, `prediction` is reassigned on every loop iteration, so only the
# last image's output reaches np.argmax. The image is also fed to the model without the
# 1/255 rescaling that the training and validation generators apply. Fix both before
# re-enabling this code.
'''from PIL import Image
test_img_path = '../input/cassava-leaf-disease-classification/test_images/'
file_names = os.listdir(test_img_path)
for img in file_names:
    image = Image.open(test_img_path + img)
    image = image.resize((224, 224))
    image = np.expand_dims(image, axis = 0)
    prediction = model.predict(image)

prediction = np.argmax(prediction)
prediction'''

In [None]:
# Disabled submission-table sketch, kept as a bare string literal so none of it executes.
# It relies on `file_names` and `prediction`, which are only assigned inside the disabled
# inference code, so it cannot be re-enabled on its own.
'''result_df = pd.DataFrame(file_names, columns = ['image_id'])
result_df['label'] = prediction
result_df'''