In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import shutil
import json
from PIL import Image

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input

In [None]:
# Directory containing the training images; used below as the base directory
# for the file names stored in the `image_id` column of the training table.
image_path = '/kaggle/input/cassava-leaf-disease-classification/train_images/'

In [None]:
# Load the training table; later cells read its `image_id` and `label` columns.
train = pd.read_csv('/kaggle/input/cassava-leaf-disease-classification/train.csv')

In [None]:
# Preview the first rows of the training table.
train.head()

In [None]:
# Load the label -> disease-name mapping. The context manager closes the file
# handle, which the bare `open()` call never did.
with open('/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json') as mapping_file:
    label_to_disease = json.load(mapping_file)

# JSON object keys are always strings, so the labels are cast to str before the
# lookup; mapping integer labels directly would yield NaN for every row.
# The cast is applied on a temporary copy, leaving `train.label` itself untouched.
train['disease'] = train.label.astype(str).map(label_to_disease)

In [None]:
# Display the label -> disease-name mapping loaded from the JSON file.
label_to_disease

In [None]:
# Count how many training rows carry each label (class balance check).
train.label.value_counts()

In [None]:
# Show the first training image whose label is 0.
label_0_file = train.loc[train.label == 0, 'image_id'].iloc[0]
Image.open(os.path.join(image_path, label_0_file))

In [None]:
# Show the first training image whose label is 1.
label_1_file = train.loc[train.label == 1, 'image_id'].iloc[0]
Image.open(os.path.join(image_path, label_1_file))

In [None]:
# Show the first training image whose label is 2.
label_2_file = train.loc[train.label == 2, 'image_id'].iloc[0]
Image.open(os.path.join(image_path, label_2_file))

In [None]:
# Show the first training image whose label is 3.
label_3_file = train.loc[train.label == 3, 'image_id'].iloc[0]
Image.open(os.path.join(image_path, label_3_file))

In [None]:
# Show the first training image whose label is 4.
label_4_file = train.loc[train.label == 4, 'image_id'].iloc[0]
Image.open(os.path.join(image_path, label_4_file))

In [None]:
# Cast the labels to strings so they match the string class names
# ('0' .. '4') passed to the data loaders below.
train['label'] = train['label'].astype(str)

In [None]:
# Image generator that applies ResNet preprocessing plus random augmentation,
# and reserves a quarter of the rows for the 'validation' subset.
data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # ResNet-specific input scaling
    validation_split=0.25,                    # fraction held out as validation
    horizontal_flip=True,                     # random left/right flips
    vertical_flip=True,                       # random up/down flips
    rotation_range=45,                        # random rotation range, in degrees
    zoom_range=0.2,                           # random zoom range
    width_shift_range=0.2,                    # random horizontal shift range
    height_shift_range=0.2,                   # random vertical shift range
)

In [None]:
# Batches for the 'training' subset: images are read from `image_path`
# by file name and resized to the 224x224 input the model expects.
train_data_loader = data_generator.flow_from_dataframe(
    dataframe=train,
    directory=image_path,
    x_col="image_id",
    y_col="label",
    classes=['0', '1', '2', '3', '4'],
    target_size=(224, 224),
    subset='training',
)

In [None]:
# Validation images must not be randomly augmented, otherwise the validation
# metrics are computed on distorted inputs and fluctuate between epochs.
# A dedicated generator therefore applies only the ResNet preprocessing.
# `validation_split` must equal the value used by `data_generator` so that the
# 'training' and 'validation' subsets stay disjoint.
val_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.25,
)

val_data_loader = val_generator.flow_from_dataframe(
    train,
    directory=image_path,
    classes=['0', '1', '2', '3', '4'],
    x_col="image_id",
    y_col="label",
    target_size=(224, 224),
    subset='validation',
    shuffle=False,  # evaluation does not need shuffling; keeps batches deterministic
)

In [None]:
# Transfer-learning classifier, assembled layer by layer.
model = Sequential()

# Backbone: ResNet50 without its classification top, initialised from a local
# weights file, with average pooling applied to its output.
model.add(
    ResNet50(
        weights='/kaggle/input/tf-keras-pretrained-models/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
        include_top=False,
        input_shape=(224, 224, 3),
        pooling='avg',
    )
)

# Head: one softmax output per class ('0' .. '4').
model.add(layers.Dense(5, activation='softmax'))

In [None]:
# Halve the learning rate once the monitored quantity (Keras default) has
# stopped improving for 5 epochs; verbose=1 logs each reduction.
lr_on_plateau = ReduceLROnPlateau(factor=0.5, patience=5, verbose=1)
callbacks = [lr_on_plateau]

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the model's layer summary as a sanity check before training.
model.summary()

In [None]:
# Train for 50 epochs, evaluating on the validation loader after each epoch.
# `batch_size` is deliberately NOT passed: the loaders already yield batches
# (their own batch size, i.e. flow_from_dataframe's default since none is set
# there), and Keras does not accept `batch_size` for generator/Sequence inputs.
# The returned History object is kept so the training curves can be inspected.
history = model.fit(
    train_data_loader,
    validation_data=val_data_loader,
    epochs=50,
    callbacks=callbacks,
)

In [None]:
# File names of every entry in the test image directory
# (os.listdir returns them in arbitrary order).
test_images = os.listdir('/kaggle/input/cassava-leaf-disease-classification/test_images/')

In [None]:
# Classify every test image one at a time and collect the predicted class
# index (argmax over the model's softmax output) in `predict`, in the same
# order as `test_images`.
predict = []

for file_name in test_images:
    # The context manager closes the image file once its pixels are loaded;
    # convert('RGB') guarantees the 3 channels the model input requires.
    with Image.open(f'/kaggle/input/cassava-leaf-disease-classification/test_images/{file_name}') as raw_image:
        resized_image = raw_image.convert('RGB').resize((224, 224))

    # Apply the same ResNet preprocessing used in training, then add the
    # leading batch dimension expected by model.predict.
    model_input = preprocess_input(np.asarray(resized_image))
    model_input = np.expand_dims(model_input, axis=0)

    # verbose=0 avoids printing progress output for every single image.
    predict.append(np.argmax(model.predict(model_input, verbose=0)))

In [None]:
# Pair each test file name with its predicted class index.
submission = pd.DataFrame(
    data={
        'image_id': test_images,
        'label': predict,
    }
)

In [None]:
# Display the submission table for a quick visual check.
submission

In [None]:
# Write the submission file without the DataFrame index column.
# `index` is documented as a bool, so pass False explicitly rather than
# relying on None being falsy.
submission.to_csv('submission.csv', index=False)