In [None]:
import os
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
#
import tensorflow as tf
import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image

In [None]:
# Seed NumPy's and TensorFlow's global random generators with fixed values.
NUMPY_SEED, TF_SEED = 8, 6
np.random.seed(NUMPY_SEED)
tf.random.set_seed(TF_SEED)

In [None]:
# Root folder of the competition data; the listing shows what it contains.
base_dir = '/kaggle/input/cassava-leaf-disease-classification'
os.listdir(base_dir)

In [None]:
# Image folders for training and testing, both located directly under base_dir.
train_dir = os.path.join(base_dir, 'train_images')
test_dir = os.path.join(base_dir, 'test_images')

**Load data**

In [None]:
import json

# The training labels and the sample submission are CSV files under base_dir.
train_df = pd.read_csv(os.path.join(base_dir, 'train.csv'))
sample_subm = pd.read_csv(os.path.join(base_dir, 'sample_submission.csv'))

# Parse the label-number -> disease-name mapping. The context manager closes
# the file handle as soon as the JSON has been read instead of leaking it.
with open(os.path.join(base_dir, 'label_num_to_disease_map.json'), 'r') as f:
    label_num = json.load(f)

In [None]:
# Preview the first rows of the training table.
train_df.head()

In [None]:
# Rebuild the mapping with its keys converted to int (JSON object keys are
# always strings), then display it.
label_dict = dict((int(key), name) for key, name in label_num.items())
label_dict

In [None]:
# Replace numeric label ids with their disease names. Values that are not keys
# of label_dict (for example names written by an earlier run of this cell) are
# kept unchanged, so re-running the cell does not turn the column into NaN.
train_df['label'] = train_df['label'].map(lambda value: label_dict.get(value, value))

In [None]:
# Preview the first rows again, now that the label column has been mapped.
train_df.head()

In [None]:
# Show the dtype of every column in train_df.
train_df.dtypes

# Shape of the images

In [None]:
import cv2

# Read the first file that os.listdir reports for train_dir and show the shape
# of the array cv2 returns for it.
first_train_file = os.listdir(train_dir)[0]
imtest = cv2.imread(os.path.join(train_dir, first_train_file))
imtest.shape

In [None]:
# One file path per image_id, in the same order as the rows of train_df.
path = [os.path.join(train_dir, image_id) for image_id in train_df.image_id]

In [None]:
# Store the previously built list of file paths as the 'path' column.
train_df['path'] = path
# train_df

In [None]:
# (rows, columns) of the training table.
train_df.shape

# The model: data generators, architecture and training

In [None]:
# Both generators scale pixel values by 1/255; the training one additionally
# sets validation_split=0.35 so it can serve 'training' and 'validation' subsets.
PIXEL_SCALE = 1. / 255
train_datagen = ImageDataGenerator(rescale=PIXEL_SCALE, validation_split=0.35)
test_datagen = ImageDataGenerator(rescale=PIXEL_SCALE)

In [None]:
# Options shared by the training and validation iterators: images are looked up
# by image_id inside train_dir, resized to 100x100, and served in shuffled
# batches of 50 with one-hot ('categorical') labels taken from the label column.
flow_options = dict(dataframe=train_df,
                    directory=train_dir,
                    x_col='image_id',
                    y_col='label',
                    shuffle=True,
                    target_size=(100, 100),
                    batch_size=50,
                    class_mode='categorical')

# The two iterators differ only in which side of the validation split they read.
train_generator = train_datagen.flow_from_dataframe(subset='training', **flow_options)
valid_generator = train_datagen.flow_from_dataframe(subset='validation', **flow_options)

In [None]:
# Report the GPU device name TensorFlow sees (an empty string means no GPU).
device_name = tf.test.gpu_device_name()
print(device_name)
# No bare tf.device(...) call here: tf.device only takes effect when used as a
# `with` context manager, and TensorFlow already places ops on an available GPU
# by default.

In [None]:
# Transfer-learning model: a frozen VGG16 convolutional base with ImageNet
# weights, followed by a flatten layer and a 5-way softmax head.
# Everything is taken from tf.keras so that the base model and optimizer come
# from the same Keras implementation as the `layers` and `models` modules.
# NOTE(review): the ImageNet VGG16 weights are normally paired with
# vgg16.preprocess_input rather than 1/255 scaling -- consider switching.
base_model = tf.keras.applications.vgg16.VGG16(weights='imagenet',
                                               include_top=False)
base_model.trainable = False  # freeze the pretrained weights

input_x = layers.Input(shape=(100, 100, 3))
# training=False runs the frozen base in inference mode.
x = base_model(input_x, training=False)
x = layers.Flatten()(x)
# 5 outputs -- presumably one per disease class; confirm against label_dict.
x = layers.Dense(5, activation='softmax')(x)
model = models.Model(inputs=input_x, outputs=x)

model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['acc'])
model.summary()

In [None]:
# Train for 50 epochs of 50 batches each, validating on 50 batches per epoch.
fit_options = dict(steps_per_epoch=50,
                   validation_data=valid_generator,
                   validation_steps=50,
                   epochs=50,
                   verbose=1)
model.fit(train_generator, **fit_options)

In [None]:
# Per-epoch training and validation accuracy recorded on the model's history.
training_log = model.history.history
acc, val_acc = training_log['acc'], training_log['val_acc']

In [None]:
# Training vs. validation accuracy per epoch; the x axis starts at epoch 1.
fig, ax = plt.subplots()
epoch_numbers = range(1, len(acc) + 1)
ax.plot(epoch_numbers, acc, 'b', label='train_acc')
ax.plot(epoch_numbers, val_acc, 'r', label='val_acc')
ax.set_title('Accuracy comparison')
ax.legend()

In [None]:
# Re-binds test_dir using the same path expression as earlier in the notebook.
test_dir = os.path.join(base_dir, 'test_images')

In [None]:
# Load the first file listed in test_dir at the 100x100 training resolution.
img = image.load_img(os.path.join(test_dir, os.listdir(test_dir)[0]), target_size=(100, 100))
# Scale pixels by 1/255 to match the rescale applied by the training generator;
# the model was never fitted on raw 0-255 inputs.
img = image.img_to_array(img) / 255.0
# Add the leading batch axis so the array is a batch containing one image.
img = np.expand_dims(img, axis=0)

In [None]:
# Join the one-element list of batches along the first axis.
img = np.concatenate([img], axis=0)

In [None]:
# Run the model on the prepared image batch.
pred = model.predict(img)

In [None]:
# Reduce the prediction array to the position of its largest entry and show it.
pred = pred.argmax()
print(pred)

In [None]:
# Display the sample submission table that was read from sample_submission.csv.
sample_subm

In [None]:
# Name of the first entry os.listdir returns for the test folder.
imgid = os.listdir(test_dir)[0]
imgid

In [None]:
# Predict every image listed in the sample submission instead of broadcasting
# one scalar prediction to all rows. test_datagen applies the same 1/255
# rescale as training, and shuffle=False keeps predictions in row order.
test_generator = test_datagen.flow_from_dataframe(dataframe=sample_subm,
                                                  directory=test_dir,
                                                  x_col='image_id',
                                                  y_col=None,
                                                  class_mode=None,
                                                  shuffle=False,
                                                  target_size=(100, 100),
                                                  batch_size=50)
test_probs = model.predict(test_generator)

# Model output column i belongs to the class whose index is i in
# train_generator.class_indices; translate that name back to its numeric id.
index_to_name = {index: name for name, index in train_generator.class_indices.items()}
name_to_id = {name: label_id for label_id, name in label_dict.items()}
pred_labels = [name_to_id[index_to_name[i]] for i in np.argmax(test_probs, axis=1)]

# Use the generator's own filename list so ids and predictions stay aligned
# even if the generator dropped rows whose files are missing.
output = pd.DataFrame({'image_id': test_generator.filenames, 'label': pred_labels})

In [None]:
# Write the submission table to submission.csv without the index column.
output.to_csv("submission.csv", index=False)
print("saved")