### Setting it up

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf
import pathlib

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

# Report the installed TensorFlow version in the notebook output.
print(tf.__version__)

### Data

In [None]:
# Root directory of the labelled training images.
data_dir_train_path = '/kaggle/input/plant-seedlings-classification/train'
data_dir_train = pathlib.Path(data_dir_train_path)

# Immediate children of the training root.
folder_train = list(data_dir_train.glob('*'))
# Full path of every PNG located exactly one directory below the root.
images_train = list(data_dir_train.glob('*/*.png'))

# Show the directory layout followed by the total image count.
print('Folder Structure:')
for folder in folder_train:
    print(folder)
image_count = len(images_train)
print('\nNumber of images: ', image_count)

In [None]:
# Directory holding the test images; the pattern below looks for PNGs
# directly inside it (no sub-directories).
data_dir_test_path = '/kaggle/input/plant-seedlings-classification/test'
data_dir_test = pathlib.Path(data_dir_test_path)

# Full path of every test PNG.
test_pngs = data_dir_test.glob('*.png')
images_test = list(test_pngs)
print('\nNumber of images: ', len(images_test))

### Exploring the data

In [None]:
# Preview up to nine training images in a 3x3 grid, each titled with its
# file name. Slicing (rather than indexing 0..8) avoids an IndexError when
# fewer than nine images were found.
fig = plt.figure(figsize=(10, 10))
for i, image_path in enumerate(images_train[:9]):
    plt.subplot(3, 3, i + 1)
    # Path.name is the final path component on any OS, unlike splitting
    # the string form on '/'.
    plt.title(image_path.name, fontsize=10)
    plt.imshow(PIL.Image.open(image_path))
    ax = plt.axis("off")

In [None]:
# Number of images per batch.
batch_size = 32
# Target (height, width) in pixels used when images are loaded.
img_height, img_width = 256, 256

### Building Training Data Set from images

In [None]:
# Training subset of the labelled images.
# `validation_split` is the fraction of data RESERVED FOR VALIDATION, so 0.2
# leaves 80% for training (the previous 0.8 trained on only 20% of the data).
# The fraction and seed match the validation call so the two subsets are
# complementary and do not overlap.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir_train,
    validation_split=0.2,  # hold out 20% -> 80% training
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

In [None]:
# Validation subset: the 20% of labelled images held out by the split.
# Images are resized to (img_height, img_width) and batched like the
# training data.
validation_options = dict(
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir_train, **validation_options)

### Classification of data

In [None]:
# Class labels exposed by the training dataset. Must be read here, before
# `train_ds` is re-assigned to a cached/prefetched pipeline further down.
class_names = train_ds.class_names

# List every class as a tab-indented bullet.
print('The name of the classes are: ')
for class_label in class_names:
    print('\t*', class_label)

### Exploring Images after Classification

In [None]:
# Draw the first nine images of one training batch in a 3x3 grid, titled
# with the class name looked up from each image's integer label.
fig = plt.figure(figsize=(10, 10))
for batch_images, batch_labels in train_ds.take(1):
    for idx in range(9):
        plt.subplot(3, 3, idx + 1)
        label_name = class_names[batch_labels[idx]]
        plt.title(label_name, fontsize=10)
        # Cast to uint8 so imshow interprets values as 0-255 pixels.
        pixels = batch_images[idx].numpy().astype("uint8")
        plt.imshow(pixels)
        ax = plt.axis("off")

### Building Model

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache, shuffle with a 1000-element buffer, then prefetch.
cached_train = train_ds.cache()
shuffled_train = cached_train.shuffle(1000)
train_ds = shuffled_train.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: cache and prefetch only (no shuffling).
cached_val = val_ds.cache()
val_ds = cached_val.prefetch(buffer_size=AUTOTUNE)

# Layer that multiplies pixel values by 1/255.
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)


def _rescale_images(x, y):
    """Apply the rescaling layer to the images and pass labels through."""
    return normalization_layer(x), y


# Sanity check: rescale one batch and print the min/max of its first image.
# NOTE(review): `normalized_ds` is not referenced again in the visible cells.
normalized_ds = train_ds.map(_rescale_images)
image_batch, labels_batch = next(iter(normalized_ds))

first_image = image_batch[0]

lowest, highest = np.min(first_image), np.max(first_image)
print(lowest, highest)  # pixel values are expected to lie in [0, 1] now

In [None]:
# Random augmentation pipeline: horizontal flip, then rotation and zoom,
# each with a factor of 0.1. The first layer declares the expected input shape.
preprocessing = layers.experimental.preprocessing
augmentation_input_shape = (img_height, img_width, 3)

data_augmentation = keras.Sequential([
    preprocessing.RandomFlip("horizontal", input_shape=augmentation_input_shape),
    preprocessing.RandomRotation(0.1),
    preprocessing.RandomZoom(0.1),
])

In [None]:
# Show nine augmented versions of the first image of one training batch.
# The augmentation pipeline is re-applied on every iteration so each panel
# comes from a separate call.
plt.figure(figsize=(10, 10))
for batch, _ in train_ds.take(1):
    for panel in range(9):
        augmented_images = data_augmentation(batch)
        plt.subplot(3, 3, panel + 1)
        first_augmented = augmented_images[0].numpy().astype("uint8")
        plt.imshow(first_augmented)
        ax = plt.axis("off")

### Compiling Model

In [None]:
# CNN classifier: four Conv2D/MaxPool stages, global max pooling, then a
# small dense head ending in a 12-unit softmax output.
model = tf.keras.models.Sequential([
    # Input is (height, width, channels) -- the same order as the
    # `image_size` used when the datasets were loaded.
    tf.keras.layers.InputLayer(input_shape=(img_height, img_width, 3)),
    # Scale raw [0, 255] pixels to [0, 1] inside the model, so training,
    # validation and prediction inputs are all normalized identically
    # (the datasets fed to fit() are not rescaled beforehand).
    tf.keras.layers.experimental.preprocessing.Rescaling(1./255),

    # Convolutional stage 1
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.BatchNormalization(),

    # Convolutional stage 2
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.BatchNormalization(),

    # Convolutional stage 3
    tf.keras.layers.Conv2D(128, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.BatchNormalization(),

    # Convolutional stage 4, collapsed to one value per channel
    tf.keras.layers.Conv2D(128, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.GlobalMaxPool2D(),
    # Kept from the original architecture; the global pooling output is
    # already one vector per image.
    tf.keras.layers.Flatten(),

    # Dense head
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),

    # Output: 12 units with softmax, i.e. the model emits probabilities.
    # NOTE(review): 12 is assumed to equal len(class_names) -- confirm.
    tf.keras.layers.Dense(12, activation='softmax'),
])

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy on
# integer labels, accuracy as the reported metric.
# The model's final Dense layer already applies softmax, so the loss receives
# probabilities rather than logits; from_logits must therefore be False.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

In [None]:
# Print the model's layer-by-layer summary.
model.summary()

### Training

In [None]:
# Number of full passes over the training data.
epochs = 15
# Fit on the training pipeline, evaluating on the validation pipeline each
# epoch; the returned history object is kept for the plots.
activity = model.fit(
    x=train_ds,
    epochs=epochs,
    validation_data=val_ds,
)

In [None]:
# Per-epoch metrics recorded during training.
history = activity.history
acc, val_acc = history['accuracy'], history['val_accuracy']
loss, val_loss = history['loss'], history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(16, 8))

# One panel per metric: (subplot position, training curve, training label,
# validation curve, validation label, legend location, panel title).
panels = (
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)
for position, train_curve, train_label, val_curve, val_label, legend_loc, panel_title in panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(panel_title)
    plt.grid()

plt.show()

### Prediction

In [None]:
# Path of the single test image to classify.
# NOTE: this re-uses (and overwrites) the name that earlier held the test
# directory path; the plotting cell below reads the image from this name.
data_dir_test_path = '../input/plant-seedlings-classification/test/007b3da8b.png'

# Load the image at the resolution the model was built for and add a leading
# batch dimension, since predict() expects a batch of images.
img = keras.preprocessing.image.load_img(data_dir_test_path, target_size=(img_height, img_width))
img_array = keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)

predictions = model.predict(img_array)
# The model's output layer already applies softmax, so predictions[0] is the
# class-probability vector. Applying softmax a second time would flatten the
# distribution and misreport the confidence.
score = predictions[0]

In [None]:
# Display the classified image, captioned with the highest-scoring class
# and its score expressed as a percentage.
plt.figure(figsize=(10, 10))
predicted_class = class_names[np.argmax(score)]
confidence_pct = 100 * np.max(score)
caption = "This is a image of {} ({:.2f}% confidence).".format(predicted_class, confidence_pct)
plt.title(caption, fontsize=12)
plt.imshow(PIL.Image.open(data_dir_test_path))
ax = plt.axis("off")