In [None]:
# DIRECTORY STRUCTURE
# household_plants/
#     train/
#         bangkok kalachuchi/
#         jade plant/
#         neon pothos/
#         philodendron birkin/
#         red beauty aglaonema/
# 
#     test/
#         bangkok kalachuchi/
#         jade plant/
#         neon pothos/
#         philodendron birkin/
#         red beauty aglaonema/

In [None]:
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten

In [None]:
# Mount Google Drive at /content/drive; the dataset archive is read from
# /content/drive/MyDrive in the extraction cell that follows.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import zipfile

# Unpack the dataset archive from Drive into /full_dataset.
# The context manager guarantees the archive handle is closed even if
# extraction fails part-way (e.g. disk full, corrupt member).
with zipfile.ZipFile('/content/drive/MyDrive/household_plants.zip', 'r') as zip_ref:
    zip_ref.extractall('/full_dataset')

### **INITIALIZE VALUES**

In [None]:
# Mini-batch size and the spatial size every image is resized to.
batch_size = 32
img_height, img_width = 224, 224

# Root folders of the extracted dataset (one subdirectory per class).
train_data_dir = '/full_dataset/household_plants/train'
validation_data_dir = '/full_dataset/household_plants/test'

# Dataset size and the train/test proportions it was split with.
total_samples = 750
train_split, test_split = .80, .20

# Sample counts must be integers: they are floor-divided by batch_size to get
# the step counts handed to Keras, which have to be whole numbers.
nb_train_samples = round(total_samples * train_split)
nb_validation_samples = round(total_samples * test_split)

# Keras image tensors are laid out as (height, width, channels).
input_shape = (img_height, img_width, 3)

### **DATA AUGMENTATION**

In [None]:
# Every pixel value is multiplied by 1/255 for both training and evaluation so
# the network always sees inputs on the same scale.
pixel_rescale = 1./255

# Random transformations applied to training images only; they add variance to
# the training set and help against overfitting.
augmentation_options = dict(
    rotation_range=8,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen = ImageDataGenerator(rescale=pixel_rescale, **augmentation_options)

# Evaluation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=pixel_rescale)

In [None]:
# flow_from_directory uses the name of each subdirectory as the class label.
# target_size is expected as (height, width), so the dimensions are passed in
# that order (it only happened to work before because both are equal).

# Training batches: augmented, one-hot labels, shuffled with a fixed seed.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    seed=123)

# Evaluation batches: shuffle is disabled so that predictions stay aligned
# with validation_generator.classes when building the confusion matrix.
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
    seed=123)

In [None]:
# Class lookup produced by the training generator; its keys are the class
# (subdirectory) names and are reused as report labels further down.
classes = train_generator.class_indices
# Bare expression so the notebook displays the mapping.
classes

### **CREATE MODEL**

In [None]:
# Fully connected classifier (multi-layer perceptron) over the raw pixels.
num_classes = len(classes)
model = Sequential([
  # input layer: flatten each image into a single feature vector FIRST.
  # Dense layers act only on the last axis of their input, so applying them to
  # the un-flattened (height, width, 3) tensor would transform each pixel's
  # three channel values in isolation instead of learning from the whole image.
  Flatten(input_shape=input_shape),

  # hidden layer 1
  Dense(units=64, activation='relu'),

  # hidden layer 2
  Dense(units=32, activation='relu'),

  # output layer: one probability per class
  Dense(num_classes, activation='softmax')
])

### **INITIALIZING HYPERPARAMETERS**

In [None]:
# Training configuration: Adam optimizer, categorical cross-entropy to match
# the one-hot labels produced by the generators, accuracy as the tracked metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Show the model summary to sanity-check the architecture before training.
model.summary()

### **TRAINING DATA**

In [None]:
from timeit import default_timer as timer

class TimingCallback(keras.callbacks.Callback):
    """Keras callback that records how long each training epoch takes.

    After training, ``logs`` holds one entry per completed epoch: the elapsed
    time in seconds between ``on_epoch_begin`` and ``on_epoch_end`` as
    measured by ``timeit.default_timer``.
    """

    def __init__(self, logs=None):
        # ``logs`` is unused; it is kept only so existing calls that pass it
        # keep working. The base initializer must run so the callback is set
        # up the way Keras expects.
        super().__init__()
        self.logs = []
        self.starttime = None

    def on_epoch_begin(self, epoch, logs=None):
        """Remember the timestamp at which the epoch started."""
        self.starttime = timer()

    def on_epoch_end(self, epoch, logs=None):
        """Append the duration of the epoch that just finished."""
        self.logs.append(timer() - self.starttime)

get_training_time = TimingCallback()

In [None]:
# Import callbacks from tensorflow.keras, the same package the model is built
# with, instead of mixing in the standalone keras package.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger

epochs = 100

# Checkpoint file that is overwritten whenever val_loss improves, so it always
# holds the best model observed during training.
bst_model_path = 'best-model.h5'
# `restore_best_weights` is an EarlyStopping option, not a ModelCheckpoint one,
# so it is no longer passed here. Note that the in-memory `model` keeps the
# weights of the last epoch after fit; load `bst_model_path` to use the best ones.
mcp_save = ModelCheckpoint(bst_model_path,
                           monitor='val_loss', verbose=1, save_best_only=True)

hist = model.fit(
      train_generator,
      validation_data=validation_generator,
      epochs=epochs,
      # number of batches drawn per epoch; cast to int because the sample
      # counts may be floats and Keras expects whole step counts
      steps_per_epoch=int(nb_train_samples // batch_size),
      validation_steps=int(nb_validation_samples // batch_size),
      callbacks=[mcp_save, get_training_time]
)

In [None]:
# Per-epoch durations collected by the timing callback, then the total in minutes.
epoch_durations = get_training_time.logs
print(epoch_durations)

total_minutes = sum(epoch_durations) / 60
print(f'{total_minutes:.2f} minutes')

In [None]:
# NOTE(review): this runs before the evaluation and prediction cells below,
# which still use `model` — confirm that clearing the Keras session at this
# point is intended and does not affect those results.
keras.backend.clear_session()

### **VISUALIZATION**

In [None]:
# Evaluate on both generators; each score is indexed as [loss, accuracy].
train_score = model.evaluate(train_generator, verbose=1)
test_score = model.evaluate(validation_generator, verbose=1)

# Same report layout for both splits: blank line, heading, accuracy (%), loss.
for heading, score in (('TRAINING SET', train_score), ('TESTING SET', test_score)):
    print()
    print(heading)
    print(f'[INFO] Accuracy: {score[1] * 100:.2f}')
    print(f'[INFO] Loss: {score[0]}')

In [None]:
# Length of the recorded loss history, used as the x-axis range of the plots below.
# NOTE(review): this rebinds the `epochs` setting that was passed to model.fit.
# No EarlyStopping callback is passed to fit in this notebook, so the history is
# presumably as long as the configured value — confirm.
epochs = len(hist.history['loss']) # epochs actually present in the history

In [None]:
# Pull the four curves recorded during training out of the history object.
history = hist.history
acc, val_acc = history['accuracy'], history['val_accuracy']
loss, val_loss = history['loss'], history['val_loss']

epochs_range = range(epochs)

# One entry per subplot:
# (position, training curve, validation curve, training label,
#  validation label, legend location, title)
panels = (
    (1, acc, val_acc,
     'Training Accuracy', 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, val_loss,
     'Training Loss', 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(8, 8))
for position, train_curve, val_curve, train_label, val_label, legend_loc, title in panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

### **CONFUSION MATRIX #1**

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Class names used as row labels in the classification report.
target_names = list(train_generator.class_indices.keys())

# get confusion matrix
# Predict over the whole validation generator. The step count used to be
# passed as the second positional argument, which model.predict binds to
# `batch_size`, not `steps`; leaving both unset lets Keras run through every
# batch of the generator exactly once. The generator was created with
# shuffle=False, so predictions line up with validation_generator.classes.
Y_pred = model.predict(validation_generator)
y_pred = np.argmax(Y_pred, axis=1)  # most probable class index per image

cf_matrix = confusion_matrix(validation_generator.classes, y_pred)

# print classification report
print('Classification Report')
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Class names ordered by their label index, taken from the generator instead of
# a hand-typed list so the tick labels cannot drift from the dataset folders.
class_indices = train_generator.class_indices
classes = sorted(class_indices, key=class_indices.get)

plt.figure(figsize = (10,10))

# Each cell shows the raw count and its share of all validation samples.
flat_counts = cf_matrix.flatten()
group_counts = ["{0:0.0f}".format(value) for value in flat_counts]
group_percentages = ["{0:.2%}".format(value) for value in
                     flat_counts/np.sum(cf_matrix)]

labels = [f"{v1}\n{v2}\n" for v1, v2 in
          zip(group_counts,group_percentages)]

# Shape the annotations like the matrix itself rather than assuming 5x5.
labels = np.asarray(labels).reshape(cf_matrix.shape)
ax = sns.heatmap(cf_matrix, annot=labels, fmt='')

ax.set_title('Seaborn Confusion Matrix with labels\n\n');
ax.set_xlabel('\nPredicted Houseplant')
ax.set_ylabel('Actual Houseplant');

ax.xaxis.set_ticklabels(classes, rotation=45)
ax.yaxis.set_ticklabels(classes, rotation=45)

# displays confusion matrix
plt.show()