In [8]:
# some standard packages
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# modelling packages
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten 
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import model_from_json
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam


# Model Evaluation
from sklearn.metrics import classification_report, confusion_matrix


Utilize the cell below only if using all of the Data Loader file instead of using the Keras Image Generator.

In [9]:
categories = ['class_0', 'class_1', 'class_2']

In [10]:
# Use the Keras ImageDataGenerator for memory efficiency and preprocessing ease
# This process replaces the method of obtaining our data via DataLoader.ipynb
train_datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1./255,
    fill_mode='nearest',
    validation_split=0.2,
)

In [11]:
test_datagen = ImageDataGenerator(rescale = 1./255)

In [12]:
batch_size = 32

In [13]:
train_generator = train_datagen.flow_from_directory(
                                                    'data/final_BC_images/train',
                                                    target_size=(32, 32),
                                                    color_mode='rgb',
                                                    batch_size=batch_size,
                                                    class_mode='categorical',
                                                    shuffle=True,
                                                    subset='training')

Found 4862 images belonging to 3 classes.


In [14]:
validation_generator = train_datagen.flow_from_directory(
                                                        'data/final_BC_images/train',
                                                        target_size=(32, 32),
                                                        color_mode='rgb',
                                                        batch_size=batch_size,
                                                        class_mode='categorical',
                                                        shuffle=False,
                                                        subset='validation'
                                                        )

Found 1215 images belonging to 3 classes.


In [15]:
test_generator = test_datagen.flow_from_directory(
                                                  'data/final_BC_images/test',
                                                  target_size=(32, 32),
                                                  color_mode='rgb',
                                                  batch_size=batch_size,
                                                  class_mode='categorical',
                                                  shuffle=False)

Found 1632 images belonging to 3 classes.


In [16]:
# Saving the number of stepsizes for the training, validation and test sets 
train_stepsize = train_generator.samples//train_generator.batch_size 

valid_stepsize = validation_generator.samples//validation_generator.batch_size 

test_stepsize = test_generator.samples//test_generator.batch_size 

# Sanity check 
print(f'Training step size = {train_stepsize} \nValidation step size = {valid_stepsize} \nTest step size = {test_stepsize}')

Training step size = 151 
Validation step size = 37 
Test step size = 51


In [22]:
# Building the Model
model = Sequential()

# 3 convolutional layers
model.add(Conv2D(32, (3,3), input_shape = (32,32,3)))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(512, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# 3 hidden layers
model.add(Flatten())
model.add(Dense(256))
model.add(Activation('relu'))

model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.25))

model.add(Dense(128))
model.add(Activation('relu'))

# # The output layer with 9 neurons for 9 classes
model.add(Dense(3))
model.add(Activation('softmax'))


In [23]:
# Initiate early stop based on validation accuracy
ES = EarlyStopping(monitor='val_acc', patience=5, mode='auto', min_delta=0.0001, verbose=1)

In [24]:
# Istantiating Adam optimizer with a learning rate of 0.0001 and saving to variable 'optim'
optim = Adam(lr=0.001)

# Compiling the CNN model 
model.compile(optimizer=optim, loss='categorical_crossentropy', metrics=['acc'])

# Summary 
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 30, 30, 32)        896       
_________________________________________________________________
activation_11 (Activation)   (None, 30, 30, 32)        0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 30, 30, 32)        128       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 13, 13, 128)       36992     
_________________________________________________________________
activation_12 (Activation)   (None, 13, 13, 128)       0         
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 6, 6, 128)        

In [None]:
# Fitting the model to the training data
history = model.fit_generator(generator=train_generator,
                                steps_per_epoch=train_stepsize,
                                epochs=50,
                                validation_data=validation_generator,
                                validation_steps=valid_stepsize,
                                callbacks=[ES])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50

The results look promisinng for some epochs, but the validation loss and accuarcy are hopping around way too much. The model seems to be unstable.

In [22]:
# Getting bestmodel's predictions (as probabilities) on the test set 
test_probas = weeds_model.predict_generator(test_generator, steps=test_stepsize)

# Setting the model's class prediction as the class that received the highest probability for each image
test_predictions = test_probas.argmax(axis=1)

Instructions for updating:
Please use Model.predict, which supports generators.


In [31]:
len(test_probas)

1664

In [30]:
# Getting the true class labels for the test set
test_true = test_generator.classes

# Sanity check 
len(test_true)

1680

In [29]:
len(test_predictions)

1664

In [25]:
# Displaying the classification report for the test set
print('Classification Report\n \n', classification_report(test_true, test_predictions, target_names=categories))

ValueError: Found input variables with inconsistent numbers of samples: [1680, 1664]

In [26]:
# Get a confusion matrix 
test_matrix = pd.DataFrame(confusion_matrix(test_true, test_predictions), 
                           columns=['Predicted ' + cat_name for cat_name in categories], 
                           index=['True ' + cat_name for cat_name in categories])

# Plotting as a heatmap 
plt.figure()
sns.heatmap(test_matrix, cmap='Blues', annot=True, fmt='g')
plt.title('Normalized Confusion Matrix: Test Data')
plt.show()

ValueError: Found input variables with inconsistent numbers of samples: [1680, 1664]

In [None]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, stratify = y) 

In [None]:
# # Train the model
# history = model.fit_generator(generator=train_generator,
#                     steps_per_epoch=(11209) // batch_size,
#                     epochs=50, 
#                     validation_data=validation_generator,
#                     validation_steps=(2798) // batch_size,
#                     callbacks=[
#                         EarlyStopping(patience=3, restore_best_weights=True),
#                         ReduceLROnPlateau(patience=2)],
#                     verbose=1)