<a href="https://colab.research.google.com/github/kimys0279/AWSCertifiedSolutionsArchitectUnofficialStudyGuide/blob/master/Copy_of_Team08_ProjectF_Resnet50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
 
import keras
from keras.models import Model, load_model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

In [None]:
# Mount Google Drive inside the Colab VM so files stored there can be read/written
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Display name of every class; position in this list is the integer class label.
# The last entry ('Negatives') is the fallback class used at evaluation time.
CLASS_NAMES = [
    'Chinee Apple',
    'Lantana',
    'Parkinsonia',
    'Parthenium',
    'Prickly Acacia',
    'Rubber Vine',
    'Siam Weed',
    'Snake Weed',
    'Negatives',
]
# Integer labels 0..8, one per entry of CLASS_NAMES
CLASSES = list(range(len(CLASS_NAMES)))

In [None]:
# One generator shared by the training and validation splits; every image is
# passed through the ResNet50 preprocessing function before reaching the model.
datagen = ImageDataGenerator(preprocessing_function=keras.applications.resnet50.preprocess_input)

# Dataset location, one sub-directory per split
base_path = "drive/MyDrive/deepweeds"
datasets_paths = dict(
    test=os.path.join(base_path, 'data/test/'),
    train=os.path.join(base_path, 'data/train/'),
    validation=os.path.join(base_path, 'data/validation/'),
)

img_size = (224, 224)  # standard image size for the ResNet50 architecture
b_size = 32            # batch size

# Options common to both directory iterators
flow_options = dict(batch_size=b_size, target_size=img_size)

# Directory iterators for the training and validation splits
train_it = datagen.flow_from_directory(datasets_paths['train'], **flow_options)
val_it = datagen.flow_from_directory(datasets_paths['validation'], **flow_options)

Found 2700 images belonging to 9 classes.
Found 810 images belonging to 9 classes.


In [None]:
# Transfer learning on top of a frozen ResNet50 backbone.
# Background reading on ResNet50 transfer learning:
# https://stackoverflow.com/questions/50364706/massive-overfit-during-resnet50-transfer-learning

from keras.applications import ResNet50

# Backbone without its fully connected top, sized for our input images
base_model = ResNet50(input_shape=img_size + (3,), include_top=False)

# Freeze every backbone layer (BatchNorm included); only the new head is trained
base_model.trainable = False

# New classification head: global average pooling -> 32-unit ReLU layer
x = GlobalAveragePooling2D(name='avg_pool')(base_model.output)
x = Dense(32, activation='relu', name='fc32')(x)
# Output layer: one sigmoid unit per class, so each class is scored independently
outputs = Dense(len(CLASSES), activation='sigmoid', name='fc9')(x)

# Assemble the full network from backbone input to the new output layer
model = Model(inputs=base_model.input, outputs=outputs)

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['categorical_accuracy'],
)

# Print the layer-by-layer summary
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
______________________________________________________________________________________________

In [None]:
# Train the model

# Early stopping and model checkpoint
save_path = os.path.join(base_path,'baseline.h5') # path to save the model
# Callbacks: stop once val_loss has not improved for 10 epochs, and only
# overwrite the checkpoint when val_loss reaches a new minimum.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
mc = ModelCheckpoint(save_path, monitor='val_loss', mode='min', verbose=1, save_best_only=True)

# Number of batches needed to see every image exactly once (ceiling division).
# `samples // batch_size + 1` would schedule one batch too many whenever the
# sample count is an exact multiple of the batch size.
train_steps = (train_it.samples + train_it.batch_size - 1) // train_it.batch_size
val_steps = (val_it.samples + val_it.batch_size - 1) // val_it.batch_size

history = model.fit(
    train_it,
    steps_per_epoch=train_steps,
    validation_data=val_it,
    validation_steps=val_steps,
    epochs=200, # max number of epochs; early stopping normally ends training sooner
    callbacks=[es, mc],
    verbose=1)

Epoch 1/200

In [None]:
def plot_history(history):
    """Draw the training and validation curves stored in a Keras History object.

    Two separate figures are shown, one for the loss and one for the
    categorical accuracy. Each figure has the training series in blue and the
    validation series (the ``val_``-prefixed key) in red.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists, as returned by ``model.fit``.
    """
    metrics = history.history
    # (figure title / y-axis label, key of the training series)
    panels = (('Loss', 'loss'), ('Accuracy', 'categorical_accuracy'))

    for heading, train_key in panels:
        plt.title(heading)
        plt.plot(metrics[train_key], color='blue', label='train')
        plt.plot(metrics['val_' + train_key], color='red', label='test')
        plt.ylabel(heading)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'])
        plt.show()
 
# Plot the loss and accuracy curves recorded in `history` (the value returned by model.fit)
plot_history(history)

In [None]:
# Persist the per-epoch training metrics as a CSV file next to the dataset
import pandas as pd

save_path = os.path.join(base_path, 'baseline.csv')

# One column per metric, one row per epoch
df = pd.DataFrame(history.history)
df.to_csv(save_path, index=False)

# Preview the first rows
df.head()

In [None]:
# Load the best model (the checkpoint file written during training)
save_path = os.path.join(base_path,'baseline.h5') # path the model was saved to
model = load_model(save_path)

# Create test data generator (same ResNet50 preprocessing as the training data)
test_datagen = ImageDataGenerator(
    preprocessing_function=keras.applications.resnet50.preprocess_input)

# Load files from directory.
# shuffle=False keeps the prediction order aligned with test_generator.classes.
test_generator = test_datagen.flow_from_directory(
        datasets_paths['test'],
        target_size = img_size,
        shuffle = False,
        batch_size=b_size)

# Evaluate model on test set.
# Ceiling division gives exactly one pass over the test images; the previous
# `samples // b_size + 1` requested an extra batch whenever the sample count was
# a multiple of the batch size, which can make `predictions` longer than `y_true`.
test_steps = (test_generator.samples + b_size - 1) // b_size
# model.predict accepts the generator directly, like model.fit does during
# training; predict_generator is deprecated.
predictions = model.predict(test_generator, steps=test_steps)
y_true = test_generator.classes
y_pred = np.argmax(predictions, axis=1)

# Assign predictions whose best score is worse than a uniform random guess to
# the negative class. Both values are derived from the class lists instead of
# being hard-coded.
# NOTE(review): assumes the generator's directory-derived class indices follow
# the order of CLASS_NAMES -- confirm against test_generator.class_indices.
negative_class = CLASS_NAMES.index('Negatives')
y_pred[np.max(predictions, axis=1) < 1 / len(CLASSES)] = negative_class

# Generate and print classification metrics
print(classification_report(y_true, y_pred, labels=CLASSES, target_names=CLASS_NAMES))

# Confusion matrix. Computed once with labels=CLASSES so it always has one
# row/column per class and lines up with the CLASS_NAMES tick labels, even if
# a class never occurs in y_true or y_pred.
conf_arr = confusion_matrix(y_true, y_pred, labels=CLASSES)
fig, ax = plt.subplots(figsize=(7,4))
sns.heatmap(conf_arr, annot=True,cmap="Greens",fmt='g', xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES,ax=ax)
plt.title('Confusion Matrix')
plt.ylabel('True')
plt.xlabel('Predicted');