# Implement a ResNet-based classifier for house-style classification

In [None]:
import sys  
sys.path.insert(0, '../visual_home_finder')
import config, paths
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, Dense, AveragePooling2D, Dropout, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
import numpy as np
from tensorflow import math
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd

In [None]:
# Count the images available in the training, validation and test splits
num_train, num_val, num_test = (
    len(list(paths.list_images(split_dir)))
    for split_dir in (config.TRAIN_PATH, config.VAL_PATH, config.TEST_PATH)
)
print(num_train, num_val, num_test)

In [None]:
# Mean image value used to centre the inputs; it is read from the project
# config rather than recomputed on every run.
#
# The commented-out loop below is kept as a reference for how the statistics
# can be estimated from the training images (resized to 224x224).
# NOTE(review): it averages the per-image variances, which ignores the spread
# of the per-image means, so the resulting std is only an approximation of the
# dataset-wide value -- confirm before re-enabling it.
# images = list(paths.list_images(config.TRAIN_PATH))
# img_mean = 0
# img_var = 0
# for ii in range(num_train):    
#     img = image.load_img(images[ii], target_size = (224,224))
#     img = image.img_to_array(img)
#     img_mean += img.mean((0,1))
#     img_var += img.var((0,1))
# img_mean = img_mean/num_train
# img_std = np.sqrt(img_var/num_train)
img_mean = config.IMG_MEAN # previously hard-coded as: np.array([123.526794, 129.04448, 119.95359], dtype=np.float32).reshape((1,1,3))
# img_std is not used by the pipeline below (inputs are centred, not scaled).
# Previously tried values: 62, or np.array([62.082836, 61.87381, 73.08175], dtype=np.float32).reshape((1,1,3))

In [None]:
#print(img_mean, img_std)

In [None]:
# Random geometric perturbations applied to training images only
augmentation_options = dict(
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    horizontal_flip=True,
    fill_mode="nearest",
)

# Training generator: mean-centred and augmented
train_aug = ImageDataGenerator(featurewise_center=True, **augmentation_options)
# The mean is assigned directly instead of being estimated with fit()
train_aug.mean = img_mean

# Validation/test generator: mean-centred only, no augmentation
val_aug = ImageDataGenerator(featurewise_center=True)
val_aug.mean = img_mean

In [None]:
# # Just to test that the normalization is working correctly
# iterator = train_aug.flow_from_directory(
#     config.TRAIN_PATH,
#     class_mode="categorical",
#     target_size=(224, 224),
#     color_mode="rgb",
#     shuffle=True,
#     batch_size=batch_size)
# batchX, batchy = iterator.next()
# print(batchX.shape, batchX.mean(), batchX.std())

In [None]:
# Build the directory iterators that feed training, validation and test batches
batch_size = 32

# Options shared by all three splits
flow_options = dict(
    class_mode="categorical",
    target_size=(224, 224),
    color_mode="rgb",
    batch_size=batch_size,
)

# Training batches are shuffled and augmented
train_gen = train_aug.flow_from_directory(
    config.TRAIN_PATH, shuffle=True, **flow_options)

# Validation and test batches keep the on-disk order so that predictions
# line up with the iterator's `classes` attribute
val_gen = val_aug.flow_from_directory(
    config.VAL_PATH, shuffle=False, **flow_options)

test_gen = val_aug.flow_from_directory(
    config.TEST_PATH, shuffle=False, **flow_options)

In [None]:
# ImageNet-pretrained ResNet50 backbone without its classification top,
# fed by a fixed 224x224 RGB input
image_input = Input(shape=(224, 224, 3))
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=image_input,
)

In [None]:
# Print the layer-by-layer summary of the headless ResNet50 backbone
base_model.summary()

In [None]:
# Classification head stacked on top of the backbone output:
# 7x7 average pooling -> flatten -> 256-unit ReLU layer -> dropout ->
# softmax over the configured classes
head_layers = [
    AveragePooling2D(pool_size=(7, 7)),
    Flatten(name='flatten'),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(config.CLASSES), activation="softmax"),
]
new_model = base_model.output
for head_layer in head_layers:
    new_model = head_layer(new_model)

# Full network: backbone input through to the new head's output
model = Model(inputs=base_model.input, outputs=new_model)

# Freeze every backbone layer so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Print the summary of the full model (backbone plus the new classification head)
model.summary()

In [None]:
# Compile the model and train the new classification head
from tensorflow.keras.optimizers.schedules import InverseTimeDecay

initial_lr = 0.001
num_epochs = 5
# NOTE(review): the 5 below equals num_epochs, so this is presumably meant as
# initial_lr / num_epochs -- confirm before changing num_epochs.
lr_decay = initial_lr / 5

# `lr` and `decay` are deprecated Adam arguments and are rejected by newer
# Keras releases. An inverse-time schedule with decay_steps=1 reproduces the
# old `decay` behaviour: lr_t = initial_lr / (1 + lr_decay * step).
lr_schedule = InverseTimeDecay(
    initial_learning_rate=initial_lr,
    decay_steps=1,
    decay_rate=lr_decay)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss="categorical_crossentropy", optimizer=opt,
              metrics=["accuracy", 'AUC'])

# Integer division drops the final partial batch of each training epoch;
# validation runs over the whole validation iterator.
model_history = model.fit(
            train_gen,
            steps_per_epoch=num_train // batch_size,
            validation_data=val_gen,
            #validation_steps=num_val // batch_size,
            epochs=num_epochs)

In [None]:
# model.evaluate(train_gen)

In [None]:
# Rewind the test generator and use the trained model to score every test image
print("Evaluating network...")
test_gen.reset()
pred_indices_raw = model.predict(test_gen)
# The predicted class of each image is the column with the highest score
pred_indices = np.argmax(a=pred_indices_raw, axis=1)
# Per-class precision / recall / F1, labelled with the generator's class names
report = classification_report(
    test_gen.classes,
    pred_indices,
    target_names=test_gen.class_indices.keys(),
)
print(report)

In [None]:
# Serialize model to disk
import os  # not part of the notebook's import cell, so bring it in here

print("Save Model...")
# Build the target path first; save_format belongs to model.save, not to the
# path join (the original call was also missing a closing parenthesis).
model.save(os.path.join(config.MODEL_PATH, 'home_model'), save_format="h5")

In [None]:
# # reset the testing generator and then use our trained model to
# # make predictions on the data
# print("[INFO] evaluating network...")
# test_train_gen.reset()
# pred_indices_raw = model.predict(test_train_gen)
# # for each image in the testing set we need to find the index of the
# # label with corresponding largest predicted probability
# pred_indices = np.argmax(pred_indices_raw, axis=1)
# # show a nicely formatted classification report
# print(classification_report(test_train_gen.classes, pred_indices, 
#                  target_names=test_train_gen.class_indices.keys()))
# # serialize the model to disk
# print("[INFO] saving model...")
# model.save(config.MODEL_PATH, save_format="h5")

In [None]:
# Create a row-normalised confusion matrix for the test set and plot it.
# (`math` here is tensorflow.math, imported at the top of the notebook.)

# Class names ordered by the index the generator assigned to them, so the axis
# labels line up with the integer labels in test_gen.classes / pred_indices
# (the same mapping the classification report uses).
class_names = sorted(test_gen.class_indices, key=test_gen.class_indices.get)

# Fix the matrix size explicitly; otherwise it is inferred from the largest
# label seen and can be smaller than the number of class names.
con_mat = math.confusion_matrix(
    test_gen.classes,
    predictions=pred_indices,
    num_classes=len(class_names)).numpy()

# Normalise each row by its number of true samples. A class without any test
# sample has an all-zero row; dividing by 1 keeps it at 0 instead of NaN.
row_totals = con_mat.sum(axis=1, keepdims=True)
con_mat_norm = np.around(con_mat.astype('float') / np.maximum(row_totals, 1), decimals=2)
con_mat_df = pd.DataFrame(con_mat_norm,
                     index = class_names,
                     columns = class_names)
figure = plt.figure(figsize=(8, 8))
sns.heatmap(con_mat_df, annot=True,cmap=plt.cm.Blues)
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Lay out after the axis labels exist so they are included in the fit
plt.tight_layout()
plt.show()

In [None]:
# Plot the per-epoch training/validation loss and accuracy on a single figure
N = num_epochs
epoch_axis = np.arange(0, N)

# (key in the Keras history dict, legend label)
curves = (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
    ("accuracy", "train_acc"),
    ("val_accuracy", "val_acc"),
)

plt.style.use("ggplot")
plt.figure()
for history_key, curve_label in curves:
    plt.plot(epoch_axis, model_history.history[history_key], '*-', label=curve_label)
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")

In [None]:
# Spot-check the trained model on one hand-picked test image
some_test_images = list(paths.list_images(config.TEST_PATH))
index = 101
print(some_test_images[index])

# Load at the network's input size and keep the raw pixels for display
img = image.load_img(some_test_images[index], target_size=(224, 224))
orig_img = image.img_to_array(img)
# Same centring as the data generators, plus a leading batch axis
centred_img = orig_img - img_mean
img = np.expand_dims(centred_img, axis=0)

model_output = model.predict(img)
print(model_output)
# NOTE(review): assumes config.CLASSES is in the same order as the class
# indices assigned by the data generators -- confirm.
best_class = np.argmax(np.ravel(model_output))
print(config.CLASSES[best_class])
plt.imshow(orig_img / 255)
print(config.CLASSES)