# This ML script would not have been possible without inspiration from GitHub user ***'wiamsuri'*** and their project ***'cnn-image-classifier-keras'***.

In [None]:
from sklearn.datasets import load_files
from sklearn.model_selection import train_test_split
import numpy as np
from glob import glob
# from PIL import ImageFile
import random
from tqdm import tqdm
import keras
from keras.preprocessing import image
from keras.utils import np_utils
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
from skimage.transform import resize
import os
import pandas as pd

# Load Images From File

In [None]:
# Discover one category per sub-directory of the training-data folder.
training_root = 'resources/data_for_training_06/'
category_dirs = sorted(glob(training_root + '*/'))

# Strip the root prefix so only the folder part of each match remains
# (the `*/` pattern keeps the trailing separator on every match).
image_names = [category_dir.replace(training_root, '') for category_dir in category_dirs]
number_of_image_categories = len(image_names)

print('%d image categories.' % number_of_image_categories)
print('Three categories:')
print(image_names[:3])

In [None]:
def load_dataset(path):
    """Collect image file paths and one-hot encoded labels from a folder tree.

    Args:
        path: Directory handed to scikit-learn's ``load_files``; each
            sub-directory is treated as one category.

    Returns:
        A ``(image_files, image_targets)`` tuple: an array of file paths and
        the matching targets one-hot encoded over the module-level
        ``number_of_image_categories``.
    """
    # Only the paths and integer labels are needed here, so skip reading the
    # raw bytes of every image into memory (the default load_content=True).
    data = load_files(path, load_content=False)
    image_files = np.array(data['filenames'])
    image_targets = np_utils.to_categorical(np.array(data['target']), number_of_image_categories)
    return image_files, image_targets


# Gather every labelled image path together with its one-hot target.
image_files, image_targets = load_dataset('resources/data_for_training_06/')

# First hold out 20% of all images as the test set...
trains_validate_files, test_files, trains_validate_targets, test_targets = train_test_split(
    image_files, image_targets, test_size=0.2, random_state=42)

# ...then hold out 25% of the remainder (roughly 20% of the total) for validation.
train_files, valid_files, train_targets, valid_targets = train_test_split(
    trains_validate_files, trains_validate_targets, test_size=0.25, random_state=42)

# NOTE: this rebinds image_names to the full category directory paths.
image_names = sorted(glob("resources/data_for_training_06/*/"))

total_image_count = len(train_files) + len(valid_files) + len(test_files)
print('%s images.\n' % total_image_count)
print('%d training images.' % len(train_files))
print('%d validation images.' % len(valid_files))
print('%d test images.'% len(test_files))

# Define function for preprocessing images

In [None]:
def path_to_tensor(img_path, target_size=(500, 500)):
    """Load one image file as a batch-of-one array.

    Args:
        img_path: Path of the image file to load.
        target_size: ``(height, width)`` the image is resized to on load.
            Defaults to ``(500, 500)``, matching the model's input shape.

    Returns:
        The RGB image as an array with a leading batch axis of length 1.
    """
    # `grayscale` is deprecated in favour of `color_mode` and is no longer
    # accepted by newer Keras releases, so only `color_mode` is passed.
    img = image.load_img(img_path, color_mode="rgb", target_size=target_size, interpolation="nearest")
    img_array = image.img_to_array(img)

    # Add the batch axis so single images can be stacked or fed to predict().
    return np.expand_dims(img_array, axis=0)

def paths_to_tensor(img_paths):
    """Load every image in `img_paths` and stack the results along the first axis.

    A tqdm progress bar is shown while the images are loaded one by one
    through `path_to_tensor`.
    """
    tensors = []
    for single_path in tqdm(img_paths):
        tensors.append(path_to_tensor(single_path))
    return np.vstack(tensors)

# Preprocess images

In [None]:
from PIL import ImageFile

# Let PIL decode image files that are truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Load each split and rescale pixel values by 1/255 before feeding them to Keras.
train_tensors = paths_to_tensor(train_files).astype(np.float32) / 255
valid_tensors = paths_to_tensor(valid_files).astype(np.float32) / 255
test_tensors = paths_to_tensor(test_files).astype(np.float32) / 255

# Build Model

In [None]:
# Small CNN: five conv/pool stages followed by a dense classifier head.
model = Sequential()

# First stage fixes the expected input: 500x500 RGB images.
# TODO: revisit the filter counts ("ADJUST FILTER?" in the original notes).
model.add(Conv2D(filters=2, kernel_size=2, padding='same',
                 activation='relu', input_shape=(500, 500, 3)))
model.add(MaxPooling2D(pool_size=2))

# Remaining stages widen the filters while increasing the dropout rate.
for n_filters, drop_rate in ((4, 0.1), (8, 0.2), (12, 0.3), (16, 0.4)):
    model.add(Conv2D(filters=n_filters, kernel_size=2, padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(drop_rate))

model.add(Flatten())

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))

# One output unit per category. The targets are one-hot encoded over
# number_of_image_categories, so a hard-coded 3 would break training as soon
# as the number of category folders changes.
model.add(Dense(number_of_image_categories, activation='softmax'))

model.summary()

model.compile(optimizer=keras.optimizers.Adadelta(), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model with the training and validation images

In [None]:
epochs = 16

# Make sure the checkpoint folder exists up front: some Keras versions do not
# create it, and the failure would only surface when the first checkpoint is
# written.
os.makedirs('saved_models', exist_ok=True)

# Write a checkpoint only when the monitored metric improves.
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.image_classifier.hdf5',
                               verbose=1, save_best_only=True)

history = model.fit(train_tensors, train_targets, validation_data=(valid_tensors, valid_targets),
          epochs=epochs, batch_size=128, callbacks=[checkpointer], verbose=1)

In [None]:
import pickle

# Optional: persist the per-epoch metrics dict to disk.
# The file is written with pickle (not JSON) and has no extension.
with open('trainHistoryDict', 'wb') as history_file:
    pickle.dump(history.history, history_file)

In [None]:
# Read the saved training history back in.
# After this cell `history` is the plain metrics dict (what was stored from
# `history.history`), not a Keras History object.
import pickle  # imported here so the cell also works on a fresh kernel

# NOTE: pickle must only be used on files this notebook wrote itself.
with open('trainHistoryDict', 'rb') as history_file:
    history = pickle.load(history_file)

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.models import load_model

In [None]:
# Restore the model from the checkpoint file written during training.
best_checkpoint_path = 'saved_models/weights.best.image_classifier.hdf5'
model = tf.keras.models.load_model(best_checkpoint_path)

In [None]:
# model.load_weights('saved_models/weights.best.image_classifier.hdf5')
# model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [None]:
# Predict the whole test set in one batched call instead of invoking
# model.predict once per image, which pays the per-call overhead every time.
class_probabilities = model.predict(test_tensors)
predictions = np.argmax(class_probabilities, axis=1)

# Compare the predicted class indices with the indices of the one-hot targets.
true_labels = np.argmax(test_targets, axis=1)
test_accuracy = 100 * np.sum(predictions == true_labels) / len(predictions)
print('Test accuracy: %.4f%%' % test_accuracy)

In [None]:
# Visualize the model's accuracy per epoch, training first and validation second.
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(history[metric_key])

plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['training data', 'validation data'], loc='upper left')
plt.show()

In [None]:
# Run the model on every file in test_imgs and print the raw class
# probabilities for files whose name starts with "brick_".
# The loop variable is deliberately NOT called `image`: that name is the
# keras.preprocessing.image module used by path_to_tensor, and shadowing it
# breaks any later re-run of the preprocessing cells.
for image_name in os.listdir('test_imgs'):
    img = plt.imread(os.path.join('test_imgs', image_name))
    # NOTE(review): skimage's resize is expected to return floats in [0, 1],
    # matching the /255 scaling used for the training tensors. Confirm.
    resized_image = resize(img, (500, 500, 3))
    # Kept for every file: a later cell reads `predictions` from the last one.
    predictions = model.predict(np.array([resized_image]))
    if image_name.split("_")[0] == "brick":
        print(image_name, predictions)

# for image in os.listdir('test_imgs'):
#     img = plt.imread('test_imgs' + '/' + image)
#     resized_image = resize(img, (400,400,3))
#     predictions = model.predict(np.array([resized_image]))
# #     print(f'{image}:\t {predictions}')
#     if image.split("_")[0] == "brick":
# #         print(f'{image}:\t {round(100 * predictions[0][0])}%')

# for image in os.listdir('test_imgs'):
#     img = plt.imread('test_imgs' + '/' + image)
#     resized_image = resize(img, (400,400,3))
#     predictions = model.predict(np.array([resized_image]))
# #     print(f'{image}:\t {predictions}')
#     if image.split("_")[0] == "unknown":
# #         print(f'{image}:\t {round(100 * predictions[0][0])}%')

#['brick_10/', 'siding_20/', 'unknown_00/']

In [None]:
# Display one sample image from the test folder.
sample_image_path = 'test_imgs/brick_5901.jpg'
new_image = plt.imread(sample_image_path)
img = plt.imshow(new_image)

In [None]:
# Rank the class indices of the most recent prediction from the highest
# probability to the lowest (most likely class first).
x = predictions
class_scores = x[0]

# sorted() replaces the hand-rolled swap loop and works for any number of
# classes instead of a hard-coded 3. The order of exactly tied scores may
# differ from the old loop.
list_index = sorted(range(len(class_scores)), key=lambda idx: class_scores[idx], reverse=True)

# Show the class indices in ranked order.
print(list_index)

In [None]:
# Plot history: training and validation loss per epoch.
loss_curves = (('loss', '(training data)'), ('val_loss', '(validation data)'))
for history_key, curve_label in loss_curves:
    plt.plot(history[history_key], label=curve_label)

plt.title('Loss')
plt.xlabel('No. epoch')
plt.ylabel('value')
plt.legend(loc="upper right")
plt.show()
