In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.xception import Xception, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# Fix OMP: Error #15: Initializing libiomp5.dylib on MacOS
# https://github.com/dmlc/xgboost/issues/1715
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [2]:
# Load model and pupulate trained parameters
image_size = (128, 128)
num_classes = 100

# Start with Xception model trained on imagenet
base_model = Xception(
    include_top=False,
    weights="imagenet",
    input_shape=(128,128,3)
)
# Add a custom top classifier on top of Xception base model for our dataset with 100 classes
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.inputs, outputs=predictions)

# Load trained model parameters
model.load_weights('ria_xception_model.h5')

model.compile(
    optimizer=Adam(lr=1e-4), 
    loss='categorical_crossentropy',
    metrics=['accuracy', 'top_k_categorical_accuracy'])

print(model.summary())

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 63, 63, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 63, 63, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 63, 63, 32)   0           block1_conv1_bn[0][0]            
______________________________________________________________________________________________

In [3]:
# Read training data from directory
train_ds = ImageDataGenerator().flow_from_directory(
    '../data/images/train/',
    target_size=image_size,
    color_mode="rgb",
    classes=None,
    class_mode="categorical",
    batch_size=128,
    shuffle=True,
    interpolation="bilinear")

# Quick sanity check to see if model is still accurate with a batch of training data samples
trainX, trainY = train_ds.next()
[loss, acc, top5_acc] = model.evaluate(trainX, trainY, verbose=0)
print("Training Data: Accuracy = {0:7.2f}%, Top 5 Accuracy = {1:7.2f}%".format(acc*100, top5_acc*100))

Found 100000 images belonging to 100 classes.
Training Data: Accuracy =   99.22%, Top 5 Accuracy =  100.00%


In [4]:
# Read validation data from directory
val_ds = ImageDataGenerator().flow_from_directory(
    '../data/images/val/',
    target_size=image_size,
    color_mode="rgb",
    classes=None,
    class_mode="categorical",
    batch_size=1,
    shuffle=False,
    interpolation="bilinear")

# Read ground truth class for validation images
valY = np.loadtxt('../data/val.txt', usecols=[1]).astype(int)

Found 10000 images belonging to 1 classes.


In [5]:
# Predict validation image classification and compare to ground truth
test_count = len(val_ds.filenames)
acc = 0
top5_acc = 0
print("Testing model accuracy for {0} validation image files".format(test_count))
for i in range(1,test_count+1):
    valX = val_ds.next()
    out = model.predict(valX)
    predY = (-out[0]).argsort()[0:5]
    # print(val_ds.filenames[i], valY[i], predY)
    if i % 1000 == 0:
        print("{0:5d} Files: Accuracy = {1:7.2f}%, Top 5 Accuracy = {2:7.2f}%".format(i, acc*100/i, top5_acc*100/i))
    if valY[i-1] == predY[0]:
        acc += 1
    if valY[i-1] in predY:
        top5_acc += 1

acc /= test_count
top5_acc /= test_count
print("\nValidation Data: Accuracy = {0:7.2f}%, Top 5 Accuracy = {1:7.2f}%".format(acc*100, top5_acc*100))

Testing model accuracy for 10000 validation image files
 1000 Files: Accuracy =   43.60%, Top 5 Accuracy =   73.40%
 2000 Files: Accuracy =   41.95%, Top 5 Accuracy =   71.20%
 3000 Files: Accuracy =   40.87%, Top 5 Accuracy =   71.20%
 4000 Files: Accuracy =   40.92%, Top 5 Accuracy =   71.17%
 5000 Files: Accuracy =   40.72%, Top 5 Accuracy =   70.60%
 6000 Files: Accuracy =   40.92%, Top 5 Accuracy =   71.22%
 7000 Files: Accuracy =   40.99%, Top 5 Accuracy =   71.30%
 8000 Files: Accuracy =   41.35%, Top 5 Accuracy =   71.14%
 9000 Files: Accuracy =   41.26%, Top 5 Accuracy =   71.20%
10000 Files: Accuracy =   41.11%, Top 5 Accuracy =   71.04%

Validation Data: Accuracy =   41.12%, Top 5 Accuracy =   71.05%


In [8]:
# Read test data from directory
test_ds = ImageDataGenerator().flow_from_directory(
    '../data/images/test/',
    target_size=image_size,
    color_mode="rgb",
    classes=None,
    class_mode="categorical",
    batch_size=1,
    shuffle=False,
    interpolation="bilinear")

Found 10000 images belonging to 1 classes.


In [9]:
# Predict test image classification and output to file with top 5 predictions
test_count = len(test_ds.filenames)
fout = open("../data/test.txt", "w")
print("Model prediction for {0} test image files".format(test_count))
for i in range(1,test_count+1):
    testX = test_ds.next()
    out = model.predict(testX)
    predY = (-out[0]).argsort()[0:5]
    line = test_ds.filenames[i-1] + " " + " ".join(str(x) for x in predY)
    fout.write(line + os.linesep)
    if i % 1000 == 0:
        print("{0:5d} Files processed".format(i))
        
fout.close()
print("Predictions output to ../data/test.txt")


Model prediction for 10000 test image files
 1000 Files processed
 2000 Files processed
 3000 Files processed
 4000 Files processed
 5000 Files processed
 6000 Files processed
 7000 Files processed
 8000 Files processed
 9000 Files processed
10000 Files processed
Predictions output to ../data/test.txt
