In [13]:
import keras
from keras.preprocessing import image
import numpy as np
import pandas as pd
from glob import glob
import re
from tqdm import tqdm

In [2]:
inception_model = keras.applications.inception_v3.InceptionV3(include_top=True, weights='imagenet')
# inception_model.summary()

In [3]:
# Helper functions for image preparation
def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=(299, 299))
    # convert PIL.Image.Image type to 3D tensor with shape (224, 224, 3)
    x = image.img_to_array(img)
    # print('before', x.shape)
    # convert 3D tensor to 4D tensor with shape (1, 224, 224, 3) and return 4D tensor
    y = np.expand_dims(x, axis=0)
    # print('after expanding', y.shape)
    return y

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in img_paths]
    # print('before vstack', len(list_of_tensors), list_of_tensors[0].shape)
    z = np.vstack(list_of_tensors)
    # print('after vstack', z.shape)
    return z

In [5]:
# Collect all image paths from the DEV set
dev_paths = glob("../dataset/images/*")

# Convert to tensors, scaled to [0,1]
dev_tensors = paths_to_tensor(dev_paths).astype('float32')/255
print(dev_tensors.shape)

(1000, 299, 299, 3)


In [6]:
# Create a pandas dataframe with the image labels as index and only keeping the TrueLabel column
df = pd.read_csv("../dataset/dev_dataset.csv", index_col=0)
df = df[['TrueLabel']]
print(df.head())

In [11]:
# Make a list of all the image labels
il = []
for dp in dev_paths:
    il.append(re.search(r'(?<=\\).*\.png', dp).group(0)[:-4])
imageLabels = np.array(il)
print(len(imageLabels))

1000


In [30]:
# Make predictions
predictions = {imageLabels[i]: np.argmax(inception_model.predict(np.expand_dims(dev_tensors[i], axis=0)))+1 for i in tqdm(range(len(dev_paths)))}

# Pour predictions into a dataframe
df_pred = pd.DataFrame.from_dict(predictions, orient='index')
df_pred.columns = ['PredictedLabel']

# Merge dataframes with true labels and predicted labels
result = pd.concat([df, df_pred], axis=1)

# Add col to show if the prediction is correct
result['Match'] = result['TrueLabel'] == result['PredictedLabel']

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [11:06<00:00,  1.18it/s]


{'000b7d55b6184b08': 389, '001b5e6f1b89fd3b': 577, '00312c7e7196baf4': 327, '00c3cd597f1ee96f': 138, '01244097ca8ffdfa': 91, '0134ba03294509a1': 982, '014f0024918a7b6f': 10, '01bdc0cfe670f708': 472, '01dd15caa1b2c7b4': 304, '01f824264783f58d': 968, '01fa4be27dd11b5d': 451, '0212aa8aa912ee45': 327, '024f73e53c3c1f1d': 580, '02dbe78a73db2808': 975, '02eb799d147d5f91': 625, '038fd7a750733bb5': 18, '03f90f7138f761e8': 664, '0411ca972e637297': 968, '04546cc5e537d7cd': 426, '04c1bde5affb7b5b': 653, '04cb9de291399d15': 465, '05437214d1ba9580': 810, '0546b47c13f154d3': 108, '05817a675e7506d1': 673, '059da9a174262427': 254, '05cac1419419ce29': 854, '05f7c084329ff0c6': 973, '060dfe239abfd547': 704, '064e1cc405062c85': 108, '068c5d66662a837a': 964, '06a0e36c443746b5': 354, '0707f3dfb7c29f5c': 606, '080bec22a6b1d37d': 760, '088560551b91ceb0': 983, '08fbf75e60a6efb7': 484, '093402dc35f59027': 734, '0aebe24fc257286e': 561, '0af0a5dfee6b84ff': 484, '0b1d45bd9ab1064e': 561, '0bd5123017590e39': 566, '0


Predictions df:
                  PredictedLabel
000b7d55b6184b08             389
001b5e6f1b89fd3b             577
00312c7e7196baf4             327
00c3cd597f1ee96f             138
01244097ca8ffdfa              91
Merged dataframes:
                  TrueLabel  PredictedLabel
000b7d55b6184b08        389             389
001b5e6f1b89fd3b        577             577
00312c7e7196baf4        327             327
00c3cd597f1ee96f        138             138
01244097ca8ffdfa         91              91
Add Match col:
                  TrueLabel  PredictedLabel  Match
000b7d55b6184b08        389             389   True
001b5e6f1b89fd3b        577             577   True
00312c7e7196baf4        327             327   True
00c3cd597f1ee96f        138             138   True
01244097ca8ffdfa         91              91   True

Number of correct predictions: 952 out of 1000


In [31]:
print('Add Match col:')
print(result.head())
print()
print('Number of correct predictions:',sum(result['Match']),'out of',len(dev_paths), "or", sum(result['Match'])/len(dev_paths))

Add Match col:
                  TrueLabel  PredictedLabel  Match
000b7d55b6184b08        389             389   True
001b5e6f1b89fd3b        577             577   True
00312c7e7196baf4        327             327   True
00c3cd597f1ee96f        138             138   True
01244097ca8ffdfa         91              91   True

Number of correct predictions: 952 out of 1000 or 0.952
