In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib as mpl
import cv2
import time
import PIL
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
from keras.initializers import glorot_normal
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
   tf.config.experimental.set_memory_growth(physical_devices[0], True)
from matplotlib import pyplot
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [2]:
"""
Calculates dot product of x[0] and x[1] for mini_batch 

Assuming both have same size and shape

@param
x -> [ (size_minibatch, total_pixels, size_filter), (size_minibatch, total_pixels, size_filter) ]

"""
def dot_product(x):

    return keras.backend.batch_dot(x[0], x[1], axes=[1,1]) / x[0].get_shape().as_list()[1] 

"""
Calculate signed square root

@param
x -> a tensor

"""

def signed_sqrt(x):

    return keras.backend.sign(x) * keras.backend.sqrt(keras.backend.abs(x) + 1e-9)

"""
Calculate L2-norm

@param
x -> a tensor

"""

def L2_norm(x, axis=-1):

    return keras.backend.l2_normalize(x, axis=axis)

In [37]:
'''

    Take outputs of last layer of VGG and load it into Lambda layer which calculates outer product.
    
    Here both bi-linear branches have same shape.
    
    z -> output shape tuple
    x -> outpur og VGG tensor
    y -> copy of x as we modify x, we use x, y for outer product.
    
'''

def build_model():
    tensor_input = keras.layers.Input(shape=[150,150,3])

#   load pre-trained model
    tensor_input = keras.layers.Input(shape=[150,150,3])
    

    
    model_detector = keras.applications.vgg16.VGG16(
                            input_tensor=tensor_input, 
                            include_top=False,
                            weights='imagenet')
    
    model_detector2 = keras.applications.vgg16.VGG16(
                            input_tensor=tensor_input, 
                            include_top=False,
                            weights='imagenet')
    
    
    model_detector2 = keras.models.Sequential(layers=model_detector2.layers)
  
    for i, layer in enumerate(model_detector2.layers):
        layer._name = layer.name  +"_second"

    model2 = keras.models.Model(inputs=[tensor_input], outputs = [model_detector2.layers[-1].output])
                       
    x = model_detector.layers[17].output
    z = model_detector.layers[17].output_shape
    y = model2.layers[17].output
    
    print(model_detector.summary())
    
    print(model2.summary())
#   rehape to (batch_size, total_pixels, filter_size)
    x = keras.layers.Reshape([z[1] * z[2] , z[-1]])(x)
        
    y = keras.layers.Reshape([z[1] * z[2] , z[-1]])(y)
    
#   outer products of x, y
    x = keras.layers.Lambda(dot_product)([x, y])
    
#   rehape to (batch_size, filter_size_vgg_last_layer*filter_vgg_last_layer)
    x = keras.layers.Reshape([z[-1]*z[-1]])(x)
        
#   signed_sqrt
    x = keras.layers.Lambda(signed_sqrt)(x)
        
#   L2_norm
    x = keras.layers.Lambda(L2_norm)(x)

#   FC-Layer

    initializer = tf.keras.initializers.GlorotNormal()
            
    x = keras.layers.Dense(units=258, 
                           kernel_regularizer=keras.regularizers.l2(0.0),
                           kernel_initializer=initializer)(x)

    tensor_prediction = keras.layers.Activation("softmax")(x)

    model_bilinear = keras.models.Model(inputs=[tensor_input],
                                        outputs=[tensor_prediction])
    
    
#   Freeze VGG layers
    for layer in model_detector.layers:
        layer.trainable = False
        

    sgd = keras.optimizers.SGD(lr=1.0, 
                               decay=0.0,
                               momentum=0.9)

    model_bilinear.compile(loss="categorical_crossentropy", 
                           optimizer=sgd,
                           metrics=["categorical_accuracy"])

    model_bilinear.summary()
    
    return model_bilinear

In [38]:
model = build_model()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_40 (InputLayer)        [(None, 150, 150, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0     



In [43]:

def train_model(epochs):

    hist = model.fit_generator(
                train_generator, 
                epochs=epochs, 
                validation_data=val_generator,
                workers=3,
                verbose=1
            )
        
    model.save_weights("./bilinear_weights/val_acc_" + hist.history['val_categorical_accuracy'][-1] +"_"+ str(epochs)+ ".h5")
    
    return hist



In [44]:
train_datagen = image.ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        fill_mode='nearest',
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)
test_datagen = image.ImageDataGenerator(rescale=1./255)

In [45]:
train_generator = train_datagen.flow_from_directory(
        '../nut_snacks/dataset_split/train',
        target_size=(150, 150),
        color_mode="rgb",
        batch_size=32,
        subset='training',
        class_mode='categorical')
val_generator = test_datagen.flow_from_directory(
        '../nut_snacks/dataset_split/val',
        target_size=(150, 150),
        color_mode="rgb",
        batch_size=32,
        subset='training',
        class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
        '../nut_snacks/dataset_split/test',
        target_size=(150, 150),
        color_mode="rgb",
        shuffle = False,
        class_mode=None,
        batch_size=1)

Found 6965 images belonging to 258 classes.
Found 1290 images belonging to 258 classes.
Found 2064 images belonging to 258 classes.


In [27]:
hist =train_model(epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [28]:
hist =train_model(epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [29]:
for layer in model.layers:
    layer.trainable = True

sgd = keras.optimizers.SGD(lr=1e-3, decay=1e-9, momentum=0.9)

model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=["categorical_accuracy"])

In [31]:
hist =train_model(epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [32]:
model.save('./model_bilin')

INFO:tensorflow:Assets written to: ./model_bilin\assets


In [7]:
model2 = keras.models.load_model('./model_bilin')

In [11]:
preds = model2.predict_generator(test_generator, verbose=1)





In [12]:
preds_cls_idx = preds.argmax(axis=-1)
preds_cls_idx

array([  0,   0,   0, ...,   4, 257, 257], dtype=int64)

In [13]:
idx_to_cls = {v: k for k, v in train_generator.class_indices.items()}
preds_cls = np.vectorize(idx_to_cls.get)(preds_cls_idx)
preds_cls

array(['999999815342', '999999815342', '999999815342', ...,
       '999999981399', '999999996239', '999999996239'], dtype='<U12')

In [14]:
true_lables = []
true_lables_upc_idx_map = {}
true_lables_img = {}
upc_list = os.listdir('../nut_snacks/dataset_split/test/')
idx = 0
for upc in upc_list:
    img_folder = '../nut_snacks/dataset_split/test/' + upc +'/'
    img_list = os.listdir(img_folder)
    for img in img_list:
        true_lables.append(upc)
        true_lables_upc_idx_map[idx] = upc
        true_lables_img[idx] = img
        idx += 1
len(true_lables)

2064

In [15]:
wrong_predicted = []
count = 0
for idx in range(0, len(preds_cls)):
    if preds_cls[idx] != true_lables[idx]:
        wrong_predicted.append(idx)
    else:
        count += 1
count

2015

In [16]:
len(wrong_predicted)

49

In [17]:
accuracy = count/len(preds_cls)
accuracy

0.9762596899224806

In [18]:
wrong_pred_upc = set()
for label in wrong_predicted:
    wrong_pred_upc.add(true_lables_upc_idx_map[label])
len(wrong_pred_upc), len(wrong_predicted)

(37, 49)

In [19]:
images_pred_wrong = []
for label in wrong_predicted:
    images_pred_wrong.append(true_lables_img[label])
len(images_pred_wrong)

49

In [20]:
d={}
for i in range(0 ,len(images_pred_wrong)):
    if preds_cls[i] not in d.keys():
        d[preds_cls[i]] = 1
    else:
        d[preds_cls[i]] += 1
d

{'999999815342': 8,
 '999999981396': 8,
 '999999981397': 8,
 '999999981398': 7,
 '999999981582': 1,
 '999999981399': 8,
 '999999981400': 7,
 '999999981515': 1,
 '999999981401': 1}

In [21]:
for i in range(0 ,len(images_pred_wrong)):
    img ='../nut_snacks/dataset_split/test/' + true_lables_upc_idx_map[wrong_predicted[i]] + '/' + images_pred_wrong[i]
    print(img, preds_cls[i])

../nut_snacks/dataset_split/test/999999981398/walmart-supercenter-1521_16510774_Q02-002_zIK9W--1602-807-1835-1138.jpg 999999815342
../nut_snacks/dataset_split/test/999999981400/walmart-supercenter-1518_16559243_Q02-005_fq6tG--2166-470-2456-833.jpg 999999815342
../nut_snacks/dataset_split/test/999999981408/walmart-supercenter-1508_16550468_Q02-005_RFZYu--1231-812-1414-983.jpg 999999815342
../nut_snacks/dataset_split/test/999999981414/walmart-neighborhood-market-4142_16494957_Q02-002_8Hn4O--2738-2730-2971-3047.jpg 999999815342
../nut_snacks/dataset_split/test/999999981417/walmart-neighborhood-market-5855_16497595_Q02-002_XxQ8V--1573-2417-1760-2629.jpg 999999815342
../nut_snacks/dataset_split/test/999999981419/walmart-neighborhood-market-5855_16497595_Q02-001_3zRe0--524-3327-737-3698.jpg 999999815342
../nut_snacks/dataset_split/test/999999981427/walmart-supercenter-1004_16505800_Q02-003_XkNT5--1048-2853-1200-3121.jpg 999999815342
../nut_snacks/dataset_split/test/999999981427/walmart-super

In [22]:
from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score

In [23]:
f1 = f1_score(true_lables, preds_cls, average='weighted')
precision = precision_score(true_lables, preds_cls, average='weighted')
recall  = recall_score(true_lables, preds_cls, average='weighted')
accuracy = accuracy_score(true_lables, preds_cls)

In [24]:
print("f1 :", f1)
print("precision :", precision)
print("recall :", recall)
print("accuracy :", accuracy)


f1 : 0.9760910297333006
precision : 0.9794846948916717
recall : 0.9762596899224806
accuracy : 0.9762596899224806
