In [101]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras import layers

import PIL
import math

# Task 4

## To set up the project:
### Place train_triplets.txt and test_triplets.txt in ./data/
### Place all images as .jpg files in ./data/food/

## Set up the image feature dictionary and the pretrained VGG16 model

In [121]:
def get_train_triplets():
    """Load the training triplets as a 2-D array of image-name strings.

    Reads ``./data/train_triplets.txt`` with ``np.genfromtxt`` using a string
    dtype, so names such as ``"02461"`` keep their leading zeros.

    Returns:
        np.ndarray: 2-D string array. For a file with several names per line
        each line becomes one row; a file holding exactly one line is returned
        with shape ``(1, k)`` rather than the flat ``(k,)`` array that
        ``genfromtxt`` produces on its own, so callers can always unpack
        ``rows, cols = triplets.shape``.
    """
    triplets = np.genfromtxt("./data/train_triplets.txt", dtype="str")
    # genfromtxt squeezes a one-line file down to 1-D; restore the row axis.
    return np.atleast_2d(triplets)


def get_test_triplets():
    """Load the test triplets as a 2-D array of image-name strings.

    Reads ``./data/test_triplets.txt`` with ``np.genfromtxt`` using a string
    dtype, so names such as ``"09896"`` keep their leading zeros.

    Returns:
        np.ndarray: 2-D string array. For a file with several names per line
        each line becomes one row; a file holding exactly one line is returned
        with shape ``(1, k)`` rather than the flat ``(k,)`` array that
        ``genfromtxt`` produces on its own, so callers can always unpack
        ``rows, cols = triplets.shape``.
    """
    triplets = np.genfromtxt("./data/test_triplets.txt", dtype="str")
    # genfromtxt squeezes a one-line file down to 1-D; restore the row axis.
    return np.atleast_2d(triplets)

def get_image_path(name):
    """Return the relative path of the JPEG file for the image called `name`.

    Args:
        name: image name without directory or extension, e.g. ``"02461"``.

    Returns:
        The path ``./data/food/<name>.jpg``.
    """
    image_dir = './data/food/'
    extension = '.jpg'
    return image_dir + name + extension


# Feature extractor: an ImageNet-pretrained VGG16 deep CNN is used to extract
# features from the images.
# TODO: Increase accuracy by training the model on our dataset?
def setup_pretrained_model():
    """Build the frozen VGG16 network used for feature extraction.

    The model is created with ImageNet weights, without the classification
    head (``include_top=False``) and with ``pooling='avg'``.

    Returns:
        The VGG16 model with every layer marked as non-trainable.
    """
    # Open decision: pooling='max' or pooling='avg'?
    extractor = VGG16(include_top=False, pooling='avg', weights='imagenet')
    for frozen_layer in extractor.layers:
        frozen_layer.trainable = False
    return extractor


def feature_extraction(model,name):
    """Run one image through `model` and return its feature vector.

    The image is located via ``get_image_path(name)``, loaded at 224x224,
    converted to an array, given a leading batch axis and passed through
    ``preprocess_input`` before prediction.

    Args:
        model: object exposing ``predict(batch)``.
        name: image name understood by ``get_image_path``.

    Returns:
        The first (and only) row of the prediction batch; per the original
        note this turns a (1, 512) result into shape (512,).
    """
    picture = image.load_img(get_image_path(name), target_size=(224, 224))
    pixels = image.img_to_array(picture)
    # Add the batch axis the network expects, then apply VGG16 preprocessing.
    batch = preprocess_input(pixels[np.newaxis, ...])
    predictions = model.predict(batch)
    return predictions[0]


def append(arr1,arr2):
    """Concatenate two arrays into one flat float64 vector.

    Both inputs are converted to float64 and flattened, then joined with
    ``np.concatenate``. For 1-D ndarray inputs the result is the same as
    copying both into a zero-initialised buffer; lists and multi-dimensional
    arrays are accepted as well.

    Args:
        arr1: array-like whose values come first.
        arr2: array-like whose values follow.

    Returns:
        np.ndarray: 1-D float64 array of length ``arr1.size + arr2.size``.
    """
    head = np.asarray(arr1, dtype=np.float64).ravel()
    tail = np.asarray(arr2, dtype=np.float64).ravel()
    return np.concatenate((head, tail))

def create_dict(model):
    """Fill the global DICT with one feature vector per referenced image.

    Loads the train and the test triplets, flattens each to a list of image
    names and, walking the train names first and then the test names, calls
    ``feature_extraction(model, name)`` for every name not yet in DICT.

    Args:
        model: feature extractor handed on to ``feature_extraction``.
    """
    train_triplets = get_train_triplets()
    test_triplets = get_test_triplets()
    name_lists = (train_triplets.reshape(-1), test_triplets.reshape(-1))
    for names in name_lists:
        for image_name in names:
            if image_name in DICT:
                continue  # already extracted for an earlier occurrence
            DICT[image_name] = feature_extraction(model, image_name)

def get_feature(name):
    """Look up the stored feature vector of image `name` in the global DICT.

    Raises:
        KeyError: if `name` has no entry in DICT.
    """
    feature_vector = DICT[name]
    return feature_vector



def setup_data(model,min,max,train):
    """Turn the image triplets into pairwise feature rows with 1/0 labels.

    Every triplet (anchor, positive, negative) yields two consecutive rows:

        row 2*i     = [anchor features | positive features], label 1
        row 2*i + 1 = [anchor features | negative features], label 0

    Features come from ``get_feature``; the feature width is taken from the
    first anchor instead of being fixed at 512, so other extractors work too.

    Args:
        model: unused; kept so existing calls keep working.
        min: unused; kept so existing calls keep working.
        max: unused; kept so existing calls keep working.
        train: truthy to use the train triplets, falsy for the test triplets.

    Returns:
        tuple: ``(res, labels)`` where ``res`` is a float64 array of shape
        ``(2 * n_triplets, 2 * feature_dim)`` and ``labels`` is a float64
        array of length ``2 * n_triplets`` alternating 1, 0. The labels follow
        this pattern for the test triplets as well.
    """
    default_feature_dim = 512  # width used when there is no triplet to inspect

    triplets = np.asarray(get_train_triplets() if train else get_test_triplets())
    if triplets.size == 0:
        # Nothing to pair up: return empty arrays of the usual width.
        return np.zeros((0, 2 * default_feature_dim)), np.zeros(0)
    if triplets.ndim == 1:
        # A one-line triplet file loads as a flat array; restore the row axis.
        triplets = triplets.reshape(1, -1)

    n_triplets = triplets.shape[0]
    feature_dim = np.asarray(get_feature(triplets[0][0])).size

    res = np.zeros((2 * n_triplets, 2 * feature_dim))
    labels = np.zeros(2 * n_triplets)
    labels[0::2] = 1  # even rows hold the (anchor, positive) pairs

    for i, triplet in enumerate(triplets):
        anchor = get_feature(triplet[0])
        pos = get_feature(triplet[1])
        neg = get_feature(triplet[2])

        res[2 * i, :feature_dim] = anchor
        res[2 * i, feature_dim:] = pos

        res[2 * i + 1, :feature_dim] = anchor
        res[2 * i + 1, feature_dim:] = neg
    return res, labels

## Fully connected model (approximates a similarity function between two images, based on their extracted features and the human-labelled triplets)

In [134]:
def create_fully_connect_model():
    """Build the dense network that scores a pair of image feature vectors.

    Architecture: a 1024-wide input, seven ReLU layers of 256 units, two ReLU
    layers of 64 units and a single sigmoid output unit.

    Returns:
        An uncompiled ``keras.Model`` named ``"fully_connected"``.
    """
    hidden_widths = (256,) * 7 + (64,) * 2

    inputs = keras.Input(shape=(1024,))
    hidden = inputs
    for width in hidden_widths:
        hidden = layers.Dense(width, activation="relu")(hidden)
    outputs = layers.Dense(1, activation="sigmoid")(hidden)

    return keras.Model(inputs=inputs, outputs=outputs, name="fully_connected")

def parse_results(results):
    """Convert per-pair scores into one 0/1 decision per triplet.

    The scores are read in consecutive pairs: element ``2*i`` against element
    ``2*i + 1``. Entry ``i`` of the result is 1 when the first score is
    strictly greater than the second and 0 otherwise (ties give 0). A trailing
    unpaired score is ignored.

    Args:
        results: array-like of scores, e.g. shape ``(2 * n, 1)`` or
            ``(2 * n,)``; it is flattened before pairing.

    Returns:
        np.ndarray: float64 array of length ``results.size // 2`` holding
        1.0 / 0.0.
    """
    scores = np.asarray(results).reshape(-1)
    n_pairs = scores.size // 2
    first = scores[0:2 * n_pairs:2]
    second = scores[1:2 * n_pairs:2]
    # Vectorised comparison replaces the per-element Python loop.
    return (first > second).astype(np.float64)

In [85]:
# Global cache mapping image name -> feature vector. It is filled by
# create_dict() and read by get_feature().
DICT = {}
def build_feature_dict():
    """Populate DICT with features for every image named in the triplet files.

    Creates the pretrained extractor via ``setup_pretrained_model()`` and
    hands it to ``create_dict()``.
    """
    pretrained_model = setup_pretrained_model()
    create_dict(pretrained_model)
# This helper was previously called `dict`, which replaced the builtin dict
# type for the rest of the kernel session; the new name avoids that shadowing.
build_feature_dict()

['02461' '03450' '02678' ... '02509' '02552' '03406']
['09896' '09640' '09177' ... '08475' '06082' '09044']


In [135]:
def main():
    """Train the pair classifier and write predictions for the test triplets.

    Steps:
      1. Make sure the feature cache DICT is populated.
      2. Build and compile the fully connected model and fit it on the pairs
         generated from the train triplets.
      3. Predict on the pairs generated from the test triplets.
      4. Write the raw scores to ``testing.txt`` and the per-triplet 0/1
         decisions to ``submission.txt``.
    """
    # setup_data() reads features from DICT. Build the cache here only when it
    # is still empty, so VGG16 is not loaded a second time when the cache cell
    # has already run.
    if not DICT:
        create_dict(setup_pretrained_model())

    fully_connected_model = create_fully_connect_model()
    fully_connected_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy'])

    # setup_data() never uses its model/min/max arguments, so no model object
    # is passed; the numeric arguments are kept as in the original calls.
    train_data, train_labels = setup_data(None, 0, 5000, True)
    fully_connected_model.fit(train_data, train_labels)

    # The labels generated for the test triplets are not used.
    test_data, _unused_labels = setup_data(None, 0, 0, False)

    results = fully_connected_model.predict(test_data)
    output = parse_results(results)
    np.savetxt('testing.txt', results, fmt='%.18e')
    np.savetxt('submission.txt', output, fmt='%d')
main()

Train on 119030 samples
