In [0]:
import io
import os
import numpy as np
import pandas as pd
import pathlib
import tensorflow as tf

In [0]:
# Count the training triplets (one per line). header=None is required: the file
# is read as if it had no header row, otherwise pandas would treat the first
# triplet as column names and the count would be one too low.
num_triplets = len(pd.read_csv("/content/drive/My Drive/train_triplets.txt", header=None).index)

In [20]:
path = '/content/drive/My Drive/food/'
data_dir = pathlib.Path(path)
# Count the JPEG files that sit directly inside the image folder (non-recursive glob).
image_count = len(list(data_dir.glob('*.jpg')))
print('Number of training images: ', image_count)

BATCH_SIZE = 32
# NOTE: these two names are swapped relative to how they are used. Images are
# resized with [IMG_WIDTH, IMG_HEIGHT], so IMG_WIDTH ends up as the first
# dimension of the image tensor and IMG_HEIGHT as the second one.
IMG_HEIGHT = 420  # second index of the image shape (original size presumably 470 - TODO confirm)
IMG_WIDTH = 300  # first index of the image shape
EPOCHS = 8
# Keras expects an integer number of steps; np.ceil on its own returns a float.
STEPS_PER_EPOCH = int(np.ceil(num_triplets / BATCH_SIZE))
margin = 0.3  # margin used by the triplet hinge loss

Number of training images:  10010


## Reshape input images

In [21]:
# Sanity check of the preprocessing on a single image: read the file, decode it
# as 3-channel JPEG, convert to float32 and resize to the model's input size.
sample_bytes = tf.io.read_file('/content/drive/My Drive/food/00029.jpg')
img = tf.image.resize(
    tf.image.convert_image_dtype(
        tf.image.decode_jpeg(sample_bytes, channels=3),
        tf.float32,
    ),
    [IMG_WIDTH, IMG_HEIGHT],
)
img

<tf.Tensor: shape=(300, 420, 3), dtype=float32, numpy=
array([[[0.12052289, 0.10483661, 0.21071897],
        [0.11137256, 0.09490196, 0.20313728],
        [0.10196079, 0.08235295, 0.20000002],
        ...,
        [0.8444448 , 0.7934644 , 0.83137286],
        [0.80705935, 0.7560789 , 0.8172551 ],
        [0.7890198 , 0.7380394 , 0.8083662 ]],

       [[0.0460915 , 0.0265817 , 0.14393464],
        [0.11596079, 0.09643138, 0.21384315],
        [0.10068628, 0.08107844, 0.1987255 ],
        ...,
        [0.7475816 , 0.6966012 , 0.7319606 ],
        [0.76270574, 0.7147841 , 0.76678395],
        [0.77958804, 0.72886264, 0.79485595]],

       [[0.0990523 , 0.07944445, 0.20460786],
        [0.13787583, 0.11826798, 0.24343139],
        [0.09313726, 0.07352941, 0.19869283],
        ...,
        [0.8014704 , 0.75049   , 0.78327864],
        [0.8247057 , 0.77686256, 0.82186234],
        [0.82920486, 0.7819935 , 0.83685184]],

       ...,

       [[0.78406376, 0.74144936, 0.7176585 ],
        [0.81

In [0]:
def _load_image(image_name):
  """Read `<image folder>/<image_name>.jpg` and return it as a resized float32 tensor."""
  file_dir = '/content/drive/My Drive/food/'
  img = tf.io.read_file(file_dir + image_name + '.jpg')
  img = tf.image.decode_jpeg(img, channels=3)
  img = tf.image.convert_image_dtype(img, tf.float32)
  return tf.image.resize(img, [IMG_WIDTH, IMG_HEIGHT])


def image_vector(names):
  """Map one line of a triplet file to the three corresponding image tensors.

  Args:
    names: scalar string tensor holding three image names separated by single
      spaces (names without the '.jpg' extension).

  Returns:
    A pair `((img1, img2, img3), 0)`: the three images in the order they appear
    on the line, each resized to [IMG_WIDTH, IMG_HEIGHT], plus a constant
    placeholder target of 0.
  """
  parts = tf.strings.split(names, " ")

  # Change the order of the images here if a different triplet layout is needed.
  image_triplets = (
      _load_image(parts[0]),
      _load_image(parts[1]),
      _load_image(parts[2]),
  )

  return image_triplets, 0

In [23]:
# Each line of the triplet file names three images; image_vector turns a line
# into ((img1, img2, img3), 0).
triplet_names = tf.data.TextLineDataset("/content/drive/My Drive/train_triplets.txt")
print(triplet_names)
triplet_ds = triplet_names.map(
    image_vector,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)

# No hold-out split is taken: every triplet is used for training. The data is
# batched first and then repeated once per epoch, with prefetching at the end.
triplet_train = (
    triplet_ds
    .batch(BATCH_SIZE)
    .repeat(EPOCHS)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

<TextLineDatasetV2 shapes: (), types: tf.string>


## Model

In [0]:
def triplet_distances(anch, pos, neg):
  """Triplet hinge loss averaged over all triplets passed in.

  Args:
    anch: anchor embeddings; reduced over axis 1, so at least rank 2
      (assumed to be (batch, embedding_dim) - TODO confirm).
    pos: embeddings that should lie close to the anchor, same shape as `anch`.
    neg: embeddings that should lie far from the anchor, same shape as `anch`.

  Returns:
    Scalar tensor: mean of max(0, margin + d_pos - d_neg), where d_* are the
    squared Euclidean distances to the anchor and `margin` is the module-level
    constant.
  """
  def _squared_distance(u, v):
    return tf.reduce_sum(tf.square(u - v), 1)

  dist_to_pos = _squared_distance(anch, pos)
  dist_to_neg = _squared_distance(anch, neg)

  # When the triplet is ordered correctly, the positive distance is the smaller
  # one; once it is smaller by at least the margin, the loss is zero.
  hinge = tf.maximum(0., margin + dist_to_pos - dist_to_neg)
  return tf.reduce_mean(hinge)

def model_loss(label, distance):
  """Keras-style loss that passes the model output through unchanged.

  Args:
    label: target value supplied by Keras; ignored.
    distance: model output, returned as the loss value.

  Returns:
    `distance`, untouched.
  """
  del label  # the target carries no information for this loss
  return distance

In [0]:
import tensorflow.keras.backend as K

def score(losses):
  """Convert a hinge-loss value into a 0/1 label.

  Args:
    losses: float tensor of hinge-loss values.

  Returns:
    Float tensor that is 1. wherever `losses` is at most the module-level
    `margin`, and 0. wherever it exceeds the margin.
  """
  above_margin = losses - tf.constant(margin)
  # If A is closer to B, the label is 1: anything not above the margin clips to
  # 0, anything above it is rounded up to 1 and flips the label to 0.
  wrong_order = tf.math.ceil(K.clip(above_margin, 0, 1))
  return tf.constant(1.) - wrong_order

#def metric(y_true, score): 
#    true_positives = tf.math.count_nonzero(score)
#    all_counts = tf.size(score)
#    return true_positives / all_counts

In [0]:
import tensorflow_hub as hub

# Embedding network: a frozen Inception V3 feature extractor followed by a small
# trainable head that outputs L2-normalised 150-dimensional embeddings.
model = tf.keras.Sequential()
model.add(hub.KerasLayer("https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4", trainable=False))
model.add(tf.keras.layers.Dense(4500, activation='relu'))
model.add(tf.keras.layers.Dropout(0.4))
# No activation on the final dense layer.
model.add(tf.keras.layers.Dense(150, activation=None))
# L2-normalise the embeddings.
model.add(tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)))

#model.build([None, IMG_WIDTH, IMG_HEIGHT, 3])  # Batch input shape.

## Transfer Features

In [0]:
# One image input per triplet member, named 'A', 'B' and 'C'.
inputA, inputB, inputC = (
    tf.keras.Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3), name=input_name)
    for input_name in ('A', 'B', 'C')
)

# The same embedding network (shared weights) is applied to all three inputs.
A_features, B_features, C_features = (
    model(image_input) for image_input in (inputA, inputB, inputC)
)

# The network's first output is the triplet loss itself (A as anchor, B as the
# candidate for "closer", C as the candidate for "further"); the second output
# is the 0/1 label derived from that loss.
trip_loss1 = triplet_distances(A_features, B_features, C_features)
label = score(trip_loss1)

# NOTE: this rebinds `model` from the embedding network to the triplet network.
model = tf.keras.Model(
    inputs=[inputA, inputB, inputC],
    outputs=[trip_loss1, label],
)
#model.summary()

In [0]:
# The loss weights [1., 0.] make only the first output (the triplet loss)
# contribute to the objective; the label output gets weight zero.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=model_loss,
    loss_weights=[1., 0.],
)

In [29]:
# triplet_train is a tf.data.Dataset that already yields batches of BATCH_SIZE,
# so batch_size must not be passed to fit(). steps_per_epoch is cast to int
# because Keras expects an integer step count.
model.fit(triplet_train, epochs=EPOCHS,
          steps_per_epoch=int(STEPS_PER_EPOCH))

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x7fe9c00dd748>

In [34]:
# Persist the trained triplet model to Google Drive.
model.save(filepath='/content/drive/My Drive/saved_model/my_model')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


INFO:tensorflow:Assets written to: /content/drive/My Drive/saved_model/my_model/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/saved_model/my_model/assets


# SUBMISSION

In [0]:
# Run the trained model over the test triplets; the predictions are written to
# the submission file in the next cell.
test_names = tf.data.TextLineDataset("/content/drive/My Drive/test_triplets.txt")
test_ds = test_names.map(image_vector, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# Batch size 1 is required: the model's label output is derived from the loss
# averaged over the whole batch, so only single-triplet batches give one label
# per triplet.
test_ds = test_ds.batch(1)
# Prefetch last so that complete batches are prepared while the model is busy.
test_ds = test_ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
y_sub = model.predict(test_ds)

In [39]:
# The second model output holds the 0/1 labels; store them as integers, one per
# line, without index or header.
submission = pd.Series(y_sub[1]).astype(int)
submission.to_csv(
    '/content/drive/My Drive/prediction.csv',
    index=False,
    header=False,
)
submission

0        1
1        0
2        0
3        0
4        1
        ..
59539    1
59540    0
59541    1
59542    0
59543    1
Length: 59544, dtype: int64