In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from matplotlib import pyplot as plt

import random

import tensorflow as tf
from keras.optimizers import *
from keras.layers import *
from keras.models import *
from keras.regularizers import l2

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Number of distinct class labels; sizes the per-class index lists and bounds
# the random class sampling done in get_batch.
NUM_CLASSES = 5


# Any results you write to the current directory are saved as output.

Using TensorFlow backend.


In [2]:
def loadfiles(root='/kaggle/input'):
    """Load every ``.csv`` image found under ``root`` together with its label.

    The directory tree is walked recursively. Each file whose extension is
    ``.csv`` is parsed with ``np.loadtxt`` (comma separated, first row skipped
    as a header). The label is the integer found in the second
    underscore-separated field of the file name.

    Directories and files are visited in sorted order so that the returned
    arrays have the same ordering on every run and every file system.

    Args:
        root: Directory to search. Defaults to the Kaggle input directory.

    Returns:
        A tuple ``(imgs, labels)`` of NumPy arrays, where ``imgs[k]`` is the
        parsed content of the k-th CSV file and ``labels[k]`` its integer label.

    Raises:
        IndexError: If a CSV file name contains no underscore.
        ValueError: If the label field of a CSV file name is not an integer,
            or if ``np.loadtxt`` cannot parse the file.
    """
    imgs = []
    labels = []
    for dirname, dirnames, filenames in os.walk(root):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        for filename in sorted(filenames):
            # splitext copes with names that have no dot or several dots,
            # where indexing filename.split('.') would fail or misfire.
            if os.path.splitext(filename)[1] != '.csv':
                continue
            label = int(filename.split('_')[1])
            path = os.path.join(dirname, filename)
            # Passing the path lets NumPy open *and close* the file itself.
            imgs.append(np.loadtxt(path, delimiter=",", skiprows=1))
            labels.append(label)
    return np.array(imgs), np.array(labels)
            

In [3]:
# Hold out a fixed number of randomly chosen samples for testing and train on
# the rest.
SEED = 42         # fixed so the split and the later batch sampling are repeatable
TEST_SIZE = 1000  # number of samples held out for testing

# get_batch draws from both NumPy's and the stdlib's global generators, so
# seed both. (This does not seed the Keras/TensorFlow weight initialisers.)
np.random.seed(SEED)
random.seed(SEED)

imgs, labels = loadfiles()
# Fail early with a clear message instead of silently producing an empty
# training set.
assert len(imgs) > TEST_SIZE, (
    "need more than %d samples to build a train/test split, found %d"
    % (TEST_SIZE, len(imgs))
)

indices = np.random.permutation(len(imgs))
test_imgs = imgs[indices[:TEST_SIZE]]
test_labels = labels[indices[:TEST_SIZE]]
train_imgs = imgs[indices[TEST_SIZE:]]
train_labels = labels[indices[TEST_SIZE:]]

In [4]:
# Sanity check: print the held-out test images to eyeball the loaded values.
print(test_imgs)

[[[1.75853915e-04 1.17078140e-04 1.98359121e-04 ... 8.80602747e-05
   1.09317116e-04 7.43524230e-04]
  [2.98005296e-04 5.51570247e-06 1.05534218e-05 ... 2.36275118e-05
   6.84056868e-05 7.08381296e-04]
  [2.16972592e-04 4.01588932e-06 7.68376685e-06 ... 1.72027903e-05
   4.98050213e-05 5.15760388e-04]
  ...
  [2.60817033e-04 6.30310024e-05 6.28012058e-05 ... 2.56397755e-07
   6.71220448e-08 5.56050210e-08]
  [1.25836377e-04 3.45871231e-05 4.93457665e-05 ... 5.17971955e-07
   1.12310623e-07 6.33818829e-08]
  [3.40120256e-04 1.26635496e-04 2.91933538e-05 ... 4.17313487e-07
   1.54922247e-07 9.81054171e-08]]

 [[1.14557205e-03 1.61921920e-03 5.52529003e-04 ... 1.41617609e-04
   5.51348639e-05 1.10962673e-03]
  [1.92749838e-03 2.20328812e-02 1.44173326e-02 ... 3.89582448e-04
   6.63545099e-04 4.08985885e-04]
  [1.40337879e-03 1.60417669e-02 1.04970150e-02 ... 2.83648376e-04
   4.83115931e-04 2.97775696e-04]
  ...
  [7.35013472e-08 4.39076295e-08 7.17849531e-08 ... 5.39676876e-05
   7.38987

In [5]:
# Per-class lookup tables: train_arr[c] / test_arr[c] hold the positions (within
# the corresponding split) of every sample whose label is c.
train_arr = [[] for _ in range(NUM_CLASSES)]
test_arr = [[] for _ in range(NUM_CLASSES)]
for buckets, split_labels in ((train_arr, train_labels), (test_arr, test_labels)):
    for position, class_id in enumerate(split_labels):
        buckets[class_id].append(position)

In [6]:
def get_batch(batch_size, s="train"):
    """Assemble one batch of image pairs with same/different-class targets.

    The first ``batch_size // 2`` pairs combine images from two different
    classes (target 0); the remaining pairs combine two images drawn from the
    same class (target 1).

    Args:
        batch_size: Number of pairs to produce.
        s: ``"train"`` to sample from the training split; any other value
            samples from the test split.

    Returns:
        ``(pairs, targets)`` where ``pairs`` is a list of two arrays of shape
        ``(batch_size, w, h, 1)`` and ``targets`` is an array of shape
        ``(batch_size,)`` containing 0s followed by 1s.
    """
    if s == "train":
        images, index_by_class = train_imgs, train_arr
    else:
        images, index_by_class = test_imgs, test_arr

    _, rows, cols = images.shape
    half = batch_size // 2

    left = np.zeros((batch_size, rows, cols, 1))
    right = np.zeros((batch_size, rows, cols, 1))
    targets = np.zeros((batch_size,))
    targets[half:] = 1

    # One anchor class per pair, drawn up front.
    anchor_classes = np.random.randint(0, NUM_CLASSES, batch_size)

    for slot, anchor_class in enumerate(anchor_classes):
        anchor_idx = random.choice(index_by_class[anchor_class])
        left[slot] = images[anchor_idx].reshape(rows, cols, 1)

        if slot < half:
            # A non-zero offset modulo the class count always lands on a
            # class other than the anchor's.
            offset = np.random.randint(1, NUM_CLASSES)
            partner_class = (anchor_class + offset) % NUM_CLASSES
        else:
            partner_class = anchor_class

        partner_idx = random.choice(index_by_class[partner_class])
        right[slot] = images[partner_idx].reshape(rows, cols, 1)

    return [left, right], targets
        
    
    

In [7]:
def generate(batch_size, s="train"):
    """Yield ``(pairs, targets)`` batches from ``get_batch`` forever.

    Args:
        batch_size: Number of pairs per batch, forwarded to ``get_batch``.
        s: Split selector, forwarded to ``get_batch``.
    """
    while True:
        yield get_batch(batch_size, s)

In [8]:
def get_siamese_model(input_shape):
    """Build a two-input siamese network that scores the similarity of a pair.

    Both inputs pass through the same convolutional encoder (the weights are
    shared because one ``Sequential`` instance is applied to both). The
    element-wise absolute difference of the two encodings feeds a single
    sigmoid unit.

    Args:
        input_shape: Shape of one input image, passed to ``Input`` and to the
            first ``Conv2D`` layer.

    Returns:
        An uncompiled Keras ``Model`` taking ``[left_input, right_input]`` and
        producing one sigmoid output per pair.
    """
    # Import the backend explicitly: ``K`` was otherwise only available as a
    # side effect of the file's ``from keras... import *`` lines.
    from keras import backend as K

    # Define the tensors for the two input images
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # Convolutional encoder applied to both inputs
    model = Sequential()
    model.add(Conv2D(64, (10,10), activation='relu', input_shape=input_shape,
                   kernel_initializer='random_normal', kernel_regularizer=l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(Conv2D(128, (7,7), activation='relu',
                     kernel_initializer='random_normal',
                     bias_initializer='random_normal', kernel_regularizer=l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(Conv2D(128, (4,4), activation='relu', kernel_initializer='random_normal',
                     bias_initializer='random_normal', kernel_regularizer=l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(Conv2D(256, (4,4), activation='relu', kernel_initializer='random_normal',
                     bias_initializer='random_normal', kernel_regularizer=l2(2e-4)))
    model.add(Flatten())
    model.add(Dense(4096, activation='sigmoid',
                   kernel_regularizer=l2(1e-3),
                   kernel_initializer='random_normal',bias_initializer='random_normal'))

    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    # Element-wise absolute difference between the two encodings
    L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])

    # Single sigmoid unit turns the difference vector into a similarity score
    prediction = Dense(1,activation='sigmoid',bias_initializer='random_normal')(L1_distance)

    # Connect the inputs with the output
    siamese_net = Model(inputs=[left_input,right_input],outputs=prediction)

    return siamese_net

In [9]:
# Build the network for single-channel images shaped like the training samples,
# compile it for binary same/different classification, and train it from the
# endless training-batch generator.
input_shape = (*train_imgs[0].shape, 1)
model = get_siamese_model(input_shape)
optimizer = Adam(lr=6e-5)
model.compile(optimizer=optimizer, loss="binary_crossentropy")

model.fit(generate(32), steps_per_epoch=100, epochs=300)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Ep

In [10]:
# Write the model's current weights to the working directory.
# NOTE(review): the file name says "epoch150" while the fit call requests 300
# epochs — confirm which checkpoint this file actually holds.
model.save_weights('epoch150-weights.h5')