In [1]:
import sys
import numpy as np
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt
%matplotlib inline

import time
import cv2
import imageio

import tensorflow as tf
from keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from keras.models import Model

from tensorflow import keras 
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense, ReLU
from keras.layers import BatchNormalization

from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import Concatenate
from keras.layers.core import Lambda, Flatten, Dense
from keras.initializers import glorot_uniform

from tensorflow.python.keras.layers import Layer, InputSpec
from keras.regularizers import l2
from keras import backend as K

from sklearn.utils import shuffle

import numpy.random as rng

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [3]:
# Check whether GPU is being or not
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 5301523683355209358
xla_global_id: -1
]


In [4]:
project_path = '/content/drive/My Drive/HPML_Project_Siamese_Networks_kpv222_spa9633/'

In [5]:
train_zip_path = project_path + 'images_background.zip'

In [6]:
validation_zip_path = project_path + 'images_evaluation.zip'

In [7]:
from zipfile import ZipFile

with ZipFile(train_zip_path, 'r') as z:
  z.extractall()
print("Training folder zip extraction completed!")

with ZipFile(validation_zip_path, 'r') as z:
  z.extractall()
print("Validation folder zip extraction completed!")

Training folder zip extraction completed!
Validation folder zip extraction completed!


In [8]:
train_folder = "/content/images_background/"
val_folder = '/content/images_evaluation/'
save_path = '/content/'
print(train_folder)
print(val_folder)
print(save_path)

/content/images_background/
/content/images_evaluation/
/content/


In [9]:
def loadimgs(path,n = 0):
    '''
    path => Path of train directory or test directory
    '''
    X=[]
    y = []
    cat_dict = {}
    lang_dict = {}
    curr_y = n
    # we load every alphabet seperately so we can isolate them later
    for alphabet in os.listdir(path):
        print("loading alphabet: " + alphabet)
        lang_dict[alphabet] = [curr_y,None]
        alphabet_path = os.path.join(path,alphabet)
        # every letter/category has it's own column in the array, so  load seperately
        for letter in os.listdir(alphabet_path):
            cat_dict[curr_y] = (alphabet, letter)
            category_images=[]
            letter_path = os.path.join(alphabet_path, letter)
            # read all the images in the current category
            for filename in os.listdir(letter_path):
                image_path = os.path.join(letter_path, filename)
                image = imageio.imread(image_path)
                category_images.append(image)
                y.append(curr_y)
            try:
                X.append(np.stack(category_images))
            # edge case  - last one
            except ValueError as e:
                print(e)
                print("error - category_images:", category_images)
            curr_y += 1
            lang_dict[alphabet][1] = curr_y - 1
    y = np.vstack(y)
    X = np.stack(X)
    return X,y,lang_dict

### Loading the train images into tensors

In [10]:
X,y,c=loadimgs(train_folder)

loading alphabet: Grantha
loading alphabet: Gujarati
loading alphabet: Futurama
loading alphabet: Malay_(Jawi_-_Arabic)
loading alphabet: Anglo-Saxon_Futhorc
loading alphabet: N_Ko
loading alphabet: Armenian
loading alphabet: Braille
loading alphabet: Blackfoot_(Canadian_Aboriginal_Syllabics)
loading alphabet: Balinese
loading alphabet: Arcadian
loading alphabet: Cyrillic
loading alphabet: Ojibwe_(Canadian_Aboriginal_Syllabics)
loading alphabet: Asomtavruli_(Georgian)
loading alphabet: Latin
loading alphabet: Tagalog
loading alphabet: Alphabet_of_the_Magi
loading alphabet: Tifinagh
loading alphabet: Japanese_(hiragana)
loading alphabet: Hebrew
loading alphabet: Mkhedruli_(Georgian)
loading alphabet: Inuktitut_(Canadian_Aboriginal_Syllabics)
loading alphabet: Early_Aramaic
loading alphabet: Burmese_(Myanmar)
loading alphabet: Japanese_(katakana)
loading alphabet: Greek
loading alphabet: Syriac_(Estrangelo)
loading alphabet: Bengali
loading alphabet: Korean
loading alphabet: Sanskrit


### Saving the train tensors on disk

In [11]:
with open(os.path.join(save_path,"train.pickle"), "wb") as f:
    pickle.dump((X,c),f)

### Loading the validation images into tensors

In [12]:
Xval,yval,cval=loadimgs(val_folder)

loading alphabet: Malayalam
loading alphabet: Glagolitic
loading alphabet: Tengwar
loading alphabet: Atlantean
loading alphabet: Manipuri
loading alphabet: Keble
loading alphabet: Sylheti
loading alphabet: Angelic
loading alphabet: Kannada
loading alphabet: Gurmukhi
loading alphabet: Tibetan
loading alphabet: ULOG
loading alphabet: Aurek-Besh
loading alphabet: Avesta
loading alphabet: Mongolian
loading alphabet: Atemayar_Qelisayer
loading alphabet: Ge_ez
loading alphabet: Old_Church_Slavonic_(Cyrillic)
loading alphabet: Oriya
loading alphabet: Syriac_(Serto)


### Saving the validation tensors on disk

In [13]:
with open(os.path.join(save_path,"val.pickle"), "wb") as f:
    pickle.dump((Xval,cval),f)

In [14]:
# del X, y ,c ,Xval, yval, cval

In [15]:
def initialize_weights(shape, name=None):
    """
        The paper, http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf
        suggests to initialize CNN layer weights with mean as 0.0 and standard deviation of 0.01
    """
    return np.random.normal(loc = 0.0, scale = 1e-2, size = shape)

In [16]:
def initialize_bias(shape, name=None):
    """
        The paper, http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf
        suggests to initialize CNN layer bias with mean as 0.5 and standard deviation of 0.01
    """
    return np.random.normal(loc = 0.5, scale = 1e-2, size = shape)

In [17]:
def euclidean_distance(vects):
    """Find the Euclidean distance between two vectors.

    Arguments:
        vects: List containing two tensors of same length.

    Returns:
        Tensor containing euclidean distance
        (as floating point value) between vectors.
    """

    x, y = vects
    sum_square = tf.math.reduce_sum(tf.math.square(x - y), axis=1, keepdims=True)
    return tf.math.sqrt(tf.math.maximum(sum_square, tf.keras.backend.epsilon()))

In [18]:
def rbf_distance(vects):
  x, y = vects
  beta = 2
  sum_square = tf.math.reduce_sum(tf.math.square(x - y), axis=1, keepdims=True)
  return tf.math.exp(tf.math.sqrt(tf.math.maximum(sum_square, tf.keras.backend.epsilon())))*(beta**2)

In [19]:
def loss(margin=1):
    """Provides 'constrastive_loss' an enclosing scope with variable 'margin'.

  Arguments:
      margin: Integer, defines the baseline for distance for which pairs
              should be classified as dissimilar. - (default is 1).

  Returns:
      'constrastive_loss' function with data ('margin') attached.
  """

    # Contrastive loss = mean( (1-true_value) * square(prediction) +
    #                         true_value * square( max(margin-prediction, 0) ))
    def contrastive_loss(y_true, y_pred):
        """Calculates the constrastive loss.

      Arguments:
          y_true: List of labels, each label is of type float32.
          y_pred: List of predictions of same length as of y_true,
                  each label is of type float32.

      Returns:
          A tensor containing constrastive loss as floating point value.
      """

        square_pred = tf.math.square(y_pred)
        margin_square = tf.math.square(tf.math.maximum(margin - (y_pred), 0))
        return tf.math.reduce_mean(
            (1 - y_true) * square_pred + (y_true) * margin_square
        )

    return contrastive_loss

In [20]:
from tensorflow.keras import backend as K

K.set_image_data_format('channels_last')

def get_siamese_model(input_shape):
    """
        Model architecture based on the one provided in: http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf
    """
    left_input = Input(input_shape)
    right_input = Input(input_shape)
    model = Sequential()
    model.add(Conv2D(64, (10,10), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D())
    model.add(Conv2D(128, (7,7), activation='relu'))
    model.add(MaxPooling2D())
    model.add(Conv2D(128, (4,4), activation='relu'))
    model.add(MaxPooling2D())
    model.add(Conv2D(256, (4,4), activation='relu'))
    model.add(Flatten())
    model.add(Dense(4096, activation='sigmoid'))
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    L1_layer = Lambda(lambda tensors:K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])
    # prediction = Dense(1,activation='sigmoid')(L1_distance)

    # merge_layer = Lambda(euclidean_distance)([encoded_l, encoded_r])
    # normal_layer = tf.keras.layers.BatchNormalization()(merge_layer)

    # merge_layer = Lambda(rbf_distance)([encoded_l, encoded_r])
    # normal_layer = tf.keras.layers.BatchNormalization()(merge_layer)

    # DotP_layer = Lambda(lambda tensors:K.sum((tensors[0]*tensors[1]), axis=-1, keepidms=True)
    # DotP_distance = DotP_layer([encoded_l, encoded_r])

    # L2 distance
    # L2_layer_sqr = Lambda(lambda tensors:K.square(tensors[0] - tensors[1]))
    # L2_distance_sqr = L2_layer_sqr([encoded_l, encoded_r])

    # prediction = Dense(4096,activation='sigmoid')(L2_distance_sqr)
    # L2_layer_sqrt = Lambda(lambda tensors:K.sqrt(tensors[0]))
    # L2_distance_sqrt = L2_layer_sqrt([prediction])
    # prediction = Dense(1, activation='sigmoid')(L2_distance_sqrt)

    # RBF similarity
    beta = 2
    layer_sqr = Lambda(lambda tensors:K.square(tensors[0] - tensors[1])*(beta**2))
    distance_sqr = layer_sqr([encoded_l, encoded_r])

    prediction = Dense(4096,activation='sigmoid')(distance_sqr)
    layer_exp = Lambda(lambda tensors:tf.keras.backend.exp(tensors[0]))
    distance_exp = layer_exp([prediction])
    prediction = Dense(1, activation='sigmoid')(distance_exp)

    #prediction = Dense(1,activation='sigmoid')(normal_layer)
    #prediction = Dense(1,activation='sigmoid')(DotP_distance)
    siamese_net = Model(inputs=[left_input,right_input],outputs=prediction)

    return siamese_net

In [21]:
model = get_siamese_model((105, 105, 1))
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 105, 105, 1  0           []                               
                                )]                                                                
                                                                                                  
 input_2 (InputLayer)           [(None, 105, 105, 1  0           []                               
                                )]                                                                
                                                                                                  
 sequential (Sequential)        (None, 4096)         38947648    ['input_1[0][0]',                
                                                                  'input_2[0][0]']            

In [22]:
margin=1

In [23]:
optimizer = Adam(lr = 0.00006)
model.compile(loss="binary_crossentropy" ,optimizer=optimizer)
#model.compile(loss=loss(margin=margin) ,optimizer="RMSProp", metrics=["accuracy"])

  super(Adam, self).__init__(name, **kwargs)


### Loading the train tensors

In [24]:
with open(os.path.join(save_path, "train.pickle"), "rb") as f:
    (Xtrain, train_classes) = pickle.load(f)
    
print("Training alphabets: \n")
print(list(train_classes.keys()))

Training alphabets: 

['Grantha', 'Gujarati', 'Futurama', 'Malay_(Jawi_-_Arabic)', 'Anglo-Saxon_Futhorc', 'N_Ko', 'Armenian', 'Braille', 'Blackfoot_(Canadian_Aboriginal_Syllabics)', 'Balinese', 'Arcadian', 'Cyrillic', 'Ojibwe_(Canadian_Aboriginal_Syllabics)', 'Asomtavruli_(Georgian)', 'Latin', 'Tagalog', 'Alphabet_of_the_Magi', 'Tifinagh', 'Japanese_(hiragana)', 'Hebrew', 'Mkhedruli_(Georgian)', 'Inuktitut_(Canadian_Aboriginal_Syllabics)', 'Early_Aramaic', 'Burmese_(Myanmar)', 'Japanese_(katakana)', 'Greek', 'Syriac_(Estrangelo)', 'Bengali', 'Korean', 'Sanskrit']


In [25]:
with open(os.path.join(save_path, "val.pickle"), "rb") as f:
    (Xval, val_classes) = pickle.load(f)

print("Validation alphabets:", end="\n\n")
print(list(val_classes.keys()))

Validation alphabets:

['Malayalam', 'Glagolitic', 'Tengwar', 'Atlantean', 'Manipuri', 'Keble', 'Sylheti', 'Angelic', 'Kannada', 'Gurmukhi', 'Tibetan', 'ULOG', 'Aurek-Besh', 'Avesta', 'Mongolian', 'Atemayar_Qelisayer', 'Ge_ez', 'Old_Church_Slavonic_(Cyrillic)', 'Oriya', 'Syriac_(Serto)']


In [26]:
def get_batch(batch_size,s="train"):
    """Create batch of n pairs, half same class, half different class"""
    if s == 'train':
        X = Xtrain
        categories = train_classes
    else:
        X = Xval
        categories = val_classes
    n_classes, n_examples, w, h = X.shape

    # randomly sample several classes to use in the batch
    categories = rng.choice(n_classes,size=(batch_size,),replace=False)
    
    # initialize 2 empty arrays for the input image batch
    pairs=[np.zeros((batch_size, h, w,1)) for i in range(2)]
    
    # initialize vector for the targets
    targets=np.zeros((batch_size,))
    
    # make one half of it '1's, so 2nd half of batch has same class
    targets[batch_size//2:] = 1
    for i in range(batch_size):
        category = categories[i]
        idx_1 = rng.randint(0, n_examples)
        pairs[0][i,:,:,:] = X[category, idx_1].reshape(w, h, 1)
        idx_2 = rng.randint(0, n_examples)
        
        # pick images of same class for 1st half, different for 2nd
        if i >= batch_size // 2:
            category_2 = category  
        else: 
            # add a random number to the category modulo n classes to ensure 2nd image has a different category
            category_2 = (category + rng.randint(1,n_classes)) % n_classes
        
        pairs[1][i,:,:,:] = X[category_2,idx_2].reshape(w, h,1)
    
    return pairs, targets

In [27]:
def generate(batch_size, s="train"):
    """a generator for batches, so model.fit_generator can be used. """
    while True:
        pairs, targets = get_batch(batch_size,s)
        yield (pairs, targets)

In [28]:
def make_oneshot_task(N, s="val", language=None):
    """Create pairs of test image, support set for testing N way one-shot learning. """
    if s == 'train':
        X = Xtrain
        categories = train_classes
    else:
        X = Xval
        categories = val_classes
    n_classes, n_examples, w, h = X.shape
    
    indices = rng.randint(0, n_examples,size=(N,))
    if language is not None: # if language is specified, select characters for that language
        low, high = categories[language]
        if N > high - low:
            raise ValueError("This language ({}) has less than {} letters".format(language, N))
        categories = rng.choice(range(low,high),size=(N,),replace=False)

    else: # if no language specified just pick a bunch of random letters
        categories = rng.choice(range(n_classes),size=(N,),replace=False)            
    true_category = categories[0]
    ex1, ex2 = rng.choice(n_examples,replace=False,size=(2,))
    test_image = np.asarray([X[true_category,ex1,:,:]]*N).reshape(N, w, h,1)
    support_set = X[categories,indices,:,:]
    support_set[0,:,:] = X[true_category,ex2]
    support_set = support_set.reshape(N, w, h,1)
    targets = np.zeros((N,))
    targets[0] = 1
    targets, test_image, support_set = shuffle(targets, test_image, support_set)
    pairs = [test_image,support_set]

    return pairs, targets

In [29]:
def test_oneshot(model, N, k, s = "val", verbose = 0):
    """Test average N way oneshot learning accuracy of a siamese neural net over k one-shot tasks"""
    n_correct = 0
    if verbose:
        print("Evaluating model on {} random {} way one-shot learning tasks ... \n".format(k,N))
    for i in range(k):
        inputs, targets = make_oneshot_task(N,s)
        probs = model.predict(inputs)
        if np.argmax(probs) == np.argmax(targets):
            n_correct+=1
    percent_correct = (100.0 * n_correct / k)
    if verbose:
        print("Got an average of {}% {} way one-shot learning accuracy \n".format(percent_correct,N))
    return percent_correct

In [30]:
# Hyper parameters
evaluate_every = 200 # interval for evaluating on one-shot tasks
batch_size = 32
n_iter = 20000 # No. of training iterations
N_way = 10 # how many classes for testing one-shot tasks
n_val = 250 # how many one-shot tasks to validate on
best = -1

In [31]:
model_path = '/content/weights/'

In [32]:
print("Starting training process!")
print("-------------------------------------")
t_start = time.time()
for i in range(1, n_iter+1):
    (inputs,targets) = get_batch(batch_size)
    loss = model.train_on_batch(inputs, targets)
    if i % evaluate_every == 0:
        print("\n ------------- \n")
        print("Time for {0} iterations: {1} mins".format(i, (time.time()-t_start)/60.0))
        print("Train Loss: {0}".format(loss)) 
        val_acc = test_oneshot(model, N_way, n_val, verbose=True)
        model.save_weights(os.path.join(model_path, 'weights.{}.h5'.format(i)))
        if val_acc >= best:
            print("Current best: {0}, previous best: {1}".format(val_acc, best))
            best = val_acc

Starting training process!
-------------------------------------


KeyboardInterrupt: ignored

### Load model weights

In [None]:
model.load_weights(os.path.join(model_path, "weights.6800.h5"))

### Testing

### Baseline model based on Nearest Neighbors using Euclidean distance (L2 distance)

In [None]:
def nearest_neighbour_correct_L2(pairs,targets):
    """returns 1 if nearest neighbour gets the correct answer for a one-shot task
        given by (pairs, targets)"""
    L2_distances = np.zeros_like(targets)
    for i in range(len(targets)):
        L2_distances[i] = np.sum(np.sqrt(pairs[0][i]**2 - pairs[1][i]**2))
    if np.argmin(L2_distances) == np.argmax(targets):
        return 1
    return 0

In [None]:
def nearest_neighbour_correct_L1(pairs,targets):
    """returns 1 if nearest neighbour gets the correct answer for a one-shot task
        given by (pairs, targets)"""
    L1_distances = np.zeros_like(targets)
    
    for i in range(len(targets)):
      L1_distances[i] = np.sum(np.absolute(pairs[0][i] - pairs[1][i]))

      if np.argmin(L1_distances) == np.argmax(targets):
        return 1

    return 0

In [None]:
def test_nn_accuracy(N_ways,n_trials):
    """Returns accuracy of NN approach """
    print("Evaluating nearest neighbour on {} unique {} way one-shot learning tasks ...".format(n_trials,N_ways))

    n_right = 0
    
    for i in range(n_trials):
        pairs,targets = make_oneshot_task(N_ways,"val")
        correct = nearest_neighbour_correct_L1(pairs,targets)
        n_right += correct
    return 100.0 * n_right / n_trials

In [None]:
ways = np.arange(1,20,2)
resume =  False
trials = 50

In [None]:
val_accs, train_accs,nn_accs = [], [], []
for N in ways:    
    val_accs.append(test_oneshot(model, N, trials, "val", verbose=True))
    train_accs.append(test_oneshot(model, N, trials, "train", verbose=True))
    nn_acc = test_nn_accuracy(N, trials)
    nn_accs.append(nn_acc)
    print ("NN Accuracy = ", nn_acc)
    print("---------------------------------------------------------------------------------------------------------------")

### Save the accuracies on disk

In [None]:
with open(os.path.join(save_path,"accuracies.pickle"), "wb") as f:
    pickle.dump((val_accs,train_accs,nn_accs),f)

### Load the accuracies from disk

In [None]:
with open(os.path.join(save_path, "accuracies.pickle"), "rb") as f:
    (val_accs, train_accs, nn_accs) = pickle.load(f)

### Below two functions are used for visualizing test image and support set

In [None]:
def concat_images(X):
    """Concatenates a bunch of images into a big matrix for plotting purposes."""
    nc, h , w, _ = X.shape
    X = X.reshape(nc, h, w)
    n = np.ceil(np.sqrt(nc)).astype("int8")
    img = np.zeros((n*w,n*h))
    x = 0
    y = 0
    for example in range(nc):
        img[x*w:(x+1)*w,y*h:(y+1)*h] = X[example]
        y += 1
        if y >= n:
            y = 0
            x += 1
    return img

In [None]:
def plot_oneshot_task(pairs):
    fig,(ax1,ax2) = plt.subplots(nrows=1, ncols=2)
    ax1.matshow(pairs[0][0].reshape(105,105), cmap='gray')
    img = concat_images(pairs[1])
    ax1.get_yaxis().set_visible(False)
    ax1.get_xaxis().set_visible(False)
    ax2.matshow(img,cmap='gray')
    plt.xticks([])
    plt.yticks([])
    plt.show()

In [None]:
# Example of concat image visualization
pairs, targets = make_oneshot_task(16,"train","Sanskrit")
plot_oneshot_task(pairs)

### Resuts

In [None]:
fig,ax = plt.subplots(1)
ax.plot(ways, val_accs, "m", label="Siamese(val set)")
ax.plot(ways, train_accs, "y", label="Siamese(train set)")
plt.plot(ways, nn_accs, label="Nearest neighbour")

ax.plot(ways, 100.0/ways, "g", label="Random guessing")
plt.xlabel("Number of possible classes in one-shot tasks")
plt.ylabel("% Accuracy")
plt.title("Omiglot One-Shot Learning Performance of a Siamese Network")
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
inputs,targets = make_oneshot_task(20, "val", 'Oriya')
plt.show()

plot_oneshot_task(inputs)