In [None]:
from __future__ import division, print_function
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, BatchNormalization, Activation, Reshape, UpSampling2D
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.layers import Activation, Dense
import numpy as np
import pandas as pd
import cv2 as cv
import itertools

from itertools import combinations

import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'

# Multilayer Fully-connected Neural Network

In [None]:
# Functional-API multilayer perceptron: 10 inputs -> 10 -> 20 -> 10 ReLU units -> 1 sigmoid unit.
visible = Input(shape=(10,))
hidden1 = Dense(units=10, activation='relu')(visible)
hidden2 = Dense(units=20, activation='relu')(hidden1)
hidden3 = Dense(units=10, activation='relu')(hidden2)
output = Dense(units=1, activation='sigmoid')(hidden3)
model = Model(inputs=visible, outputs=output)

# Build the Graphviz description first, then render it as the cell's SVG output.
layer_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(layer_graph.create(prog='dot', format='svg'))

In [None]:
# Convolutional classifier: two Conv2D + MaxPooling2D stages followed by a dense head.
# Flatten is imported here because the notebook only imports it in a later cell.
from keras.layers import Flatten

visible = Input(shape=(64,64,1))
conv1 = Conv2D(32, kernel_size=4, activation='relu')(visible)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(16, kernel_size=4, activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
# Collapse the pooled feature maps into one vector per image. Without this
# step the Dense layers only act on the last (channel) axis and the model
# outputs a 4-D tensor instead of a single sigmoid score per image.
flat = Flatten()(pool2)
hidden1 = Dense(10, activation='relu')(flat)
output = Dense(1, activation='sigmoid')(hidden1)
model = Model(inputs=visible, outputs=output)
# summarize layers
SVG(model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB').create(prog='dot', format='svg'))


# Shared Input Layer

In [None]:
from keras.layers.merge import concatenate
from keras.layers import Flatten

# A single 64x64x1 input feeds two parallel convolutional branches that
# differ in filter count and kernel size; their flattened outputs are merged.
visible = Input(shape=(64, 64, 1))

# Branch 1: 32 filters, kernel size 4.
conv1 = Conv2D(filters=32, kernel_size=4, activation='relu')(visible)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
flat1 = Flatten()(pool1)

# Branch 2: 16 filters, kernel size 8.
conv2 = Conv2D(filters=16, kernel_size=8, activation='relu')(visible)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
flat2 = Flatten()(pool2)

# Concatenate both feature vectors, interpret them, and predict.
merge = concatenate([flat1, flat2])
hidden1 = Dense(units=10, activation='relu')(merge)
output = Dense(units=1, activation='sigmoid')(hidden1)
model = Model(inputs=visible, outputs=output)

layer_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(layer_graph.create(prog='dot', format='svg'))


# Shared Feature Extraction

In [None]:
# One Dense layer (no activation) extracts features from the (100, 1) input;
# a one-layer branch and a three-layer branch both read those features.
visible = Input(shape=(100, 1))
extract1 = Dense(units=10)(visible)

# Shallow interpretation branch.
interp1 = Dense(units=10, activation='relu')(extract1)

# Deep interpretation branch: 10 -> 20 -> 10 units.
interp11 = Dense(units=10, activation='relu')(extract1)
interp12 = Dense(units=20, activation='relu')(interp11)
interp13 = Dense(units=10, activation='relu')(interp12)

# Join the two branches and map the result to a sigmoid output.
merge = concatenate([interp1, interp13])
output = Dense(units=1, activation='sigmoid')(merge)
model = Model(inputs=visible, outputs=output)

layer_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(layer_graph.create(prog='dot', format='svg'))


# Multiple Input

In [None]:
# Two independent convolutional towers, one per input, merged before the dense head.

# Tower 1: 64x64 single-channel input.
visible1 = Input(shape=(64, 64, 1))
conv11 = Conv2D(filters=32, kernel_size=4, activation='relu')(visible1)
pool11 = MaxPooling2D(pool_size=(2, 2))(conv11)
conv12 = Conv2D(filters=16, kernel_size=4, activation='relu')(pool11)
pool12 = MaxPooling2D(pool_size=(2, 2))(conv12)
flat1 = Flatten()(pool12)

# Tower 2: 32x32 three-channel input, same layer layout as tower 1.
visible2 = Input(shape=(32, 32, 3))
conv21 = Conv2D(filters=32, kernel_size=4, activation='relu')(visible2)
pool21 = MaxPooling2D(pool_size=(2, 2))(conv21)
conv22 = Conv2D(filters=16, kernel_size=4, activation='relu')(pool21)
pool22 = MaxPooling2D(pool_size=(2, 2))(conv22)
flat2 = Flatten()(pool22)

# Merge both towers and interpret the joint feature vector.
merge = concatenate([flat1, flat2])
hidden1 = Dense(units=10, activation='relu')(merge)
hidden2 = Dense(units=10, activation='relu')(hidden1)
output = Dense(units=1, activation='sigmoid')(hidden2)
model = Model(inputs=[visible1, visible2], outputs=output)

layer_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(layer_graph.create(prog='dot', format='svg'))


# Multiple Output

In [None]:
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed

# A shared sequence-returning LSTM feeds two heads: one prediction for the
# whole sequence and one prediction per time step.
visible = Input(shape=(100, 1))
extract = LSTM(units=10, return_sequences=True)(visible)

# Head 1: second LSTM -> Dense -> single sigmoid value.
class11 = LSTM(units=10)(extract)
class12 = Dense(units=10, activation='relu')(class11)
output1 = Dense(units=1, activation='sigmoid')(class12)

# Head 2: a linear Dense unit applied at every time step.
output2 = TimeDistributed(Dense(units=1, activation='linear'))(extract)

model = Model(inputs=visible, outputs=[output1, output2])

layer_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(layer_graph.create(prog='dot', format='svg'))


# Siamese

In [None]:
imDim = 64
input_shape = (imDim, imDim, 1)
inp_img = Input(shape=(imDim, imDim, 1), name='ImageInput')

# Feature extractor, described as (filters, kernel_size, pool_afterwards) per stage.
conv_stages = [
    (32, (3, 3), True),
    (64, (3, 3), True),
    (128, (3, 3), True),
    (256, (1, 1), True),
    (64, (1, 1), False),
]

model = inp_img
for stage_idx, (n_filters, k_size, pool_after) in enumerate(conv_stages):
    # Only the first convolution carries the (redundant) input_shape argument.
    extra = {'input_shape': input_shape} if stage_idx == 0 else {}
    model = Conv2D(n_filters, kernel_size=k_size, activation='relu', padding='valid', **extra)(model)
    if pool_after:
        model = MaxPooling2D((2, 2), padding='valid')(model)
model = Flatten()(model)

feat = Model(inputs=[inp_img], outputs=[model], name='Feat_Model')
feat.summary()

# Both images of a pair go through the same `feat` model, so the weights are shared.
left_img = Input(shape=(imDim, imDim, 1), name='left_img')
right_img = Input(shape=(imDim, imDim, 1), name='right_img')

left_feats = feat(left_img)
right_feats = feat(right_img)

from keras.layers import concatenate
import random

# Comparison head: concatenate both feature vectors, run two
# Dense -> BatchNormalization -> ReLU stages, then a single sigmoid unit.
merged_feats = concatenate([left_feats, right_feats], name='concat_feats')
for head_units in (1024, 4):
    merged_feats = Dense(head_units, activation='linear')(merged_feats)
    merged_feats = BatchNormalization()(merged_feats)
    merged_feats = Activation('relu')(merged_feats)
merged_feats = Dense(1, activation='sigmoid')(merged_feats)

similarity_model = Model(inputs=[left_img, right_img], outputs=[merged_feats], name='Similarity_Model')
similarity_model.summary()

similarity_model.compile(optimizer='RMSprop', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Augmentation settings used for training: pixel values are multiplied by
# 1/255 and images are randomly sheared, rotated and shifted.
train_augmentation = {
    'rescale': 1./255,
    'shear_range': 0.2,
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
}
train_datagen = ImageDataGenerator(**train_augmentation)

# Configuration for testing/validation: rescaling only, because these images
# are just used for prediction and must not be altered.
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
def generate_generator_multiple(generator,dir1, dir2, batch_size, img_height,img_width):
    """Yield batches of image pairs with a same-prefix label for each pair.

    Two unshuffled grayscale directory iterators are opened (one per
    directory) and advanced in lock-step. At every step all combinations of
    the two current batches are formed, so one step yields
    ``len(left_batch) * len(right_batch)`` pairs.

    Parameters
    ----------
    generator : object
        Anything exposing ``flow_from_directory`` (the notebook passes an
        ``ImageDataGenerator``).
    dir1, dir2 : str
        Directories providing the left and right images of each pair.
    batch_size : int
        Batch size of each underlying iterator (not of the yielded pair batch).
    img_height, img_width : int
        Target size the images are resized to.

    Yields
    ------
    ([left_images, right_images], labels)
        ``labels[k]`` is 1.0 when the two file names agree on the first four
        characters of the part before the first underscore (presumably the
        writer id -- confirm against the dataset naming scheme), else 0.0.
    """
    def _open_flow(directory):
        # shuffle=False is required: the label lookup below relies on batches
        # arriving in the same order as `filenames`.
        return generator.flow_from_directory(directory,
                                             target_size = (img_height,img_width),
                                             color_mode = 'grayscale',
                                             class_mode = None,
                                             batch_size = batch_size,
                                             shuffle=False,
                                             seed=7)

    def _file_keys(flow):
        # Comparison key per file: first four characters before the first '_'.
        return np.array([os.path.basename(name).split('_')[0][0:4]
                         for name in flow.filenames])

    genX1 = _open_flow(dir1)
    genX2 = _open_flow(dir2)
    keys1 = _file_keys(genX1)
    keys2 = _file_keys(genX2)

    # Offset of the current batch inside each iterator's file list.
    start1 = 0
    start2 = 0
    while True:
        # Advance each iterator exactly once per step and reuse the batch.
        X1i = next(genX1)
        X2i = next(genX2)
        n1, n2 = len(X1i), len(X2i)

        batch_keys1 = keys1[start1:start1 + n1]
        batch_keys2 = keys2[start2:start2 + n2]
        # The iterators start over after their (possibly shorter) last batch.
        start1 = start1 + n1 if start1 + n1 < len(keys1) else 0
        start2 = start2 + n2 if start2 + n2 < len(keys2) else 0

        # Pair k combines left image k // n2 with right image k % n2.
        left_idx = np.repeat(np.arange(n1), n2)
        right_idx = np.tile(np.arange(n2), n1)
        labels = (batch_keys1[left_idx] == batch_keys2[right_idx]).astype('float32')

        yield [X1i[left_idx], X2i[right_idx]], labels
            

# Settings shared by the training and validation pair generators.
shared_flow_args = dict(batch_size=32, img_height=64, img_width=64)

# For each split, dir1 and dir2 are the same directory.
train_generator = generate_generator_multiple(generator=train_datagen,
                                              dir1='./seen-dataset/TrainingSet',
                                              dir2='./seen-dataset/TrainingSet',
                                              **shared_flow_args)

test_generator = generate_generator_multiple(generator=test_datagen,
                                             dir1='./seen-dataset/ValidationSet',
                                             dir2='./seen-dataset/ValidationSet',
                                             **shared_flow_args)

In [None]:
# Smoke run: one training step and one validation step for a single epoch.
smoke_run_args = dict(steps_per_epoch=1,
                      epochs=1,
                      validation_data=test_generator,
                      validation_steps=1,
                      shuffle=False)
history = similarity_model.fit_generator(train_generator, **smoke_run_args)

In [None]:
# Full run. Keras expects integer step counts; the original 114389/32 and
# 906/32 evaluate to floats, so use integer ceiling division to also cover
# the final partial batch.
# NOTE(review): 114389 and 906 are presumably the training / validation
# sample counts -- confirm against the dataset.
STEP_BATCH_SIZE = 32
train_steps = (114389 + STEP_BATCH_SIZE - 1) // STEP_BATCH_SIZE
validation_steps = (906 + STEP_BATCH_SIZE - 1) // STEP_BATCH_SIZE

history = similarity_model.fit_generator(train_generator,
                                         steps_per_epoch = train_steps,
                                         epochs = 1,
                                         validation_data = test_generator,
                                         validation_steps = validation_steps,
                                         shuffle = False)

## Unseen Dataset

In [None]:
# Read the training pair list of the unseen dataset and preview its first rows.
writer_pairs_unseen=pd.read_csv("./unseen-dataset/dataset_unseen_training_siamese.csv")
writer_pairs_unseen.head()

In [None]:
# Read every file of the unseen training set as an inverted grayscale
# 64x64x1 array and attach the pixel data to both sides of each pair.
path = "./unseen-dataset/TrainingSet"
# List the directory once so that image ids and pixel arrays are built from
# the same sequence and cannot get out of step.
image_dir = os.listdir(path)
img_values = []
for file in image_dir:
    var = cv.imread(os.path.join(path, file), 0)  # flag 0: read as grayscale
    var_new = 255 - var                           # invert intensities
    img_values.append(np.array(var_new).reshape(64, 64, 1))

df = pd.DataFrame(columns=['image_id', 'values'])
df['image_id'] = image_dir
df['values'] = img_values

# Join the pixel data for the 'left' image, then for the 'right' image
# (pandas suffixes the duplicated columns with _x / _y).
df1 = pd.merge(writer_pairs_unseen, df, left_on='left', right_on='image_id', how='inner')
df2 = pd.merge(df1, df, left_on='right', right_on='image_id')
# Shuffle all rows. DataFrame has no `.df` method; `.sample(frac=1)` is the
# call that returns every row in random order.
data = df2.sample(frac=1).reset_index(drop=True)
data = data.drop(['Unnamed: 0', 'image_id_x', 'image_id_y'], axis=1)
print(data)

In [None]:
# Keep the first 120000 pairs, cut them into 30 equal chunks and fit the
# model on the first half of those chunks, one chunk per fit() call.
data = data.iloc[:120000, :]

t = np.split(data, 30)
half = len(t) // 2
for i, chunk in enumerate(t[:half]):
    img_1 = list(chunk['values_x'])
    img_2 = list(chunk['values_y'])
    y = chunk['label']
    resultant = [img_1, img_2]
    similarity_model.fit(resultant, y, validation_split=0.1, batch_size=100, epochs=1)

#### Unseen Validation

In [None]:
# Read the validation pair list of the unseen dataset and preview its first rows.
writer_pairs_useen_val=pd.read_csv("./unseen-dataset/dataset_unseen_validation_siamese.csv")
writer_pairs_useen_val.head()

In [None]:
# Read every file of the unseen validation set as an inverted grayscale
# 64x64x1 array and attach the pixel data to both sides of each pair.
path = "./unseen-dataset/ValidationSet"
# List the directory once so that image ids and pixel arrays are built from
# the same sequence and cannot get out of step.
image_dir = os.listdir(path)
img_values = []
for file in image_dir:
    var = cv.imread(os.path.join(path, file), 0)  # flag 0: read as grayscale
    var_new = 255 - var                           # invert intensities
    img_values.append(np.array(var_new).reshape(64, 64, 1))

df = pd.DataFrame(columns=['image_id', 'values'])
df['image_id'] = image_dir
df['values'] = img_values

# Merge against the unseen *validation* pair list. The original merged
# against `pairs_seen`, which belongs to the seen dataset and is not defined
# until a later cell.
df1 = pd.merge(writer_pairs_useen_val, df, left_on='left', right_on='image_id', how='inner')
df2 = pd.merge(df1, df, left_on='right', right_on='image_id')
# Shuffle all rows. DataFrame has no `.df` method; `.sample(frac=1)` is the
# call that returns every row in random order.
data = df2.sample(frac=1).reset_index(drop=True)
data = data.drop(['Unnamed: 0', 'image_id_x', 'image_id_y'], axis=1)

In [None]:
# Evaluate the similarity model on the validation pairs held in `data`.
# The reduced frame gets its own name instead of overwriting `data`, so the
# cell can be re-run without a KeyError on 'label'.
y_data = data['label']
pair_images = data.drop(columns=['left', 'right', 'label'])
print(pair_images.columns)
img_1 = list(pair_images['values_x'])
img_2 = list(pair_images['values_y'])
resultant = [img_1, img_2]
score = similarity_model.evaluate(resultant, y_data)
# The loss is a raw cross-entropy value, not a percentage; only the accuracy
# is scaled to percent.
print("\nBinary Crossentropy %s: %.4f" % (similarity_model.metrics_names[0], score[0]))
print("\n%s: %.2f%%" % (similarity_model.metrics_names[1], score[1]*100))

## Seen Dataset

In [None]:
# Read the training pair list of the seen dataset and preview its first rows.
writer_pairs_seen=pd.read_csv("./seen-dataset/dataset_seen_training_siamese.csv")
writer_pairs_seen.head()

In [None]:
# Read every file of the seen training set as an inverted grayscale
# 64x64x1 array and attach the pixel data to both sides of each pair.
path = "./seen-dataset/TrainingSet"
# List the directory once so that image ids and pixel arrays are built from
# the same sequence and cannot get out of step.
image_dir = os.listdir(path)
img_values = []
for file in image_dir:
    var = cv.imread(os.path.join(path, file), 0)  # flag 0: read as grayscale
    var_new = 255 - var                           # invert intensities
    img_values.append(np.array(var_new).reshape(64, 64, 1))

df = pd.DataFrame(columns=['image_id', 'values'])
df['image_id'] = image_dir
df['values'] = img_values

# Join the pixel data for the 'left' image, then for the 'right' image
# (pandas suffixes the duplicated columns with _x / _y).
df1 = pd.merge(writer_pairs_seen, df, left_on='left', right_on='image_id', how='inner')
df2 = pd.merge(df1, df, left_on='right', right_on='image_id')
# Shuffle all rows. DataFrame has no `.df` method; `.sample(frac=1)` is the
# call that returns every row in random order.
data = df2.sample(frac=1).reset_index(drop=True)
data = data.drop(['Unnamed: 0', 'image_id_x', 'image_id_y'], axis=1)
print(data)

In [None]:
# Cut the pairs into 30 chunks and fit the model on the first half of them,
# one chunk per fit() call.
# np.array_split is used because this cell does not truncate `data` first:
# np.split raises unless the row count is an exact multiple of 30, whereas
# array_split returns the same chunks in that case and near-equal ones otherwise.
t = np.array_split(data, 30)
for i in range(len(t)//2):
    t1 = list(t[i]['values_x'])
    t2 = list(t[i]['values_y'])
    y = t[i]['label']
    final = [t1,t2]
    similarity_model.fit(final,y,validation_split=0.1,batch_size=100,epochs=1)

#### Seen Validation

In [None]:
# Validation Dataset
# Read the validation pair list of the seen dataset and show its (rows, columns) shape.
pairs_seen=pd.read_csv("./seen-dataset/dataset_seen_validation_siamese.csv")
pairs_seen.shape

In [None]:
# Read every file of the seen validation set as an inverted grayscale
# 64x64x1 array and attach the pixel data to both sides of each pair.
path = "./seen-dataset/ValidationSet"
# List the directory once so that image ids and pixel arrays are built from
# the same sequence and cannot get out of step.
image_dir = os.listdir(path)
img_values = []
for file in image_dir:
    var = cv.imread(os.path.join(path, file), 0)  # flag 0: read as grayscale
    var_new = 255 - var                           # invert intensities
    img_values.append(np.array(var_new).reshape(64, 64, 1))

df = pd.DataFrame(columns=['image_id', 'values'])
df['image_id'] = image_dir
df['values'] = img_values

# Join the pixel data for the 'left' image, then for the 'right' image
# (pandas suffixes the duplicated columns with _x / _y).
df1 = pd.merge(pairs_seen, df, left_on='left', right_on='image_id', how='inner')
df2 = pd.merge(df1, df, left_on='right', right_on='image_id')
# Shuffle all rows. DataFrame has no `.df` method; `.sample(frac=1)` is the
# call that returns every row in random order.
data = df2.sample(frac=1).reset_index(drop=True)
# Drop the helper columns from the shuffled frame itself. The original called
# `final.drop`, but `final` is the plain list left over from the training loop.
data = data.drop(['Unnamed: 0', 'image_id_x', 'image_id_y'], axis=1)

In [None]:
# Evaluate the similarity model on the validation pairs held in `data`.
# The reduced frame gets its own name instead of overwriting `data`, so the
# cell can be re-run without a KeyError on 'label'.
y_data = data['label']
pair_images = data.drop(columns=['left', 'right', 'label'])
print(pair_images.columns)
img_1 = list(pair_images['values_x'])
img_2 = list(pair_images['values_y'])
resultant = [img_1, img_2]
score = similarity_model.evaluate(resultant, y_data)
# The loss is a raw cross-entropy value, not a percentage; only the accuracy
# is scaled to percent.
print("\nBinary Crossentropy %s: %.4f" % (similarity_model.metrics_names[0], score[0]))
print("\n%s: %.2f%%" % (similarity_model.metrics_names[1], score[1]*100))

In [None]:
from keras.preprocessing.image import ImageDataGenerator
######## Keras Data Generator ##################
# Augmenting generator with feature-wise centering and std-normalisation plus
# random rotations, width/height shifts and horizontal flips.
datagen = ImageDataGenerator(featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)
# NOTE(review): `x` is not assigned in any cell visible in this notebook, so
# this call presumably raises NameError on a fresh kernel. `y` is whatever
# label Series the last training loop left behind. Confirm which image array
# the feature-wise statistics should be fitted on.
datagen.fit(x,y)

## Shuffled Dataset

In [None]:
# Read the training pair list of the shuffled dataset and preview its first rows.
writer_pairs_shuffled = pd.read_csv("./shuffled-dataset/dataset_shuffled_training_siamese.csv")
writer_pairs_shuffled.head()

In [None]:
# Read every file of the shuffled training set as an inverted grayscale
# 64x64x1 array and attach the pixel data to both sides of each pair.
path = "./shuffled-dataset/TrainingSet"
# List the directory once so that image ids and pixel arrays are built from
# the same sequence and cannot get out of step.
image_dir = os.listdir(path)
img_values = []
for file in image_dir:
    var = cv.imread(os.path.join(path, file), 0)  # flag 0: read as grayscale
    var_new = 255 - var                           # invert intensities
    img_values.append(np.array(var_new).reshape(64, 64, 1))

df = pd.DataFrame(columns=['image_id', 'values'])
df['image_id'] = image_dir
df['values'] = img_values

# Join the pixel data for the 'left' image, then for the 'right' image
# (pandas suffixes the duplicated columns with _x / _y).
df1 = pd.merge(writer_pairs_shuffled, df, left_on='left', right_on='image_id', how='inner')
df2 = pd.merge(df1, df, left_on='right', right_on='image_id')
# Shuffle all rows. DataFrame has no `.df` method; `.sample(frac=1)` is the
# call that returns every row in random order.
data = df2.sample(frac=1).reset_index(drop=True)
data = data.drop(['Unnamed: 0', 'image_id_x', 'image_id_y'], axis=1)
print(data)

In [None]:
# Keep the first 102000 pairs, cut them into 30 equal chunks and fit the
# model on the first half of those chunks, one chunk per fit() call.
data = data.iloc[:102000, :]

t = np.split(data, 30)
half = len(t) // 2
for i, chunk in enumerate(t[:half]):
    img_1 = list(chunk['values_x'])
    img_2 = list(chunk['values_y'])
    y = chunk['label']
    resultant = [img_1, img_2]
    similarity_model.fit(resultant, y, validation_split=0.1, batch_size=100, epochs=1)

#### Shuffled validation

In [None]:
# Read the validation pair list of the shuffled dataset and preview its first rows.
writer_pairs_shuffled_val=pd.read_csv("./shuffled-dataset/dataset_shuffled_validation_siamese.csv")
writer_pairs_shuffled_val.head()

In [None]:
# Read every file of the shuffled validation set as an inverted grayscale
# 64x64x1 array and attach the pixel data to both sides of each pair.
path = "./shuffled-dataset/ValidationSet"
# List the directory once so that image ids and pixel arrays are built from
# the same sequence and cannot get out of step.
image_dir = os.listdir(path)
img_values = []
for file in image_dir:
    var = cv.imread(os.path.join(path, file), 0)  # flag 0: read as grayscale
    var_new = 255 - var                           # invert intensities
    img_values.append(np.array(var_new).reshape(64, 64, 1))

df = pd.DataFrame(columns=['image_id', 'values'])
df['image_id'] = image_dir
df['values'] = img_values

# Join the pixel data for the 'left' image, then for the 'right' image
# (pandas suffixes the duplicated columns with _x / _y).
df1 = pd.merge(writer_pairs_shuffled_val, df, left_on='left', right_on='image_id', how='inner')
df2 = pd.merge(df1, df, left_on='right', right_on='image_id')
# Shuffle all rows. DataFrame has no `.df` method; `.sample(frac=1)` is the
# call that returns every row in random order.
data = df2.sample(frac=1).reset_index(drop=True)
data = data.drop(['Unnamed: 0', 'image_id_x', 'image_id_y'], axis=1)

In [None]:
# Evaluate the similarity model on the validation pairs held in `data`.
# The reduced frame gets its own name instead of overwriting `data`, so the
# cell can be re-run without a KeyError on 'label'.
y_data = data['label']
pair_images = data.drop(columns=['left', 'right', 'label'])
print(pair_images.columns)
img_1 = list(pair_images['values_x'])
img_2 = list(pair_images['values_y'])
resultant = [img_1, img_2]
score = similarity_model.evaluate(resultant, y_data)
# The loss is a raw cross-entropy value, not a percentage; only the accuracy
# is scaled to percent.
print("\nBinary Crossentropy %s: %.4f" % (similarity_model.metrics_names[0], score[0]))
print("\n%s: %.2f%%" % (similarity_model.metrics_names[1], score[1]*100))