In [None]:
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, BatchNormalization, Activation, Reshape, UpSampling2D
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import pydot as pyd
from IPython.display import SVG
import sklearn.metrics.pairwise as metrics

from keras.layers import Flatten, Dense
import os

# The model_to_dot(...).create(prog='dot', ...) calls in this notebook need Graphviz's
# `dot` executable to be discoverable through PATH.
# NOTE(review): this is a machine-specific Windows install location; adjust it for your environment.
GRAPHVIZ_BIN = 'C:/Program Files (x86)/Graphviz2.38/bin/'
# Append only when the directory exists and is not already listed, so re-running this
# cell does not keep growing PATH and machines without this install are left untouched.
if os.path.isdir(GRAPHVIZ_BIN) and GRAPHVIZ_BIN not in os.environ.get("PATH", "").split(os.pathsep):
    os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + GRAPHVIZ_BIN

# Multilayer Fully-connected Neural Network

In [None]:
# Fully-connected network: 10 input features -> ReLU layers of 10, 20 and 10 units
# -> a single sigmoid output unit.
visible = Input(shape=(10,))
hidden = visible
for n_units in (10, 20, 10):
    hidden = Dense(n_units, activation='relu')(hidden)
output = Dense(1, activation='sigmoid')(hidden)
model = Model(inputs=visible, outputs=output)
# Render the layer graph as an inline SVG (last expression of the cell is displayed).
layer_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(layer_graph.create(prog='dot', format='svg'))

In [None]:
# Convolutional classifier for 64x64 single-channel images:
# two Conv2D + 2x2 max-pool stages followed by a small dense head.
visible = Input(shape=(64,64,1))
conv1 = Conv2D(32, kernel_size=4, activation='relu')(visible)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(16, kernel_size=4, activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
# Flatten the pooled feature maps before the dense head. Without this, Dense operates
# on the last axis of the 4-D tensor and the model would output a spatial map of
# sigmoid values instead of one prediction per image.
flat = Flatten()(pool2)
hidden1 = Dense(10, activation='relu')(flat)
output = Dense(1, activation='sigmoid')(hidden1)
model = Model(inputs=visible, outputs=output)
# Render the layer graph as an inline SVG.
SVG(model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB').create(prog='dot', format='svg'))


# Shared Input Layer

In [None]:
from keras.layers.merge import concatenate
from keras.layers import Flatten


def _flat_conv_features(tensor, n_filters, kernel):
    """Apply Conv2D (ReLU) -> 2x2 max-pool -> Flatten to `tensor` and return the result."""
    convolved = Conv2D(n_filters, kernel_size=kernel, activation='relu')(tensor)
    pooled = MaxPooling2D(pool_size=(2, 2))(convolved)
    return Flatten()(pooled)


# One 64x64x1 input feeds two feature extractors that differ in filter count and kernel size.
visible = Input(shape=(64,64,1))
flat1 = _flat_conv_features(visible, 32, 4)
flat2 = _flat_conv_features(visible, 16, 8)
# Join both flattened feature vectors, then interpret them with a small dense head.
merge = concatenate([flat1, flat2])
hidden1 = Dense(10, activation='relu')(merge)
output = Dense(1, activation='sigmoid')(hidden1)
model = Model(inputs=visible, outputs=output)
shared_input_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(shared_input_graph.create(prog='dot', format='svg'))


# Shared Feature Extraction

In [None]:
# A single feature-extraction layer is shared by two interpretation branches.
# Relies on `concatenate` having been imported by an earlier cell.
visible = Input(shape=(100,1))
extract1 = Dense(10)(visible)
# Branch A: one ReLU layer.
interp1 = Dense(10, activation='relu')(extract1)
# Branch B: three stacked ReLU layers of 10, 20 and 10 units.
interp13 = extract1
for branch_units in (10, 20, 10):
    interp13 = Dense(branch_units, activation='relu')(interp13)
# Merge both branches and map to a sigmoid output.
merge = concatenate([interp1, interp13])
output = Dense(1, activation='sigmoid')(merge)
model = Model(inputs=visible, outputs=output)
shared_feature_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(shared_feature_graph.create(prog='dot', format='svg'))


# Multiple Input

In [None]:
def _conv_branch(tensor):
    """Run `tensor` through two (Conv2D -> 2x2 max-pool) stages (32 then 16 filters) and flatten."""
    stage = tensor
    for n_filters in (32, 16):
        stage = Conv2D(n_filters, kernel_size=4, activation='relu')(stage)
        stage = MaxPooling2D(pool_size=(2, 2))(stage)
    return Flatten()(stage)


# first input model: 64x64 single-channel images
visible1 = Input(shape=(64,64,1))
flat1 = _conv_branch(visible1)
# second input model: 32x32 three-channel images
visible2 = Input(shape=(32,32,3))
flat2 = _conv_branch(visible2)
# merge both branches and interpret them with two dense layers
merge = concatenate([flat1, flat2])
hidden1 = Dense(10, activation='relu')(merge)
hidden2 = Dense(10, activation='relu')(hidden1)
output = Dense(1, activation='sigmoid')(hidden2)
model = Model(inputs=[visible1, visible2], outputs=output)
multi_input_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(multi_input_graph.create(prog='dot', format='svg'))


# Multiple Output

In [None]:
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed

# One recurrent feature extractor feeds two heads: a single classification output
# and a per-step linear output.
visible = Input(shape=(100,1))
extract = LSTM(10, return_sequences=True)(visible)
# Classification head: second LSTM -> dense ReLU -> sigmoid.
class_state = LSTM(10)(extract)
class_hidden = Dense(10, activation='relu')(class_state)
output1 = Dense(1, activation='sigmoid')(class_hidden)
# Sequence head: the same linear Dense unit applied through TimeDistributed.
per_step_regressor = TimeDistributed(Dense(1, activation='linear'))
output2 = per_step_regressor(extract)
model = Model(inputs=visible, outputs=[output1, output2])
multi_output_graph = model_to_dot(model, show_layer_names=True, show_shapes=True, rankdir='TB')
SVG(multi_output_graph.create(prog='dot', format='svg'))


# Siamese

In [None]:
# Feature network shared by both sides of the Siamese model: maps one
# imDim x imDim x 1 image to a flat feature vector.
imDim = 64
input_shape = (imDim, imDim, 1)
inp_img = Input(shape=(imDim, imDim, 1), name='ImageInput')

# First stage keeps the explicit input_shape argument used when the layer was first written.
model = Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape, padding='valid')(inp_img)
model = MaxPooling2D((2, 2), padding='valid')(model)

# Remaining conv + 2x2 max-pool stages, listed as (filters, kernel size).
for n_filters, kernel in ((64, (3, 3)), (128, (3, 3)), (256, (1, 1))):
    model = Conv2D(n_filters, kernel, activation='relu', padding='valid')(model)
    model = MaxPooling2D((2, 2), padding='valid')(model)

# Final 1x1 convolution (no pooling afterwards), then flatten to a vector.
model = Conv2D(64, (1, 1), activation='relu', padding='valid')(model)
model = Flatten()(model)

feat = Model(inputs=[inp_img], outputs=[model], name='Feat_Model')
feat.summary()


# In[27]:

from keras.layers import concatenate
import random

# Two image inputs are passed through the same `feat` model, so both sides share weights.
left_img = Input(shape=(imDim, imDim, 1), name='left_img')
right_img = Input(shape=(imDim, imDim, 1), name='right_img')
left_feats = feat(left_img)
right_feats = feat(right_img)

# Comparison head: concatenate both feature vectors, apply two
# (linear Dense -> BatchNormalization -> ReLU) stages, then a sigmoid unit.
merged_feats = concatenate([left_feats, right_feats], name='concat_feats')
for head_units in (1024, 4):
    merged_feats = Dense(head_units, activation='linear')(merged_feats)
    merged_feats = BatchNormalization()(merged_feats)
    merged_feats = Activation('relu')(merged_feats)
merged_feats = Dense(1, activation='sigmoid')(merged_feats)

similarity_model = Model(inputs=[left_img, right_img], outputs=[merged_feats], name='Similarity_Model')
similarity_model.summary()

In [None]:
# Draw the layer graph of the shared feature network.
feat_graph = model_to_dot(feat, show_layer_names=True, show_shapes=True, rankdir='TB')
display(SVG(feat_graph.create(prog='dot', format='svg')))


In [None]:

# Draw the layer graph of the two-input similarity model.
similarity_graph = model_to_dot(similarity_model, show_layer_names=True, show_shapes=True, rankdir='TB')
display(SVG(similarity_graph.create(prog='dot', format='svg')))


# Autoencoder

In [None]:
# Convolutional autoencoder for 64x64 single-channel images with a dense
# bottleneck layer named 'latent'.
input_img = Input(shape=(64, 64, 1))  # adapt this if using `channels_first` image data format

# Encoder: three (Conv2D -> 2x2 max-pool) stages; 'same' padding halves the
# spatial size at each pool: 64 -> 32 -> 16 -> 8.
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
# Flatten the *pooled* tensor. Flattening `x` here instead would leave the third
# pooling layer disconnected from the model.
encoded = Flatten()(encoded)
encoded = Dense(8*8*8, activation='relu', name='latent')(encoded)
# at this point the representation is 8*8*8 = 512-dimensional

# Decoder: reshape the latent vector to (8, 8, 8), then three (Conv2D -> 2x upsample)
# stages bring the spatial size back to 64x64.
r = Reshape(target_shape=(8,8,8))(encoded)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(r)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same',name='output')(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.summary()

In [None]:

# Draw the layer graph of the autoencoder.
autoencoder_graph = model_to_dot(autoencoder, show_layer_names=True, show_shapes=True, rankdir='TB')
display(SVG(autoencoder_graph.create(prog='dot', format='svg')))


In [None]:
# Encoder-only model: same inputs as the autoencoder, output taken at the 'latent' layer.
latent_layer = autoencoder.get_layer('latent')
encoder = Model(autoencoder.inputs, latent_layer.output)
encoder.summary()

# What to do with latent features?
- Use the latent features to compare images via Euclidean or cosine distance.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Augmentation configuration for training: rescale pixel values by 1/255 and
# apply random shear, rotation and horizontal/vertical shifts.
augmentation_options = dict(
    rescale=1./255,
    shear_range=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Configuration for testing/validation: only rescaling, because these images
# should be fed to the model unchanged.
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Training batches: 64x64 grayscale images, no labels (class_mode=None) because the
# autoencoder uses each image as its own target.
train_generator = train_datagen.flow_from_directory('./unseen-dataset/TrainingSet',
                                                   target_size = (64,64),
                                                   color_mode = 'grayscale',
                                                   class_mode = None,
                                                   batch_size = 64)

# Validation batches: same image format, one batch of up to 952 images.
# NOTE(review): 952 is presumably the size of the validation set — confirm.
# `shuffle` must be the boolean False: the string 'FALSE' is truthy, so it did not
# reliably disable shuffling. A fixed order keeps batches aligned with
# `test_generator.filenames`.
test_generator = test_datagen.flow_from_directory('./unseen-dataset/ValidationSet',
                                                 target_size = (64, 64),
                                                 color_mode = 'grayscale',
                                                 class_mode = None,
                                                 batch_size = 952,
                                                 shuffle = False)

In [None]:
def fixed_generator(generator):
    """Yield an ``(item, item)`` pair for every item produced by ``generator``.

    This turns a stream of unlabeled batches into (input, target) pairs where
    the target is the input itself.
    """
    yield from ((item, item) for item in generator)

In [None]:
# Train the autoencoder on (image, image) pairs; each of the 10 epochs draws one
# training batch and one validation batch.
hist = autoencoder.fit_generator(
    fixed_generator(train_generator),
    steps_per_epoch=1,
    epochs=10,
    validation_data=fixed_generator(test_generator),
    validation_steps=1
)

In [None]:
# Put the recorded training history in a DataFrame and plot each column in its own subplot.
df_hist = pd.DataFrame(data=hist.history)
df_hist.plot(figsize=(15, 15), subplots=True)

## Cosine Similarity for Validation Dataset

In [None]:
precision = 0
recall = 0

# Take the next batch of sample indices from the validation iterator and load the
# matching images.
index = next(test_generator.index_generator)
images = test_generator._get_batches_of_transformed_samples(index)

# File name of every image in the batch, and the id taken from it: the first four
# characters of the part before the first underscore.
image_names = [os.path.basename(test_generator.filenames[sample_idx]) for sample_idx in index]
w_ids = [file_name.split('_')[0][0:4] for file_name in image_names]  # w_ids are here

image_names_ids = pd.DataFrame(list(zip(image_names, w_ids)))
latent_pred = encoder.predict(images)

# Pairwise cosine similarity between all latent vectors, with rows and columns labeled by id.
similarity_matrix = metrics.cosine_similarity(latent_pred, Y=None, dense_output=True)
cos_dist = pd.DataFrame(similarity_matrix, index=w_ids, columns=w_ids)

In [None]:
# Sweep similarity thresholds over the pairwise matrix `cos_dist` and report
# precision, recall and accuracy. A pair is predicted "same" when its similarity is
# strictly greater than the threshold, and is truly "same" when its row and column
# labels are equal.
# NOTE(review): self-pairs on the diagonal are included in the counts, as before.
thresholds = [0.6,0.8,0.9, 0.95]

# Compute the label-equality mask once; it does not depend on the threshold.
sim_values = cos_dist.values
same_label = np.asarray(cos_dist.index)[:, None] == np.asarray(cos_dist.columns)[None, :]

for th in thresholds:
    predicted_same = sim_values > th
    # Every pair lands in exactly one cell of the confusion matrix. In particular a
    # different-label pair whose similarity equals the threshold is a true negative
    # (it used to be counted as a false negative).
    t_p = int(np.sum(same_label & predicted_same))
    f_p = int(np.sum(~same_label & predicted_same))
    f_n = int(np.sum(same_label & ~predicted_same))
    t_n = int(np.sum(~same_label & ~predicted_same))
    # Guard the ratios so an empty class reports 0.0 instead of raising ZeroDivisionError.
    precision = t_p / (t_p + f_p) if (t_p + f_p) else 0.0
    recall = t_p / (t_p + f_n) if (t_p + f_n) else 0.0
    print('precision for threshold of '+ str(th) + ' is: '+ str(precision))
    print('recall for threshold of ' + str(th) + ' is: ' + str(recall))
    total = t_p + t_n + f_p + f_n
    accuracy = (t_p + t_n) / total if total else 0.0
    print('accuracy',accuracy)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm_notebook


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# NOTE(review): `df_data` is not created in any cell visible here. This cell reads its
# 'latent', 'identity' and 'imagename' columns — confirm it is defined before running.
cos_dist_out = cosine_similarity(list(df_data['latent'].values))

# Read the per-image metadata once instead of calling df_data.iloc for every pair
# inside the O(n^2) loop below.
all_identities = df_data['identity'].tolist()
all_imagenames = df_data['imagename'].tolist()
identity_keys = [int(identity) for identity in all_identities]

recalls = []
precisions = []
threshs = []
for i in range(2,10):
    cos_thresh = i/10.0
    print('---------cos_thresh:',cos_thresh,'-----------')
    threshs.append(cos_thresh)
    data_result = []

    # One result row per "given" image: compare it against every image (itself included).
    for r_idx, row in enumerate(tqdm_notebook(cos_dist_out)):
        given_idx = all_identities[r_idx]
        given_img = all_imagenames[r_idx]
        given_key = identity_keys[r_idx]
        idx_count, tp, tn, fp, fn = 0, 0, 0, 0, 0
        fp_list = []
        fn_list = []
        for c_idx, similarity in enumerate(row):
            # A pair is predicted "same" only when strictly above the threshold, so a
            # similarity equal to the threshold counts as a negative prediction
            # (previously such pairs were dropped from all four counts).
            predicted_same = similarity > cos_thresh
            if identity_keys[c_idx] == given_key:
                idx_count += 1
                if predicted_same:
                    tp += 1
                else:
                    fn += 1
                    fn_list.append(os.path.join(all_identities[c_idx], all_imagenames[c_idx]))
            elif predicted_same:
                fp += 1
                fp_list.append(os.path.join(all_identities[c_idx], all_imagenames[c_idx]))
            else:
                tn += 1
        # Guarded ratios: report 0.0 rather than raising when a denominator is zero.
        precision = _safe_ratio(tp, fp+tp)
        recall = _safe_ratio(tp, tp+fn)
        f1 = _safe_ratio(2*(precision*recall), precision+recall)
        total = tp + tn + fp + fn
        accuracy = _safe_ratio(tp+tn, total)
        data_result.append([given_idx,given_img, idx_count, tp, tn, fp, fn,precision,recall,f1,accuracy, total, fp_list, fn_list])

    df_cos_dist_out = pd.DataFrame(data_result, columns=['identity','imagename','total_images_identity','TP','TN','FP','FN','precision','recall','f1','accuracy','total_images', 'FP_List','FN_List'])
    df_cos_dist_out = df_cos_dist_out.sort_values(by='identity',ascending=True).reset_index(drop=True)

    # Aggregate the per-image counts for this threshold.
    total = df_cos_dist_out.shape[0]
    total_images_identity = df_cos_dist_out['total_images_identity']
    tps = df_cos_dist_out['TP'].sum()
    fps = df_cos_dist_out['FP'].sum()
    tns = df_cos_dist_out['TN'].sum()
    fns = df_cos_dist_out['FN'].sum()
    print(tps,fps,tns,fns)
    precision = _safe_ratio(tps, fps+tps)
    recall = _safe_ratio(tps, tps+fns)
    recalls.append(recall)
    precisions.append(precision)
    f1 = _safe_ratio(2*(precision*recall), precision+recall)
    print('precision:',precision,'\nrecall:',recall,'\nf1:',f1)
    acc = df_cos_dist_out['accuracy'].mean()
    # Mean over images of TP divided by the number of images sharing that identity.
    print('Intra Writer Accuracy (Type 1):',np.mean(df_cos_dist_out['TP']/total_images_identity))
    # Total true negatives divided by the number of image pairs (rows squared).
    print('Inter Writer Accuracy (Type 2)::',tns/total/total)
    print('acc:',acc)