In [1]:
import tensorflow as tf
from keras.layers import Input, Lambda,subtract,GlobalMaxPooling2D,Dense,GlobalAveragePooling2D, concatenate, Activation
from keras.applications.mobilenet import MobileNet as Net

from keras.preprocessing import image
from keras.applications.mobilenet import preprocess_input
from keras.models import Model
from keras.layers import Input, merge
from keras.optimizers import Adam
import cv2
import numpy as np
import os
#from tqdm import tqdm
from keras.models import Sequential

from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import time
import glob

# Margin added to (pos_dist - neg_dist) inside triplet_loss before the hinge is applied.
ALPHA = 0.7 # Triplet Loss Parameter

from keras.layers import Input,Lambda,subtract,GlobalMaxPooling2D,Dense,GlobalAveragePooling2D,concatenate,Activation
from keras.applications.xception import Xception as Net
from keras.preprocessing import image
from keras.applications.xception import preprocess_input
from keras.models import Model

import matplotlib.pyplot as plt

from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score

from sklearn.metrics import confusion_matrix
import random as rn

# Fix the NumPy and TensorFlow global seeds so runs are repeatable.
np.random.seed(0)
tf.set_random_seed(3)

from sklearn.model_selection import cross_val_score

  return f(*args, **kwds)
Using TensorFlow backend.


In [2]:
def triplet_loss(x):
    """Return the mean hinge triplet loss for a batch.

    x is a sequence of three tensors (anchor, positive, negative).
    For each row the squared differences are summed along axis 1, the value
    max(pos_dist - neg_dist + ALPHA, 0) is formed, and the result is averaged
    along axis 0.
    """
    anchor, positive, negative = x

    def _squared_distance(other):
        # Row-wise sum of squared differences between the anchor and `other`.
        return tf.reduce_sum(tf.square(tf.subtract(anchor, other)), 1)

    positive_distance = _squared_distance(positive)
    negative_distance = _squared_distance(negative)

    # Margin violation: positive only when the positive is not at least
    # ALPHA closer to the anchor than the negative is.
    margin_violation = tf.add(tf.subtract(positive_distance, negative_distance), ALPHA)
    return tf.reduce_mean(tf.maximum(margin_violation, 0.0), 0)

In [3]:
def make_model(input_shape):
    """Build the small convolutional feature extractor.

    The network is three conv + ReLU + 2x2 max-pool stages with 32, 64 and
    128 filters. The first convolution uses a 5x5 kernel with stride 2; the
    other two use 3x3 kernels with stride 1. `input_shape` is forwarded to
    the first layer.
    """
    cnn_model = Sequential()

    # Stage 1: the ReLU is fused into the convolution layer itself.
    cnn_model.add(Conv2D(32, (5, 5), strides=(2, 2), activation='relu',
                         padding='same', input_shape=input_shape))
    cnn_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1'))

    # Stages 2 and 3 share the same layout and differ only in filter count.
    for stage, n_filters in ((2, 64), (3, 128)):
        cnn_model.add(Conv2D(n_filters, (3, 3), strides=(1, 1), padding='same',
                             name='conv%d' % stage))
        cnn_model.add(Activation('relu'))
        cnn_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                   name='pool%d' % stage))

    return cnn_model

In [4]:
def create_model(d1, d2, c):
    """Build and compile the triplet network on top of the CNN from make_model.

    Args:
        d1, d2, c: the three entries of the per-image input shape passed to
            each Input layer (batch size excluded).

    Returns:
        A compiled Keras Model with three inputs named 'anchor', 'positive'
        and 'negative', whose single output is the value of triplet_loss.
    """
    # The triplet network takes 3 input images: 2 of the same class and
    # 1 out-of-class sample.
    anchor_example = Input(shape=(d1, d2, c), name='anchor')
    positive_example = Input(shape=(d1, d2, c), name='positive')
    negative_example = Input(shape=(d1, d2, c), name='negative')

    # A single CNN instance is applied to all three inputs, so the three
    # branches share one set of trainable weights.
    base_model = make_model(input_shape=(d1, d2, c))
    x1 = base_model(anchor_example)
    x2 = base_model(positive_example)
    x3 = base_model(negative_example)

    # Summarise each feature map by the maximum of every filter.
    anchor = GlobalMaxPooling2D()(x1)
    positive = GlobalMaxPooling2D()(x2)
    negative = GlobalMaxPooling2D()(x3)

    # The loss is computed inside the graph. Lambda is used instead of the
    # legacy functional merge(..., mode=<callable>), which is deprecated in
    # Keras 2 and absent from later releases; it calls triplet_loss with the
    # same list of tensors and the same declared output shape.
    loss = Lambda(triplet_loss, output_shape=(1,))([anchor, positive, negative])

    triplet_model = Model(inputs=[anchor_example, positive_example, negative_example],
                          outputs=loss)

    # The model output already is the loss value, so mean absolute error
    # against all-zero targets simply drives that output towards zero.
    triplet_model.compile(loss='mean_absolute_error', optimizer=Adam())
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    triplet_model.summary()

    return triplet_model

In [5]:
# Instantiate the make_model-based triplet network for inputs of shape (128, 128, 3).
triplet_model=create_model(128, 128, 3)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
anchor (InputLayer)             (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
positive (InputLayer)           (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
negative (InputLayer)           (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 8, 8, 128)    94784       anchor[0][0]                     
                                                                 positive[0][0]                   
          

  name=name)


In [13]:
def img_to_array(image):
    """Resize `image` to 128x128 and return it wrapped in a length-1 leading axis."""
    return np.array([cv2.resize(image, (128, 128))])

In [14]:
def img_path_to_array(image_path):
    """Read an image file as a 3-channel colour image and convert it with img_to_array.

    Args:
        image_path: path of the image file to load.

    Returns:
        Whatever img_to_array returns for the decoded image.

    Raises:
        IOError: if OpenCV cannot read or decode the file. cv2.imread signals
            failure by returning None rather than raising, which would
            otherwise surface later as an opaque error inside cv2.resize.
    """
    # cv2.IMREAD_COLOR is the named form of the flag value 1 used previously.
    img1 = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img1 is None:
        raise IOError('Could not read image file: %r' % (image_path,))
    return img_to_array(img1)

# Training Data

In [15]:
def prepare_database_tr(location):
    """Load every image matching a glob pattern into an id -> image-array dict.

    The id is the file name without its extension, with a leading 'NISDCC-'
    and a trailing '_6g' removed when present.

    Args:
        location: glob pattern selecting the image files.

    Returns:
        dict mapping each id string to the array from img_path_to_array.
    """
    # str.strip('NISDCC-_6g') treats its argument as a *set of characters*,
    # so it also ate trailing '6' / '_' characters that belong to the id
    # (e.g. '..._006_6g' became '..._00'). Remove the exact affixes instead.
    prefix = 'NISDCC-'
    suffix = '_6g'

    database = {}

    # load all the images of individuals to recognize into the database
    for file in glob.glob(location):
        identity = os.path.splitext(os.path.basename(file))[0]
        if identity.startswith(prefix):
            identity = identity[len(prefix):]
        if identity.endswith(suffix):
            identity = identity[:-len(suffix)]
        database[identity] = img_path_to_array(file)

    return database

In [16]:
# Load every training *.PNG into an id -> image-array dict (absolute, machine-specific path).
train_image_arrays = prepare_database_tr('/home/ochhab3/images/sigcomp2009/training/*.PNG')

## Triplet Formation

In [17]:
# Enumerate every (anchor, positive, negative) id triplet for the training set.
# Ids are three zero-padded 3-digit fields joined by underscores.
triplet_str_tr = []

t1 = time.time()
for writer in range(1, 13):
    writer_id = '%03d' % writer
    for ja in range(1, 6):
        anchor_id = '%s_%s_%03d' % (writer_id, writer_id, ja)
        for forger in range(23, 52):
            for jn in range(1, 6):
                negative_id = '%03d_%s_%03d' % (forger, writer_id, jn)
                # One triplet per positive sample that differs from the anchor.
                triplet_str_tr.extend(
                    (anchor_id, '%s_%s_%03d' % (writer_id, writer_id, jp), negative_id)
                    for jp in range(1, 6)
                    if jp != ja
                )
t2 = time.time()

print(t2 - t1)


0.17345070838928223


In [18]:
# Resolve each id of every training triplet to its loaded image array.
anchor_tr = [train_image_arrays[a_id] for a_id, _, _ in triplet_str_tr]
positive_tr = [train_image_arrays[p_id] for _, p_id, _ in triplet_str_tr]
negative_tr = [train_image_arrays[n_id] for _, _, n_id in triplet_str_tr]

In [19]:
# Stack each list of images into one array of shape (n_triplets, 128, 128, 3).
x_a_tr, x_p_tr, x_n_tr = (
    np.array(images).reshape((len(images), 128, 128, 3))
    for images in (anchor_tr, positive_tr, negative_tr)
)


In [None]:
# Number of training triplets.
len(anchor_tr)

# Test Data

In [20]:
def prepare_test_dict(location):
    """Load every file matching the glob pattern into an id -> image-array dict.

    The id is the file name without extension, with any leading or trailing
    run of the characters 'N', 'F', 'I' and '-' stripped off.
    """
    return {
        os.path.splitext(os.path.basename(path))[0].strip('NFI-'): img_path_to_array(path)
        for path in glob.glob(location)
    }

In [21]:
# Load the test-set genuine and forged images into separate id -> image-array dicts.
test_genuine_dict = prepare_test_dict('/home/ochhab3/images/sigcomp2009/test/genuines/*')
test_forgery_dict = prepare_test_dict('/home/ochhab3/images/sigcomp2009/test/forgeries/*')

## Triplet Formation

In [22]:
# Ids 1..100 minus a hand-listed set of ids that are excluded.
all_ids = np.arange(1, 101)
missing_ids = np.array([5, 13, 25, 32, 34, 36, 38, 40, 48, 50, 52, 57, 60, 61, 65, 76, 78, 81, 82, 87, 95])
# The element order of ids_available is whatever order the set difference iterates in.
ids_available = np.array(list((set(all_ids)-set(missing_ids))))

In [23]:
# Two-digit, zero-padded strings '01' .. '12'.
j_formatted = ['%02d' % sample_no for sample_no in range(1, 13)]

In [24]:
# Three-character, zero-padded string form of every available id.
i_formatted = [str(available_id).zfill(3) for available_id in ids_available]

In [25]:
# Three-character, zero-padded strings '003' .. '100'.
f_formatted = [str(number).zfill(3) for number in range(3, 101)]

In [26]:
# Every candidate (anchor, positive, negative) id triplet for the test set.
# Anchor/positive ids are i + sample + i; negative ids are f + sample + i,
# with the negative sample number limited to the first six entries.
triplet_test_comb = [
    (i + ja + i, i + jp + i, f + jn + i)
    for i in i_formatted
    for f in f_formatted
    for ja in j_formatted
    for jn in j_formatted[0:6]
    for jp in j_formatted
    if jp != ja
]

In [27]:
# Keep only the candidate triplets whose three ids were actually loaded.
final_triplet_test_id = [
    triplet for triplet in triplet_test_comb
    if triplet[0] in test_genuine_dict
    and triplet[1] in test_genuine_dict
    and triplet[2] in test_forgery_dict
]

In [28]:
# subset test data triplets for training of cnn
import random
random.seed(0)
# Draw 45000 distinct indices into final_triplet_test_id. The range starts at 0:
# range(1, len(...)) could never select the first triplet.
p_test=random.sample(range(len(final_triplet_test_id)), 45000)

In [29]:
# The sampled test triplets, in the order the indices were drawn.
new_test_id = [final_triplet_test_id[index] for index in p_test]


In [30]:
# Resolve the sampled test triplets to image arrays: anchor and positive come
# from the genuine dict, the negative from the forgery dict.
anchor_test = [test_genuine_dict[a_id] for a_id, _, _ in new_test_id]
positive_test = [test_genuine_dict[p_id] for _, p_id, _ in new_test_id]
negative_test = [test_forgery_dict[n_id] for _, _, n_id in new_test_id]

In [31]:
# Stack each list of images into one array of shape (n_triplets, 128, 128, 3).
x_a_test, x_p_test, x_n_test = (
    np.array(images).reshape((len(images), 128, 128, 3))
    for images in (anchor_test, positive_test, negative_test)
)

In [32]:
# Number of sampled test triplets.
len(anchor_test)

45000

# Dutch Data

In [33]:
def prepare_dut_dict(location):
    """Load every file matching the glob pattern into an id -> image-array dict.

    The id is the file name without extension, with any leading or trailing
    'K' characters stripped off.
    """
    return {
        os.path.splitext(os.path.basename(path))[0].strip('K'): img_path_to_array(path)
        for path in glob.glob(location)
    }

In [34]:
# Load every file under dutch_updated/ into one id -> image-array dict.
dut_all_dict = prepare_dut_dict('/home/ochhab3/images/dutch_updated/*')

In [35]:
# Subset of the Dutch images whose id is exactly 10 characters long; these
# are the ones used as negatives further down.
dut_neg_dict = {
    image_key: image_array
    for image_key, image_array in dut_all_dict.items()
    if len(image_key) == 10
}


In [36]:
# Ascending list containing 13 followed by 17..69.
internal_count_ids = sorted([13] + list(range(17, 70)))

In [37]:
# Next ordinal to hand out for each person id ('0' + number), starting at 1.
internal_count_dict = {'0' + str(person): 1 for person in internal_count_ids}

dut_neg_dict_ord = {}

# Re-key the negatives as '<person_id>_<two-digit ordinal>', where person_id is
# the last three characters of the original key. The keys are visited in sorted
# order so the ordinal given to each image is reproducible; previously it
# followed the dict order, which comes from glob and is arbitrary.
for i in sorted(dut_neg_dict):
    person_id = i[-3:]
    image_id = internal_count_dict[person_id]
    dut_neg_dict_ord['%s_%02d' % (person_id, image_id)] = dut_neg_dict[i]
    internal_count_dict[person_id] = image_id + 1
     

## Triplet Formation

In [38]:
# Three-character ids: '013' followed by '017' .. '069'.
dut_i_formatted = ['013'] + ['0' + str(number) for number in range(17, 70)]

In [39]:
# Two-digit, zero-padded strings '01' .. '24'.
dut_j_formatted = ['%02d' % sample_no for sample_no in range(1, 25)]

In [40]:
# Every candidate (anchor, positive, negative) id triplet for the Dutch set.
# Anchor/positive ids are '<sample>_<i>'; negative ids are '<i>_<sample>',
# with the negative sample number limited to the first twenty entries.
triplet_dut_comb = [
    (ja + '_' + i, jp + '_' + i, i + '_' + jn)
    for i in dut_i_formatted
    for ja in dut_j_formatted
    for jn in dut_j_formatted[0:20]
    for jp in dut_j_formatted
    if jp != ja
]

In [41]:
# Keep only the candidate triplets whose three ids were actually loaded.
final_triplet_dut_id = [
    triplet for triplet in triplet_dut_comb
    if triplet[0] in dut_all_dict
    and triplet[1] in dut_all_dict
    and triplet[2] in dut_neg_dict_ord
]

In [42]:
# subset dutch data triplets for training of cnn
import random
random.seed(0)
# Draw 40000 distinct indices into final_triplet_dut_id. The range starts at 0:
# range(1, len(...)) could never select the first triplet.
p_dut=random.sample(range(len(final_triplet_dut_id)), 40000)

In [43]:
# The sampled Dutch triplets, in the order the indices were drawn.
new_dutch_id = [final_triplet_dut_id[index] for index in p_dut]


In [44]:
# Resolve the sampled Dutch triplets to image arrays: anchor and positive come
# from the full dict, the negative from the re-keyed negatives dict.
anchor_dut = [dut_all_dict[a_id] for a_id, _, _ in new_dutch_id]
positive_dut = [dut_all_dict[p_id] for _, p_id, _ in new_dutch_id]
negative_dut = [dut_neg_dict_ord[n_id] for _, _, n_id in new_dutch_id]

In [45]:
# Stack each list of images into one array of shape (n_triplets, 128, 128, 3).
x_a_dut, x_p_dut, x_n_dut = (
    np.array(images).reshape((len(images), 128, 128, 3))
    for images in (anchor_dut, positive_dut, negative_dut)
)

In [46]:
# Number of sampled Dutch triplets.
len(anchor_dut)

40000

# Triplet Model

In [47]:
# running triplet model on train, test, and dutch

# Concatenate the train, test and Dutch triplet arrays along the sample axis.
x_a_comb = np.concatenate((x_a_tr, x_a_test, x_a_dut), axis=0)
x_p_comb = np.concatenate((x_p_tr, x_p_test, x_p_dut), axis=0)
x_n_comb = np.concatenate((x_n_tr, x_n_test, x_n_dut), axis=0)

In [None]:
# x_n_comb.shape

In [49]:
#new_triplet_model = create_model(128, 128, 3)

In [None]:
# NOTE(review): in file order `new_triplet_model` is first assigned further down
# (from create_model_new); the assignment in the cell above is commented out, so
# this cell presumably relies on out-of-order execution — confirm.
new_triplet_model.load_weights('/home/ochhab3/triplet_model_all_e3.h5')

In [None]:
# Train for one epoch on the combined triplets; the targets are all zeros, one per anchor row.
new_triplet_model.fit(x=[x_a_comb, x_p_comb, x_n_comb], y=np.zeros(x_a_comb.shape[0]), verbose=1, batch_size=2048, epochs=1)

In [None]:
#save the model 

# NOTE(review): load_model is imported here but not used in this cell.
from keras.models import load_model
# Persist the whole model to an absolute, machine-specific path.
new_triplet_model.save('/home/ochhab3/triplet_model_all_e4.h5')


In [None]:
# save the model weights

# Weights-only checkpoint, written to a separate file from the full-model save.
new_triplet_model.save_weights('/home/ochhab3/triplet_weights_all_e4.h5')

In [None]:
# importing the existing model

#new_model=create_model(128, 128, 3)
#new_model.load_weights('/home/ochhab3/triplet_weights.h5')

In [None]:
# NOTE(review): this loads into `triplet_model` (the make_model-based network), from a
# file name that differs from the checkpoints written above — confirm it is the intended one.
triplet_model.load_weights('/home/ochhab3/triplet_weights_dut_e2.h5')

# New Model

In [5]:
# Build a stock MobileNet with default constructor arguments so its layers can be reused.
import keras
pretrained_model= keras.applications.mobilenet.MobileNet()

In [6]:
# Show the layer-by-layer structure of the stock MobileNet.
pretrained_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [7]:
# Keep the first 82 entries of pretrained_model.layers (indices 0..81).
layers_needed=pretrained_model.layers[0:82]

In [8]:
#Defining the Sequential model
baseline_model=Sequential()
for layer in layers_needed:
    baseline_model.add(layer)

In [9]:
# Check how many layers ended up in the Sequential model.
len(baseline_model.layers)

82

In [10]:
# Show the truncated model before any layers are frozen.
baseline_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [11]:
# Freeze the first 79 layers; only the remaining layers stay trainable.
layers_to_not_train = baseline_model.layers[:79]

for frozen_layer in layers_to_not_train:
    frozen_layer.trainable = False
    

In [12]:
# Show the model again after the trainable flags were changed.
baseline_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [50]:
def create_model_new(d1, d2, c):
    """Build and compile the triplet network on top of the module-level baseline_model.

    Args:
        d1, d2, c: the three entries of the per-image input shape passed to
            each Input layer (batch size excluded).

    Returns:
        A compiled Keras Model with three inputs named 'anchor', 'positive'
        and 'negative', whose single output is the value of triplet_loss.
    """
    # The triplet network takes 3 input images: 2 of the same class and
    # 1 out-of-class sample.
    anchor_example = Input(shape=(d1, d2, c), name='anchor')
    positive_example = Input(shape=(d1, d2, c), name='positive')
    negative_example = Input(shape=(d1, d2, c), name='negative')

    # The same baseline_model object is applied to all three inputs, so the
    # three branches share its weights.
    # NOTE(review): baseline_model is assembled from layers of a model whose
    # summary shows a 224x224x3 input, while the inputs here use (d1, d2, c)
    # — confirm that calling it on a different spatial size is intended.
    base_model = baseline_model
    x1 = base_model(anchor_example)
    x2 = base_model(positive_example)
    x3 = base_model(negative_example)

    # Summarise each feature map by the maximum of every filter.
    anchor = GlobalMaxPooling2D()(x1)
    positive = GlobalMaxPooling2D()(x2)
    negative = GlobalMaxPooling2D()(x3)

    # The loss is computed inside the graph. Lambda is used instead of the
    # legacy functional merge(..., mode=<callable>), which is deprecated in
    # Keras 2 and absent from later releases; it calls triplet_loss with the
    # same list of tensors and the same declared output shape.
    loss = Lambda(triplet_loss, output_shape=(1,))([anchor, positive, negative])

    triplet_model = Model(inputs=[anchor_example, positive_example, negative_example],
                          outputs=loss)

    # The model output already is the loss value, so mean absolute error
    # against all-zero targets simply drives that output towards zero.
    triplet_model.compile(loss='mean_absolute_error', optimizer=Adam())
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    triplet_model.summary()

    return triplet_model

In [51]:
# Instantiate the baseline_model-based triplet network for inputs of shape (128, 128, 3).
new_triplet_model=create_model_new(128, 128, 3)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
anchor (InputLayer)             (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
positive (InputLayer)           (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
negative (InputLayer)           (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       multiple             3228864     anchor[0][0]                     
                                                                 positive[0][0]                   
          

  name=name)


In [None]:
# Train for one epoch on the combined triplets; the targets are all zeros, one per anchor row.
new_triplet_model.fit(x=[x_a_comb, x_p_comb, x_n_comb], y=np.zeros(x_a_comb.shape[0]), verbose=1, batch_size=2048, epochs=1)

Epoch 1/1


## Embeddings

In [None]:
#Find the embeddings from the anchors
# Sub-model that maps the three triplet inputs to the output of one pooling layer.
# NOTE(review): this reads from `triplet_model`, not `new_triplet_model`, and the
# layer is looked up by an auto-generated name; presumably that name depends on how
# many pooling layers were created earlier in the session — verify before relying on it.
intermediate_layer_model_anchor = Model(inputs=triplet_model.input,
                                 outputs=triplet_model.get_layer('global_max_pooling2d_1').output)

In [None]:
#Find the embeddings from the positives
# Sub-model that maps the three triplet inputs to the output of one pooling layer.
# NOTE(review): the layer is looked up by an auto-generated name on `triplet_model`;
# confirm it is the pooling layer of the positive branch in the current session.
intermediate_layer_model_positive = Model(inputs=triplet_model.input,
                                 outputs=triplet_model.get_layer('global_max_pooling2d_2').output)

In [None]:
#Find the embeddings from the negatives
# Sub-model that maps the three triplet inputs to the output of one pooling layer.
# NOTE(review): the layer is looked up by an auto-generated name on `triplet_model`;
# confirm it is the pooling layer of the negative branch in the current session.
intermediate_layer_model_negative = Model(inputs=triplet_model.input,
                                 outputs=triplet_model.get_layer('global_max_pooling2d_3').output)

In [None]:
#x_a_tr.shape

### Train Data Embeddings

In [None]:
# Each sub-model was built on all three inputs, so the full triplet is passed every
# time even though only one branch's pooled output is returned. Row i of each result
# corresponds to row i of the x_*_tr arrays.
anc_emb_tr=intermediate_layer_model_anchor.predict([x_a_tr, x_p_tr, x_n_tr])
pos_emb_tr=intermediate_layer_model_positive.predict([x_a_tr, x_p_tr, x_n_tr])
neg_emb_tr=intermediate_layer_model_negative.predict([x_a_tr, x_p_tr, x_n_tr])

### Test Data Embeddings

In [None]:
# Same as for the training data. Row i of each result corresponds to row i of the
# x_*_test arrays, i.e. to the i-th sampled test triplet.
anc_emb_te=intermediate_layer_model_anchor.predict([x_a_test, x_p_test,x_n_test])
pos_emb_te=intermediate_layer_model_positive.predict([x_a_test, x_p_test,x_n_test])
neg_emb_te=intermediate_layer_model_negative.predict([x_a_test, x_p_test,x_n_test])

### Dutch Data Embeddings

In [None]:
# anc_emb_dut=intermediate_layer_model_anchor.predict([x_a_dut, x_p_dut,x_n_dut])
# pos_emb_dut=intermediate_layer_model_positive.predict([x_a_dut, x_p_dut,x_n_dut])
# neg_emb_dut=intermediate_layer_model_negative.predict([x_a_dut, x_p_dut,x_n_dut])

### Train Data Embedding Dictionary

In [None]:
#Creating Unique Identification for training set

# One embedding per distinct id: the first row in which an id appears wins.
data_anc_tr, data_pos_tr, data_neg_tr = {}, {}, {}

for row, (a_id, p_id, n_id) in enumerate(triplet_str_tr):
    if a_id not in data_anc_tr:
        data_anc_tr[a_id] = np.array(anc_emb_tr[row])

    if p_id not in data_pos_tr:
        data_pos_tr[p_id] = np.array(pos_emb_tr[row])

    if n_id not in data_neg_tr:
        data_neg_tr[n_id] = np.array(neg_emb_tr[row])


### Test Data Embedding Dictionary

In [None]:
#Creating Unique Identification for test set embedding

# One embedding per distinct id: the first row in which an id appears wins.
#
# The embeddings were predicted from x_a_test / x_p_test / x_n_test, which were
# built from new_test_id (the sampled triplets, in sampling order). Row i of the
# embedding arrays therefore belongs to new_test_id[i]. Indexing by
# final_triplet_test_id, as before, paired ids with the wrong embeddings and
# could run past the end of the embedding arrays.
data_anc_te = {}
data_pos_te = {}
data_neg_te = {}
for i, (a_id, p_id, n_id) in enumerate(new_test_id):
    #Anchor dict
    if a_id not in data_anc_te:
        data_anc_te[a_id] = np.array(anc_emb_te[i])

    #Pos dict
    if p_id not in data_pos_te:
        data_pos_te[p_id] = np.array(pos_emb_te[i])

    #Neg dict
    if n_id not in data_neg_te:
        data_neg_te[n_id] = np.array(neg_emb_te[i])

In [None]:
# Inspect one (anchor, positive, negative) id triplet.
final_triplet_test_id[1]

### Train Data Pairs

In [None]:
#Pairwise pos str
# All ordered pairs of two different samples (1..5) belonging to the same id (1..12).
pair_pos_str_tr = [
    ('%03d_%03d_%03d' % (i, i, ja), '%03d_%03d_%03d' % (i, i, jp))
    for i in range(1, 13)
    for ja in range(1, 6)
    for jp in range(1, 6)
    if jp != ja
]

In [None]:
#Pairwise negative str
# Every sample of id i (1..12) paired with every '<k>_<i>_<sample>' id for k in 23..51.
pair_neg_str_tr = [
    ('%03d_%03d_%03d' % (i, i, ja), '%03d_%03d_%03d' % (k, i, jn))
    for i in range(1, 13)
    for ja in range(1, 6)
    for k in range(23, 52)
    for jn in range(1, 6)
]


### Test Data Pairs

In [None]:
# For every available id, pair sample '01' with each of samples '02'..'12'.
# Ids are <3-digit id><2-digit sample><3-digit id>, all zero padded.
pairwise_gen_ids = []

for i in ids_available:
    padded_id = str(i).zfill(3)
    a = padded_id + '01' + padded_id
    for j in range(2, 13):
        p = padded_id + str(j).zfill(2) + padded_id
        pairwise_gen_ids.append((a, p))

In [None]:
# Number of candidate genuine pairs.
len(pairwise_gen_ids)

In [None]:
# pairwise_gen_ids_avl=[]
# for i in range(0, len(pairwise_gen_ids)):
#     if ((pairwise_gen_ids[i][0] in test_genuine_dict.keys()) and (pairwise_gen_ids[i][1] in test_genuine_dict.keys())):
#         pairwise_gen_ids_avl.append(pairwise_gen_ids[i])


In [None]:
# NOTE(review): the only visible assignment of pairwise_gen_ids_avl is in the
# commented-out cell above, so on a fresh kernel this presumably raises NameError — confirm.
len(pairwise_gen_ids_avl)

In [None]:
# For every available id, pair its sample '01' with samples '01'..'06' of every
# f in 3..100. Anchor ids are <id>01<id>; the second id is <f><sample><id>, with
# id and f zero padded to three digits and the sample to two.
pairwise_sforg_ids = []

for i in ids_available:
    padded_id = str(i).zfill(3)
    a = padded_id + '01' + padded_id
    for f in range(3, 101):
        padded_f = str(f).zfill(3)
        for sample_no in range(1, 7):
            second_id = padded_f + '0' + str(sample_no) + padded_id
            pairwise_sforg_ids.append((a, second_id))

In [None]:
# pairwise_sforg_ids_avl=[]
# for i in range(0, len(pairwise_sforg_ids)):
#     if ((pairwise_sforg_ids[i][0] in test_genuine_dict.keys()) and (pairwise_sforg_ids[i][1] in test_forgery_dict.keys())):
#         pairwise_sforg_ids_avl.append(pairwise_sforg_ids[i])

### Train Features

In [None]:
#Positive Features
# Feature for a positive pair: difference of the two 128-value embeddings,
# both taken from the anchor dictionary.
pos_feat_tr = np.zeros((len(pair_pos_str_tr), 128))
for row, (first_id, second_id) in enumerate(pair_pos_str_tr):
    pos_feat_tr[row] = data_anc_tr[first_id] - data_anc_tr[second_id]


In [None]:
#Negative Features
# Feature for a negative pair: anchor embedding minus negative embedding.
neg_feat_tr = np.zeros((len(pair_neg_str_tr), 128))
for row, (anchor_id, negative_id) in enumerate(pair_neg_str_tr):
    neg_feat_tr[row] = data_anc_tr[anchor_id] - data_neg_tr[negative_id]

In [None]:
#Negative Sampled Feature

In [None]:
import random
random.seed(0)
# Draw 800 distinct row indices of neg_feat_tr. The range starts at 0:
# range(1, len(...)) could never select the first row.
p=random.sample(range(len(neg_feat_tr)), 800)

In [None]:
# The sampled negative feature rows, in the order the indices were drawn.
sampled_neg_feat_tr = [neg_feat_tr[index] for index in p]


### Test Features

In [None]:
# Count the genuine pairs for which both embeddings are available.
a = sum(
    1
    for anchor_id, positive_id in pairwise_gen_ids
    if anchor_id in data_anc_te and positive_id in data_pos_te
)
count = [a]

In [None]:
# Display the one-element list holding the number of usable genuine pairs.
count

In [None]:
#Positive Features
# Genuine pairs for which both embeddings exist.
matched_gen_pairs = [
    (anchor_id, positive_id)
    for anchor_id, positive_id in pairwise_gen_ids
    if anchor_id in data_anc_te and positive_id in data_pos_te
]

# Size the matrix from the actual number of matches. The previous hard-coded
# 203 rows would overflow with more matches and leave all-zero rows (which then
# get labelled as real positives) with fewer.
pos_feat_te = np.zeros((len(matched_gen_pairs), 128))

for row, (anchor_id, positive_id) in enumerate(matched_gen_pairs):
    pos_feat_te[row] = data_anc_te[anchor_id] - data_pos_te[positive_id]

In [None]:
# Count the forgery pairs for which both embeddings are available.
a = sum(
    1
    for anchor_id, forgery_id in pairwise_sforg_ids
    if anchor_id in data_anc_te and forgery_id in data_neg_te
)
count = [a]

In [None]:
#count

In [None]:
#Negative Features
# Forgery pairs for which both embeddings exist.
matched_sforg_pairs = [
    (anchor_id, forgery_id)
    for anchor_id, forgery_id in pairwise_sforg_ids
    if anchor_id in data_anc_te and forgery_id in data_neg_te
]

# Size the matrix from the actual number of matches. The previous hard-coded
# 624 rows would overflow with more matches and leave all-zero rows (which then
# get labelled as real negatives) with fewer.
neg_feat_te = np.zeros((len(matched_sforg_pairs), 128))

for row, (anchor_id, forgery_id) in enumerate(matched_sforg_pairs):
    neg_feat_te[row] = data_anc_te[anchor_id] - data_neg_te[forgery_id]


### COMBINE TEST TRAIN FEAT

In [None]:
# Positive-pair features: training rows followed by test rows.
pos_feat_total=np.vstack((pos_feat_tr,pos_feat_te))

In [None]:
# Negative-pair features: the sampled training rows followed by test rows.
neg_feat_total=np.vstack((sampled_neg_feat_tr,neg_feat_te))

In [None]:
# Full feature matrix: all positive rows first, then all negative rows.
feat_total=np.vstack((pos_feat_total,neg_feat_total))

In [None]:
# Labels in the same row order as feat_total: 0 for each positive row,
# then 1 for each negative row.
pos_Y = [0 for _ in range(len(pos_feat_total))]
neg_Y = [1 for _ in range(len(neg_feat_total))]
# np.append flattens and concatenates the two lists into one 1-D array.
train_y = np.append(pos_Y, neg_Y)

In [None]:
# train_test_split lives in sklearn.model_selection (the module this notebook already
# imports cross_val_score from); sklearn.cross_validation is the deprecated, since
# removed, location.
from sklearn.model_selection import train_test_split
# Hold out 20% of the pair features for evaluation, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(feat_total, train_y, test_size=0.2, random_state=42)

In [None]:
# Fit a logistic-regression classifier on the embedding-difference features
# (label 0 = positive pair, 1 = negative pair).
from sklearn.linear_model import LogisticRegression
log_model = LogisticRegression(random_state=0)
log_model.fit(x_train,y_train)

In [None]:
# Classifier score on the held-out split.
log_model.score(x_test,y_test)

In [None]:
# Recall, then precision, on the held-out split (predictions are recomputed for each call).
print (recall_score(y_test, log_model.predict(x_test)))
print (precision_score(y_test, log_model.predict(x_test)))

In [None]:
# Keep the held-out predictions for the confusion matrix below.
y_pred=log_model.predict(x_test)

In [None]:
# Confusion matrix of true vs. predicted labels on the held-out split.
confusion_matrix(y_test, y_pred)