In [1]:
import os
import sys
import glob
from PIL import Image
import numpy as np
from collections import defaultdict
import itertools

In [2]:
from os import listdir
from matplotlib import image
from PIL import Image

In [None]:
# preprocessing

In [None]:
# directory_path = './Dataset/interim/real/'
# new_directory = './Dataset/preprocessed/real'
# # directory_path = './Dataset/interim/forged/'
# # new_directory = './Dataset/preprocessed/forged'
# for filename in listdir(directory_path):
#     # load image
#     image = Image.open(directory_path + '/' + filename)
#     # resize image and ignore original aspect ratio
#     img_resized = image.resize((200,200))
#     gs_image = img_resized.convert(mode='L')
#     # save
#     gs_image.save(new_directory + '/' + filename)

In [3]:
real_images = glob.glob('./Dataset/preprocessed/real/*.png')
forged_images = glob.glob('./Dataset/preprocessed/forged/*.png')

In [4]:
def get_image_id(image_path):
    """returns image ID from the image path"""
    image_id = image_path.split('/')[-1].split('_')[0]
    return image_id

In [41]:
# Create a dictionary to store all images.
real_images_dict = defaultdict(list)
forged_images_dict = defaultdict(list)

# Iterate over real images and put them in dictionary values for same image_id key.
for real_image, forged_image in zip(real_images, forged_images):
    
    # add image to dictionary
    real_image_id = get_image_id(real_image)
    real_images_dict[real_image_id].append(real_image)
    
    forged_image_id = get_image_id(forged_image)
    forged_images_dict[forged_image_id].append(forged_image)

In [42]:
# create tuples of image for training
negative_image_tuples = list()

for image_id in real_images_dict.keys():
    real = real_images_dict[image_id]
    forged = forged_images_dict[image_id]
    
    negative_image_tuples.extend(list(itertools.product(real, real, forged)))

In [43]:
def process(image_path):
    """returns processed images"""
    image = Image.open(image_path)  
    image_array = np.array(image)
    image_array_processed = 1 - image_array
    image_array_processed = image_array_processed / np.std(image_array_processed)
    image_array_processed = np.expand_dims(image_array_processed, axis=2)
    
    return image_array_processed

In [8]:
# pre-process data
image_1 = []
image_2 = []
image_3 = []
labels = []

for anchor, positive, negative in negative_image_tuples[:1000]:
    image_1.append(process(anchor))
    image_2.append(process(positive))
    image_3.append(process(negative))
    labels.append(0)


In [9]:
# Convert to numpy arrays
image_1_array = np.asarray(image_1)
image_2_array = np.asarray(image_2)
image_3_array = np.asarray(image_3)
labels_array = np.array(labels)

In [10]:
# shuffle numpy arrays
idx = np.random.choice(range(len(image_1)), size=len(image_1), replace=False)

X_1 = image_1_array[idx]
X_2 = image_2_array[idx]
X_3 = image_3_array[idx]
y = labels_array[idx]

In [11]:
# split data into train-valid-test set.
train_split = 0.8
valid_split = 0.9
train_offset = int(train_split * len(X_1))
valid_offset = int(valid_split * len(X_1))

X_1_train = X_1[:train_offset]
X_2_train = X_2[:train_offset]
X_3_train = X_3[:train_offset]
y_train = y[:train_offset]

X_1_valid = X_1[train_offset:valid_offset]
X_2_valid = X_2[train_offset:valid_offset]
X_3_valid = X_3[train_offset:valid_offset]
y_valid = y[train_offset:valid_offset]

X_1_test = X_1[valid_offset:]
X_2_test = X_2[valid_offset:]
X_3_test = X_3[valid_offset:]
y_test = y[valid_offset:]

In [12]:
X_1_train.shape

(800, 200, 200, 1)

In [13]:
# print(X_1_train.shape)

In [14]:
import tensorflow as tf
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Conv2D, ZeroPadding2D, Dropout, Activation, Input, concatenate
from keras.models import Model

from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import Concatenate
from keras.layers.core import Lambda, Flatten, Dense
from keras.initializers import glorot_uniform

from keras.engine.topology import Layer
from keras.regularizers import l2
from keras import backend as K

Using TensorFlow backend.


In [15]:
def initialize_weights(shape, dtype=None):
    """
        The paper, http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf
        suggests to initialize CNN layer weights with mean as 0.0 and standard deviation of 0.01
    """
    return np.random.normal(loc = 0.0, scale = 1e-2, size = shape)

In [16]:
def initialize_bias(shape, dtype=None):
    """
        The paper, http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf
        suggests to initialize CNN layer bias with mean as 0.5 and standard deviation of 0.01
    """
    return np.random.normal(loc = 0.5, scale = 1e-2, size = shape)

In [18]:
def get_siamese_model(input_shape):
    left_input = Input(input_shape)
    right_input = Input(input_shape)
    
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(2048, activation='sigmoid',
                   kernel_regularizer=l2(1e-3),
                   kernel_initializer=initialize_weights,bias_initializer=initialize_bias))
   
    
    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(left_input)
    encoded_r = model(right_input)
    
    # Add a customized layer to compute the absolute difference between the encodings
    L1_layer = Lambda(lambda tensors:K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])
    
    # Add a dense layer with a sigmoid unit to generate the similarity score
    prediction = Dense(1,activation='sigmoid',bias_initializer=initialize_bias)(L1_distance)
    
    # Connect the inputs with the outputs
    siamese_net = Model(inputs=[left_input,right_input],outputs=prediction)
    
    # return the model
    return siamese_net

In [19]:
model = get_siamese_model((200, 200, 1))
model.summary()


Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 200, 200, 1)  0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 200, 200, 1)  0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 2048)         78959104    input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 2048)         0           sequential_1[1][0]        

In [20]:
def identity_loss(y_true, y_pred):
    """
    Fake loss function for Keras.
    """
    return y_pred - 0 * y_true

In [21]:
model.compile(loss=identity_loss, optimizer=Adam(lr = 0.00006))

In [24]:
nepochs=5
model.fit([X_1_train, X_3_train], y_train,
          batch_size=128,
          epochs=nepochs,
          validation_data=([X_1_valid, X_3_valid], y_valid))

Train on 800 samples, validate on 100 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7374c92e8>

In [32]:
from scipy import stats

In [26]:
y_pred_real = model.predict([X_1_test, X_2_test])

In [28]:
# print(y_pred_real)

In [33]:
stats.describe(y_pred_real)

DescribeResult(nobs=100, minmax=(array([0.25895727], dtype=float32), array([0.61945665], dtype=float32)), mean=array([0.5266993], dtype=float32), variance=array([0.01219847], dtype=float32), skewness=array([-0.7216221], dtype=float32), kurtosis=array([-1.0954739], dtype=float32))

In [34]:
y_pred_forgery = model.predict([X_1_test, X_3_test])

In [35]:
stats.describe(y_pred_forgery)

DescribeResult(nobs=100, minmax=(array([0.15123454], dtype=float32), array([0.6144328], dtype=float32)), mean=array([0.4387259], dtype=float32), variance=array([0.02108156], dtype=float32), skewness=array([-0.12084431], dtype=float32), kurtosis=array([-1.4292992], dtype=float32))

In [50]:
rr_max = np.argmax(y_pred_real)
rr_min = np.argmin(y_pred_real)
print(rr_max, rr_min)

2 15


In [47]:
negative_image_tuples

[('./Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/forged/29_2.png'),
 ('./Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/forged/29_3.png'),
 ('./Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/forged/29_1.png'),
 ('./Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/forged/29_0.png'),
 ('./Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/forged/29_4.png'),
 ('./Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/real/29_3.png',
  './Dataset/preprocessed/forged/29_2.png'),
 ('./Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/real/29_3.png',
  './Dataset/preprocessed/forged/29_3.png'),
 ('./Dataset/preprocessed/real/29_2.png',
  './Dataset/preprocessed/real/29_3.png',
  './Dataset/

In [51]:
negative_image_tuples[902]

('./Dataset/preprocessed/real/71_0.png',
 './Dataset/preprocessed/real/71_1.png',
 './Dataset/preprocessed/forged/71_0.png')

In [52]:
negative_image_tuples[915]

('./Dataset/preprocessed/real/71_0.png',
 './Dataset/preprocessed/real/71_3.png',
 './Dataset/preprocessed/forged/71_4.png')

In [53]:
rf_max = np.argmax(y_pred_forgery)
rf_min = np.argmin(y_pred_forgery)
print(rf_max, rf_min)

77 53


In [54]:
negative_image_tuples[977]

('./Dataset/preprocessed/real/34_1.png',
 './Dataset/preprocessed/real/34_3.png',
 './Dataset/preprocessed/forged/34_0.png')

In [55]:
negative_image_tuples[953]

('./Dataset/preprocessed/real/71_3.png',
 './Dataset/preprocessed/real/71_2.png',
 './Dataset/preprocessed/forged/71_2.png')