### Importing libraries

In [1]:
import numpy as np
import os
from tensorflow import keras
from PIL import Image
import math

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


### Specifying the dimension of the image

In [3]:
# Dimension of the image
dimen = 128

### Specifying the path of the images to be loaded and saved

In [4]:
# Enter the path name until 'images' [ Actual images are stored at ...images/sanyam/image0.png]

# /content/gdrive/MyDrive/ENEL645/ProjectSiameseModel/augmented_dataset/
# /content/gdrive/MyDrive/ENEL645/ProjectSiameseModel/Output_folder/

dir_path = input( 'Enter images directory path : ')
out_path = input( 'Enter images output path : ')


Enter images directory path : /content/gdrive/MyDrive/ENEL645/ProjectSiameseModel/augmented_dataset/
Enter images output path : /content/gdrive/MyDrive/ENEL645/ProjectSiameseModel/Output_folder/


### Loading and normalising the images 

In [5]:
sub_dir_list = os.listdir(dir_path )
images = list()
labels = list()

# Looping through the images for all the persons
for i in range( len( sub_dir_list ) ):
    label = i
    # getting the path for the images of a particular person[i.e. ..images/Sanyam]
    image_names = os.listdir( os.path.join(dir_path , sub_dir_list[i]) )
    # looping through each image of a particular person
    for image_path in image_names:
        # path of each image
        path = os.path.join(dir_path , sub_dir_list[i] , image_path )
        try :
            image = Image.open(path)
            resize_image = image.resize((dimen, dimen))
            array_ = list()
            for x in range(dimen):
                sub_array = list()
                for y in range(dimen):
                    sub_array.append(resize_image.load()[x, y])
                # Length of sub-array is 128 i.e. containing the data for each row of the image, i.e. 128 pixels of each row
                # These get appended to array_
                # Thus the array_ has 128 values, where each value further contains the values of 128 pixels
                array_.append(sub_array)
            #print('The length of the  array is: ', len(array_))
            image_data = np.array(array_)
            #print('Shape of image data is', image_data.shape)
            # Normalizing the images
            image = np.array(np.reshape(image_data, (dimen, dimen, 3))) / 255
            images.append(image)
            labels.append(label)
            # printing the label of image along with its shape
            #print('Shape of ', label, 'image is', image.shape)
        except:
            print( 'WARNING : File {} could not be processed.'.format( path ) )

### Analysing the image and labels

In [6]:
print('..............................................')
print('No of labels are', len(labels))
print('No of images are', len(images))
print('Shape of each image is', images[0].shape)
print('..............................................')

..............................................
No of labels are 100
No of images are 100
Shape of each image is (128, 128, 3)
..............................................


### Converting list of images to array of images

In [7]:
print('Initial type of images is', type(images))
images_array = np.array( images )
print('Type of images is ', type(images_array))
print('Shape of the images array is ', images_array.shape)
print('The shape of the image is', (images_array[0]).shape)

Initial type of images is <class 'list'>
Type of images is  <class 'numpy.ndarray'>
Shape of the images array is  (100, 128, 128, 3)
The shape of the image is (128, 128, 3)


### Preparing combinations of image with the other image and putting labels

In [8]:
# For our dataset of 45 images for each of 5 team members
samples_1 = list()
samples_2 = list()
labels = list()
for i in range( 100 ) :
    m  = i//20
    #print('Value of m is', m)
    for j in range( 100 ) :
        samples_1.append( images_array[i] )
        samples_2.append( images_array[j] )
        if((j>= 20*m) & (j<= (20*(m+1)-1))):
            labels.append( 1 )
        else :
            labels.append( 0 )

X1 = np.array( samples_1 )
X2 = np.array( samples_2 )
Y = np.array( labels)

### Analysing samples and labels

In [9]:
print('Shape of array of first set of images is', X1.shape)
print('Shape of array of second set of images is', X2.shape)
print('Number of labels are ', len(Y))

Shape of array of first set of images is (10000, 128, 128, 3)
Shape of array of second set of images is (10000, 128, 128, 3)
Number of labels are  10000


In [10]:
np.save( '{}/x1.npy'.format( out_path ), X1 )
np.save( '{}/x2.npy'.format( out_path ), X2 )
np.save( '{}/y.npy'.format( out_path ) , Y )

In [11]:
print(labels)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [12]:
from collections import Counter


Counter(labels).keys() # equals to list(set(words))
Counter(labels).values() # counts the elements' frequency

dict_values([2000, 8000])