In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Model 

In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import io
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.preprocessing import OneHotEncoder
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

import keras
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications import ResNet50

from keras import layers
from keras.models import Model
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from skimage.feature import hog
from skimage import data, exposure


In [None]:
# !pip install keras_vggface
# !pip install keras_applications 
# !pip install keras_preprocessing 

In [None]:
# !pip install git+https://github.com/rcmalli/keras-vggface.git

In [None]:
#from keras_vggface.vggface import VGGFace
from keras_vggface.utils import decode_predictions, preprocess_input

In [None]:
!pip install tensorflow_addons

In [None]:
import tensorflow_addons as tfa
import random
import os
from tqdm import tqdm

In [None]:
class SiameseNetwork(tf.keras.Model):
    """Triplet wrapper that applies one shared embedding network to three image batches.

    The same ``vgg_face`` model embeds the anchor, positive and negative
    batches; every embedding is L2-normalised along its last axis.
    """

    def __init__(self, vgg_face):
        """Keep a reference to the shared embedding model.

        Args:
            vgg_face: callable Keras model mapping an image batch to embeddings.
        """
        super().__init__()
        self.vgg_face = vgg_face

    @tf.function
    def call(self, inputs):
        """Embed an (anchor, positive, negative) triplet.

        Args:
            inputs: sequence of exactly three image batches, in the order
                anchor, positive, negative.

        Returns:
            list: ``[anchor, positive, negative]`` L2-normalised embeddings.
        """
        anchor_images, positive_images, negative_images = inputs

        def embed(scope_name, images):
            # Each branch runs inside its own name scope, using the shared weights.
            with tf.name_scope(scope_name):
                return tf.math.l2_normalize(self.vgg_face(images), axis=-1)

        return [
            embed("Anchor", anchor_images),
            embed("Positive", positive_images),
            embed("Negative", negative_images),
        ]

    @tf.function
    def get_features(self, inputs):
        """Return L2-normalised embeddings for a single batch of images."""
        raw_embeddings = self.vgg_face(inputs)
        return tf.math.l2_normalize(raw_embeddings, axis=-1)

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields (anchor, positive, negative) image triplets.

    ``dataset_path`` is scanned for one sub-directory per person; every file in
    a sub-directory whose name contains "jpeg" or "png" is treated as an image
    of that person. One triplet is produced per person in a batch.
    """

    def __init__(self, dataset_path, batch_size=20, shuffle=True):
        """Index the dataset and perform the initial shuffle.

        Args:
            dataset_path: directory containing one sub-directory per person.
            batch_size: number of people (hence triplets) per batch.
            shuffle: when True, ``on_epoch_end`` randomises the person order.
        """
        self.dataset = self.curate_dataset(dataset_path)
        self.dataset_path = dataset_path
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.no_of_people = len(self.dataset)
        self.on_epoch_end()

    def __getitem__(self, index):
        """Build the ``index``-th batch of triplets.

        Args:
            index: zero-based batch index.

        Returns:
            list: ``[A, P, N]`` numpy arrays of anchor, positive and negative
            images, one row per person in the batch.

        Raises:
            ValueError: if the dataset holds fewer than two people, because no
                negative example can be drawn (the original loop never ended).
        """
        # Materialise the key list once instead of on every negative draw.
        people = list(self.dataset.keys())
        batch_people = people[index * self.batch_size: (index + 1) * self.batch_size]
        if batch_people and self.no_of_people < 2:
            raise ValueError(
                "At least two people are required to sample a negative example; "
                "found {} in {!r}".format(self.no_of_people, self.dataset_path)
            )

        anchors, positives, negatives = [], [], []
        for person in batch_people:
            image_count = len(self.dataset[person])

            anchor_index = random.randint(0, image_count - 1)
            anchor = self.get_image(person, anchor_index)

            # Prefer a positive that differs from the anchor; with a single
            # image the anchor itself is reused.
            positive_index = random.randint(0, image_count - 1)
            while positive_index == anchor_index and image_count != 1:
                positive_index = random.randint(0, image_count - 1)
            positive = self.get_image(person, positive_index)

            # Draw any other person as the negative identity.
            negative_person = people[random.randint(0, self.no_of_people - 1)]
            while negative_person == person:
                negative_person = people[random.randint(0, self.no_of_people - 1)]
            negative_index = random.randint(0, len(self.dataset[negative_person]) - 1)
            negative = self.get_image(negative_person, negative_index)

            anchors.append(anchor)
            positives.append(positive)
            negatives.append(negative)

        return [np.asarray(anchors), np.asarray(positives), np.asarray(negatives)]

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return self.no_of_people // self.batch_size

    def curate_dataset(self, dataset_path):
        """Map every person directory to the list of its image file names.

        Args:
            dataset_path: directory containing one sub-directory per person.

        Returns:
            dict: ``{person_directory_name: [image_file_name, ...]}``; people
            without any matching image are omitted.
        """
        dataset = {}
        for person in listdir(dataset_path):
            # os.path.join works with or without a trailing separator and
            # matches how get_image builds its paths.
            person_dir = os.path.join(dataset_path, person)
            if not os.path.isdir(person_dir):
                # Stray files next to the person folders are ignored.
                continue
            image_files = [f for f in listdir(person_dir) if "jpeg" in f or "png" in f]
            if image_files:
                dataset[person] = image_files
        return dataset

    def on_epoch_end(self):
        """Randomise the order of people when shuffling is enabled."""
        if self.shuffle:
            keys = list(self.dataset.keys())
            random.shuffle(keys)
            # Rebuild the dict so its iteration order follows the shuffled keys.
            self.dataset = {key: self.dataset[key] for key in keys}

    def get_image(self, person, index):
        """Load one image of ``person`` and prepare it for the network.

        Args:
            person: key of ``self.dataset`` (the person's directory name).
            index: position of the image within ``self.dataset[person]``.

        Returns:
            numpy.ndarray: the image resized to 224x224, converted to float64
            and passed through ``preprocess_input``.

        Raises:
            ValueError: if OpenCV cannot read the file.
        """
        image_path = os.path.join(self.dataset_path, person, self.dataset[person][index])
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("Could not read image: {}".format(image_path))
        img = cv2.resize(img, (224, 224))
        img = np.asarray(img, dtype=np.float64)
        return preprocess_input(img)

In [None]:
K = tf.keras.backend
def loss_function(x, alpha = 0.2):
    """Triplet loss over a batch of embeddings.

    Args:
        x: sequence ``(anchor, positive, negative)`` of embedding batches.
        alpha: margin added to the positive/negative distance difference.

    Returns:
        Scalar tensor: mean over the batch of
        ``max(d(anchor, positive) - d(anchor, negative) + alpha, 0)``,
        where ``d`` is the squared Euclidean distance summed along axis 1.
    """
    anchor, positive, negative = x

    def squared_distance(other):
        # Squared Euclidean distance between the anchor and `other`, per sample.
        return K.sum(K.square(anchor - other), axis=1)

    margin_violation = squared_distance(positive) - squared_distance(negative) + alpha
    return K.mean(K.maximum(margin_violation, 0.0))

In [None]:
# vggface = tf.keras.models.Sequential()
# vggface.add(tf.keras.layers.Convolution2D(64, (3, 3), activation='relu', padding="SAME", input_shape=(224,224, 3)))
# vggface.add(tf.keras.layers.Convolution2D(64, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.MaxPooling2D((2,2), strides=(2,2)))
 
# vggface.add(tf.keras.layers.Convolution2D(128, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.Convolution2D(128, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.MaxPooling2D((2,2), strides=(2,2)))
 
# vggface.add(tf.keras.layers.Convolution2D(256, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.Convolution2D(256, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.Convolution2D(256, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.MaxPooling2D((2,2), strides=(2,2)))
 
# vggface.add(tf.keras.layers.Convolution2D(512, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.Convolution2D(512, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.Convolution2D(512, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.MaxPooling2D((2,2), strides=(2,2)))
 
# vggface.add(tf.keras.layers.Convolution2D(512, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.Convolution2D(512, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.Convolution2D(512, (3, 3), activation='relu', padding="SAME"))
# vggface.add(tf.keras.layers.MaxPooling2D((2,2), strides=(2,2)))

# vggface.add(tf.keras.layers.Flatten())

# vggface.add(tf.keras.layers.Dense(4096, activation='relu'))
# vggface.add(tf.keras.layers.Dropout(0.5))
# vggface.add(tf.keras.layers.Dense(4096, activation='relu'))
# vggface.add(tf.keras.layers.Dropout(0.5))
# vggface.add(tf.keras.layers.Dense(2622, activation='softmax'))

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ZeroPadding2D, Convolution2D, MaxPooling2D, Dropout, Flatten, Activation

def vgg_face():
    """Build the convolutional face classifier whose weights are loaded later.

    The network takes 224x224x3 inputs and consists of five stages of
    zero-padded 3x3 ReLU convolutions, each stage closed by 2x2 max pooling,
    followed by a convolutional head (7x7 then two 1x1 convolutions with
    dropout in between) ending in a flattened 2622-way softmax.

    Returns:
        Sequential: the uncompiled model, without weights loaded.
    """
    # (filters, number of padded 3x3 convolutions) for each stage, in order.
    conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = Sequential()
    is_input_layer = True
    for filters, conv_count in conv_stages:
        for _ in range(conv_count):
            if is_input_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1,1),input_shape=(224,224, 3)))
                is_input_layer = False
            else:
                model.add(ZeroPadding2D((1,1)))
            model.add(Convolution2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2,2), strides=(2,2)))

    # Fully-convolutional classifier head.
    for units, kernel in ((4096, (7, 7)), (4096, (1, 1))):
        model.add(Convolution2D(units, kernel, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Convolution2D(2622, (1, 1)))
    model.add(Flatten())
    model.add(Activation('softmax'))
    return model

In [None]:
# from keras.applications.vgg16 import VGG16

# # load model
# model = VGG16()
# # remove the output layer
# model = Model(inputs=model.inputs, outputs=model.layers[-2].output)
# # model.pop()
# # add new classifier layers
# x = layers.Dense(1024, activation='relu')(model.layers[-1].output)
# x = layers.Dropout(0.5)(x)
# x = layers.Dense(1024//2, activation='relu')(x)
# x = layers.Dropout(0.5)(x)
# output = Dense(128, use_bias=False)(x)
# # define new model
# model = Model(inputs=model.inputs, outputs=output)
# for layer in model.layers[:-5]:
#     layer.trainable = False

# model.summary()

In [None]:
model = vgg_face()
model.load_weights('../input/weights/vgg_face_weights.h5')
model.pop()
# model.add(tf.keras.layers.Dense(512))
# model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(2*512))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(128, use_bias=False, name='output'))
for layer in model.layers[:-3]:
    layer.trainable = False
model.summary()

In [None]:
model = SiameseNetwork(model)

In [None]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00006)


def train(X):
    """Run one optimisation step on a triplet batch.

    Args:
        X: the ``[anchors, positives, negatives]`` batch passed to ``model``.

    Returns:
        The triplet loss tensor computed for this batch (before the update).
    """
    # NOTE(review): the forward pass is called without `training=True`, so the
    # Dropout layers presumably stay inactive here — confirm this is intended.
    with tf.GradientTape() as tape:
        triplet_loss = loss_function(model(X))
    trainable = model.trainable_variables
    gradients = tape.gradient(triplet_loss, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
    return triplet_loss

In [None]:
# model.save_weights('my_model.h5')
# model.built = True
# model.load_weights('my_model.h5')

In [None]:
data_generator = DataGenerator(dataset_path='../input/dataset6/dataset4/train/', batch_size=10)

losses = []    # mean triplet loss of every epoch
accuracy = []  # kept for compatibility; nothing in this cell fills it
epochs = 30
no_of_batches = len(data_generator)
print(no_of_batches)
if no_of_batches == 0:
    # Without this guard the epoch mean below fails with a ZeroDivisionError.
    raise ValueError("The dataset holds fewer people than one batch; nothing to train on")

for i in range(1, epochs + 1):
    epoch_loss = 0
    with tqdm(total=no_of_batches) as pbar:
        pbar.set_description_str("Epoch " + str(i) + "/" + str(epochs))

        for j in range(no_of_batches):
            # A descriptive name avoids shadowing the imported `skimage.data`.
            triplet_batch = data_generator[j]
            batch_loss = train(triplet_batch)
            epoch_loss += batch_loss

            pbar.update()
            pbar.set_postfix_str("Loss :" + str(batch_loss.numpy()))

        epoch_loss /= no_of_batches
        losses.append(epoch_loss.numpy())
        pbar.set_postfix_str("Loss :" + str(epoch_loss.numpy()))

    # Keras only calls on_epoch_end() from fit(); in a hand-written loop it
    # must be invoked explicitly, otherwise the people are never reshuffled
    # and every epoch visits identical batches of identities.
    data_generator.on_epoch_end()



In [None]:
#model.get_layer('output').get_weights()[0]

# Prediction

In [None]:
# Build one prototype embedding per person: the mean of the L2-normalised
# embeddings of all of that person's training images.
train_dir = '../input/dataset6/dataset4/train/'
data_generator = DataGenerator(dataset_path=train_dir)
train_dict = data_generator.curate_dataset(train_dir)
labels = []
features = []

for person, file_names in train_dict.items():
    images = []
    for file_name in file_names:
        image_path = os.path.join(train_dir, str(person), str(file_name))
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None on failure; np.asarray(None, dtype=float64)
            # would silently turn that into NaN instead of raising.
            raise ValueError("Could not read image: " + image_path)
        if image.shape[:2] != (224, 224):
            # Same input size DataGenerator.get_image uses during training.
            image = cv2.resize(image, (224, 224))
        images.append(np.asarray(image, dtype=np.float64))

    images = preprocess_input(np.asarray(images))
    feature = model.get_features(tf.convert_to_tensor(images))
    feature = tf.reduce_mean(feature, axis=0)
    features.append(feature.numpy())
    labels.append(person)

features = np.asarray(features)

In [None]:
# Load every test image (not yet preprocessed; predict() applies
# preprocess_input itself) together with its ground-truth label.
test_dir = '../input/dataset6/dataset4/test/'
data_generator = DataGenerator(dataset_path=test_dir)
test_dict = data_generator.curate_dataset(test_dir)

labels_test = []
images_test = []
for person, file_names in test_dict.items():
    for file_name in file_names:
        image_path = os.path.join(test_dir, str(person), str(file_name))
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None on failure; np.asarray(None, dtype=float64)
            # would silently turn that into NaN instead of raising.
            raise ValueError("Could not read image: " + image_path)
        if image.shape[:2] != (224, 224):
            # Same input size DataGenerator.get_image uses during training.
            image = cv2.resize(image, (224, 224))
        images_test.append(np.asarray(image, dtype=np.float64))
        labels_test.append(person)

In [None]:
from sklearn.utils import shuffle
# Shuffle prototypes and test samples; each pair is permuted jointly so
# features/images stay aligned with their labels.
# NOTE(review): no random_state is passed, so the order differs between runs.
features, labels = shuffle(features, labels)
images_test, labels_test = shuffle(images_test, labels_test)

In [None]:
def predict(images):
    """Label each image with the identity of its nearest prototype embedding.

    Relies on the notebook globals ``model`` (embedding network), ``features``
    (one prototype embedding per person) and ``labels`` (aligned with
    ``features``).

    Args:
        images: iterable of images as arrays, not yet passed through
            ``preprocess_input``. Images that are not 224x224 are resized.

    Returns:
        list: the predicted label for every input image, in input order.

    Raises:
        ValueError: if an entry is None (e.g. a failed ``cv2.imread``).
    """
    preds = []
    for image in images:
        if image is None:
            raise ValueError("predict() received None instead of an image (failed cv2.imread?)")
        image = np.asarray(image)
        if image.shape[:2] != (224, 224):
            # The network only accepts the 224x224 inputs used during training.
            image = cv2.resize(image, (224, 224))
        image = preprocess_input(image.astype(np.float64, copy=False))
        img_features = model.get_features(np.expand_dims(image, axis=0))
        # Euclidean distance from this embedding to every prototype.
        dist = tf.norm(img_features - features, axis=1)
        preds.append(labels[int(tf.argmin(dist))])
    return preds

In [None]:
# Accuracy
from sklearn.metrics import accuracy_score
# Nearest-prototype accuracy on the test set, in (y_true, y_pred) order.
preds = predict(images_test)
nearest_prototype_accuracy = accuracy_score(labels_test, preds)
nearest_prototype_accuracy

In [None]:
import matplotlib.image as mpimg
# Qualitative check: display one test image and print its predicted identity.
# NOTE(review): this path points at dataset5/dataset3 while every other cell
# reads dataset6/dataset4 — confirm the mismatch is intentional.
image_path = '../input/dataset5/dataset3/test/Adílio/Adílio17.jpeg'
plt.imshow(mpimg.imread(image_path))
# Reload with OpenCV as float64, the same way the test images were loaded.
image = cv2.imread(image_path)
image = np.asarray(image, dtype=np.float64)
# predict() expects an iterable of images, hence the single-element list.
predict([image])

# SVM

In [None]:
# Embed every training image individually; these per-image embeddings are the
# inputs of the SVM and random-forest classifiers below.
train_dir = '../input/dataset6/dataset4/train/'
train_dict = data_generator.curate_dataset(train_dir)
labels_train = []
features_train = []

for person, file_names in train_dict.items():
    for file_name in file_names:
        image_path = os.path.join(train_dir, str(person), str(file_name))
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None on failure; np.asarray(None, dtype=float64)
            # would silently turn that into NaN instead of raising.
            raise ValueError("Could not read image: " + image_path)
        if image.shape[:2] != (224, 224):
            # Same input size DataGenerator.get_image uses during training.
            image = cv2.resize(image, (224, 224))
        image = preprocess_input(np.asarray(image, dtype=np.float64))
        img_features = model.get_features(np.expand_dims(image, axis=0))
        features_train.append(img_features[0].numpy())
        labels_train.append(person)

features_train = np.asarray(features_train)

In [None]:
# Embed every test image individually for the SVM / random-forest evaluation.
# This rebinds `labels_test` so that it stays aligned with `features_test`.
test_dir = '../input/dataset6/dataset4/test/'
test_dict = data_generator.curate_dataset(test_dir)
labels_test = []
features_test = []

for person, file_names in test_dict.items():
    for file_name in file_names:
        image_path = os.path.join(test_dir, str(person), str(file_name))
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None on failure; np.asarray(None, dtype=float64)
            # would silently turn that into NaN instead of raising.
            raise ValueError("Could not read image: " + image_path)
        if image.shape[:2] != (224, 224):
            # Same input size DataGenerator.get_image uses during training.
            image = cv2.resize(image, (224, 224))
        image = preprocess_input(np.asarray(image, dtype=np.float64))
        img_features = model.get_features(np.expand_dims(image, axis=0))
        features_test.append(img_features[0].numpy())
        labels_test.append(person)

features_test = np.asarray(features_test)

In [None]:
from sklearn.utils import shuffle
# Jointly permute embeddings and labels so each row keeps its own label.
# NOTE(review): no random_state is passed, so the order differs between runs.
features_train, labels_train = shuffle(features_train, labels_train)
features_test, labels_test = shuffle(features_test, labels_test)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
# Candidate grid for an RBF-kernel SVM. It is only consumed by the
# commented-out GridSearchCV line below; the active code uses fixed values.
tuned_parameters = {'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']}

#clf = GridSearchCV(SVC(), tuned_parameters)
# Fit a single SVC with hand-picked C and gamma on the training embeddings.
clf = SVC(C=10, gamma=1)
clf.fit(features_train, labels_train)

# print("Best parameters set found on development set:")
# print()
# print(clf.best_params_)

In [None]:
from sklearn.metrics import accuracy_score
# Evaluate the fitted SVC on the held-out test embeddings.
preds = clf.predict(features_test)
print("Accuracy :  ", accuracy_score(labels_test, preds))

# Random forest

In [None]:
from sklearn.model_selection import RandomizedSearchCV
# Hyper-parameter search space for the random forest (sampled by the
# randomized search in a later cell).

# Number of trees in the forest: 200, 400, ..., 2000
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split.
# 'auto' is omitted: for classifiers it was only an alias of 'sqrt', and it is
# deprecated/removed in current scikit-learn, where it makes fits fail.
max_features = ['sqrt']
# Maximum number of levels in a tree: 10, 20, ..., 110, plus None (unlimited)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Whether each tree is trained on a bootstrap sample
bootstrap = [True, False]
# Assemble the search space keyed by RandomForestClassifier parameter name
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
print(random_grid)

In [None]:
from sklearn import preprocessing
# Turn the person names into integer class ids. The encoder is fitted on the
# training labels only, so a test label absent from training makes
# transform() raise.
le = preprocessing.LabelEncoder()
encoded_labels_train = le.fit_transform(labels_train)
encoded_labels_test = le.transform(labels_test)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Randomized hyper-parameter search for a random forest on the embeddings:
# 10 sampled configurations, 3-fold cross-validation, all CPU cores.
clf = RandomForestClassifier()
search_options = {
    'estimator': clf,
    'param_distributions': random_grid,
    'n_iter': 10,
    'cv': 3,
    'verbose': 2,
    'random_state': 42,
    'n_jobs': -1,
}
clf_random = RandomizedSearchCV(**search_options)
clf_random.fit(features_train, encoded_labels_train)

In [None]:
# Show the hyper-parameter combination selected by the randomized search.
print(clf_random.best_params_)

In [None]:
from sklearn.metrics import accuracy_score
# Evaluate the search result on the test embeddings; predictions are compared
# against the integer-encoded test labels.
preds = clf_random.predict(features_test)
print("Accuracy :  ", accuracy_score(encoded_labels_test, preds))