In [1]:
!pip install facenet-pytorch 



In [2]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import os
import pandas as pd
import cv2
from tqdm import tqdm 
import matplotlib.pyplot as plt
import tensorflow as tf
from PIL import Image
import mtcnn

In [3]:
from facenet_pytorch import InceptionResnetV1

# Use the first CUDA GPU when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(f'Running on device: {device}')

# Inception-ResNet with the 'vggface2' pretrained weights, placed on `device`
# and switched to evaluation mode.
model = InceptionResnetV1(
    pretrained='vggface2',
    dropout_prob=0.5,
    device=device,
)
model = model.eval()

Running on device: cuda:0


In [54]:
# Root folder of the training images; each image's parent folder name is
# used as its label when the file list is built.
train_data_path = "/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set"
# Folder that holds the test images.
test_data_path = "/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1"

# Edge length (pixels) that load_img() resizes every image to.
img_size = 160

In [55]:
def load_img(data_list, size=None):
    """Read image files into a list of square RGB arrays.

    Parameters
    ----------
    data_list : iterable of str
        Paths of the image files to read.
    size : int, optional
        Edge length in pixels of each resized image. Defaults to the
        notebook-level ``img_size``.

    Returns
    -------
    list of numpy.ndarray
        One ``size`` x ``size`` RGB array per input path, in input order.

    Raises
    ------
    OSError
        If OpenCV cannot read or decode one of the files.
    """
    if size is None:
        size = img_size

    data_img = []
    for each in tqdm(data_list):
        img = cv2.imread(each)
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque error inside cvtColor.
        if img is None:
            raise OSError(f"cv2.imread could not read image: {each}")
        # OpenCV decodes to BGR; convert to RGB channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (size, size))
        data_img.append(np.array(img))

    return data_img

In [56]:

# --- Training set -----------------------------------------------------------
# Every file under train_data_path becomes one sample; the name of the folder
# that contains it is recorded as its label.
x_data_list = []
y_data_list = []
for roots, dirs, files in os.walk(train_data_path):
    for each in files:
        # Skip files whose name contains 'checkpoint'
        # (presumably notebook checkpoint artefacts).
        if 'checkpoint' not in each:
            x_data_list.append(os.path.join(roots, each))
            y_data_list.append(os.path.basename(roots))

# --- Test set ---------------------------------------------------------------
# The submission file must follow the id order of sample_submission.csv, so
# the test images are listed in exactly that order. os.walk() yields files in
# arbitrary order and must not be relied on for row alignment.
testsetDD = pd.read_csv("/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/sample_submission.csv")
testsetDD['id'] = testsetDD['id'].astype(str) + '.png'
fileNameOrder = testsetDD['id']

# Index the files found on disk by file name.
test_files_by_name = {}
for roots, dirs, files in os.walk(test_data_path):
    for each in files:
        if 'checkpoint' not in each:
            test_files_by_name[each] = os.path.join(roots, each)

# Fail early, with the offending names, if an expected image is absent.
missing_test_files = [name for name in fileNameOrder if name not in test_files_by_name]
if missing_test_files:
    raise FileNotFoundError(
        "%d test image(s) listed in sample_submission.csv were not found under %s, e.g. %s"
        % (len(missing_test_files), test_data_path, missing_test_files[:5])
    )

test_x_data_list = [test_files_by_name[name] for name in fileNameOrder]
# Name of the folder that contains each test image (mirrors y_data_list).
test_y_data_list = [os.path.basename(os.path.dirname(path)) for path in test_x_data_list]
            

In [57]:
class_path = "/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/"

# classmap.csv is read without a header, so its first row
# ('classname' -> 'classnum') ends up in the mapping as an ordinary entry and
# every class number is read as a string.
class_map = pd.read_csv(os.path.join(class_path, 'classmap.csv'),
                        header=None, index_col=0)

# Column 1 holds the class numbers; the index holds the class names.
class_map = class_map.to_dict()[1]
print(class_map)

# Convert the real class numbers to int. The header entry is identified by
# its key rather than by its position, and is kept with the sentinel -1.
class_map = {name: int(num) for name, num in class_map.items() if name != 'classname'}
class_map['classname'] = -1

{'classname': 'classnum', 'rika': '0', 'risa': '1', 'yui': '2', 'akane': '3', 'neru': '4'}


In [58]:
# Wrap the folder-name labels in a one-column frame and preview the first rows.
y_data_list = pd.DataFrame(data=y_data_list, columns=['label'])
y_data_list.head(10)

Unnamed: 0,label
0,neru
1,neru
2,neru
3,neru
4,neru
5,neru
6,neru
7,neru
8,neru
9,neru


In [59]:
# Number of distinct labels found in the training folders.
label_column = y_data_list['label']
num_class = label_column.nunique(dropna=False)
num_class

5

In [60]:
# Translate each label name to its class number through class_map and keep
# an independent NumPy copy of the result.
label_codes = y_data_list['label'].map(class_map)
y_data = label_codes.values.copy()
y_data[:10]

array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4])

In [61]:
facenet_input_size = (160,160)
def extract_face(data_list):
    """Crop the first MTCNN-detected face from each image and resize it.

    Parameters
    ----------
    data_list : iterable of str
        Paths of the image files to process.

    Returns
    -------
    list of numpy.ndarray
        One RGB array per input path, resized to ``facenet_input_size``, in
        input order. When no face is detected (or the reported box is empty)
        the whole image is resized instead, so the result always stays
        aligned with ``data_list``.
    """
    # Build the detector once; constructing it inside the loop repeats the
    # same set-up work for every image.
    detector = mtcnn.MTCNN()

    data_img = []
    for cnt, filename in enumerate(tqdm(data_list)):
        # Context manager closes the underlying file as soon as it is decoded.
        with Image.open(filename) as img:
            img_array = np.array(img.convert('RGB'))
        h, w = img_array.shape[:2]

        bounded_box = detector.detect_faces(img_array)
        if len(bounded_box) != 0:
            # Only the first reported detection is used.
            x1, y1, roiw, roih = bounded_box[0]['box']
        else:
            print('No.%d  can not detect face : %s ' % (cnt, filename))
            x1, y1, roiw, roih = 0, 0, w, h
            print(x1, y1, roiw, roih)

        # Compute the far corner from the reported origin first, then clamp
        # the box to the image. A negative origin means the box starts outside
        # the image; taking abs() of it would shift the crop away from the face.
        x2, y2 = x1 + roiw, y1 + roih
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, w), min(y2, h)
        if x2 <= x1 or y2 <= y1:
            # Degenerate box: fall back to the full frame instead of failing
            # on an empty crop.
            x1, y1, x2, y2 = 0, 0, w, h

        face = img_array[y1:y2, x1:x2]
        face_image = Image.fromarray(face).resize(facenet_input_size)
        data_img.append(np.array(face_image))

    return data_img

In [62]:
# Crop one face per training image. This is slow: the recorded run took
# about 28 minutes for 538 images.
face_data_array = extract_face(x_data_list)

  2%|▏         | 10/538 [00:27<32:12,  3.66s/it]

No.%d  can not detect face : %s  (9, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/neru/072.png')
0 0 640 640


  2%|▏         | 11/538 [00:29<27:47,  3.16s/it]

No.%d  can not detect face : %s  (10, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/neru/103.png')
0 0 637 408


  8%|▊         | 42/538 [01:46<24:19,  2.94s/it]

No.%d  can not detect face : %s  (41, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/neru/003.png')
0 0 640 640


 16%|█▋        | 88/538 [03:48<20:42,  2.76s/it]

No.%d  can not detect face : %s  (87, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/neru/023.png')
0 0 640 640


 18%|█▊        | 98/538 [04:11<16:50,  2.30s/it]

No.%d  can not detect face : %s  (97, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/neru/014.png')
0 0 640 640


 23%|██▎       | 124/538 [05:24<17:03,  2.47s/it]

No.%d  can not detect face : %s  (123, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/yui/058.png')
0 0 417 417


 26%|██▋       | 142/538 [06:14<15:46,  2.39s/it]

No.%d  can not detect face : %s  (141, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/yui/012.png')
0 0 640 640


 27%|██▋       | 146/538 [06:29<26:15,  4.02s/it]

No.%d  can not detect face : %s  (145, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/yui/071.png')
0 0 640 640


 36%|███▌      | 192/538 [08:43<24:22,  4.23s/it]

No.%d  can not detect face : %s  (191, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/yui/101.png')
0 0 640 640


 37%|███▋      | 198/538 [08:59<15:55,  2.81s/it]

No.%d  can not detect face : %s  (197, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/yui/001.png')
0 0 582 317


 39%|███▉      | 210/538 [09:36<17:41,  3.24s/it]

No.%d  can not detect face : %s  (209, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/yui/044.png')
0 0 640 640


 51%|█████     | 272/538 [12:48<13:53,  3.13s/it]

No.%d  can not detect face : %s  (271, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/risa/063.png')
0 0 289 635


 69%|██████▉   | 370/538 [18:12<08:35,  3.07s/it]

No.%d  can not detect face : %s  (369, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/akane/062.png')
0 0 640 640


 82%|████████▏ | 440/538 [22:24<05:22,  3.29s/it]

No.%d  can not detect face : %s  (439, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/rika/043.png')
0 0 640 640


 86%|████████▌ | 461/538 [23:39<04:10,  3.26s/it]

No.%d  can not detect face : %s  (460, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/rika/087.png')
0 0 640 640


 91%|█████████ | 488/538 [25:22<02:51,  3.44s/it]

No.%d  can not detect face : %s  (487, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/rika/099.png')
0 0 324 324


 94%|█████████▍| 505/538 [26:27<01:52,  3.40s/it]

No.%d  can not detect face : %s  (504, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/training_set/rika/020.png')
0 0 608 608


100%|██████████| 538/538 [28:34<00:00,  3.19s/it]


In [63]:
# Sanity check: one 160x160 RGB crop per training image.
np.shape(face_data_array)

(538, 160, 160, 3)

In [None]:
# Same face extraction for the test images.
# NOTE(review): the recorded output of this cell stops at 130/438 after a
# KeyboardInterrupt, so test_data_array may not have been assigned in that
# session - run this cell to completion before using it downstream.
test_data_array = extract_face(test_x_data_list)

  2%|▏         | 7/438 [00:25<25:59,  3.62s/it]

No.%d  can not detect face : %s  (6, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/8C920CE63A.png')
0 0 1080 1080


  4%|▍         | 19/438 [01:15<26:03,  3.73s/it]

No.%d  can not detect face : %s  (18, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/29EC8E2C3E.png')
0 0 1080 1080


  8%|▊         | 35/438 [02:21<26:10,  3.90s/it]

No.%d  can not detect face : %s  (34, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/59DB41383B.png')
0 0 1080 1080


  8%|▊         | 37/438 [02:27<24:15,  3.63s/it]

No.%d  can not detect face : %s  (36, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/E6C7BCED4B.png')
0 0 1080 1080


  9%|▉         | 41/438 [02:42<24:09,  3.65s/it]

No.%d  can not detect face : %s  (40, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/697C12FB69.png')
0 0 1080 1080


 10%|█         | 45/438 [03:03<37:14,  5.69s/it]

No.%d  can not detect face : %s  (44, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/7000E8FB61.png')
0 0 1080 1080


 12%|█▏        | 51/438 [03:26<25:29,  3.95s/it]

No.%d  can not detect face : %s  (50, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/E1B2E2B632.png')
0 0 1080 1080


 13%|█▎        | 56/438 [03:44<22:49,  3.59s/it]

No.%d  can not detect face : %s  (55, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/DB9EDFE750.png')
0 0 1080 1080


 13%|█▎        | 58/438 [03:51<23:00,  3.63s/it]

No.%d  can not detect face : %s  (57, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/26CF244858.png')
0 0 1080 1080


 18%|█▊        | 78/438 [05:21<28:59,  4.83s/it]

No.%d  can not detect face : %s  (77, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/7042FD42EA.png')
0 0 1080 1080


 19%|█▉        | 84/438 [05:44<22:53,  3.88s/it]

No.%d  can not detect face : %s  (83, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/45D7BB6117.png')
0 0 1080 1080


 19%|█▉        | 85/438 [05:47<21:46,  3.70s/it]

No.%d  can not detect face : %s  (84, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/730049E065.png')
0 0 1080 1080


 21%|██        | 91/438 [06:09<21:36,  3.74s/it]

No.%d  can not detect face : %s  (90, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/4D4BC8FBBC.png')
0 0 1080 1080


 23%|██▎       | 102/438 [06:58<20:55,  3.74s/it]

No.%d  can not detect face : %s  (101, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/AFE75C7A36.png')
0 0 1080 1080


 24%|██▎       | 103/438 [07:02<20:07,  3.61s/it]

No.%d  can not detect face : %s  (102, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/A595081686.png')
0 0 1080 1080


 26%|██▋       | 116/438 [07:59<20:33,  3.83s/it]

No.%d  can not detect face : %s  (115, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/B5844A85AD.png')
0 0 1080 1080


 28%|██▊       | 122/438 [08:22<20:16,  3.85s/it]Exception ignored in: <function ScopedTFGraph.__del__ at 0x7f7a4898e598>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/framework/c_api_util.py", line 48, in __del__
    def __del__(self):
KeyboardInterrupt
 29%|██▉       | 129/438 [08:57<21:43,  4.22s/it]

No.%d  can not detect face : %s  (128, '/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/testing_set_retina_pick1/A33867A445.png')
0 0 1080 1080


 30%|██▉       | 130/438 [09:01<21:05,  4.11s/it]

In [None]:
# Sanity check: shape of the cropped test faces.
np.shape(test_data_array)

# Embedding File

In [None]:
# convert face dataset to vector for facenet
# def get_embedding(model, face_list):
#     data_embedding =[]
#     for face_pixels in face_list:
#         face_pixels = face_pixels.astype('float32')
#         face_pixels = (face_pixels - face_pixels.mean()) / face_pixels.std()
#         samples = np.expand_dims(face_pixels, axis = 0)
#         yhat = model.predict(samples)
#         data_embedding.append(yhat[0])
#     return data_embedding
def get_embedding(model, face_pixels):
    """Return the model's embedding for a single face image.

    The pixels are standardised globally (one mean and one standard deviation
    over all values and channels) before being passed to the model as a batch
    of one.

    Parameters
    ----------
    model : object
        Any object exposing ``predict(batch)`` that returns one row per
        sample.
    face_pixels : numpy.ndarray
        Pixel array of one face image.

    Returns
    -------
    numpy.ndarray
        ``model.predict(...)[0]``, i.e. the output for the single sample.
    """
    # scale pixel values
    face_pixels = face_pixels.astype('float32')
    # standardize pixel values across channels (global)
    mean, std = face_pixels.mean(), face_pixels.std()
    # A constant image has std == 0, which would turn every pixel into NaN.
    # Floor the divisor at 1/sqrt(N), as FaceNet's prewhitening does; for any
    # ordinary image std is far above this floor, so results are unchanged.
    std_adj = np.float32(max(std, 1.0 / np.sqrt(face_pixels.size)))
    face_pixels = (face_pixels - mean) / std_adj
    # transform face into one sample
    samples = np.expand_dims(face_pixels, axis=0)
    # make prediction to get embedding
    yhat = model.predict(samples)
    return yhat[0]

In [None]:
from tensorflow.keras.models import Model, load_model

# Load the Keras model stored in 'facenet_keras.h5'. This rebinds `model`,
# replacing the facenet-pytorch InceptionResnetV1 instance assigned to the
# same name earlier in the notebook. get_embedding() calls model.predict(),
# so this is the object used for the embeddings below.
model = load_model('facenet_keras.h5')


In [None]:

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (Input, Dense, Dropout, Activation,
                                     Flatten, BatchNormalization, Conv2D,
                                     MaxPooling2D)

from tensorflow.keras.applications.resnet_v2 import (ResNet101V2, preprocess_input)

from tensorflow.keras import initializers
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras import layers
from tensorflow.keras.regularizers import l1_l2
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras.optimizers import SGD
from keras.layers import Dense, Activation, LeakyReLU, Dropout
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer

In [None]:
# Run every cropped face through the embedding model, one image at a time,
# keeping the order of the input arrays.
data_embedding_x = [get_embedding(model, face) for face in face_data_array]
test_embedding_x = [get_embedding(model, face) for face in test_data_array]

In [None]:
# Sanity check: shape of the training embeddings.
np.shape(data_embedding_x)

In [None]:
# Persist the embeddings so the slow extraction steps need not be repeated.
# Arrays are passed positionally, so they are stored as 'arr_0', 'arr_1', ...
train_archive = 'traing_dataset.npz'
test_archive = 'testing_dataset.npz'
np.savez_compressed(train_archive, data_embedding_x, y_data)
np.savez_compressed(test_archive, test_embedding_x)

In [None]:
# Reload the cached training embeddings ('arr_0') and labels ('arr_1').
# The context manager closes the archive once both arrays have been read;
# a bare np.load() on an .npz keeps the file handle open.
with np.load('traing_dataset.npz') as npz_trainging:
    data_embedding_x, y_data = npz_trainging['arr_0'], npz_trainging['arr_1']

In [None]:
# Reload the cached test embeddings ('arr_0'). The context manager closes the
# archive after the array has been read.
with np.load('testing_dataset.npz') as npz_testing:
    test_embedding_x = npz_testing['arr_0']

In [None]:
# Scale every training embedding to unit L2 norm.
in_encoder = Normalizer(norm='l2')
trainX = in_encoder.transform(data_embedding_x)

In [None]:
# Learn the label set from y_data and encode the labels in a single step.
out_encoder = LabelEncoder()
trainy = out_encoder.fit_transform(y_data)

In [None]:
# Scale every test embedding to unit L2 norm (rebinds in_encoder to a new
# Normalizer instance).
in_encoder = Normalizer(norm='l2')
testX = in_encoder.transform(test_embedding_x)

In [None]:
# Shuffle samples and labels together. A fixed seed makes the permutation
# identical on every Restart & Run All.
trainX, trainy = shuffle(trainX, trainy, random_state=42)

In [None]:
# Sanity check: the number of samples must match the number of labels.
print(trainX.shape, trainy.shape)

In [None]:
# Inspect the normalised training embeddings and their encoded labels.
trainX, trainy

In [None]:
# Inspect the normalised test embeddings.
testX

In [None]:
# Fit an RBF-kernel SVM on the training embeddings and predict the test set.
# svm_result_list collects the predictions of every step.
svm_result_list = []
for step in range(1):
    # probability=True fits the probability model with an internal, randomly
    # shuffled cross-validation. Seeding it with the step index makes
    # predict_proba reproducible between runs while still giving each step
    # its own seed.
    model_svm = SVC(kernel='rbf', gamma = 0.5, probability = True, random_state=step)
    model_svm.fit(trainX, trainy)
    svm_result = model_svm.predict(testX)
    svm_result_prob = model_svm.predict_proba(testX)
    svm_result_list.append(svm_result)


In [None]:
# Predicted (encoded) class for every test sample, from the last SVM step.
svm_result

In [None]:
# Write the submission file: one row per id of sample_submission.csv.
# NOTE(review): this assumes the rows of svm_result are in the same order as
# the ids in sample_submission.csv - verify where test_x_data_list is built.
testDD = pd.read_csv("/home/jovyan/riqdataset/aia-data2020/CNN_who_is_she/sample_submission.csv")
AA = testDD.iloc[:,0]
cnt = 9  # suffix of the output file name

# The SVM was trained on LabelEncoder codes, so decode its predictions back to
# the original class numbers before writing them out.
pred_classes = out_encoder.inverse_transform(svm_result)

predResultHW = pd.DataFrame({'id':AA,'class':pred_classes})
# index=False: do not write the DataFrame's row index as an extra column.
predResultHW.to_csv(f"submission{cnt}.csv", index=False)