In [None]:
# Melakukan impor libraries yang diperlukan untuk visualisasi data.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.gridspec as gridspec
import cv2
import time
import datetime

In [None]:
# Install and import MTCNN, which is used for face detection.
# NOTE(review): the install is not pinned to a version, so the installed MTCNN release
# may differ between runs - consider pinning it.
! pip install mtcnn
from mtcnn.mtcnn import MTCNN

In [None]:
# Record the start time of the whole data-visualisation program.
# This value is read again later to report the total execution time.
global_start_time = time.time()

In [None]:
# Read the training data and display its contents.
# Later cells use the columns by position: column 0 is the sample number (it also names
# the image folder), column 1 indexes ['perempuan', 'laki-laki'], column 2 is shown as the age.
df_train = pd.read_csv('../input/bdc-satriadata2021/train.csv')
df_train

In [None]:
# Start timing the loading of the training images.
start_time = time.time()

# Every value in the first column of df_train names one sample folder, and each
# folder holds three images called <number>_1.jpg, <number>_2.jpg and <number>_3.jpg.
filename_master_train = '../input/bdc-satriadata2021/Training/'
subindex_train = [1, 2, 3]
index_train = list(df_train.iloc[:,0])

# Load every image into a nested dict: img_train[number][image order] -> pixel array.
img_train = {}
for sample_number in index_train:
    sample_prefix = filename_master_train + str(sample_number) + '/' + str(sample_number) + '_'
    img_train[sample_number] = {
        image_order: mpimg.imread(sample_prefix + str(image_order) + '.jpg')
        for image_order in subindex_train
    }

# Stop timing the loading of the training images.
end_time = time.time()

# Report how long the loading took.
elapsed_seconds = end_time - start_time
execution_time = datetime.timedelta(seconds = elapsed_seconds)
print("execution time: %s" % (execution_time))

In [None]:
# Declare the face detectors.
# MTCNN is the main detector; the OpenCV Haar cascade is used later as a fallback
# for images where the MTCNN result is not good enough.
detector = MTCNN()
haar_cascade_path = '../input/haar-cascades-for-face-detection/haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(haar_cascade_path)

# cv2.CascadeClassifier does not raise when the XML file is missing or unreadable:
# it returns an empty classifier that only fails much later, inside detectMultiScale.
# Fail here instead, so the problem is reported before the long detection loop starts.
if face_cascade.empty():
    raise OSError('Haar cascade could not be loaded from: ' + haar_cascade_path)

In [None]:
# Build the table of target-face indices: face_index_list[number][image order].
# Every image of the 770 samples starts with target index 0.
face_index_list = {
    sample_number: {image_order: 0 for image_order in (1, 2, 3)}
    for sample_number in range(1, 771)
}

# Hand-made exceptions, grouped by the target index that the listed
# (number, image order) pairs must receive.
face_index_update = {1: [(22,2), (22,3), (41,3), (63,1), (96,2), (107,3), (121,1), (128,2), (132,3), (138,1), (138,2),
                         (138,3), (149,3), (160,2), (160,3), (166,1), (175,2), (175,3), (179,3), (180,1), (208,3), (224,1),
                         (255,3), (263,3), (285,3), (291,3), (293,3), (307,3), (322,2), (323,3), (375,3), (399,2), (403,3),
                         (406,3), (420,3), (429,3), (473,2), (504,1), (519,2), (526,1), (552,1), (552,2), (552,3), (567,3),
                         (575,2), (585,1), (585,2), (648,2), (664,2), (689,2), (717,3), (729,1), (730,3), (739,3), (758,3),
                         (759,1)],
                     2: [(110,1), (140,3), (256,3), (502,1), (529,3), (669,2), (689,1)],
                     3: [(247,3), (342,3), (539,3), (669,1)],

                     # -1 marks images where MTCNN needs modified settings to detect the face.
                     -1: [(274,2), (460,3), (502,2), (649,3), (720,3), (755,2), (762,3)],
                     # -2 marks images that need a method other than MTCNN to detect the face.
                     -2: [(111,1), (211,3)],
                     # -3 marks images whose face cannot be detected well.
                     -3: [(104,1), (260,2), (290,2), (295,3), (767,1)]}

# Apply the exceptions to the table of target-face indices.
for target_index, image_keys in face_index_update.items():
    for sample_number, image_order in image_keys:
        face_index_list[sample_number][image_order] = target_index

# Temporary face indices that stand in for the negative target indices.
# Images without a temporary index keep the default value -1.
face_temp_index = {0: [(111,1), (211,3), (274,2), (649,3), (720,3)],
                   1: [(460,3), (502,2), (755,2), (762,3)]}
face_index_ghost = {
    sample_number: {image_order: -1 for image_order in (1, 2, 3)}
    for sample_number in range(1, 771)
}
for temporary_index, image_keys in face_temp_index.items():
    for sample_number, image_order in image_keys:
        face_index_ghost[sample_number][image_order] = temporary_index

In [None]:
# Start timing the extraction of the target-face information.
start_time = time.time()

# Keypoints are written in this fixed order so that they always line up with the
# CSV column names below, whatever order the detector's 'keypoints' dict uses.
KEYPOINT_ORDER = ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right')


def _rank_face_candidates(face_location, size_threshold, conf_threshold):
    """Return positions in face_location of the usable faces, largest face first.

    The size of a face is the longer side of its box, divided by the longest side
    found in the image. A face is kept when its relative size exceeds
    size_threshold and its confidence exceeds conf_threshold. When no face passes
    both tests, position 0 is returned as the only candidate. An empty
    face_location gives an empty list.
    """
    if not len(face_location):
        return []
    size_box = [max(face['box'][2], face['box'][3]) for face in face_location]
    conf_box = [face['confidence'] for face in face_location]
    face_table = pd.DataFrame([size_box, conf_box]).T
    face_table[0] = face_table[0] / max(face_table[0])
    face_table = face_table[(face_table[0] > size_threshold) & (face_table[1] > conf_threshold)]
    face_priority_index = list(face_table.sort_values(0, ascending = False).index)
    if not len(face_priority_index):
        face_priority_index = [0]
    return face_priority_index


def _haar_face_box(image):
    """Return the last box found by the OpenCV Haar cascade, or None if it finds no face."""
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # NOTE(review): in the OpenCV signature the 4th positional argument is `flags`;
    # if 50 was meant as a minimum size it has to be passed as minSize - confirm.
    new_face_location = face_cascade.detectMultiScale(gray, 1.3, 5, 50)
    if len(new_face_location) == 0:
        return None
    return list(new_face_location[-1])


# One row of important information per image of every sample.
important_list_master = []

# Walk through the sample numbers stored in df_train (the same values that key
# img_train) instead of assuming that they are exactly 1..N.
for nomor in df_train.iloc[:, 0].tolist():
    for urutan_gambar in range(1, 1 + 3):
        # Detect the faces.
        my_image = img_train[nomor][urutan_gambar].copy()
        face_location = detector.detect_faces(my_image)

        main_face_index = face_index_list[nomor][urutan_gambar]
        if main_face_index >= 0:
            # The normal target index is used with the strict thresholds.
            wanted_rank = main_face_index
            size_threshold = 0.7
            conf_threshold = 0.99
        else:
            # A negative target index switches to the temporary index and relaxed thresholds.
            wanted_rank = face_index_ghost[nomor][urutan_gambar]
            size_threshold = 0
            conf_threshold = 0.95

        # Pick the target face. Images marked -3 are expected to have no usable face.
        main_face_info = {}
        if main_face_index != -3:
            face_priority_index = _rank_face_candidates(face_location, size_threshold, conf_threshold)
            if 0 <= wanted_rank < len(face_priority_index):
                main_face_info = face_location[face_priority_index[wanted_rank]]
            else:
                # Previously this case crashed (or reused the ranking of the previous image).
                print("warning: no face with rank %s found for image %s_%s, dummy values are stored"
                      % (wanted_rank, nomor, urutan_gambar))

        # Store the important information of the target face.
        important_list = [nomor, urutan_gambar]
        if len(main_face_info):
            face_box = list(main_face_info['box'])
            if main_face_index <= -2:
                # Use OpenCV for the box when MTCNN is not good enough at locating the face;
                # keep the MTCNN box if OpenCV finds nothing.
                haar_box = _haar_face_box(my_image)
                if haar_box is not None:
                    face_box = haar_box
            important_list += face_box
            for keypoint_name in KEYPOINT_ORDER:
                # Store the (x, y) coordinates of every important facial feature.
                important_list += list(main_face_info['keypoints'][keypoint_name])
        else:
            # Dummy value -1 for images whose face could not be detected.
            important_list += [-1 for element in range(14)]
        important_list_master.append(important_list)

# Assemble the data that holds the location of every target face.
facial_location_data = pd.DataFrame(
    important_list_master,
    columns = ['nomor', 'urutan_gambar', 'x_coor', 'y_coor', 'width', 'height',
               'x_lefteye', 'y_lefteye', 'x_righteye', 'y_righteye', 'x_nose', 'y_nose',
               'x_mouthleft', 'y_mouthleft', 'x_mouthright', 'y_mouthright'])

# Save the target-face information as a .csv file.
facial_location_data.to_csv('train_facial_data.csv', index = False)

# Stop timing the extraction of the target-face information.
end_time = time.time()

# Report how long the extraction took.
execution_time = datetime.timedelta(seconds = end_time - start_time)
print("execution time: %s" % (execution_time))

In [None]:
# Start timing the visualisation of the training images.
start_time = time.time()

# Size of the visualisation: one figure row per sample, three images per row.
nrows = df_train.shape[0]
ncols = 3

# Look the face boxes up by (nomor, urutan_gambar) rather than by row position, so the
# visualisation does not depend on the row order of facial_location_data.
face_box_by_image = {
    (record.nomor, record.urutan_gambar): (record.x_coor, record.y_coor, record.width, record.height)
    for record in facial_location_data.itertuples(index = False)
}

# Prepare a (very flat) figure that only carries the main title.
title_frame = plt.figure(figsize = (6 * ncols, 10e-03), constrained_layout = False)
title_frame.patch.set_facecolor('white')
title_spec = gridspec.GridSpec(1, 1, figure = title_frame)

# Write the main title.
main_title = title_frame.add_subplot(title_spec[0, 0])
main_title.set(xticks = [], yticks = [])
main_title.set_title('TRAINING IMAGES', fontsize = 72, color = '#007BA7')

for row in range(nrows):
    # Prepare the figure for this sample.
    frame = plt.figure(figsize = (6 * ncols, 8), constrained_layout = False)
    frame.patch.set_facecolor('white')
    spec = gridspec.GridSpec(1, ncols, figure = frame)

    # Sample-level data, identical for the three images of the row.
    # iloc[row, col] reads one cell directly; iloc[row][col] relied on deprecated
    # positional Series indexing and on the dtype of the whole row.
    nomor = df_train.iloc[row, 0]
    jenis_kelamin = ['perempuan', 'laki-laki'][df_train.iloc[row, 1]]
    usia = df_train.iloc[row, 2]

    for col in range(ncols):
        urutan_gambar = col + 1

        # Crop the target face.
        my_image = img_train[nomor][urutan_gambar].copy()
        x_coor, y_coor, width, height = (
            int(value) for value in face_box_by_image.get((nomor, urutan_gambar), (-1, -1, -1, -1)))
        main_face = [[[0, 0, 0]]]
        # Undetected faces are stored with -1 everywhere. Testing the size instead of
        # x_coor keeps real boxes whose corner lies left of / above the image border.
        if width > 0 and height > 0:
            # Clamp the corner to the image: a negative start would wrap around in the slice.
            top = max(y_coor, 0)
            left = max(x_coor, 0)
            face_crop = my_image[top:y_coor+height, left:x_coor+width, :]
            if face_crop.size:
                main_face = face_crop

        # Show the image.
        add_image = frame.add_subplot(spec[0, col])
        add_image.imshow(main_face)

        # Show the title.
        title = 'Gambar ' + str(nomor) + '_' + str(urutan_gambar)
        add_image.set_title(title, fontsize = 14)

        # Show the label.
        label_0 = 'jenis kelamin: ' + jenis_kelamin + '\n'
        label_1 = 'usia: ' + str(usia) + ' tahun'
        add_image.set_xlabel(label_0 + label_1, fontsize = 12)
        add_image.set(xticks = [], yticks = [])

    # Display the figures created so far, then release them so that hundreds of
    # figures do not pile up in memory.
    plt.show()
    plt.close('all')

# Stop timing the visualisation of the training images.
end_time = time.time()

# Report how long the visualisation took.
execution_time = datetime.timedelta(seconds = end_time - start_time)
print("execution time: %s" % (execution_time))

In [None]:
# Record the end time of the whole data-visualisation program.
global_end_time = time.time()

# Report the execution time of the whole program, measured from global_start_time.
total_elapsed_seconds = global_end_time - global_start_time
total_execution_time = datetime.timedelta(seconds = total_elapsed_seconds)
print("total execution time: %s" % (total_execution_time))