<a href="https://colab.research.google.com/github/rezjaq/MachLearn/blob/main/Casia_Webface.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
files.upload()

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


!kaggle datasets download -d ntl0601/casia-webface
!unzip casia-webface.zip -d /content/dataset/


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/dataset/casia-webface/010371/00485624.jpg  
  inflating: /content/dataset/casia-webface/010371/00485625.jpg  
  inflating: /content/dataset/casia-webface/010371/00485626.jpg  
  inflating: /content/dataset/casia-webface/010371/00485627.jpg  
  inflating: /content/dataset/casia-webface/010371/00485628.jpg  
  inflating: /content/dataset/casia-webface/010371/00485629.jpg  
  inflating: /content/dataset/casia-webface/010371/00485630.jpg  
  inflating: /content/dataset/casia-webface/010371/00485631.jpg  
  inflating: /content/dataset/casia-webface/010371/00485632.jpg  
  inflating: /content/dataset/casia-webface/010371/00485633.jpg  
  inflating: /content/dataset/casia-webface/010371/00485634.jpg  
  inflating: /content/dataset/casia-webface/010371/00485635.jpg  
  inflating: /content/dataset/casia-webface/010371/00485636.jpg  
  inflating: /content/dataset/casia-webface/010372/00485637.jpg  
  inflating

### **Import Libraries**

In [None]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2 as cv
import random
from sklearn.model_selection import train_test_split
from skimage.feature import hog
from skimage import data, exposure
from sklearn.decomposition import PCA


### **Load Dataset**

In [None]:
def load_dataset(dataset_path, min_face_per_person=100, classes=None, img_size=(112, 112), grayscale=True):
    """Load face images from a directory that holds one sub-folder per identity.

    Args:
        dataset_path: Directory whose immediate sub-directories each contain
            the images of one person. The sub-directory name becomes the label.
        min_face_per_person: Folders with fewer directory entries than this
            are skipped entirely.
        classes: If not None, only the first ``classes`` folders (in sorted
            order) are considered; the ``min_face_per_person`` filter is
            applied afterwards.
        img_size: Target size handed to ``cv.resize`` for every image.
        grayscale: If True (default), images are converted to a single
            channel, which is the 2-D input the HOG step of this notebook
            works on. If False, images are returned as 3-channel RGB.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``labels[i]`` is the
        folder name that ``images[i]`` was read from. Both arrays are empty
        when no folder passes the filter.
    """
    images, labels = [], []
    folder_names = sorted(
        name for name in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, name))
    )[:classes]

    for folder_name in folder_names:
        folder_path = os.path.join(dataset_path, folder_name)

        # os.listdir() returns entries in arbitrary order; sorting makes the
        # sample order (and any seeded split built on it) reproducible.
        img_names = sorted(os.listdir(folder_path))
        if len(img_names) < min_face_per_person:
            continue

        for img_name in img_names:
            img = cv.imread(os.path.join(folder_path, img_name))
            if img is None:
                # cv.imread returns None for files it cannot decode.
                continue

            # OpenCV loads images in BGR channel order.
            img = cv.cvtColor(img, cv.COLOR_BGR2GRAY if grayscale else cv.COLOR_BGR2RGB)
            img = cv.resize(img, img_size)
            images.append(img)
            labels.append(folder_name)

    return np.array(images), np.array(labels)

# Dataset location and loading thresholds.
DATASET_PATH = '/content/dataset/casia-webface/'  # root folder with one sub-folder per identity
NUM_SAMPLES = 100  # sample count (not referenced by the cells shown here)
MIN_FACE_PER_PERSON = 100  # identities with fewer images than this are skipped

# Read every qualifying identity folder into memory.
images, labels = load_dataset(DATASET_PATH, min_face_per_person=MIN_FACE_PER_PERSON)

print(f"Images shape: {images.shape}")  # show the shape of the image array


Images shape: (180600, 112, 112)


### **Feature extraction**

In [None]:
def extract_hog_features(images, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1)):
    """Compute one HOG descriptor per image.

    Args:
        images: Iterable of images, each either 2-D (grayscale) or 3-D with
            the channels on the last axis.
        orientations: Number of orientation bins, forwarded to ``hog``.
        pixels_per_cell: Cell size in pixels, forwarded to ``hog``.
        cells_per_block: Block size in cells, forwarded to ``hog``.

    Returns:
        NumPy array with one flattened HOG feature vector per input image
        (an empty array when ``images`` is empty).
    """
    hog_features = []
    for image in images:
        # hog() treats every axis as spatial unless told otherwise, and it
        # rejects inputs with three spatial axes. Name the channel axis for
        # colour images; for 2-D images the call is unchanged.
        channel_kwargs = {"channel_axis": -1} if np.ndim(image) == 3 else {}
        fd = hog(image,
                 orientations=orientations,
                 pixels_per_cell=pixels_per_cell,
                 cells_per_block=cells_per_block,
                 block_norm='L2-Hys',
                 feature_vector=True,
                 **channel_kwargs)
        hog_features.append(fd)
    return np.array(hog_features)

# Combined HOG + PCA feature extraction
def extract_hog_pca_features(images, n_components=50, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), pca=None):
    """Build feature vectors made of HOG descriptors plus their PCA projection.

    Args:
        images: Images to describe; forwarded to ``extract_hog_features``.
        n_components: Number of principal components to keep when a new PCA
            model is fitted. Ignored when ``pca`` is supplied.
        orientations: Forwarded to ``extract_hog_features``.
        pixels_per_cell: Forwarded to ``extract_hog_features``.
        cells_per_block: Forwarded to ``extract_hog_features``.
        pca: Optional already-fitted PCA model. When given, it is only used to
            transform the HOG features (no re-fitting), so held-out data can
            be projected with the model that was fitted on the training data.
            When None (default), a new model is fitted on ``images``.

    Returns:
        Tuple ``(combined_features, pca)``: the HOG features horizontally
        stacked with their PCA projection, and the PCA model that was used.
    """
    hog_features = extract_hog_features(images, orientations, pixels_per_cell, cells_per_block)

    if pca is None:
        # Default path: fit a fresh PCA on these features and project them.
        pca = PCA(n_components=n_components)
        hog_pca_features = pca.fit_transform(hog_features)
    else:
        # Reuse path: project with the supplied model without touching its fit.
        hog_pca_features = pca.transform(hog_features)

    # Final vector = raw HOG descriptor followed by its PCA components.
    combined_features = np.hstack((hog_features, hog_pca_features))

    return combined_features, pca

# Example usage
# `images` is expected to already hold the dataset as grayscale images.
combined_features, pca_model = extract_hog_pca_features(images, n_components=50)

print(f"Combined features shape: {combined_features.shape}")  # show the shape of the combined feature array


Combined features shape: (180600, 442)


### **Encoding Label**

In [None]:
# Give every distinct identity label a consecutive integer id (np.unique returns them sorted).
unique_labels = np.unique(labels)
labels_map = dict(zip(unique_labels, range(len(unique_labels))))

# Integer-encode each sample's label, then expand the ids into one-hot rows.
y_encoded = np.array([labels_map[name] for name in labels])
y_categorical = tf.keras.utils.to_categorical(y_encoded)

print(f"y_categorical.shape: {y_categorical.shape}")  # show the shape of the one-hot label array


y_categorical.shape: (180600, 921)


### **Split Dataset**

In [None]:
# Hold out 20% of the samples for testing. Stratifying on the integer labels
# keeps each identity's share the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    combined_features, y_categorical, test_size=0.2, random_state=42, stratify=y_encoded
)

# Standardise the features. These are HOG/PCA values, not 8-bit pixels, so
# dividing by 255 does not normalise them. Scale every column to zero mean and
# unit variance instead, using statistics from the training split only so that
# nothing about the test split leaks into preprocessing.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
feature_mean = X_train.mean(axis=0, dtype='float64').astype('float32')
feature_std = X_train.std(axis=0, dtype='float64').astype('float32')
feature_std[feature_std == 0] = 1.0  # constant columns: avoid division by zero
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

print(f"X_train.shape: {X_train.shape}")  # show the shape of X_train
print(f"X_test.shape: {X_test.shape}")  # show the shape of X_test


X_train.shape: (144480, 442)
X_test.shape: (36120, 442)


### **Build Model ANN**

In [None]:
# Create the model: three Dense -> BatchNorm -> Dropout stages followed by a
# softmax layer with one unit per identity. The input size is declared with an
# explicit Input layer; passing `input_shape` to the first Dense layer makes
# Keras emit a UserWarning.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),  # drop 50% of the units
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),  # drop 30% of the units
    tf.keras.layers.Dense(len(unique_labels), activation='softmax'),
])

# Compile the model: Adam optimizer, categorical cross-entropy for the one-hot labels.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Stop when val_loss has not improved for 10 epochs and restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### **Training Model**

In [None]:
# Keep the returned History object so the per-epoch metrics stay available
# (e.g. for plotting) instead of only appearing as a bare object repr.
history = model.fit(
    X_train,  # training features
    y_train,  # one-hot training labels
    epochs=30,  # maximum number of epochs
    batch_size=128,  # samples per gradient step
    validation_split=0.2,  # fraction of the training data held out for validation
    callbacks=[early_stopping]  # early-stopping callback defined with the model
)

Epoch 1/30
[1m903/903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 15ms/step - accuracy: 0.0071 - loss: 6.6296 - val_accuracy: 0.0308 - val_loss: 5.9044
Epoch 2/30
[1m903/903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 15ms/step - accuracy: 0.0299 - loss: 5.9267 - val_accuracy: 0.0386 - val_loss: 5.7871
Epoch 3/30
[1m903/903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 12ms/step - accuracy: 0.0451 - loss: 5.6574 - val_accuracy: 0.0744 - val_loss: 5.3720
Epoch 4/30
[1m903/903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - accuracy: 0.0544 - loss: 5.5302 - val_accuracy: 0.0876 - val_loss: 5.2113
Epoch 5/30
[1m903/903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 12ms/step - accuracy: 0.0603 - loss: 5.4584 - val_accuracy: 0.1003 - val_loss: 5.1092
Epoch 6/30
[1m903/903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 13ms/step - accuracy: 0.0638 - loss: 5.4115 - val_accuracy: 0.1055 - val_loss: 5.0574
Epoch 7/30
[1m9

<keras.src.callbacks.history.History at 0x7c023509a380>

### **Evaluate Model**

In [None]:
# Score the trained model on the held-out test split.
loss, accuracy = model.evaluate(X_test, y_test)  # evaluate on the test features and labels
print(f'Accuracy: {accuracy * 100:.2f}%')  # report the accuracy as a percentage

[1m1129/1129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.1285 - loss: 4.8720
Accuracy: 13.21%
