In [7]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [8]:
# Fungsi untuk memuat gambar dari folder
def load_images_from_folder(folder, img_size=(150, 150)):
    images = []
    labels = []
    label_map = {}
    label_counter = 0
    
    for person_name in os.listdir(folder):
        person_folder = os.path.join(folder, person_name)
        if os.path.isdir(person_folder):
            if person_name not in label_map:
                label_map[person_name] = label_counter
                label_counter += 1
            
            for filename in os.listdir(person_folder):
                img_path = os.path.join(person_folder, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    img = cv2.resize(img, img_size)
                    img_flatten = img.flatten()
                    images.append(img_flatten)
                    labels.append(label_map[person_name])
    
    return np.array(images), np.array(labels), label_map

In [9]:
# Fungsi untuk augmentasi gambar
def augment_images(images, labels, label_map, img_size=(150, 150), augment_times=100):
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    augmented_images = []
    augmented_labels = []
    
    for i, img in enumerate(images):
        label = labels[i]
        img = img.reshape(img_size + (1,))
        img = img.reshape((1,) + img.shape)
        aug_iter = datagen.flow(img, batch_size=1)
        
        for _ in range(augment_times):
            aug_img = next(aug_iter)[0].reshape(img_size)
            aug_img_flatten = aug_img.flatten()
            augmented_images.append(aug_img_flatten)
            augmented_labels.append(label)
    
    return np.array(augmented_images), np.array(augmented_labels)

In [10]:

# Path ke folder gambar
image_folder = '10_images'

# Load gambar dan label tanpa augmentasi
X, y, label_map = load_images_from_folder(image_folder)

# Cetak informasi dataset
print(f'Total images: {len(X)}')
print(f'Total labels: {len(y)}')
print(f'Label map: {label_map}')

# Membagi dataset menjadi data latih dan data uji
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Augmentasi data latih
X_train_aug, y_train_aug = augment_images(X_train, y_train, label_map)

# Gabungkan data latih asli dengan data augmentasi
X_train_combined = np.concatenate((X_train, X_train_aug))
y_train_combined = np.concatenate((y_train, y_train_aug))

Total images: 30
Total labels: 30
Label map: {'Colin_Powell': 0, 'Gerhard_Schroeder': 1, 'Tony_Blair': 2}


In [11]:
y_test

array([2, 1, 2, 0, 1, 0])

In [12]:
# Pipeline dengan standard scaler dan SVM
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC())
])

# Definisikan parameter grid untuk GridSearchCV
param_grid = {
    'svm__kernel': ['rbf'],
    'svm__C': [0.01, 0.1, 1, 10],
    'svm__gamma': ['scale']  # hanya digunakan jika kernel adalah 'rbf'
}

# Inisialisasi GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=2, verbose=2, n_jobs=-1)

# Melatih model menggunakan GridSearchCV
grid_search.fit(X_train_combined, y_train_combined)

# Cetak parameter terbaik dan skor terbaik
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

# Evaluasi model terbaik pada data uji
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Prediksi pada data latih (untuk melihat overfitting)
y_train_pred = best_model.predict(X_train_combined)
print("Training Accuracy:", accuracy_score(y_train_combined, y_train_pred))

Fitting 2 folds for each of 4 candidates, totalling 8 fits
Best parameters found:  {'svm__C': 10, 'svm__gamma': 'scale', 'svm__kernel': 'rbf'}
Best cross-validation score:  0.5903465346534653
Test Accuracy: 0.6666666666666666
Classification Report:
               precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       1.00      0.50      0.67         2
           2       0.50      0.50      0.50         2

    accuracy                           0.67         6
   macro avg       0.72      0.67      0.66         6
weighted avg       0.72      0.67      0.66         6

Training Accuracy: 1.0
