In [30]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [31]:
# Fungsi untuk memuat gambar dari folder dan augmentasi
def load_images_from_folder_with_augmentation(folder, img_size=(150, 150), augment_times=10):
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    images = []
    labels = []
    label_map = {}
    label_counter = 0
    
    for person_name in os.listdir(folder):
        person_folder = os.path.join(folder, person_name)
        if os.path.isdir(person_folder):
            if person_name not in label_map:
                label_map[person_name] = label_counter
                label_counter += 1
            
            for filename in os.listdir(person_folder):
                img_path = os.path.join(person_folder, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    img = cv2.resize(img, img_size)
                    img_flatten = img.flatten()
                    images.append(img_flatten)
                    labels.append(label_map[person_name])
                    
                    # Augmentasi gambar
                    img = img.reshape((1,) + img.shape + (1,))
                    aug_iter = datagen.flow(img, batch_size=1)
                    for _ in range(augment_times):
                        aug_img = next(aug_iter)[0].reshape(img_size)
                        aug_img_flatten = aug_img.flatten()
                        images.append(aug_img_flatten)
                        labels.append(label_map[person_name])
    
    return np.array(images), np.array(labels), label_map

In [32]:

# Path ke folder gambar
image_folder = '10_images'

# Load gambar dan label dengan augmentasi
X, y, label_map = load_images_from_folder_with_augmentation(image_folder)

# Cetak informasi dataset
print(f'Total images: {len(X)}')
print(f'Total labels: {len(y)}')
print(f'Label map: {label_map}')

# Membagi dataset menjadi data latih dan data uji
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Total images: 550
Total labels: 550
Label map: {'Colin_Powell': 0, 'Donald_Rumsfeld': 1, 'George_W_Bush': 2, 'Gerhard_Schroeder': 3, 'Tony_Blair': 4}


In [None]:
# Pipeline dengan standard scaler dan SVM
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC())
])

# Definisikan parameter grid untuk GridSearchCV
param_grid = {
    'svm__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'svm__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'svm__gamma': ['scale', 'auto'],  # digunakan untuk 'rbf', 'poly', dan 'sigmoid'
    'svm__degree': [2, 3, 4],  # relevan untuk kernel 'poly'
    'svm__coef0': [0.0, 0.1, 0.5, 1.0],  # digunakan untuk 'poly' dan 'sigmoid'
    'svm__tol': [1e-3, 1e-4, 1e-5],
    'svm__class_weight': [None, 'balanced'],
    'svm__decision_function_shape': ['ovo', 'ovr']
}


# Inisialisasi GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=2, verbose=2, n_jobs=-1)

# Melatih model menggunakan GridSearchCV
grid_search.fit(X_train, y_train)

# Cetak parameter terbaik dan skor terbaik
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

# Evaluasi model terbaik pada data uji
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Prediksi pada data latih (untuk melihat overfitting)
y_train_pred = best_model.predict(X_train)
print("Training Accuracy:", accuracy_score(y_train, y_train_pred))


Fitting 2 folds for each of 6912 candidates, totalling 13824 fits
Best parameters found:  {'svm__C': 10, 'svm__class_weight': None, 'svm__coef0': 0.0, 'svm__decision_function_shape': 'ovo', 'svm__degree': 2, 'svm__gamma': 'scale', 'svm__kernel': 'rbf', 'svm__tol': 0.001}
Best cross-validation score:  0.7
Test Accuracy: 0.8090909090909091
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.94      0.92        31
           1       0.90      0.78      0.84        23
           2       0.55      0.73      0.63        15
           3       0.80      0.94      0.86        17
           4       0.83      0.62      0.71        24

    accuracy                           0.81       110
   macro avg       0.80      0.80      0.79       110
weighted avg       0.82      0.81      0.81       110

Training Accuracy: 1.0
