In [7]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd

In [4]:
# Fungsi untuk memuat gambar dari folder dan augmentasi
def load_images_from_folder_with_augmentation(folder, img_size=(150, 150), augment_times=9):
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    images = []
    labels = []
    label_map = {}
    label_counter = 0
    
    for person_name in os.listdir(folder):
        person_folder = os.path.join(folder, person_name)
        if os.path.isdir(person_folder):
            if person_name not in label_map:
                label_map[person_name] = label_counter
                label_counter += 1
            
            for filename in os.listdir(person_folder):
                img_path = os.path.join(person_folder, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    img = cv2.resize(img, img_size)
                    img_flatten = img.flatten()
                    images.append(img_flatten)
                    labels.append(label_map[person_name])
                    
                    # Augmentasi gambar
                    img = img.reshape((1,) + img.shape + (1,))
                    aug_iter = datagen.flow(img, batch_size=1)
                    for _ in range(augment_times):
                        aug_img = next(aug_iter)[0].reshape(img_size)
                        aug_img_flatten = aug_img.flatten()
                        images.append(aug_img_flatten)
                        labels.append(label_map[person_name])
    
    return np.array(images), np.array(labels), label_map

In [5]:

# Path ke folder gambar
image_folder = '10_images'

# Load gambar dan label dengan augmentasi
X, y, label_map = load_images_from_folder_with_augmentation(image_folder)

# Cetak informasi dataset
print(f'Total images: {len(X)}')
print(f'Total labels: {len(y)}')
print(f'Label map: {label_map}')

# Membagi dataset menjadi data latih dan data uji
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Total images: 500
Total labels: 500
Label map: {'Colin_Powell': 0, 'Donald_Rumsfeld': 1, 'George_W_Bush': 2, 'Gerhard_Schroeder': 3, 'Tony_Blair': 4}


In [8]:
pd.DataFrame(X_train)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22490,22491,22492,22493,22494,22495,22496,22497,22498,22499
0,57.075657,70.457001,82.241875,89.037498,91.581482,92.301521,92.417999,92.139359,91.648949,90.726700,...,235.000000,235.000000,235.000000,235.000000,235.000000,235.000000,235.000000,235.000000,235.000000,235.000000
1,158.550293,159.141968,159.733658,160.325333,160.917023,161.000000,161.000000,161.000000,161.000000,161.000000,...,150.383698,150.474182,151.065872,151.489136,151.430649,151.319733,150.804337,151.024277,151.615952,152.103821
2,109.478096,146.956512,173.661865,185.198730,182.131699,173.884079,169.000000,168.458420,165.000000,165.742828,...,94.771378,93.559540,69.782318,39.616726,21.862371,18.626953,25.830603,26.043333,21.818909,18.445862
3,64.182205,63.918121,63.654034,63.389946,63.125858,62.861771,62.597687,62.333599,62.069511,61.805424,...,44.000000,43.091667,41.977341,40.863018,40.000000,40.365635,41.479961,42.000000,42.708614,43.822937
4,75.062828,77.986382,80.909943,84.111328,88.009399,91.907471,96.006927,100.879524,100.373177,66.405640,...,48.239441,47.205063,46.230541,45.256023,45.478996,46.128674,46.778355,48.284096,50.233135,52.182171
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,217.000000,217.000000,217.000000,217.000000,217.000000,217.000000,217.000000,217.000000,217.000000,217.000000,...,144.259583,145.404266,146.165100,146.119858,145.364929,144.746536,144.543427,144.256546,143.541565,142.362228
396,49.000000,52.000000,57.000000,61.000000,67.000000,75.000000,83.000000,91.000000,100.000000,111.000000,...,20.000000,27.000000,31.000000,33.000000,32.000000,31.000000,33.000000,34.000000,34.000000,35.000000
397,8.920183,11.456613,14.872373,19.096685,22.558399,25.648634,28.614161,31.993931,36.459461,41.779690,...,47.429985,49.780121,52.044773,53.816017,55.512348,57.306934,59.682808,62.219090,64.540199,67.317039
398,168.000000,168.000000,168.000000,168.039047,168.144928,168.250809,168.356674,168.462555,168.568436,168.674316,...,100.358986,105.810997,109.537003,112.776260,117.657516,122.323265,125.168015,127.000000,127.000000,127.000000


In [12]:
# Pipeline dengan standard scaler dan SVM
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC())
])

# Definisikan parameter grid untuk GridSearchCV
param_grid = {
    'svm__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'svm__C': [0.001, 0.01, 0.1, 1, 10, 100],
    # 'svm__gamma': ['scale', 'auto'],  # digunakan untuk 'rbf', 'poly', dan 'sigmoid'
    # 'svm__degree': [2, 3, 4],  # relevan untuk kernel 'poly'
    # 'svm__coef0': [0.0, 0.1, 0.5, 1.0],  # digunakan untuk 'poly' dan 'sigmoid'
    'svm__tol': [1e-3, 1e-4, 1e-5],
    'svm__class_weight': [None, 'balanced'],
    'svm__decision_function_shape': ['ovo', 'ovr']
}


# Inisialisasi GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=2, verbose=2, n_jobs=-1)

# Melatih model menggunakan GridSearchCV
grid_search.fit(X_train, y_train)

# Cetak parameter terbaik dan skor terbaik
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

# Evaluasi model terbaik pada data uji
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Prediksi pada data latih (untuk melihat overfitting)
y_train_pred = best_model.predict(X_train)
print("Training Accuracy:", accuracy_score(y_train, y_train_pred))


Fitting 2 folds for each of 288 candidates, totalling 576 fits
