In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA


In [1]:
import os
import cv2
import numpy as np
import dlib
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('../models/landmarks.dat')

def crop_face(img, detector, predictor):
    dets = detector(img, 1)
    if len(dets) > 0:
        for k, d in enumerate(dets):
            shape = predictor(img, d)
            x1, y1, x2, y2 = d.left(), d.top(), d.right(), d.bottom()
            return img[y1:y2, x1:x2]
    return img

def load_images(image_folder, target_size=(150, 150)):
    images = []
    labels = []
    classes = os.listdir(image_folder)
    
    for class_name in classes:
        class_path = os.path.join(image_folder, class_name)
        if os.path.isdir(class_path):
            for image_name in os.listdir(class_path):
                image_path = os.path.join(class_path, image_name)
                if image_path.endswith(('.png', '.jpg', '.jpeg')):
                    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                    resized_img = cv2.resize(img, target_size)
                    images.append(resized_img)
                    labels.append(class_name)
                    
    images = np.array(images)
    labels = np.array(labels)
    return images, labels, classes

def oversample_with_augmentation(X_train, y_train, img_height=150, img_width=150, sample=50):
    datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True
    )
    X_train_augmented = []
    y_train_augmented = []
    
    for x, y in zip(X_train, y_train):
        x = x.reshape((1, img_height, img_width, 1))
        for _ in range(sample):
            augmented = datagen.flow(x, batch_size=1)
            X_train_augmented.append(augmented[0].reshape(img_height, img_width, 1))
            y_train_augmented.append(y)

    X_train_augmented = np.array(X_train_augmented)
    y_train_augmented = np.array(y_train_augmented)
    return X_train_augmented, y_train_augmented

def crop_augmented_images(X, detector, predictor, target_size=(150, 150)):
    X_cropped = []
    for img in X:
        img_cropped = crop_face(img.squeeze(), detector, predictor)
        if img_cropped.size != 0:
            img_resized = cv2.resize(img_cropped, target_size)
            X_cropped.append(img_resized.reshape(target_size[0], target_size[1], 1))
    return np.array(X_cropped)

def save_images(images, labels, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    for class_name in np.unique(labels):
        class_folder = os.path.join(output_folder, class_name)
        os.makedirs(class_folder, exist_ok=True)
    
    for idx, (img, label) in enumerate(zip(images, labels)):
        img_path = os.path.join(output_folder, label, f'image_{idx}.png')
        cv2.imwrite(img_path, img)

image_folder = '../images/10_data'
images, labels, classes = load_images(image_folder)

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, stratify=labels, random_state=42)

X_train_augmented, y_train_augmented = oversample_with_augmentation(X_train, y_train)

X_train_cropped = crop_augmented_images(X_train_augmented, detector, predictor)
X_test_cropped = crop_augmented_images(X_test, detector, predictor)

save_images(X_train_cropped, y_train_augmented, '../images/split/training')
save_images(X_test_cropped, y_test, '../images/split/testing')


ImportError: cannot import name 'ImageDataGenerator' from 'keras.preprocessing.image' (c:\Users\luft\anaconda3\Lib\site-packages\keras\api\preprocessing\image\__init__.py)

In [3]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('svm', SVC(verbose=True))
], verbose=True)

param_grid = {
    'pca__n_components': [25 , 50, 100],
    'svm__kernel': ['linear', 'rbf'],
    'svm__C': [1, 10, 100],
    'svm__gamma': ['scale', 'auto'],
    'svm__class_weight': ['balanced'],
}


grid_search = GridSearchCV(pipeline, param_grid, cv=5, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Fitting 5 folds for each of 36 candidates, totalling 180 fits
