In [5]:
from sklearn.metrics import ConfusionMatrixDisplay
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
from skimage.io import imread
from skimage.transform import resize


In [6]:

def load_image_files(container_path, dimension=(64, 64), max_images_per_class=1000):
    """
    Load a limited number of image files, using subfolder names as categories,
    and return them in a container shaped like a scikit-learn sample dataset.

    Category folders and the files inside them are visited in sorted order so
    that label indices and the subset kept by ``max_images_per_class`` are
    reproducible across runs and platforms. Hidden entries (names starting
    with ``.``, e.g. ``.ipynb_checkpoints`` or ``.DS_Store``) and anything
    inside a category folder that is not a regular file are skipped.

    Parameters
    ----------
    container_path : str or path-like
        Path to the main folder holding one subfolder per category.
    dimension : tuple
        Output size handed to ``resize`` for every image.
    max_images_per_class : int or None
        Maximum number of images to load per class; ``None`` loads them all.

    Returns
    -------
    Bunch
        ``data`` (one flattened resized image per row), ``target`` (index into
        ``target_names``), ``target_names`` (category folder names),
        ``images`` (the resized images) and ``DESCR``.

    Raises
    ------
    FileNotFoundError
        If ``container_path`` does not exist.
    NotADirectoryError
        If ``container_path`` exists but is not a directory.
    """
    image_dir = Path(container_path)
    # Fail early with a message that says what the path was supposed to be,
    # instead of the bare errno text raised from deep inside iterdir().
    if not image_dir.exists():
        raise FileNotFoundError(
            f"Dataset directory does not exist: '{image_dir}'")
    if not image_dir.is_dir():
        raise NotADirectoryError(
            f"Dataset path is not a directory: '{image_dir}'")

    # Sorted by name so that class index <-> class name is deterministic.
    folders = sorted(
        (entry for entry in image_dir.iterdir()
         if entry.is_dir() and not entry.name.startswith('.')),
        key=lambda entry: entry.name,
    )
    categories = [folder.name for folder in folders]

    descr = "A image classification dataset"
    images = []
    flat_data = []
    target = []

    for label, folder in enumerate(folders):
        loaded = 0  # images accepted so far for this class
        for file in sorted(folder.iterdir()):
            if max_images_per_class is not None and loaded >= max_images_per_class:
                break  # per-class limit reached
            if not file.is_file() or file.name.startswith('.'):
                continue  # nested folders / hidden OS files are not images
            img = imread(file)
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
            # NOTE(review): images with differing channel counts (grayscale vs
            # RGB/RGBA) resize to different shapes and cannot be stacked below;
            # presumably the dataset is homogeneous - confirm.
            flat_data.append(img_resized.flatten())
            images.append(img_resized)
            target.append(label)
            loaded += 1

    return Bunch(data=np.array(flat_data),
                 target=np.array(target),
                 target_names=categories,
                 images=np.array(images),
                 DESCR=descr)

In [7]:
# Hyper-parameter grids, one per SVM kernel. Each grid is searched on its own
# so every kernel gets a separate confusion matrix and classification report.
param_grids = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'coef0': [0, 1, 10], 'kernel': ['sigmoid']}
]

# Root folder of the dataset (one subfolder per class).
# TODO: '/path/to/dataset' is a placeholder - point DATASET_DIR at the real
# dataset before running; loading fails with FileNotFoundError otherwise.
DATASET_DIR = Path('/path/to/dataset')

image_dataset = load_image_files(DATASET_DIR)
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3, random_state=109)

# Pin the label set so matrix rows/columns and report rows always line up with
# target_names, even if a class happens to be absent from the test split.
class_labels = np.arange(len(image_dataset.target_names))

for grid in param_grids:
    svc = svm.SVC(random_state=42)

    # Cross-validated search over this kernel's grid.
    clf = GridSearchCV(svc, grid, cv=5)
    clf.fit(X_train, y_train)

    # Predict once and reuse the predictions for both the plot and the report
    # (from_estimator would run the prediction a second time).
    y_pred = clf.predict(X_test)

    disp = ConfusionMatrixDisplay.from_predictions(
        y_test, y_pred,
        labels=class_labels,
        display_labels=image_dataset.target_names,
        values_format='d'
    )
    disp.ax_.set_title("Confusion matrix - {} kernel".format(grid['kernel'][0]))

    # Rotate the x-axis labels on this figure's own axes rather than relying
    # on pyplot's implicit "current axes".
    plt.setp(disp.ax_.get_xticklabels(), rotation=90, ha='right')

    plt.show()
    # Release the figure so repeated iterations do not accumulate open figures.
    plt.close(disp.figure_)

    # Print the classification report with readable class names.
    print("Classification report for - \n{}:\n{}\n".format(
        clf,
        metrics.classification_report(
            y_test, y_pred,
            labels=class_labels,
            target_names=image_dataset.target_names,
        )
    ))



FileNotFoundError: [Errno 2] No such file or directory: '/path/to/dataset'