In [None]:
#Import all the libraries

import os
import numpy as np
import pathlib

from skimage.io import imread
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA

In [None]:
# Root directory that holds one sub-folder of images per soybean category.
data_dir = pathlib.Path('./images/')

Categories = ['Broken soybeans', 'Immature soybeans', 'Intact soybeans', 'Skin-damaged soybeans', 'Spotted soybeans']
image_data = []  # Flattened pixel vectors, one entry per image
labels = []  # Category name for each entry of image_data, in the same order

# Shape of the first image read; every later image must match it so the
# flattened vectors can be stacked into a single 2-D array.
expected_shape = None

for category in Categories:
    print(f'Loading category: {category}')
    path = os.path.join(data_dir, category)

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the seeded train/test split) stable across runs.
    for img_filename in sorted(os.listdir(path)):
        img_path = os.path.join(path, img_filename)

        # Skip sub-directories and hidden files such as .DS_Store or
        # .ipynb_checkpoints, which are not images and would make imread fail.
        if img_filename.startswith('.') or not os.path.isfile(img_path):
            continue

        # Read the image using scikit-image
        img = imread(img_path)

        # Fail early, naming the offending file, instead of producing a
        # ragged array that only breaks later inside np.array or PCA.
        if expected_shape is None:
            expected_shape = img.shape
        elif img.shape != expected_shape:
            raise ValueError(
                f'Image {img_path} has shape {img.shape}, expected {expected_shape}; '
                'all images must share one shape before flattening.'
            )

        # Flatten the image into a 1-D vector and record its category
        image_data.append(img.flatten())
        labels.append(category)

if not image_data:
    raise ValueError(f'No images found under {data_dir}')

# Stack the vectors into an (n_images, n_pixels) array and map the category
# names to integer class ids.
image_data = np.array(image_data)
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

In [None]:
# Number of principal components kept by the PCA projection below.
n_components = 50
# random_state is fixed so that any randomized solver PCA may choose is repeatable.
pca = PCA(n_components=n_components, random_state=42)
# NOTE(review): this fits the projection on every row of image_data, i.e.
# before any train/test split. Do not treat image_data_pca as unseen data in
# a held-out evaluation; fit PCA on the training rows only for that purpose.
image_data_pca = pca.fit_transform(image_data)

In [None]:
# 80-20 split of the raw (un-projected) pixel vectors. Splitting before the
# PCA is fitted keeps the test rows from influencing the projection.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    image_data, encoded_labels, test_size=0.2, random_state=42
)

# Fit PCA on the training rows only, then apply that same projection to the
# test rows. Fitting on all rows would leak test-set information into the
# features and make the reported accuracy optimistic.
pca_train = PCA(n_components=n_components, random_state=42)
X_train_pca = pca_train.fit_transform(X_train_raw)
X_test_pca = pca_train.transform(X_test_raw)

# The raw splits are full copies of the pixel data; release them once projected.
del X_train_raw, X_test_raw

# Create an SVM model with a linear kernel
svm_model = SVC(kernel='linear', random_state=42)

# Train the SVM model on the PCA-transformed training data
svm_model.fit(X_train_pca, y_train)

# Evaluate the svm_model on the PCA-transformed testing data
y_pred_pca = svm_model.predict(X_test_pca)
accuracy_pca = accuracy_score(y_test, y_pred_pca)

# Report per-class metrics under the original category names rather than the
# integer codes produced by label_encoder. Passing `labels` explicitly keeps
# the names aligned even if a class is absent from this particular test split.
report_pca = classification_report(
    y_test,
    y_pred_pca,
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
)

print(f'Accuracy with PCA: {accuracy_pca}')
print(f'Classification Report with PCA:\n{report_pca}')