# 6. Support Vector Machine

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from util import get_image_by_index, decode_class

In [None]:
# Open the archive that holds the feature and label arrays
data = np.load('dataset_features.npz')

# Show the names of the arrays stored in the archive
print(data.files)

# Pull out the feature/label pair of each split
X_train, y_train = data['trainset_features'], data['trainset_labels']
X_val, y_val = data['validset_features'], data['validset_labels']
X_test, y_test = data['testset_features'], data['testset_labels']

# Array stored under 'class_labels'; used later as the column names of the
# probability tables
class_labels = data['class_labels']

In [None]:
# Decode the stored training labels into the targets the classifier is fit on
y_train_decoded = decode_class(y_train)

# Linear-kernel support vector classifier with default settings
svc_linear = SVC(kernel='linear')

# Train on the training split (the fitted estimator is the cell output)
svc_linear.fit(X_train, y_train_decoded)

In [None]:
# Report the mean accuracy of the linear SVM on every split
print('Model Accuracy:')
for line_prefix, split_features, split_labels in (
    ('On train set', X_train, y_train),
    ('On valid set', X_val, y_val),
    ('On test  set', X_test, y_test),
):
    accuracy = svc_linear.score(split_features, decode_class(split_labels))
    print(f'{line_prefix}: {accuracy:.3f}')

## SVM classifier with RBF kernel

In [None]:
# RBF-kernel SVM with C=1. probability=True enables predict_proba, whose
# calibration shuffles the training data internally; pinning random_state
# makes the reported probabilities reproducible across kernel restarts.
svc_rbf = SVC(kernel='rbf', C=1, probability=True, random_state=42)

# Train on the training split (the fitted estimator is the cell output)
svc_rbf.fit(X_train, decode_class(y_train))

In [None]:
# Report the mean accuracy of the RBF SVM on every split
print('Model Accuracy:')
for line_prefix, split_features, split_labels in (
    ('On train set', X_train, y_train),
    ('On valid set', X_val, y_val),
    ('On test  set', X_test, y_test),
):
    accuracy = svc_rbf.score(split_features, decode_class(split_labels))
    print(f'{line_prefix}: {accuracy:.3f}')

The classifier with the RBF kernel performs similarly to the one with the linear kernel. 

## Compute image probability

In [None]:
# Target size the images are resized to, and number of images per batch
image_size = (224, 224)
batch_size = 32

# Generator that multiplies every pixel value by 1/255
datagen = ImageDataGenerator(rescale=1./255)

# Read the images under ./test in a fixed order (shuffle=False), so that an
# index always refers to the same file
test_images = datagen.flow_from_directory(
    './test',
    target_size=image_size,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Draw up to 10 distinct test-set positions. A seeded generator keeps the
# sample (and every figure/table derived from it) identical across kernel
# restarts; the min() guard avoids a ValueError on test sets smaller than 10.
rng = np.random.default_rng(42)
sample_size = min(10, test_images.n)
random_indices = rng.choice(test_images.n, size=sample_size, replace=False)

# Class probabilities for all sampled rows in one batched call. The images
# themselves are not needed here, so they are no longer loaded.
# NOTE(review): assumes row i of X_test was extracted from image i of
# test_images (same ordering, shuffle=False) -- confirm against the
# feature-extraction notebook.
probabilities = list(svc_rbf.predict_proba(X_test[random_indices]))

In [None]:
# Show the sampled test images side by side: one row, one column per image.
# squeeze=False keeps the result 2-D even for a single image; ravel() restores
# the flat array of Axes.
fig, axes_grid = plt.subplots(1, len(random_indices), figsize=(20, 10), squeeze=False)
axes = axes_grid.ravel()
for ax, idx in zip(axes, random_indices):
    ax.imshow(get_image_by_index(test_images, idx)[0])
    # Title each panel with the file name; basename works with '/' as well as
    # the platform separator, unlike a hard-coded split('/')
    ax.set_title(os.path.basename(test_images.filenames[idx]))
    ax.axis('off')

plt.tight_layout()

# One row per sampled image (same order as the figure), one column per class.
# NOTE(review): assumes class_labels is ordered like svc_rbf.classes_ -- confirm.
categories = class_labels
probs_df = pd.DataFrame(probabilities, columns=categories)
probs_df


## Regularization parameter C = 0.0001

In [None]:
# Refit the RBF SVM with C=0.0001 (in scikit-learn a smaller C means stronger
# regularization). random_state pins the internal shuffling used by the
# probability calibration so the table below is reproducible.
# NOTE: this rebinds svc_rbf, so cells above that use svc_rbf will see this
# model if they are re-run afterwards.
svc_rbf = SVC(kernel='rbf', C=0.0001, probability=True, random_state=42)
svc_rbf.fit(X_train, decode_class(y_train))

# Class probabilities for the previously sampled rows, in one batched call
# (the images are not needed here, so they are no longer loaded)
probabilities = list(svc_rbf.predict_proba(X_test[random_indices]))

In [None]:
# Show the sampled test images side by side: one row, one column per image.
# squeeze=False keeps the result 2-D even for a single image; ravel() restores
# the flat array of Axes.
fig, axes_grid = plt.subplots(1, len(random_indices), figsize=(20, 10), squeeze=False)
axes = axes_grid.ravel()
for ax, idx in zip(axes, random_indices):
    ax.imshow(get_image_by_index(test_images, idx)[0])
    # Title each panel with the file name; basename works with '/' as well as
    # the platform separator, unlike a hard-coded split('/')
    ax.set_title(os.path.basename(test_images.filenames[idx]))
    ax.axis('off')

plt.tight_layout()

# One row per sampled image (same order as the figure), one column per class.
# NOTE(review): assumes class_labels is ordered like svc_rbf.classes_ -- confirm.
categories = class_labels
probs_df = pd.DataFrame(probabilities, columns=categories)
probs_df

## Regularization parameter C = 1000

In [None]:
# Refit the RBF SVM with C=1000 (in scikit-learn a larger C means weaker
# regularization). random_state pins the internal shuffling used by the
# probability calibration so the table below is reproducible.
# NOTE: this rebinds svc_rbf, so cells above that use svc_rbf will see this
# model if they are re-run afterwards.
svc_rbf = SVC(kernel='rbf', C=1000, probability=True, random_state=42)
svc_rbf.fit(X_train, decode_class(y_train))

# Class probabilities for the previously sampled rows, in one batched call
# (the images are not needed here, so they are no longer loaded)
probabilities = list(svc_rbf.predict_proba(X_test[random_indices]))

In [None]:
# Show the sampled test images side by side: one row, one column per image.
# squeeze=False keeps the result 2-D even for a single image; ravel() restores
# the flat array of Axes.
fig, axes_grid = plt.subplots(1, len(random_indices), figsize=(20, 10), squeeze=False)
axes = axes_grid.ravel()
for ax, idx in zip(axes, random_indices):
    ax.imshow(get_image_by_index(test_images, idx)[0])
    # Title each panel with the file name; basename works with '/' as well as
    # the platform separator, unlike a hard-coded split('/')
    ax.set_title(os.path.basename(test_images.filenames[idx]))
    ax.axis('off')

plt.tight_layout()

# One row per sampled image (same order as the figure), one column per class.
# NOTE(review): assumes class_labels is ordered like svc_rbf.classes_ -- confirm.
categories = class_labels
probs_df = pd.DataFrame(probabilities, columns=categories)
probs_df