# SIFT + SVM Model

## Import Libraries, Modules, and Data

In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.cluster import MiniBatchKMeans
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report

### Loading The Data

In [2]:
# Root folder of the dataset. ss_dataset() joins it with a split name
# ('train', 'validation', 'test') and a label name ('Au', 'Tp') to locate images.
path_ds = '25-ds-casia-ss-tvt'

In [3]:
def ss_imgload(folder):
    """Load every readable image in ``folder`` as a preprocessed grayscale image.

    Each regular file directly inside ``folder`` is read with
    ``cv2.imread(..., cv2.IMREAD_GRAYSCALE)``, resized to 256x256 and
    contrast-enhanced with CLAHE (clipLimit=2.0, 8x8 tiles). Entries that are
    not regular files, and files for which ``cv2.imread`` returns ``None``,
    are skipped.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        list: The preprocessed images, ordered by file name.
    """
    # A single CLAHE instance serves every image; rebuilding it per file was
    # loop-invariant work.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    images = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # order (and everything fitted on it downstream) reproducible across runs.
    for filename in tqdm(sorted(os.listdir(folder)), desc=f'Loading images from {folder}'):
        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            continue

        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue

        # Resize the image to (256, 256), then apply CLAHE
        img = cv2.resize(img, (256, 256))
        images.append(clahe.apply(img))
    return images

In [4]:
def ss_dataset(path_ds):
    """Load all splits of the dataset and turn every image into SIFT descriptors.

    For each split ('train', 'validation', 'test') and each label ('Au', 'Tp')
    the images found in ``<path_ds>/<split>/<label>`` are loaded with
    ``ss_imgload`` and passed one by one through ``ss_sift``.

    Args:
        path_ds: Root directory that contains the split sub-folders.

    Returns:
        dict: ``result[split][label]`` is a list holding the ``ss_sift`` output
        of every image loaded for that split and label.
    """
    split_names = ('train', 'validation', 'test')
    label_names = ('Au', 'Tp')

    ds = {}
    for split in split_names:
        per_label = {}
        for label in label_names:
            loaded = ss_imgload(os.path.join(path_ds, split, label))
            progress = tqdm(loaded, desc=f'Extracting SIFT features from {label} images in {split} set')
            per_label[label] = [ss_sift(picture) for picture in progress]
        ds[split] = per_label

    return ds

## SIFT Algorithm

In [5]:
def ss_sift(image, sift=None):
    """Compute SIFT descriptors for a single image.

    Args:
        image: Image handed to ``detectAndCompute`` (in this notebook, the
            grayscale arrays produced by ``ss_imgload``).
        sift: Optional detector object exposing
            ``detectAndCompute(image, mask)``. Pass a shared instance to avoid
            re-creating the detector for every image, or a differently
            configured one. When omitted, a fresh ``cv2.SIFT_create()`` is
            used, which is the original behaviour.

    Returns:
        The descriptors returned by the detector. Callers should expect
        ``None`` as a possible value; ``ss_bovw_histogram`` guards that case.
    """
    if sift is None:
        sift = cv2.SIFT_create()

    # Only the descriptors are used downstream; the keypoints are discarded.
    _, descriptors = sift.detectAndCompute(image, None)
    return descriptors

In [6]:
# Load every split/label folder under path_ds and extract SIFT descriptors per image.
# Layout of the result: ds[split][label] -> list with one ss_sift() result per loaded image.
ds = ss_dataset(path_ds)

Loading images from 25-ds-casia-ss-tvt\train\Au: 100%|████████████████████████████| 1313/1313 [00:01<00:00, 841.48it/s]
Extracting SIFT features from Au images in train set: 100%|████████████████████████| 1313/1313 [00:15<00:00, 84.99it/s]
Loading images from 25-ds-casia-ss-tvt\train\Tp: 100%|██████████████████████████████| 898/898 [00:01<00:00, 685.34it/s]
Extracting SIFT features from Tp images in train set: 100%|██████████████████████████| 898/898 [00:10<00:00, 82.92it/s]
Loading images from 25-ds-casia-ss-tvt\validation\Au: 100%|████████████████████████| 375/375 [00:00<00:00, 1027.17it/s]
Extracting SIFT features from Au images in validation set: 100%|█████████████████████| 375/375 [00:04<00:00, 82.73it/s]
Loading images from 25-ds-casia-ss-tvt\validation\Tp: 100%|█████████████████████████| 257/257 [00:00<00:00, 663.94it/s]
Extracting SIFT features from Tp images in validation set: 100%|█████████████████████| 257/257 [00:03<00:00, 81.65it/s]
Loading images from 25-ds-casia-ss-tvt\t

### Bag of Visual Words

In [7]:
def ss_bovw(descriptors_list, vocab_size):
    """Learn a visual vocabulary by clustering image descriptors.

    Args:
        descriptors_list: Sequence with one entry per image. Each entry is a
            2-D array of descriptors (one row per descriptor) or ``None`` when
            the image produced no descriptors.
        vocab_size: Number of clusters (visual words) to learn.

    Returns:
        MiniBatchKMeans: The clustering model fitted on all stacked descriptors.

    Raises:
        ValueError: If no entry contributes any descriptor.
    """
    # The None check must be applied per image. The earlier nested
    # comprehension iterated over each entry's rows before testing for None,
    # so a single image without descriptors raised TypeError.
    usable = [desc for desc in descriptors_list if desc is not None and len(desc) > 0]
    if not usable:
        raise ValueError('No descriptors available to build the vocabulary')

    # Stack all descriptors vertically in a numpy array
    all_descriptors = np.vstack(usable)

    # Perform k-means clustering to create a vocabulary
    kmeans = MiniBatchKMeans(n_clusters=vocab_size, random_state=0)
    kmeans.fit(all_descriptors)

    return kmeans

In [8]:
def ss_bovw_histogram(image_descriptors, kmeans):
    """Encode one image as a bag-of-visual-words count vector.

    Args:
        image_descriptors: 2-D array of descriptors for a single image (one
            row per descriptor), or ``None`` / an empty array when the image
            has no descriptors.
        kmeans: Fitted clustering model exposing ``cluster_centers_`` and
            ``predict``.

    Returns:
        np.ndarray: 1-D integer array of length ``len(kmeans.cluster_centers_)``
        where entry ``i`` is the number of descriptors assigned to cluster
        ``i``. All zeros when there are no descriptors.
    """
    n_words = len(kmeans.cluster_centers_)

    # No descriptors: return an all-zero histogram with the same integer dtype
    # as the regular path, so stacked feature matrices keep one dtype. The
    # empty-array guard also avoids calling predict() with zero samples.
    if image_descriptors is None or len(image_descriptors) == 0:
        return np.zeros(n_words, dtype=np.intp)

    # Predict the cluster for each descriptor
    words = kmeans.predict(image_descriptors)

    # Build a histogram of words; minlength pads clusters that received no
    # descriptor so the vector always has n_words entries.
    return np.bincount(words, minlength=n_words)

In [14]:
def ss_features_labels(ds, kmeans):
    """Turn a descriptor dataset into a feature matrix and a label vector.

    Walks ``ds[split][label]`` in dictionary order, converts every descriptor
    entry into a bag-of-visual-words histogram via ``ss_bovw_histogram`` and
    records a numeric class for it: 0 for label 'Au', 1 for any other label.

    Args:
        ds: Mapping ``split -> label -> list of per-image descriptors``.
        kmeans: Vocabulary model forwarded to ``ss_bovw_histogram``.

    Returns:
        tuple: ``(features, labels)`` as numpy arrays, one row / entry per
        image, in traversal order.
    """
    histograms, targets = [], []
    for split, per_label in ds.items():
        for label, descriptor_sets in per_label.items():
            target = 0 if label == 'Au' else 1
            progress = tqdm(descriptor_sets, desc=f'Building histograms for {label} images in {split} set')
            for image_descriptors in progress:
                histograms.append(ss_bovw_histogram(image_descriptors, kmeans))
                targets.append(target)
    return np.array(histograms), np.array(targets)

In [15]:
# Build the vocabulary using the training set only (both labels combined), so the
# validation and test descriptors play no part in fitting the clusters.
train_descriptors_list = ds['train']['Au'] + ds['train']['Tp']
vocab_size = 100  # Number of k-means clusters (visual words); adjust as needed
kmeans = ss_bovw(train_descriptors_list, vocab_size)

In [16]:
# Prepare features and labels for training, validation, and test sets.
# Each call passes a single-split dict, so ss_features_labels() returns only that
# split's histograms (X_*) and numeric labels (y_*: 0 = 'Au', 1 = 'Tp').
X_train, y_train = ss_features_labels({'train': ds['train']}, kmeans)
X_val, y_val = ss_features_labels({'validation': ds['validation']}, kmeans)
X_test, y_test = ss_features_labels({'test': ds['test']}, kmeans)

Building histograms for Au images in train set: 100%|████████████████████████████| 1313/1313 [00:00<00:00, 1350.52it/s]
Building histograms for Tp images in train set: 100%|██████████████████████████████| 898/898 [00:00<00:00, 1297.40it/s]
Building histograms for Au images in validation set: 100%|█████████████████████████| 375/375 [00:00<00:00, 1388.58it/s]
Building histograms for Tp images in validation set: 100%|█████████████████████████| 257/257 [00:00<00:00, 1374.02it/s]
Building histograms for Au images in test set: 100%|███████████████████████████████| 186/186 [00:00<00:00, 1452.80it/s]
Building histograms for Tp images in test set: 100%|███████████████████████████████| 126/126 [00:00<00:00, 1447.96it/s]


In [17]:
# Standardize the data: the scaler is fitted on the training features only and
# then applied unchanged to the validation and test features.
# NOTE(review): this cell overwrites X_train / X_val / X_test, so the raw
# histograms are no longer available after it runs.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

## SVM

In [18]:
# Train the SVM model: a linear-kernel SVC fitted on the standardized training
# histograms and their 0/1 labels.
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train, y_train)

## Evaluation

In [20]:
# Evaluate the model on the training set (the data it was fitted on), as a
# baseline for the validation and test reports that follow.
y_train_pred = svm.predict(X_train)
print("Training Set Performance")
print(classification_report(y_train, y_train_pred))

Training Set Performance
              precision    recall  f1-score   support

           0       0.62      0.92      0.74      1313
           1       0.61      0.18      0.28       898

    accuracy                           0.62      2211
   macro avg       0.62      0.55      0.51      2211
weighted avg       0.62      0.62      0.55      2211



In [21]:
# Evaluate the model on the validation set.
# In the report, class 0 is 'Au' and class 1 is 'Tp' (see ss_features_labels).
y_val_pred = svm.predict(X_val)
print("Validation Set Performance")
print(classification_report(y_val, y_val_pred))

Validation Set Performance
              precision    recall  f1-score   support

           0       0.61      0.93      0.74       375
           1       0.56      0.13      0.21       257

    accuracy                           0.60       632
   macro avg       0.58      0.53      0.47       632
weighted avg       0.59      0.60      0.52       632



In [22]:
# Evaluate the model on the test set.
# In the report, class 0 is 'Au' and class 1 is 'Tp' (see ss_features_labels).
y_test_pred = svm.predict(X_test)
print("Test Set Performance")
print(classification_report(y_test, y_test_pred))

Test Set Performance
              precision    recall  f1-score   support

           0       0.61      0.95      0.74       186
           1       0.58      0.11      0.19       126

    accuracy                           0.61       312
   macro avg       0.60      0.53      0.46       312
weighted avg       0.60      0.61      0.52       312

