<a href="https://colab.research.google.com/github/mahithabushap/Cyber-Search-Engine-/blob/main/CALTECH101.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DATA LOADING AND PREPROCESSING


**Caltech-101**
1. Download and extract the dataset:

In [None]:
!wget https://data.caltech.edu/records/mzrjq-6wc02/files/caltech-101.zip
!unzip caltech-101.zip
!tar -xvzf caltech-101/101_ObjectCategories.tar.gz -C caltech-101/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
101_ObjectCategories/chair/image_0005.jpg
101_ObjectCategories/chair/image_0006.jpg
101_ObjectCategories/chair/image_0007.jpg
101_ObjectCategories/chair/image_0008.jpg
101_ObjectCategories/chair/image_0010.jpg
101_ObjectCategories/chair/image_0011.jpg
101_ObjectCategories/chair/image_0012.jpg
101_ObjectCategories/chair/image_0013.jpg
101_ObjectCategories/chair/image_0014.jpg
101_ObjectCategories/chair/image_0016.jpg
101_ObjectCategories/chair/image_0017.jpg
101_ObjectCategories/chair/image_0018.jpg
101_ObjectCategories/chair/image_0019.jpg
101_ObjectCategories/chair/image_0020.jpg
101_ObjectCategories/chair/image_0022.jpg
101_ObjectCategories/chair/image_0023.jpg
101_ObjectCategories/chair/image_0024.jpg
101_ObjectCategories/chair/image_0025.jpg
101_ObjectCategories/chair/image_0026.jpg
101_ObjectCategories/chair/image_0028.jpg
101_ObjectCategories/chair/image_0029.jpg
101_ObjectCategories/chair/image_0030.jpg
101_ObjectC

# IMPORTING LIBRARIES

In [None]:
import cv2
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# DATA PREP

## Load and Resize Grayscale Images


In [None]:
def load_images_from_folder(folder, max_images=30):
    """Load up to ``max_images`` grayscale images per class sub-folder.

    Every sub-directory of ``folder`` is treated as one class. Classes and the
    files inside them are visited in sorted order, so the same images are
    selected on every machine (``os.listdir`` alone returns entries in
    arbitrary order).

    Args:
        folder: Directory that contains one sub-directory per class.
        max_images: Maximum number of successfully decoded images kept per class.

    Returns:
        A tuple ``(images, labels, label_map)`` where ``images`` is a list of
        300x300 grayscale images, ``labels`` holds the integer class index of
        each image, and ``label_map`` maps class index -> class folder name.
        A class folder gets an entry in ``label_map`` even if no image could be
        loaded from it.
    """
    valid_extensions = (".jpg", ".jpeg", ".png")
    images, labels, label_map = [], [], {}
    label_idx = 0

    for class_name in sorted(os.listdir(folder)):
        class_path = os.path.join(folder, class_name)
        if not os.path.isdir(class_path):
            continue

        count = 0
        # Sorted so that the subset picked by `max_images` is reproducible.
        for fname in sorted(os.listdir(class_path)):
            if count >= max_images:
                break
            # Case-insensitive match so files such as "IMG.JPG" are not skipped.
            if not fname.lower().endswith(valid_extensions):
                continue
            img = cv2.imread(os.path.join(class_path, fname), cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Unreadable/corrupt file: skip it without using up the quota.
                continue
            images.append(cv2.resize(img, (300, 300)))
            labels.append(label_idx)
            count += 1

        label_map[label_idx] = class_name
        label_idx += 1

    return images, labels, label_map

# Read at most 30 images per class from the extracted archive
# (adjust the path if the dataset was unpacked somewhere else).
images, labels, label_map = load_images_from_folder(
    'caltech-101/101_ObjectCategories',
    max_images=30,
)
print(f"Loaded {len(images)} images from {len(label_map)} classes.")

Loaded 3060 images from 102 classes.


# SIFT FEATURE EXTRACTION

Dense SIFT Feature Extraction

In [None]:
# Create OpenCV's SIFT extractor once; `extract_dense_sift` reuses this
# module-level object for every image.
# NOTE(review): SIFT ships in the main OpenCV package since 4.4.0, so
# opencv-contrib should only be needed on older releases - confirm for the
# runtime in use.
sift = cv2.SIFT_create()

def extract_dense_sift(img, step_size=8, patch_size=16):
    """Compute SIFT descriptors on a regular grid instead of detected keypoints.

    A keypoint of size ``patch_size`` is placed every ``step_size`` pixels,
    starting at (0, 0) and scanning row by row (x varies fastest).

    Args:
        img: Image array; only ``img.shape[0]`` (height) and ``img.shape[1]``
            (width) are read here before the image is handed to SIFT.
        step_size: Grid spacing in pixels along both axes.
        patch_size: Keypoint size passed to ``cv2.KeyPoint``.

    Returns:
        The descriptors returned by the module-level ``sift.compute`` call.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    grid = [
        cv2.KeyPoint(col, row, patch_size)
        for row in range(0, n_rows, step_size)
        for col in range(0, n_cols, step_size)
    ]
    _, descriptors = sift.compute(img, grid)
    return descriptors


In [None]:
# NOTE(review): this cell repeats the preceding cell - it re-creates `sift`
# and redefines `extract_dense_sift` with the same behaviour - so it is
# redundant and can be deleted from the notebook.
# Use OpenCV's SIFT (enabled via opencv-contrib)
sift = cv2.SIFT_create()

def extract_dense_sift(img, step_size=8, patch_size=16):
    """Compute SIFT descriptors on a regular grid of keypoints.

    One keypoint of size ``patch_size`` is created every ``step_size`` pixels
    (rows outer, columns inner, starting at (0, 0)) and the descriptors
    produced by ``sift.compute`` for that grid are returned.
    """
    keypoints = []
    for y in range(0, img.shape[0], step_size):
        for x in range(0, img.shape[1], step_size):
            keypoints.append(cv2.KeyPoint(x, y, patch_size))
    # The (possibly updated) keypoint list is not needed by callers.
    keypoints, descriptors = sift.compute(img, keypoints)
    return descriptors


In [None]:
# Sanity check: descriptor matrix obtained for the first loaded image.
test_desc = extract_dense_sift(img=images[0])
print(f"Dense SIFT shape: {test_desc.shape}")

Dense SIFT shape: (1444, 128)


In [None]:

from sklearn.cluster import MiniBatchKMeans

# Collect descriptors from a subset of images (speed: 100 images at most).
# The loader emits images class by class, so `images[:100]` would only cover
# the first few classes and bias the vocabulary towards them. A seeded random
# sample spreads the subset over the whole dataset and stays reproducible.
vocab_rng = np.random.default_rng(42)
vocab_indices = vocab_rng.choice(len(images), size=min(100, len(images)), replace=False)

desc_list = []
for idx in vocab_indices:
    desc = extract_dense_sift(images[idx])
    if desc is not None:
        desc_list.append(desc)

if not desc_list:
    raise ValueError("No SIFT descriptors were extracted; check that the images were loaded.")

# Stack all descriptors into one (n_descriptors, descriptor_dim) matrix
all_desc = np.vstack(desc_list)
print(f"Total descriptors: {all_desc.shape}")

# Build vocabulary: each cluster centre is one visual word
k = 200  # number of visual words
kmeans = MiniBatchKMeans(n_clusters=k, batch_size=1000, random_state=42)
kmeans.fit(all_desc)

Total descriptors: (144400, 128)


In [None]:
def compute_spatial_pyramid(img, vocab, levels=2, k=200, step_size=8):
    """Build a spatial-pyramid bag-of-visual-words feature vector for one image.

    Dense SIFT descriptors are quantised with ``vocab`` and histogrammed over a
    pyramid of grids: level ``l`` splits the image into ``2**l x 2**l`` cells.
    Each cell histogram is scaled by ``1 / 2**(levels - l)`` and all histograms
    are concatenated (coarse to fine, cells row by row).

    Args:
        img: Image array; the first two dimensions are taken as height, width.
        vocab: Fitted quantiser exposing ``predict(descriptors)`` that returns
            one integer visual-word id per descriptor.
        levels: Index of the finest pyramid level (levels 0..``levels`` are used).
        k: Number of histogram bins per cell. Must be at least the number of
            visual words in ``vocab``; bins beyond that stay empty.
        step_size: Grid spacing forwarded to ``extract_dense_sift``.

    Returns:
        1-D float array of length ``k * (4**(levels + 1) - 1) // 3``
        (all zeros when no descriptors are available).

    Raises:
        ValueError: If the descriptor count does not match the sampling grid,
            or if ``vocab`` yields a word id that does not fit into ``k`` bins.
    """
    descriptors = extract_dense_sift(img, step_size=step_size)
    if descriptors is None:
        return np.zeros(k * (4**(levels+1) - 1) // 3)

    height, width = img.shape[:2]

    # Coordinates of the dense grid in the same order extract_dense_sift uses
    # (rows outer, columns inner); no need to build cv2.KeyPoint objects again.
    grid_x, grid_y = np.meshgrid(np.arange(0, width, step_size),
                                 np.arange(0, height, step_size))
    xs, ys = grid_x.ravel(), grid_y.ravel()

    word_labels = np.asarray(vocab.predict(descriptors))  # Assign visual word

    # Fail loudly rather than pairing descriptors with the wrong locations.
    if len(word_labels) != xs.size:
        raise ValueError(
            f"Got {len(word_labels)} descriptors for {xs.size} grid points; "
            "descriptors no longer align with the sampling grid."
        )
    # A word id >= k would make np.bincount return more than k bins and give
    # feature vectors of inconsistent length.
    if word_labels.size and word_labels.max() >= k:
        raise ValueError(
            f"Vocabulary produced word id {int(word_labels.max())} but k={k}; "
            "k must be at least the vocabulary size."
        )

    pyramid = []
    for level in range(levels + 1):
        num_cells = 2 ** level
        cell_w, cell_h = width / num_cells, height / num_cells
        # Finer levels get larger weights; the finest level has weight 1.
        weight = 1 / (2 ** (levels - level))

        # Integer cell borders shared by neighbouring cells, so every grid
        # point falls into exactly one cell per level.
        x_edges = [int(j * cell_w) for j in range(num_cells + 1)]
        y_edges = [int(i * cell_h) for i in range(num_cells + 1)]

        for i in range(num_cells):
            in_row = (ys >= y_edges[i]) & (ys < y_edges[i + 1])
            for j in range(num_cells):
                in_cell = in_row & (xs >= x_edges[j]) & (xs < x_edges[j + 1])
                hist = np.bincount(word_labels[in_cell], minlength=k)
                pyramid.append(weight * hist)

    return np.concatenate(pyramid)


In [None]:
# One spatial-pyramid feature vector per image (rows of X) together with the
# matching integer class labels.
X = np.array([
    compute_spatial_pyramid(image, kmeans, levels=2, k=k)
    for image in images
])
y = np.array(labels)

print("Feature matrix shape:", X.shape)


Feature matrix shape: (3060, 4200)


# TRAINING MODEL

In [None]:
from collections import defaultdict

# Bucket the loaded images by their integer class label so that every class
# can be split into train/test parts independently later on.
grouped_images = defaultdict(list)
for label, img in zip(labels, images):
    grouped_images[label].append(img)


In [None]:
import cv2
import numpy as np
import os
import random
from collections import defaultdict
from sklearn.svm import LinearSVC
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import normalize
from sklearn.svm import SVC
from tqdm import tqdm



#  Training and Evaluation

n_runs = 10
n_train_per_class = 15  # remaining images of each class are used for testing
per_class_accuracies = []

# The histogram length has to match the fitted vocabulary: asking for more
# bins than `kmeans` has clusters only appends bins that are always empty.
# Refit `kmeans` with a larger n_clusters to actually enlarge the vocabulary.
k = kmeans.n_clusters

# Dedicated, seeded generator so the random splits are reproducible.
split_rng = random.Random(42)
class_labels = sorted(grouped_images.keys())

# The pyramid features do not depend on the train/test split, so extract them
# once per image here instead of once per image *per run*.
features_by_class = {}
for label in tqdm(class_labels, desc="Processing images"):
    features_by_class[label] = np.array(
        [compute_spatial_pyramid(img, kmeans, levels=3, k=k) for img in grouped_images[label]]
    )

for run in range(n_runs):
    print(f"\nRun {run + 1}")
    train_parts, test_parts, y_train, y_test = [], [], [], []

    for label in class_labels:
        class_features = features_by_class[label]
        # Random permutation of this class' images for the current run.
        order = np.asarray(
            split_rng.sample(range(len(class_features)), len(class_features)),
            dtype=int,
        )
        train_idx, test_idx = order[:n_train_per_class], order[n_train_per_class:]

        train_parts.append(class_features[train_idx])
        test_parts.append(class_features[test_idx])
        y_train.extend([label] * len(train_idx))
        y_test.extend([label] * len(test_idx))

    # L2-normalize every feature vector
    X_train = normalize(np.vstack(train_parts))
    X_test = normalize(np.vstack(test_parts))
    print(f"Training on {len(X_train)} samples, testing on {len(X_test)}")

    # Train RBF kernel SVM
    clf = SVC(kernel='rbf', C=10, gamma='scale')  # You can try C=1 or C=10
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Compute per-class accuracy
    correct = defaultdict(int)
    total = defaultdict(int)
    for true, pred in zip(y_test, y_pred):
        if true == pred:
            correct[true] += 1
        total[true] += 1

    # Average only over classes that actually have test samples; a class
    # without any would otherwise be counted as 0% accuracy.
    class_accs = [correct[l] / total[l] for l in class_labels if total[l] > 0]
    mean_per_class_acc = np.mean(class_accs)
    per_class_accuracies.append(mean_per_class_acc)
    print(f"Mean per-class accuracy: {mean_per_class_acc:.4f}")

print(
    f"\nMean per-class accuracy over {n_runs} runs: "
    f"{np.mean(per_class_accuracies):.4f} +/- {np.std(per_class_accuracies):.4f}"
)








Run 1


Processing images: 100%|██████████| 102/102 [19:17<00:00, 11.35s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.4026

Run 2


Processing images: 100%|██████████| 102/102 [19:22<00:00, 11.39s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.4183

Run 3


Processing images: 100%|██████████| 102/102 [19:21<00:00, 11.39s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.4092

Run 4


Processing images: 100%|██████████| 102/102 [19:24<00:00, 11.42s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.4163

Run 5


Processing images: 100%|██████████| 102/102 [19:22<00:00, 11.40s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.3974

Run 6


Processing images: 100%|██████████| 102/102 [19:22<00:00, 11.39s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.4131

Run 7


Processing images: 100%|██████████| 102/102 [19:20<00:00, 11.37s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.4131

Run 8


Processing images: 100%|██████████| 102/102 [19:20<00:00, 11.38s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.4105

Run 9


Processing images: 100%|██████████| 102/102 [19:26<00:00, 11.43s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.4039

Run 10


Processing images: 100%|██████████| 102/102 [19:26<00:00, 11.44s/it]


Training on 1530 samples, testing on 1530
defaultdict(<class 'int'>, {0: 15, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 10: 15, 11: 15, 12: 15, 13: 15, 14: 15, 15: 15, 16: 15, 17: 15, 18: 15, 19: 15, 20: 15, 21: 15, 22: 15, 23: 15, 24: 15, 25: 15, 26: 15, 27: 15, 28: 15, 29: 15, 30: 15, 31: 15, 32: 15, 33: 15, 34: 15, 35: 15, 36: 15, 37: 15, 38: 15, 39: 15, 40: 15, 41: 15, 42: 15, 43: 15, 44: 15, 45: 15, 46: 15, 47: 15, 48: 15, 49: 15, 50: 15, 51: 15, 52: 15, 53: 15, 54: 15, 55: 15, 56: 15, 57: 15, 58: 15, 59: 15, 60: 15, 61: 15, 62: 15, 63: 15, 64: 15, 65: 15, 66: 15, 67: 15, 68: 15, 69: 15, 70: 15, 71: 15, 72: 15, 73: 15, 74: 15, 75: 15, 76: 15, 77: 15, 78: 15, 79: 15, 80: 15, 81: 15, 82: 15, 83: 15, 84: 15, 85: 15, 86: 15, 87: 15, 88: 15, 89: 15, 90: 15, 91: 15, 92: 15, 93: 15, 94: 15, 95: 15, 96: 15, 97: 15, 98: 15, 99: 15, 100: 15, 101: 15})
Mean per-class accuracy: 0.3980


In [None]:
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Train-test split (70/30), stratified so every class keeps the same
# proportion of samples in both parts; the fixed seed makes it repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Train the linear SVM. The solver shuffles the data internally, so
# random_state is fixed as well to make the reported accuracy reproducible.
clf = LinearSVC(max_iter=5000, random_state=42)
clf.fit(X_train, y_train)

# Evaluate: overall fraction of correctly classified test images
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Test accuracy: {accuracy:.4f}")


Test accuracy: 0.4630
