In [16]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score


In [18]:
def load_data_from_folders(base_path, image_size=64, max_images_per_class=None):
    X = []
    y = []

    classes = ['Cat', 'Dog']  # Folder names = class labels

    for label, class_name in enumerate(classes):
        folder_path = os.path.join(base_path, class_name)
        image_files = os.listdir(folder_path)

        if max_images_per_class:
            image_files = image_files[:max_images_per_class]

        for img_name in tqdm(image_files, desc=f"Loading {class_name} images"):
            img_path = os.path.join(folder_path, img_name)

            img = cv2.imread(img_path)
            if img is None:
                continue  # Skip unreadable/corrupt files

            img = cv2.resize(img, (image_size, image_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            X.append(img.flatten())
            y.append(label)  # 0 = Cat, 1 = Dog

    return np.array(X), np.array(y)


In [20]:
DATASET_PATH = "."  # <-- Change if needed
IMAGE_SIZE = 64

# Limit images for faster testing (adjust as needed)
X, y = load_data_from_folders(DATASET_PATH, image_size=IMAGE_SIZE, max_images_per_class=2000)

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)


Loading Cat images: 100%|██████████████████████████████████████████████████████████| 2000/2000 [00:35<00:00, 55.97it/s]
Loading Dog images: 100%|██████████████████████████████████████████████████████████| 2000/2000 [00:35<00:00, 57.07it/s]

Shape of X: (3987, 4096)
Shape of y: (3987,)





In [22]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
model = SVC(kernel='linear')  # Try 'rbf' kernel for non-linear boundaries
model.fit(X_train, y_train)


In [12]:
y_pred = model.predict(X_test)

from sklearn.metrics import accuracy_score, classification_report

acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)
print("Formatted Accuracy: {:.2f}%".format(acc * 100))

print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=["Cat", "Dog"]))


Accuracy: 0.5238095238095238
Formatted Accuracy: 52.38%
Classification Report:
              precision    recall  f1-score   support

         Cat       0.52      0.54      0.53       200
         Dog       0.52      0.51      0.52       199

    accuracy                           0.52       399
   macro avg       0.52      0.52      0.52       399
weighted avg       0.52      0.52      0.52       399



In [14]:
import joblib
joblib.dump(model, "svm_cat_dog_model.pkl")


['svm_cat_dog_model.pkl']