In [1]:
import os
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm
from skimage.feature import hog


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import zipfile
zip_path = "/content/drive/MyDrive/SVM_tester/dogs_vs_cats.zip"
with zipfile.ZipFile(zip_path,'r') as zip_ref:
  zip_ref.extractall("/content/data")



In [None]:
def extract_hog_features(data, image_size=(64, 64)):
    hog_features = []
    for img_flat in data:
        img = np.reshape(img_flat, image_size)
        features = hog(
            img,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            orientations=9,
            block_norm='L2-Hys',
            visualize=False,
            feature_vector=True
        )
        hog_features.append(features)
    return np.array(hog_features)


In [3]:
def load_images(folder, label, image_size=(64,64)):

  data=[]
  labels=[]

  for filename in tqdm(os.listdir(folder)): #Loop through each file (image) in the given folder


    img_path= os.path.join(folder, filename) #Joins folder path + filename to get full image path like:/content/data/PetImages/Cat/cat1.jpg
    try:
      img= cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) #reads imae using opencv grayscale: load it as b and w
      if img is None: #if img is corrupted
        continue
      img= cv2.resize(img, image_size) #resizing every img to same size to match shape
      data.append(img.flatten()) #converts 2d image to id array of 4096 values
      labels.append(label)
    except:
      continue
  return data, labels


In [None]:
cat_data, cat_labels = load_images('/content/data/PetImages/Cat', 0)
dog_data, dog_labels = load_images('/content/data/PetImages/Dog', 1)

X = np.array(cat_data + dog_data)
y = np.array(cat_labels + dog_labels)

cat_data_small = cat_data[:500]
dog_data_small = dog_data[:500]
cat_labels_small = cat_labels[:500]
dog_labels_small = dog_labels[:500]

X_small = np.array(cat_data_small + dog_data_small)
y_small = np.array(cat_labels_small + dog_labels_small)

X_hog = extract_hog_features(X_small)






100%|██████████| 12501/12501 [00:09<00:00, 1341.77it/s]
100%|██████████| 12501/12501 [00:10<00:00, 1236.81it/s]


In [None]:
from sklearn.decomposition import PCA

pca = PCA(n_components=100)  # Reduce to 100 meaningful features
X_pca = pca.fit_transform(X_small)


In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X_hog, y_small, test_size=0.2, random_state=42, stratify=y_small
)


model = SVC(kernel='linear')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.73

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.66      0.71        50
           1       0.70      0.80      0.75        50

    accuracy                           0.73       100
   macro avg       0.73      0.73      0.73       100
weighted avg       0.73      0.73      0.73       100

