In [10]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Lazily flatten the directory walk into full file paths, then print one per line.
input_paths = (
    os.path.join(folder, entry)
    for folder, _subdirs, entries in os.walk('/kaggle/input')
    for entry in entries
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats/test1.zip
/kaggle/input/dogs-vs-cats/train.zip
/kaggle/input/dogs-vs-cats/sampleSubmission.csv


In [11]:
# 📁 Task 03 – Dogs vs Cats Classification using Support Vector Machine (SVM)

# ✅ Step 1: Import Libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [18]:
# ✅ Step 2: Unzip Dataset (if not already extracted)
import zipfile
import os

zip_path = "/kaggle/input/dogs-vs-cats/train.zip"
extract_path = "/kaggle/working/train"

# Re-running this cell used to re-extract the whole archive every time.
# Skip the work when the target directory already has content.
# NOTE: if an earlier extraction was interrupted, delete `extract_path`
# to force a clean re-extract.
if os.path.isdir(extract_path) and os.listdir(extract_path):
    print("Already extracted:", extract_path)
else:
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

In [19]:
# ✅ Step 3: Load and Preprocess Images
#
# Builds `data`, a list of [pixel_array, label] pairs where pixel_array is the
# grayscale image resized to IMG_SIZE x IMG_SIZE and label is 0 for "dog*"
# files and 1 for "cat*" files.

import numpy as np
import cv2
from tqdm import tqdm
import random

IMG_SIZE = 64
MAX_IMAGES = 5000   # subset size, kept small for speed
SAMPLE_SEED = 42    # fixed seed so the same subset is picked on every run
data = []

# All files are in the extracted directory
image_dir = extract_path + "/train"

# List the directory once. os.listdir returns entries in arbitrary order,
# so sort them to get a stable starting point.
all_files = sorted(os.listdir(image_dir))
print("Loaded from:", image_dir)
print("Total files found:", len(all_files))

# Only load a subset for speed. Taking the first N of the sorted names would
# select a single class (all "cat.*" sort before "dog.*"), so draw a seeded
# random sample instead.
subset = random.Random(SAMPLE_SEED).sample(all_files, min(MAX_IMAGES, len(all_files)))

skipped = []  # files that were not usable as training images
for img_name in tqdm(subset):
    if img_name.startswith("dog"):
        label = 0
    elif img_name.startswith("cat"):
        label = 1
    else:
        # Not a labelled training image; do not silently treat it as a cat.
        skipped.append(img_name)
        continue

    img_path = os.path.join(image_dir, img_name)
    img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img_array is None:
        # cv2.imread signals an unreadable/corrupt file by returning None.
        skipped.append(img_name)
        continue

    try:
        resized = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
    except cv2.error:
        # Keep the load best-effort, but only for OpenCV failures.
        skipped.append(img_name)
        continue

    data.append([resized, label])

print("Total images loaded:", len(data))
if skipped:
    print("Skipped files:", len(skipped))

Loaded from: /kaggle/working/train/train
Total files found: 25000


100%|██████████| 5000/5000 [00:03<00:00, 1453.46it/s]


Total images loaded: 5000


In [23]:
# ✅ Step 4: Shuffle & Prepare Data
import random

# Shuffle a *copy* with a fixed seed. An unseeded in-place random.shuffle(data)
# made every re-run of this cell produce a different ordering, which in turn
# made the seeded train/test split below non-reproducible.
SHUFFLE_SEED = 42
shuffled = random.Random(SHUFFLE_SEED).sample(data, len(data))

# Each entry of `data` is [image_array, label]; separate the two columns.
X = np.array([features for features, _ in shuffled])
y = np.array([label for _, label in shuffled])

# Flatten every image to one row of IMG_SIZE * IMG_SIZE values and scale
# the pixel intensities by 1/255.
X = X.reshape(-1, IMG_SIZE * IMG_SIZE) / 255.0


In [25]:
# ✅ Step 5: Train-Test Split
# Hold out 20% of the samples for evaluation; the fixed random_state seeds the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [26]:
# ✅ Step 6: Train SVM Classifier
# Linear-kernel SVM with C=1.0, fitted on the training split.
svm_params = {"kernel": "linear", "C": 1.0}
svm = SVC(**svm_params)
svm.fit(X_train, y_train)


In [27]:
# ✅ Step 7: Evaluate Model
# Predict on the held-out split, then print the per-class metrics and the
# confusion matrix.
y_pred = svm.predict(X_test)

report_text = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("\nClassification Report:\n", report_text)
print("\nConfusion Matrix:\n", conf_matrix)


Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.51      0.52       496
           1       0.53      0.55      0.54       504

    accuracy                           0.53      1000
   macro avg       0.53      0.53      0.53      1000
weighted avg       0.53      0.53      0.53      1000


Confusion Matrix:
 [[254 242]
 [228 276]]


In [28]:
# ✅ Step 8: Save the Model
import joblib

# Write the fitted classifier to the current working directory.
joblib.dump(svm, 'svm_dogs_vs_cats.pkl')

# Write the held-out features and labels next to the model.
for out_name, held_out in (('X_test.npy', X_test), ('y_test.npy', y_test)):
    np.save(out_name, held_out)
