# Support Vector Machine to Classify Images of Cats and Dogs

### Environment Setup

In [1]:
import concurrent.futures
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.svm import LinearSVC

### Load Dataset

In [2]:
# Define paths to train and test sets.
# Each path can be overridden without editing the notebook by setting the
# CATS_DOGS_TRAIN_DIR / CATS_DOGS_TEST_DIR environment variables before the
# kernel starts; when they are unset the original local paths are used.
train_dir = os.environ.get(
    "CATS_DOGS_TRAIN_DIR",
    r"D:\College Shitz\Code Files\Python\Prodigy Infotech\Task 03\Dataset\Train",
)
test_dir = os.environ.get(
    "CATS_DOGS_TEST_DIR",
    r"D:\College Shitz\Code Files\Python\Prodigy Infotech\Task 03\Dataset\Test",
)

# Function to load a single image (helper function for multithreading)
def load_single_image(img_file, directory, label=None, size=(64, 64)):
    """Read one image file as grayscale and resize it.

    Args:
        img_file: File name of the image inside ``directory``.
        directory: Folder that contains ``img_file``.
        label: Value handed back unchanged next to the image; leave as
            ``None`` for unlabeled images.
        size: Target ``(width, height)`` passed to ``cv2.resize``.
            Defaults to ``(64, 64)``, the size used throughout this notebook.

    Returns:
        ``(image, label)`` where ``image`` is the resized grayscale array,
        or ``(None, None)`` when ``cv2.imread`` could not read the file
        (a warning is printed in that case).
    """
    img_path = os.path.join(directory, img_file)

    # cv2.imread does not raise on unreadable / non-image files; it returns None
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Warning: {img_file} could not be loaded. Skipping...")
        return None, None

    return cv2.resize(img, size), label

In [3]:
# Function to load and process images from a directory using multithreading
def load_images_from_directory(directory, label):
    """Load every readable image in ``directory`` and tag it with ``label``.

    Files are read in parallel with a thread pool, but the results are
    collected in directory-listing order rather than thread-completion
    order, so repeated runs produce the samples in the same order.

    Args:
        directory: Folder whose entries are passed to ``load_single_image``.
        label: Label attached to every image that loads successfully.

    Returns:
        ``(images, labels)`` as NumPy arrays of equal length. Files that
        ``load_single_image`` could not read are left out of both arrays.
    """
    img_files = os.listdir(directory)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Executor.map yields results in the order of img_files, unlike
        # as_completed, which yields them in whatever order threads finish.
        results = list(
            executor.map(
                lambda img_file: load_single_image(img_file, directory, label),
                img_files,
            )
        )

    images = []
    labels = []
    for img, lbl in results:
        if img is not None:  # Skip files that failed to load
            images.append(img)
            labels.append(lbl)

    return np.array(images), np.array(labels)

In [4]:
# Function to load and process test images (without labels)
def load_test_images(test_directory):
    """Load all readable images from the sub-folders of ``test_directory``.

    Sub-folders are visited in sorted name order and the files inside each
    one are kept in directory-listing order (not thread-completion order),
    so the position of an image in the result is stable between runs.
    Entries of ``test_directory`` that are not directories are ignored, and
    files that ``load_single_image`` cannot read are skipped, so the result
    never contains ``None``.

    Args:
        test_directory: Folder containing one sub-folder per class
            (e.g. 'Cat', 'Dog').

    Returns:
        NumPy array holding the loaded images.
    """
    images = []
    for subdir in sorted(os.listdir(test_directory)):
        subdir_path = os.path.join(test_directory, subdir)

        # Skip non-directory entries (like logs or hidden files)
        if not os.path.isdir(subdir_path):
            continue

        img_files = os.listdir(subdir_path)

        # Read the files in parallel; Executor.map preserves the order of img_files
        with concurrent.futures.ThreadPoolExecutor() as executor:
            loaded = executor.map(
                lambda img_file, folder=subdir_path: load_single_image(img_file, folder),
                img_files,
            )
            # Consume the results inside the `with` block and drop failed loads
            images.extend(img for img, _ in loaded if img is not None)

    return np.array(images)

# Load the unlabeled test images; the un-flattened 2-D arrays are kept so they can be displayed later
X_test_original = load_test_images(test_dir)

# Print the entries of the test directory as a quick sanity check
print("Files in the test directory:", os.listdir(test_dir))

# Drop any None entries (images that failed to load); this also turns X_test_original into a plain list
X_test_original = [img for img in X_test_original if img is not None]

# Stop with a clear message when nothing was loaded, instead of failing on the reshape below
if len(X_test_original) == 0:
    raise RuntimeError(
        "No valid test images were loaded! Please check the directory and file types."
    )
print(f"Loaded {len(X_test_original)} test images.")

# Flatten each image into one row of pixel values.
# NOTE: X_test is reassigned further down when the labeled test set is loaded.
X_test = np.array(X_test_original).reshape(len(X_test_original), -1)

Files in the test directory: ['Cat', 'Dog']
Loaded 6250 test images.


In [5]:
# Each class has its own sub-folder inside the training directory
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, class_folder) for class_folder in ('cat', 'dog')
)

# Read both classes; every image is tagged with the class it was loaded for
train_images_cats, train_labels_cats = load_images_from_directory(train_cats_dir, 'cat')
train_images_dogs, train_labels_dogs = load_images_from_directory(train_dogs_dir, 'dog')

# Stack cats first, then dogs, keeping images and labels aligned row by row
X_train = np.concatenate([train_images_cats, train_images_dogs])
y_train = np.concatenate([train_labels_cats, train_labels_dogs])

In [6]:
# Load and process labeled test images from directories (cat and dog)
def load_test_images_from_directories(test_dir):
    """Load the labeled test set from the 'cat' and 'dog' sub-folders.

    The 'cat' folder is read first, then 'dog'; within each folder the files
    are read in directory-listing order. Files that ``load_single_image``
    cannot read are skipped together with their label, so the two returned
    arrays always have the same length and contain no ``None`` entries.

    The folder names are joined exactly as 'cat' and 'dog', so on a
    case-sensitive filesystem the folders must be named that way.

    Args:
        test_dir: Folder that contains the 'cat' and 'dog' sub-folders.

    Returns:
        ``(images, labels)`` as NumPy arrays; ``labels`` holds the strings
        'cat' / 'dog'.
    """
    images = []
    labels = []

    for class_name in ('cat', 'dog'):
        class_dir = os.path.join(test_dir, class_name)
        for img_file in os.listdir(class_dir):
            img, _ = load_single_image(img_file, class_dir, class_name)
            if img is None:  # unreadable file: keep images and labels aligned
                continue
            images.append(img)
            labels.append(class_name)

    return np.array(images), np.array(labels)


# Read the labeled test set, then split the returned pair into images and labels
labeled_test_set = load_test_images_from_directories(test_dir)
X_test, y_test = labeled_test_set

### Image Preprocessing

In [7]:
# Initialize and fit the LabelEncoder
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)  # Fit label_encoder on the training labels

# Preprocess the data (Flatten images, encode labels)
X_train = X_train.reshape(X_train.shape[0], -1)  # Flatten 64x64 to 1D array
X_test = X_test.reshape(X_test.shape[0], -1)  # Flatten 64x64 to 1D array

# Encode the test labels ('cat' and 'dog') to numeric values
y_test = label_encoder.transform(y_test)  # Use the same label encoder as for the training set

# Encode the labels ('cat' and 'dog') to numeric values
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)  # 0 for cat, 1 for dog

# Scale the features for better performance with SVM
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Implementing SVM

In [8]:
from sklearn.svm import LinearSVC

# Initialize the LinearSVC model (faster than SVC with linear kernel)
svm_model = LinearSVC(random_state=42, max_iter=10000)

# Train the model
svm_model.fit(X_train, y_train)

In [None]:
# Make predictions on the test set
y_pred = svm_model.predict(X_test)

# Decode the predicted labels back to the original class labels ('cat' and 'dog')
y_pred_labels = label_encoder.inverse_transform(y_pred)

# Display the images along with predicted labels as "Cat" or "Dog"
for i, (img, label) in enumerate(zip(X_test_original, y_pred_labels)):
    # Convert numeric label to original label (Cat or Dog)
    label_name = label_encoder.inverse_transform([label])[0]
    
    plt.imshow(img, cmap='gray')  # Use the original image for display
    plt.title(f"Predicted: {label_name}")  # Display 'Cat' or 'Dog'
    plt.axis('off')  # Turn off axes
    plt.show()

### Model Evaluation

In [10]:
# Compute both metrics first, then print them: overall accuracy followed by the per-class report
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("Accuracy on test set:", test_accuracy)
print("Classification Report:")
print(report_text)

Accuracy on test set: 0.7736
Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.77      0.77      3125
           1       0.77      0.78      0.77      3125

    accuracy                           0.77      6250
   macro avg       0.77      0.77      0.77      6250
weighted avg       0.77      0.77      0.77      6250

