### `This notebook is for the classification project "Classify images of cats and dogs"`
###  Implement a support vector machine (SVM) to classify images of cats and dogs


### `By: Eng/ Khaled Ali Ahmed`

> [LinkedIn](https://www.linkedin.com/in/khaledalzebibi/)  <br />
> [GitHub](https://www.github.com/pykhaleda)   <br />
> [Facebook](https://www.facebook.com/pykhal)   <br />

In [1]:
## Major Libraries
import numpy as np
import pandas as pd
import cv2
## Other
import os
import zipfile
import warnings
warnings.filterwarnings("ignore")
import opendatasets as od
## sklearn 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.svm import LinearSVC, SVC   
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error
from sklearn import preprocessing
from sklearn import svm
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

## keras
from keras.preprocessing.image import ImageDataGenerator




In [None]:
## Fetch the Kaggle "dogs-vs-cats" competition data with opendatasets
dataset_url = "https://www.kaggle.com/c/dogs-vs-cats/data"
od.download(dataset_url)

In [None]:

# Folder (under the working directory) that holds the downloaded archives
local_zip = os.path.join(os.getcwd(), 'dogs-vs-cats')
zip_files = ['test1', 'train']

# Unpack each archive into the current directory and report progress
for archive_name in zip_files:
    archive_path = f"{local_zip}/{archive_name}.zip"
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")
        print(f"{archive_name} unzipped")


In [2]:
base_dir = os.getcwd()

print("Contents of base directory:")
print(os.listdir(base_dir))

# Only the heading is printed; the listing itself is disabled below.
print("\nContents of train directory:")
#print(os.listdir(f'{base_dir}/train'))

# Per-class training folders and the test folder, all under base_dir.
# NOTE(review): nothing visible in this notebook creates the train/cats and
# train/dogs sub-folders -- confirm how the extracted files are sorted into them.
train_cats, train_dogs, test_dir = (
    os.path.join(base_dir, sub_path)
    for sub_path in ('train/cats', 'train/dogs', 'test1')
)

# Report how many entries each folder contains
for caption, folder_path in (
    ('total cat training  images :', train_cats),
    ('total dog training  images :', train_dogs),
    ('total testing images :', test_dir),
):
    print(caption, len(os.listdir(folder_path)))

Contents of base directory:
['.ipynb_checkpoints', 'dogs-vs-cats', 'mymodel.ipynb', 'test1', 'train']

Contents of train directory:
total cat training  images : 12500
total dog training  images : 12500
total testing images : 12500


In [3]:
## title Load and preprocess images
def load_images(folder):
    """Load every readable image in ``folder`` as a flattened 64x64 grayscale row.

    Parameters
    ----------
    folder : str
        Directory whose entries are read with ``cv2.imread``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)``. ``images`` has one 4096-value row per decoded
        file; ``labels`` is the parallel array holding 1 when the file name
        contains "dog" and 0 otherwise. Entries that cannot be decoded are
        skipped (and reported), so both arrays always have the same length.
    """
    images = []
    labels = []
    # Sorted so the row order does not depend on the filesystem's listing
    # order, which keeps the later seeded train/dev split reproducible.
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals "could not decode" (sub-directory, hidden
            # file, corrupt image) by returning None instead of raising.
            print(f"Skipping unreadable file: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        img = cv2.resize(img, (64, 64))  # Resize the image
        images.append(img.flatten())  # Flatten the image matrix
        labels.append(1 if "dog" in filename else 0)  # 1 for dogs, 0 otherwise
    return np.array(images), np.array(labels)

# Load each class folder. The arrays get their own names so the
# `train_cats` / `train_dogs` path variables are not overwritten with
# arrays, which would make this cell fail when it is run a second time.
X_cats, label_cats = load_images(train_cats)
X_dogs, label_dogs = load_images(train_dogs)

# Stack cats first, then dogs, keeping features and labels aligned
X_train = np.concatenate((X_cats, X_dogs), axis=0)
y_train = np.concatenate((label_cats, label_dogs), axis=0)

# Only the pixel data is kept for the test folder; its labels are discarded
X_test = load_images(test_dir)[0]

In [4]:
# Report the dimensions of the training features, training labels and test features
for caption, array in (
    ('traing set shape: ', X_train),
    ('labes of traing set shape: ', y_train),
    ('test set shape: ', X_test),
):
    print(caption, array.shape)

traing set shape:  (25000, 4096)
labes of traing set shape:  (25000,)
test set shape:  (12500, 4096)


In [5]:
# Learn the per-feature min/max on the training pixels, then rescale them.
# NOTE(review): the scaler is fitted on all of X_train before the train/dev
# split, so dev rows influence the scaling -- consider fitting on the
# training split only.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)


In [6]:
## Hold out 20% of the scaled training data as a dev (validation) set
X_train_final, X_dev, y_train_final, y_dev = train_test_split(
    X_train_scaled,
    y_train,
    test_size=0.2,
    random_state=53,  # fixed seed so the split is repeatable
    shuffle=True,
)

In [7]:
X_train_final.shape

(20000, 4096)

In [None]:
# ## title Image augmentation
# datagen = ImageDataGenerator(rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.2, zoom_range=0.2, horizontal_flip=True, fill_mode='nearest')
# datagen.fit(X_train_final.reshape(-1, 64, 64, 1))


In [None]:
# augmented_data = datagen.flow(X_train_final.reshape(-1, 64, 64, 1), y_train_final)
# X_train_augmented, y_train_augmented = augmented_data[0][0].reshape(-1, 64 * 64, 1), augmented_data[0][1]

In [None]:
# X_train_augmented = X_train_augmented.reshape(X_train_augmented.shape[0], -1)
# X_train_augmented.shape

In [None]:
## Baseline model: polynomial-kernel SVC with C=0.1
svc_clf = SVC(kernel='poly', C=0.1).fit(X_train_final, y_train_final)

# Accuracy on the same data the model was trained on
train_acc = svc_clf.score(X_train_final, y_train_final)
print("train accuracy:", train_acc)

In [None]:
# Hyperparameter tuning using GridSearchCV.
# One sub-grid per kernel, so only the parameters that kernel actually uses
# are varied (degree/coef0 matter only for 'poly'; gamma is unused by
# 'linear'). Unused parameters are pinned to a single value so that every
# key is still present in `best_params_`.
c_values = [0.1, 1, 10, 100]
param_grid = [
    {'kernel': ['linear'], 'C': c_values, 'degree': [3], 'gamma': ['scale'], 'coef0': [0]},
    {'kernel': ['rbf'], 'C': c_values, 'degree': [3], 'gamma': [0.1, 1, 10], 'coef0': [0]},
    {'kernel': ['poly'], 'C': c_values, 'degree': [2, 3, 4], 'gamma': [0.1, 1, 10], 'coef0': [0, 0.1, 1]},
]

svc_clf = SVC()
grid_search = GridSearchCV(svc_clf, param_grid, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train_final, y_train_final)

print("best parameters:", grid_search.best_params_)
print("best cross-validated accuracy:", grid_search.best_score_)

# GridSearchCV fits clones of `svc_clf`, so the template itself is never
# fitted; score through the search object, which delegates to the refitted
# best estimator.
train_acc = grid_search.score(X_train_final, y_train_final)
print("train accuracy:", train_acc)

In [None]:
# Pull the winning hyperparameters out of the grid search
best_params = grid_search.best_params_
best_C, best_kernel, best_degree, best_gamma, best_coef0 = (
    best_params[name] for name in ('C', 'kernel', 'degree', 'gamma', 'coef0')
)

# Train a fresh SVC with those hyperparameters on the training split
new_svc_clf = SVC(
    C=best_C,
    kernel=best_kernel,
    degree=best_degree,
    gamma=best_gamma,
    coef0=best_coef0,
).fit(X_train_final, y_train_final)

# Accuracy on the same data the model was trained on
train_acc = new_svc_clf.score(X_train_final, y_train_final)
print("train accuracy:", train_acc)

In [None]:
# Evaluate on the held-out dev split. The label says "Dev" because this is
# not the separate, unlabeled test set loaded into X_test.
accuracy = new_svc_clf.score(X_dev, y_dev)
print("Dev accuracy:", accuracy)

In [None]:
# y_pred_train = svc_clf_poly.predict(X_train_augmented)
# y_pred_dev = svc_clf_poly.predict(X_dev)
# y_pred_train = svc_clf_poly.predict(X_test)


# accuracy_train = accuracy_score(y_train_augmented, y_pred_train)
# accuracy_dev = accuracy_score(y_dev, y_pred_dev)
# print(f"Accuracy of traingin data : {accuracy * 100:.2f}%")

In [None]:
## Evaluate the tuned model on the dev set
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score

# Predict labels for the dev split
y_pred_dev = new_svc_clf.predict(X_dev)

# Calculate accuracy
accuracy = accuracy_score(y_dev, y_pred_dev)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Print Confusion Matrix
cm = confusion_matrix(y_dev, y_pred_dev)
print("Confusion Matrix:")
print(cm)

# Print Classification Report
report = classification_report(y_dev, y_pred_dev)
print("Classification Report:")
print(report)

# Compute precision, recall and F1 directly from the predictions; with the
# default binary averaging these describe label 1, which load_images
# assigns to dogs.
precision = precision_score(y_dev, y_pred_dev)
recall = recall_score(y_dev, y_pred_dev)
f1 = f1_score(y_dev, y_pred_dev)

# Print precision, recall, and F1-score
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
