<h3>Importing the necessary libraries</h3>
<ul>
    <li>cv2: openCV is a Python library used for computer vision and image processing tasks</li>
</ul>

In [1]:
import os
import cv2
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

<h3>Helper function for processing each image</h3>
<p>The function takes a folder of images and a corresponding label to those images as input. It then loads the images from the folder and resizes them to a fixed size, before appending them to the list of images and labels</p>

In [2]:
def process(folder, label):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        # Load the image from the specified file
        img = cv2.imread(img_path)
        if img is not None:
            # Resize the image to a fixed size
            img = cv2.resize(img, (100, 100))
            images.append(img)
            labels.append(label)
    return images, labels

<h3>Loading the labelled dataset using the <i>process</i> function defined above</h3>
<ul>
    <li>Label 0: is for cats</li>
    <li>Label 1: is for dogs</li>
</ul>

In [3]:
cat_images, cat_labels = process('C:/Users/ahmed/Downloads/archive/cats', 0)
dog_images, dog_labels = process('C:/Users/ahmed/Downloads/archive/dogs', 1)

<h3>Combine the cat and dog data</h3>

In [4]:
X = np.array(cat_images + dog_images)
y = np.array(cat_labels + dog_labels)

<h3>Flatten the images</h3>
<p>The .flatten() method returns a copy of an array collapsed into one dimension</p>

In [5]:
X = np.array([image.flatten() for image in X])

<h3>Normalize the features</h3>

In [6]:
scaler = preprocessing.StandardScaler()
X = scaler.fit_transform(X)

<h3>Randomly select a subset of features</h3>

<h3>Splitting the data into training and testing sets</h3>

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

<h3>Model 1: K-Nearest Neighbors (KNN)</h3>

In [8]:
# Default number of neighbors is 5
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

<h3>Model 2: Decision Trees</h3>

In [9]:
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

<h3>Model 3: Logistic Regression</h3>

In [10]:
log_model = LogisticRegression(max_iter=1000, random_state=42)
log_model.fit(X_train, y_train)

In [16]:
models = [knn_model, dt_model, log_model]
model_names = ['KNN', 'Decision Tree', 'Logistic Regression']
scoring_metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']

for model, name, metric in zip(models, model_names, scoring_metrics):
    scores = cross_val_score(model, X_train, y_train, cv=10, scoring=metric)
    print(f"{name} Cross-Validation Scores:")
    print(f"{metric}: {np.mean(scores[0]):.4f}")
    # print(f"Precision: {np.mean(scores['test_precision_macro']):.4f}")
    # print(f"Recall: {np.mean(scores['test_recall_macro']):.4f}")
    # print(f"F1-score: {np.mean(scores['test_f1_macro']):.4f}")
    print("---------------")

KNN Cross-Validation Scores:
accuracy: 0.5102
---------------


KeyboardInterrupt: 

<h3>Evaluating the model against common metrics using the testing set</h3>

In [None]:
# A function to print The result in a table like format
def print_metrics(y_test, y_pred_knn, y_pred_dt, y_pred_log):
    print('\n' + ('-'*29) + f' {'Score Report'} ' + ('-'*29))
    print("{:<15} {:<20} {:<15} {:<15}".format('Metric', 'K-Nearest Neighbors', 'Decision Trees', 'Logistic Regression'))
    print("-" * 72)
    
    metrics = {
        'Accuracy': [accuracy_score(y_test, y_pred_knn), accuracy_score(y_test, y_pred_dt), accuracy_score(y_test, y_pred_log)],
        'Precision': [precision_score(y_test, y_pred_knn, average='weighted'), precision_score(y_test, y_pred_dt, average='weighted'), precision_score(y_test, y_pred_log, average='weighted')],
        'Recall': [recall_score(y_test, y_pred_knn, average='weighted'), recall_score(y_test, y_pred_dt, average='weighted'), recall_score(y_test, y_pred_log, average='weighted')],
        'F1-Score': [f1_score(y_test, y_pred_knn, average='weighted'), f1_score(y_test, y_pred_dt, average='weighted'), f1_score(y_test, y_pred_log, average='weighted')]
    }
    
    for metric, (knn_metric, dt_metric, log_metric) in metrics.items():
        print("{:<15} {:<20.4f} {:<15.4f} {:<15.4f}".format(metric, knn_metric, dt_metric, log_metric))

# Getting the predicted values using the testing set
knn_pred = knn_model.predict(X_test)
dt_pred = dt_model.predict(X_test)
log_pred = log_model.predict(X_test)

# Printing out the metrics
print_metrics(y_test, knn_pred, dt_pred, log_pred)


----------------------------- Score Report -----------------------------
Metric          K-Nearest Neighbors  Decision Trees  Logistic Regression
------------------------------------------------------------------------
Accuracy        0.5643               0.5500          0.6286         
Precision       0.5682               0.5539          0.6373         
Recall          0.5643               0.5500          0.6286         
F1-Score        0.5655               0.5513          0.6297         
