In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import cv2
from skimage import feature, exposure
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, roc_auc_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils import shuffle

In [None]:
def extract_hog_features(image):
    """Compute the HOG descriptor of an image plus a display-ready HOG rendering.

    Parameters
    ----------
    image : numpy.ndarray
        Either a 3-D colour image in OpenCV's BGR channel order (what
        ``cv2.imread`` returns by default) or a 2-D single-channel image.

    Returns
    -------
    hog_features : numpy.ndarray
        The HOG feature vector (9 orientations, 8x8-pixel cells, 2x2-cell blocks).
    hog_image_rescaled : numpy.ndarray
        The HOG visualisation with its intensity range (0, 10) stretched so the
        gradients are visible when plotted.

    Raises
    ------
    ValueError
        If ``image`` is ``None`` (e.g. ``cv2.imread`` could not read the file)
        or is neither 2-D nor 3-D.
    """
    if image is None:
        raise ValueError("image is None; the file could not be read as an image")

    if image.ndim == 2:
        # Single-channel input: there is no channel axis for HOG to reduce over.
        hog_input, channel_axis = image, None
    elif image.ndim == 3:
        # The image is NOT converted to grayscale: only the channel order is
        # changed (BGR -> RGB) and HOG is computed on the colour image with the
        # channels on the last axis.
        hog_input, channel_axis = cv2.cvtColor(image, cv2.COLOR_BGR2RGB), -1
    else:
        raise ValueError(f"expected a 2-D or 3-D image, got {image.ndim} dimensions")

    # Calculate HOG features together with the visualisation image
    hog_features, hog_image = feature.hog(
        hog_input,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        visualize=True,
        channel_axis=channel_axis,
    )

    # Enhance the contrast of the HOG image for better visualization
    hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

    return hog_features, hog_image_rescaled

In [None]:
def load_and_preprocess_data(root_folder, image_size=None, show_images=True):
    """Load an image dataset laid out as one sub-folder per class and extract HOG features.

    Parameters
    ----------
    root_folder : str
        Directory whose sub-directories are the classes; the sub-directory name
        is used as the label of every image inside it. Non-directory entries
        directly under ``root_folder`` are ignored.
    image_size : tuple of int, optional
        ``(width, height)`` passed to ``cv2.resize`` before feature extraction.
        ``None`` (the default) keeps each image at its original size, in which
        case all images must already share one size for the feature vectors to
        stack into a 2-D array.
    show_images : bool, optional
        When true (the default) each image is plotted next to its HOG
        visualisation.

    Returns
    -------
    images : numpy.ndarray
        One HOG feature vector per successfully read image.
    labels : numpy.ndarray
        The class (folder) name of each feature vector, in the same order.
    class_labels : list of str
        The class folder names, sorted alphabetically.

    Raises
    ------
    ValueError
        If the extracted feature vectors do not all have the same length.
    """
    import warnings

    class_labels = []
    images = []
    labels = []

    # Sorted listings make class order and sample order reproducible;
    # os.listdir returns entries in arbitrary order.
    for label in sorted(os.listdir(root_folder)):
        class_path = os.path.join(root_folder, label)
        if not os.path.isdir(class_path):
            continue
        class_labels.append(label)

        # Load and preprocess each image in the class
        for image_file in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_file)
            img = cv2.imread(image_path)  # Load image using OpenCV
            if img is None:
                # cv2.imread reports failure by returning None instead of raising
                warnings.warn(f"Skipping file that could not be read as an image: {image_path}")
                continue
            if image_size is not None:
                img = cv2.resize(img, image_size)  # Resize to a common size

            hog_features, hog_image = extract_hog_features(img)

            if show_images:
                # Display the original and HOG images side by side
                fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(2, 2), sharex=True, sharey=True)
                ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), cmap=plt.cm.gray)
                ax1.set_title(' ')
                ax1.axis('off')
                ax2.imshow(hog_image, cmap=plt.cm.gray)
                ax2.set_title(f'{label} ')
                ax2.axis('off')
                plt.show()
                # Release the figure; one is created per image, so leaving
                # them open grows memory with the dataset size.
                plt.close(fig)

            images.append(hog_features)
            labels.append(label)

    # Differently sized images give HOG vectors of different lengths, which
    # cannot form a rectangular feature matrix.
    feature_lengths = {len(vector) for vector in images}
    if len(feature_lengths) > 1:
        raise ValueError(
            "Images produced HOG feature vectors of different lengths "
            f"({sorted(feature_lengths)}); pass image_size=(width, height) to resize them to a common size"
        )

    return np.array(images), np.array(labels), class_labels

In [None]:
# Load and preprocess data.
# The dataset location can be supplied through the CATS_DOGS_DATASET environment
# variable so the notebook runs on other machines; if it is not set, the
# original hard-coded path is used.
root_folder = os.environ.get('CATS_DOGS_DATASET', 'C:\\Users\\youss\\Desktop\\Cats&&dogs')
X, y, class_labels = load_and_preprocess_data(root_folder)

In [None]:
# Encode the per-sample class names in `y` as integer ids. `y` is rebound to the
# encoded array, so every later cell sees numeric labels.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Encode the list of class folder names as well. This calls fit_transform again,
# so `label_encoder` is re-fitted on `class_labels` rather than on `y`.
# NOTE(review): the ids printed here match the ids in `y` only if both contain
# the same set of names (e.g. no class folder was empty) - confirm.
class_labels_encoded = label_encoder.fit_transform(class_labels)
print(class_labels_encoded)

Exploring The Dataset

In [None]:
# Put features and encoded labels into one table so they are shuffled together.
df = pd.DataFrame(X)
df['Label'] = y
# Seed the shuffle: without a fixed random_state the row order changes on every
# run, which makes the seeded train/test split further down unrepeatable.
df = shuffle(df, random_state=43).reset_index(drop=True)

# Split the shuffled table back into the feature matrix and the label column
X = df.drop(['Label'], axis=1)
y = df['Label']

df.head()

In [None]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=43,
)

In [None]:
# Standardize features: the scaling statistics are learned from the training
# set only and then applied unchanged to both the training and the test set.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Using Logistic Regression as the Model

In [None]:
# Logistic Regression

# Create the classifier with its default settings and fit it on the
# standardized training data (fit returns the estimator itself).
logreg = LogisticRegression().fit(X_train, y_train)
# Keep the fitted estimator as the cell's displayed output
logreg

In [None]:
# Train and Test Score: mean accuracy of the fitted model on each split
train_score, test_score = (
    logreg.score(features, targets)
    for features, targets in ((X_train, y_train), (X_test, y_test))
)

In [None]:
# Get the predicted probabilities for each class: one row per test sample and
# one column per class. Per the scikit-learn documentation the columns are
# ordered like `logreg.classes_`.
y_pred_prob = logreg.predict_proba(X_test)

In [None]:
# Plot a one-vs-rest ROC curve for each class
plt.figure()

# Map each encoded label back to its class name for the legend
encoded_to_name = {int(code): name for code, name in zip(class_labels_encoded, class_labels)}

# Column j of y_pred_prob holds the probability of logreg.classes_[j], so the
# loop follows logreg.classes_. Indexing both the labels and the columns by the
# position in class_labels pairs a class with another class's probabilities
# whenever the folder order differs from the encoder's sorted order.
for column, encoded_label in enumerate(logreg.classes_):
    is_positive = (y_test == encoded_label)
    class_scores = y_pred_prob[:, column]
    fpr, tpr, _ = roc_curve(is_positive, class_scores)
    roc_auc = roc_auc_score(is_positive, class_scores)
    class_name = encoded_to_name.get(int(encoded_label), str(encoded_label))
    plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f}) for class {class_name}')

# Diagonal reference line: the ROC of a random classifier
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Multiclass ROC Curve')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Compute the summary metrics in this cell: `y_pred`, `accuracy_logreg` and
# `roc_auc_logreg` are not defined by any earlier cell, so printing them
# directly fails with NameError on a fresh kernel.
y_pred = logreg.predict(X_test)
accuracy_logreg = accuracy_score(y_test, y_pred)
if y_pred_prob.shape[1] == 2:
    # Binary case: score with the probability of the class with the larger label
    roc_auc_logreg = roc_auc_score(y_test, y_pred_prob[:, 1])
else:
    # More than two classes: one-vs-rest AUC over the full probability matrix
    roc_auc_logreg = roc_auc_score(y_test, y_pred_prob, multi_class='ovr')

# Print results
print("Logistic Regression Accuracy:", accuracy_logreg)
print("Logistic Regression ROC AUC:", roc_auc_logreg)
print(classification_report(y_test, y_pred))
print("Train Score:", train_score)
print("Test Score:", test_score)