In [None]:
import copy
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
from skimage import exposure
from skimage import io, color, feature
from skimage.feature import hog
from sklearn.base import clone
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc, log_loss
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.utils import shuffle

In [None]:
# Function to extract HOG features from an image and visualize it
def extract_hog_features(image):
    # Convert the image to grayscale
    gray_image = color.rgb2gray(image)

    # Calculate HOG features
    hog_features, hog_image = feature.hog(gray_image, visualize=True)

    # Enhance the contrast of the HOG image for better visualization
    hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

    return hog_features, hog_image_rescaled

In [None]:
# Function to calculate and display the confusion matrix
def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, title=None, cmap=plt.cm.Blues):
    if not title:
        if normalize:
            title = 'Normalized Confusion Matrix'
        else:
            title = 'Confusion Matrix, without Normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Plot the confusion matrix
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], fmt),
                     ha="center", va="center",
                     color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()

In [None]:
# Function to calculate and display the ROC curve
def plot_roc_curve(y_true, y_score, classes):
    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)

    # Plot the ROC curve
    plt.figure()
    plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc="lower right")
    plt.show()

In [None]:
# Function to plot log loss percentage
def plot_log_loss_percentage(log_loss_values, classes):
    plt.figure(figsize=(10, 6))
    plt.bar(classes, log_loss_values, color='blue')
    plt.title('Log Loss Percentage for Each Class')
    plt.xlabel('Class')
    plt.ylabel('Log Loss')
    plt.show()

In [None]:
# Function to calculate and display the loss curve
def plot_loss_curve(y_true, model, X_test, y_test):
    loss_values = []

    for i in range(1, 101):  # Set the maximum number of iterations (adjust as needed)
        model.partial_fit(X_train, y_train, classes=np.unique(y_train))
        y_prob = model.predict_proba(X_test)
        loss = log_loss(y_test, y_prob)
        loss_values.append(loss)

    # Print the loss values
    print("Loss Values for Each Iteration:")
    print(loss_values)

    # Plot the loss curve
    plt.figure(figsize=(8, 6))
    plt.plot(range(1, 101), loss_values, label='Loss Curve')
    plt.xlabel('Iteration')
    plt.ylabel('Log Loss')
    plt.title('Loss Curve on Testing Data')
    plt.legend()
    plt.show()

In [None]:
# Function to apply cross-validation and plot accuracy vs iteration
def cross_validate_and_plot(X, y, model, num_iterations=100, num_folds=5):
    skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
    accuracies = []

    for fold, (train_index, test_index) in enumerate(skf.split(X, y), 1):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        model.partial_fit(X_train, y_train, classes=np.unique(y_train))

        # Evaluate the model on the test set
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        accuracies.append(accuracy)

        # Print accuracy for each fold
        print(f"Fold {fold} Accuracy: {accuracy}")

    # Calculate and print the mean accuracy
    mean_accuracy = np.mean(accuracies)
    print(f"Mean Accuracy across all folds: {mean_accuracy}")

    # Plot accuracy vs iteration
    iterations = np.arange(1, num_folds + 1)
    plt.figure(figsize=(8, 6))
    plt.plot(iterations, accuracies, label='Cross-Validation Accuracy')
    plt.xlabel('Fold')
    plt.ylabel('Accuracy')
    plt.title('Cross-Validation Accuracy Curve')
    plt.legend()
    plt.show()


In [None]:
# Path to the root folder of your dataset
dataset_path = 'D:\\collage\\third year\\first semester\\Machine learning\\Project\\dataset\\archive\\cell_images\\cell_images'

In [None]:
# List all subdirectories (assuming each subdirectory corresponds to a class)
class_folders = [f.path for f in os.scandir(dataset_path) if f.is_dir()]
features_list = []
labels_list = []

for class_folder in class_folders:
    class_name = os.path.basename(class_folder)

    # Loop through each image in the class folder
    for image_filename in os.listdir(class_folder):
        image_path = os.path.join(class_folder, image_filename)

        # Load the image
        image = io.imread(image_path)

        # Resize the image to 64x64
        resized_image = cv2.resize(image, (64, 64))

        # Extract HOG features and visualize
        hog_features, hog_image = extract_hog_features(resized_image)

        # Display the original image and the HOG features
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(2, 2), sharex=True, sharey=True)

        ax1.axis('off')
        ax1.imshow(resized_image, cmap=plt.cm.gray)

        ax2.axis('off')
        ax2.imshow(hog_image, cmap=plt.cm.gray)

        plt.show()

        # Append HOG features to the features list
        features_list.append(hog_features)

        # Append the label to the labels list
        labels_list.append(class_name)


In [None]:
# Convert lists to NumPy arrays
features_array = np.array(features_list)
labels_array = np.array(labels_list)
# Use LabelEncoder to convert class names into numeric labels
label_encoder = LabelEncoder()
numeric_labels = label_encoder.fit_transform(labels_array)
# Shuffle the data
features_array, numeric_labels = shuffle(features_array, numeric_labels, random_state=42)

In [None]:
# Initialize logistic regression model for HOG
sgd_hog = SGDClassifier(loss='log_loss', max_iter=1000, random_state=42)

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_array, numeric_labels, test_size=0.2, random_state=42)

In [None]:
# Apply cross-validation and plot accuracy vs iteration
cross_validate_and_plot(features_array, numeric_labels, sgd_hog, num_iterations=100, num_folds=5)

In [None]:
# Fit the model on the training data
sgd_hog.fit(X_train, y_train)

In [None]:
# Calculate predicted probabilities for each class
y_prob_hog = sgd_hog.predict_proba(X_test)

In [None]:
# Call the function to plot the loss curve
plot_loss_curve(y_test, sgd_hog, X_test, y_test)

In [None]:
# Make predictions on the test set
y_pred_hog = sgd_hog.predict(X_test)

In [None]:
# Evaluate the model
accuracy_hog = accuracy_score(y_test, y_pred_hog)
classification_rep_hog = classification_report(y_test, y_pred_hog)

In [None]:
print(f"Accuracy for HOG: {accuracy_hog}")
print("Classification Report for HOG:")
print(classification_rep_hog)

In [None]:
# Calculate and display the confusion matrix for HOG
classes_hog = list(label_encoder.classes_)
plot_confusion_matrix(y_test, y_pred_hog, classes_hog)

In [None]:
# Calculate and display the ROC curve for HOG
y_test_bin_hog = label_binarize(y_test, classes=np.unique(y_test))
y_score_hog = sgd_hog.decision_function(X_test)
plot_roc_curve(y_test, y_score_hog, classes_hog)