## We are using the Sign Language MNIST dataset from Kaggle.

The original MNIST (Modified National Institute of Standards and Technology) image dataset of handwritten digits is a popular benchmark for image-based machine learning methods. The sign language variant used here, however, does not cover two letters of the English alphabet, J and Z, because signing them requires gesture motion.

In [None]:
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Mount Google Drive so the CSV files stored there can be read from this
# runtime. The `google.colab` package is only importable inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

# Define paths to the dataset
# (the train and test splits are stored as two separate CSV files).
train_csv_path = '/content/drive/My Drive/Colab Notebooks/archive/sign_mnist_train.csv'
test_csv_path = '/content/drive/My Drive/Colab Notebooks/archive/sign_mnist_test.csv'

# Load the datasets
# Later cells read a 'label' column from each DataFrame and treat the
# remaining columns as the input features.
train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Check which distinct label values occur in the training set.
# The captured output shows 24 values between 0 and 24 with 9 absent, so the
# labels are NOT a contiguous 0..N-1 range.

labels=train_data.label.unique()
np.sort(labels)  # bare last expression: the notebook displays the sorted copy

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24])

In [None]:
# Converting the pandas Dataframe into Numpy Arrays
# `iloc[:, 1:]` keeps every column except the first one.
# NOTE(review): this presumes 'label' is the first CSV column - confirm.
# NOTE(review): these four arrays are not referenced by the later cells
# visible here, which use X_train / X_test / y_train / y_test instead.

inputs_array_train = train_data.iloc[:, 1:].to_numpy()
targets_array_train = train_data['label'].to_numpy()
inputs_array_test = test_data.iloc[:, 1:].to_numpy()
targets_array_test = test_data['label'].to_numpy()

In [None]:
class CustomLogisticRegression:
    """
    Multiclass (softmax) logistic regression trained with full-batch
    gradient descent.

    Labels do not have to be contiguous or zero-based: `fit` maps every
    distinct label to a column index and `predict` maps indices back to the
    original label values.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000, num_classes=None):
        """
        Initialize the Logistic Regression model

        Parameters:
        - learning_rate: step size for gradient descent
        - num_iterations: number of training iterations
        - num_classes: number of unique classes in the dataset; kept for
          backward compatibility, `fit` always re-derives it from the labels
        """
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.num_classes = num_classes
        self.weights = None
        self.bias = None
        self.class_mapping = None    # original label -> column index
        self.reverse_mapping = None  # column index -> original label

    def _softmax(self, z):
        """
        Softmax activation function for multiclass classification
        Prevents numerical instability by subtracting max value

        Parameters:
        - z: input array of logits, shape (num_samples, num_classes)

        Returns:
        - Softmax probabilities (each row sums to 1)
        """
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def _build_class_mapping(self, y):
        """
        (Re)create the label <-> index lookup tables from the labels in `y`.

        Parameters:
        - y: array of original labels
        """
        unique_classes = np.unique(y)  # sorted, so indices follow label order
        self.class_mapping = {orig: idx for idx, orig in enumerate(unique_classes)}
        self.reverse_mapping = {idx: orig for orig, idx in self.class_mapping.items()}

    def _one_hot_encode(self, y):
        """
        Convert labels to one-hot encoded format

        Parameters:
        - y: original labels

        Returns:
        - One-hot encoded labels, shape (num_samples, num_classes)

        Raises:
        - KeyError: if `y` contains a label missing from the class mapping
        """
        y = np.asarray(y)
        if self.class_mapping is None:
            self._build_class_mapping(y)
        if self.num_classes is None:
            self.num_classes = len(self.class_mapping)

        # dtype=int keeps the index array usable even when `y` is empty
        y_mapped = np.array([self.class_mapping[label] for label in y], dtype=int)
        one_hot = np.zeros((y.shape[0], self.num_classes))
        one_hot[np.arange(y.shape[0]), y_mapped] = 1
        return one_hot

    def _check_is_fitted(self):
        """
        Raise a clear error when the model is used before `fit`.

        Raises:
        - RuntimeError: if no parameters have been learned yet
        """
        if self.weights is None or self.bias is None or self.reverse_mapping is None:
            raise RuntimeError(
                "CustomLogisticRegression is not fitted yet; call fit(X, y) first."
            )

    def fit(self, X, y):
        """
        Train the logistic regression model

        Every call starts from scratch: weights, bias and the label mapping
        are re-created, so the same instance can be refitted on data with a
        different number of features or a different set of labels.

        Parameters:
        - X: input features (num_samples, num_features)
        - y: target labels (num_samples,)

        Raises:
        - ValueError: if X is not 2-D, is empty, or disagrees with y in length
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (num_samples, num_features), got shape {X.shape}")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y disagree on the number of samples: {X.shape[0]} != {y.shape[0]}"
            )
        num_samples, num_features = X.shape
        if num_samples == 0:
            raise ValueError("Cannot fit on an empty dataset")

        # Rebuild the mapping on every fit; a mapping left over from an
        # earlier fit would mis-index (or reject) a different label set.
        self._build_class_mapping(y)
        self.num_classes = len(self.class_mapping)  # Automatically detect number of classes
        self.weights = np.zeros((num_features, self.num_classes))
        self.bias = np.zeros((1, self.num_classes))
        Y_one_hot = self._one_hot_encode(y)

        inv_num_samples = 1 / num_samples
        for i in range(self.num_iterations):
            # Forward pass
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = self._softmax(linear_model)

            # Compute gradients of the mean cross-entropy loss
            error = y_predicted - Y_one_hot
            dw = inv_num_samples * np.dot(X.T, error)
            db = inv_num_samples * np.sum(error, axis=0, keepdims=True)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            # Print progress every 100 iterations
            # (the loss uses the predictions made before this update;
            # 1e-9 guards against log(0))
            if i % 100 == 0:
                loss = -np.sum(Y_one_hot * np.log(y_predicted + 1e-9)) / num_samples
                print(f"Iteration {i}, Loss: {loss:.4f}")

    def predict(self, X):
        """
        Make predictions on input data

        Parameters:
        - X: input features (num_samples, num_features)

        Returns:
        - Array of predicted labels, expressed as the original label values

        Raises:
        - RuntimeError: if the model has not been fitted
        """
        self._check_is_fitted()
        linear_model = np.dot(np.asarray(X), self.weights) + self.bias
        y_predicted = self._softmax(linear_model)
        predicted_indices = np.argmax(y_predicted, axis=1)
        return np.array([self.reverse_mapping[idx] for idx in predicted_indices])

    def accuracy(self, X, y):
        """
        Compute accuracy

        Parameters:
        - X: input features (num_samples, num_features)
        - y: true labels (num_samples,)

        Returns:
        - Fraction of samples whose predicted label equals the true label
        """
        predictions = self.predict(X)
        return np.mean(predictions == np.asarray(y))


def preprocess_sign_mnist(train_data, test_data):
    """
    Split the Sign MNIST DataFrames into feature matrices and label vectors.

    Every column except 'label' is treated as a feature; features are cast
    to float32 and divided by 255.0. Labels are returned unchanged.

    Parameters:
    - train_data: training DataFrame containing a 'label' column
    - test_data: testing DataFrame containing a 'label' column

    Returns:
    - Tuple (X_train, X_test, y_train, y_test) of NumPy arrays
    """
    def _features_and_labels(frame):
        # Separate the label column from the pixel columns of one split.
        return frame.drop('label', axis=1).values, frame['label'].values

    raw_train, y_train = _features_and_labels(train_data)
    raw_test, y_test = _features_and_labels(test_data)

    # Scale the raw pixel values after both splits were extracted.
    X_train, X_test = (
        raw.astype('float32') / 255.0 for raw in (raw_train, raw_test)
    )

    print("Data preprocessing complete.")
    return X_train, X_test, y_train, y_test


def train_logistic_regression(X_train, X_test, y_train, y_test):
    """
    Fit a CustomLogisticRegression on the training split and report accuracy.

    The model is built with learning_rate=0.1 and num_iterations=1000, then
    scored on the training split followed by the testing split; both scores
    are printed as percentages.

    Parameters:
    - X_train, y_train: training features and labels
    - X_test, y_test: testing features and labels

    Returns:
    - The fitted CustomLogisticRegression instance
    """
    model = CustomLogisticRegression(learning_rate=0.1, num_iterations=1000)
    model.fit(X_train, y_train)

    # Score the training split first, then the testing split.
    train_accuracy, test_accuracy = (
        model.accuracy(features, labels)
        for features, labels in ((X_train, y_train), (X_test, y_test))
    )

    print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
    print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

    return model


# Example Workflow
# Assuming `train_data` and `test_data` are pandas DataFrames with "label" column
# X_train, X_test, y_train, y_test = preprocess_sign_mnist(train_data, test_data)
# clf = train_logistic_regression(X_train, X_test, y_train, y_test)


In [None]:
class CustomPCA:
    """
    Principal component analysis via eigen-decomposition of the feature
    covariance matrix.

    The number of retained components is taken from `n_components` when it
    is given, otherwise from `variance_threshold`; when neither is given all
    components are kept.
    """

    def __init__(self, n_components=None, variance_threshold=None):
        """
        Initialize CustomPCA.
        :param n_components: Number of principal components to retain (optional).
        :param variance_threshold: Fraction of variance to retain (optional).
        """
        self.n_components = n_components
        self.variance_threshold = variance_threshold
        self.components = None  # Principal components, one per column
        self.mean = None        # Mean of each feature

    def _fit_centered(self, X):
        """
        Learn the feature means and principal components from X.
        :param X: Feature matrix (n_samples, n_features)
        :return: The centered copy of X that was used for fitting
        :raises ValueError: if X is not 2-D or has fewer than two samples
        """
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        if X.shape[0] < 2:
            raise ValueError("At least two samples are needed to estimate a covariance matrix")

        # Center the data by subtracting the mean
        self.mean = np.mean(X, axis=0)
        X_centered = X - self.mean

        # Compute the covariance matrix; np.cov collapses a single feature
        # to a 0-d array, which eigh cannot handle, hence atleast_2d.
        covariance_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))

        # Compute eigenvalues and eigenvectors (eigh: the matrix is symmetric)
        eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

        # Sort eigenvalues and eigenvectors in descending order
        sorted_indices = np.argsort(eigenvalues)[::-1]
        eigenvalues = eigenvalues[sorted_indices]
        eigenvectors = eigenvectors[:, sorted_indices]

        self.components = eigenvectors[:, :self._count_components(eigenvalues)]
        return X_centered

    def _count_components(self, eigenvalues):
        """
        Decide how many leading components to keep.
        :param eigenvalues: Eigenvalues sorted in descending order
        :return: Number of columns to keep (None means "keep all")
        """
        if self.n_components is not None:
            return self.n_components
        if self.variance_threshold is None:
            return None

        total_variance = np.sum(eigenvalues)
        if not total_variance > 0:
            # Constant data: no variance ratio is defined, keep everything.
            return eigenvalues.size

        # Smallest k whose cumulative variance ratio reaches the threshold
        cumulative_ratio = np.cumsum(eigenvalues) / total_variance
        reached = np.flatnonzero(cumulative_ratio >= self.variance_threshold)
        return int(reached[0]) + 1 if reached.size else eigenvalues.size

    def fit(self, X):
        """
        Fit the PCA model without transforming the data.
        :param X: Feature matrix (n_samples, n_features)
        :return: self, so calls can be chained
        """
        self._fit_centered(X)
        return self

    def fit_transform(self, X):
        """
        Fit the PCA model and transform the data.
        :param X: Feature matrix (n_samples, n_features)
        :return: Transformed data with reduced dimensions
        """
        X_centered = self._fit_centered(X)

        # Project data onto the selected principal components
        return np.dot(X_centered, self.components)

    def transform(self, X):
        """
        Transform data using the fitted PCA model.
        :param X: Feature matrix (n_samples, n_features)
        :return: Transformed data
        :raises RuntimeError: if the model has not been fitted yet
        """
        if self.mean is None or self.components is None:
            raise RuntimeError("CustomPCA is not fitted yet; call fit or fit_transform first.")
        X_centered = np.asarray(X) - self.mean
        return np.dot(X_centered, self.components)


In [None]:
def custom_confusion_matrix(y_true, y_pred, num_classes, class_names=None):
    """
    Compute the confusion matrix manually and plot it with a stylish design.

    Labels that already lie in 0..num_classes-1 are used directly as
    row/column indices. If any label falls outside that range (for example a
    label set that skips a value), the sorted distinct labels found in
    y_true and y_pred are mapped to positions 0..k-1 instead, so row i then
    refers to the i-th smallest label.

    Parameters:
        y_true: Array of true labels
        y_pred: Array of predicted labels
        num_classes: Number of unique classes
        class_names: List of class names corresponding to the classes (optional)

    Returns:
        Confusion matrix as a 2D numpy array (rows: true, columns: predicted)

    Raises:
        ValueError: if y_true and y_pred differ in length, or if out-of-range
            labels are present and there are more distinct labels than
            num_classes
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError(
            f"y_true and y_pred must have the same length: {y_true.shape[0]} != {y_pred.shape[0]}"
        )

    tick_labels = range(num_classes)
    observed = np.union1d(y_true, y_pred)  # sorted distinct labels
    if observed.size and (observed.min() < 0 or observed.max() >= num_classes):
        if observed.size > num_classes:
            raise ValueError(
                f"Found {observed.size} distinct labels but num_classes is {num_classes}"
            )
        # Direct indexing would overflow (or wrap around for negatives):
        # index by the label's rank among the observed labels instead.
        y_true = np.searchsorted(observed, y_true)
        y_pred = np.searchsorted(observed, y_pred)
        if observed.size == num_classes:
            tick_labels = observed.tolist()  # show the real label values
    if class_names is not None and len(class_names) > 0:
        tick_labels = class_names

    # Initialize confusion matrix
    matrix = np.zeros((num_classes, num_classes), dtype=int)
    if y_true.size:
        # Unbuffered add: repeated (true, predicted) pairs each count once
        np.add.at(matrix, (y_true, y_pred), 1)

    # Plotting the styled confusion matrix
    plt.figure(figsize=(12,10))
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=tick_labels,
                yticklabels=tick_labels,
                linewidths=0.5, linecolor='black')

    plt.title('Confusion Matrix', fontsize=14)
    plt.xlabel('Predicted Labels', fontsize=12)
    plt.ylabel('True Labels', fontsize=12)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12, rotation=0)
    plt.tight_layout()
    plt.show()

    return matrix


def custom_classification_report(y_true, y_pred, num_classes, class_names=None):
    """
    Compute and display the classification report in a styled table format.

    The confusion matrix is obtained from custom_confusion_matrix, which
    also draws the heatmap as a side effect. Labels outside
    0..num_classes-1 are first mapped to the rank of each label among the
    sorted distinct labels, so non-contiguous label sets are supported.

    Parameters:
        y_true: Array of true labels
        y_pred: Array of predicted labels
        num_classes: Number of unique classes
        class_names: List of class names (optional)

    Returns:
        A pandas DataFrame containing precision, recall, F1-score, and support
        per class, followed by "macro avg" and "weighted avg" rows whose
        support is the total number of samples.

    Raises:
        ValueError: if out-of-range labels are present and there are more
            distinct labels than num_classes, or if class_names does not
            have num_classes entries
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    default_names = [f"Class {i}" for i in range(num_classes)]
    observed = np.union1d(y_true, y_pred)  # sorted distinct labels
    if observed.size and (observed.min() < 0 or observed.max() >= num_classes):
        if observed.size > num_classes:
            raise ValueError(
                f"Found {observed.size} distinct labels but num_classes is {num_classes}"
            )
        # Labels cannot be used as matrix indices: use their rank instead.
        y_true = np.searchsorted(observed, y_true)
        y_pred = np.searchsorted(observed, y_pred)
        if observed.size == num_classes:
            default_names = [f"Class {label}" for label in observed.tolist()]

    # Compute confusion matrix (rows: true class, columns: predicted class)
    cm = np.asarray(custom_confusion_matrix(y_true, y_pred, num_classes), dtype=float)

    true_positive = np.diag(cm)
    predicted_total = cm.sum(axis=0)  # true positives + false positives
    actual_total = cm.sum(axis=1)     # true positives + false negatives

    # Handle cases with no positive or negative samples for this class:
    # the metric stays 0.0 wherever its denominator is zero.
    precision = np.divide(true_positive, predicted_total,
                          out=np.zeros_like(true_positive), where=predicted_total > 0)
    recall = np.divide(true_positive, actual_total,
                       out=np.zeros_like(true_positive), where=actual_total > 0)
    pr_sum = precision + recall
    f1_score = np.divide(2 * precision * recall, pr_sum,
                         out=np.zeros_like(true_positive), where=pr_sum > 0)
    support = actual_total.astype(int)  # samples whose true class is this row

    # Assemble the per-class table
    has_names = class_names is not None and len(class_names) > 0
    df_report = pd.DataFrame(
        {
            "precision": precision,
            "recall": recall,
            "f1-score": f1_score,
            "support": support,
        },
        index=list(class_names) if has_names else default_names,
    )

    # Add average metrics
    metric_columns = ["precision", "recall", "f1-score"]
    total_support = int(support.sum())
    macro_avg = df_report[metric_columns].mean()
    if total_support > 0:
        weighted_avg = (
            df_report[metric_columns].multiply(df_report["support"], axis=0).sum()
            / total_support
        )
    else:
        # No samples at all: avoid dividing by zero
        weighted_avg = macro_avg * 0.0
    macro_avg["support"] = total_support
    weighted_avg["support"] = total_support

    df_report.loc["macro avg"] = macro_avg
    df_report.loc["weighted avg"] = weighted_avg

    return df_report


In [None]:
# Create and train the model
# NOTE(review): X_train / X_test / y_train / y_test are not assigned in any
# cell visible here; presumably they come from preprocess_sign_mnist(), whose
# example call is commented out above - confirm.
# `num_classes` is passed for readability only: fit() recomputes it from the
# labels it receives.
clf = CustomLogisticRegression(
    learning_rate=0.1,
    num_iterations=1000,
    num_classes=len(np.unique(y_train))
)

# Fit the model
# (prints the loss every 100 iterations)
clf.fit(X_train, y_train)

# Compute accuracies
# accuracy() returns a fraction of correct predictions; it is shown as a
# percentage below.
train_accuracy = clf.accuracy(X_train, y_train)
test_accuracy = clf.accuracy(X_test, y_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

Iteration 0, Loss: 3.1781
Iteration 100, Loss: 2.3069
Iteration 200, Loss: 1.9253
Iteration 300, Loss: 1.7007
Iteration 400, Loss: 1.5470
Iteration 500, Loss: 1.4323
Iteration 600, Loss: 1.3419
Iteration 700, Loss: 1.2679
Iteration 800, Loss: 1.2056
Iteration 900, Loss: 1.1520
Training Accuracy: 76.39%
Testing Accuracy: 63.43%


In [None]:
# Sanity check of the feature matrices: rows are samples, columns are
# features, as expected by CustomLogisticRegression.fit.
print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of X_test: {X_test.shape}")


Shape of X_train: (27455, 784)
Shape of X_test: (7172, 784)


In [None]:
# Step 1: Predict labels for training and testing data
# NOTE: `clf` must currently be fitted on the raw features; a later cell
# refits it on PCA-projected data, after which this cell has to be preceded
# by a fresh fit on X_train.
y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)

# Step 2: Number of classes and class names
# The label values are not contiguous (the label inspection above shows 9 is
# missing and the largest label is 24), so they cannot be used directly as
# indices into a num_classes x num_classes matrix. Map every label to its
# position among the sorted distinct labels and name each class after its
# real label value.
label_values = np.unique(np.concatenate([y_train, y_test, y_train_pred, y_test_pred]))
num_classes = len(label_values)
class_names = [f"Class {label}" for label in label_values]  # You can define specific class names if needed

y_train_idx = np.searchsorted(label_values, y_train)
y_train_pred_idx = np.searchsorted(label_values, y_train_pred)
y_test_idx = np.searchsorted(label_values, y_test)
y_test_pred_idx = np.searchsorted(label_values, y_test_pred)

# Step 3: Generate and display classification report for training data
# (the report helper also draws the corresponding confusion matrix)
print("Classification Report for Training Data:")
train_report = custom_classification_report(y_train_idx, y_train_pred_idx, num_classes, class_names)
print(train_report)

# Step 4: Generate and display classification report for testing data
print("\nClassification Report for Testing Data:")
test_report = custom_classification_report(y_test_idx, y_test_pred_idx, num_classes, class_names)
print(test_report)

# Step 5: Optionally save or display the confusion matrix
# This time the heatmap carries the class names; as the last expression of
# the cell, the returned matrix is displayed as well.
print("\nConfusion Matrix for Testing Data:")
custom_confusion_matrix(y_test_idx, y_test_pred_idx, num_classes, class_names)


ValueError: shapes (27455,784) and (113,) not aligned: 784 (dim 1) != 113 (dim 0)

In [None]:
# Apply Custom PCA to reduce dimensionality
# The projection (feature means and components) is learned from the training
# split only; the test split is projected with those same parameters.
custom_pca = CustomPCA(variance_threshold=0.95)  # Retain 95% variance
X_train_pca = custom_pca.fit_transform(X_train)
X_test_pca = custom_pca.transform(X_test)

# Fit the model
# This refits the existing `clf` object: fit() re-creates the weights with
# one row per input feature, so from here on `clf` expects PCA-projected
# inputs rather than the raw features.
clf.fit(X_train_pca, y_train)
train_accuracy = clf.accuracy(X_train_pca, y_train)  # Training data and labels
test_accuracy = clf.accuracy(X_test_pca, y_test)    # Testing data and labels


print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

Training Accuracy: 73.62%
Testing Accuracy: 62.65%


### Shuffling the training data to see whether the accuracy changes.

In [None]:
# Function to shuffle data and labels together
def shuffle_data(X, y, random_state=None):
    """
    Shuffle data and labels together, keeping every sample paired with its label.

    Parameters:
    - X: feature array, indexed by sample along the first axis
    - y: label array with one entry per sample
    - random_state: optional seed (or numpy Generator) for a reproducible
      shuffle; when None the global NumPy random state is used, so
      np.random.seed() is honoured

    Returns:
    - Tuple (X_shuffled, y_shuffled); the inputs are left unmodified

    Raises:
    - ValueError: if X and y do not contain the same number of samples
    """
    num_samples = X.shape[0]
    if len(y) != num_samples:
        raise ValueError(
            f"X and y disagree on the number of samples: {num_samples} != {len(y)}"
        )

    # Generate a permutation of indices
    indices = np.arange(num_samples)
    if random_state is None:
        np.random.shuffle(indices)
    else:
        np.random.default_rng(random_state).shuffle(indices)

    # Shuffle data and labels (fancy indexing returns new arrays)
    return X[indices], y[indices]

# Shuffle the training data
X_train_shuffled, y_train_shuffled = shuffle_data(X_train, y_train)

# If you are using PCA, apply it on the shuffled data
# (a fresh CustomPCA is fitted; the test split is re-projected with it)
custom_pca = CustomPCA(variance_threshold=0.95)  # Retain 95% variance
X_train_pca_shuffled = custom_pca.fit_transform(X_train_shuffled)
X_test_pca = custom_pca.transform(X_test)

# Train the model on the shuffled data
# fit() uses every sample in each iteration and averages the gradient over
# all of them, so the row order is not expected to influence the result -
# consistent with the accuracies below matching the unshuffled PCA run.
clf.fit(X_train_pca_shuffled, y_train_shuffled)

# Compute accuracies
train_accuracy = clf.accuracy(X_train_pca_shuffled, y_train_shuffled)
test_accuracy = clf.accuracy(X_test_pca, y_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")


Training Accuracy: 73.62%
Testing Accuracy: 62.65%
