In [1]:
import pandas as pd
import numpy as np

from itertools import product
from custom_functions import custom_accuracy_score, custom_confusion_matrix, custom_classification_report

In [2]:
# Locations of the Sign MNIST CSV files, relative to the current working directory
train_csv_path = './archive/sign_mnist_train.csv'
test_csv_path = './archive/sign_mnist_test.csv'

# Read each split into its own DataFrame
train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)

In [3]:
class CustomLogisticRegression:
    """
    Multiclass (softmax) logistic regression trained with full-batch gradient descent.

    Labels do not have to be contiguous (e.g. 0-8 and 10-24): they are remapped
    internally to consecutive column indices and mapped back by `predict`.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000, num_classes=None):
        """
        Initialize the Logistic Regression model

        Parameters:
        - learning_rate: step size for gradient descent
        - num_iterations: number of training iterations
        - num_classes: number of unique classes in the dataset; `fit` overwrites
          this with the number of distinct labels found in the training data
        """
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.num_classes = num_classes
        self.weights = None
        self.bias = None
        self.class_mapping = None    # original label -> zero-based column index
        self.reverse_mapping = None  # zero-based column index -> original label

    def _check_is_fitted(self):
        """
        Raise a clear error when prediction is attempted before training

        Raises:
        - RuntimeError: if `fit` has not populated the weights and bias yet
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("Model is not fitted yet; call fit() before predicting")

    def _softmax(self, z):
        """
        Softmax activation function for multiclass classification
        Prevents numerical instability by subtracting the row-wise max value

        Parameters:
        - z: 2-D array of logits, one row per sample

        Returns:
        - Softmax probabilities (each row sums to 1)
        """
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def _one_hot_encode(self, y):
        """
        Convert labels to one-hot encoded format

        The label <-> index tables are built from `y` on the first call after they
        have been cleared (`fit` clears them), and reused on later calls.

        Parameters:
        - y: original labels

        Returns:
        - One-hot encoded labels, one row per label

        Raises:
        - KeyError: if `y` contains a label missing from an existing mapping
        """
        y = np.asarray(y)

        # Remap labels to zero-based index if no mapping exists yet
        if self.class_mapping is None:
            unique_classes = np.unique(y)
            self.class_mapping = {orig: idx for idx, orig in enumerate(unique_classes)}
            self.reverse_mapping = {idx: orig for orig, idx in self.class_mapping.items()}

        # Map original labels to zero-based index (integer dtype even when y is empty)
        y_mapped = np.array([self.class_mapping[label] for label in y], dtype=int)

        # Fall back to the mapping size when num_classes was never specified
        width = self.num_classes if self.num_classes is not None else len(self.class_mapping)

        # Create one-hot encoding
        one_hot = np.zeros((y.shape[0], width))
        one_hot[np.arange(y.shape[0]), y_mapped] = 1
        return one_hot

    def fit(self, X, y):
        """
        Train the logistic regression model

        Every call starts from scratch: weights, bias and the label mapping are
        re-derived from the data passed in, so an instance can be refitted safely
        on a different label set.

        Parameters:
        - X: input features (num_samples, num_features)
        - y: target labels

        Raises:
        - ValueError: if X is empty or X and y have different lengths
        """
        X = np.asarray(X)
        y = np.asarray(y)

        num_samples = X.shape[0]
        if num_samples == 0:
            raise ValueError("Cannot fit on an empty training set")
        if y.shape[0] != num_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {num_samples} != {y.shape[0]}"
            )

        # Drop any mapping left over from a previous fit; otherwise stale labels
        # would be reused and unseen labels would raise KeyError.
        self.class_mapping = None
        self.reverse_mapping = None

        # The class count always comes from the training labels so that the
        # one-hot width matches the label mapping.
        self.num_classes = len(np.unique(y))

        # Initialize weights and bias
        num_features = X.shape[1]
        self.weights = np.zeros((num_features, self.num_classes))
        self.bias = np.zeros((1, self.num_classes))

        # One-hot encode labels
        Y_one_hot = self._one_hot_encode(y)

        # Gradient descent
        inv_n = 1 / num_samples
        for _ in range(self.num_iterations):
            # Forward pass
            y_predicted = self._softmax(np.dot(X, self.weights) + self.bias)

            # Prediction error, shared by both gradients
            error = y_predicted - Y_one_hot
            dw = inv_n * np.dot(X.T, error)
            db = inv_n * np.sum(error, axis=0, keepdims=True)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        """
        Make predictions on input data

        Parameters:
        - X: input features

        Returns:
        - Predicted class labels (original class labels)

        Raises:
        - RuntimeError: if the model has not been fitted
        """
        probabilities = self.predict_proba(X)

        # Get indices of max probabilities
        predicted_indices = np.argmax(probabilities, axis=1)

        # Map back to original class labels with a single array lookup
        class_labels = np.array(
            [self.reverse_mapping[idx] for idx in range(len(self.reverse_mapping))]
        )
        return class_labels[predicted_indices]

    def predict_proba(self, X):
        """
        Predict class probabilities

        Parameters:
        - X: input features

        Returns:
        - Predicted class probabilities, one column per class in mapping order

        Raises:
        - RuntimeError: if the model has not been fitted
        """
        self._check_is_fitted()
        linear_model = np.dot(X, self.weights) + self.bias
        return self._softmax(linear_model)

    def accuracy(self, X, y):
        """
        Compute model accuracy

        Parameters:
        - X: input features
        - y: true labels

        Returns:
        - Accuracy score (fraction of predictions equal to the true label)
        """
        predictions = self.predict(X)
        return np.mean(predictions == np.asarray(y))

def preprocess_sign_mnist(train_data, test_data):
    """
    Turn the Sign MNIST dataframes into NumPy feature and label arrays

    Every column except 'label' is treated as a pixel value: it is cast to
    float32 and divided by 255. The 'label' column is returned as-is. The
    distinct labels of each split are printed.

    Parameters:
    - train_data: Training dataframe containing a 'label' column
    - test_data: Testing dataframe containing a 'label' column

    Returns:
    - X_train, X_test, y_train, y_test (in that order)
    """
    # Split each frame into (pixels, labels), training frame first
    (X_train, y_train), (X_test, y_test) = [
        (frame.drop('label', axis=1).values, frame['label'].values)
        for frame in (train_data, test_data)
    ]

    # Scale the pixel matrices by 255 after casting to float32
    X_train, X_test = [
        pixels.astype('float32') / 255.0 for pixels in (X_train, X_test)
    ]

    print("Unique training classes:", np.unique(y_train))
    print("Unique testing classes:", np.unique(y_test))

    return X_train, X_test, y_train, y_test

def train_logistic_regression(X_train, X_test, y_train, y_test,
                              learning_rate=0.1, num_iterations=1000):
    """
    Train custom Logistic Regression on Sign MNIST

    Parameters:
    - X_train: Training features
    - X_test: Testing features
    - y_train: Training labels
    - y_test: Testing labels
    - learning_rate: gradient-descent step size passed to the model (default 0.1)
    - num_iterations: number of gradient-descent iterations (default 1000)

    Returns:
    - Trained model
    - Training and test accuracies
    """
    # Create the model; the class count is the number of distinct training labels
    clf = CustomLogisticRegression(
        learning_rate=learning_rate,
        num_iterations=num_iterations,
        num_classes=len(np.unique(y_train))
    )

    # Fit the model
    clf.fit(X_train, y_train)

    # Compute accuracies
    train_accuracy = clf.accuracy(X_train, y_train)
    test_accuracy = clf.accuracy(X_test, y_test)

    return clf, train_accuracy, test_accuracy

In [4]:
def preprocess_sign_mnist(train_data, test_data):
    """
    Split the Sign MNIST dataframes into scaled pixel arrays and label arrays

    Note: this redefines the function of the same name declared earlier in the
    notebook; once this cell runs, this is the definition in effect.

    Parameters:
    - train_data: Training dataframe containing a 'label' column
    - test_data: Testing dataframe containing a 'label' column

    Returns:
    - X_train, X_test, y_train, y_test (in that order)
    """
    pixel_scale = 255.0

    # Everything except the label column is pixel data
    train_pixels = train_data.drop(columns='label').values
    y_train = train_data['label'].values

    test_pixels = test_data.drop(columns='label').values
    y_test = test_data['label'].values

    # Cast to float32 first, then divide by 255
    X_train = train_pixels.astype(np.float32) / pixel_scale
    X_test = test_pixels.astype(np.float32) / pixel_scale

    # Report which labels occur in each split
    print("Unique training classes:", np.unique(y_train))
    print("Unique testing classes:", np.unique(y_test))

    return X_train, X_test, y_train, y_test

In [5]:
# Preprocess the data: split both dataframes into scaled pixel arrays and label arrays.
# The call also prints the distinct labels of each split; the recorded output below
# shows 24 distinct labels running from 0 to 24 with 9 absent.
X_train, X_test, y_train, y_test = preprocess_sign_mnist(train_data, test_data)

Unique training classes: [ 0  1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
Unique testing classes: [ 0  1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]


In [None]:
# Instantiate the softmax classifier; the class count is the number of distinct training labels
clf = CustomLogisticRegression(learning_rate=0.1, num_iterations=1000, num_classes=len(np.unique(y_train)))

# Run gradient descent on the training split
clf.fit(X_train, y_train)

In [None]:
# Compute accuracies
train_accuracy = clf.accuracy(X_train, y_train)
test_accuracy = clf.accuracy(X_test, y_test)

# Predict here so the metric below does not depend on a later cell having been run
# (previously y_pred was only assigned in the following cell, which raises NameError
# when the notebook is executed top to bottom on a fresh kernel).
y_pred = clf.predict(X_test)

# Evaluation metrics using custom implementations
# NOTE(review): the return value is discarded here; if custom_accuracy_score returns
# the score instead of printing it, nothing is shown - confirm against custom_functions.
custom_accuracy_score(y_test, y_pred)


print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# Predictions: original class labels predicted for every test sample
y_pred = clf.predict(X_test)

# Confusion Matrix
# NOTE(review): num_classes is the count of distinct training labels (24 in the recorded
# output), while the largest label value is 24. If the custom helpers index by raw label
# value, that label would fall outside a 24-wide matrix - confirm against custom_functions.
# NOTE(review): the return value is not captured or displayed here; presumably the helper
# prints or plots its result itself - verify.
custom_confusion_matrix(y_true=y_test, y_pred=y_pred, num_classes=len(np.unique(y_train)))

# Classification Report
print("\nClassification Report:")
custom_classification_report(y_test, y_pred, num_classes=len(np.unique(y_train)))


In [None]:
# Visualize some predictions
import matplotlib.pyplot as plt

# Pick a few distinct test samples. A fixed seed keeps the figure reproducible across
# runs, and sampling without replacement avoids showing the same image twice.
num_samples_to_show = min(5, X_test.shape[0])
indices = np.random.default_rng(42).choice(
    X_test.shape[0], size=num_samples_to_show, replace=False
)

# squeeze=False keeps `axes` two-dimensional even when only one panel is drawn
fig, axes = plt.subplots(1, num_samples_to_show, figsize=(15, 3), squeeze=False)
for ax, idx in zip(axes[0], indices):
    # Each row of X_test is reshaped back into a 28x28 image for display
    ax.imshow(X_test[idx].reshape(28, 28), cmap='gray')
    ax.set_title(f"Pred: {y_pred[idx]}, True: {y_test[idx]}")
    ax.axis('off')
fig.tight_layout()
plt.show()

In [None]:
class CustomLogisticRegressionWithReg(CustomLogisticRegression):
    """
    Softmax logistic regression with an L2 penalty on the weights

    The weight gradient gains the term `reg_strength * weights`; the bias is
    not regularized.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000, num_classes=None, reg_strength=0.01):
        """
        Initialize the regularized model

        Parameters:
        - learning_rate: step size for gradient descent
        - num_iterations: number of training iterations
        - num_classes: number of unique classes; `fit` overwrites this with the
          number of distinct labels in the training data
        - reg_strength: L2 regularization strength
        """
        super().__init__(learning_rate, num_iterations, num_classes)
        self.reg_strength = reg_strength  # L2 Regularization strength

    def fit(self, X, y):
        """
        Train the model with L2-regularized gradient descent

        Parameters:
        - X: input features (num_samples, num_features)
        - y: target labels
        """
        # Forget any mapping from an earlier fit so that refitting on a different
        # label set rebuilds it instead of reusing stale entries.
        self.class_mapping = None

        unique_classes = np.unique(y)
        self.num_classes = len(unique_classes)
        num_features = X.shape[1]
        self.weights = np.zeros((num_features, self.num_classes))
        self.bias = np.zeros((1, self.num_classes))
        Y_one_hot = self._one_hot_encode(y)

        inv_n = 1 / X.shape[0]
        for _ in range(self.num_iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = self._softmax(linear_model)

            # Prediction error, shared by both gradients
            error = y_predicted - Y_one_hot
            dw = inv_n * np.dot(X.T, error) + self.reg_strength * self.weights
            db = inv_n * np.sum(error, axis=0, keepdims=True)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db


In [None]:
def tune_hyperparameters(X_train, y_train, X_test, y_test,
                         learning_rates=None, num_iterations_list=None, reg_strengths=None):
    """
    Grid-search the learning rate, iteration count and L2 strength

    One CustomLogisticRegressionWithReg is trained per combination and scored
    on (X_test, y_test); the first combination with the highest accuracy wins.

    Note: the winner is selected on the same split it is scored on, so the
    reported best accuracy is optimistic if that split is also the final test
    set. Pass a separate validation split when an unbiased estimate is needed.

    Parameters:
    - X_train, y_train: Training features and labels
    - X_test, y_test: Features and labels used to score each candidate
    - learning_rates: learning rates to try (default [0.01, 0.05, 0.1])
    - num_iterations_list: iteration counts to try (default [500, 1000, 2000])
    - reg_strengths: regularization strengths to try (default [0.01, 0.1, 1.0])

    Returns:
    - best_model: the trained model with the highest accuracy
    - best_params: tuple (learning_rate, num_iterations, reg_strength)

    Raises:
    - ValueError: if any grid is empty, so no model could be trained
    """
    if learning_rates is None:
        learning_rates = [0.01, 0.05, 0.1]
    if num_iterations_list is None:
        num_iterations_list = [500, 1000, 2000]
    if reg_strengths is None:
        reg_strengths = [0.01, 0.1, 1.0]

    best_model = None
    best_accuracy = 0
    best_params = None

    for lr, num_iter, reg in product(learning_rates, num_iterations_list, reg_strengths):
        print(f"Training with learning_rate={lr}, num_iterations={num_iter}, reg_strength={reg}")
        model = CustomLogisticRegressionWithReg(learning_rate=lr, num_iterations=num_iter, reg_strength=reg)
        model.fit(X_train, y_train)
        accuracy = model.accuracy(X_test, y_test)
        print(f"Testing accuracy: {accuracy * 100:.2f}%")

        # Always keep the first candidate so a winner exists even when every
        # accuracy is 0; later candidates must be strictly better.
        if best_model is None or accuracy > best_accuracy:
            best_model = model
            best_accuracy = accuracy
            best_params = (lr, num_iter, reg)

    if best_model is None:
        raise ValueError("No hyperparameter combinations to evaluate: every grid must be non-empty")

    print("\nBest Hyperparameters:")
    print(f"Learning Rate: {best_params[0]}, Number of Iterations: {best_params[1]}, Regularization Strength: {best_params[2]}")
    print(f"Best Testing Accuracy: {best_accuracy * 100:.2f}%")
    return best_model, best_params


In [None]:
# Preprocess the data
# NOTE(review): this repeats the earlier preprocessing call with the same inputs, and the
# next cell begins with the same call again - this cell looks redundant.
X_train, X_test, y_train, y_test = preprocess_sign_mnist(train_data, test_data)

In [12]:
# Preprocess the data
X_train, X_test, y_train, y_test = preprocess_sign_mnist(train_data, test_data)

# Hyperparameter tuning with detailed output
print("Starting hyperparameter tuning...\n")
best_model, best_params = tune_hyperparameters(X_train, y_train, X_test, y_test)

# Evaluate the best model
print("\nEvaluating the best model with optimal hyperparameters...")
# evaluate_model is neither defined nor imported in the visible cells of this notebook.
# Guard the call so that a missing helper does not abort the cell after the (long)
# grid search has finished; when the helper exists it is called exactly as before.
if "evaluate_model" in globals():
    evaluate_model(best_model, X_test, y_test)
else:
    print("evaluate_model is not defined in this session; skipping detailed evaluation.")

# Print the final results
print("\nSummary of Best Model:")
print(f"Best Hyperparameters:\n  Learning Rate: {best_params[0]}\n  Number of Iterations: {best_params[1]}\n  Regularization Strength: {best_params[2]}")
print(f"Best Testing Accuracy: {best_model.accuracy(X_test, y_test) * 100:.2f}%")


Unique training classes: [ 0  1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
Unique testing classes: [ 0  1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
Starting hyperparameter tuning...

Training with learning_rate=0.01, num_iterations=500, reg_strength=0.01
Testing accuracy: 39.75%
Training with learning_rate=0.01, num_iterations=500, reg_strength=0.1
Testing accuracy: 37.95%
Training with learning_rate=0.01, num_iterations=500, reg_strength=1.0
Testing accuracy: 20.58%
Training with learning_rate=0.01, num_iterations=1000, reg_strength=0.01
Testing accuracy: 45.96%
Training with learning_rate=0.01, num_iterations=1000, reg_strength=0.1
Testing accuracy: 42.68%
Training with learning_rate=0.01, num_iterations=1000, reg_strength=1.0
Testing accuracy: 20.84%
Training with learning_rate=0.01, num_iterations=2000, reg_strength=0.01
Testing accuracy: 51.44%
Training with learning_rate=0.01, num_iterations=2000, reg_strength=0.1
Testing accuracy:

In [None]:
from itertools import product

def tune_hyperparameters_generic(X_train, y_train, X_test, y_test, 
                                  learning_rates, num_iterations_list, reg_strengths,
                                  evaluate_fn=None):
    """
    Generic hyperparameter tuning function for logistic regression.

    One CustomLogisticRegressionWithReg is trained per combination and scored
    on (X_test, y_test); the first combination with the highest accuracy wins.

    Note: the winner is selected on the same split it is scored on, so the
    reported best accuracy is optimistic if that split is also the final test
    set. Pass a separate validation split when an unbiased estimate is needed.

    Parameters:
        X_train, y_train: Training data and labels
        X_test, y_test: Testing data and labels
        learning_rates: List of learning rates to try
        num_iterations_list: List of iteration counts to try
        reg_strengths: List of regularization strengths to try
        evaluate_fn: Optional callable(model, X_test, y_test) run on the best
            model. When omitted, a module-level `evaluate_model` is used if one
            exists; otherwise the detailed evaluation is skipped with a notice.

    Returns:
        best_model: Trained model with the best parameters
        best_params: Tuple of (learning_rate, num_iterations, reg_strength) for the best model

    Raises:
        ValueError: If any grid is empty, so no model could be trained
    """
    print("Starting hyperparameter tuning...\n")
    
    best_model = None
    best_accuracy = 0
    best_params = None

    # Iterate through all combinations of hyperparameters
    for lr, num_iter, reg in product(learning_rates, num_iterations_list, reg_strengths):
        print(f"Training with learning_rate={lr}, num_iterations={num_iter}, reg_strength={reg}")
        model = CustomLogisticRegressionWithReg(learning_rate=lr, num_iterations=num_iter, reg_strength=reg)
        model.fit(X_train, y_train)
        accuracy = model.accuracy(X_test, y_test)
        print(f"Testing accuracy: {accuracy * 100:.2f}%\n")

        # Always keep the first candidate so a winner exists even when every
        # accuracy is 0; later candidates must be strictly better.
        if best_model is None or accuracy > best_accuracy:
            best_model = model
            best_accuracy = accuracy
            best_params = (lr, num_iter, reg)

    if best_model is None:
        raise ValueError("No hyperparameter combinations to evaluate: every grid must be non-empty")

    # Final summary of the best model
    print("\nEvaluating the best model with optimal hyperparameters..................")
    # evaluate_model is neither defined nor imported in the visible notebook cells.
    # Resolve it lazily so a missing helper cannot discard the finished search with
    # a NameError just before the results are returned.
    evaluator = evaluate_fn if evaluate_fn is not None else globals().get("evaluate_model")
    if evaluator is not None:
        evaluator(best_model, X_test, y_test)
    else:
        print("evaluate_model is not defined in this session; skipping detailed evaluation.")

    print("\nSummary of Best Model:")
    print(f"Best Hyperparameters:\n  Learning Rate: {best_params[0]}\n  Number of Iterations: {best_params[1]}\n  Regularization Strength: {best_params[2]}")
    print(f"Best Testing Accuracy: {best_accuracy * 100:.2f}%")

    return best_model, best_params

# Example usage of the generic tuner.

# Rebuild the feature and label arrays from the raw dataframes
X_train, X_test, y_train, y_test = preprocess_sign_mnist(train_data, test_data)

# Search grid for this run
learning_rates = [0.01, 0.02, 0.03, 0.05]
num_iterations_list = [2000, 3000, 5000]
reg_strengths = [0.005, 0.01, 0.02]

# A wider grid kept for reference (not used):
#   learning_rates      = [0.005, 0.01, 0.02, 0.03, 0.05]
#   num_iterations_list = [2000, 3000, 5000, 7000]
#   reg_strengths       = [0.005, 0.01, 0.02, 0.03, 0.04]

# Train one model per combination and keep the best-scoring one
best_model, best_params = tune_hyperparameters_generic(
    X_train, y_train, X_test, y_test,
    learning_rates=learning_rates,
    num_iterations_list=num_iterations_list,
    reg_strengths=reg_strengths,
)
