In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the training dataset
train_csv_path = './archive/sign_mnist_train.csv'
train_data = pd.read_csv(train_csv_path)

# Load the testing dataset
test_csv_path = './archive/sign_mnist_test.csv'
test_data = pd.read_csv(test_csv_path)


In [3]:
# Separate features and labels for training
X_train = train_data.iloc[:, 1:].values  # Pixel values
Y_train = train_data.iloc[:, 0].values  # Labels


# Separate features and labels for testing
X_test = test_data.iloc[:, 1:].values
Y_test = test_data.iloc[:, 0].values

In [4]:
# Define a custom QDA class with options for covariance type
class CustomQDA:
    def __init__(self, covariance_type="general"):
        assert covariance_type in ["general", "independent", "isotropic"], "Invalid covariance type"
        self.covariance_type = covariance_type
        self.means = {}
        self.covariances = {}
        self.priors = {}

    def fit(self, X, y):
        classes = np.unique(y)
        for c in classes:
            X_c = X[y == c]
            self.means[c] = np.mean(X_c, axis=0)
            self.priors[c] = X_c.shape[0] / X.shape[0]

            # Covariance matrix types
            if self.covariance_type == "general":
                self.covariances[c] = np.cov(X_c, rowvar=False)
            elif self.covariance_type == "independent":
                self.covariances[c] = np.diag(np.var(X_c, axis=0))  # Diagonal covariance matrix
            elif self.covariance_type == "isotropic":
                variance = np.mean(np.var(X_c, axis=0))  # Average variance
                self.covariances[c] = variance * np.identity(X_c.shape[1])  # Scalar variance matrix

    def predict(self, X):
        return np.array([self._classify(x) for x in X])

    def _classify(self, x):
        scores = {}
        for c in self.means:
            mean = self.means[c]
            covariance = self.covariances[c]
            prior = self.priors[c]
            diff = x - mean
            inv_cov = np.linalg.inv(covariance)
            log_det_cov = np.log(np.linalg.det(covariance))
            score = -0.5 * (diff.T @ inv_cov @ diff) - 0.5 * log_det_cov + np.log(prior)
            scores[c] = score
        return max(scores, key=scores.get)

In [None]:
from sklearn.preprocessing import StandardScaler


# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train CustomQDA model
qda = CustomQDA(covariance_type="general")
qda.fit(X_train, Y_train)

# Predict test data
y_pred = qda.predict(X_test)

# Evaluate accuracy
accuracy = np.mean(y_pred == y_test)
print(f"Test Accuracy: {accuracy:.2f}")


  log_det_cov = np.log(np.linalg.det(covariance))
