In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# --- Load -----------------------------------------------------------------
# Both splits are read from CSV. Column 0 is taken as the class label and
# every remaining column as a feature (presumably one pixel per column --
# verify against the CSV header).
train_data = pd.read_csv('./data/fashion-mnist_train.csv')
test_data = pd.read_csv('./data/fashion-mnist_test.csv')

# --- Split labels from features --------------------------------------------
y_train = train_data.iloc[:, 0].to_numpy()
y_test = test_data.iloc[:, 0].to_numpy()

# Features are divided by 255.0 as they are extracted; this maps them to
# [0, 1] assuming the raw values are 8-bit intensities (0-255).
X_train = train_data.iloc[:, 1:].to_numpy() / 255.0
X_test = test_data.iloc[:, 1:].to_numpy() / 255.0

# --- Standardize -------------------------------------------------------------
# StandardScaler removes the per-feature mean and scales to unit variance.
# The statistics are learned from the training split only and then applied to
# both splits, so no test-set information leaks into the preprocessing.
# (Standardization is invariant to the constant 1/255 rescale above, so that
# step only matters for the intermediate values, not the final ones.)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Quadratic Discriminant Analysis with default settings: fit on the training
# split, then label the held-out test split.
qda = QuadraticDiscriminantAnalysis().fit(X_train, y_train)
y_pred_qda = qda.predict(X_test)

# Random forest of 100 trees; random_state is fixed so the fitted forest (and
# therefore its predictions) is the same on every run.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Evaluate both classifiers on the held-out test split.

# Fraction of test samples whose predicted label equals the true label.
qda_accuracy = accuracy_score(y_test, y_pred_qda)
rf_accuracy = accuracy_score(y_test, y_pred_rf)

# Confusion matrices: entry (i, j) counts samples of true class i that were
# predicted as class j.
qda_conf_matrix = confusion_matrix(y_test, y_pred_qda)
rf_conf_matrix = confusion_matrix(y_test, y_pred_rf)

# Per-class precision / recall / F1, returned as a pre-formatted text table.
qda_report = classification_report(y_test, y_pred_qda)
rf_report = classification_report(y_test, y_pred_rf)

print(f"{qda_accuracy=}")
print(f"{rf_accuracy=}")
print(f"{qda_conf_matrix=}")
print(f"{rf_conf_matrix=}")
# The reports are multi-line strings. The f-string "=" specifier would print
# their repr() -- a single line littered with literal "\n" escapes -- so the
# text itself is printed under a heading instead.
print(f"qda_report=\n{qda_report}")
print(f"rf_report=\n{rf_report}")





qda_accuracy=0.5694
rf_accuracy=0.8852
qda_conf_matrix=array([[623,  73,  27, 178,   8,   0,  50,   0,  41,   0],
       [  0, 955,   0,  29,   2,   0,  12,   0,   2,   0],
       [  1,   1, 352, 299, 251,   0,  71,   0,  25,   0],
       [  1, 506,   1, 468,   2,   0,  18,   0,   4,   0],
       [  0,  14,  25, 444, 467,   0,  42,   0,   8,   0],
       [  0,   1,   0,   2,   0,  94,   9, 594,  32, 268],
       [111,  41,  43, 374, 226,   0, 156,   0,  49,   0],
       [  0,   0,   0,   0,   0,   3,   0, 959,   0,  38],
       [  1,  16,   6,  92,  18,   2,  15,   1, 848,   1],
       [  0,   0,   0,   1,   0,   6,   0, 212,   9, 772]])
rf_conf_matrix=array([[860,   0,  12,  31,   4,   1,  79,   0,  13,   0],
       [  2, 971,   5,  17,   1,   1,   3,   0,   0,   0],
       [  8,   1, 801,  13, 113,   0,  56,   0,   8,   0],
       [ 17,   6,   8, 937,  18,   0,  14,   0,   0,   0],
       [  1,   1,  64,  26, 863,   0,  42,   0,   3,   0],
       [  0,   0,   0,   0,   0, 947,   0,  

In [18]:
from sklearn.decomposition import PCA

# Reduce the standardized features to 20 principal components before QDA.
# The projection is learned from the training split only and then applied
# unchanged to the test split.
# random_state is pinned because PCA's default svd_solver='auto' may select
# the randomized SVD solver for data of this size (depending on the installed
# scikit-learn version); without a seed the components, and hence the accuracy
# printed below, could differ from run to run.
pca = PCA(n_components=20, random_state=42)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train QDA on the reduced dataset.
# NOTE: this refits the existing `qda` estimator in place and overwrites
# `y_pred_qda`; after this cell both refer to the PCA-based model, not the
# one trained on the full feature set.
qda.fit(X_train_pca, y_train)
y_pred_qda = qda.predict(X_test_pca)
accuracy_qda = accuracy_score(y_test, y_pred_qda)
print(f"QDA Accuracy after PCA: {accuracy_qda}")

QDA Accuracy after PCA: 0.7244


In [22]:
from sklearn.mixture import GaussianMixture
from sklearn.metrics import accuracy_score

# Number of Gaussian components in each per-class mixture. This is a tuning
# knob: other values (e.g., 2, 3, 5, etc.) can be tried.
n_components = 5

# Fit one full-covariance Gaussian mixture per class, using only the training
# rows that carry that label. The result maps label -> fitted mixture.
gmm_models = {}
for label in np.unique(y_train):
    class_rows = X_train[y_train == label]
    gmm = GaussianMixture(
        n_components=n_components,
        covariance_type='full',
        random_state=42,  # fixed seed so the mixture initialisation is repeatable
    )
    gmm_models[label] = gmm.fit(class_rows)

# Predict function for MDDA
def predict_mdda(X, models=None):
    """Assign each sample the label whose model gives it the highest score.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        Samples to classify. Passed unchanged to every model's
        ``score_samples``.
    models : mapping of label -> fitted model, optional
        Each value must provide ``score_samples(X)`` returning one score
        (log-likelihood) per sample. When omitted, the notebook-level
        ``gmm_models`` dictionary is used.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        For every sample, the key of the best-scoring model. Ties go to the
        label that comes first in the mapping's iteration order.

    Raises
    ------
    ValueError
        If the mapping contains no models.

    Notes
    -----
    Scores are compared directly, without class priors, which amounts to
    treating all classes as equally likely.
    """
    if models is None:
        models = gmm_models
    if len(models) == 0:
        raise ValueError("predict_mdda needs at least one fitted class model")

    # Columns follow the mapping's iteration order; `labels` remembers which
    # label each column belongs to, so labels need not be 0..K-1 integers.
    labels = np.array(list(models))
    log_likelihoods = np.empty((X.shape[0], len(labels)))
    for column, model in enumerate(models.values()):
        log_likelihoods[:, column] = model.score_samples(X)

    # Translate the winning column index back into the actual class label.
    return labels[np.argmax(log_likelihoods, axis=1)]

# Label every test sample with the per-class mixture classifier.
y_pred_mdda = predict_mdda(X_test)

# Share of test samples whose predicted label matches the true label.
accuracy_mdda = accuracy_score(y_true=y_test, y_pred=y_pred_mdda)
print(f"MDDA Accuracy: {accuracy_mdda}")


MDDA Accuracy: 0.7013
