# In [None]:
#importing libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split, GridSearchCV #Helps find the best combination of model parameters
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix, roc_auc_score, classification_report, roc_curve, auc
from sklearn.preprocessing import label_binarize #Converts labels into a format suitable for multiclass metrics like ROC-AUC

# loading dataset
# Load the scikit-learn digits bunch and pull out the feature matrix and labels.
digits = datasets.load_digits()
X = digits.data
y = digits.target
# One indicator column per distinct label (one-vs-rest encoding); the per-class
# ROC computations further down need the targets in this binary form.
y_bin = label_binarize(y, classes=np.unique(y))

# splitting data
# Hold out 20% of the samples for testing. X, y and y_bin are split together so
# their rows stay aligned; the fixed seed makes the split reproducible.
(
    X_train,
    X_test,
    y_train,
    y_test,
    y_bin_train,
    y_bin_test,
) = train_test_split(
    X,
    y,
    y_bin,
    test_size=0.2,
    random_state=42,
)

# hyperparameter tuning
# Candidate LDA settings:
#   - solver: algorithm used to fit the model ("svd", "lsqr", "eigen").
#   - shrinkage: covariance regularization; only paired with the "lsqr" and
#     "eigen" solvers in this grid.
param_grid = [
    {"solver": ["svd"]},
    {"solver": ["lsqr", "eigen"], "shrinkage": ["auto"]},
]
# Score every candidate by accuracy with 5-fold cross-validation; n_jobs=-1
# lets the search use all available cores.
grid_search = GridSearchCV(
    estimator=LinearDiscriminantAnalysis(),
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)
# Run the search on the training split only; the test split stays unseen.
grid_search.fit(X_train, y_train)


# Take the estimator GridSearchCV kept for the best-scoring parameter
# combination and apply it to the held-out test set.
lda = grid_search.best_estimator_
y_prob = lda.predict_proba(X_test)  # class probabilities, one column per class
y_pred = lda.predict(X_test)  # hard class labels

# evaluation
# All metrics below are computed on the held-out test split.
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred):.4f}")
# Pass the multiclass labels directly: `multi_class` only takes effect for
# multiclass targets, so with a binarized indicator matrix it was ignored.
# The one-vs-rest macro average is numerically the same either way.
print(f"ROC-AUC: {roc_auc_score(y_test, y_prob, multi_class='ovr'):.4f}")
# Embed the report in the f-string: handing it to print() as a second argument
# inserts a separator space that pushes the report's header row out of
# alignment with the rows beneath it.
print(f"\nClassification Report:\n{classification_report(y_test, y_pred)}")

# confusion matrix
# Rows are true labels, columns are predicted labels (see axis labels below).
cm = confusion_matrix(y_test, y_pred)
plt.figure()  # draw on a fresh figure rather than whatever is current
plt.imshow(cm, cmap="Blues")
plt.colorbar()
# Annotate every cell with its count. Cells above half the maximum count are
# rendered dark by the "Blues" colormap, so switch to white text there to keep
# the numbers readable.
contrast_threshold = cm.max() / 2
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        text_color = "white" if cm[i, j] > contrast_threshold else "black"
        plt.text(j, i, cm[i, j], ha="center", va="center", color=text_color)
# Force one tick per row/column so no class is skipped by the default locator.
plt.xticks(np.arange(cm.shape[1]))
plt.yticks(np.arange(cm.shape[0]))
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

# ROC curve
# One-vs-rest ROC: each column of the binarized test labels is paired with the
# matching column of predicted probabilities and drawn as its own curve.
plt.figure(figsize=(8, 6))
for i, true_column in enumerate(y_bin_test.T):
    fpr, tpr, _ = roc_curve(true_column, y_prob[:, i])
    class_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"Digit {i} (AUC = {class_auc:.2f})")
# Dashed diagonal: the reference line for a classifier with no skill.
plt.plot([0, 1], [0, 1], "k--")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Multiclass ROC Curve")
plt.legend()
plt.show()
