In [30]:
from sklearn.datasets import fetch_openml
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report,accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

In [6]:
# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [14]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Without applying LDA
knn_classifier = KNeighborsClassifier(n_neighbors=1)
knn_classifier.fit(X_train_scaled, y_train)
y_pred_without_lda = knn_classifier.predict(X_test_scaled)

In [17]:
# Apply LDA
lda = LinearDiscriminantAnalysis(n_components=2)
X_train_lda = lda.fit_transform(X_train_scaled, y_train)
X_test_lda = lda.transform(X_test_scaled)

# Train and test the model after applying LDA
knn_classifier_lda = KNeighborsClassifier(n_neighbors=1)
knn_classifier_lda.fit(X_train_lda, y_train)
y_pred_with_lda = knn_classifier_lda.predict(X_test_lda)


In [32]:
# Classification report without LDA
print("Classification Report without LDA:")
print(classification_report(y_test, y_pred_without_lda))
accuracy_without_lda = accuracy_score(y_test, y_pred_without_lda)



# Classification report with LDA
print("\nClassification Report with LDA:")
print(classification_report(y_test, y_pred_with_lda))
accuracy_with_lda = accuracy_score(y_test, y_pred_with_lda)


print("\nAccuracy without LDA:", accuracy_without_lda)
print("Accuracy with LDA:", accuracy_with_lda)



Classification Report without LDA:
              precision    recall  f1-score   support

           0       0.95      0.98      0.97      2267
           1       0.94      0.99      0.97      2603
           2       0.94      0.93      0.94      2350
           3       0.92      0.94      0.93      2383
           4       0.95      0.93      0.94      2144
           5       0.94      0.92      0.93      2107
           6       0.97      0.97      0.97      2294
           7       0.94      0.92      0.93      2455
           8       0.97      0.89      0.93      2196
           9       0.91      0.92      0.92      2301

    accuracy                           0.94     23100
   macro avg       0.94      0.94      0.94     23100
weighted avg       0.94      0.94      0.94     23100


Classification Report with LDA:
              precision    recall  f1-score   support

           0       0.94      0.97      0.95      2267
           1       0.92      0.97      0.94      2603
          

Without LDA:

The model achieved an accuracy of 94.28%. Precision, recall, and F1-score for each class are relatively high, indicating good performance across all classes.

With LDA:

After applying Linear Discriminant Analysis, the model's accuracy decreased slightly to 90.68%. This indicates that LDA may have reduced the model's ability to distinguish between classes compared to the original dataset. Additionally, there is a decrease in precision, recall, and F1-score for some classes, indicating a slight decrease in performance.



High Dimnesional Dataset

In [20]:
mnist = fetch_openml('mnist_784', version=1)
X = mnist.data.astype('float32')
y = mnist.target.astype('int64')

  warn(


In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [22]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [23]:
# Without applying LDA
knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(X_train_scaled, y_train)
y_pred_without_lda = knn_classifier.predict(X_test_scaled)

# Apply LDA
lda = LinearDiscriminantAnalysis(n_components=9)  # Reduce to 9 components for MNIST
X_train_lda = lda.fit_transform(X_train_scaled, y_train)
X_test_lda = lda.transform(X_test_scaled)

In [24]:
# Train and test the model after applying LDA
knn_classifier_lda = KNeighborsClassifier(n_neighbors=3)
knn_classifier_lda.fit(X_train_lda, y_train)
y_pred_with_lda = knn_classifier_lda.predict(X_test_lda)

In [27]:
# Classification report without LDA
accuracy_without_lda = accuracy_score(y_test, y_pred_without_lda)
variance_without_lda = X_train.var()

print("Without LDA:")
print("Variance:", variance_without_lda)
print("Accuracy:", accuracy_without_lda)
print("Classification Report:")
print(classification_report(y_test, y_pred_without_lda))


# Classification report with LDA
accuracy_with_lda = accuracy_score(y_test, y_pred_with_lda)
variance_with_lda = X_train_lda.var()

print("\nWith LDA:")
print("Variance:", variance_with_lda)
print("Accuracy:", accuracy_with_lda)
print("Classification Report:")
print(classification_report(y_test, y_pred_with_lda))

Without LDA:
Variance: pixel1      0.000000
pixel2      0.000000
pixel3      0.000000
pixel4      0.000000
pixel5      0.000000
              ...   
pixel780    0.081962
pixel781    0.000000
pixel782    0.000000
pixel783    0.000000
pixel784    0.000000
Length: 784, dtype: float32
Accuracy: 0.9428138528138528
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.98      0.97      2267
           1       0.94      0.99      0.97      2603
           2       0.94      0.93      0.94      2350
           3       0.92      0.94      0.93      2383
           4       0.95      0.93      0.94      2144
           5       0.94      0.92      0.93      2107
           6       0.97      0.97      0.97      2294
           7       0.94      0.92      0.93      2455
           8       0.97      0.89      0.93      2196
           9       0.91      0.92      0.92      2301

    accuracy                           0.94     23100
   macro avg    

With LDA

Variance:

The variance after applying LDA is shown as a single value (2.8161333 in this case), which represents the variance of the transformed data after dimensionality reduction. This reduced variance indicates that the data has been compressed into fewer dimensions while preserving the most important information.

Accuracy:

With LDA: 

The accuracy of the classifier with LDA is 90.68%, which is slightly lower compared to the accuracy without LDA. This suggests that although LDA reduces the dimensionality of the data, it may discard some information that was helpful for classification.



Without LDA

Variance:

 The variance values for individual pixels are shown, where most of the pixels have a variance of 0, indicating low variance within each pixel across the dataset.


Accuracy:

Without LDA: 
The accuracy of the classifier without LDA is 94.28%, indicating that the model correctly classifies approximately 94.28% of the test data samples.

Observations:

LDA effectively reduces the dimensionality of the feature space while preserving the discriminatory information needed for classification.
The improvement in performance (precision, recall, and F1-score) with LDA is more prominent in the smaller dataset compared to the larger MNIST dataset.
The results indicate that LDA can enhance the performance of machine learning models, especially in scenarios with high-dimensional datasets like the MNIST dataset.