In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.manifold import TSNE
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

# Load the Iris bunch once; X is the feature matrix and y the class labels.
iris = load_iris()
X, y = iris.data, iris.target


In [2]:
# Standardize the features; keep the fitted scaler around for later reuse.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [3]:
def apply_lda(X, y, n_components):
    """Project X onto ``n_components`` linear discriminants fitted against y.

    Parameters
    ----------
    X : array with a ``shape`` attribute; ``X.shape[1]`` is the feature count.
    y : class labels, one per row of X.
    n_components : requested number of discriminant axes.

    Returns
    -------
    The result of ``LDA(...).fit_transform(X, y)``.

    Raises
    ------
    ValueError
        If ``n_components`` exceeds ``min(n_features, n_classes - 1)``.
    """
    n_features = X.shape[1]
    n_classes = len(np.unique(y))
    max_components = min(n_features, n_classes - 1)

    # Fail early with a clear message instead of relying on the estimator.
    if n_components > max_components:
        raise ValueError(f"LDA cannot have n_components > {max_components}.")

    return LDA(n_components=n_components).fit_transform(X, y)


def apply_pca(X, n_components):
    """Fit a PCA with ``n_components`` components on X and return the projection."""
    return PCA(n_components=n_components).fit_transform(X)

def apply_svd(X, n_components, random_state=42):
    """Reduce X to ``n_components`` columns with TruncatedSVD.

    Parameters
    ----------
    X : feature matrix passed straight to ``TruncatedSVD.fit_transform``.
    n_components : number of output dimensions.
    random_state : seed forwarded to TruncatedSVD. Its default solver is
        randomized, so the seed is pinned (to the same value the other
        stochastic steps in this notebook use) to make re-runs reproducible.

    Returns
    -------
    The transformed array returned by ``fit_transform``.
    """
    svd = TruncatedSVD(n_components=n_components, random_state=random_state)
    return svd.fit_transform(X)

def apply_tsne(X, n_components):
    """Embed X into ``n_components`` dimensions with t-SNE (seeded with 42)."""
    return TSNE(n_components=n_components, random_state=42).fit_transform(X)



In [4]:
def perform_cross_validation(X_transformed, y):
    """Score a random forest on the reduced features with 5-fold stratified CV.

    Parameters
    ----------
    X_transformed : feature matrix to evaluate.
    y : class labels aligned with the rows of ``X_transformed``.

    Returns
    -------
    (mean, std) of the per-fold accuracy scores.
    """
    # Both the splitter and the classifier are seeded so results are repeatable.
    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_accuracies = cross_val_score(
        RandomForestClassifier(random_state=42),
        X_transformed,
        y,
        cv=splitter,
        scoring='accuracy',
    )
    return fold_accuracies.mean(), fold_accuracies.std()


In [5]:
# Compare every reducer at each target dimensionality with a single loop
# instead of one copy-pasted loop per case.
REDUCERS = [("LDA", apply_lda), ("PCA", apply_pca), ("SVD", apply_svd), ("TSNE", apply_tsne)]

# LDA yields at most min(n_features, n_classes - 1) components; compute once.
max_lda_components = min(X_scaled.shape[1], len(np.unique(y)) - 1)

# NOTE(review): each reducer is fit on the full dataset before cross-validation.
# LDA uses y while fitting, so held-out labels influence the features and its
# accuracy is likely optimistic. Wrapping reducer + classifier in a
# sklearn Pipeline and cross-validating that would avoid the leak.
for case_number, n_components in enumerate((2, 3), start=1):
    # A blank line separates every case after the first.
    separator = "" if case_number == 1 else "\n"
    print(f"{separator}Case {case_number}: Reduction to {n_components} features")

    for method, func in REDUCERS:
        if method == "LDA":
            if n_components > max_lda_components:
                print(f"{method}: Skipping (n_components > max allowed)")
                continue
            # LDA gets the same standardized matrix as the other reducers so
            # that all methods are compared on identical input.
            X_transformed = func(X_scaled, y, n_components=n_components)
        else:
            X_transformed = func(X_scaled, n_components=n_components)

        mean_acc, std_acc = perform_cross_validation(X_transformed, y)
        print(f"{method}: Mean Accuracy = {mean_acc:.4f}, Std = {std_acc:.4f}")


Case 1: Reduction to 2 features
LDA: Mean Accuracy = 0.9733, Std = 0.0389
PCA: Mean Accuracy = 0.8800, Std = 0.0859
SVD: Mean Accuracy = 0.8800, Std = 0.0859
TSNE: Mean Accuracy = 0.9200, Std = 0.0806

Case 2: Reduction to 3 features
LDA: Skipping (n_components > max allowed)
PCA: Mean Accuracy = 0.9333, Std = 0.0699
SVD: Mean Accuracy = 0.9333, Std = 0.0699
TSNE: Mean Accuracy = 0.9333, Std = 0.0471
