In [6]:
import torch
import torchvision
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import MDS, LocallyLinearEmbedding, Isomap
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np


In [7]:
# 加载MNIST数据集
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
mnist_data = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# 将数据转换为NumPy数组，并进行采样
data = mnist_data.data.numpy().reshape(len(mnist_data), -1)
labels = mnist_data.targets.numpy()

# 数据采样
sample_size = 5000  # 减少样本数量
indices = np.random.choice(len(data), sample_size, replace=False)
sampled_data = data[indices]
sampled_labels = labels[indices]


In [3]:
def reduce_dimensions(method, data, n_components=20):  # 减少目标维数
    if method == 'PCA':
        model = PCA(n_components=n_components)
    elif method == 'KPCA':
        model = KernelPCA(n_components=n_components, kernel='rbf')
    elif method == 'MDS':
        model = MDS(n_components=n_components)
    elif method == 'LLE':
        model = LocallyLinearEmbedding(n_components=n_components)
    elif method == 'ISOMAP':
        model = Isomap(n_components=n_components)
    else:
        raise ValueError('Invalid dimensionality reduction method')
    return model.fit_transform(data)

def classify_and_evaluate(classifier, train_data, train_labels, test_data, test_labels):
    classifier.fit(train_data, train_labels)
    predictions = classifier.predict(test_data)
    return accuracy_score(test_labels, predictions)


In [8]:
methods = ['PCA', 'KPCA', 'MDS', 'LLE', 'ISOMAP']
classifiers = [SVC(), DecisionTreeClassifier()]
results = {}

for method in methods:
    # 降维
    reduced_data = reduce_dimensions(method, sampled_data)
    # 划分数据集
    X_train, X_test, y_train, y_test = train_test_split(reduced_data, sampled_labels, test_size=0.2)
    
    for classifier in classifiers:
        key = f'{method}_{type(classifier).__name__}'
        # 分类和评估
        accuracy = classify_and_evaluate(classifier, X_train, y_train, X_test, y_test)
        results[key] = accuracy

# 打印结果
for key, value in results.items():
    print(f'{key}: {value}')




PCA_SVC: 0.962
PCA_DecisionTreeClassifier: 0.74
KPCA_SVC: 0.118
KPCA_DecisionTreeClassifier: 0.092
MDS_SVC: 0.497
MDS_DecisionTreeClassifier: 0.357
LLE_SVC: 0.736
LLE_DecisionTreeClassifier: 0.736
ISOMAP_SVC: 0.546
ISOMAP_DecisionTreeClassifier: 0.439
