In [2]:
import numpy as np
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import MDS, LocallyLinearEmbedding, Isomap
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score


In [3]:
# 加载 LFW 数据集
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# 提取面部图像的特征向量和标签
X = lfw_people.data
y = lfw_people.target
target_names = lfw_people.target_names


In [6]:
def reduce_dimensions(method, data):
    if method == 'PCA':
        model = PCA(n_components=100)
    elif method == 'KPCA':
        model = KernelPCA(n_components=100, kernel='rbf')
    elif method == 'MDS':
        model = MDS(n_components=2)
    elif method == 'LLE':
        model = LocallyLinearEmbedding(n_components=2)
    elif method == 'ISOMAP':
        model = Isomap(n_components=2)
    else:
        raise ValueError('Invalid dimensionality reduction method')
    return model.fit_transform(data)

def feature_selection(data, labels, num_features=50):
    # 确保不选择超过现有特征数的特征
    num_features = min(num_features, data.shape[1])
    selector = SelectKBest(f_classif, k=num_features)
    return selector.fit_transform(data, labels)


def classify_and_evaluate(classifier, train_data, train_labels, test_data, test_labels):
    classifier.fit(train_data, train_labels)
    predictions = classifier.predict(test_data)
    return classification_report(test_labels, predictions, target_names=target_names)


In [7]:
methods = ['PCA', 'KPCA', 'MDS', 'LLE', 'ISOMAP']
classifiers = [SVC(), GaussianNB(), DecisionTreeClassifier()]
results = {}

for method in methods:
    reduced_data = reduce_dimensions(method, X)
    selected_data = feature_selection(reduced_data, y)
    
    X_train, X_test, y_train, y_test = train_test_split(selected_data, y, test_size=0.2)
    
    for classifier in classifiers:
        key = f'{method}_{type(classifier).__name__}'
        report = classify_and_evaluate(classifier, X_train, y_train, X_test, y_test)
        results[key] = report

# 打印结果
for key, report in results.items():
    print(f'{key}:\n{report}\n')


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


PCA_SVC:
                   precision    recall  f1-score   support

     Ariel Sharon       0.86      0.40      0.55        15
     Colin Powell       0.67      0.90      0.77        39
  Donald Rumsfeld       0.89      0.65      0.76        26
    George W Bush       0.84      0.97      0.90       112
Gerhard Schroeder       0.71      0.50      0.59        24
      Hugo Chavez       0.89      0.73      0.80        11
       Tony Blair       0.83      0.65      0.73        31

         accuracy                           0.80       258
        macro avg       0.81      0.69      0.73       258
     weighted avg       0.81      0.80      0.79       258


PCA_GaussianNB:
                   precision    recall  f1-score   support

     Ariel Sharon       0.77      0.67      0.71        15
     Colin Powell       0.70      0.85      0.77        39
  Donald Rumsfeld       0.80      0.62      0.70        26
    George W Bush       0.78      0.91      0.84       112
Gerhard Schroeder       0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
