In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import cv2
import os
from PIL import Image
import urllib.request
import zipfile
import io

# 设置中文字体
import matplotlib
# Font fallback list used for rendering CJK text, and plain ASCII minus signs on axes.
matplotlib.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

# Initialise the notebook-level state; the later cells overwrite these placeholders.
faces, original_faces = [], []            # processed image data / images as read from disk
mean_face = eigenfaces = weights = None   # PCA results
explained_variance_ratio_ = None          # per-component share of the total variance
face_height = face_width = None           # image dimensions used when reshaping vectors

In [None]:
# Local directory that receives the extracted AT&T face database.
output_dir = './att_faces'

# Create the output directory (no-op when it already exists).
os.makedirs(output_dir, exist_ok=True)

# URL of the dataset archive.
dataset_url = "https://www.cl.cam.ac.uk/research/dtg/attarchive/pub/data/att_faces.zip"


def _find_dataset_root(search_dir):
    """Return the first directory under ``search_dir`` that contains an ``s1`` folder.

    The loading cell looks for ``s1`` ... ``s40`` directly inside ``dataset_dir``,
    so the archive layout (flat or nested in a top-level folder) is detected here
    instead of being assumed. Returns ``None`` when no such directory exists.
    """
    for current_dir, subdirs, _ in os.walk(search_dir):
        if 's1' in subdirs:
            return current_dir
    return None


# Reuse a previous extraction so re-running the cell does not hit the network again.
dataset_dir = _find_dataset_root(output_dir)

if dataset_dir is None:
    print("正在下载AT&T人脸数据库...")
    try:
        # Read the whole archive into memory, then unpack it from the in-memory buffer.
        with urllib.request.urlopen(dataset_url, timeout=60) as response:
            archive_bytes = response.read()
        with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
            archive.extractall(output_dir)
        dataset_dir = _find_dataset_root(output_dir)
    except (OSError, zipfile.BadZipFile) as e:
        # URLError and socket timeouts are OSError subclasses. Leave dataset_dir
        # as None so the loading cell's `if dataset_dir:` guard skips cleanly.
        print(f"下载或解压数据集失败: {e}")
        dataset_dir = None

if dataset_dir is not None:
    print(f"数据集目录: {dataset_dir}")
else:
    print("未找到数据集目录，后续加载步骤将被跳过")

In [None]:
# Load the face images.
# `dataset_dir` must be defined before this cell runs (the download cell is the
# place for it); a falsy value skips loading entirely.
if dataset_dir:
    max_subjects = 40
    images_per_subject = 10
    resize = (112, 92)  # target size as (height, width)

    face_height, face_width = resize
    images = []
    original_images = []

    subject_count = 0

    # Walk over the per-subject folders s1, s2, ...
    for s in range(1, min(max_subjects + 1, 41)):  # AT&T has 40 subjects
        subject_dir = os.path.join(dataset_dir, f"s{s}")

        if not os.path.exists(subject_dir):
            print(f"目录 {subject_dir} 不存在，跳过")
            continue

        image_count = 0

        # Walk over this subject's images 1.pgm, 2.pgm, ...
        for i in range(1, images_per_subject + 1):
            image_path = os.path.join(subject_dir, f"{i}.pgm")

            if not os.path.exists(image_path):
                print(f"图像 {image_path} 不存在，跳过")
                continue

            try:
                # Read the PGM file as a single-channel grayscale array.
                # cv2.imread signals failure by returning None, not by raising.
                img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

                if img is not None:
                    # Keep the image exactly as read from disk.
                    original_images.append(img)

                    # Resize only when the image does not already have the target
                    # shape. cv2.resize takes dsize as (width, height), the
                    # reverse of the (height, width) order of `resize`/`img.shape`.
                    if img.shape != (face_height, face_width):
                        img = cv2.resize(img, (face_width, face_height))

                    # Flatten to a 1-D vector of face_height * face_width pixels.
                    images.append(img.flatten())

                    image_count += 1
                else:
                    print(f"无法读取图像 {image_path}，跳过")
            except Exception as e:
                print(f"加载图像 {image_path} 时出错: {e}")

        print(f"已加载主体 {s} 的 {image_count} 张图像")
        subject_count += 1

        if subject_count >= max_subjects:
            break

    if not images:
        raise ValueError("未找到有效的图像文件")

    # Stack into an (n_images, face_height * face_width) float matrix so that the
    # mean subtraction in the PCA step is done in floating point.
    faces = np.array(images, dtype=np.float64)
    original_faces = original_images

    print(f"总共加载了 {len(images)} 张图像")

In [None]:
# Compute PCA (eigenfaces) from the loaded face matrix.
if len(faces) > 0:
    n_components = 100

    # One row per image, one column per pixel.
    face_matrix = np.asarray(faces, dtype=np.float64)
    n_samples, n_features = face_matrix.shape

    # The thin SVD yields at most min(n_samples, n_features) components.
    n_components = min(n_components, n_samples, n_features)

    # 1. Mean face: per-pixel average over all images.
    mean_face = face_matrix.mean(axis=0)

    # 2. Centre the data by subtracting the mean face from every image.
    centered_faces = face_matrix - mean_face

    # 3. SVD of the centred data. The rows of Vt are the eigenvectors of the
    #    covariance matrix, and the squared singular values divided by
    #    (n_samples - 1) are its eigenvalues. This avoids building the
    #    n_features x n_features covariance matrix explicitly.
    _, singular_values, vt = np.linalg.svd(centered_faces, full_matrices=False)
    eigenvalues = singular_values ** 2 / max(n_samples - 1, 1)
    eigenfaces = vt[:n_components]

    # Coordinates of every image in eigenface space.
    weights = centered_faces @ eigenfaces.T

    # 4. Explained variance ratio of the retained components, relative to the
    #    total variance over all components.
    total_var = np.sum(eigenvalues)
    if total_var > 0:
        explained_variance_ratio_ = eigenvalues[:n_components] / total_var
    else:
        # All images identical: there is no variance to attribute.
        explained_variance_ratio_ = np.zeros(n_components)

In [None]:
# Show the mean face as a grayscale image (skipped until PCA has produced it).
if mean_face is not None:
    mean_image = mean_face.reshape(face_height, face_width)

    fig, ax = plt.subplots(figsize=(5, 5))
    ax.imshow(mean_image, cmap='gray')
    ax.set_title('Average Face')
    ax.axis('off')
    fig.tight_layout()
    plt.show()

In [None]:
# Show the leading eigenfaces in a grid (skipped until PCA has produced them).
if eigenfaces is not None:
    n = min(10, len(eigenfaces))
    rows = (n + 4) // 5  # at most 5 images per row

    fig = plt.figure(figsize=(15, 3 * rows))

    for idx, component in enumerate(eigenfaces[:n]):
        ax = fig.add_subplot(rows, 5, idx + 1)

        # Bring the component back to image shape.
        face_img = component.reshape(face_height, face_width)

        # Min-max scale to [0, 1] for display; a constant image is left as is.
        lo = np.min(face_img)
        hi = np.max(face_img)
        if hi > lo:
            face_img = (face_img - lo) / (hi - lo)

        ax.imshow(face_img, cmap='gray')

        variance = explained_variance_ratio_[idx] * 100
        ax.set_title(f'Eigenface {idx+1}\n({variance:.1f}% var)')
        ax.axis('off')

    fig.tight_layout()
    plt.show()