**姓名：** Zhai Qiuyu

**EID：** qiuyuzhai2

In [None]:
%matplotlib inline
import matplotlib_inline   # setup output image format
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
import matplotlib.pyplot as plt
import matplotlib
from numpy import *
from sklearn import *
import os
import zipfile
import fnmatch
random.seed(100)
import skimage.io
import skimage.color
import skimage.transform
from scipy import ndimage

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from torchvision import transforms
import sys
print("Python:", sys.version, "PyTorch:", torch.__version__)

## 1. 数据加载与预处理
接下来我们需要加载图像。请下载 `faces.zip`，并将其放在与本 ipynb 文件相同的目录下。**不要解压**。然后运行下面的单元来加载图像。

In [None]:
# Decoded images and their integer labels, keyed by dataset split.
imgdata = {'train': [], 'test': []}
classes = {'train': [], 'test': []}

# The dataset is large, so a subset can be sampled for faster loading/training.
# Training set: keep every `train_subsample`-th image of each class
# (1 means no subsampling).
train_subsample = 1
train_counter = [0, 0]
# Test set: maximum number of images kept per class.
test_maxsample = 472
test_counter = [0, 0]

# The archive is read in place (faces.zip next to the notebook, not unzipped).
filename = 'faces.zip'

# Context managers guarantee that the archive and every member handle are
# closed even if decoding one of the images raises.
with zipfile.ZipFile(filename, 'r') as zfile:
    for name in zfile.namelist():
        # Only paths of the form faces/<train|test>/<face|nonface>/<file>.png
        if not fnmatch.fnmatch(name, "faces/*/*/*.png"):
            continue

        # Example path: faces/train/face/fname.png
        (fdir1, fname)  = os.path.split(name)     # file name
        (fdir2, fclass) = os.path.split(fdir1)    # class directory (face / nonface)
        (fdir3, fset)   = os.path.split(fdir2)    # dataset split (train / test)
        # Class mapping: 1 = face, 0 = non-face
        myclass = int(fclass == "face")

        loadme = False
        if fset == 'train':
            # Training set: apply the per-class subsampling factor.
            if (train_counter[myclass] % train_subsample) == 0:
                loadme = True
            train_counter[myclass] += 1
        elif fset == 'test':
            # Test set: cap the number of samples per class.
            if test_counter[myclass] < test_maxsample:
                loadme = True
            test_counter[myclass] += 1

        if loadme:
            # Decode the archive member directly from memory as a grayscale image.
            with zfile.open(name) as myfile:
                img = skimage.io.imread(myfile, as_gray=True)

            # Store the image together with its label.
            imgdata[fset].append(img)
            classes[fset].append(myclass)

# Fail with a clear message instead of a NameError on `img` below.
if not (imgdata['train'] or imgdata['test']):
    raise RuntimeError("no images matching faces/*/*/*.png found in " + filename)

# Shape of a single image (taken from the last image that was loaded).
imgsize = img.shape

print(len(imgdata['train']))
print(len(imgdata['test']))
# Number of training samples labelled 1 (face).
trainclass2start = sum(classes['train'])

接下来我们将把图像列表转换为图像张量，以便于后续处理。

In [None]:
# Convert the label lists to numpy arrays.
trainY = asarray(classes['train'])
testY  = asarray(classes['test'])

# One-hot encode the class labels (numpy and torch versions).
trainYb_np = zeros((len(trainY), 2))
trainYb_np[arange(len(trainY)), trainY] = 1
testYb_np = zeros((len(testY), 2))
testYb_np[arange(len(testY)), testY] = 1
trainYb = F.one_hot(torch.tensor(trainY, dtype=torch.long), num_classes=2).float()
testYb = F.one_hot(torch.tensor(testY, dtype=torch.long), num_classes=2).float()

# Stack each list of images into one array and append a trailing channel
# axis (NHWC). The sample count and image size are taken from the data
# itself, so changing the subsampling settings does not break the reshape.
trainI_np = asarray(imgdata['train'])[..., None]
testI_np = asarray(imgdata['test'])[..., None]
trainI = torch.tensor(trainI_np, dtype=torch.float32).permute(0, 3, 1, 2)  # NHWC -> NCHW
testI = torch.tensor(testI_np, dtype=torch.float32).permute(0, 3, 1, 2)   # NHWC -> NCHW

# Free the original image lists.
del imgdata

# Shuffle the data; the same permutation is applied to images and labels of
# a split so that they stay aligned.
random.seed(123)
inds1 = random.permutation(len(trainI_np)).tolist()
inds2 = random.permutation(len(testI_np)).tolist()
trainYb = trainYb[inds1]
testYb = testYb[inds2]
trainY = trainY[inds1]
testY = testY[inds2]
trainI = trainI[inds1]
testI = testI[inds2]

print(trainI.shape)
print(testI.shape)

每幅图像在张量中的形状为 1x19x19（通道在前，即 NCHW 布局）。第一个维度表示图像的通道数——本例为灰度图像，因此只有 1 个通道。运行下面的代码以显示一个示例：

In [None]:
print(img.shape)
# Draw two training images side by side with their captions.
# NOTE(review): the captions assume samples 1 and 2 are a face and a non-face
# after the seeded shuffle - confirm against trainY.
for panel, (sample_idx, caption) in enumerate(
        [(1, "face sample"), (2, "non-face sample")], start=1):
    plt.subplot(1, 2, panel)
    plt.imshow(squeeze(trainI[sample_idx]), cmap='gray', interpolation='nearest')
    plt.title(caption)
plt.show()

运行下面的代码以显示更多图像！

In [None]:
# 显示图像

# function to make an image montage
def image_montage(X, imsize=None, maxw=10):
    """Tile a collection of equally sized images into a single montage image.

    Parameters:
      X      -- a list of images, or a matrix of vectorized images
                (one image per row).
      imsize -- shape each entry of X is reshaped to; specify it when X is a
                matrix of vectorized images. When None, each entry is only
                squeezed.
      maxw   -- maximum number of images per montage row.

    Returns the montage as a single 2-D array (rows of up to `maxw` tiles).

    Raises ValueError when X is empty.
    """
    numimgs = len(X)
    if numimgs == 0:
        raise ValueError("image_montage needs at least one image")

    # create a list of images (reshape if necessary)
    if imsize is not None:
        tmp = [X[i].reshape(imsize) for i in range(numimgs)]
    else:
        tmp = [squeeze(X[i]) for i in range(numimgs)]

    # pad the last row with blank tiles so that every row has maxw tiles;
    # the blanks use the mid-point of the first image's value range
    if (numimgs > maxw) and (mod(numimgs, maxw) > 0):
        leftover = maxw - mod(numimgs, maxw)
        meanimg = 0.5*(X[0].max()+X[0].min())
        tmp.extend(ones(tmp[0].shape)*meanimg for _ in range(leftover))

    # make the montage: concatenate each row horizontally, then stack the rows
    rows = [hstack(tmp[i:i+maxw]) for i in range(0, len(tmp), maxw)]
    montimg = vstack(rows)
    return montimg

# show images in a plot
def show_imgs(W_list, nc=10, highlight_green=None, highlight_red=None, titles=None):
    """Draw every image of W_list in a grid of subplots on the current figure.

    Parameters:
      W_list          -- sequence of 2-D images.
      nc              -- number of columns of the grid.
      highlight_green -- optional per-image flags; flagged images get a green frame.
      highlight_red   -- optional per-image flags; flagged images get a red frame
                         (green takes precedence when both flags are set).
      titles          -- optional; either a format string that receives the
                         image index, or a sequence with one title per image.
    """
    nfilter = len(W_list)
    nr = (nfilter - 1) // nc + 1      # number of grid rows
    for idx, cur_W in enumerate(W_list):
        plt.subplot(nr, nc, idx + 1)
        plt.imshow(cur_W, cmap='gray', interpolation='nearest')
        if titles is not None:
            if isinstance(titles, str):
                plt.title(titles.format(idx))
            else:
                plt.title(titles[idx])

        # Each highlight list is optional on its own; check for None before
        # indexing so that passing only highlight_red works.
        is_green = (highlight_green is not None) and bool(highlight_green[idx])
        is_red = (not is_green) and (highlight_red is not None) and bool(highlight_red[idx])

        if is_green or is_red:
            ax = plt.gca()
            mycol = '#00FF00' if is_green else 'r'
            # keep the axes frame visible and colour it, but hide the ticks
            for S in ['bottom', 'top', 'right', 'left']:
                ax.spines[S].set_color(mycol)
                ax.spines[S].set_lw(2.0)
            ax.xaxis.set_ticks_position('none')
            ax.yaxis.set_ticks_position('none')
            ax.set_xticks([])
            ax.set_yticks([])
        else:
            plt.gca().set_axis_off()

# Show a montage of the first 50 training images of each class: one figure
# per one-hot column (column 0 first, then column 1).
for onehot_col in (0, 1):
    plt.figure(figsize=(9,4))
    class_imgs = trainI[trainYb[:,onehot_col]==1][0:50]
    plt.imshow(image_montage(class_imgs), cmap='gray', interpolation='nearest')
    plt.show()

接下来我们将从训练数据中划分训练集与验证集。

In [None]:
# Hold out a fixed 10% of the training data as the validation set
# (fixed random_state, so the split is the same on every run).
vtrainI, validI, vtrainYb, validYb = model_selection.train_test_split(
    trainI, trainYb, train_size=0.9, test_size=0.1, random_state=4488)

# Validation images and one-hot labels bundled as one tuple.
validsetI = (validI, validYb)

print(vtrainI.shape)
print(validI.shape)

In [None]:
# Plot training curves
def plot_history(history):
    """Plot the loss curves and, when recorded, the accuracy curves.

    `history.history` is a dict of per-epoch lists with the keys 'loss' and
    'val_loss', and optionally 'accuracy' and 'val_accuracy'. Loss is drawn
    in red on the left axis, accuracy in blue on a twin right axis; each
    legend entry shows the final value of its curve.
    """
    curves = history.history
    fig, loss_ax = plt.subplots()

    loss_specs = (
        ('loss', 'r', "training loss ({:.6f})"),
        ('val_loss', 'r--', "validation loss ({:.6f})"),
    )
    for key, line_fmt, label_fmt in loss_specs:
        loss_ax.plot(curves[key], line_fmt, label=label_fmt.format(curves[key][-1]))
    loss_ax.grid(True)
    loss_ax.set_xlabel('iteration')
    loss_ax.legend(loc="best", fontsize=9)
    loss_ax.set_ylabel('loss', color='r')
    loss_ax.tick_params('y', colors='r')

    if 'accuracy' not in curves:
        return

    # accuracy shares the x axis but gets its own y axis on the right
    acc_ax = loss_ax.twinx()
    acc_specs = (
        ('accuracy', 'b', "training acc ({:.4f})"),
        ('val_accuracy', 'b--', "validation acc ({:.4f})"),
    )
    for key, line_fmt, label_fmt in acc_specs:
        acc_ax.plot(curves[key], line_fmt, label=label_fmt.format(curves[key][-1]))

    acc_ax.legend(loc="best", fontsize=9)
    acc_ax.set_ylabel('acc', color='b')
    acc_ax.tick_params('y', colors='b')

In [None]:
# Early stopping
class EarlyStopping:
    """Signal that training should stop once a score stops improving.

    Call the instance once per epoch with the monitored score (higher is
    better). A score counts as an improvement only when it reaches at least
    `best_score + min_delta`. After `patience` calls without an improvement
    since the last one, `early_stop` is set to True.

    `monitor` and `mode` are only stored; they do not influence the logic.
    """

    def __init__(self, monitor='val_accuracy', min_delta=0.0001, patience=5, verbose=1, mode='auto'):
        # configuration
        self.monitor, self.mode = monitor, mode
        self.min_delta, self.patience = min_delta, patience
        self.verbose = verbose
        # running state
        self.best_score = None      # best score seen so far
        self.counter = 0            # calls since the last improvement
        self.early_stop = False     # set once patience is exhausted

    def __call__(self, score):
        # the very first score just initialises the reference value
        if self.best_score is None:
            self.best_score = score
            return

        stalled = score < self.best_score + self.min_delta
        if not stalled:
            # improvement: remember it and restart the patience counter
            self.best_score = score
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            if self.verbose:
                print("early stopping")
            self.early_stop = True

下面定义一个简单的包装类，使训练历史可以通过 `history.history[...]` 访问，从而与 `plot_history` 的接口保持一致。

In [None]:
# Thin wrapper so a plain dict of curves can be passed to plot_history
class HistoryWrapper:
    """Expose a dict of per-epoch metric lists as the `.history` attribute."""

    def __init__(self, history_dict):
        # keep a reference to the caller's dict (no copy is made)
        self.history = history_dict

In [None]:
# Basic training configuration
batch_size = 50    # mini-batch size used by the data loaders
epochs = 100       # upper bound on training epochs for loops that use `epochs`

In [None]:
# Data loaders
# Training batches are reshuffled every epoch; validation batches keep a fixed order.
train_dataset = TensorDataset(vtrainI, vtrainYb)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_dataset = TensorDataset(validI, validYb)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# 逻辑回归
- 现在我们尝试使用 PyTorch 训练一个简单的逻辑回归分类器。由于输入是图像，我们首先使用 `Flatten` 层将输入图像展平为向量。

In [None]:
# initialize random seed
torch.manual_seed(4487)
random.seed(4487)

# Build the network for logistic regression: flatten the image and apply a
# single linear layer that outputs one raw score (logit) per class.
# There is deliberately no Softmax layer: nn.CrossEntropyLoss expects
# unnormalized logits and applies log-softmax itself, so an explicit Softmax
# would normalise twice and flatten the gradients.
nn_model = nn.Sequential(
    nn.Flatten(),                                     # vectorize the input image
    nn.Linear(19*19*1, 2),                            # classification layer (2 classes)
)

# early stopping criteria (driven by the validation accuracy below)
earlystop = EarlyStopping(monitor='val_accuracy', min_delta=0.0001, patience=5, verbose=1, mode='auto')
callbacks_list = [earlystop]

# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(nn_model.parameters(), lr=0.05, momentum=0.9, nesterov=True)

# per-epoch curves; accuracy is tracked alongside the loss
history = {'loss': [], 'accuracy': [], 'val_loss': [], 'val_accuracy': []}

for epoch in range(epochs):
    # Training phase
    nn_model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0

    for batch_x, batch_y in train_loader:
        targets = torch.argmax(batch_y, dim=1)        # one-hot -> class index
        optimizer.zero_grad()
        outputs = nn_model(batch_x)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        predicted = torch.argmax(outputs.detach(), dim=1)
        train_total += batch_y.size(0)
        train_correct += (predicted == targets).sum().item()

    # Validation phase
    nn_model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for batch_x, batch_y in valid_loader:
            targets = torch.argmax(batch_y, dim=1)
            outputs = nn_model(batch_x)
            loss = criterion(outputs, targets)

            val_loss += loss.item()
            predicted = torch.argmax(outputs, dim=1)
            val_total += batch_y.size(0)
            val_correct += (predicted == targets).sum().item()

    # Calculate metrics (loss is averaged over batches, accuracy over samples)
    train_loss /= len(train_loader)
    val_loss /= len(valid_loader)
    train_acc = train_correct / train_total
    val_acc = val_correct / val_total

    # Store history
    history['loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['accuracy'].append(train_acc)
    history['val_accuracy'].append(val_acc)

    # Early stopping check
    earlystop(val_acc)
    if earlystop.early_stop:
        break


history = HistoryWrapper(history)

plot_history(history)

# Prediction on the test set: the predicted class is the one with the largest logit
nn_model.eval()
with torch.no_grad():
    pred_outputs = nn_model(testI)
    predY = torch.argmax(pred_outputs, dim=1).numpy()

acc = metrics.accuracy_score(testY, predY)
print("test accuracy:", acc)


## 2. 使用 CNN 进行检测

训练一个 CNN 来判断图像小块是否为人脸。使用 `vtrainI` 和 `vtrainYb` 作为训练集，`validsetI` 作为验证集。你可以尝试不同的网络结构，并调整学习率、迭代次数、早停、正则化等超参数以获得更好的结果。为了加快训练，建议使用较大的批量（例如 50）。记得加入“回调”（如早停）以便监控训练过程。

In [None]:
# Build and train a CNN for 19x19 grayscale patches (no data augmentation).
# Reuses the train_loader / valid_loader defined earlier (built from vtrainI / validI).

torch.manual_seed(4487)
random.seed(4487)

class CNN19(nn.Module):
    """Small CNN for single-channel 19x19 patches that outputs 2 class logits.

    Two conv/ReLU/max-pool stages are followed by a two-layer fully connected
    head. The output is raw logits, intended for nn.CrossEntropyLoss.
    """

    def __init__(self):
        super().__init__()
        conv_stages = [
            # stage 1: (N,1,19,19) -> (N,16,19,19) -> pooled to (N,16,9,9)
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            # stage 2: (N,16,9,9) -> (N,32,9,9) -> pooled to (N,32,4,4)
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        ]
        head = [
            nn.Flatten(),                 # 32*4*4 = 512 features
            nn.Linear(32*4*4, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 2),             # logits for the 2 classes
        ]
        self.features = nn.Sequential(*conv_stages)
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        return self.classifier(self.features(x))

# Model, loss and optimizer. CNN19 outputs logits, which is what
# CrossEntropyLoss expects.
cnn_model = CNN19()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=1e-3)

# Early stopping; it is fed the validation accuracy at the end of each epoch
earlystop = EarlyStopping(monitor='val_accuracy', min_delta=1e-4, patience=6, verbose=1, mode='auto')

# Per-epoch training curves
history = {"loss": [], "val_loss": [], "accuracy": [], "val_accuracy": []}

# Best validation accuracy seen so far and a copy of the weights that achieved it
best_val_acc = -1
best_state = None

# Maximum number of epochs (adjust as needed)
epochs_cnn = 50
for epoch in range(epochs_cnn):
    cnn_model.train()
    train_loss, train_correct, train_total = 0.0, 0, 0

    for bx, byb in train_loader:
        optimizer.zero_grad()
        logits = cnn_model(bx)
        target = torch.argmax(byb, dim=1)  # one-hot -> class index
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        pred = torch.argmax(logits.detach(), dim=1)
        train_total += byb.size(0)
        train_correct += (pred == target).sum().item()

    # Validation (no gradient tracking)
    cnn_model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad():
        for bx, byb in valid_loader:
            logits = cnn_model(bx)
            target = torch.argmax(byb, dim=1)
            loss = criterion(logits, target)
            val_loss += loss.item()
            pred = torch.argmax(logits, dim=1)
            val_total += byb.size(0)
            val_correct += (pred == target).sum().item()

    # Loss is averaged over batches, accuracy over samples; max(1, ...) guards
    # against division by zero for an empty loader
    avg_train_loss = train_loss / max(1, len(train_loader))
    avg_val_loss = val_loss / max(1, len(valid_loader))
    train_acc = train_correct / max(1, train_total)
    val_acc = val_correct / max(1, val_total)

    history['loss'].append(avg_train_loss)
    history['val_loss'].append(avg_val_loss)
    history['accuracy'].append(train_acc)
    history['val_accuracy'].append(val_acc)

    # Keep a copy of the parameters with the best validation accuracy
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = {k: v.cpu().clone() for k, v in cnn_model.state_dict().items()}

    print(f"Epoch {epoch+1}/{epochs_cnn} - loss: {avg_train_loss:.4f} - acc: {train_acc:.4f} - val_loss: {avg_val_loss:.4f} - val_acc: {val_acc:.4f}")

    # Early stopping check
    earlystop(val_acc)
    if earlystop.early_stop:
        break

# Restore the best weights
if best_state is not None:
    cnn_model.load_state_dict(best_state)

# Wrap the dict so that plot_history can read it via `.history`
history = HistoryWrapper(history)

In [None]:
# Visualize the CNN training curves
plot_history(history)

In [None]:
# Evaluate the CNN on the test set: the predicted class is the arg-max of the logits
cnn_model.eval()
with torch.no_grad():
    logits = cnn_model(testI)
    predY_cnn = torch.argmax(logits, dim=1).numpy()

acc_cnn = metrics.accuracy_score(testY, predY_cnn)
print("test accuracy (CNN):", acc_cnn)

# Optional: confusion matrix of true vs. predicted labels
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(testY, predY_cnn)
print("Confusion matrix:\n", cm)

In [None]:
# Show a few randomly chosen test images with their ground-truth (gt) and
# predicted (pred) labels
import numpy as np
nshow = 6
# distinct random test indices (sampling without replacement)
idx = np.random.choice(len(testI), size=nshow, replace=False)

plt.figure(figsize=(9,3))
for i, k in enumerate(idx):
    plt.subplot(1, nshow, i+1)
    plt.imshow(squeeze(testI[k]), cmap='gray', interpolation='nearest')
    plt.title(f"gt:{testY[k]} pred:{predY_cnn[k]}")
    plt.axis('off')
plt.tight_layout()
plt.show()

_How does the MLP compare to the linear and non-linear classifiers that you tried in Tutorial 4?_
- **INSERT YOUR ANSWER HERE**

## 3. Data Augmentation

Now use data augmentation (introduced in the last tutorial) to try to improve the accuracy.

We can also add per-pixel noise or transformations. We define a few functions for adding per-pixel noise.  The following functions will add Gaussian pixel noise, add corruption noise (setting some input pixels to 0), scale and shift pixel values (changing contrast and brightness).

In [None]:
def add_gauss_noise(X, sigma2=0.05):
    """Return a float32 copy of X with zero-mean Gaussian noise added per element.

    NOTE: despite its name, `sigma2` is handed to torch.normal as the
    standard deviation of the noise, not as its variance.
    """
    base = X.float()
    perturbation = torch.normal(0, sigma2, base.shape, dtype=torch.float32, device=base.device)
    noisy = base + perturbation
    return noisy.float()

def add_corrupt_noise(X, p=0.1):
    """Return a float32 copy of X in which each element is zeroed with probability p."""
    base = X.float()
    # an element survives when its uniform [0, 1) draw exceeds p
    keep = torch.rand(base.shape, dtype=torch.float32, device=base.device) > p
    corrupted = base * keep.float()
    return corrupted.float()

def add_scale_shift(X, sigma2=0.1, alpha2=0.2):
    """Randomly rescale and shift pixel values, then clamp the result to [0, 1].

    Computes Xnew = a * X + b, where
      a is drawn from a Gaussian with mean 1 and standard deviation `sigma2`,
      b is drawn from a Gaussian with mean 0 and standard deviation `alpha2`.
    For 3-D and 4-D inputs one (a, b) pair is drawn per entry along the first
    axis (i.e. the same for each image); otherwise a single pair is shared.

    NOTE: despite their names, `sigma2` and `alpha2` are handed to
    torch.normal as standard deviations, not variances.
    """
    base = X.float()

    # shape of the random factors: broadcastable over everything but axis 0
    if base.ndim in (3, 4):
        dshape = (base.shape[0],) + (1,) * (base.ndim - 1)
    else:
        dshape = (1,)

    gain = torch.normal(1, sigma2, dshape, dtype=torch.float32, device=base.device)
    offset = torch.normal(0, alpha2, dshape, dtype=torch.float32, device=base.device)

    return torch.clamp(gain * base + offset, 0.0, 1.0).float()

Next, we define a function for adding per-pixel noise (in this case just Gaussian noise). The noise is included using the `transforms.Compose`.

In [None]:
# build the noise function
def addNoise(X):
    """Apply add_gauss_noise to X with a fixed noise level of 0.04."""
    return add_gauss_noise(X, 0.04)

# build the data augmenter: the steps below are applied in order to one
# image tensor at a time
transform = transforms.Compose([
    # the geometric transforms below operate on a PIL image
    transforms.ToPILImage(),

    # Random rotation within ±10 degrees
    transforms.RandomRotation(10),

    # Random horizontal flipping
    transforms.RandomHorizontalFlip(),

    # Random affine transformation for width/height shift and shear
    transforms.RandomAffine(
        degrees=0,  # No additional rotation (already handled by RandomRotation)
        translate=(0.05, 0.05),  # Width and height shift (5% of image size)
        shear=5  # Shearing within ±5 degrees
    ),

    # Random zooming (simulated using RandomResizedCrop); output is resized to 19x19
    # NOTE(review): `scale` bounds the crop area relative to the image, so the
    # upper bound 1.05 asks for a crop larger than the image - confirm intended
    transforms.RandomResizedCrop(size=(19, 19), scale=(0.95, 1.05)),  # Adjust size if needed

    # Convert PIL image to PyTorch tensor
    transforms.ToTensor(),

    # Add custom per-pixel noise (see addNoise)
    transforms.Lambda(addNoise)
])

Next we can show some examples of augmented images. Run the code below to see different random augmentations.

In [None]:
# Show one training image next to five independent random augmentations of it.
img = trainI[4]                       # shape (1, 19, 19), as transform expects
imgs = [img[0].detach().numpy()]      # original, channel axis dropped for display

for _ in range(5):
    # every call draws new random augmentation parameters
    imgs.append(transform(img)[0].detach().numpy())

titles = ['original image'] + ['augmented'] * 5
plt.figure(figsize=(8,6))
show_imgs(imgs, nc=3, titles=titles)


The augmented images look similar to the original image, but contain small differences that the network can use to learn more about the class.

Now let's try training logistic regression with data augmentation.  We also disable early stopping so that the training sees more augmented data.

Customize dataset for augmentation and set some parameters

In [None]:
class AugmentedTensorDataset(TensorDataset):
    """TensorDataset of (images, labels) that optionally transforms each image.

    The first tensor holds the images and the second the labels. When a
    `transform` is given it is applied to the image on every access, so random
    transforms produce a different result each time a sample is fetched.
    """

    def __init__(self, *tensors, transform=None):
        super().__init__(*tensors)
        self.transform = transform

    def __getitem__(self, index):
        images, labels = self.tensors[0], self.tensors[1]
        sample, target = images[index], labels[index]
        # labels are returned untouched; only the image is augmented
        if not self.transform:
            return sample, target
        return self.transform(sample), target

batch_size = 50
epochs = 50
steps_per_epoch = len(vtrainI)/batch_size

# Create data loaders.
# The augmented training set is built from vtrainI / vtrainYb, i.e. the
# training part of the train/validation split. Using the full trainI here
# would also train on the validation samples and make the validation
# metrics (and early stopping) overly optimistic.
train_dataset = AugmentedTensorDataset(vtrainI, vtrainYb, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# The validation set is not augmented and keeps a fixed order.
valid_dataset = TensorDataset(validI, validYb)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Baseline: logistic regression trained with data augmentation.
# The model outputs raw logits and has no Softmax layer: nn.CrossEntropyLoss
# expects unnormalized logits and applies log-softmax itself, so an explicit
# Softmax would normalise twice and flatten the gradients.

torch.manual_seed(4487)
random.seed(4487)

# Build the logistic regression network
nn_model = nn.Sequential(
    nn.Flatten(),                                     # flatten the image into a vector
    nn.Linear(19*19*1, 2),                            # classification layer (2 classes)
)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(nn_model.parameters(), lr=0.05, momentum=0.9, nesterov=True)

history = {'loss': [], 'accuracy': [], 'val_loss': [], 'val_accuracy': []}

epochs = 50

# No early stopping here: every epoch runs, so the model sees more augmented data.
for epoch in range(epochs):
    # Training phase
    nn_model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0

    for batch_x, batch_y in train_loader:
        targets = torch.argmax(batch_y, dim=1)        # one-hot -> class index
        optimizer.zero_grad()
        outputs = nn_model(batch_x)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        predicted = torch.argmax(outputs.detach(), dim=1)
        train_total += batch_y.size(0)
        train_correct += (predicted == targets).sum().item()

    # Validation phase
    nn_model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for batch_x, batch_y in valid_loader:
            targets = torch.argmax(batch_y, dim=1)
            outputs = nn_model(batch_x)
            loss = criterion(outputs, targets)

            val_loss += loss.item()
            predicted = torch.argmax(outputs, dim=1)
            val_total += batch_y.size(0)
            val_correct += (predicted == targets).sum().item()

    # Metrics: loss is averaged over batches, accuracy over samples
    avg_train_loss = train_loss/len(train_loader)
    avg_val_loss = val_loss / len(valid_loader)
    train_acc = train_correct / train_total
    val_acc = val_correct / val_total

    # Record history
    history['loss'].append(avg_train_loss)
    history['val_loss'].append(avg_val_loss)
    history['accuracy'].append(train_acc)
    history['val_accuracy'].append(val_acc)

    print(f'Epoch {epoch+1}/{epochs} - '
          f'loss: {avg_train_loss:.4f} - '
          f'accuracy: {train_acc:.4f} - '
          f'val_loss: {avg_val_loss:.4f} - '
          f'val_accuracy: {val_acc:.4f}')

history = HistoryWrapper(history)

In [None]:
# Plot the training curves and evaluate on the test set
# (logistic regression + data augmentation)
plot_history(history)

# Prediction: the predicted class is the arg-max over the model outputs
nn_model.eval()
with torch.no_grad():
    pred_outputs = nn_model(testI)
    predY = torch.argmax(pred_outputs, dim=1).numpy()

acc = metrics.accuracy_score(testY, predY)
print("test accuracy:", acc)

使用数据增强后，测试集准确率从 0.60 提升到了 0.70！（你的结果可能会有所不同）

现在请在上一节中效果最好的 CNN 基础上，结合数据增强进行训练。
尝试不同的逐像素噪声强度，以及不同的 transforms 配置和它们的组合，看看是否能进一步提升准确率。

In [None]:
# Training: the best CNN from the previous section, combined with data augmentation.
# Note: nn_model is overwritten here, so later inference (e.g. the
# sliding-window detection) uses this CNN.

# Fix the random seeds for reproducibility
torch.manual_seed(4487)
random.seed(4487)

# Small CNN for 19x19 grayscale patches
class BestCNN(nn.Module):
    """Two conv/ReLU/max-pool stages plus a two-layer head; outputs 2 class logits.

    The output is not passed through Softmax because nn.CrossEntropyLoss
    already includes it.
    """

    def __init__(self):
        super().__init__()
        conv_stages = [
            # input (N,1,19,19) -> (N,16,19,19) -> pooled to (N,16,9,9)
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            # (N,16,9,9) -> (N,32,9,9) -> pooled to (N,32,4,4)
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        ]
        head = [
            nn.Flatten(),                 # -> (N, 32*4*4 = 512)
            nn.Linear(32*4*4, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 2),             # logits for the 2 classes
        ]
        self.features = nn.Sequential(*conv_stages)
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        return self.classifier(self.features(x))

# Uses the data loaders built together with AugmentedTensorDataset above:
#   train_loader: augmented with transform=transform
#   valid_loader: no augmentation, evaluation only

nn_model = BestCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nn_model.parameters(), lr=1e-3)

# Early stopping (reuses the EarlyStopping class); fed the validation accuracy
earlystop = EarlyStopping(monitor='val_accuracy', min_delta=1e-4, patience=6, verbose=1, mode='auto')

history = {"loss": [], "accuracy": [], "val_loss": [], "val_accuracy": []}

# Track the best validation accuracy and a copy of the weights that achieved
# it. Early stopping only fires several epochs after the best epoch, so
# without this the model would end up with the later, worse weights.
best_val_acc = -1
best_state = None

epochs = 50
for epoch in range(epochs):
    # Training
    nn_model.train()
    train_loss, train_correct, train_total = 0.0, 0, 0
    for batch_x, batch_yb in train_loader:
        optimizer.zero_grad()
        logits = nn_model(batch_x)
        target = torch.argmax(batch_yb, dim=1)  # one-hot -> class index
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        pred = torch.argmax(logits.detach(), dim=1)
        train_total += batch_yb.size(0)
        train_correct += (pred == target).sum().item()

    # Validation (no gradient tracking)
    nn_model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad():
        for batch_x, batch_yb in valid_loader:
            logits = nn_model(batch_x)
            target = torch.argmax(batch_yb, dim=1)
            loss = criterion(logits, target)

            val_loss += loss.item()
            pred = torch.argmax(logits, dim=1)
            val_total += batch_yb.size(0)
            val_correct += (pred == target).sum().item()

    # Metrics: loss averaged over batches, accuracy over samples;
    # max(1, ...) guards against division by zero for an empty loader
    avg_train_loss = train_loss / max(1, len(train_loader))
    avg_val_loss = val_loss / max(1, len(valid_loader))
    train_acc = train_correct / max(1, train_total)
    val_acc = val_correct / max(1, val_total)

    history['loss'].append(avg_train_loss)
    history['val_loss'].append(avg_val_loss)
    history['accuracy'].append(train_acc)
    history['val_accuracy'].append(val_acc)

    # Keep a copy of the parameters with the best validation accuracy
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = {k: v.cpu().clone() for k, v in nn_model.state_dict().items()}

    print(f"Epoch {epoch+1}/{epochs} - loss: {avg_train_loss:.4f} - accuracy: {train_acc:.4f} - val_loss: {avg_val_loss:.4f} - val_accuracy: {val_acc:.4f}")

    # Early stopping check
    earlystop(val_acc)
    if earlystop.early_stop:
        break

# Restore the best weights so that later evaluation and detection use them
if best_state is not None:
    nn_model.load_state_dict(best_state)

# Wrap the dict so that plot_history can read it via `.history`
history = HistoryWrapper(history)

In [None]:
# Visualize the training curves (data augmentation + best CNN)
plot_history(history)

In [None]:
# Evaluate on the test set (best CNN trained with data augmentation);
# the predicted class is the arg-max of the logits
nn_model.eval()
with torch.no_grad():
    logits_test = nn_model(testI)
    predY = torch.argmax(logits_test, dim=1).numpy()

acc = metrics.accuracy_score(testY, predY)
print("test accuracy (CNN + augmentation):", acc)

In [None]:
# Extra: confusion matrix and per-class classification report on the test set
from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(testY, predY)
print("Confusion matrix:\n", cm)
print("\nClassification report:\n", classification_report(testY, predY, digits=4))

# 测试图像
现在我们来在真实图像上尝试人脸检测。下载 "nasa-small.png" 并放在与你的 ipynb 文件相同的目录下。下面的代码会加载图像、裁剪滑动窗口补丁并提取特征。（这一步可能需要几分钟）

In [None]:
# Test image file; it is read from the current working directory
fname = "nasa-small.png"

In [None]:
# load the test image as grayscale and display it
testimg = skimage.io.imread(fname, as_gray=True)
print(testimg.shape)
plt.imshow(testimg, cmap='gray')

In [None]:
# step size for the sliding window
step = 4

# extract 19x19 window patches with step size of 4; the result is indexed
# by (window row, window column, patch row, patch column)
patches = skimage.util.view_as_windows(testimg, (19,19), step=step)
psize = patches.shape
# collapse the first 2 dimensions and append a channel axis: (num_windows, 19, 19, 1)
patches2 = patches.reshape((psize[0]*psize[1], psize[2], psize[3], 1))
print(patches2.shape)

# histogram equalize patches (improves contrast) - currently disabled
#newI = empty(patches2.shape)
#for i in range(patches2.shape[0]):
#    newI[i,:,:] = skimage.exposure.equalize_hist(patches2[i,:,:])
# with equalization disabled, the patches are used unchanged
newI = patches2


现在使用你的分类器进行预测。已经提取好的小图块保存在 `newI` 中。

In [None]:
# Classify every sliding-window patch with the trained classifier (nn_model,
# the CNN trained with data augmentation).
# The patches are read from `newI` rather than `patches2`, so any
# preprocessing assigned to `newI` (e.g. histogram equalization) is
# actually used for prediction.

# (N, H, W, C) -> (N, C, H, W), converted to float32 as the model expects
patches_tensor = torch.tensor(newI, dtype=torch.float32).permute(0, 3, 1, 2)

# make sure the model is in inference mode regardless of which cell ran last
nn_model.eval()
with torch.no_grad():
    outputs = nn_model(patches_tensor)
    prednewY = torch.argmax(outputs, dim=1).numpy()

接下来我们将在图像上可视化结果。使用下方代码进行显示。`prednewY` 是预测结果向量。

In [None]:
# reshape prediction to an image: one pixel per sliding-window position
imgY = prednewY.reshape(psize[0], psize[1])

# zoom back to image size: each prediction covers `step` x `step` pixels
# (order=0 is nearest-neighbour, so the labels stay 0/1).
# ndimage.zoom is the public name; the ndimage.interpolation namespace is deprecated.
imgY2 = ndimage.zoom(imgY, step, output=None, order=0)
# pad the top and left with half the window size
imgY2 = vstack((zeros((9, imgY2.shape[1])), imgY2))
imgY2 = hstack((zeros((imgY2.shape[0],9)), imgY2))
# pad right and bottom to same size as image
if (imgY2.shape[0] != testimg.shape[0]):
    imgY2 = vstack((imgY2, zeros((testimg.shape[0]-imgY2.shape[0], imgY2.shape[1]))))
if (imgY2.shape[1] != testimg.shape[1]):
    imgY2 = hstack((imgY2, zeros((imgY2.shape[0],testimg.shape[1]-imgY2.shape[1]))))

# show detections with image: detected pixels become pure red, all other
# pixels keep their gray value
#detimg = dstack(((0.5*imgY2+0.5)*testimg, 0.5*testimg, 0.5*testimg))
nimgY2 = 1-imgY2
tmp = nimgY2*testimg
detimg = dstack((imgY2+tmp, tmp, tmp))

# show it!
plt.figure(figsize=(9,9))
plt.subplot(2,1,1)
plt.imshow(imgY2, interpolation='nearest')
plt.title('detection map')
plt.subplot(2,1,2)
plt.imshow(detimg)
plt.title('image')
plt.axis('image')