In [None]:
import gzip
import struct

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from opacus import PrivacyEngine
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms


# Custom MNIST dataset loader that reads gzip-compressed IDX files (written by KIMI)
class MNISTDataset(Dataset):
    """Map-style dataset over a pair of gzip-compressed IDX files.

    The label file holds one unsigned byte per sample; the image file holds
    ``rows * cols`` unsigned bytes per sample. Both files start with a
    big-endian header that is validated on load.

    Args:
        images_path: Path to the ``.gz`` IDX3 image file.
        labels_path: Path to the ``.gz`` IDX1 label file.
        transform: Optional callable applied to each image (a 2-D ``uint8``
            array) in ``__getitem__``.

    Raises:
        ValueError: If a magic number is wrong, or the item counts / payload
            sizes disagree with the headers.
    """

    # Magic numbers of the IDX format for unsigned-byte data.
    _LABEL_MAGIC = 2049  # 0x00000801: 1-D array
    _IMAGE_MAGIC = 2051  # 0x00000803: 3-D array

    def __init__(self, images_path, labels_path, transform=None):
        with gzip.open(labels_path, 'rb') as lbpath:
            magic, n = struct.unpack('>II', lbpath.read(8))
            if magic != self._LABEL_MAGIC:
                raise ValueError(
                    f"{labels_path}: bad label magic number {magic}, "
                    f"expected {self._LABEL_MAGIC}"
                )
            self.labels = np.frombuffer(lbpath.read(), dtype=np.uint8)
        if len(self.labels) != n:
            raise ValueError(
                f"{labels_path}: header declares {n} labels, "
                f"found {len(self.labels)}"
            )

        with gzip.open(images_path, 'rb') as imgpath:
            magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
            if magic != self._IMAGE_MAGIC:
                raise ValueError(
                    f"{images_path}: bad image magic number {magic}, "
                    f"expected {self._IMAGE_MAGIC}"
                )
            pixels = np.frombuffer(imgpath.read(), dtype=np.uint8)
        if num != len(self.labels):
            raise ValueError(
                f"{images_path}: {num} images but {len(self.labels)} labels"
            )
        if pixels.size != num * rows * cols:
            raise ValueError(
                f"{images_path}: header declares {num}x{rows}x{cols} pixels, "
                f"found {pixels.size} bytes"
            )
        # Use the dimensions from the header instead of assuming 28x28, and
        # copy because np.frombuffer over bytes yields a read-only array,
        # which makes torch.from_numpy (used by ToTensor) emit a warning.
        self.images = pixels.reshape(num, rows, cols).copy()

        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, with ``transform`` applied to the image if set."""
        image = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label


# A simple fully connected neural network
class Net(nn.Module):
    """Two-layer perceptron: 784 -> 128 -> 10 with ReLU and log-softmax output.

    ``forward`` flattens its input to rows of 28 * 28 values, so any tensor
    whose element count is a multiple of 784 is accepted. The output holds
    per-row log-probabilities over 10 classes.
    """

    def __init__(self):
        super().__init__()
        in_features = 28 * 28
        hidden_features = 128
        num_classes = 10
        # Registration order (fc1, fc2, relu, softmax) is kept so parameter
        # initialisation consumes the RNG in the same sequence.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)
        self.relu = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, 10)`` for input ``x``."""
        flat = x.view(-1, 28 * 28)
        hidden = self.relu(self.fc1(flat))
        return self.softmax(self.fc2(hidden))


# Compute classification accuracy
def calculate_accuracy(model, data_loader):
    """Return the fraction of samples in ``data_loader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode (and left in it) and gradients
    are disabled while iterating.

    Args:
        model: Module mapping a batch of inputs to per-class scores of shape
            ``(batch, num_classes)``.
        data_loader: Iterable yielding ``(data, target)`` batches.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` if the loader yields no
        samples (instead of raising ``ZeroDivisionError``).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in data_loader:
            output = model(data)
            # The predicted class is the index of the highest score per row.
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    if total == 0:
        return 0.0
    return correct / total


# Data preprocessing: convert each image to a tensor, then normalize it with
# mean 0.1307 and std 0.3081 (presumably the MNIST training-set statistics —
# TODO confirm).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the local MNIST dataset from the gzip-compressed files under
# ./mnist_handwriting_data (train and test splits share the same transform).
train_dataset = MNISTDataset(
    images_path='./mnist_handwriting_data/train-images-idx3-ubyte.gz',
    labels_path='./mnist_handwriting_data/train-labels-idx1-ubyte.gz',
    transform=transform
)
test_dataset = MNISTDataset(
    images_path='./mnist_handwriting_data/t10k-images-idx3-ubyte.gz',
    labels_path='./mnist_handwriting_data/t10k-labels-idx1-ubyte.gz',
    transform=transform
)

# Training batches are shuffled; the test loader keeps file order and uses
# larger batches since it is only used for evaluation.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Baseline model trained without differential privacy.
# `criterion` is NLLLoss, which pairs with the LogSoftmax output of Net.
model_no_dp = Net()
criterion = nn.NLLLoss()
optimizer_no_dp = optim.SGD(model_no_dp.parameters(), lr=0.01)

# Train the non-private model for 10 epochs with plain SGD
for epoch in range(10):
    model_no_dp.train()
    running_loss = 0.0
    for i, (data, target) in enumerate(train_loader):
        optimizer_no_dp.zero_grad()
        output = model_no_dp(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer_no_dp.step()
        running_loss += loss.item()
    # Report the mean per-batch loss for this epoch
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

# Evaluate the non-private model on the test set
accuracy_no_dp = calculate_accuracy(model_no_dp, test_loader)
print(f"Accuracy without DP: {accuracy_no_dp:.4f}")

# Model trained with differential privacy (same architecture, optimizer type
# and learning rate as the non-private baseline).
model_dp = Net()
optimizer_dp = optim.SGD(model_dp.parameters(), lr=0.01)

# Attach Opacus' PrivacyEngine: make_private returns wrapped versions of the
# model, optimizer and data loader, which are used for all DP training below.
privacy_engine = PrivacyEngine()
model_dp, optimizer_dp, train_loader_dp = privacy_engine.make_private(
    module=model_dp,
    optimizer=optimizer_dp,
    data_loader=train_loader,
    # NOTE(review): per the Opacus docs these control the amount of added
    # noise and the per-sample gradient clipping bound — confirm values.
    noise_multiplier=1.1,
    max_grad_norm=1.0,
)

# Train the private model for 10 epochs; reuses `criterion` defined earlier
for epoch in range(10):
    model_dp.train()
    running_loss = 0.0
    for i, (data, target) in enumerate(train_loader_dp):
        optimizer_dp.zero_grad()
        output = model_dp(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer_dp.step()
        running_loss += loss.item()

    # Query the privacy accountant for the epsilon reached after this epoch
    # at delta = 1e-5, and report it alongside the mean per-batch loss.
    epsilon = privacy_engine.accountant.get_epsilon(delta=1e-5)
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader_dp)}, Epsilon: {epsilon:.2f}")

# Evaluate the private model on the test set
accuracy_dp = calculate_accuracy(model_dp, test_loader)
print(f"Accuracy with DP: {accuracy_dp:.4f}")


  img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()


Epoch 1, Loss: 0.5845314886555997
Epoch 2, Loss: 0.30062147266447925
Epoch 3, Loss: 0.25347521135420686
Epoch 4, Loss: 0.2211914599887026
Epoch 5, Loss: 0.19599560045722578
Epoch 6, Loss: 0.17581279328795893
Epoch 7, Loss: 0.15958351554916994
Epoch 8, Loss: 0.14600399256086172
Epoch 9, Loss: 0.13501612401640872
Epoch 10, Loss: 0.12541272491415237
Accuracy without DP: 0.9627


  self._maybe_warn_non_full_backward_hook(args, result, grad_fn)


Epoch 1, Loss: 1.700052529509896, Epsilon: 0.14
Epoch 2, Loss: 0.9080849973314098, Epsilon: 0.19
Epoch 3, Loss: 0.6383176138406115, Epsilon: 0.23
Epoch 4, Loss: 0.5202780249816522, Epsilon: 0.27
Epoch 5, Loss: 0.47317902967810377, Epsilon: 0.30
Epoch 6, Loss: 0.445561758196875, Epsilon: 0.33
Epoch 7, Loss: 0.4321227859951921, Epsilon: 0.36
Epoch 8, Loss: 0.4338335406694458, Epsilon: 0.38
Epoch 9, Loss: 0.4244533451492471, Epsilon: 0.40
Epoch 10, Loss: 0.4226146987887588, Epsilon: 0.43
Accuracy with DP: 0.8916
