In [29]:
import torch
from torch.utils import data  # TODO
from d2l import torch as d2l

# 读取数据

数据形状：
$y \in \mathbb{R}^1, x \in \mathbb{R}^2$

In [30]:
# Data / optimisation settings
sample_num = 1_000   # number of synthetic examples to generate
batch_size = 10      # examples per mini-batch
num_epochs = 20      # training epochs (also handed to Opacus as `epochs`)
lr = 0.01            # SGD learning rate

# Differential-privacy settings
epsilon = 10         # target privacy budget (Opacus `target_epsilon`)
delta = 0.01         # Opacus `target_delta`
max_grad_norm = 5.0  # gradient clipping parameter (Opacus `max_grad_norm`)

In [31]:
# Ground-truth bias and weights of the linear model used to generate the data.
true_b = 4.2
true_w = torch.tensor([2.0, -3.4])
# Generate `sample_num` (features, labels) pairs with the d2l helper
# (presumably y = Xw + b plus noise -- see d2l.synthetic_data).
features, labels = d2l.synthetic_data(true_w, true_b, sample_num)

In [32]:
# Build the mini-batch data iterator
def load_data(data_array, batch_size, shuffle=True):
    """Wrap a collection of tensors in a mini-batch ``DataLoader``.

    Args:
        data_array: Iterable of tensors that share the same first dimension,
            e.g. ``(features, labels)``. It is unpacked with ``*`` because
            ``TensorDataset`` takes every tensor as a separate positional
            argument rather than one tuple.
        batch_size: Number of examples per mini-batch.
        shuffle: Whether to reshuffle the examples every epoch. Defaults to
            ``True`` (the previously hard-coded behaviour); pass ``False``
            for in-order iteration, e.g. when evaluating.

    Returns:
        A ``torch.utils.data.DataLoader`` that yields one tuple of tensors
        per mini-batch.
    """
    dataset = data.TensorDataset(*data_array)
    return data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

In [33]:
# Mini-batch iterator over the synthetic (features, labels) pairs.
data_iter = load_data(data_array=(features, labels), batch_size=batch_size)

# 构建模型

In [34]:
# model
from torch import nn

# Sequential is the container used to stack layers; here it holds a single
# linear layer with a 2-dimensional input and a 1-dimensional output.
net = nn.Sequential(nn.Linear(in_features=2, out_features=1))

# Parameter initialisation, done in place on the existing tensors:
# weights are drawn from a normal distribution with mean 0 and std 0.01,
# the bias is set to 0.
net[0].weight.data.normal_(mean=0, std=0.01)
net[0].bias.data.fill_(0)

tensor([0.])

**OPACUS**
调用 Opacus 的 `ModuleValidator` 来检查搭建的模型是否兼容 DP-SGD。例如 BatchNorm（BN）层就不兼容：DP-SGD 需要对每个样本的梯度单独裁剪后再加噪声，而 BN 使用整个 batch 的统计量，单个样本的改变会影响同一 batch 中多个样本的梯度（而不是只影响它自己的），因此无法得到相互独立的逐样本梯度。

In [35]:
from opacus.validators import ModuleValidator

# Collect the validator's findings for the model as a list and show (at most) the last five.
# NOTE(review): strict=False presumably makes validate() return the problems instead of raising -- confirm in the Opacus docs.
errors = ModuleValidator.validate(net, strict=False)
print(errors[-5:])
# Rebind `net` to the module returned by ModuleValidator.fix, then validate again;
# the result of this last expression is displayed as the cell output.
net = ModuleValidator.fix(net)
ModuleValidator.validate(net, strict=False)

[]


[]

In [36]:
# Create the optimizer: plain SGD with learning rate `lr`.
# The first argument is the collection of network parameters to optimise.
optimizer = torch.optim.SGD(net.parameters(), lr=lr)

**OPACUS**
调用 Opacus 的 `PrivacyEngine`，把 `net`、`optimizer`、`data_iter` 这三个实例重新包装（wrap）成支持差分隐私训练的版本。

In [37]:
from opacus import PrivacyEngine
privacy_engine = PrivacyEngine(accountant="rdp") # select RDP as the privacy accounting technique
# OPTION 1: with a target epsilon given, wrap net, optimizer and data_iter. Besides these three objects, the call needs:
# 1. epsilon, delta and C (the clipping parameter, passed as max_grad_norm)
# 2. the number of epochs (used to guide how epsilon composes over training)
# NOTE(review): this rebinds net / optimizer / data_iter to the returned objects, so re-running
# the cell would pass the already-wrapped objects back in -- presumably the earlier cells must be re-run first.
net, optimizer, data_iter = privacy_engine.make_private_with_epsilon(
    module=net,
    optimizer=optimizer,
    data_loader=data_iter,
    epochs=num_epochs,
    target_epsilon=epsilon,
    target_delta=delta,
    max_grad_norm=max_grad_norm,
)



In [38]:
# Loss function: mean squared error ("mean" is nn.MSELoss's default reduction,
# written out explicitly here).
loss = nn.MSELoss(reduction="mean")

In [39]:
# train
for epoch in range(num_epochs):
    for X, y in data_iter:
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        l = loss(net(X), y)
        l.backward()
        optimizer.step()
    # `l` is the loss of the last mini-batch of this epoch only, not of the full data set.
    print(f"epoch:{epoch}, loss:{l}")
    # Privacy budget reported by the engine so far for the configured delta. It is kept
    # under its own name so that the *target* `epsilon` from the configuration cell is
    # not overwritten (re-running the make_private_with_epsilon cell would otherwise
    # use the spent value as its target).
    spent_epsilon = privacy_engine.get_epsilon(delta)
    print(f"epsilon:{spent_epsilon}, delta:{delta}")



epoch:0, loss:9.21086311340332
epsilon:2.795883159554287, delta:0.01
epoch:1, loss:1.6003155708312988
epsilon:3.5034487725000676, delta:0.01
epoch:2, loss:0.07424916326999664
epsilon:4.071951297116898, delta:0.01
epoch:3, loss:0.004222839139401913
epsilon:4.571115656894907, delta:0.01
epoch:4, loss:0.00020390874124132097
epsilon:5.0164283670466565, delta:0.01
epoch:5, loss:0.0002676925214473158
epsilon:5.446633502303051, delta:0.01
epoch:6, loss:0.0010992612224072218
epsilon:5.838976219786288, delta:0.01
epoch:7, loss:0.0002140703290933743
epsilon:6.213276276203157, delta:0.01
epoch:8, loss:0.0006299919332377613
epsilon:6.587576332620027, delta:0.01
epoch:9, loss:0.00040755074587650597
epsilon:6.937089880916106, delta:0.01
epoch:10, loss:0.0006456022965721786
epsilon:7.265151831433438, delta:0.01
epoch:11, loss:0.0007640901603735983
epsilon:7.593213781950772, delta:0.01
epoch:12, loss:0.000576560792978853
epsilon:7.9212757324681045, delta:0.01
epoch:13, loss:0.0013757436536252499
epsil

检查训练得到的模型参数和真实值的差距

In [40]:
# Print the learned parameters of the net so they can be compared with true_w / true_b.
# The wrapped net does not allow subscripting, so net[0].weight.data / net[0].bias.data
# cannot be used here; iterate over parameters() instead.
for param in net.parameters(): # parameters() returns a generator
    print(param)

Parameter containing:
tensor([[ 1.9953, -3.4089]], requires_grad=True)
Parameter containing:
tensor([4.2111], requires_grad=True)
