In [None]:
# Optional: setup NoTexBook theme
%load_ext notexbook

%texify

# Model Inversion Attack - Model Training

In this notebook we will repeat the same operations done in preparation for the **Model Inversion Attack** (in section 1).

The very **big** difference this time though is that we will be using **Opacus** to train our ML model.

$\rightarrow$ ‼️ The very **remarkable** thing to notice is **how little** the implementation changes with respect to the previous notebook
(in fact, we will be using the **same** `train` function defined previously).

In [None]:
import torch as th
import numpy as np

from matplotlib import pyplot as plt

%matplotlib inline

In [None]:
import warnings

# Suppress every warning for the rest of the session.
warnings.simplefilter("ignore")


# Tweak to reuse the Python modules defined in the previous section
import os
import sys
from pathlib import Path

PREVIOUS_SECTION_DIR = os.path.join(
    os.path.abspath(os.path.curdir), "..", "2-ml-models-attacks"
)

# Guard the insertion so that re-running this cell does not keep stacking
# duplicate entries at the front of sys.path.
if PREVIOUS_SECTION_DIR not in sys.path:
    sys.path.insert(0, PREVIOUS_SECTION_DIR)

In [None]:
from dataset import ORLFaces
from torchvision.transforms import ToTensor, Grayscale, Compose
from torch.utils.data import DataLoader

In [None]:
SEED = 123456

# Seed the NumPy and the PyTorch (CPU) random number generators with the same value.
for _seed_fn in (np.random.seed, th.manual_seed):
    _seed_fn(SEED)

# When CUDA is available, also seed every GPU generator and ask cuDNN
# for deterministic behaviour.
if th.cuda.is_available():
    th.backends.cudnn.deterministic = True
    th.cuda.manual_seed_all(SEED)

In [None]:
# NOTE: This is a hack to get around "User-agent" limitations when downloading datasets
#       (originally reported for MNIST; presumably the ORLFaces download needs it too - confirm).
#       See https://github.com/pytorch/vision/issues/3497 for more information.
# The standard-library urllib.request is used directly, so the third-party
# `six` compatibility layer is not required on Python 3.
import urllib.request

# Install a process-wide opener that sends a browser-like User-agent header.
opener = urllib.request.build_opener()
opener.addheaders = [("User-agent", "Mozilla/5.0")]
urllib.request.install_opener(opener)

from pathlib import Path
import os

# Dataset root: the "data" directory one level above the current working directory.
DATA_FOLDER = Path(os.path.join(os.path.abspath(os.path.curdir), "..")) / "data"

In [None]:
# Every image goes through a single-channel grayscale conversion and is then
# turned into a tensor.
imgs_trasform = Compose([Grayscale(num_output_channels=1), ToTensor()])

# Both splits are built with identical settings; only `split` changes
# (train first, then test).
orl_faces_train, orl_faces_test = (
    ORLFaces(root=DATA_FOLDER, download=True, split=split, transform=imgs_trasform)
    for split in ("train", "test")
)

In [None]:
BATCH_SIZE = 32

# Options shared by both loaders: same batch size, incomplete last batch kept.
_loader_options = dict(batch_size=BATCH_SIZE, drop_last=False)

# Only the training data is shuffled.
train_loader = DataLoader(orl_faces_train, shuffle=True, **_loader_options)
test_loader = DataLoader(orl_faces_test, shuffle=False, **_loader_options)

Show some of the training images, for fun

In [None]:
from torchvision.utils import make_grid


def imshow(img):
    """Display an image tensor in a new matplotlib figure.

    Parameters
    ----------
    img : torch.Tensor
        Image tensor whose axes are reordered with ``(1, 2, 0)`` before
        plotting (presumably channels-first, as produced by ``make_grid`` -
        confirm against the caller).
    """
    # detach() + cpu() make the conversion work also for tensors that require
    # gradients or live on a GPU, where a bare .numpy() raises an error.
    npimg = img.detach().cpu().numpy()
    plt.figure(figsize=(10, 12))
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [None]:
# Draw the first batch yielded by the (shuffled) training loader and split it
# into its images and labels.
first_batch = next(iter(train_loader))
images, labels = first_batch

In [None]:
# show images
imshow(make_grid(images))
# print labels: iterate over the labels actually present in the batch, because a
# batch can hold fewer than BATCH_SIZE samples and indexing up to BATCH_SIZE
# would then raise an IndexError
print(" ".join(f"{label}" for label in labels))

## Privacy Parameters and Opacus

In [None]:
from models import SoftmaxRegression, MLP

In [None]:
# Instantiate the softmax regression model with its default arguments.
softmax_reg = SoftmaxRegression()

In [None]:
from opacus.validators import ModuleValidator

# Run the Opacus module validator on the model.
# NOTE(review): with strict=False the call presumably returns the list of
# incompatibilities instead of raising - confirm against the Opacus docs.
errors = ModuleValidator.validate(softmax_reg, strict=False)
# Bare expression: displayed as the output of this cell.
errors

In [None]:
λ = 0.1  # optimiser learning rate (passed as `lr` to the SGD optimiser)

In [None]:
# Start again from a freshly created model and bind a plain SGD optimiser
# (learning rate λ) to its parameters.
softmax_reg = SoftmaxRegression()
softmax_sgd = th.optim.SGD(params=softmax_reg.parameters(), lr=λ)

In [None]:
from opacus import PrivacyEngine

In [None]:
from train import train

In [None]:
# Privacy budget requested from Opacus
EPSILON = 50  # passed as `target_epsilon`
DELTA = 1e-5  # passed as `target_delta`

# Passed as `max_grad_norm` (reported as "C" in the printed summary)
MAX_GRAD_NORM = 1.2

# Training length: the number of epochs of training has been increased by 100
EPOCHS = 200

In [None]:
# Privacy engine configured with the "gdp" accountant.
privacy_engine = PrivacyEngine(accountant="gdp")

# Privacy-related settings, grouped so that the call below only lists the
# objects being wrapped.
_dp_settings = dict(
    epochs=EPOCHS,
    target_epsilon=EPSILON,
    target_delta=DELTA,
    max_grad_norm=MAX_GRAD_NORM,
)

# The returned model, optimiser and training loader replace the originals
# under the same names.
softmax_reg, softmax_sgd, train_loader = privacy_engine.make_private_with_epsilon(
    module=softmax_reg, optimizer=softmax_sgd, data_loader=train_loader, **_dp_settings
)

# Report the noise multiplier exposed by the returned optimiser together with
# the gradient norm bound.
print(f"Using sigma={softmax_sgd.noise_multiplier} and C={MAX_GRAD_NORM}")

In [None]:
# Train with the imported `train` function, using the model, optimiser and
# training loader returned by the privacy engine (the test loader is unchanged).
train(
    model=softmax_reg,
    optimiser=softmax_sgd,
    loaders=(train_loader, test_loader),
    epochs=EPOCHS,
    model_name="softmax_reg_opacus_test",
    verbose=False,
)