In [2]:
%load_ext autoreload
%autoreload 2
import torch
import torch.utils.data
import numpy as np
import sklearn.datasets
import sklearn.model_selection
import matplotlib.pyplot as plt

import abstract_gradient_training as agt

In [None]:
"""Initialise the halfmoons training data."""
seed = 0
batchsize = 5000  # number of samples per batch
test_size = 5000
n_users = 500
# batchsize = 3  # number of samples per batch
# test_size = 1
# n_users = 1
n_batches = 1  # number of batches per epoch
n_epochs = 10  # number of epochs

torch.manual_seed(seed)
# load the dataset
x, y = sklearn.datasets.make_moons(noise=0.1, n_samples=n_batches*batchsize + test_size, random_state=seed)
# to make it easier to train, we'll space the moons out a bit and add some polynomial features
x[y==0, 1] += 0.2
x = np.hstack((x, x**2, (x[:, 0] * x[:, 1])[:, None], x**3))


# Train-test split
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    x, y, test_size=test_size / (n_batches * batchsize + test_size), random_state=seed
)


# Assign users randomly to each set after the split
user_train = np.random.randint(1, n_users + 1, size=len(x_train))
user_test = np.random.randint(1, n_users + 1, size=len(x_test))

# Convert to PyTorch tensors
x_train = torch.from_numpy(x_train).float()
x_test = torch.from_numpy(x_test).float()
y_train = torch.from_numpy(y_train)
y_test = torch.from_numpy(y_test)
user_train = torch.from_numpy(user_train)
user_test = torch.from_numpy(user_test)

# Combine inputs and both labels into TensorDatasets
dataset_train = torch.utils.data.TensorDataset(x_train, user_train, y_train)
dataset_test = torch.utils.data.TensorDataset(x_test, user_test, y_test)

print(y)

# Create DataLoaders
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batchsize, shuffle=True)
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=batchsize, shuffle=False)



[1 0 0 ... 0 1 0]


In [None]:
import importlib
importlib.reload(agt)
"""Let's train a logistic classifier on the halfmoons example above."""
# model = torch.nn.Sequential(torch.nn.Linear(7, 2))
torch.manual_seed(1)
model = torch.nn.Sequential(
    torch.nn.Linear(7, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 2),
)
config = agt.AGTConfig(
    fragsize=2000,
    learning_rate=0.5,
    n_epochs=10,
    device="cuda:0",
    l2_reg=0.01,
    k_private=1,
    loss="cross_entropy",
    log_level="INFO",
    lr_decay=2.0,
    clip_gamma=1.0,
    lr_min=0.001,
    optimizer="SGDM", # we'll use SGD with momentum
    optimizer_kwargs={"momentum": 0.9, "nesterov": True},
)
k_values = [1,2,5,10,20,50,100]   # using more values here will improve the guarantees AGT will give
bounded_model_dict = {}  # we'll store our results for each value of 'k' as a dictionary from 'k' to the bounded model

for k_private in k_values:
    config.k_private=k_private
    torch.manual_seed(seed)
    bounded_model = agt.bounded_models.IntervalBoundedModel(model)
    bounded_model = agt.privacy_certified_training_user_level(bounded_model, config, dataloader_train)
    bounded_model_dict[k_private] = bounded_model
    
    # as a metric, compute the number of predictions in the test set certified at this value of k_private
    certified_preds = agt.test_metrics.certified_predictions(bounded_model, x_test)
    print(f"Certified Predictions at k={k_private}: {certified_preds:.2f}")

[AGT] [INFO    ] [17:47:02] Starting epoch 1


here1
here2


[AGT] [INFO    ] [17:47:57] Dataloader has only one batch per epoch, effective batchsize may be smaller than expected.
[AGT] [INFO    ] [17:47:57] Starting epoch 2


here3
here4
here1
here2


[AGT] [INFO    ] [17:48:50] Starting epoch 3


here3
here4
here1
here2


[AGT] [INFO    ] [17:49:44] Starting epoch 4


here3
here4
here1
here2


[AGT] [INFO    ] [17:50:38] Starting epoch 5


here3
here4
here1
here2


[AGT] [INFO    ] [17:51:32] Starting epoch 6


here3
here4
here1
here2


[AGT] [INFO    ] [17:52:25] Starting epoch 7


here3
here4
here1
here2


[AGT] [INFO    ] [17:53:19] Starting epoch 8


here3
here4
here1
here2


[AGT] [INFO    ] [17:54:12] Starting epoch 9


here3
here4
here1
here2


[AGT] [INFO    ] [17:55:06] Starting epoch 10


here3
here4
here1
here2




here3
here4
Certified Predictions at k=1: 0.94


In [6]:
"""Let's use this set of bounded models to for better private prediction using the smooth sensitivity mechanism."""

epsilon = 0.2  # privacy loss
noise_free_acc = agt.test_metrics.test_accuracy(bounded_model_dict[1], x_test, y_test)[0]

# compute accuracy using the smooth sensitivity Cauchy mechanism
smooth_sens_noise_level = agt.privacy_utils.get_calibrated_noise_level(
    x_test, bounded_model_dict, epsilon, noise_type="cauchy"
)
smooth_sens_acc = agt.privacy_utils.noisy_test_accuracy(
    bounded_model_dict[1], x_test, y_test, noise_level=smooth_sens_noise_level, noise_type="cauchy"
)


# compute accuracy when using the global sensitivity mechanism
global_sens_acc = agt.privacy_utils.noisy_test_accuracy(
    bounded_model_dict[1], x_test, y_test, noise_level=1.0 / epsilon
)

print(f"Noise Free Accuracy: {noise_free_acc:.2f}")
print(f"Smooth Sensitivity Accuracy: {smooth_sens_acc:.2f}")
print(f"Global Sensitivity Accuracy: {global_sens_acc:.2f}")

Noise Free Accuracy: 0.81
Smooth Sensitivity Accuracy: 0.50
Global Sensitivity Accuracy: 0.52


In [None]:
# """Initialise a large model (which will be random here but would be a pre-trained model in practice)."""
# model = torch.nn.Sequential(
#     torch.nn.Linear(7, 128),
#     torch.nn.ReLU(),
#     torch.nn.Linear(128, 128),
#     torch.nn.ReLU(),
#     torch.nn.Linear(128, 128),
#     torch.nn.ReLU(),
#     torch.nn.Linear(128, 2)
# )
# config = agt.AGTConfig(
#     learning_rate=0.5,
#     n_epochs=2,
#     loss="cross_entropy",
#     log_level="INFO",
#     device="cuda:0",
#     clip_gamma=0.1,
#     k_private=10
# )

# # first try training the whole thing - observe that the certified accuracy goes to zero
# bounded_model = agt.bounded_models.IntervalBoundedModel(model)
# bounded_model = agt.privacy_certified_training_user_level(bounded_model, config, dataloader_train, dataloader_test)

# # second, split the model into a fixed part and a trainable part
# fixed_layers, trainable_layers = model[:4], model[4:]
# # wrap both in bounded models, using the first as the 'transform' argument to the second
# transform = agt.bounded_models.IntervalBoundedModel(fixed_layers, trainable=False)
# bounded_model = agt.bounded_models.IntervalBoundedModel(trainable_layers, transform=transform)
# # train the model
# bounded_model = agt.privacy_certified_training_user_level(bounded_model, config, dataloader_train, dataloader_test)

[AGT] [INFO    ] [03:30:35] Starting epoch 1


Batch content: 6


[AGT] [INFO    ] [03:30:35] Batch 1. Loss (accuracy): 0.460 <= 0.460 <= 0.460
[AGT] [INFO    ] [03:30:36] Starting epoch 2
[AGT] [INFO    ] [03:30:36] Batch 2. Loss (accuracy): 0.496 <= 0.498 <= 0.944
[AGT] [INFO    ] [03:30:37] Batch 3. Loss (accuracy): 0.000 <= 0.896 <= 1.000
[AGT] [INFO    ] [03:30:38] Final Eval. Loss (accuracy): 0.000 <= 0.898 <= 1.000
[AGT] [INFO    ] [03:30:38] Starting epoch 1


Batch content: 6


[AGT] [INFO    ] [03:30:38] Batch 1. Loss (accuracy): 0.460 <= 0.460 <= 0.460
[AGT] [INFO    ] [03:30:39] Starting epoch 2
[AGT] [INFO    ] [03:30:39] Batch 2. Loss (accuracy): 0.498 <= 0.498 <= 0.498
[AGT] [INFO    ] [03:30:40] Batch 3. Loss (accuracy): 0.582 <= 0.782 <= 0.920
[AGT] [INFO    ] [03:30:41] Final Eval. Loss (accuracy): 0.582 <= 0.912 <= 0.982


: 