In [None]:
%load_ext autoreload
%autoreload 2
import torch
import copy
import os

import torch
import tqdm
import torchvision

from typing import Literal

import abstract_gradient_training as agt
from abstract_gradient_training import AGTConfig
from abstract_gradient_training.bounded_models import IntervalBoundedModel

import uci_datasets  # python -m pip install git+https://github.com/treforevans/uci_datasets.git
# Seed PyTorch's global random number generator so that stochastic steps in
# this notebook give the same results on a fresh "Restart & Run All".
torch.manual_seed(0)

In [None]:
# --- Settings for this cell -------------------------------------------------
batchsize = 1000000  # training batch size handed to the DataLoader below
drop = 0  # fraction of the training rows to discard from the end (0 keeps everything)

# --- Load split 0 of the UCI "houseelectric" dataset ------------------------
data = uci_datasets.Dataset("houseelectric")
x_train, y_train, x_test, y_test = data.get_split(split=0)

# --- Normalise ---------------------------------------------------------------
# Features: standardise with the per-column mean / std of the training split
# (the same statistics are applied to the test split).
x_train_mu = x_train.mean(axis=0)
x_train_std = x_train.std(axis=0)
x_train, x_test = [(features - x_train_mu) / x_train_std for features in (x_train, x_test)]

# Labels: min-max scale with the training split's minimum and range.
y_train_min = y_train.min(axis=0)
y_train_range = y_train.max(axis=0) - y_train_min
y_train, y_test = [(labels - y_train_min) / y_train_range for labels in (y_train, y_test)]

# --- Optionally shrink the training set -------------------------------------
# Keep the leading (1 - drop) fraction of the rows; with drop = 0 this is a no-op.
num_samples = x_train.shape[0]
keep_size = int(num_samples * (1 - drop))
x_train, y_train = x_train[:keep_size], y_train[:keep_size]

# --- Wrap the numpy arrays as float32 tensor datasets and loaders -----------
train_data = torch.utils.data.TensorDataset(
    *(torch.from_numpy(array).float() for array in (x_train, y_train))
)
test_data = torch.utils.data.TensorDataset(
    *(torch.from_numpy(array).float() for array in (x_test, y_test))
)
# Neither loader shuffles, so batches are served in dataset order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batchsize, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1000, shuffle=False)

In [None]:
# Pick the device for training. The notebook was written for the second GPU
# ("cuda:1"); fall back to the first GPU, then to the CPU, so that the cell
# also runs on machines with fewer than two CUDA devices.
if torch.cuda.device_count() > 1:
    device = "cuda:1"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# set up the AGT configuration
# This is the baseline configuration; the k_private sweep further down works
# on a copy of it and only overrides `k_private`.
nominal_config = AGTConfig(
    fragsize=2000,
    learning_rate=0.25,
    n_epochs=10,
    device=device,
    l2_reg=0.01,
    k_private=1,
    loss="mse",
    log_level="DEBUG",
    lr_decay=2.0,
    clip_gamma=1.0,
    lr_min=0.001,
    optimizer="SGDM", # we'll use SGD with momentum
    optimizer_kwargs={"momentum": 0.9, "nesterov": True},
)

In [None]:
# to use privacy-safe certificates, we need to run AGT for a range of k_private values

# we'll just pick a reasonable range of k_private values. adding more values will increase the runtime
# but also result in tighter privacy results. even a few values are sufficient to demonstrate tighter privacy

k_private_values = [1, 2, 5, 10, 20, 50, 100]
privacy_bounded_models = {}  # maps k_private -> trained bounded model
config = copy.deepcopy(nominal_config)  # work on a copy so nominal_config stays untouched
path = "path/to/save"  # directory that receives one parameter file per k_private

# make sure the output directory exists before the (long) training loop starts
os.makedirs(path, exist_ok=True)

for k_private in tqdm.tqdm(k_private_values):
    # update config
    config.k_private = k_private
    # re-seed so every run starts from the same model initialisation
    torch.manual_seed(1)
    # get the nn model (11 input features -> 128 hidden units -> 1 output)
    model = torch.nn.Sequential(torch.nn.Linear(11, 128), torch.nn.ReLU(), torch.nn.Linear(128, 1)).to(config.device)
    # form bounded model
    bounded_model = IntervalBoundedModel(model, trainable=True)
    # run AGT
    agt.privacy_certified_training(bounded_model, config, train_loader, dl_val=test_loader)
    privacy_bounded_models[k_private] = bounded_model
    # Save each model to its own file under `path`. Previously every iteration
    # wrote to the literal file name "path", so only the last model survived.
    # NOTE(review): the "k_private_<k>.model" file name is a choice made here;
    # adjust any code that loads these parameters accordingly.
    bounded_model.save_params(os.path.join(path, f"k_private_{k_private}.model"))