In [9]:
#fixed privacy budget for all epochs
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt
import math
from opacus.accountants import create_accountant
from typing import Optional


# Set random seed for reproducibility
torch.manual_seed(0)

# Load and preprocess the dataset
car_data = pd.read_csv('/content/car.data', header=None)

# Preprocessing steps:
features = car_data.iloc[:, :-1]
targets = car_data.iloc[:, -1]

# Convert categorical features and targets into numerical values
encoder = LabelEncoder()
features = features.apply(encoder.fit_transform)
targets = encoder.fit_transform(targets)

# Scale the features
scaler = StandardScaler()
features = scaler.fit_transform(features)

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create a custom dataset class
class CarDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create DataLoader for training and testing
batch_size = 64
train_dataset = CarDataset(X_train_tensor, y_train_tensor)
test_dataset = CarDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the NN model for tabular data
class NN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Loss function
def loss_fn(predictions, targets):
    return F.cross_entropy(predictions, targets)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model, optimizer, and other hyperparameters
input_dim = X_train.shape[1]
output_dim = len(set(y_train))
model = NN(input_dim, output_dim).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# DP-SGD parameters
max_grad_norm = 1.0
target_epsilon = 1.0  # Privacy budget target (fixed at 3)
delta = 1e-5          # Delta value
sample_rate = batch_size / len(train_dataset)
num_epochs = 50       # Train for full epochs

# Import the GaussianAccountant class from your accountant file
import warnings
from opacus.accountants import IAccountant
from opacus.accountants.analysis import gdp as privacy_analysis

class GaussianAccountant(IAccountant):
    def __init__(self):
        warnings.warn(
            "GDP accounting is experimental and can underestimate privacy expenditure."
            "Proceed with caution. More details: https://arxiv.org/pdf/2106.02848.pdf"
        )
        super().__init__()

    def step(self, *, noise_multiplier: float, sample_rate: float):
        if len(self.history) >= 1:
            last_noise_multiplier, last_sample_rate, num_steps = self.history.pop()
            if (
                last_noise_multiplier != noise_multiplier
                or last_sample_rate != sample_rate
            ):
                raise ValueError(
                    "Noise multiplier and sample rate have to stay constant in GaussianAccountant."
                )
            else:
                self.history = [
                    (last_noise_multiplier, last_sample_rate, num_steps + 1)
                ]

        else:
            self.history = [(noise_multiplier, sample_rate, 1)]

    def get_epsilon(self, delta: float, poisson: bool = True) -> float:
        """
        Return privacy budget (epsilon) expended so far.

        Args:
            delta: target delta
            poisson: ``True`` is input batches was sampled via Poisson sampling,
                ``False`` otherwise
        """

        compute_eps = (
            privacy_analysis.compute_eps_poisson
            if poisson
            else privacy_analysis.compute_eps_uniform
        )
        noise_multiplier, sample_rate, num_steps = self.history[-1]
        return compute_eps(
            steps=num_steps,
            noise_multiplier=noise_multiplier,
            sample_rate=sample_rate,
            delta=delta,
        )

    def __len__(self):
        return len(self.history)

    @classmethod
    def mechanism(cls) -> str:
        return "gdp"


# Initialize the GaussianAccountant
accountant = GaussianAccountant()

MAX_SIGMA = 1e6

# Get noise multiplier function
def get_noise_multiplier(
    *,
    target_epsilon: float,
    target_delta: float,
    sample_rate: float,
    epochs: Optional[int] = None,
    steps: Optional[int] = None,
    accountant: str = "gdp",
    epsilon_tolerance: float = 0.01,
    **kwargs,
) -> float:
    if (steps is None) == (epochs is None):
        raise ValueError("get_noise_multiplier takes as input EITHER a number of steps or a number of epochs")
    if steps is None:
        steps = int(epochs / sample_rate)

    eps_high = float("inf")
    accountant = create_accountant(mechanism=accountant)

    sigma_low, sigma_high = 0, 10
    while eps_high > target_epsilon:
        sigma_high = 2 * sigma_high
        accountant.history = [(sigma_high, sample_rate, steps)]
        eps_high = accountant.get_epsilon(delta=target_delta, **kwargs)
        if sigma_high > MAX_SIGMA:
            raise ValueError("The privacy budget is too low.")

    while target_epsilon - eps_high > epsilon_tolerance:
        sigma = (sigma_low + sigma_high) / 2
        accountant.history = [(sigma, sample_rate, steps)]
        eps = accountant.get_epsilon(delta=target_delta, **kwargs)

        if eps < target_epsilon:
            sigma_high = sigma
            eps_high = eps
        else:
            sigma_low = sigma

    return sigma_high

# Calculate the noise multiplier before training using the total number of steps
noise_multiplier = get_noise_multiplier(
    target_epsilon=target_epsilon,
    target_delta=delta,
    sample_rate=sample_rate,
    epochs=num_epochs,
)

print(f"Calculated noise multiplier for fixed privacy budget: {noise_multiplier}")

#  functions for DP-SGD
def compute_per_sample_gradients(model, loss_fn, data, targets):
    model.zero_grad()
    outputs = model(data)
    loss = loss_fn(outputs, targets)
    per_sample_grads = torch.autograd.grad(loss, model.parameters(), create_graph=True)
    return per_sample_grads

def clip_gradients(gradients, max_norm):
    total_norm = torch.norm(torch.stack([torch.norm(g.detach(), p=2) for g in gradients]))
    clip_coef = max_norm / (total_norm + 1e-6)
    clip_coef_clamped = torch.clamp(clip_coef, max=1.0)
    return [g.detach() * clip_coef_clamped for g in gradients]

def add_noise(gradients, noise_multiplier, max_norm):
    noised_gradients = []
    for grad in gradients:
        noise = torch.normal(0, noise_multiplier * max_norm / batch_size, grad.shape, device=grad.device)
        noised_gradients.append(grad + noise)
    return noised_gradients

def train_step(model, optimizer, data, targets):
    per_sample_grads = compute_per_sample_gradients(model, loss_fn, data, targets)
    clipped_grads = clip_gradients(per_sample_grads, max_grad_norm)
    noised_grads = add_noise(clipped_grads, noise_multiplier, max_grad_norm)

    optimizer.zero_grad()
    for param, noised_grad in zip(model.parameters(), noised_grads):
        param.grad = noised_grad
    optimizer.step()

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)

    return test_loss, accuracy

# Training loop
steps = 0
losses = []
epsilons = []
train_accuracies = []

for epoch in range(num_epochs):  # Train for full epochs
    model.train()
    epoch_loss = 0
    correct_train = 0
    total_train = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # Perform a training step
        train_step(model, optimizer, data, target)
        steps += 1

        # Update accountant with current noise multiplier and sample rate
        accountant.step(noise_multiplier=noise_multiplier, sample_rate=sample_rate)

        # Compute loss for the current batch
        outputs = model(data)
        loss = loss_fn(outputs, target)
        epoch_loss += loss.item()

        # Compute the number of correct predictions in the training batch
        pred = outputs.argmax(dim=1, keepdim=True)
        correct_train += pred.eq(target.view_as(pred)).sum().item()
        total_train += target.size(0)

    # Compute average loss over the training data
    losses.append(epoch_loss / len(train_loader))

    # Calculate training accuracy
    train_accuracy = 100. * correct_train / total_train
    train_accuracies.append(train_accuracy)

    # Evaluate on test set
    test_loss, test_accuracy = test(model, device, test_loader)

    # Get the current epsilon from GaussianAccountant
    epsilon = accountant.get_epsilon(delta=delta, poisson=True)  # Use Poisson sampling
    epsilons.append(epsilon)

    # Print results for the current epoch
    print(f'Epoch {epoch+1}: Train Loss: {epoch_loss / len(train_loader):.4f}, '
          f'Train Accuracy: {train_accuracy:.2f}%, '
          f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%, '
          f'Epsilon: {epsilon:.4f}')







Calculated noise multiplier for fixed privacy budget: 5.7421875
Epoch 1: Train Loss: 1.3915, Train Accuracy: 28.22%, Test Loss: 1.2893, Test Accuracy: 61.56%, Epsilon: 0.1190
Epoch 2: Train Loss: 1.1802, Train Accuracy: 70.04%, Test Loss: 1.1063, Test Accuracy: 67.92%, Epsilon: 0.1737
Epoch 3: Train Loss: 1.0179, Train Accuracy: 70.55%, Test Loss: 0.9841, Test Accuracy: 67.92%, Epsilon: 0.2167
Epoch 4: Train Loss: 0.9214, Train Accuracy: 70.55%, Test Loss: 0.9160, Test Accuracy: 67.92%, Epsilon: 0.2535
Epoch 5: Train Loss: 0.8604, Train Accuracy: 70.55%, Test Loss: 0.8747, Test Accuracy: 67.92%, Epsilon: 0.2862
Epoch 6: Train Loss: 0.8289, Train Accuracy: 70.55%, Test Loss: 0.8482, Test Accuracy: 67.92%, Epsilon: 0.3161
Epoch 7: Train Loss: 0.8031, Train Accuracy: 70.55%, Test Loss: 0.8280, Test Accuracy: 67.92%, Epsilon: 0.3438
Epoch 8: Train Loss: 0.7873, Train Accuracy: 70.55%, Test Loss: 0.8131, Test Accuracy: 67.92%, Epsilon: 0.3697
Epoch 9: Train Loss: 0.7752, Train Accuracy: 70.