In [1]:
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Runs MNIST training with differential privacy.

"""

import argparse

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from opacus import PrivacyEngine
from torchvision import datasets, transforms
from tqdm import tqdm

from opacus import PrivacyEngine
import os
from typing import Any, Tuple

import numpy as np
import pandas as pd
import torch
import torchvision
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

In [2]:
import torch.nn as nn
from torch import Tensor, nn


class CelebaNet(nn.Module):
    """This class defines the CelebaNet."""

    def __init__(
        self,
        in_channels: int = 3,
        num_classes: int = 4,
        dropout_rate: float = 0,
    ) -> None:
        """Initializes the CelebaNet network.

        Args:
        ----
            in_channels (int, optional): Number of input channels . Defaults to 3.
            num_classes (int, optional): Number of classes . Defaults to 2.
            dropout_rate (float, optional): _description_. Defaults to 0.2.
        """
        super().__init__()
        self.cnn1 = nn.Conv2d(
            in_channels,
            8,
            kernel_size=(3, 3),
            padding=(1, 1),
            stride=(1, 1),
        )
        self.cnn2 = nn.Conv2d(8, 16, kernel_size=(3, 3), padding=(1, 1), stride=(1, 1))
        self.cnn3 = nn.Conv2d(16, 32, kernel_size=(3, 3), padding=(1, 1), stride=(1, 1))
        self.fc1 = nn.Linear(2048, num_classes)
        self.gn_relu = nn.Sequential(
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        )

    def forward(self, input_data: Tensor) -> Tensor:
        """Defines the forward pass of the network.

        Args:
            input_data (Tensor): Input data

        Returns
        -------
            Tensor: Output data
        """
        out = self.gn_relu(self.cnn1(input_data))
        out = self.gn_relu(self.cnn2(out))
        out = self.gn_relu(self.cnn3(out))
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        return out

In [3]:
class CelebaDataset(Dataset):
    """Definition of the dataset used for the Celeba Dataset."""

    def __init__(
        self,
        csv_path: str,
        image_path: str,
        transform: torchvision.transforms = None,
        debug: bool = True,
    ) -> None:
        """Initialization of the dataset.

        Args:
        ----
            csv_path (str): path of the csv file with all the information
             about the dataset
            image_path (str): path of the images
            transform (torchvision.transforms, optional): Transformation to apply
            to the images. Defaults to None.
        """
        dataframe = pd.read_csv(csv_path)

        self.targets = dataframe["Target"].tolist()
        self.classes = dataframe["Target"].tolist()

        self.samples = list(dataframe["image_id"])
        self.n_samples = len(dataframe)
        self.transform = transform
        self.image_path = image_path
        self.debug = debug
        if not self.debug:
            self.images = [
                Image.open(os.path.join(self.image_path, sample)).convert(
                    "RGB",
                )
                for sample in self.samples
            ]

    def __getitem__(self, index: int):
        """Returns a sample from the dataset.

        Args:
            idx (_type_): index of the sample we want to retrieve

        Returns
        -------
            _type_: sample we want to retrieve

        """
        if self.debug:
            img = Image.open(
                os.path.join(self.image_path, self.samples[index]),
            ).convert(
                "RGB",
            )
        else:
            img = self.images[index]

        if self.transform:
            img = self.transform(img)

        return (
            img,
            self.targets[index],
        )

    def __len__(self) -> int:
        """This function returns the size of the dataset.

        Returns
        -------
            int: size of the dataset
        """
        return self.n_samples

In [4]:
transform = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ],
)
train_dataset = CelebaDataset(
    csv_path="../data/celeba/train_smiling.csv",
    image_path="../data/celeba/img_align_celeba",
    transform=transform,
    debug=True,
)
test_dataset = CelebaDataset(
    csv_path="../data/celeba/test_smiling.csv",
    image_path="../data/celeba/img_align_celeba",
    transform=transform,
    debug=True,
)

# train_dataset = torch.load("../data/celeba/cluster_0_node_0_private_train.pt")

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=512,
    shuffle=False,
    num_workers=0,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=512,
    shuffle=False,
    num_workers=0,
)

In [5]:
import numpy as np
from opacus.utils.batch_memory_manager import BatchMemoryManager


def accuracy(preds, labels):
    return (preds == labels).mean()


def train(model, train_loader, optimizer, epoch, device, privacy_engine):
    model.train()
    criterion = nn.CrossEntropyLoss()

    DELTA = 1e-5
    losses = []
    top1_acc = []

    with BatchMemoryManager(
        data_loader=train_loader, max_physical_batch_size=128, optimizer=optimizer
    ) as memory_safe_data_loader:
        for i, (images, target) in enumerate(memory_safe_data_loader):
            optimizer.zero_grad()
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()

            # measure accuracy and record loss
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

            loss.backward()
            optimizer.step()

            if (i + 1) % 50 == 0:
                epsilon = privacy_engine.get_epsilon(DELTA)
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc) * 100:.6f} "
                    f"(ε = {epsilon:.2f}, δ = {DELTA})"
                )


In [6]:
model = CelebaNet()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
privacy_engine = PrivacyEngine(accountant="rdp")

(
    private_model,
    private_optimizer,
    private_train_loader,
) = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    noise_multiplier=1.0,
    max_grad_norm=5.0,
)
private_model.train()



GradSampleModule(CelebaNet(
  (cnn1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cnn2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cnn3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=2048, out_features=4, bias=True)
  (gn_relu): Sequential(
    (0): ReLU()
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
))

In [7]:
for iteration in range(0, 10):
    train(
        model=private_model,
        train_loader=private_train_loader,
        optimizer=private_optimizer,
        epoch=iteration,
        device=device,
        privacy_engine=privacy_engine,
    )




	Train Epoch: 0 	Loss: 1.368281 Acc@1: 30.742811 (ε = 0.78, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.363325 Acc@1: 32.000202 (ε = 0.83, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.356264 Acc@1: 35.127597 (ε = 0.83, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.331021 Acc@1: 38.410917 (ε = 0.83, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.312420 Acc@1: 41.351678 (ε = 0.83, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.294038 Acc@1: 44.027400 (ε = 0.83, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.272408 Acc@1: 46.442027 (ε = 0.83, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.244345 Acc@1: 48.729917 (ε = 0.84, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.232480 Acc@1: 50.597119 (ε = 0.84, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.218044 Acc@1: 52.069290 (ε = 0.84, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.202789 Acc@1: 53.528384 (ε = 0.84, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.184230 Acc@1: 54.902689 (ε = 0.84, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.167134 Acc@1: 56.149107 (ε = 0.84, δ = 1e-05)
	Train Epoch: 0 	Loss: 1.152027 Acc@1: 57.228530 (ε = 0.84, δ = 1e-05)
	Train

KeyboardInterrupt: 

: 