# Import all libraries

In [None]:
from logging.config import DEFAULT_LOGGING_CONFIG_PORT
import os
from pyexpat import model
import torch
import pandas
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets, transforms, models
from torch import nn, optim
from sklearn.metrics import f1_score, roc_auc_score, confusion_matrix
from pynvml import *
from typing import Any, Callable, List, Optional, Union, Tuple
from functools import partial
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from scipy import ndimage, stats
from numpy.core.fromnumeric import mean
from opacus import PrivacyEngine
import math
import numpy as np
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.optim import lr_scheduler
import pickle
import glob

Connect to the GPU and print the number of available CUDA devices

In [None]:
import sys  # sys.argv is read below, but the import block never imports sys explicitly

# Report how many CUDA devices PyTorch can see.
print("--->", torch.cuda.device_count())
# Index of the GPU to use, taken from command-line argument 9.
device_number = int(sys.argv[9])

In [None]:
def get_memory():
    """Print the memory figures NVML reports for the selected GPU.

    Helps to understand how much CUDA memory the code consumes. The device is
    the one whose index is the module-level ``device_number``; the printed
    values are ``total``, ``free`` and ``used`` exactly as returned by
    ``nvmlDeviceGetMemoryInfo``.
    """
    nvmlInit()
    try:
        handle = nvmlDeviceGetHandleByIndex(device_number)
        info = nvmlDeviceGetMemoryInfo(handle)
    finally:
        # Balance nvmlInit() so repeated calls do not leave NVML initialised.
        nvmlShutdown()

    print("\n_______ MEMORY _______")
    print(f'total    : {info.total}')
    print(f'free     : {info.free}')
    print(f'used     : {info.used}\n')

# Print the GPU memory state once, before any model is built.
get_memory()

# Implementation of Architectures

In [None]:

class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutions, 6x6 adaptive pooling and an MLP head.

    Args:
        in_channels: number of channels of the input images.
        num_classes: output size of the last linear layer.
        dropout: drop probability of the two dropout layers in the classifier.
    """

    def __init__(self, in_channels=3, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()

        # Convolutional trunk. The position of each layer in this list fixes
        # the ``features.<idx>`` names used in saved state dicts.
        conv_stack = [
            nn.Conv2d(in_channels, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Pool to a fixed 6x6 map so the classifier input size is 256 * 6 * 6.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        head = [
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw scores of the last linear layer for a batch of images."""
        pooled = self.avgpool(self.features(x))
        return self.classifier(torch.flatten(pooled, 1))


class SeparableConv2d(nn.Module):
    """Depthwise-separable convolution: a per-channel conv followed by a 1x1 conv."""

    def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False):
        super().__init__()

        # Depthwise step: groups == in_channels, so every input channel is
        # filtered on its own.
        self.conv1 = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=in_channels,
            bias=bias,
        )
        # Pointwise step: 1x1 convolution that mixes channels into out_channels.
        self.pointwise = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            dilation=1,
            groups=1,
            bias=bias,
        )

    def forward(self,x):
        """Apply the depthwise convolution, then the pointwise convolution."""
        return self.pointwise(self.conv1(x))


class Block(nn.Module):
    """Residual block built from ReLU -> SeparableConv2d -> BatchNorm2d units.

    Args:
        in_filters: channels of the block input.
        out_filters: channels of the block output.
        reps: number of separable-conv units in the main path.
        strides: when not 1, a max-pool with this stride ends the main path and
            the skip path uses a strided 1x1 convolution.
        start_with_relu: if False the leading ReLU of the main path is dropped.
        grow_first: if True the channel change happens in the first unit,
            otherwise in the last one.
    """

    def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True):
        super().__init__()

        # A 1x1 projection is only needed when the shape changes.
        needs_projection = out_filters != in_filters or strides != 1
        if needs_projection:
            self.skip = nn.Conv2d(in_filters, out_filters, 1, stride=strides, bias=False)
            self.skipbn = nn.BatchNorm2d(out_filters)
        else:
            self.skip = None

        self.relu = nn.ReLU(inplace=True)

        def unit(c_in, c_out):
            # One ReLU -> separable 3x3 conv -> batch-norm group.
            return [
                self.relu,
                SeparableConv2d(c_in, c_out, 3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
            ]

        layers = []
        width = in_filters
        if grow_first:
            layers += unit(in_filters, out_filters)
            width = out_filters

        for _ in range(reps - 1):
            layers += unit(width, width)

        if not grow_first:
            layers += unit(in_filters, out_filters)

        if start_with_relu:
            # The first ReLU must not be in-place: the block input is still
            # needed for the skip path.
            layers[0] = nn.ReLU(inplace=False)
        else:
            layers = layers[1:]

        if strides != 1:
            layers.append(nn.MaxPool2d(3, strides, 1))
        self.rep = nn.Sequential(*layers)

    def forward(self,inp):
        """Return main path output plus the (optionally projected) input."""
        x = self.rep(inp)

        if self.skip is None:
            shortcut = inp
        else:
            shortcut = self.skipbn(self.skip(inp))

        x += shortcut
        return x

In [None]:

class Xception(nn.Module):
    """
    Xception backbone, as specified in https://arxiv.org/pdf/1610.02357.pdf,
    followed by a three-layer fully connected head.

    The head ends in ``nn.Softmax(dim=1)``, so ``forward`` returns class
    probabilities rather than raw scores.
    """
    def __init__(self, in_channels=3, num_classes=1000, dropout=0.5):
        """ Constructor
        Args:
            in_channels: number of channels of the input images
            num_classes: number of classes
            dropout: drop probability of the two dropout layers in the head
        """
        super().__init__()

        self.num_classes = num_classes

        # Two plain 3x3 convolutions; the ReLU after each one is applied in forward().
        self.conv1 = nn.Conv2d(in_channels, 32, 3, 2, 0, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(32, 64, 3, bias=False)
        self.bn2 = nn.BatchNorm2d(64)

        # Three strided blocks widening the features from 64 to 728 channels.
        self.block1 = Block(64, 128, 2, 2, start_with_relu=False, grow_first=True)
        self.block2 = Block(128, 256, 2, 2, start_with_relu=True, grow_first=True)
        self.block3 = Block(256, 728, 2, 2, start_with_relu=True, grow_first=True)

        # block4 .. block11: eight identical 728-channel blocks, registered in order.
        for idx in range(4, 12):
            setattr(self, f"block{idx}", Block(728, 728, 3, 1, start_with_relu=True, grow_first=True))

        self.block12 = Block(728, 1024, 2, 2, start_with_relu=True, grow_first=False)

        self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(1536)

        self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)
        self.bn4 = nn.BatchNorm2d(2048)

        # NOTE(review): the head outputs softmax probabilities, while
        # model_training in this file trains with nn.CrossEntropyLoss, which
        # is normally fed raw scores - confirm this is intended.
        head = [
            nn.Dropout(p=dropout),
            nn.Linear(2048, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes),
            nn.Softmax(dim=1),
        ]
        self.fc = nn.Sequential(*head)

        # Weight initialisation: normal(0, sqrt(2 / (k_h * k_w * out_channels)))
        # for every convolution, unit scale and zero shift for every batch norm.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def forward(self, x):
        """Return per-class probabilities for a batch of images."""
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))

        # block1 .. block12, in numeric order.
        for idx in range(1, 13):
            x = getattr(self, f"block{idx}")(x)

        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))

        # Global average pooling, then flatten to (batch, 2048) for the head.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        return self.fc(x)

In [None]:

def target_model_fn(input_channel=1, num_classes=10):
    """Build a fresh model of the globally selected architecture plus an Adam optimizer.

    The architecture is chosen by the module-level ``model_architecture``
    string: "resnet18", "alexnet" or "xception".

    Args:
        input_channel: number of input image channels; forwarded to AlexNet
            and Xception.
        num_classes: number of output classes.

    Returns:
        ``(model, optimizer)`` where ``optimizer`` is ``optim.Adam`` over all
        model parameters with default settings.

    Raises:
        ValueError: if ``model_architecture`` is not one of the supported
            names (previously this surfaced as an unbound-local error).
    """
    if model_architecture == "resnet18":
        # load resnet 18 and change to fit problem dimensionality
        # NOTE(review): conv1 is rebuilt with 3 input channels regardless of
        # ``input_channel`` - confirm whether it should follow the argument.
        model = models.resnet18()
        model.conv1 = nn.Conv2d(3, 64, kernel_size=(7,7), stride=(2,2), padding=(3,3), bias=False)
        model.fc = nn.Sequential(nn.Linear(512, num_classes), nn.LogSoftmax(dim=1))
    elif model_architecture == "alexnet":
        model = AlexNet(in_channels=input_channel, num_classes=num_classes)
    elif model_architecture == "xception":
        model = Xception(in_channels=input_channel, num_classes=num_classes)
    else:
        raise ValueError("Model architecture \"{}\" is not recognized.".format(model_architecture))

    optimizer = optim.Adam(model.parameters())
    return model, optimizer

The code uses a lot of CUDA memory. To make CUDA out-of-memory errors less frequent, we use the following function to free cached memory

In [None]:
def clear_cache():
    """Release cached CUDA memory on the selected GPU, then run Python garbage collection.

    The GPU is the one whose index is the module-level ``device_number``.
    """
    import gc  # local import: gc is not imported at the top of this file

    with torch.cuda.device(f'cuda:{device_number}'):
        torch.cuda.empty_cache()
    gc.collect()

In [None]:
# Select the GPU for this process and build the torch device object used by
# the training and evaluation code.
# NOTE(review): if CUDA_VISIBLE_DEVICES takes effect here, the only visible
# GPU is exposed as index 0, so "cuda:{device_number}" would be invalid for
# device_number > 0 - confirm on the target setup.
os.environ["CUDA_VISIBLE_DEVICES"] = f"{device_number}"
device = torch.device(f"cuda:{device_number}")

def clear_cache():
    """Release cached CUDA memory on the selected GPU, then run Python garbage collection.

    The GPU is the one whose index is the module-level ``device_number``.
    """
    import gc  # local import: gc is not imported at the top of this file

    with torch.cuda.device(f'cuda:{device_number}'):
        torch.cuda.empty_cache()
    gc.collect()

print("Imported all libraries")
# Free cached GPU memory and print the memory state before datasets and models are loaded.
clear_cache()
get_memory()

# Global mean / standard deviation of the dataset; placeholders until
# assign_mean_std() overwrites them.
MEAN_DATA = 0
STD_DATA = 0

# Datasets retrieval

In [None]:

class CelebA(torch.utils.data.Dataset):
    """CelebA images with integer targets packed from groups of binary attributes.

    Annotation lists are read from ``<root>/celebA/processed`` and images from
    ``<root>/celebA/raw/img_celeba``.

    Args:
        root: directory that contains the ``celebA`` folder.
        attr_list: one list of attribute column indices per entry of
            ``target_type``. The selected binary attributes of a group are
            packed into one integer, attribute ``k`` of the group contributing
            ``2 ** k``.
        target_type: ``"attr"`` or a list of ``"attr"`` entries; any other
            value raises ``ValueError`` when an item is fetched.
        transform: optional callable applied to the loaded image.
        target_transform: optional callable applied to the target.
    """
    base_folder = "celebA"

    def __init__(
            self,
            root: str,
            attr_list: List[List[int]],
            target_type: Union[List[str], str] = "attr",
            transform: Optional[Callable] = None,
            target_transform: Optional[Callable] = None,
    ) -> None:

        self.target_type = target_type if isinstance(target_type, list) else [target_type]

        self.root = root
        self.transform = transform
        self.target_transform = target_transform
        self.attr_list = attr_list

        fn = partial(os.path.join, self.root, self.base_folder+"/processed")
        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True.
        splits = pandas.read_csv(fn("list_eval_partition.txt"), sep=r"\s+", header=None, index_col=0)
        # header=1: the first line of the file is skipped, the second holds
        # the attribute names.
        attr = pandas.read_csv(fn("list_attr_celeba.txt"), sep=r"\s+", header=1)

        # All partitions are used; no split filtering is applied.
        self.filename = splits.index.values
        self.attr = torch.as_tensor(attr.values)
        self.attr = torch.div(self.attr + 1, 2, rounding_mode='floor')  # map from {-1, 1} to {0, 1}
        self.attr_names = list(attr.columns)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """Return ``(image, target)`` for sample ``index``.

        ``target`` is a single packed label, a tuple of labels when several
        target types were requested, or ``None`` when there are none.
        """
        X = Image.open(os.path.join(self.root, self.base_folder, "raw/img_celeba", self.filename[index]))

        target: Any = []
        for t, nums in zip(self.target_type, self.attr_list):
            if t != "attr":
                # TODO: refactor with utils.verify_str_arg
                raise ValueError("Target type \"{}\" is not recognized.".format(t))
            row = self.attr[index]
            # Pack the selected 0/1 attributes into one integer label.
            target.append(sum(2 ** bit * row[col] for bit, col in enumerate(nums)))

        if self.transform is not None:
            X = self.transform(X)

        if target:
            target = tuple(target) if len(target) > 1 else target[0]

            if self.target_transform is not None:
                target = self.target_transform(target)
        else:
            target = None

        return X, target

    def __len__(self) -> int:
        return len(self.attr)

    def extra_repr(self) -> str:
        """Return a short description of the configured target types."""
        # The former "Split: {split}" line referenced an attribute that is
        # never set on this class and raised KeyError.
        return "Target type: {target_type}".format(**self.__dict__)

from typing import Any, Callable, List, Optional, Union, Tuple
from PIL import Image
class UTKFaceDataset(torch.utils.data.Dataset):
    """UTKFace images labelled with age, gender and/or race parsed from the file name.

    Image names are read from every file in ``<root>/data/UTKFace/processed``;
    images are loaded from ``<root>/data/UTKFace/raw``. The name fields are
    separated by ``_`` in the order age, gender, race.

    Args:
        root: directory that contains ``data/UTKFace``.
        attr: ``"age"``, ``"gender"``, ``"race"`` or a list of them; selects
            which labels make up the target.
        transform: optional callable applied to the RGB image.
        target_transform: optional callable applied to the target.
    """

    def __init__(self, root, attr: Union[List[str], str] = "gender", transform=None, target_transform=None)-> None:
        self.root = root
        self.transform = transform
        self.target_transform = target_transform
        self.processed_path = os.path.join(self.root, "data/UTKFace/processed")
        self.files = os.listdir(self.processed_path)
        self.attr = attr if isinstance(attr, list) else [attr]

        self.lines = []
        for txt_file in self.files:
            txt_file_path = os.path.join(self.processed_path, txt_file)
            print(txt_file_path)
            with open(txt_file_path, 'r') as f:
                for line in f:
                    # Keep only the part of the line before the first "jpg ".
                    image_name = line.split('jpg ')[0]
                    if self._is_usable(image_name):
                        self.lines.append(image_name+'jpg')

    @staticmethod
    def _is_usable(image_name):
        """Return True when the name has at least four non-empty fields and a race label below 4."""
        fields = image_name.split('_')
        # Check for missing or empty fields before converting, so a blank race
        # field is skipped instead of raising ValueError in int().
        if len(fields) < 4 or '' in fields:
            return False
        return int(fields[2]) < 4

    def __len__(self):
        return len(self.lines)

    def __getitem__(self, index:int)-> Tuple[Any, Any]:
        """Return ``(image, target)`` for sample ``index``.

        ``target`` is a single label, or a tuple of labels when several
        attributes were requested.
        """
        attrs = self.lines[index].split('_')

        labels = {
            "age": int(attrs[0]),
            "gender": int(attrs[1]),
            "race": int(attrs[2]),
        }

        image_path = os.path.join(self.root, 'data/UTKFace/raw', self.lines[index]+'.chip.jpg').rstrip()

        image = Image.open(image_path).convert('RGB')

        target: Any = []
        for t in self.attr:
            if t not in labels:
                raise ValueError("Target type \"{}\" is not recognized.".format(t))
            target.append(labels[t])

        if self.transform:
            image = self.transform(image)

        if target:
            target = tuple(target) if len(target) > 1 else target[0]

            if self.target_transform is not None:
                target = self.target_transform(target)
        else:
            target = None

        return image, target

        
def get_model_dataset(dataset_name, attr, root):
    """Build the dataset plus freshly initialised target and shadow models.

    Side effect: the module-level ``num_classes`` is overwritten with the
    value returned here.

    Args:
        dataset_name: "utkface", "stl10", "celeba" or "fmnist"
            (case-insensitive).
        attr: target attribute(s). UTKFace accepts "age", "gender", "race" or
            a list of them; CelebA accepts "attr" or a list of "attr". Not
            used for STL10 and FashionMNIST.
        root: data root passed on to the dataset classes.

    Returns:
        ``(num_classes, dataset, target_model, shadow_model)``. ``num_classes``
        is an int, or a list of ints when ``attr`` is a list; in the list case
        both models are sized for the first entry.

    Raises:
        ValueError: if the dataset name or a target attribute is not recognized.
    """
    global num_classes

    name = dataset_name.lower()

    if name == "utkface":
        utk_classes = {"age": 117, "gender": 2, "race": 4}
        requested = attr if isinstance(attr, list) else [attr]
        for a in requested:
            if a not in utk_classes:
                raise ValueError("Target type \"{}\" is not recognized.".format(a))
        counts = [utk_classes[a] for a in requested]
        num_classes = counts if isinstance(attr, list) else counts[0]

        transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        dataset = UTKFaceDataset(root=root, attr=attr, transform=transform)
        input_channel = 3

    elif name == "stl10":
        num_classes = 10
        transform = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])

        print("Loading stl10")

        train_set = torchvision.datasets.STL10(
                root=root, split='train', transform=transform, download=True)

        test_set = torchvision.datasets.STL10(
                root=root, split='test', transform=transform, download=True)

        # Train and test parts are merged; the caller re-splits the data itself.
        dataset = train_set + test_set
        input_channel = 3

    elif name == "celeba":
        requested = attr if isinstance(attr, list) else [attr]
        for a in requested:
            if a != "attr":
                raise ValueError("Target type \"{}\" is not recognized.".format(a))

        # Assigned outside the validation loop so they are always bound, even
        # for an empty list.
        if isinstance(attr, list):
            num_classes = [8, 4]
            attr_list = [[18, 21, 31], [20, 39]]
        else:
            num_classes = 8
            attr_list = [[18, 21, 31]]

        transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        dataset = CelebA(root=root, attr_list=attr_list, target_type=attr, transform=transform)
        input_channel = 3

    elif name == "fmnist":
        num_classes = 10
        transform = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])

        train_set = torchvision.datasets.FashionMNIST(
                root=root, train=True, download=True, transform=transform)
        test_set = torchvision.datasets.FashionMNIST(
                root=root, train=False, download=True, transform=transform)

        dataset = train_set + test_set
        input_channel = 1

    else:
        raise ValueError("Dataset \"{}\" is not recognized.".format(dataset_name))

    # With several target attributes the models are sized for the first one.
    model_classes = num_classes if isinstance(num_classes, int) else num_classes[0]
    target_model, _ = target_model_fn(input_channel=input_channel, num_classes=model_classes)
    shadow_model, _ = target_model_fn(input_channel=input_channel, num_classes=model_classes)

    return num_classes, dataset, target_model, shadow_model


In [None]:
def prepare_dataset(dataset, attr, root):
    """Load a dataset and split it randomly into four equally sized parts.

    Args:
        dataset: dataset name, passed to ``get_model_dataset``.
        attr: target attribute(s), passed to ``get_model_dataset``.
        root: data root, passed to ``get_model_dataset``.

    Returns:
        ``(num_classes, target_train, target_test, shadow_train, shadow_test,
        target_model, shadow_model)``. Each of the four subsets holds
        ``len(dataset) // 4`` samples; the remainder is discarded.
    """
    num_classes, full_set, target_model, shadow_model = get_model_dataset(dataset, attr=attr, root=root)

    quarter = len(full_set) // 4
    leftover = len(full_set) - quarter * 4
    # The fifth part only absorbs the samples that do not fit into four quarters.
    parts = torch.utils.data.random_split(full_set, [quarter] * 4 + [leftover])
    target_train, target_test, shadow_train, shadow_test = parts[:4]

    return num_classes, target_train, target_test, shadow_train, shadow_test, target_model, shadow_model

To add noise to the data that matches the mean and standard deviation of the dataset, we declare global variables and assign them values according to the dataset

In [None]:
def assign_mean_std(dataset_name):
    """Set the global ``MEAN_DATA`` and ``STD_DATA`` for the given dataset.

    For the three-channel datasets the per-channel normalisation constants are
    averaged into a single mean and a single standard deviation. The name is
    matched case-insensitively, like in ``get_model_dataset``. An unknown name
    leaves both globals untouched.

    Args:
        dataset_name: "stl10", "utkface", "fmnist" or "celeba".
    """
    global MEAN_DATA, STD_DATA

    # Per-channel (mean, std) constants of the three-channel datasets.
    channel_stats = {
        "stl10": ([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        "utkface": ([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        "celeba": ([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    }

    key = dataset_name.lower()
    if key == "fmnist":
        # Single-channel dataset: the constants are used as they are.
        MEAN_DATA = 0.1307
        STD_DATA = 0.3081
    elif key in channel_stats:
        means, stds = channel_stats[key]
        MEAN_DATA = np.mean(means)
        STD_DATA = np.mean(stds)

This section sets the run parameters

In [None]:
# Run configuration. Positional command-line arguments read in this cell:
#   argv[1] dataset name            argv[2] model architecture
#   argv[3] layers_neurons_percentage   argv[4] number_of_layers
#   argv[5] method                  argv[6] recalculate
#   argv[7] outputs_calculated      argv[8] noise_difference_calculated
attr="race"
# dataset_name="stl10"
dataset_name = sys.argv[1]

# NOTE(review): there is no "celeba" branch, so num_classes and input_channel
# are not set here for that dataset - confirm whether CelebA runs are expected.
if dataset_name == "stl10":
    num_classes = 10
    input_channel = 3
elif dataset_name == "utkface":
    num_classes = 4
    input_channel = 3
elif dataset_name == "fmnist":
    # NOTE(review): get_model_dataset uses input_channel = 1 for fmnist,
    # while 3 is set here - confirm which one matches the data.
    input_channel = 3
    num_classes = 10

root = "data" # where is data folder for datasets
# Differential-privacy settings passed to the training classes (0 = disabled).
use_DP = 0
noise = 0
norm = 0
delta = 1e-5
batch_size = 64

root_folder = "" # where all the outputs of the code are saved
TARGET_PATH = f"{root_folder}target_models/{dataset_name}/"

# model_architecture = "resnet18"
# model_architecture = "alexnet"
# model_architecture = "xception"
model_architecture = sys.argv[2]

# layers_neurons_percentage = 100
# number_of_layers = 1
layers_neurons_percentage = int(sys.argv[3])
number_of_layers = int(sys.argv[4])

# method = "NoIntermediate"
# method = "RandomForest"
# method = "Ttest"
# method = "KS2Samp"
# method = "KLDivergence"
# method = "Bootstrapping"
# method = "Noise"
# method = "JustNoise"
method = sys.argv[5]

# These 3 need to only be calculated once
datasets_ready = True
pretrained = True
test_models = False

# NOTE(security): eval() executes arbitrary command-line text. If only
# True/False are expected, a literal parser would be safer - confirm before
# changing, since it narrows the accepted inputs.
# recalculate = True # for each method only calculate this once
recalculate = eval(sys.argv[6])

# These 2 need to only be calculated once
outputs_calculated = eval(sys.argv[7])
noise_difference_calculated = eval(sys.argv[8])

# If you got an error due to the attack model architecture, 
# assign this to True, it will allow to run the code faster
prepare_dataset_pretrained = False

The code needs several folders to save its results. The next function creates them all automatically

In [None]:
def check_create_folders():
    """Create every output directory used by the pipeline, if it is missing.

    The paths are built from the module-level ``root_folder``,
    ``dataset_name``, ``model_architecture`` and ``method``.
    """
    attack_dir = f"{root_folder}attack/{dataset_name}/{model_architecture}"

    # Only the deepest directories are listed; makedirs creates their parents.
    all_paths = [
        f"{root_folder}datasets/{dataset_name}",
        f"{root_folder}target_models/{dataset_name}",
        f"{attack_dir}/outputs/",
        f"{attack_dir}/{method}/outputs",
    ]

    for path in all_paths:
        # exist_ok avoids failing when the directory is already there, and
        # missing parents (e.g. a nested root_folder) no longer break creation.
        os.makedirs(path, exist_ok=True)

# Create the output directory tree before anything is saved.
check_create_folders()

# Prepare Datasets

In [None]:
# When the splits have not been generated yet, build them once and save them.
# The saved files are loaded unconditionally below.
if not datasets_ready:
    num_classes, target_train, target_test, shadow_train, shadow_test, target_model, shadow_model = prepare_dataset(dataset_name, attr, root)

    print("Number of samples in training set:", len(target_train))
    print("Number of samples in test set:", len(target_test))

    torch.save(target_train, f"{root_folder}datasets/"+dataset_name+"/in_data_target")
    torch.save(target_test, f"{root_folder}datasets/"+dataset_name+"/out_data_target")

    torch.save(shadow_train, f"{root_folder}datasets/"+dataset_name+"/in_data_shadow")
    torch.save(shadow_test, f"{root_folder}datasets/"+dataset_name+"/out_data_shadow")

# NOTE(review): these files hold pickled dataset objects, not tensors. Recent
# PyTorch releases default torch.load to weights_only=True, which may refuse
# such objects - confirm against the installed version.
target_train = torch.load(f"{root_folder}datasets/"+dataset_name+"/in_data_target")
target_test = torch.load(f"{root_folder}datasets/"+dataset_name+"/out_data_target")

shadow_train = torch.load(f"{root_folder}datasets/"+dataset_name+"/in_data_shadow")
shadow_test = torch.load(f"{root_folder}datasets/"+dataset_name+"/out_data_shadow")


# Target Model Training

In [None]:
def train_model(PATH, device, train_set, test_set, model, use_DP, noise, norm, delta, epochs=100, batch_size=64):
    """Train ``model`` on ``train_set``, evaluate after every epoch and save it.

    The weights are saved to
    ``PATH + f"{dataset_name}_{model_architecture}_target.pth"``, using the
    module-level ``dataset_name`` and ``model_architecture``.

    Args:
        PATH: directory prefix (including the trailing separator) for the checkpoint.
        device: device passed on to ``model_training``.
        train_set: dataset used for training.
        test_set: dataset used for evaluation.
        model: the network to train.
        use_DP: truthy to train with differential privacy.
        noise: noise multiplier for differential privacy.
        norm: maximum gradient norm for differential privacy.
        delta: delta used when reporting the privacy budget.
        epochs: number of training epochs (previously fixed at 100).
        batch_size: batch size of both data loaders (previously fixed at 64).

    Returns:
        ``(acc_train, acc_test, overfitting)`` of the last epoch, where
        ``overfitting`` is ``round(acc_train - acc_test, 6)``. All three are 0
        when ``epochs`` is 0.
    """
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=True, num_workers=2)

    trainer = model_training(train_loader, test_loader, model, device, use_DP, noise, norm, delta)
    acc_train = 0
    acc_test = 0
    # Defined up front so the return statement is valid even with zero epochs.
    overfitting = 0

    for epoch in range(epochs):
        print("<======================= Epoch " + str(epoch+1) + " =======================>")
        print("target training")

        acc_train = trainer.train()
        print("target testing")
        acc_test = trainer.test()

        overfitting = round(acc_train - acc_test, 6)
        print('The overfitting rate is %s' % overfitting)

    FILE_PATH = PATH + f"{dataset_name}_{model_architecture}_target.pth"
    trainer.saveModel(FILE_PATH)
    print("Saved target model!!!")
    print("Finished training!!!")

    return acc_train, acc_test, overfitting

In [None]:
# Testing method
def test(model, loader, dname="Test set", printable=True):
    """Evaluate ``model`` on ``loader`` and return the accuracy in percent.

    Batches are moved to the module-level ``device``. The reported loss is the
    mean cross-entropy per evaluated sample, matching the criterion used by
    ``model_training``; it used to be printed as 0 because it was never
    accumulated.

    Args:
        model: network to evaluate; it is switched to eval mode.
        loader: data loader yielding ``(data, target)`` batches.
        dname: label used in the printed summary.
        printable: when True, print loss and accuracy.

    Returns:
        Accuracy in percent, or 0.0 when the loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    total = 0
    correct = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device) # send to device
            output = model(data)
            # Sum (not mean) per batch, so the final division gives a
            # per-sample average even with a smaller last batch.
            loss_sum += F.cross_entropy(output, target, reduction="sum").item()
            _, pred = torch.max(output.data, 1)
            total += target.size(0)
            correct += (pred == target).sum().item()

    # Guard against an empty loader instead of dividing by zero.
    mean_loss = loss_sum / total if total else 0.0
    accuracy = 100 * correct / total if total else 0.0
    if printable:
        print('{}: Mean loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
            dname, mean_loss, correct, total,
            accuracy
            ))
    return accuracy

In [None]:
# Class for target model
class model_training():
    """Trainer and evaluator for the target model.

    Uses SGD (lr 1e-2, momentum 0.9, weight decay 5e-4) with cross-entropy
    loss, and optionally wraps model, optimizer and train loader with an
    Opacus ``PrivacyEngine``.

    Args:
        trainloader: data loader used by ``train``.
        testloader: data loader used by ``test``.
        model: the network; it is moved to ``device``.
        device: device for the model and the batches.
        use_DP: truthy to enable differentially private training.
        noise: noise multiplier for differential privacy.
        norm: maximum gradient norm for differential privacy.
        delta: delta used when reporting the privacy budget.
    """

    def __init__(self, trainloader, testloader, model, device, use_DP, noise, norm, delta):
        self.use_DP = use_DP
        self.device = device
        self.delta = delta
        self.net = model.to(self.device)
        self.trainloader = trainloader
        self.testloader = testloader

        # NOTE(review): this branch is only taken when ``device`` is the plain
        # string 'cuda'. DataParallel prefixes state-dict keys with "module.",
        # so enabling it changes the checkpoint format - confirm before
        # changing the condition.
        if self.device == 'cuda':
            self.net = torch.nn.DataParallel(self.net)
            cudnn.benchmark = True

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.SGD(self.net.parameters(), lr=1e-2, momentum=0.9, weight_decay=5e-4)

        self.noise_multiplier, self.max_grad_norm = noise, norm

        if self.use_DP:
            self.privacy_engine = PrivacyEngine()
            # NOTE(review): the wrapped module is stored in self.model, while
            # train() and test() run self.net - confirm this is intended.
            self.model, self.optimizer, self.trainloader = self.privacy_engine.make_private(
                module=model,
                optimizer=self.optimizer,
                data_loader=self.trainloader,
                noise_multiplier=self.noise_multiplier,
                max_grad_norm=self.max_grad_norm,
            )
            print( 'noise_multiplier: %.3f | max_grad_norm: %.3f' % (self.noise_multiplier, self.max_grad_norm))

    def _needs_one_hot(self):
        """Return True when the criterion is not plain cross-entropy."""
        return str(self.criterion) != "CrossEntropyLoss()"

    def _prepare_targets(self, targets):
        """Pick the first label set of a multi-label batch and one-hot encode it if needed."""
        if isinstance(targets, list):
            targets = targets[0]

        if self._needs_one_hot():
            # NOTE(review): self.num_classes is never assigned in this class;
            # this path only works if it is set from outside.
            targets = torch.from_numpy(np.eye(self.num_classes)[targets]).float()
        return targets

    # Training
    def train(self):
        """Run one training epoch.

        Returns:
            Training accuracy as a fraction in [0, 1]; 0.0 when the loader
            yields no samples.
        """
        self.net.train()

        train_loss = 0
        correct = 0
        total = 0
        num_batches = 0

        for inputs, targets in self.trainloader:
            num_batches += 1
            targets = self._prepare_targets(targets)

            inputs, targets = inputs.to(self.device), targets.to(self.device)
            self.optimizer.zero_grad()
            outputs = self.net(inputs)

            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            if self._needs_one_hot():
                # Back from one-hot rows to class indices for the comparison.
                _, targets = targets.max(1)

            correct += predicted.eq(targets).sum().item()

        if self.use_DP:
            epsilon, best_alpha = self.privacy_engine.accountant.get_privacy_spent(delta=self.delta)
            # Report the delta that was actually used instead of a fixed 1e-5.
            print("\u03B1: %.3f \u03B5: %.3f \u03B4: %g" % (best_alpha, epsilon, self.delta))

        # Average over the number of batches. The previous code divided by the
        # last batch index, which is one too small and zero for a single batch.
        mean_loss = train_loss / num_batches if num_batches else 0.0
        accuracy = 1. * correct / total if total else 0.0
        print( 'Train Acc: %.3f%% (%d/%d) | Loss: %.3f' % (100. * accuracy, correct, total, mean_loss))

        return accuracy

    def saveModel(self, path):
        """Save the state dict of the network to ``path``."""
        torch.save(self.net.state_dict(), path)

    def get_noise_norm(self):
        """Return ``(noise_multiplier, max_grad_norm)``."""
        return self.noise_multiplier, self.max_grad_norm

    def test(self):
        """Evaluate on the test loader.

        Returns:
            Test accuracy as a fraction in [0, 1]; 0.0 when the loader yields
            no samples.
        """
        self.net.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in self.testloader:
                targets = self._prepare_targets(targets)

                inputs, targets = inputs.to(self.device), targets.to(self.device)
                outputs = self.net(inputs)

                _, predicted = outputs.max(1)
                total += targets.size(0)
                if self._needs_one_hot():
                    _, targets = targets.max(1)

                correct += predicted.eq(targets).sum().item()

        accuracy = 1. * correct / total if total else 0.0
        print( 'Test Acc: %.3f%% (%d/%d)' % (100. * accuracy, correct, total))

        return accuracy

In [None]:
# Build a fresh target model for the configured architecture.
target_model, _ = target_model_fn(input_channel, num_classes)

# We only train the models if we don't have them pretrained
if not pretrained:
    train_model(TARGET_PATH, device, target_train, target_test, target_model, use_DP, noise, norm, delta)

# Optional sanity check: reload the saved target checkpoint and print its
# accuracy on the training split and on the held-out split.
if test_models:
    test_target_model, _ = target_model_fn(input_channel, num_classes)
    test_target_model.load_state_dict(torch.load(TARGET_PATH + f"{dataset_name}_{model_architecture}_target.pth"))

    train_loader = torch.utils.data.DataLoader(
        target_train, batch_size=64, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(
        target_test, batch_size=64, shuffle=True, num_workers=2)

    print("\n-------------------------------------------------")
    # model_training is used only for its test() method here; passing the
    # train loader as the test loader yields the training-set accuracy.
    test_target_model_train = model_training(train_loader, train_loader, test_target_model, device, use_DP, noise, norm, delta)
    print("Target Model Train Acc:", test_target_model_train.test())

    test_target_model_test = model_training(train_loader, test_loader, test_target_model, device, use_DP, noise, norm, delta)
    print("Target Model Test Acc:", test_target_model_test.test())
    print("\n-------------------------------------------------")

# Print NumPy arrays in full, without truncation.
np.set_printoptions(threshold=np.inf)

# Shadow Model Training

In [None]:
# Class for shadow model
class shadow():
    """Bundle a shadow model with its loaders, loss and optimizer for training/evaluation.

    Args:
        trainloader: iterable of ``(inputs, targets)`` batches used by ``train()``.
        testloader: iterable of ``(inputs, targets)`` batches used by ``test()``.
        model: the network to train; it is moved to ``device``.
        device: device the model and every batch are moved to.
        use_DP: when true, model/optimizer/train loader are wrapped by Opacus'
            ``PrivacyEngine.make_private``.
        noise: noise multiplier passed to ``make_private``.
        norm: maximum per-sample gradient norm passed to ``make_private``.
        loss: criterion called as ``loss(outputs, targets)``.
        optimizer: optimizer that updates ``model``.
        delta: delta used when querying the privacy accountant.
    """

    def __init__(self, trainloader, testloader, model, device, use_DP, noise, norm, loss, optimizer, delta):
        self.delta = delta
        self.use_DP = use_DP
        self.device = device
        self.model = model.to(self.device)
        self.trainloader = trainloader
        self.testloader = testloader

        self.criterion = loss
        self.optimizer = optimizer

        self.noise_multiplier, self.max_grad_norm = noise, norm

        if self.use_DP:
            self.privacy_engine = PrivacyEngine()
            self.model, self.optimizer, self.trainloader = self.privacy_engine.make_private(
                module=self.model,
                optimizer=self.optimizer,
                data_loader=self.trainloader,
                noise_multiplier=self.noise_multiplier,
                max_grad_norm=self.max_grad_norm,
            )
            print( 'noise_multiplier: %.3f | max_grad_norm: %.3f' % (self.noise_multiplier, self.max_grad_norm))

        # NOTE(review): the scheduler is created here, but no method of this class
        # calls self.scheduler.step(); the learning rate only decays if a caller steps it.
        self.scheduler = lr_scheduler.MultiStepLR(self.optimizer, [50, 100], 0.1)

    # Training
    def train(self):
        """Run one pass over ``trainloader`` and return the training accuracy in [0, 1].

        Returns 0.0 when the loader yields no samples.
        """
        self.model.train()

        train_loss = 0
        correct = 0
        total = 0
        num_batches = 0

        for inputs, targets in self.trainloader:
            inputs, targets = inputs.to(self.device), targets.to(self.device)

            self.optimizer.zero_grad()
            outputs = self.model(inputs)

            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            num_batches += 1

        if self.use_DP:
            epsilon, best_alpha = self.privacy_engine.accountant.get_privacy_spent(delta=self.delta)
            # Report the delta that was actually used for the accounting.
            print("\u03B1: %.3f \u03B5: %.3f \u03B4: %g" % (best_alpha, epsilon, self.delta))

        # Average over the number of batches seen (the last enumerate index is one
        # less than that, and is zero for a single batch); guard empty loaders.
        accuracy = 1.*correct/total if total else 0.0
        avg_loss = 1.*train_loss/num_batches if num_batches else 0.0
        print( 'Train Acc: %.3f%% (%d/%d) | Loss: %.3f' % (100.*accuracy, correct, total, avg_loss))

        return accuracy


    def saveModel(self, path):
        """Save the model's ``state_dict`` to ``path``."""
        torch.save(self.model.state_dict(), path)

    def get_noise_norm(self):
        """Return ``(noise_multiplier, max_grad_norm)`` as given at construction."""
        return self.noise_multiplier, self.max_grad_norm

    def test(self):
        """Evaluate on ``testloader`` without gradients and return the accuracy in [0, 1].

        Returns 0.0 when the loader yields no samples.
        """
        self.model.eval()
        test_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in self.testloader:
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                outputs = self.model(inputs)

                loss = self.criterion(outputs, targets)

                test_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

            accuracy = 1.*correct/total if total else 0.0
            print( 'Test Acc: %.3f%% (%d/%d)' % (100.*accuracy, correct, total))

        return accuracy

In [None]:
def train_shadow_model(PATH, device, shadow_model, train_loader, test_loader, use_DP, noise, norm, loss, optimizer, delta, num_epochs=100):
    """Train the shadow model, save its weights and return the final metrics.

    Reads the module-level names ``early_stop``, ``patience``, ``shadow_train``,
    ``create_datasets``, ``EarlyStopping``, ``dataset_name`` and ``model_architecture``.

    Args:
        PATH: directory prefix the weights are written to, as
            ``PATH + "<dataset_name>_<model_architecture>_shadow.pth"``.
        device: device the model is trained on.
        shadow_model: network to train.
        train_loader, test_loader: loaders handed to the ``shadow`` wrapper.
        use_DP, noise, norm, delta: differential-privacy settings for ``shadow``.
        loss: training criterion.
        optimizer: optimizer that updates ``shadow_model``.
        num_epochs: maximum number of epochs (default 100).

    Returns:
        ``(acc_train, acc_test, overfitting)`` of the last completed epoch, where
        ``overfitting`` is ``round(acc_train - acc_test, 6)``; all zero when no
        epoch ran.
    """
    model = shadow(train_loader, test_loader, shadow_model, device, use_DP, noise, norm, loss, optimizer, delta)
    acc_train = 0
    acc_test = 0
    # Defined up front so the return statement is valid even if no epoch completes.
    overfitting = 0
    batch_size = 64

    if early_stop:
        # to track the average training loss per epoch as the model trains
        avg_train_losses = []
        # to track the average validation loss per epoch as the model trains
        avg_valid_losses = []
        train_loader, valid_loader = create_datasets(batch_size, shadow_train)

        early_stopping = EarlyStopping(patience=patience, verbose=True)

    for i in range(num_epochs):
        print("<======================= Epoch " + str(i+1) + " =======================>")
        print("shadow training")
        return_cond = False

        if early_stop:
            print(f"\rEpoch {i}  "  , end="")
            # NOTE(review): the `shadow` class visible in this file does not define
            # train_early_stop; confirm it is provided elsewhere before enabling early_stop.
            shadow_model, avg_train_losses, avg_valid_losses, return_cond = model.train_early_stop(
                early_stopping, avg_train_losses, avg_valid_losses,
                shadow_model, model.optimizer, i, train_loader, valid_loader, returnable=True)

            print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
            # The backslash is escaped so the printed text is unchanged while the
            # invalid "\S" escape sequence is gone.
            print("\\Shadow Model train accuracy: ")
            acc_train = model.train()
            print(acc_train)
            print("\\Shadow Model Test Accuracy")
            acc_test = model.test()
            print(acc_test)
        else:
            acc_train = model.train()
            print("shadow testing")
            acc_test = model.test()

        # Computed before the early-stop break so the returned value always
        # belongs to the last epoch that ran.
        overfitting = round(acc_train - acc_test, 6)

        print('The overfitting rate is %s' % overfitting)

        if return_cond:
            break

    # Honour the PATH argument rather than the TARGET_PATH global.
    FILE_PATH = PATH + f"{dataset_name}_{model_architecture}_shadow.pth"
    model.saveModel(FILE_PATH)
    print("saved shadow model!!!")
    print("Finished training!!!")

    return acc_train, acc_test, overfitting

In [None]:
# Criterion shared by shadow training and the evaluation wrappers below.
loss = nn.CrossEntropyLoss()

# The shadow model is built with the same factory function as the target model.
shadow_model, _ = target_model_fn(input_channel, num_classes)
if not pretrained:
    shadow_trainloader = torch.utils.data.DataLoader(
        shadow_train, batch_size=batch_size, shuffle=True, num_workers=2)
    shadow_testloader = torch.utils.data.DataLoader(
        shadow_test, batch_size=batch_size, shuffle=True, num_workers=2)

    optimizer = optim.SGD(shadow_model.parameters(), lr=1e-2, momentum=0.9, weight_decay=5e-4)
    train_shadow_model(TARGET_PATH, device, shadow_model, shadow_trainloader, shadow_testloader, use_DP, noise, norm, loss, optimizer, delta)

# Optional sanity check: reload the saved shadow weights and report accuracy on both splits.
if test_models:
    test_shadow_model, _ = target_model_fn(input_channel, num_classes)
    test_shadow_model.load_state_dict(torch.load(TARGET_PATH + f"{dataset_name}_{model_architecture}_shadow.pth"))

    shadow_trainloader = torch.utils.data.DataLoader(
        shadow_train, batch_size=batch_size, shuffle=True, num_workers=2)
    shadow_testloader = torch.utils.data.DataLoader(
        shadow_test, batch_size=batch_size, shuffle=True, num_workers=2)

    # The optimizer is only needed to satisfy the shadow constructor; only .test() is called below.
    optimizer = optim.SGD(test_shadow_model.parameters(), lr=1e-2, momentum=0.9, weight_decay=5e-4)
    print("\n-------------------------------------------------")
    # The train loader is passed as the test loader so .test() measures accuracy on the training split.
    # NOTE(review): shadow.__init__ calls make_private when use_DP is true, and the two
    # wrappers below share one model and one optimizer - confirm this works with use_DP enabled.
    test_shadow_model_train = shadow(shadow_trainloader, shadow_trainloader, test_shadow_model, device, use_DP, noise, norm, loss, optimizer, delta)
    print("Shadow Model Train Acc:", test_shadow_model_train.test())

    test_shadow_model_test = shadow(shadow_testloader, shadow_testloader, test_shadow_model, device, use_DP, noise, norm, loss, optimizer, delta)
    print("Shadow Model Test Acc:", test_shadow_model_test.test())
    print("\n-------------------------------------------------")

# Perform statistical analysis to rank the layers and identify which layer leaks the most information

In [None]:
# Adds Gaussian noise to the original input and returns the noisy input
# together with the selected layer's (flattened) output for it
def addNoiseInput(data, feature_vector):
    """Perturb ``data`` with Gaussian noise and pass it through ``feature_vector``.

    The noisy input is ``data + randn * STD_DATA + MEAN_DATA``, where ``randn`` is
    standard-normal noise of the same shape as ``data`` and ``STD_DATA`` /
    ``MEAN_DATA`` are module-level values.

    Args:
        data: input tensor.
        feature_vector: callable (e.g. a truncated model) applied to the noisy input.

    Returns:
        ``(noise_input, output)``: the perturbed tensor and the flattened result of
        ``feature_vector(noise_input)``.
    """
    # Draw the noise on the same device as `data`, so inputs that are not on the
    # default device do not fail with a device mismatch.
    noise_input = data + torch.randn(data.size(), device=data.device) * STD_DATA + MEAN_DATA
    output = feature_vector(noise_input).flatten()

    return noise_input, output

In [None]:
# Method allows to get the output from specific layer based on the input

# condition=True: keep only the samples whose label the model predicts correctly;
# condition=False: keep every sample, regardless of the model's prediction
def _collect_layer_outputs(model, feature_vector, dataset, condition, noise):
    """Feed ``dataset`` sample by sample through ``model`` and ``feature_vector``.

    Returns ``(outputs, noisy_outputs, correct_by_feature_vector, correct_by_model, total)``.
    ``outputs`` holds the flattened ``feature_vector`` output (as a NumPy array) of
    every kept sample; ``noisy_outputs`` holds the matching outputs for the
    noise-perturbed input when ``noise`` is true.
    """
    outputs = []
    noisy_outputs = []
    correct_by_feature_vector = 0
    correct_by_model = 0
    total = 0

    with torch.no_grad():
        loader = torch.utils.data.DataLoader(dataset, batch_size=1)
        for og_data, og_target in loader:
            data = og_data.clone()
            target = og_target.clone()
            total += 1

            # One forward pass through each network per sample; the results are
            # reused for both the statistics and the stored outputs.
            output_target = model(data)
            _, pred_target = torch.topk(output_target, 1, dim=1, largest=True, sorted=True)
            output_feature = feature_vector(data)
            _, pred_feature = torch.topk(output_feature, 1, dim=1, largest=True, sorted=True)

            for i, t in enumerate(target):
                predicted_by_model = t in pred_target[i]
                if predicted_by_model:
                    correct_by_model += 1
                if t in pred_feature[i]:
                    correct_by_feature_vector += 1

                # With condition=True only correctly classified samples are kept.
                if condition and not predicted_by_model:
                    continue

                outputs.append(output_feature.flatten().cpu().numpy())
                if noise:
                    _, noise_output = addNoiseInput(og_data, feature_vector)
                    noisy_outputs.append(noise_output.cpu().numpy())

            data = None
            target = None
            clear_cache()

            # Report memory usage every 500 processed samples.
            if total % 500 == 0:
                get_memory()

    return outputs, noisy_outputs, correct_by_feature_vector, correct_by_model, total


def getTrainPredictions(model, shadow_in, shadow_out, target_layer, condition=True, noise=False):
    """Collect the outputs of one layer for member (IN) and non-member (OUT) samples.

    The feature extractor is the first ``target_layer + 1`` children of ``model``
    chained in an ``nn.Sequential``; for the last child the whole ``model`` is used.

    Args:
        model: network to probe; it is switched to eval mode.
        shadow_in: dataset of samples treated as IN data.
        shadow_out: dataset of samples treated as OUT data.
        target_layer: index into ``list(model.children())``.
        condition: if true, keep only samples the model classifies correctly;
            if false, keep every sample.
        noise: if true, also collect the layer output for a noise-perturbed copy
            of each kept sample (via ``addNoiseInput``).

    Returns:
        ``(output_train_in, output_train_out, noisy_output_train_in,
        noisy_output_train_out)``: lists of flattened NumPy arrays. All four are
        empty when ``target_layer`` is not a valid child index.
    """
    model.eval()
    output_train_in = []
    output_train_out = []

    noisy_output_train_in = []
    noisy_output_train_out = []

    list_of_layers = list(model.children())
    if target_layer not in range(len(list_of_layers)):
        return output_train_in, output_train_out, noisy_output_train_in, noisy_output_train_out

    j = int(target_layer)
    if j == len(list_of_layers)-1:
        feature_vector = model
    else:
        feature_vector = nn.Sequential(*list_of_layers[0:j+1]) # get first j layers

    # Get predictions of train in and train out samples from target model
    print("Getting predictions IN")
    (output_train_in, noisy_output_train_in,
     correct_by_feature_vector_in, correct_by_model_in, total_in) = \
        _collect_layer_outputs(model, feature_vector, shadow_in, condition, noise)

    print("Getting predictions OUT")
    (output_train_out, noisy_output_train_out,
     correct_by_feature_vector_out, correct_by_model_out, total_out) = \
        _collect_layer_outputs(model, feature_vector, shadow_out, condition, noise)

    print("------------------------------")
    print("Layer ", j, " analysis")
    print("IN DATA. Correctly predicted by feature vector: ", correct_by_feature_vector_in)
    print("IN DATA. Incorrectly predicted by feature vector: ", total_in - correct_by_feature_vector_in)
    print("******************************")

    print("IN DATA. Correctly predicted by target model: ", correct_by_model_in)
    print("IN DATA. Incorrectly predicted by target model: ", total_in - correct_by_model_in)
    print("IN DATA. Total: ", total_in)

    print("==============================")
    print("OUT DATA. Correctly predicted by feature vector: ", correct_by_feature_vector_out)
    print("OUT DATA. Incorrectly predicted by feature vector: ", total_out - correct_by_feature_vector_out)
    print("******************************")

    print("OUT DATA. Correctly predicted by target model: ", correct_by_model_out)
    # The OUT error count is relative to the number of OUT samples, not IN samples.
    print("OUT DATA. Incorrectly predicted by target model: ", total_out - correct_by_model_out)
    print("OUT DATA. Total: ", total_out)

    return output_train_in, output_train_out, noisy_output_train_in, noisy_output_train_out


In [None]:
# Used for KL divergence

def compute_probs(data, n, min_n, max_n):
    """Histogram ``data`` into ``n`` bins over ``[min_n, max_n]``.

    Returns ``(edges, fractions)``: the bin edges and each bin's count divided
    by ``len(data)``.
    """
    counts, edges = np.histogram(data, bins=n, range=(min_n, max_n))
    fractions = counts / len(data)
    return edges, fractions

def get_probs(list_of_tuples):
    """Split a list of ``(p_i, q_i)`` pairs into two arrays ``(p, q)``."""
    firsts = [pair[0] for pair in list_of_tuples]
    seconds = [pair[1] for pair in list_of_tuples]
    return np.array(firsts), np.array(seconds)

def support_intersection(p, q):
    """Return the ``(p_i, q_i)`` pairs in which both entries are non-zero."""
    return [pair for pair in zip(p, q) if (pair[0] != 0) & (pair[1] != 0)]

def kl_divergence(p, q):
    """Return ``sum(p * log(p / q))`` for the element-wise arrays ``p`` and ``q``."""
    log_ratio = np.log(p / q)
    return np.sum(p * log_ratio)

In [None]:
# Once the per-neuron p-values of the chosen method are calculated, a Random Forest
# is trained to get the accuracy of the layer, which is used later to rank it
def random_forest_layers_train(data_in, data_out, p_vals, layer_number):
    """Score a layer with a Random Forest and extract its lowest-scored neurons.

    The number of neurons kept is ``layers_neurons_percentage`` percent of the
    layer width (rounded); they are the indices with the smallest ``p_vals``.
    The forest itself is fitted on all neurons of ``data_in`` (label 1) and
    ``data_out`` (label 0) with a fixed 75/25 split.

    Returns:
        ``(accuracy, feature_importances, topk_columns_in, topk_columns_out, top_vals)``
        where the two ``topk_columns`` lists hold one column (list of values) per
        selected neuron.
    """
    layer_width = len(data_in[0])
    keep = round((layers_neurons_percentage / 100) * layer_width)

    # Indices of the `keep` smallest p-values (stable ascending order).
    top_vals = sorted(range(len(p_vals)), key=lambda idx: p_vals[idx])[:keep]

    layers_topk_data_in = [[sample[idx] for sample in data_in] for idx in top_vals]
    layers_topk_data_out = [[sample[idx] for sample in data_out] for idx in top_vals]

    # Train Random Forest on the output
    members = np.array(data_in)
    non_members = np.array(data_out)
    features = np.vstack([members, non_members])
    labels = np.vstack([np.ones((len(members), 1)), np.zeros((len(non_members), 1))])

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.25, random_state=12)

    forest = RandomForestClassifier(n_estimators=100)
    forest.fit(X_train, y_train.ravel())
    accuracy = accuracy_score(y_test, forest.predict(X_test))

    print("Layer " + str (layer_number) + " Accuracy " + str(accuracy))

    return accuracy, forest.feature_importances_, layers_topk_data_in, layers_topk_data_out, top_vals


In [None]:
# Print the run configuration so that every log file records which dataset,
# ranking method and layer/neuron selection settings produced it.
print("-------> Dataset", dataset_name)
print("-------> METHOD", method)
print("-------> layers_neurons_percentage", layers_neurons_percentage)
print("-------> layers number ", number_of_layers)

In [None]:
# Module-level state filled in by get_output_probs.
# Number of neurons kept for each selected layer (one entry per selected layer).
layer_neurons = []
# Index of each selected layer, in the order the layers were selected.
layer_numbers = []
# Neuron count computed for the most recently processed layer.
layers_neurons_number = 0
# Running sum of the neuron counts of all selected layers.
intermediate_layer_input = 0

In [None]:
# Gets the shadow data and outputs the ranks of each layer with saved p-values
def _neuron_scores(method, output_train_in, output_train_out):
    """Return one score per neuron comparing IN and OUT outputs with ``method``.

    ``Ttest``/``Noise``/``JustNoise`` use the Welch t-test p-value, ``KS2Samp``
    the two-sample KS p-value, ``KLDivergence`` the histogram-based divergence
    and ``Bootstrapping`` the mean KS p-value over five resamples of size 100.

    Raises:
        ValueError: if ``method`` is none of the above.
    """
    known_methods = ("Ttest", "Noise", "JustNoise", "KS2Samp", "KLDivergence", "Bootstrapping")
    if method not in known_methods:
        raise ValueError(f"Unknown layer-ranking method: {method!r}")

    scores = []
    for i in range(len(output_train_in[0])):
        col_in = [row[i] for row in output_train_in]
        col_out = [row[i] for row in output_train_out]

        if method in ("Ttest", "Noise", "JustNoise"):
            _, score = stats.ttest_ind(col_in, col_out, equal_var=False)
        elif method == "KS2Samp":
            _, score = stats.ks_2samp(col_in, col_out)
        elif method == "KLDivergence":
            # Both histograms share the same range so their bins line up.
            min_n = min(col_in + col_out)
            max_n = max(col_in + col_out)
            n_bins = 50
            _, p = compute_probs(col_in, n_bins, min_n, max_n)
            _, q = compute_probs(col_out, n_bins, min_n, max_n)

            p, q = get_probs(support_intersection(p, q))
            score = kl_divergence(p, q)
        else:  # Bootstrapping
            column_p = []
            # Random sample with replacement
            for _ in range(5):
                sample_col_in = np.random.choice(col_in, replace=True, size=100)
                sample_col_out = np.random.choice(col_out, replace=True, size=100)

                _, p = stats.ks_2samp(sample_col_in, sample_col_out)
                column_p.append(p)
            score = mean(column_p)

        scores.append(score)

    return scores


# Gets the shadow data and outputs the ranks of each layer with saved p-values
def get_output_probs(shadow_model, shadow_in, shadow_out, method):
    """Rank the layers of ``shadow_model`` and store the top neurons of the best ones.

    When the global ``recalculate`` is true, every layer is processed: layer
    outputs (unless ``outputs_calculated``) and clean-vs-noisy differences
    (unless ``noise_difference_calculated``) are computed and saved, the layer is
    scored with a Random Forest, and the ranking is written to disk. The ranking
    is then reloaded and the ``number_of_layers`` best layers - never the final
    layer - are selected together with their top neurons.

    Side effects: reads and writes files under
    ``{root_folder}attack/{dataset_name}/{model_architecture}/``, appends to the
    globals ``layer_neurons`` and ``layer_numbers``, and updates the globals
    ``layers_neurons_number`` and ``intermediate_layer_input``.

    Args:
        shadow_model: trained shadow network whose children are the layers to rank.
        shadow_in: dataset of samples treated as IN data.
        shadow_out: dataset of samples treated as OUT data.
        method: one of "RandomForest", "Ttest", "KS2Samp", "Noise", "JustNoise",
            "KLDivergence", "Bootstrapping".

    Raises:
        ValueError: during recalculation, if ``method`` is not one of the above.
    """
    global layers_neurons_number
    global intermediate_layer_input

    base_dir = f"{root_folder}attack/{dataset_name}/{model_architecture}/"
    outputs_dir = base_dir + "outputs/"
    method_dir = base_dir + f"{method}/outputs/"

    list_of_layers = list(shadow_model.children())

    # Custom number of best layers
    layers_scores = []
    layers_vals = []

    if recalculate:
        for j in range(len(list_of_layers)):
            print("---> Layer ", j)
            if not outputs_calculated:
                # condition = True, when only considering correct predictions; False, when considering all
                print("Calculating outputs")
                output_train_in, output_train_out, noisy_output_train_in, noisy_output_train_out = \
                    getTrainPredictions(shadow_model, shadow_in, shadow_out, j, condition=True, noise=True)

                print("Calculated outputs")
                torch.save(noisy_output_train_in, outputs_dir + f"noisy_output_train_in_layer_{j}")
                torch.save(noisy_output_train_out, outputs_dir + f"noisy_output_train_out_layer_{j}")

                torch.save(output_train_in, outputs_dir + f"output_train_in_layer_{j}")
                torch.save(output_train_out, outputs_dir + f"output_train_out_layer_{j}")

                print("Saved Them")

                clear_cache()
                get_memory()

            if not noise_difference_calculated:

                noisy_output_train_in = torch.load(outputs_dir + f"noisy_output_train_in_layer_{j}")
                noisy_output_train_out = torch.load(outputs_dir + f"noisy_output_train_out_layer_{j}")

                output_train_in = torch.load(outputs_dir + f"output_train_in_layer_{j}")
                output_train_out = torch.load(outputs_dir + f"output_train_out_layer_{j}")

                print("Calculating Difference IN")

                diff_in = [abs(clean - noisy) for clean, noisy in zip(output_train_in, noisy_output_train_in)]

                clear_cache()
                get_memory()

                print("Calculating Difference OUT")

                diff_out = [abs(clean - noisy) for clean, noisy in zip(output_train_out, noisy_output_train_out)]

                print("output_train_in", len(output_train_in), " --- ", len(output_train_in[0]))
                print("output_train_out", len(output_train_out), " --- ", len(output_train_out[0]))
                print("noisy_output_train_in", len(noisy_output_train_in), " --- ", len(noisy_output_train_in[0]))
                print("noisy_output_train_out", len(noisy_output_train_out), " --- ", len(noisy_output_train_out[0]))
                print("difference_in", len(diff_in), " --- ", len(diff_in[0]))
                print("difference_out", len(diff_out), " --- ", len(diff_out[0]))

                print("Calculated Everything")

                torch.save(diff_in, outputs_dir + f"difference_in_layer_{j}")
                torch.save(diff_out, outputs_dir + f"difference_out_layer_{j}")

                print("Saved")

                clear_cache()
                get_memory()

            output_train_in = torch.load(outputs_dir + f"output_train_in_layer_{j}")
            output_train_out = torch.load(outputs_dir + f"output_train_out_layer_{j}")

            if method == "RandomForest":
                all_X_train = np.vstack([np.array(output_train_in),np.array(output_train_out)])
                all_y_train = np.vstack([np.ones((len(np.array(output_train_in)),1)),np.zeros((len(np.array(output_train_out)),1))])

                X_train, X_test, y_train, y_test = train_test_split(all_X_train, all_y_train, test_size=0.25, random_state=12)

                rf = RandomForestClassifier(n_estimators=100)
                rf.fit(X_train, y_train.ravel())
                accuracy = accuracy_score(y_test, rf.predict(X_test))

                print("Layer " + str (j) + " Accuracy " + str(accuracy))
                layers_scores.append(accuracy)
                layers_vals.append([rf.feature_importances_])
            else:
                # "Noise" ranks on the clean-vs-noisy differences, "JustNoise" on the
                # noisy outputs; every other method ranks on the clean outputs.
                if method == "Noise":
                    output_train_in = torch.load(outputs_dir + f"difference_in_layer_{j}")
                    output_train_out = torch.load(outputs_dir + f"difference_out_layer_{j}")
                elif method == "JustNoise":
                    output_train_in = torch.load(outputs_dir + f"noisy_output_train_in_layer_{j}")
                    output_train_out = torch.load(outputs_dir + f"noisy_output_train_out_layer_{j}")

                # Get P-values of neurons' outputs. Each method now runs its own
                # statistic; previously an always-true condition sent every method
                # through the t-test.
                p_vals = _neuron_scores(method, output_train_in, output_train_out)

                print("Calculating RF accuracy")
                # NOTE(review): random_forest_layers_train keeps the neurons with the
                # SMALLEST scores; confirm that is intended for "KLDivergence", where a
                # larger value means a bigger difference between IN and OUT.
                accuracy, feature_importances, layers_topk_data_in, layers_topk_data_out, top_vals = random_forest_layers_train(output_train_in, output_train_out, p_vals, j)

                print("Saving data of layer")

                torch.save(accuracy, method_dir + f"accuracy_in_layer_{j}")
                torch.save(feature_importances, method_dir + f"feature_importances_in_layer_{j}")

                torch.save(layers_topk_data_in, method_dir + f"layers_topk_data_in_in_layer_{j}")
                torch.save(layers_topk_data_out, method_dir + f"layers_topk_data_out_in_layer_{j}")
                torch.save(top_vals, method_dir + f"top_vals_in_layer_{j}")
                print("Saved data")

        print("Saving layers' values")
        if method != "RandomForest":
            for j in range(len(list_of_layers)):
                layers_scores.append(torch.load(method_dir + f"accuracy_in_layer_{j}"))
                layers_vals.append([torch.load(method_dir + f"feature_importances_in_layer_{j}")])

        # Sort layers by ranks
        layers_numbers = list(range(len(list_of_layers)))
        sorted_layers_scores, sorted_layers_numbers = zip(*sorted(zip(layers_scores, layers_numbers)))
        sorted_layers_numbers = sorted_layers_numbers[::-1]
        sorted_layers_scores = sorted_layers_scores[::-1]

        torch.save(sorted_layers_numbers, method_dir + "sorted_layers_numbers")
        torch.save(layers_vals, method_dir + "layers_vals")
        torch.save(sorted_layers_scores, method_dir + "sorted_layers_scores")

    sorted_layers_numbers = torch.load(method_dir + "sorted_layers_numbers")
    layers_vals = torch.load(method_dir + "layers_vals")
    sorted_layers_scores = torch.load(method_dir + "sorted_layers_scores")

    print("\nLayers Ranks")
    for i in range(len(sorted_layers_scores)):
        print("Layer", sorted_layers_numbers[i], "  Score", sorted_layers_scores[i])

    top_values = []
    selected_layers = []
    last_layer = len(list_of_layers) - 1

    # Walk the ranking from best to worst and take the first `number_of_layers`
    # layers, skipping the final layer. Incrementing the loop variable, as was
    # done before, did not carry over to the next iteration and could select the
    # same layer twice.
    for layer in sorted_layers_numbers:
        if len(selected_layers) == number_of_layers:
            break

        if layer == last_layer:
            print("Pass")
            continue

        print("Getting Layer", layer)
        selected_layers.append(layer)

        if method == "Noise":
            output_train_in_all = torch.load(outputs_dir + f"difference_in_layer_{layer}")
        elif method == "JustNoise":
            output_train_in_all = torch.load(outputs_dir + f"noisy_output_train_in_layer_{layer}")
            print("output train in", len(output_train_in_all[0]))
        else:
            output_train_in_all = torch.load(outputs_dir + f"output_train_in_layer_{layer}")

        number_of_neurons_in_layer = len(output_train_in_all[0])
        layers_neurons_number = round((layers_neurons_percentage / 100) * number_of_neurons_in_layer)
        print("layers_neurons_number", layers_neurons_number)

        layer_neurons.append(layers_neurons_number)
        layer_numbers.append(layer)

        if method == "RandomForest":
            # Most important features first.
            importances = layers_vals[layer][0]
            top_neurons = sorted(range(len(importances)), key=lambda i: importances[i], reverse=True)[:layers_neurons_number]
        else:
            top_neurons_preprocessed = torch.load(method_dir + f"top_vals_in_layer_{layer}")

            # NOTE(review): top_vals already holds neuron indices, so this orders
            # positions within that list by index value - confirm this is what the
            # consumer of `top_values` expects.
            descending = method == "KLDivergence"
            top_neurons = sorted(range(len(top_neurons_preprocessed)), key=lambda i: top_neurons_preprocessed[i], reverse=descending)[:layers_neurons_number]

        top_values.append(top_neurons)

        intermediate_layer_input = intermediate_layer_input + layers_neurons_number

    # Save the layers that were actually selected so that entry i of
    # `layers_numbers` corresponds to entry i of `top_values`.
    torch.save(tuple(selected_layers), method_dir + "layers_numbers")
    torch.save(top_values, method_dir + "top_values")


In [None]:
# The "NoIntermediate" method uses the base pipeline and needs no layer ranking;
# for every other method, reload the saved shadow weights and rank the layers
# using shadow_train as the IN data and shadow_test as the OUT data.
if method != "NoIntermediate":
    test_shadow_model, _ = target_model_fn(input_channel, num_classes)
    test_shadow_model.load_state_dict(torch.load(TARGET_PATH + f"{dataset_name}_{model_architecture}_shadow.pth"))
    get_output_probs(test_shadow_model, shadow_train, shadow_test, method)

# Membership Inference Attack

In [None]:
# Appends a membership flag (1 = member, 0 = non-member) to every sample and
# wraps the balanced result in data loaders.
def get_attack_dataset_with_shadow(target_train, target_test, shadow_train, shadow_test, batch_size):
    """Build the attack train/test loaders from the shadow and target splits.

    Shadow samples form the attack training set and target samples the attack
    test set. Samples coming from a ``*_train`` split get the flag 1 appended,
    samples from a ``*_test`` split the flag 0. Within each set the larger side
    is randomly subsampled to the size of the smaller one.

    Returns:
        Tuple ``(attack_trainloader, attack_testloader)`` of shuffling loaders.
    """
    def flagged(dataset, flag):
        # Materialise the dataset, extending every sample tuple with its flag.
        return [sample + (flag,) for sample in dataset]

    def subsample(samples, keep):
        chosen, _ = torch.utils.data.random_split(samples, [keep, len(samples) - keep])
        return chosen

    shadow_members = flagged(shadow_train, 1)
    shadow_outsiders = flagged(shadow_test, 0)
    target_members = flagged(target_train, 1)
    target_outsiders = flagged(target_test, 0)

    train_length = min(len(shadow_members), len(shadow_outsiders))
    test_length = min(len(target_members), len(target_outsiders))

    # The four random splits are drawn in this fixed order because each one
    # consumes the global random generator.
    train_in = subsample(shadow_members, train_length)
    train_out = subsample(shadow_outsiders, train_length)
    test_in = subsample(target_members, test_length)
    test_out = subsample(target_outsiders, test_length)

    loader_options = dict(batch_size=batch_size, shuffle=True, num_workers=2)
    attack_trainloader = torch.utils.data.DataLoader(train_in + train_out, **loader_options)
    attack_testloader = torch.utils.data.DataLoader(test_in + test_out, **loader_options)

    return attack_trainloader, attack_testloader

In [None]:
def get_gradient_size(model):
    """Return the shapes of all ``weight`` parameters of ``model``.

    Parameters are visited from the last registered one back to the first, so
    index 0 of the result is the shape of the final weight tensor.
    """
    named = list(model.named_parameters())
    return [tensor.shape for label, tensor in named[::-1] if 'weight' in label]

In [None]:
# Attack network: one encoder branch per input signal, merged by a joint classifier.
class WhiteBoxAttackModel(nn.Module):
    """White-box membership-inference attack model.

    Five branches each embed one signal into 64 features: the selected
    intermediate activations, the model output vector, the per-sample loss,
    the gradient of the last weight matrix and the one-hot label. The
    concatenated 320 features are mapped by ``Encoder_Component`` to two logits.

    Args:
        class_num: width of the ``output`` and ``label`` inputs.
        total: flattened size of the gradient map after the 2x2 max-pool.
        intermediate_layer_input: number of intermediate activations per sample.
    """

    def __init__(self, class_num, total, intermediate_layer_input):
        super(WhiteBoxAttackModel, self).__init__()

        self.intermediate_layer_input = intermediate_layer_input

        # Registration order and layer indices are kept as before so existing
        # state dicts remain loadable.
        self.Intermediate_Output_Component_result = self._dense_branch(self.intermediate_layer_input)
        self.Output_Component = self._dense_branch(class_num)
        self.Loss_Component = self._dense_branch(1)

        self.Gradient_Component = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Conv2d(1, 1, kernel_size=5, padding=2),
            nn.BatchNorm2d(1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(total, 256),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
        )

        self.Label_Component = self._dense_branch(class_num)

        # 320 = 5 branches x 64 features each.
        self.Encoder_Component = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(320, 256),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 2),
        )

    @staticmethod
    def _dense_branch(in_features):
        """Return the Dropout -> Linear(128) -> ReLU -> Linear(64) encoder used by the flat inputs."""
        return nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
        )

    def _embed_intermediate(self, intermediate_output, batch_size):
        """Embed the intermediate activations with the branch built in ``__init__``.

        The tensor is accepted either as (batch, neurons) or in the transposed
        (neurons, batch) layout; a square tensor is treated as (neurons, batch).
        """
        anchor = self.Encoder_Component[1].weight  # any parameter: supplies device and dtype
        features = intermediate_output.detach().to(device=anchor.device, dtype=anchor.dtype)

        already_batch_major = (
            features.dim() == 2
            and features.shape[0] == batch_size
            and features.shape[1] != batch_size
        )
        if not already_batch_major:
            # Reverse all axes (what the former numpy-transpose fallback did) and
            # drop a singleton middle axis if there is one.
            features = features.permute(*reversed(range(features.dim())))
            if features.dim() > 2:
                features = torch.squeeze(features, 1)

        width = features.shape[1]
        if width != self.Intermediate_Output_Component_result[1].in_features:
            # Backward compatibility: earlier versions re-created this branch on
            # EVERY call, which silently discarded whatever it had learned. It is
            # now rebuilt only when the input width really differs.
            import warnings
            warnings.warn(
                "WhiteBoxAttackModel: intermediate input width %d differs from the "
                "configured width; rebuilding the intermediate branch. An optimizer "
                "created before this call does not know the new parameters." % width
            )
            self.intermediate_layer_input = width
            rebuilt = self._dense_branch(width).to(anchor.device)
            rebuilt.train(self.training)
            self.Intermediate_Output_Component_result = rebuilt

        return self.Intermediate_Output_Component_result(features)

    def forward(self, intermediate_output, output, loss, gradient, label):
        """Return the two-class logits for one batch.

        Args:
            intermediate_output: tensor of selected activations, or an empty
                list/tensor when no intermediate features are used.
            output: (batch, class_num) output vectors.
            loss: (batch, 1) per-sample losses.
            gradient: (batch, 1, H, W) gradient maps.
            label: (batch, class_num) one-hot labels.
        """
        batch_size = output.shape[0]

        if torch.is_tensor(intermediate_output) and intermediate_output.numel() > 0:
            Intermediate_Output_Component_result = self._embed_intermediate(intermediate_output, batch_size)
        else:
            # No intermediate features: feed zeros so the encoder still receives
            # its 320 inputs. Previously this path hit an unbound variable.
            Intermediate_Output_Component_result = output.new_zeros((batch_size, 64))

        Loss_Component_result = self.Loss_Component(loss)
        Gradient_Component_result = self.Gradient_Component(gradient)
        Label_Component_result = self.Label_Component(label)
        Output_Component_result = self.Output_Component(output)

        final_inputs = torch.cat((Intermediate_Output_Component_result, Output_Component_result, Loss_Component_result, Gradient_Component_result, Label_Component_result), 1)

        final_result = self.Encoder_Component(final_inputs)
        return final_result

In [None]:
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.normal_(m.weight.data)
        m.bias.data.fill_(0)
    elif isinstance(m,nn.Linear):
        nn.init.xavier_normal_(m.weight)
        nn.init.constant_(m.bias, 0)

Main class for the attack

In [None]:
class attack_for_whitebox():
    """Prepare, train and evaluate the white-box membership-inference attack.

    The shadow model supplies the attack model's training features and the
    target model its test features. Features are cached as consecutive pickle
    records in ``ATTACK_SETS + "train.p"`` and ``ATTACK_SETS + "test.p"``.
    """

    def __init__(self, TARGET_PATH, SHADOW_PATH, ATTACK_SETS, attack_train_loader, attack_test_loader, target_model, shadow_model, attack_model, device, class_num):
        """Load the target/shadow checkpoints and set up the attack model and optimizer.

        Side effects: reseeds the global torch generator with 0 before the
        attack model's weights are initialised with ``weights_init``.
        """
        self.device = device
        self.class_num = class_num

        self.ATTACK_SETS = ATTACK_SETS

        self.TARGET_PATH = TARGET_PATH
        self.target_model = target_model.to(self.device)
        self.target_model.load_state_dict(torch.load(self.TARGET_PATH))
        self.target_model.eval()

        self.SHADOW_PATH = SHADOW_PATH
        self.shadow_model = shadow_model.to(self.device)
        self.shadow_model.load_state_dict(torch.load(self.SHADOW_PATH))
        self.shadow_model.eval()

        self.attack_train_loader = attack_train_loader
        self.attack_test_loader = attack_test_loader

        self.attack_model = attack_model.to(self.device)
        torch.manual_seed(0)
        self.attack_model.apply(weights_init)

        # Per-sample losses are needed for the per-sample gradients, hence reduction='none'.
        self.target_criterion = nn.CrossEntropyLoss(reduction='none')
        self.attack_criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.attack_model.parameters(), lr=1e-5)

        self.attack_train_data = None
        self.attack_test_data = None

    @staticmethod
    def _add_noise(inputs):
        """Return ``inputs`` plus Gaussian noise scaled by the STD_DATA / MEAN_DATA globals."""
        # The noise is drawn on the CPU (as before) and then moved next to the inputs.
        return inputs + torch.randn(inputs.cpu().size()).to(inputs.device) * STD_DATA + MEAN_DATA

    def _selected_activations(self, model, inputs):
        """Return the selected neurons' activations as a (neurons, batch) tensor.

        Reads the module-level ``method``, ``layer_numbers``, ``layer_neurons``
        and path globals. For every analysed layer the batch is pushed through
        the first ``j + 1`` children of ``model`` (or the whole model for the
        last child) and the neurons listed in the saved ``top_values`` file are kept.
        """
        children = list(model.children())  # was read from the global shadow_model
        # Written by the neuron-ranking step: one list of neuron INDICES per layer.
        layers_vals = torch.load(f"{root_folder}attack/{dataset_name}/{model_architecture}/{method}/outputs/top_values")

        rows = []
        # Only values are needed here; skipping autograd saves memory and time.
        with torch.no_grad():
            for index_layer, j in enumerate(layer_numbers):
                if j == len(children) - 1:
                    feature_vector = model
                else:
                    feature_vector = nn.Sequential(*children[0:j+1])  # first j+1 layers

                if method == "JustNoise":
                    activations = feature_vector(self._add_noise(inputs)).flatten(start_dim=1)
                else:
                    activations = feature_vector(inputs).flatten(start_dim=1)
                    if method == "Noise":
                        # Feature = absolute change of every activation under input noise.
                        noisy = feature_vector(self._add_noise(inputs)).flatten(start_dim=1)
                        activations = (activations - noisy).abs()

                # The saved entries already ARE the chosen neuron indices. The old
                # code arg-sorted them again, which always produced a permutation
                # of 0..k-1 and therefore picked the first k neurons of the layer.
                neuron_ids = [int(n) for n in layers_vals[index_layer]][:layer_neurons[index_layer]]
                index = torch.tensor(neuron_ids, dtype=torch.long, device=activations.device)
                rows.append(activations.index_select(1, index).t())

        if not rows:
            return torch.zeros(0, device=inputs.device)

        stacked = torch.cat(rows, dim=0)  # (neurons, batch)
        # Sort every sample's own values (one column per sample). Sorting along
        # the last axis, as before, shuffled values between different samples.
        stacked, _ = torch.sort(stacked, dim=0, descending=True)
        return stacked

    @staticmethod
    def _last_weight_gradient(model):
        """Return a (1, 1, ...) copy of the gradient of the last ``weight`` parameter, or None."""
        for name, parameter in reversed(list(model.named_parameters())):
            if 'weight' in name:
                return parameter.grad.clone().unsqueeze_(0).unsqueeze_(0)
        return None

    def _get_data(self, model, inputs, targets):
        """Compute the attack features of one batch.

        Returns:
            ``(intermediate_output, outputs, losses, gradients, labels)`` where
            ``intermediate_output`` is a (neurons, batch) tensor (or ``[]`` for
            the "NoIntermediate" method), ``outputs`` are the model outputs
            sorted in descending order per sample, ``losses`` has shape
            (batch, 1), ``gradients`` holds one last-layer weight gradient per
            sample and ``labels`` are one-hot float labels on the CPU.
        """
        intermediate_output = []
        if method != "NoIntermediate":
            intermediate_output = self._selected_activations(model, inputs)

        results = model(inputs)
        losses = self.target_criterion(results, targets)

        gradients = []
        for loss in losses:
            # Gradients accumulate across backward() calls; clear them so each
            # entry is the gradient of this sample only.
            model.zero_grad()
            loss.backward(retain_graph=True)
            gradient = self._last_weight_gradient(model)
            if gradient is not None:
                gradients.append(gradient)

        labels = F.one_hot(targets.long(), num_classes=self.class_num).float().cpu()

        gradients = torch.cat(gradients, dim=0)
        losses = losses.detach().unsqueeze(1)
        outputs, _ = torch.sort(results.detach(), descending=True)

        return intermediate_output, outputs, losses, gradients, labels

    def _dump_attack_set(self, file_name, loader, model):
        """Write one pickle record per batch of ``loader`` using features from ``model``."""
        with open(self.ATTACK_SETS + file_name, "wb") as f:
            for inputs, targets, members in loader:
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                intermediate_output, output, loss, gradient, label = self._get_data(model, inputs, targets)
                pickle.dump((intermediate_output, output, loss, gradient, label, members), f)

    def prepare_dataset(self):
        """Cache the attack features: shadow model for training, target model for testing."""
        print("Preparing Train Dataset")
        self._dump_attack_set("train.p", self.attack_train_loader, self.shadow_model)
        print("Finished Saving Train Dataset")

        print("Preparing Test Dataset")
        self._dump_attack_set("test.p", self.attack_test_loader, self.target_model)
        print("Finished Saving Test Dataset")

    def _attack_batches(self, file_name):
        """Yield the cached records of ``file_name`` with their tensors moved to ``self.device``.

        ``intermediate_output`` is passed through untouched; the attack model
        moves it itself.
        """
        # NOTE: pickle executes arbitrary code on load; only read files written
        # by prepare_dataset().
        with open(self.ATTACK_SETS + file_name, "rb") as f:
            while True:
                try:
                    intermediate_output, output, loss, gradient, label, members = pickle.load(f)
                except EOFError:
                    return
                yield (
                    intermediate_output,
                    output.to(self.device),
                    loss.to(self.device),
                    gradient.to(self.device),
                    label.to(self.device),
                    members.to(self.device),
                )

    @staticmethod
    def _report(split, gndtrth, predict, probabe, result_path, correct, total):
        """Compute, save and print the detailed metrics; return ``[f1, auc]``."""
        gndtrth = torch.cat(gndtrth, dim=0).cpu().detach().numpy()
        predict = torch.cat(predict, dim=0).cpu().detach().numpy()
        probabe = torch.cat(probabe, dim=0).cpu().detach().numpy()

        # labels=[0, 1] guarantees a 2x2 matrix even if one class never occurs.
        conf_matrix = confusion_matrix(gndtrth, predict, labels=[0, 1])

        f1 = f1_score(gndtrth, predict)
        auc = roc_auc_score(gndtrth, probabe)

        with open(result_path, "wb") as f:
            pickle.dump((gndtrth, predict, probabe), f)

        print("Saved Attack %s Ground Truth and Predict Sets" % split)
        print("%s F1: %f\nAUC: %f" % (split, f1, auc))

        print("\n\n\n\n")
        tn, fp, fn, tp = conf_matrix.ravel()
        print('%s Acc: %.3f%% (%d/%d)' % (split, 100.*correct/(1.0*total), correct, total))
        print("Recall:", str(round((tp/(tp+fn)) * 100, 2)))
        print("Negative Recall:", str(round((tn/(tn+fp)) * 100, 2)))
        print("%s F1: %f\nAUC: %f" % (split, f1, auc))
        print("TP: ", tp, "FP", fp, "TN", tn, "FN", fn)

        return [f1, auc]

    def train(self, epoch, result_path):
        """Run one training pass over the cached training records.

        Args:
            epoch: truthy to also compute, print and pickle the detailed metrics.
            result_path: where those metrics' raw arrays are pickled.

        Returns:
            ``[f1, auc, accuracy]`` when ``epoch`` is truthy, else ``[accuracy]``.
        """
        self.attack_model.train()
        batch_count = 0
        train_loss = 0
        correct = 0
        total = 0

        final_train_gndtrth = []
        final_train_predict = []
        final_train_probabe = []

        final_result = []

        for intermediate_output, output, loss, gradient, label, members in self._attack_batches("train.p"):
            # Without this the gradients of all previous batches keep adding up.
            self.optimizer.zero_grad()
            results = self.attack_model(intermediate_output, output, loss, gradient, label)
            losses = self.attack_criterion(results, members)

            losses.backward()
            self.optimizer.step()

            train_loss += losses.item()
            _, predicted = results.max(1)
            total += members.size(0)
            correct += predicted.eq(members).sum().item()

            if epoch:
                final_train_gndtrth.append(members)
                final_train_predict.append(predicted)
                # Detached softmax probabilities: consistent with test() and
                # does not keep every batch's autograd graph alive.
                final_train_probabe.append(F.softmax(results.detach(), dim=1)[:, 1])

            batch_count += 1

        if epoch:
            final_result.extend(self._report("Train", final_train_gndtrth, final_train_predict, final_train_probabe, result_path, correct, total))

        accuracy = 1.*correct/total if total else 0.0
        final_result.append(accuracy)

        print('Train Acc: %.3f%% (%d/%d) | Loss: %.3f' % (100.*accuracy, correct, total, 1.*train_loss/max(batch_count, 1)))

        return final_result

    def test(self, epoch, result_path):
        """Evaluate the attack model on the cached test records.

        Arguments and return value mirror :meth:`train`.
        """
        self.attack_model.eval()
        correct = 0
        total = 0

        final_test_gndtrth = []
        final_test_predict = []
        final_test_probabe = []

        final_result = []

        with torch.no_grad():
            for intermediate_output, output, loss, gradient, label, members in self._attack_batches("test.p"):
                results = self.attack_model(intermediate_output, output, loss, gradient, label)

                _, predicted = results.max(1)
                total += members.size(0)
                correct += predicted.eq(members).sum().item()

                if epoch:
                    final_test_gndtrth.append(members)
                    final_test_predict.append(predicted)
                    final_test_probabe.append(F.softmax(results, dim=1)[:, 1])

        if epoch:
            final_result.extend(self._report("Test", final_test_gndtrth, final_test_predict, final_test_probabe, result_path, correct, total))

        accuracy = 1.*correct/total if total else 0.0
        final_result.append(accuracy)
        print('Test Acc: %.3f%% (%d/%d)' % (100.*accuracy, correct, total))

        return final_result

    def delete_pickle(self):
        """Remove the cached train/test feature files if they exist."""
        for file_name in ("train.p", "test.p"):
            for path in glob.glob(self.ATTACK_SETS + file_name):
                os.remove(path)

    def saveModel(self, path):
        """Save the attack model's state dict to ``path``."""
        torch.save(self.attack_model.state_dict(), path)

# Run the attack using the shadow-model approach

In [None]:
def attack_mode3(TARGET_PATH, SHADOW_PATH, ATTACK_PATH, device, attack_trainloader, attack_testloader, target_model, shadow_model, attack_model, get_attack_set, num_classes, num_epochs=50):
    """Train and evaluate the white-box attack, then save the attack model.

    Args:
        TARGET_PATH, SHADOW_PATH: checkpoints of the target and shadow models.
        ATTACK_PATH: prefix for every file written by the attack.
        get_attack_set: accepted for interface compatibility; not used.
        num_epochs: number of train/test passes (default 50, the former fixed value).

    Unless the module-level ``prepare_dataset_pretrained`` flag is truthy, the
    cached attack features are deleted and regenerated first.

    Returns:
        ``(res_train, res_test)`` of the last epoch; both are ``None`` when
        ``num_epochs`` is 0.
    """
    MODELS_PATH = ATTACK_PATH + "_meminf_attack3.pth"
    RESULT_PATH = ATTACK_PATH + "_meminf_attack3.p"
    # Train results used to be written to RESULT_PATH and were overwritten by
    # the test results right away; they now get a file of their own, while
    # RESULT_PATH still ends up holding the test results.
    TRAIN_RESULT_PATH = ATTACK_PATH + "_meminf_attack3_train.p"
    ATTACK_SETS = ATTACK_PATH + "_meminf_attack_mode3_"

    attack = attack_for_whitebox(TARGET_PATH, SHADOW_PATH, ATTACK_SETS, attack_trainloader, attack_testloader, target_model, shadow_model, attack_model, device, num_classes)

    if not prepare_dataset_pretrained:
        print("Preparing attack dataset")
        attack.delete_pickle()
        attack.prepare_dataset()

    res_train, res_test = None, None
    for i in range(num_epochs):
        # Detailed metrics are computed and saved on the last epoch only.
        flag = 1 if i == num_epochs - 1 else 0
        print("Epoch %d :" % (i+1))
        res_train = attack.train(flag, TRAIN_RESULT_PATH)
        res_test = attack.test(flag, RESULT_PATH)

    attack.saveModel(MODELS_PATH)
    print("Saved Attack Model")

    return res_train, res_test

In [None]:
def test_meminf(PATH, device, num_classes, target_train, target_test, shadow_train, shadow_test, target_model, shadow_model, use_DP, noise, norm, delta):
    """Build the attack data and model, then run the white-box attack.

    Args:
        PATH: directory prefix holding the ``*_target.pth`` / ``*_shadow.pth`` checkpoints.
        use_DP, noise, norm, delta: accepted for interface compatibility; not used here.

    Reads the module-level ``intermediate_layer_input``, ``root_folder``,
    ``dataset_name``, ``model_architecture`` and ``method``.

    Returns:
        The ``(res_train, res_test)`` pair produced by ``attack_mode3``.
    """
    batch_size = 64

    attack_trainloader, attack_testloader = get_attack_dataset_with_shadow(
        target_train, target_test, shadow_train, shadow_test, batch_size)

    # White-box input: the gradient of the last weight matrix passes through a
    # size-preserving convolution and a 2x2 max-pool before being flattened, so
    # the flattened size is (rows // 2) * (cols // 2). The former expression
    # `rows // 2 * cols // 2` parsed as ((rows // 2) * cols) // 2, which is
    # wrong whenever `cols` is odd.
    gradient_size = get_gradient_size(target_model)
    rows, cols = gradient_size[0][0], gradient_size[0][1]
    total = (rows // 2) * (cols // 2)

    attack_model = WhiteBoxAttackModel(num_classes, total, intermediate_layer_input)

    ATTACK_PATH = f"{root_folder}attack/{dataset_name}/{model_architecture}/{method}/"

    return attack_mode3(
        PATH + f"{dataset_name}_{model_architecture}_target.pth",
        PATH + f"{dataset_name}_{model_architecture}_shadow.pth",
        ATTACK_PATH, device,
        attack_trainloader, attack_testloader, target_model, shadow_model, attack_model, 1, num_classes)


In [None]:
# Create two model instances via target_model_fn (second return value unused);
# their trained weights are loaded from the checkpoints under TARGET_PATH once
# the attack object is constructed.
target_model, _ = target_model_fn(input_channel, num_classes)
shadow_model, _ = target_model_fn(input_channel, num_classes)
# Run the white-box membership-inference attack end to end.
test_meminf(TARGET_PATH, device, num_classes, target_train, target_test, shadow_train, shadow_test, target_model, shadow_model, use_DP, noise, norm, delta)