<a href="https://colab.research.google.com/github/shrebox/PETS-Project/blob/main/Membership_Inference_Baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Membership Inference
Based on: https://arxiv.org/pdf/1806.01246.pdf

## 1. Datasets: MNIST, CIFAR-10

Data is split half into D_shadow and D_target

D_shadow is split half into D_shadow_train and D_shadown_test

D_target is split half into D_target_train (members) and D_target_test (non-members)

## 2. Shadow model == Target Model:

CNN with two convlutional layers, two pooling layers with one hidden layer containing 128 units in the end.

## 3. Attack model:

Input will be the top 3 posteriors generated from testing the D_shadow (D_shadow_train + D_shadow_test) from Shadow Model and labels will be 1 (D_shadow_train) or 0 (D_shadow_test).

Train data:
Input vector X_attack_train = [[max1, max2, max3],....]
Labels Y_attack_train = [[1] or [0]....]

MLP (64 unit hidden layer and softmax output layer)

## 4. Evaluation:

Precision, recall and accuracy.

Tagert models accuracy to check for overfitting for better membership inference.

# 0. Imports

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import ConcatDataset
from torch.utils.data import Subset
from torch.utils.data import random_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
from sklearn.model_selection import train_test_split

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 1. Dataset

In [2]:
# function for transforming the datasets range 

transform_mnist = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5), (0.5)) # because the MNIST has only 1 channel, so we set (0.5), (0.5).
                                        # if the dataset has three channel, like CIFAR-10, we set (0.5,0.5,0.5), (0.5,0.5,0.5)
     ])

transform_cifar = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # because the MNIST has only 1 channel, so we set (0.5), (0.5).
                                        # if the dataset has three channel, like CIFAR-10, we set (0.5,0.5,0.5), (0.5,0.5,0.5)
     ])

# importing and downloading the datasets

mnist_trainset = torchvision.datasets.MNIST(root='./drive/MyDrive/pets_project/datasets', train=True,
                                        download=False, transform=transform_mnist)
mnist_testset = torchvision.datasets.MNIST(root='./drive/MyDrive/pets_project/datasets', train=False,
                                       download=False, transform=transform_mnist)

cifar_trainset = torchvision.datasets.CIFAR10(root='./drive/MyDrive/pets_project/datasets', train=True,
                                        download=False, transform=transform_cifar)
cifar_testset = torchvision.datasets.CIFAR10(root='./drive/MyDrive/pets_project/datasets', train=False,
                                       download=False, transform=transform_cifar)

## concatenate data

mnist_data = ConcatDataset([mnist_trainset, mnist_testset])

cifar_data = ConcatDataset([cifar_trainset, cifar_testset])

# mnist_data = torch.cat((mnist_trainset.data,mnist_testset.data),0)
# mnist_labels = torch.cat((mnist_trainset.train_labels,mnist_testset.test_labels),0)

# cifar_data = np.concatenate((cifar_trainset.data, cifar_testset.data),axis=0)
# cifar_labels = cifar_trainset.targets + cifar_testset.targets

## split data into half

D_shadow_mnist, D_target_mnist = torch.utils.data.random_split(mnist_data, [35000, 35000])

D_shadow_cifar, D_target_cifar = torch.utils.data.random_split(cifar_data, [30000, 30000])

# D_shadow_X_mnist, D_target_X_mnist, D_shadow_y_mnist, D_target_y_mnist = train_test_split(mnist_data, mnist_labels, test_size=0.5, random_state=42)

# D_shadow_X_cifar, D_target_X_cifar, D_shadow_y_cifar, D_target_y_cifar = train_test_split(cifar_data, cifar_labels, test_size=0.5, random_state=42)


# 2. Shadow Model

In [11]:
## Split the D_shadow into half

D_train_shadow_mnist, D_out_shadow_mnist = torch.utils.data.random_split(D_shadow_mnist, [17500, 17500])

D_train_shadow_cifar, D_out_shadow_cifar = torch.utils.data.random_split(D_shadow_cifar, [15000, 15000])

# D_train_shadow_X_mnist, D_out_shadow_X_mnist, D_train_shadow_y_mnist, D_out_shadow_y_mnist = train_test_split(D_shadow_X_mnist, D_shadow_y_mnist, test_size=0.5, random_state=42)

In [12]:
## training parameters: ephocs = 50, batch_size = 100, learning_rate = 0.001, n_hidden=128,l2_ratio = 1e-07, model_type = cnn

batch_size = 100 
trainloader_shadow_mnist = DataLoader(D_train_shadow_mnist, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
testloader_shadow_mnist = DataLoader(D_out_shadow_mnist, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

batch_size = 100 
trainloader_shadow_cifar = DataLoader(D_train_shadow_cifar, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
testloader_shadow_cifar = DataLoader(D_out_shadow_cifar, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

In [None]:
# nn parameters: num_filters = 32, filter_size = (5,5), gain = 'relu'
# Conv2DLayer(incoming, num_filters, filter_size, stride=(1, 1), pad=0,
# torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1,

# MNIST:

# (W−F+2P)/S + 1
# P = (F-1)/2

# input = 28 x 28 x 1

# conv1 - W = 28, F = 5, P = 2, S = 1 --> padding is 2 as the lasagne implementation has 'same' parameter that corresponds to floor of filter size/2
# 	28 - 5 + 4 + 1 = 28

# 28 x 28 x 32

# maxpool - 14 x 14 x 32

# conv2 - 14 - 5 + 1 = 10 --> padding is 0 as no parameter in the lasagne implementation

# 10 x 10 x 32

# maxpool - 5 x 5 x 32

            
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        # # MNIST
        # self.conv1 = nn.Conv2d(1, 32, 5, 1, padding=2)
        # self.conv2 = nn.Conv2d(32, 32, 5, 1)
        # self.fc1 = nn.Linear(800, 128)
        # self.fc2 = nn.Linear(128, 10)

        # CIFAR
        self.conv1 = nn.Conv2d(3, 32, 5, 1, padding=2)
        self.conv2 = nn.Conv2d(32, 32, 5, 1)
        self.fc1 = nn.Linear(1152, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x,1)
        x = self.fc1(x)
        x = F.relu(x)
        output = self.fc2(x)
        return output

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

net = Net().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

trainloader = trainloader_shadow_cifar
testloader = testloader_shadow_cifar

def train():
    for epoch in range(50):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)

            # zero the parameter gradients
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 20 == 19:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 20))
                running_loss = 0.0

    print('Finished Training')

    PATH = './drive/MyDrive/pets_project/models/shadow/shadow_trained_model_cifar.pt'
    torch.save(net.state_dict(), PATH)

def test():
    net = Net().to(device)
    PATH = './drive/MyDrive/pets_project/models/shadow/shadow_trained_model_cifar.pt'
    para = torch.load(PATH)
    net.load_state_dict(para)

    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)

            _, predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))

def showdata():
    import matplotlib.pyplot as plt
    import numpy as np

    # functions to show an image
    def imshow(img):
        img = img / 2 + 0.5     # unnormalize
        npimg = img.numpy()
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
        plt.savefig('dataset.png')

    # get some random training images
    dataiter = iter(trainloader)
    images, labels = dataiter.next()

    # show images
    imshow(torchvision.utils.make_grid(images))
    
if __name__ == '__main__':
    # showdata()
    train()
    test()

Using cuda device
[1,    20] loss: 2.171
[1,    40] loss: 1.946
[1,    60] loss: 1.857
[1,    80] loss: 1.751
[1,   100] loss: 1.696
[1,   120] loss: 1.611
[1,   140] loss: 1.596
[2,    20] loss: 1.517
[2,    40] loss: 1.491
[2,    60] loss: 1.417
[2,    80] loss: 1.475
[2,   100] loss: 1.399
[2,   120] loss: 1.431
[2,   140] loss: 1.344
[3,    20] loss: 1.275
[3,    40] loss: 1.373
[3,    60] loss: 1.296
[3,    80] loss: 1.271
[3,   100] loss: 1.268
[3,   120] loss: 1.237
[3,   140] loss: 1.280
[4,    20] loss: 1.204
[4,    40] loss: 1.169
[4,    60] loss: 1.159
[4,    80] loss: 1.154
[4,   100] loss: 1.166
[4,   120] loss: 1.168
[4,   140] loss: 1.140
[5,    20] loss: 1.084
[5,    40] loss: 1.069
[5,    60] loss: 1.088
[5,    80] loss: 1.081
[5,   100] loss: 1.117
[5,   120] loss: 1.069
[5,   140] loss: 1.073
[6,    20] loss: 0.983
[6,    40] loss: 0.954
[6,    60] loss: 1.018
[6,    80] loss: 0.998
[6,   100] loss: 1.019
[6,   120] loss: 1.003
[6,   140] loss: 1.034
[7,    20] loss:

In [23]:

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        # MNIST
        self.conv1 = nn.Conv2d(1, 32, 5, 1, padding=2)
        self.conv2 = nn.Conv2d(32, 32, 5, 1)
        self.fc1 = nn.Linear(800, 128)
        self.fc2 = nn.Linear(128, 10)

        # # CIFAR
        # self.conv1 = nn.Conv2d(3, 32, 5, 1, padding=2)
        # self.conv2 = nn.Conv2d(32, 32, 5, 1)
        # self.fc1 = nn.Linear(1152, 128)
        # self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x,1)
        x = self.fc1(x)
        x = F.relu(x)
        output = self.fc2(x)
        return output

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

net = Net().to(device)
PATH = './drive/MyDrive/pets_project/models/shadow/shadow_trained_model.pt'
para = torch.load(PATH)
net.load_state_dict(para)

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader_shadow_mnist:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Using cuda device
175
Accuracy of the network on the 10000 test images: 98 %


In [17]:
a = torch.topk(outputs.data, 3)

In [28]:
type(a.values)

torch.Tensor

# Target Model

In [156]:
# Split the target data into half

D_train_target_mnist, D_out_target_mnist = torch.utils.data.random_split(D_target_mnist, [17500, 17500])

D_train_target_cifar, D_out_target_cifar = torch.utils.data.random_split(D_target_cifar, [15000, 15000])

In [157]:
# training parameters: ephocs = 50, batch_size = 100, learning_rate = 0.001, n_hidden=128,l2_ratio = 1e-07, model_type = cnn

batch_size = 100 
trainloader_target_mnist = DataLoader(D_train_target_mnist, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
testloader_target_mnist = DataLoader(D_out_target_mnist, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

batch_size = 100 
trainloader_target_cifar = DataLoader(D_train_target_cifar, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
testloader_target_cifar = DataLoader(D_out_target_cifar, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

In [None]:
# nn parameters: num_filters = 32, filter_size = (5,5), gain = 'relu'
# Conv2DLayer(incoming, num_filters, filter_size, stride=(1, 1), pad=0,
# torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1,
            
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        # # MNIST
        # self.conv1 = nn.Conv2d(1, 32, 5, 1, padding=2)
        # self.conv2 = nn.Conv2d(32, 32, 5, 1)
        # self.fc1 = nn.Linear(800, 128)
        # self.fc2 = nn.Linear(128, 10)

        # CIFAR10
        self.conv1 = nn.Conv2d(3, 32, 5, 1, padding=2)
        self.conv2 = nn.Conv2d(32, 32, 5, 1)
        self.fc1 = nn.Linear(1152, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x,1)
        x = self.fc1(x)
        x = F.relu(x)
        output = self.fc2(x)
        return output

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

net = Net().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

trainloader = trainloader_target_cifar
testloader = testloader_target_cifar

def train():
    for epoch in range(50):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)

            # zero the parameter gradients
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 20 == 19:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 20))
                running_loss = 0.0

    print('Finished Training')

    PATH = './drive/MyDrive/pets_project/models/target/target_trained_model_cifar.pt'
    torch.save(net.state_dict(), PATH)

def test():
    net = Net().to(device)
    PATH = './drive/MyDrive/pets_project/models/target/target_trained_model_cifar.pt'
    para = torch.load(PATH)
    net.load_state_dict(para)

    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)

            _, predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))
    
def showdata():
    import matplotlib.pyplot as plt
    import numpy as np

    # functions to show an image
    def imshow(img):
        img = img / 2 + 0.5     # unnormalize
        npimg = img.numpy()
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
        plt.savefig('dataset.png')

    # get some random training images
    dataiter = iter(trainloader)
    images, labels = dataiter.next()

    # show images
    imshow(torchvision.utils.make_grid(images))
    
if __name__ == '__main__':
    # showdata()
    train()
    test()

Using cuda device
[1,    20] loss: 2.160
[1,    40] loss: 1.956
[1,    60] loss: 1.786
[1,    80] loss: 1.673
[1,   100] loss: 1.619
[1,   120] loss: 1.538
[1,   140] loss: 1.584
[2,    20] loss: 1.471
[2,    40] loss: 1.480
[2,    60] loss: 1.414
[2,    80] loss: 1.439
[2,   100] loss: 1.405
[2,   120] loss: 1.341
[2,   140] loss: 1.367
[3,    20] loss: 1.292
[3,    40] loss: 1.274
[3,    60] loss: 1.273
[3,    80] loss: 1.291
[3,   100] loss: 1.298
[3,   120] loss: 1.221
[3,   140] loss: 1.247
[4,    20] loss: 1.156
[4,    40] loss: 1.179
[4,    60] loss: 1.131
[4,    80] loss: 1.123
[4,   100] loss: 1.147
[4,   120] loss: 1.118
[4,   140] loss: 1.171
[5,    20] loss: 1.030
[5,    40] loss: 1.056
[5,    60] loss: 1.068
[5,    80] loss: 1.072
[5,   100] loss: 1.068
[5,   120] loss: 1.062
[5,   140] loss: 1.056
[6,    20] loss: 0.939
[6,    40] loss: 0.977
[6,    60] loss: 0.919
[6,    80] loss: 1.021
[6,   100] loss: 0.932
[6,   120] loss: 0.972
[6,   140] loss: 1.000
[7,    20] loss:

# Attack Model

MLP (64 unit hidden layer and softmax output layer)



In [24]:
# output = classifier.train_model(dataset=dataset,
# 									epochs=50,
# 									batch_size=10,
# 									learning_rate=0.01,
# 									n_hidden=64,
# 									l2_ratio = 1e-6,
# 									model='softmax')

# def get_softmax_model(n_in, n_out):
#     net = dict()
#     net['input'] = lasagne.layers.InputLayer((None, n_in[1]))
    
#     net['output'] = lasagne.layers.DenseLayer(
#         net['input'],
#         num_units=n_out,
#         nonlinearity=lasagne.nonlinearities.softmax)
#     return net


## Dataset Generation

In [130]:
# Using shadow model 

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        # # MNIST
        # self.conv1 = nn.Conv2d(1, 32, 5, 1, padding=2)
        # self.conv2 = nn.Conv2d(32, 32, 5, 1)
        # self.fc1 = nn.Linear(800, 128)
        # self.fc2 = nn.Linear(128, 10)

        # CIFAR
        self.conv1 = nn.Conv2d(3, 32, 5, 1, padding=2)
        self.conv2 = nn.Conv2d(32, 32, 5, 1)
        self.fc1 = nn.Linear(1152, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x,1)
        x = self.fc1(x)
        x = F.relu(x)
        output = self.fc2(x)
        return output

In [146]:
# Below used to generate the attack data by changing the field values manually

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

net = Net().to(device)
PATH = './drive/MyDrive/pets_project/models/shadow/shadow_model_cifar.pt'
para = torch.load(PATH)
net.load_state_dict(para)

correct = 0
total = 0

count = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in trainloader_shadow_cifar:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)

        topkvalues = (torch.topk(outputs.data, 3)).values
        if count == 0:
          D_attack_train_X = topkvalues
        else:
          D_attack_train_X = torch.cat((D_attack_train_X, topkvalues), 0)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        count+=1

print('Accuracy of the network on the 10000 train images: %d %%' % (
    100 * correct / total))

torch.save(D_attack_train_X, './drive/MyDrive/pets_project/datasets/attack/cifar/training/D_attack_train_X_cifar.pt')

D_attack_train_y = torch.ones(len(D_attack_train_X))
torch.save(D_attack_train_y, './drive/MyDrive/pets_project/datasets/attack/cifar/training/D_attack_train_y_cifar.pt')

Using cuda device
Accuracy of the network on the 10000 train images: 71 %


In [147]:
# Loading data

D_attack_train_X_mnist = torch.load('./drive/MyDrive/pets_project/datasets/attack/mnist/training/D_attack_train_X_mnist.pt')
D_attack_train_y_mnist = torch.load('./drive/MyDrive/pets_project/datasets/attack/mnist/training/D_attack_train_y_mnist.pt')
D_attack_test_X_mnist = torch.load('./drive/MyDrive/pets_project/datasets/attack/mnist/training/D_attack_test_X_mnist.pt')
D_attack_test_y_mnist = torch.load('./drive/MyDrive/pets_project/datasets/attack/mnist/training/D_attack_test_y_mnist.pt')

D_attack_X_mnist = torch.cat((D_attack_train_X_mnist, D_attack_test_X_mnist), 0)
D_attack_y_mnist = (torch.cat((D_attack_train_y_mnist, D_attack_test_y_mnist), 0)).to(torch.long)

D_attack_train_X_cifar = torch.load('./drive/MyDrive/pets_project/datasets/attack/cifar/training/D_attack_train_X_cifar.pt')
D_attack_train_y_cifar = torch.load('./drive/MyDrive/pets_project/datasets/attack/cifar/training/D_attack_train_y_cifar.pt')
D_attack_test_X_cifar = torch.load('./drive/MyDrive/pets_project/datasets/attack/cifar/training/D_attack_test_X_cifar.pt')
D_attack_test_y_cifar = torch.load('./drive/MyDrive/pets_project/datasets/attack/cifar/training/D_attack_test_y_cifar.pt')

D_attack_X_cifar = torch.cat((D_attack_train_X_cifar, D_attack_test_X_cifar), 0)
D_attack_y_cifar = (torch.cat((D_attack_train_y_cifar, D_attack_test_y_cifar), 0)).to(torch.long)

## Architecture

In [88]:
# Attack model architecture

class Net_attack(nn.Module):

    def __init__(self):
        super(Net_attack, self).__init__()

        self.fc1 = nn.Linear(3,64)
        self.fc2 = nn.Linear(64,2)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        output = self.fc2(x)
        return output

### Training

In [152]:
# For splitting the data into batches: https://stackoverflow.com/questions/45113245/how-to-get-mini-batches-in-pytorch-in-a-clean-and-efficient-way

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

net = Net_attack().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.01)

batch_size = 10
X = D_attack_X_cifar
y = D_attack_y_cifar

def train_attack():
    for epoch in range(50):  # loop over the dataset multiple times

        # X is a torch Variable
        running_loss = 0.0
        permutation = torch.randperm(X.size()[0])

        for i in range(0,X.size()[0], batch_size):
            indices = permutation[i:i+batch_size]
            inputs, labels = X[indices], y[indices]
            inputs, labels = inputs.to(device), labels.to(device)
            
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)

            # zero the parameter gradients
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print statistics
            # print(loss.item())
            running_loss += loss.item()
            if i % 1 == 0:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 1))
                running_loss = 0.0

    print('Finished Training')

    PATH = './drive/MyDrive/pets_project/models/attack/attack_model_cifar.pt'
    torch.save(net.state_dict(), PATH)

if __name__ == '__main__':
    # showdata()
    train_attack()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[49, 10011] loss: 0.702
[49, 10021] loss: 0.701
[49, 10031] loss: 0.700
[49, 10041] loss: 0.714
[49, 10051] loss: 0.693
[49, 10061] loss: 0.689
[49, 10071] loss: 0.690
[49, 10081] loss: 0.694
[49, 10091] loss: 0.693
[49, 10101] loss: 0.692
[49, 10111] loss: 0.692
[49, 10121] loss: 0.693
[49, 10131] loss: 0.690
[49, 10141] loss: 0.697
[49, 10151] loss: 0.684
[49, 10161] loss: 0.683
[49, 10171] loss: 0.700
[49, 10181] loss: 0.687
[49, 10191] loss: 0.686
[49, 10201] loss: 0.694
[49, 10211] loss: 0.685
[49, 10221] loss: 0.724
[49, 10231] loss: 0.694
[49, 10241] loss: 0.715
[49, 10251] loss: 0.704
[49, 10261] loss: 0.675
[49, 10271] loss: 0.714
[49, 10281] loss: 0.685
[49, 10291] loss: 0.676
[49, 10301] loss: 0.685
[49, 10311] loss: 0.694
[49, 10321] loss: 0.675
[49, 10331] loss: 0.714
[49, 10341] loss: 0.694
[49, 10351] loss: 0.675
[49, 10361] loss: 0.684
[49, 10371] loss: 0.705
[49, 10381] loss: 0.684
[49, 10391] loss: 0.684

### Testing

In [None]:
# Get top 3 posteriors from the target model for the D_target data and feed to target model to predict the label

In [164]:
# Using shadow model 

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        # # MNIST
        # self.conv1 = nn.Conv2d(1, 32, 5, 1, padding=2)
        # self.conv2 = nn.Conv2d(32, 32, 5, 1)
        # self.fc1 = nn.Linear(800, 128)
        # self.fc2 = nn.Linear(128, 10)

        # CIFAR
        self.conv1 = nn.Conv2d(3, 32, 5, 1, padding=2)
        self.conv2 = nn.Conv2d(32, 32, 5, 1)
        self.fc1 = nn.Linear(1152, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x,1)
        x = self.fc1(x)
        x = F.relu(x)
        output = self.fc2(x)
        return output

In [165]:
# Below used to generate the attack data by changing the field values manually

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

net = Net().to(device)
PATH = './drive/MyDrive/pets_project/models/target/target_model_cifar.pt'
para = torch.load(PATH)
net.load_state_dict(para)

correct = 0
total = 0

count = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader_target_cifar:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)

        topkvalues = (torch.topk(outputs.data, 3)).values
        if count == 0:
          D_attack_test_X = topkvalues
        else:
          D_attack_test_X = torch.cat((D_attack_test_X, topkvalues), 0)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        count+=1

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

torch.save(D_attack_test_X, './drive/MyDrive/pets_project/datasets/attack/cifar/testing/D_attack_test_X_cifar.pt')

D_attack_test_y = torch.zeros(len(D_attack_test_X))
torch.save(D_attack_test_y, './drive/MyDrive/pets_project/datasets/attack/cifar/testing/D_attack_test_y_cifar.pt')

Using cuda device
Accuracy of the network on the 10000 test images: 71 %


In [166]:
# Loading data

D_attack_train_X_mnist = torch.load('./drive/MyDrive/pets_project/datasets/attack/mnist/testing/D_attack_train_X_mnist.pt')
D_attack_train_y_mnist = torch.load('./drive/MyDrive/pets_project/datasets/attack/mnist/testing/D_attack_train_y_mnist.pt')
D_attack_test_X_mnist = torch.load('./drive/MyDrive/pets_project/datasets/attack/mnist/testing/D_attack_test_X_mnist.pt')
D_attack_test_y_mnist = torch.load('./drive/MyDrive/pets_project/datasets/attack/mnist/testing/D_attack_test_y_mnist.pt')

D_attack_target_X_mnist = torch.cat((D_attack_train_X_mnist, D_attack_test_X_mnist), 0)
D_attack_target_y_mnist = (torch.cat((D_attack_train_y_mnist, D_attack_test_y_mnist), 0)).to(torch.long)

D_attack_train_X_cifar = torch.load('./drive/MyDrive/pets_project/datasets/attack/cifar/testing/D_attack_train_X_cifar.pt')
D_attack_train_y_cifar = torch.load('./drive/MyDrive/pets_project/datasets/attack/cifar/testing/D_attack_train_y_cifar.pt')
D_attack_test_X_cifar = torch.load('./drive/MyDrive/pets_project/datasets/attack/cifar/testing/D_attack_test_X_cifar.pt')
D_attack_test_y_cifar = torch.load('./drive/MyDrive/pets_project/datasets/attack/cifar/testing/D_attack_test_y_cifar.pt')

D_attack_target_X_cifar = torch.cat((D_attack_train_X_cifar, D_attack_test_X_cifar), 0)
D_attack_target_y_cifar = (torch.cat((D_attack_train_y_cifar, D_attack_test_y_cifar), 0)).to(torch.long)

In [172]:
def test():
    net = Net_attack().to(device)
    PATH = './drive/MyDrive/pets_project/models/attack/attack_model_mnist.pt'
    para = torch.load(PATH)
    net.load_state_dict(para)

    correct = 0
    total = 0
    batch_size = 10
    X = D_attack_X_mnist
    y = D_attack_y_mnist
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        permutation = torch.randperm(X.size()[0])

        for i in range(0,X.size()[0], batch_size):
            indices = permutation[i:i+batch_size]
            inputs, labels = X[indices], y[indices]
            inputs, labels = inputs.to(device), labels.to(device)
        # for data in testloader:
        #     images, labels = data
        #     images, labels = images.to(device), labels.to(device)
            outputs = net(inputs)

            _, predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))

test()

Accuracy of the network on the 10000 test images: 50 %


In [150]:
D_attack_y_cifar.unique()

tensor([0, 1])

In [85]:
batch_size = 10
# X is a torch Variable
permutation = torch.randperm(X.size()[0])
for i in range(0,X.size()[0], batch_size):
    indices = permutation[i:i+batch_size]
    batch_x, batch_y = X[indices], y[indices]

In [112]:
type(D_train_shadow_mnist.dataset[1][1])

int

In [None]:
in1 = ""
l1 = ""
for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            in1 = data
            break

In [None]:
in1.shape

torch.Size([100, 28, 28])