In [3]:
import torchvision
import torchvision.transforms as transforms

# Define the transformations for the training and test data
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

# Load the CIFAR-10 training and test datasets
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
from torch.utils.data import random_split, DataLoader

# Calculate the sizes for training and validation sets
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size

# Split the dataset
train_subset, val_subset = random_split(trainset, [train_size, val_size])

# Create data loaders for training, validation, and test sets
trainloader = DataLoader(train_subset, batch_size=100, shuffle=True, num_workers=2)
valloader = DataLoader(val_subset, batch_size=100, shuffle=False, num_workers=2)
testloader = DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from model import CIFAR10Classifier  # Ensure your model.py has a class named CIFAR10Classifier

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model, loss function, and optimizer
model = CIFAR10Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [6]:
# Step 4: Train the model
for epoch in range(10):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Print training progress
        if i % 200 == 199:  # Print every 100 mini-batches
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data in valloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(valloader)
    val_accuracy = 100 * correct / total
    print(f"[Epoch {epoch + 1}] Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

print('Finished Training')



[Epoch 1, Batch 200] Loss: 3.7449
[Epoch 1, Batch 400] Loss: 3.1886
[Epoch 1] Validation Loss: 1.3695, Validation Accuracy: 51.96%
[Epoch 2, Batch 200] Loss: 2.9366
[Epoch 2, Batch 400] Loss: 2.8768
[Epoch 2] Validation Loss: 1.2279, Validation Accuracy: 57.89%
[Epoch 3, Batch 200] Loss: 2.7157
[Epoch 3, Batch 400] Loss: 2.6778
[Epoch 3] Validation Loss: 1.1520, Validation Accuracy: 60.05%
[Epoch 4, Batch 200] Loss: 2.5553
[Epoch 4, Batch 400] Loss: 2.5356
[Epoch 4] Validation Loss: 1.1002, Validation Accuracy: 61.75%
[Epoch 5, Batch 200] Loss: 2.4435
[Epoch 5, Batch 400] Loss: 2.4566
[Epoch 5] Validation Loss: 1.0855, Validation Accuracy: 62.65%
[Epoch 6, Batch 200] Loss: 2.3580
[Epoch 6, Batch 400] Loss: 2.3660
[Epoch 6] Validation Loss: 1.0414, Validation Accuracy: 63.89%
[Epoch 7, Batch 200] Loss: 2.2894
[Epoch 7, Batch 400] Loss: 2.2959
[Epoch 7] Validation Loss: 1.0393, Validation Accuracy: 63.53%
[Epoch 8, Batch 200] Loss: 2.2673
[Epoch 8, Batch 400] Loss: 2.2165
[Epoch 8] Valid

In [7]:
# Save the trained model
PATH = './cifar_net.pth'
torch.save(model.state_dict(), PATH)

In [8]:
# Step 6: Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")

Accuracy of the network on the 10000 test images: 64.41%


# Question **5**

In [10]:
!pip install torch torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [11]:
import torch
import torch.nn.functional as F

def fgsm_attack(model, loss, images, labels, epsilon):
    images = images.clone().detach().requires_grad_(True)
    outputs = model(images)
    model.zero_grad()
    cost = loss(outputs, labels).to(images.device)
    cost.backward()
    attack_images = images + epsilon * images.grad.sign()
    attack_images = torch.clamp(attack_images, 0, 1)
    return attack_images

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader
from model import CIFAR10Classifier  # Ensure your model.py has a class named CIFAR10Classifier

# Initialize the model, loss function, and optimizer
model = CIFAR10Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Adversarial training parameters
epsilon = 0.1  # perturbation amount

# Train the model with adversarial examples
for epoch in range(10):  # Number of epochs
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Generate adversarial examples
        adv_inputs = fgsm_attack(model, criterion, inputs, labels, epsilon)

        # Forward + backward + optimize (on both original and adversarial examples)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        adv_outputs = model(adv_inputs)
        adv_loss = criterion(adv_outputs, labels)
        total_loss = (loss + adv_loss) / 2
        total_loss.backward()
        optimizer.step()

        running_loss += total_loss.item()

        # Print training progress
        if i % 200 == 199:  # Print every 100 mini-batches
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data in valloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(valloader)
    val_accuracy = 100 * correct / total
    print(f"[Epoch {epoch + 1}] Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

print('Finished Training with Adversarial Privacy')

[Epoch 1, Batch 200] Loss: 4.0356
[Epoch 1, Batch 400] Loss: 3.6730
[Epoch 1] Validation Loss: 1.4330, Validation Accuracy: 49.18%
[Epoch 2, Batch 200] Loss: 3.5251
[Epoch 2, Batch 400] Loss: 3.4333
[Epoch 2] Validation Loss: 1.2781, Validation Accuracy: 55.71%
[Epoch 3, Batch 200] Loss: 3.3382
[Epoch 3, Batch 400] Loss: 3.2985
[Epoch 3] Validation Loss: 1.1776, Validation Accuracy: 58.40%
[Epoch 4, Batch 200] Loss: 3.2165
[Epoch 4, Batch 400] Loss: 3.2258
[Epoch 4] Validation Loss: 1.1632, Validation Accuracy: 59.87%
[Epoch 5, Batch 200] Loss: 3.1439
[Epoch 5, Batch 400] Loss: 3.1795
[Epoch 5] Validation Loss: 1.1174, Validation Accuracy: 61.21%
[Epoch 6, Batch 200] Loss: 3.0947
[Epoch 6, Batch 400] Loss: 3.1060
[Epoch 6] Validation Loss: 1.0866, Validation Accuracy: 61.87%
[Epoch 7, Batch 200] Loss: 3.0383
[Epoch 7, Batch 400] Loss: 3.0477
[Epoch 7] Validation Loss: 1.0852, Validation Accuracy: 62.24%
[Epoch 8, Batch 200] Loss: 2.9901
[Epoch 8, Batch 400] Loss: 2.9954
[Epoch 8] Valid

In [None]:
# Save the trained model
PATH = './cifar_net_adversarial.pth'
torch.save(model.state_dict(), PATH)

In [None]:
# Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")

Accuracy of the network on the 10000 test images: 64.55%


## Differential

In [None]:
!pip install opacus

Collecting opacus
  Downloading opacus-1.4.1-py3-none-any.whl (226 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/226.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.7/226.7 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: opacus
Successfully installed opacus-1.4.1


In [None]:
from opacus import PrivacyEngine

In [None]:
# Initialize the model, loss function, and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Add L2 regularization

# Initialize Privacy Engine
privacy_engine = PrivacyEngine()

# Attach the privacy engine to the optimizer
model, optimizer, trainloader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=trainloader,
    noise_multiplier=1.1,
    max_grad_norm=1.0,
)

# Training the model with Differential Privacy
for epoch in range(10):  # number of epochs
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Add noise to inputs for privacy
        inputs = inputs + torch.randn_like(inputs) * 0.1

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)

        # Apply temperature scaling to the outputs
        temperature = 2.0
        outputs = outputs / temperature

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Print training progress
        if i % 200 == 199:  # Print every 100 mini-batches
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data in valloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # Apply temperature scaling to the outputs
            outputs = outputs / temperature

            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(valloader)
    val_accuracy = 100 * correct / total
    print(f"[Epoch {epoch + 1}] Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

print('Finished Training')



[Epoch 1, Batch 200] Loss: 4.5444
[Epoch 1, Batch 400] Loss: 4.3894
[Epoch 1] Validation Loss: 2.0943, Validation Accuracy: 26.24%
[Epoch 2, Batch 200] Loss: 4.3135
[Epoch 2, Batch 400] Loss: 4.2300
[Epoch 2] Validation Loss: 1.9811, Validation Accuracy: 29.84%
[Epoch 3, Batch 200] Loss: 4.1867
[Epoch 3, Batch 400] Loss: 4.1414
[Epoch 3] Validation Loss: 1.9138, Validation Accuracy: 32.48%
[Epoch 4, Batch 200] Loss: 4.1198
[Epoch 4, Batch 400] Loss: 4.0723
[Epoch 4] Validation Loss: 1.8793, Validation Accuracy: 33.66%
[Epoch 5, Batch 200] Loss: 4.0642
[Epoch 5, Batch 400] Loss: 4.0534
[Epoch 5] Validation Loss: 1.8392, Validation Accuracy: 34.86%
[Epoch 6, Batch 200] Loss: 4.0377
[Epoch 6, Batch 400] Loss: 4.0138
[Epoch 6] Validation Loss: 1.8117, Validation Accuracy: 35.63%
[Epoch 7, Batch 200] Loss: 4.0145
[Epoch 7, Batch 400] Loss: 4.0077
[Epoch 7] Validation Loss: 1.8010, Validation Accuracy: 36.00%
[Epoch 8, Batch 200] Loss: 3.9811
[Epoch 8, Batch 400] Loss: 3.9993
[Epoch 8] Valid

In [None]:
# Save the trained model
PATH = './cifar_net_differential.pth'
torch.save(model.state_dict(), PATH)

In [None]:
# Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")

Accuracy of the network on the 10000 test images: 38.41%


## PATE

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader, Subset
from model import CIFAR10Classifier
import numpy as np

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

# Split the dataset into disjoint subsets for teacher models
num_teachers = 10
teacher_subsets = random_split(trainset, [len(trainset) // num_teachers] * num_teachers)

# Train each teacher model
teacher_models = []
for i, subset in enumerate(teacher_subsets):
    trainloader = DataLoader(subset, batch_size=100, shuffle=True, num_workers=2)

    model = CIFAR10Classifier().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(5):  # Number of epochs for teacher models
        model.train()
        for data in trainloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    teacher_models.append(model)
    print(f"Teacher {i + 1} trained")

print("All teacher models trained")

Files already downloaded and verified
Files already downloaded and verified
Teacher 1 trained
Teacher 2 trained
Teacher 3 trained
Teacher 4 trained
Teacher 5 trained
Teacher 6 trained
Teacher 7 trained
Teacher 8 trained
Teacher 9 trained
Teacher 10 trained
All teacher models trained


In [21]:
student_labels = []
student_data = []

# Aggregating predictions
for data in DataLoader(trainset, batch_size=100, shuffle=False):
    inputs, labels = data
    inputs = inputs.to(device)

    # Collect predictions from all teacher models
    teacher_preds = torch.zeros((num_teachers, inputs.size(0), 10)).to(device)
    for i, model in enumerate(teacher_models):
        outputs = model(inputs)
        teacher_preds[i] = outputs

    # Aggregate predictions and add noise
    aggregated_preds = torch.sum(teacher_preds, dim=0)
    noisy_preds = aggregated_preds + torch.randn_like(aggregated_preds) * 0.1  # Adding noise
    student_labels.append(torch.argmax(noisy_preds, dim=1))
    student_data.append(inputs)

student_labels = torch.cat(student_labels).to(device)
student_data = torch.cat(student_data).to(device)

student_model = CIFAR10Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(student_model.parameters(), lr=0.001, weight_decay=1e-4)

student_dataset = torch.utils.data.TensorDataset(student_data, student_labels)
student_loader = DataLoader(student_dataset, batch_size=100, shuffle=True)

for epoch in range(10):
    student_model.train()
    running_loss = 0.0
    for data in student_loader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = student_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Print training progress
        if i % 100 == 99:
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

print('Finished Training Student Model')

Finished Training Student Model


In [22]:
# Save the trained student model
PATH = './cifar_student_net.pth'
torch.save(student_model.state_dict(), PATH)

In [23]:
# Evaluate the student model
student_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = student_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the student model on the 10000 test images: {100 * correct / total:.2f}%")

Accuracy of the student model on the 10000 test images: 49.32%


# Question **6**

In [None]:
# Step 1
# Load the baseline model
baseline_model = CIFAR10Classifier().to(device)
baseline_model.load_state_dict(torch.load('./cifar_net.pth'))
baseline_model.eval()

# Load the adversarially trained model
modified_model = CIFAR10Classifier().to(device)
modified_model.load_state_dict(torch.load('./cifar_student_net.pth'))
modified_model.eval()

CIFAR10Classifier(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout2d(p=0.25, inplace=False)
  (dropout2): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=6272, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [None]:
# Step 2
# Split the training data into seen and unseen datasets
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
seen_dataset, unseen_dataset = random_split(trainset, [train_size, val_size])

# Combine the unseen dataset with the test data to form the final unseen dataset
final_unseen_dataset = torch.utils.data.ConcatDataset([unseen_dataset, testset])

# Create DataLoaders for the seen and unseen datasets
seen_loader = DataLoader(seen_dataset, batch_size=100, shuffle=True, num_workers=2)
unseen_loader = DataLoader(final_unseen_dataset, batch_size=100, shuffle=False, num_workers=2)

In [None]:
# Step 3
num_shadow_models = 5
shadow_models = []

for i in range(num_shadow_models):
    model = CIFAR10Classifier().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

    shadow_trainset, shadow_valset = random_split(trainset, [train_size, val_size])
    shadow_loader = DataLoader(shadow_trainset, batch_size=100, shuffle=True, num_workers=2)

    # Training the shadow model
    for epoch in range(10):
        model.train()
        for data in shadow_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    shadow_models.append(model)
    print(f"Shadow model {i+1} trained.")

Shadow model 1 trained.
Shadow model 2 trained.
Shadow model 3 trained.
Shadow model 4 trained.
Shadow model 5 trained.


In [None]:
# Step 4
# Function to get model predictions
def get_model_predictions(model, loader):
    model.eval()
    predictions = []
    with torch.no_grad():
        for data in loader:
            inputs, _ = data
            inputs = inputs.to(device)
            outputs = model(inputs)
            predictions.append(outputs)
    return torch.cat(predictions)

# Generate attacker datasets
def generate_attacker_dataset(shadow_models, seen_loader, unseen_loader):
    shadow_preds_seen = [get_model_predictions(model, seen_loader) for model in shadow_models]
    shadow_preds_unseen = [get_model_predictions(model, unseen_loader) for model in shadow_models]

    X = torch.cat(shadow_preds_seen + shadow_preds_unseen)
    y = torch.cat([torch.ones(len(pred)) for pred in shadow_preds_seen] + [torch.zeros(len(pred)) for pred in shadow_preds_unseen])

    return X, y

# Get attacker datasets for baseline and modified models
X_baseline, y_baseline = generate_attacker_dataset(shadow_models, seen_loader, unseen_loader)
X_modified, y_modified = generate_attacker_dataset(shadow_models, seen_loader, unseen_loader)


In [None]:
# Step 5
# Define the attacker model
class AttackerModel(nn.Module):
    def __init__(self):
        super(AttackerModel, self).__init__()
        self.fc1 = nn.Linear(10, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# Train the attacker model
def train_attacker_model(X, y):
    attacker_model = AttackerModel().to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(attacker_model.parameters(), lr=0.001)

    dataset = torch.utils.data.TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=100, shuffle=True)

    for epoch in range(10):
        attacker_model.train()
        running_loss = 0.0
        for data in loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device).float()

            optimizer.zero_grad()
            outputs = attacker_model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Attacker model training epoch {epoch+1} loss: {running_loss/len(loader):.4f}")

    return attacker_model

# Train attacker models for baseline and modified models
attacker_model_baseline = train_attacker_model(X_baseline, y_baseline)
attacker_model_modified = train_attacker_model(X_modified, y_modified)

Attacker model training epoch 1 loss: 0.6394
Attacker model training epoch 2 loss: 0.6375
Attacker model training epoch 3 loss: 0.6368
Attacker model training epoch 4 loss: 0.6365
Attacker model training epoch 5 loss: 0.6363
Attacker model training epoch 6 loss: 0.6363
Attacker model training epoch 7 loss: 0.6362
Attacker model training epoch 8 loss: 0.6361
Attacker model training epoch 9 loss: 0.6361
Attacker model training epoch 10 loss: 0.6360
Attacker model training epoch 1 loss: 0.6389
Attacker model training epoch 2 loss: 0.6375
Attacker model training epoch 3 loss: 0.6370
Attacker model training epoch 4 loss: 0.6365
Attacker model training epoch 5 loss: 0.6364
Attacker model training epoch 6 loss: 0.6363
Attacker model training epoch 7 loss: 0.6362
Attacker model training epoch 8 loss: 0.6360
Attacker model training epoch 9 loss: 0.6360
Attacker model training epoch 10 loss: 0.6360


In [None]:
# Step 6
# Function to evaluate the attacker model
def evaluate_attacker_model(attacker_model, X, y):
    attacker_model.eval()
    dataset = torch.utils.data.TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=100, shuffle=False)

    correct = 0
    total = 0
    with torch.no_grad():
        for data in loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device).float()
            outputs = attacker_model(inputs)
            predicted = (outputs.squeeze() > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Attacker Model Accuracy: {accuracy:.2f}%')
    return accuracy

# Evaluate the attacker models
print("Evaluating attacker model for baseline model:")
baseline_attacker_accuracy = evaluate_attacker_model(attacker_model_baseline, X_baseline, y_baseline)

print("Evaluating attacker model for modified model:")
modified_attacker_accuracy = evaluate_attacker_model(attacker_model_modified, X_modified, y_modified)

# Compare the MIA accuracy
print(f"Baseline Model MIA Accuracy: {baseline_attacker_accuracy:.2f}%")
print(f"Modified Model MIA Accuracy: {modified_attacker_accuracy:.2f}%")

Evaluating attacker model for baseline model:
Attacker Model Accuracy: 66.67%
Evaluating attacker model for modified model:
Attacker Model Accuracy: 66.67%
Baseline Model MIA Accuracy: 66.67%
Modified Model MIA Accuracy: 66.67%


# Newwww

In [None]:
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, Subset
import numpy as np
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim

# Assuming model.py contains the CIFAR10Classifier definition and necessary imports
from model import CIFAR10Classifier

# Load the models
baseline_model = CIFAR10Classifier()
modified_model = CIFAR10Classifier()

baseline_model.load_state_dict(torch.load('./cifar_net.pth'))
modified_model.load_state_dict(torch.load('./cifar_student_net.pth'))

baseline_model.eval()
modified_model.eval()

# Prepare dataset (assuming CIFAR-10 dataset for illustration)
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_data = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_data = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Splitting training data into seen (80%) and unseen (20%) data
train_indices = list(range(len(train_data)))
train_seen_indices, train_unseen_indices = train_test_split(train_indices, test_size=0.2, random_state=42)

seen_data = Subset(train_data, train_seen_indices)
unseen_data = Subset(train_data, train_unseen_indices)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

# DataLoader for seen and unseen data
seen_loader = DataLoader(seen_data, batch_size=64, shuffle=True)
unseen_loader = DataLoader(unseen_data, batch_size=64, shuffle=False)

# Define a simple attacker model
class AttackerModel(nn.Module):
    def __init__(self, input_dim):
        super(AttackerModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# Function to extract features
def extract_features(model, loader):
    features = []
    labels = []
    model.eval()
    with torch.no_grad():
        for inputs, targets in loader:
            outputs = model(inputs)
            features.append(outputs.view(outputs.size(0), -1).cpu().numpy())
            labels.append(targets.cpu().numpy())
    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)
    return features, labels

# Extract features from both models
baseline_seen_features, _ = extract_features(baseline_model, seen_loader)
baseline_unseen_features, _ = extract_features(baseline_model, unseen_loader)
modified_seen_features, _ = extract_features(modified_model, seen_loader)
modified_unseen_features, _ = extract_features(modified_model, unseen_loader)

# Prepare attacker dataset
def prepare_attacker_data(seen_features, unseen_features):
    seen_labels = np.ones(len(seen_features))
    unseen_labels = np.zeros(len(unseen_features))
    features = np.concatenate((seen_features, unseen_features), axis=0)
    labels = np.concatenate((seen_labels, unseen_labels), axis=0)
    return features, labels

baseline_features, baseline_labels = prepare_attacker_data(baseline_seen_features, baseline_unseen_features)
modified_features, modified_labels = prepare_attacker_data(modified_seen_features, modified_unseen_features)

# Train attacker model
def train_attacker_model(features, labels):
    input_dim = features.shape[1]
    attacker_model = AttackerModel(input_dim)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(attacker_model.parameters(), lr=0.001)

    dataset = torch.utils.data.TensorDataset(torch.tensor(features, dtype=torch.float32), torch.tensor(labels, dtype=torch.float32).unsqueeze(1))
    loader = DataLoader(dataset, batch_size=64, shuffle=True)

    for epoch in range(10):
        for data, target in loader:
            optimizer.zero_grad()
            output = attacker_model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    return attacker_model

baseline_attacker = train_attacker_model(baseline_features, baseline_labels)
modified_attacker = train_attacker_model(modified_features, modified_labels)

# Evaluate attacker models
def evaluate_attacker_model(attacker_model, features, labels):
    attacker_model.eval()
    with torch.no_grad():
        outputs = attacker_model(torch.tensor(features, dtype=torch.float32))
        predictions = (outputs.cpu().numpy() > 0.5).astype(int)
    return accuracy_score(labels, predictions)

# Extract test features for evaluation
baseline_test_features, _ = extract_features(baseline_model, test_loader)
modified_test_features, _ = extract_features(modified_model, test_loader)

baseline_test_labels = np.zeros(len(baseline_test_features))
modified_test_labels = np.zeros(len(modified_test_features))

# Evaluate the attacker models on test data
baseline_mia_accuracy = evaluate_attacker_model(baseline_attacker, baseline_test_features, baseline_test_labels)
modified_mia_accuracy = evaluate_attacker_model(modified_attacker, modified_test_features, modified_test_labels)

print(f"Baseline Model MIA Accuracy: {baseline_mia_accuracy}")
print(f"Modified Model MIA Accuracy: {modified_mia_accuracy}")

Files already downloaded and verified
Files already downloaded and verified
Baseline Model MIA Accuracy: 0.0
Modified Model MIA Accuracy: 0.0005


# Gonna Die Soon
## Everything again

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset, random_split
import torch.nn.functional as F
from model import CIFAR10Classifier

# Define device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into 80% seen data and 20% unseen data
train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = random_split(train_set, [train_size, remaining_size])

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Create loaders for seen and unseen data
num_shadow_models = 5
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=100, shuffle=True)
    seen_loaders.append(seen_loader)

# Create concatenated unseen data from the remaining 20% of training data and the entire test set
unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=100, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=100, shuffle=False)

In [None]:
# Define the attack model
class AttackModel(nn.Module):
    def __init__(self):
        super(AttackModel, self).__init__()
        self.fc1 = nn.Linear(10, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return x

In [None]:
class MembershipInferenceAttack:
    def __init__(self, shadow_model_class, attack_model_class, device='cpu'):
        self.shadow_model_class = shadow_model_class
        self.attack_model_class = attack_model_class
        self.device = device
        self.attack_models = {}

    def train_shadow_models(self, seen_loaders, num_epochs=10, lr=1e-3):
        self.shadow_models = [self.shadow_model_class().to(self.device) for _ in range(len(seen_loaders))]

        for i, (shadow_model, seen_loader) in enumerate(zip(self.shadow_models, seen_loaders)):
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(shadow_model.parameters(), lr=lr)
            self._train_model(shadow_model, seen_loader, criterion, optimizer, num_epochs)
            print(f'Shadow model {i+1} trained.')

    def _train_model(self, model, dataloader, criterion, optimizer, num_epochs):
        model.train()
        for epoch in range(num_epochs):
            running_loss = 0.0
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader):.6f}')

    def collect_outputs(self, seen_loaders, unseen_loaders):
        self.attack_data = []
        self.attack_labels = []

        for shadow_model, seen_loader, unseen_loader in zip(self.shadow_models, seen_loaders, unseen_loaders):
            self._collect_shadow_model_outputs(shadow_model, seen_loader, label=1)  # in
            self._collect_shadow_model_outputs(shadow_model, unseen_loader, label=0)  # out

        self.attack_data = torch.cat(self.attack_data).to(self.device)
        self.attack_labels = torch.cat(self.attack_labels).to(self.device)

    def _collect_shadow_model_outputs(self, model, dataloader, label):
        model.eval()
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                self.attack_data.append(torch.cat([probabilities, labels.unsqueeze(1).float()], dim=1))
                self.attack_labels.append(torch.full((outputs.size(0),), label, dtype=torch.float).to(self.device))

    def train_attack_models(self, num_epochs=10, lr=0.001):
        for class_label in range(10):  # Assuming 10 classes
            class_indices = (self.attack_data[:, -1] == class_label).nonzero().squeeze()
            class_data = self.attack_data[class_indices][:, :-1]  # Exclude the last column (class label)
            class_labels = self.attack_labels[class_indices].view(-1, 1)  # Ensure labels are the same shape as outputs

            attack_dataset = torch.utils.data.TensorDataset(class_data, class_labels)
            attack_loader = DataLoader(attack_dataset, batch_size=50, shuffle=True)

            attack_model = self.attack_model_class().to(self.device)
            criterion = nn.BCELoss()
            optimizer = optim.Adam(attack_model.parameters(), lr=lr)

            self._train_model(attack_model, attack_loader, criterion, optimizer, num_epochs)
            self.attack_models[class_label] = attack_model

            print(f'Attack model for class {class_label} trained.')

    def save_attack_models(self, path):
        for class_label, model in self.attack_models.items():
            torch.save(model.state_dict(), f'{path}_class_{class_label}.pth')
            print(f'Attack model for class {class_label} saved to {path}_class_{class_label}.pth')

    def load_attack_models(self, path):
        for class_label in range(10):  # Assuming 10 classes
            model = self.attack_model_class().to(self.device)
            model.load_state_dict(torch.load(f'{path}_class_{class_label}.pth', map_location=self.device))
            self.attack_models[class_label] = model
            print(f'Attack model for class {class_label} loaded from {path}_class_{class_label}.pth')

    def infer_membership(self, seen_loader, unseen_loader, seen_outputs, unseen_outputs, labels):
        model_outputs = torch.cat([seen_outputs, unseen_outputs]).to(self.device)

        print("In Infer : ")
        print(f"model_outputs size: {model_outputs.size()}")

        if isinstance(labels, list):
            labels = torch.cat(labels).to(self.device)
        else:
            labels = labels.to(self.device)

        print(f"labels size: {len(labels)}")

        memberships = []
        for output, label in zip(model_outputs, labels):
            class_label = label.item()
            attack_model = self.attack_models[class_label]
            membership_pred = attack_model(output.unsqueeze(0)).item()
            memberships.append(membership_pred)
        print(f"memberships size: {len(memberships)}")
        return torch.tensor(memberships, device=self.device)

    def evaluate_attack_model(self, seen_loader, unseen_loader, target_model):
        seen_outputs, labels_seen = self._get_model_outputs(target_model, seen_loader)
        unseen_outputs, labels_unseen = self._get_model_outputs(target_model, unseen_loader)

        print(f"Seen outputs size: {seen_outputs.size()}")
        print(f"Unseen outputs size: {unseen_outputs.size()}")

        attack_data = torch.cat([seen_outputs, unseen_outputs]).to(self.device)
        attack_labels = torch.cat([torch.ones(len(seen_outputs)), torch.zeros(len(unseen_outputs))]).to(self.device)

        # Ensure labels_seen and labels_unseen are tensors
        if isinstance(labels_seen, list):
            labels_seen = torch.cat(labels_seen)
        if isinstance(labels_unseen, list):
            labels_unseen = torch.cat(labels_unseen)

        labels = torch.cat([labels_seen, labels_unseen]).to(self.device)

        memberships = self.infer_membership(seen_loader, unseen_loader, seen_outputs, unseen_outputs, labels)
        membership_preds = (memberships > 0.5).float()
        accuracy = (membership_preds == attack_labels).float().mean().item()
        return accuracy

    def _get_model_outputs(self, model, dataloader):
        model.eval()
        outputs_list = []
        labels_list = []
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                outputs_list.append(probabilities)
                labels_list.append(labels.cpu())  # Ensure labels are moved to CPU for concatenation
        return torch.cat(outputs_list), labels_list  # Return labels_list as a list of tensors

In [None]:
# Load the models
baseline_model = CIFAR10Classifier().to(device)
baseline_model.load_state_dict(torch.load('./cifar_net.pth'))
baseline_model.eval()

private_model = CIFAR10Classifier().to(device)
private_model.load_state_dict(torch.load('./cifar_student_net.pth'))
private_model.eval()

# Initialize MembershipInferenceAttack
mia = MembershipInferenceAttack(CIFAR10Classifier, AttackModel, device)

# Train shadow models
mia.train_shadow_models(seen_loaders, num_epochs=10)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models(num_epochs=10)

Epoch 1, Loss: 2.005599
Epoch 2, Loss: 1.709452
Epoch 3, Loss: 1.572399
Epoch 4, Loss: 1.498010
Epoch 5, Loss: 1.437842
Epoch 6, Loss: 1.363819
Epoch 7, Loss: 1.305830
Epoch 8, Loss: 1.257781
Epoch 9, Loss: 1.192899
Epoch 10, Loss: 1.157905
Shadow model 1 trained.
Epoch 1, Loss: 2.115276
Epoch 2, Loss: 1.832025
Epoch 3, Loss: 1.694272
Epoch 4, Loss: 1.594993
Epoch 5, Loss: 1.550497
Epoch 6, Loss: 1.495581
Epoch 7, Loss: 1.435374
Epoch 8, Loss: 1.392361
Epoch 9, Loss: 1.347379
Epoch 10, Loss: 1.316811
Shadow model 2 trained.
Epoch 1, Loss: 2.037970
Epoch 2, Loss: 1.741874
Epoch 3, Loss: 1.605706
Epoch 4, Loss: 1.519575
Epoch 5, Loss: 1.471838
Epoch 6, Loss: 1.390339
Epoch 7, Loss: 1.353814
Epoch 8, Loss: 1.283862
Epoch 9, Loss: 1.242749
Epoch 10, Loss: 1.196078
Shadow model 3 trained.
Epoch 1, Loss: 2.102064
Epoch 2, Loss: 1.793580
Epoch 3, Loss: 1.658747
Epoch 4, Loss: 1.565381
Epoch 5, Loss: 1.508674
Epoch 6, Loss: 1.441093
Epoch 7, Loss: 1.379845
Epoch 8, Loss: 1.334075
Epoch 9, Loss

In [None]:
# Evaluate attack models
print("Evaluating attack model for baseline model:")
baseline_attack_accuracy = mia.evaluate_attack_model(seen_loader, unseen_loader, baseline_model)
print(f"Baseline Model MIA Accuracy: {baseline_attack_accuracy * 100:.2f}%")

print("Evaluating attack model for private model:")
private_attack_accuracy = mia.evaluate_attack_model(seen_loader, unseen_loader, private_model)
print(f"Private Model MIA Accuracy: {private_attack_accuracy * 100:.2f}%")

Evaluating attack model for baseline model:




Seen outputs size: torch.Size([8000, 10])
Unseen outputs size: torch.Size([4000, 10])
In Infer : 
model_outputs size: torch.Size([12000, 10])
labels size: 12000
memberships size: 12000
Baseline Model MIA Accuracy: 65.97%
Evaluating attack model for private model:
Seen outputs size: torch.Size([8000, 10])
Unseen outputs size: torch.Size([4000, 10])
In Infer : 
model_outputs size: torch.Size([12000, 10])
labels size: 12000
memberships size: 12000
Private Model MIA Accuracy: 54.08%


In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset, random_split
import torch.nn.functional as F

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# CIFAR-10 dataset loading
transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transformation)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transformation)

# Data splitting
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])

# Loaders creation
shadow_model_count = 5
batch_size = train_size // shadow_model_count
train_loaders = []

for i in range(shadow_model_count):
    indices = torch.arange(i * batch_size, (i + 1) * batch_size)
    subset = Subset(train_subset, indices)
    loader = DataLoader(subset, batch_size=100, shuffle=True)
    train_loaders.append(loader)

# Unseen data combining
val_combined = ConcatDataset([val_subset, test_dataset])
unseen_batch_size = len(val_combined) // shadow_model_count
unseen_loaders = []

for i in range(shadow_model_count):
    indices = torch.arange(i * unseen_batch_size, (i + 1) * unseen_batch_size)
    subset = Subset(val_combined, indices)
    loader = DataLoader(subset, batch_size=100, shuffle=False)
    unseen_loaders.append(loader)

test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

# Attack model definition
def create_attack_network():
    model = nn.Sequential(
        nn.Linear(10, 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, 1),
        nn.Sigmoid()
    )
    return model.to(device)

def train_network(model, data_loader, loss_function, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss/len(data_loader):.6f}')

def collect_shadow_data(models, seen_loaders, unseen_loaders):
    shadow_dataset = []
    for model, seen_loader, unseen_loader in zip(models, seen_loaders, unseen_loaders):
        shadow_dataset.extend(extract_outputs(model, seen_loader, label=1))
        shadow_dataset.extend(extract_outputs(model, unseen_loader, label=0))
    return shadow_dataset

def extract_outputs(model, loader, label):
    model.eval()
    outputs = []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            out = model(inputs)
            probs = F.softmax(out, dim=1)
            combined = torch.cat([probs, labels.unsqueeze(1).float()], dim=1)
            for output in combined:
                outputs.append((output, label))
    return outputs

def train_attack_networks(attack_dataset, epochs=10, lr=0.001):
    attack_models = {}
    loss_fn = nn.BCELoss()
    for class_id in range(10):
        class_specific_data = [(data[:-1], label) for data, label in attack_dataset if data[-1].item() == class_id]
        if len(class_specific_data) == 0:
            continue
        X = torch.stack([data for data, label in class_specific_data])
        y = torch.tensor([label for data, label in class_specific_data], dtype=torch.float).unsqueeze(1)
        dataset = torch.utils.data.TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=50, shuffle=True)

        attacker = create_attack_network()
        optimizer = torch.optim.Adam(attacker.parameters(), lr=lr)
        train_network(attacker, loader, loss_fn, optimizer, epochs)
        attack_models[class_id] = attacker
        print(f'Attack model for class {class_id} trained.')
    return attack_models

def perform_membership_inference(attack_models, outputs, labels):
    membership_predictions = []
    for output, label in zip(outputs, labels):
        class_label = label.item()
        if class_label in attack_models:
            attack_model = attack_models[class_label]
            membership_pred = attack_model(output.unsqueeze(0).to(device)).item()
            membership_predictions.append(membership_pred)
    return torch.tensor(membership_predictions, device=device)

def evaluate_membership_attack(target_model, seen_loader, unseen_loader, attack_models):
    seen_outputs, seen_labels = get_model_predictions(target_model, seen_loader)
    unseen_outputs, unseen_labels = get_model_predictions(target_model, unseen_loader)

    combined_outputs = torch.cat([seen_outputs, unseen_outputs]).to(device)
    true_memberships = torch.cat([torch.ones(len(seen_outputs)), torch.zeros(len(unseen_outputs))]).to(device)

    membership_preds = perform_membership_inference(attack_models, combined_outputs, torch.cat([seen_labels, unseen_labels]).to(device))
    membership_binary = (membership_preds > 0.5).float()
    accuracy = (membership_binary == true_memberships).float().mean().item()
    return accuracy

def get_model_predictions(model, loader):
    model.eval()
    output_list = []
    label_list = []
    with torch.no_grad():
        for data, target in loader:
            data = data.to(device)
            outputs = model(data)
            probs = F.softmax(outputs, dim=1)
            output_list.append(probs)
            label_list.append(target)
    return torch.cat(output_list), torch.cat(label_list)

# Load and evaluate target models
baseline_model = CIFAR10Classifier().to(device)
baseline_model.load_state_dict(torch.load('./cifar_net.pth'))
baseline_model.eval()

private_model = CIFAR10Classifier().to(device)
private_model.load_state_dict(torch.load('./cifar_student_net.pth'))
private_model.eval()

# Train shadow models
shadow_models = [CIFAR10Classifier().to(device) for _ in range(shadow_model_count)]
loss_fn = nn.CrossEntropyLoss()
optimizer_fn = lambda model: optim.Adam(model.parameters(), lr=1e-3)

for i, (shadow_model, loader) in enumerate(zip(shadow_models, train_loaders)):
    train_network(shadow_model, loader, loss_fn, optimizer_fn(shadow_model), epochs=10)
    print(f'Shadow model {i+1} trained.')

# Collect shadow outputs
shadow_data = collect_shadow_data(shadow_models, train_loaders, unseen_loaders)

# Train attack models
attack_models = train_attack_networks(shadow_data, epochs=10)

# Evaluate attack models
print("Evaluating attack model for baseline model:")
baseline_attack_accuracy = evaluate_membership_attack(baseline_model, train_loaders[0], unseen_loaders[0], attack_models)
print(f"Baseline Model MIA Accuracy: {baseline_attack_accuracy:.2f}%")

print("Evaluating attack model for private model:")
private_attack_accuracy = evaluate_membership_attack(private_model, train_loaders[0], unseen_loaders[0], attack_models)
print(f"Private Model MIA Accuracy: {private_attack_accuracy:.2f}%")


Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 2.022860
Epoch 2, Loss: 1.763491
Epoch 3, Loss: 1.618022
Epoch 4, Loss: 1.544770
Epoch 5, Loss: 1.458975
Epoch 6, Loss: 1.390237
Epoch 7, Loss: 1.329747
Epoch 8, Loss: 1.303929
Epoch 9, Loss: 1.242841
Epoch 10, Loss: 1.188995
Shadow model 1 trained.
Epoch 1, Loss: 2.007632
Epoch 2, Loss: 1.740101
Epoch 3, Loss: 1.639199
Epoch 4, Loss: 1.559334
Epoch 5, Loss: 1.490752
Epoch 6, Loss: 1.431793
Epoch 7, Loss: 1.368334
Epoch 8, Loss: 1.314577
Epoch 9, Loss: 1.274585
Epoch 10, Loss: 1.215740
Shadow model 2 trained.
Epoch 1, Loss: 2.115242
Epoch 2, Loss: 1.846865
Epoch 3, Loss: 1.711470
Epoch 4, Loss: 1.612441
Epoch 5, Loss: 1.540845
Epoch 6, Loss: 1.472068
Epoch 7, Loss: 1.433279
Epoch 8, Loss: 1.397428
Epoch 9, Loss: 1.342172
Epoch 10, Loss: 1.289917
Shadow model 3 trained.
Epoch 1, Loss: 2.097698
Epoch 2, Loss: 1.827050
Epoch 3, Loss: 1.683795
Epoch 4, Loss: 1.603843
Epoch 5, Loss: 1.541724
Epoch 6, 

In [27]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset, random_split
import torch.nn.functional as F

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# CIFAR-10 dataset loading with normalization
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split dataset into training and validation sets
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])

# Create data loaders for shadow models
shadow_count = 5
batch_size = train_size // shadow_count
train_loaders = []

# Split the training data into parts for each shadow model
for i in range(shadow_count):
    indices = torch.arange(i * batch_size, (i + 1) * batch_size)
    subset = Subset(train_subset, indices)
    loader = DataLoader(subset, batch_size=100, shuffle=True)
    train_loaders.append(loader)

# Combine validation and test sets for unseen data
val_combined = ConcatDataset([val_subset, test_dataset])
unseen_batch_size = len(val_combined) // shadow_count
unseen_loaders = []

# Split the unseen data into parts for each shadow model
for i in range(shadow_count):
    indices = torch.arange(i * unseen_batch_size, (i + 1) * unseen_batch_size)
    subset = Subset(val_combined, indices)
    loader = DataLoader(subset, batch_size=100, shuffle=False)
    unseen_loaders.append(loader)

test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

# Define the attack model architecture
def create_att_mod():
    # Define a simple neural network architecture for the attack model
    model = nn.Sequential(
        nn.Linear(10, 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, 1),
        nn.Sigmoid()
    )
    return model.to(device)

# Train the given model with the provided data loader
def train_mod(model, loader, loss_fn, opt, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        for data, target in loader:
            data, target = data.to(device), target.to(device)

            # Zero the parameter gradients
            opt.zero_grad()

            # Forward pass
            output = model(data)

            # Compute loss
            loss = loss_fn(output, target)

            # Backward pass and optimize
            loss.backward()
            opt.step()

            # Accumulate loss
            total_loss += loss.item()

        # Print average loss for the epoch
        print(f'Epoch {epoch+1}, Loss: {total_loss/len(loader):.4f}')

# Collect shadow data from multiple models
def get_shadow_data(models, seen_loaders, unseen_loaders):
    shadow_data = []
    for model, seen_loader, unseen_loader in zip(models, seen_loaders, unseen_loaders):
        # Collect data from seen loader (label=1)
        shadow_data.extend(extract_out(model, seen_loader, label=1))

        # Collect data from unseen loader (label=0)
        shadow_data.extend(extract_out(model, unseen_loader, label=0))
    return shadow_data

# Extract outputs from the model for the given data loader
def extract_out(model, loader, label):
    model.eval()
    outputs = []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            out = model(inputs)

            # Compute softmax probabilities
            probs = F.softmax(out, dim=1)

            # Combine probabilities and labels
            combined = torch.cat([probs, labels.unsqueeze(1).float()], dim=1)

            # Append outputs with the specified label
            for output in combined:
                outputs.append((output, label))
    return outputs

# Train attack models for each class
def train_att_mods(att_data, epochs=10, lr=0.001):
    att_models = {}
    loss_fn = nn.BCELoss()

    # Train an attack model for each class
    for class_id in range(10):
        class_data = [(data[:-1], label) for data, label in att_data if data[-1].item() == class_id]
        if len(class_data) == 0:
            continue
        X = torch.stack([data for data, label in class_data])
        y = torch.tensor([label for data, label in class_data], dtype=torch.float).unsqueeze(1)
        dataset = torch.utils.data.TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=50, shuffle=True)

        attacker = create_att_mod()
        opt = torch.optim.Adam(attacker.parameters(), lr=lr)

        # Train the attack model
        train_mod(attacker, loader, loss_fn, opt, epochs)
        att_models[class_id] = attacker
        print(f'Class {class_id} attack model trained.')
    return att_models

# Perform membership inference using attack models
def infer_mem(att_models, outputs, labels):
    preds = []
    for output, label in zip(outputs, labels):
        class_id = label.item()

        # Use the attack model for the specific class
        if class_id in att_models:
            att_model = att_models[class_id]

            # Predict membership
            pred = att_model(output.unsqueeze(0).to(device)).item()
            preds.append(pred)
    return torch.tensor(preds, device=device)

# Evaluate the attack model
def eval_att_mod(target_model, seen_loader, unseen_loader, att_models):
    seen_outs, seen_labels = gen_out(target_model, seen_loader)
    unseen_outs, unseen_labels = gen_out(target_model, unseen_loader)

    combined_outs = torch.cat([seen_outs, unseen_outs]).to(device)
    true_labels = torch.cat([torch.ones(len(seen_outs)), torch.zeros(len(unseen_outs))]).to(device)

    # Perform membership inference
    membership_preds = infer_mem(att_models, combined_outs, torch.cat([seen_labels, unseen_labels]).to(device))

    # Binarize predictions
    binary_preds = (membership_preds > 0.5).float()

    # Compute accuracy
    accuracy = (binary_preds == true_labels).float().mean().item()
    return accuracy

# Generate outputs from the model for the given data loader
def gen_out(model, loader):
    model.eval()
    output_list = []
    label_list = []
    with torch.no_grad():
        for data, target in loader:
            data = data.to(device)

            # Forward pass
            outputs = model(data)

            # Compute softmax probabilities
            probs = F.softmax(outputs, dim=1)

            # Collect outputs and labels
            output_list.append(probs)
            label_list.append(target)
    return torch.cat(output_list), torch.cat(label_list)

# Load and evaluate target models
baseline_model = CIFAR10Classifier().to(device)
baseline_model.load_state_dict(torch.load('./cifar_net.pth'))
baseline_model.eval()

private_model = CIFAR10Classifier().to(device)
private_model.load_state_dict(torch.load('./cifar_student_net.pth'))
private_model.eval()

# Train shadow models
shadow_models = [CIFAR10Classifier().to(device) for _ in range(shadow_count)]
loss_fn = nn.CrossEntropyLoss()
optimizer_fn = lambda model: optim.Adam(model.parameters(), lr=1e-3)

# Train each shadow model with its respective data loader
for i, (shadow_model, loader) in enumerate(zip(shadow_models, train_loaders)):
    train_mod(shadow_model, loader, loss_fn, optimizer_fn(shadow_model), epochs=10)
    print(f'Shadow model {i+1} trained.')

# Collect shadow outputs
shadow_data = get_shadow_data(shadow_models, train_loaders, unseen_loaders)

# Train attack models
attack_models = train_att_mods(shadow_data, epochs=10)

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 2.0411
Epoch 2, Loss: 1.7531
Epoch 3, Loss: 1.6388
Epoch 4, Loss: 1.5321
Epoch 5, Loss: 1.4512
Epoch 6, Loss: 1.3974
Epoch 7, Loss: 1.3555
Epoch 8, Loss: 1.2961
Epoch 9, Loss: 1.2632
Epoch 10, Loss: 1.2189
Shadow model 1 trained.
Epoch 1, Loss: 2.0133
Epoch 2, Loss: 1.7276
Epoch 3, Loss: 1.6122
Epoch 4, Loss: 1.5225
Epoch 5, Loss: 1.4251
Epoch 6, Loss: 1.3683
Epoch 7, Loss: 1.3058
Epoch 8, Loss: 1.2317
Epoch 9, Loss: 1.1801
Epoch 10, Loss: 1.1418
Shadow model 2 trained.
Epoch 1, Loss: 2.0795
Epoch 2, Loss: 1.7803
Epoch 3, Loss: 1.6456
Epoch 4, Loss: 1.5469
Epoch 5, Loss: 1.4834
Epoch 6, Loss: 1.4172
Epoch 7, Loss: 1.3661
Epoch 8, Loss: 1.3199
Epoch 9, Loss: 1.2591
Epoch 10, Loss: 1.2169
Shadow model 3 trained.
Epoch 1, Loss: 2.0589
Epoch 2, Loss: 1.7794
Epoch 3, Loss: 1.6538
Epoch 4, Loss: 1.5446
Epoch 5, Loss: 1.4622
Epoch 6, Loss: 1.3955
Epoch 7, Loss: 1.3394
Epoch 8, Loss: 1.2752
Epoch 9, Loss

In [29]:
# Evaluate attack models
print("Evaluating attack model for baseline model:")
baseline_attack_accuracy = eval_att_mod(baseline_model, train_loaders[0], unseen_loaders[0], attack_models)
print(f"Baseline Model MIA Accuracy: {baseline_attack_accuracy * 100:.2f}%")

print("Evaluating attack model for private model:")
private_attack_accuracy = eval_att_mod(private_model, train_loaders[0], unseen_loaders[0], attack_models)
print(f"Private Model MIA Accuracy: {private_attack_accuracy * 100:.2f}%")

Evaluating attack model for baseline model:
Baseline Model MIA Accuracy: 63.63%
Evaluating attack model for private model:
Private Model MIA Accuracy: 53.45%
