In [144]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class CIFAR10Classifier(nn.Module):
  def __init__(self):
    super(CIFAR10Classifier, self).__init__()
    self.conv1 = nn.Conv2d(3, 16, 3, 1)
    self.conv2 = nn.Conv2d(16, 32, 3, 1)
    self.dropout1 = nn.Dropout2d(0.25)
    self.dropout2 = nn.Dropout2d(0.5)
    self.fc1 = nn.Linear(6272, 64)
    self.fc2 = nn.Linear(64, 10)

  def forward(self, x):
    x = self.conv1(x)
    x = F.relu(x)
    x = self.conv2(x)
    x = F.relu(x)
    x = F.max_pool2d(x, 2)
    x = self.dropout1(x)
    x = torch.flatten(x, 1)
    x = self.fc1(x)
    x = F.relu(x)
    x = self.dropout2(x)
    x = self.fc2(x)
    return x


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import Subset, DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC  # Import Support Vector Classifier
import joblib

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = CIFAR10Classifier()
state_dict = torch.load("model_state_dict.pth", map_location=device)
new_state_dict = {key.replace('_module.', ''): value for key, value in state_dict.items()}
model.load_state_dict(new_state_dict)
model.to(device)
model.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

DATA_ROOT = '../cifar10'
BATCH_SIZE = 64

# # Load the indices from list.txt
# indices_file = 'list.txt' ############
# with open(indices_file, 'r') as f:
#     indices = [int(line.strip()) for line in f]

indices = torch.randperm(len(trainset))[:10000]

full_train_dataset = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform)

train_indices_set = set(indices)
all_indices = set(range(len(full_train_dataset)))
other_indices = list(all_indices - train_indices_set)

train_dataset = Subset(full_train_dataset, indices[:len(indices)//2])  ###########
other_dataset = Subset(full_train_dataset, other_indices)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
other_loader = DataLoader(other_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Create labels
train_labels = torch.ones(len(train_dataset)).to(device)
other_labels = torch.zeros(len(other_dataset)).to(device)
test_labels = torch.zeros(len(test_dataset)).to(device)

####################################
# Implement an Attacker Model
####################################

def extract_features(model, dataloader):
    model.eval()
    features = []
    with torch.no_grad():
        for data in dataloader:
            inputs, _ = data
            inputs = inputs.to(device)
            outputs = model(inputs)
            features.append(outputs)
    return torch.cat(features).to(device)

train_features = extract_features(model, train_loader)
other_features = extract_features(model, other_loader)
test_features = extract_features(model, test_loader)


combined_features = torch.cat((train_features, other_features, test_features))
combined_labels = torch.cat((train_labels, other_labels, test_labels))


new_dataset = TensorDataset(combined_features, combined_labels)
new_loader = DataLoader(new_dataset, batch_size=BATCH_SIZE, shuffle=True)

#load your attacker model

#############################################

# Calculate training accuracy, confusion matrix, precision, and recall
binary_classifier.eval()
all_labels = []
all_predicted = []
correct = 0
total = 0

with torch.no_grad():
    for features, labels in new_loader:
        features, labels = features.to(device), labels.to(device)
        outputs = attacker(features).squeeze()
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        all_labels.extend(labels.cpu().numpy())
        all_predicted.extend(predicted.cpu().numpy())

accuracy = correct / total
print(f'Training Accuracy: {accuracy:.4f}')

cm = confusion_matrix(all_labels, all_predicted)
precision = precision_score(all_labels, all_predicted)
recall = recall_score(all_labels, all_predicted)
f1 = f1_score(all_labels, all_predicted)

print(f'Confusion Matrix:\n{cm}')
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

In [11]:
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import Subset, DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC  # Import Support Vector Classifier
import joblib

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = CIFAR10Classifier()
state_dict = torch.load("/kaggle/input/phase1/model_state_dict.pth", map_location=device)
new_state_dict = {key.replace('_module.', ''): value for key, value in state_dict.items()}
model.load_state_dict(new_state_dict)
model.to(device)
model.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

DATA_ROOT = '../cifar10'
BATCH_SIZE = 64



full_train_dataset = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform)

indices = torch.randperm(len(test_dataset))[:10000]

train_indices_set = set(indices)
all_indices = set(range(len(full_train_dataset)))
other_indices = list(all_indices - train_indices_set)

train_dataset = Subset(full_train_dataset, indices[:len(indices)//2])
other_dataset = Subset(full_train_dataset, other_indices)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
other_loader = DataLoader(other_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Create labels
train_labels = torch.ones(len(train_dataset)).to(device)
other_labels = torch.zeros(len(other_dataset)).to(device)
test_labels = torch.zeros(len(test_dataset)).to(device)

def extract_features(model, dataloader):
    model.eval()
    features = []
    with torch.no_grad():
        for data in dataloader:
            inputs, _ = data
            inputs = inputs.to(device)
            outputs = model(inputs)
            features.append(outputs)
    return torch.cat(features).to(device)

train_features = extract_features(model, train_loader)
other_features = extract_features(model, other_loader)
test_features = extract_features(model, test_loader)

combined_features = torch.cat((train_features, other_features, test_features)).cpu().numpy()
combined_labels = torch.cat((train_labels, other_labels, test_labels)).cpu().numpy()

# Standardize features
scaler = StandardScaler()
combined_features = scaler.fit_transform(combined_features)

# Train SVM model
svm_model = SVC(kernel='rbf', C=1.0)
svm_model.fit(combined_features, combined_labels)

# Save the trained SVM model
joblib.dump(svm_model, 'svm_attacker_model.pkl')

# Load the attacker model
attacker = joblib.load('svm_attacker_model.pkl')

# Predict and evaluate
predicted_labels = attacker.predict(combined_features)

accuracy = attacker.score(combined_features, combined_labels)
print(f'Training Accuracy: {accuracy:.4f}')

cm = confusion_matrix(combined_labels, predicted_labels)
precision = precision_score(combined_labels, predicted_labels)
recall = recall_score(combined_labels, predicted_labels)
f1 = f1_score(combined_labels, predicted_labels)

print(f'Confusion Matrix:\n{cm}')
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Files already downloaded and verified
Files already downloaded and verified
Training Accuracy: 0.9231
Confusion Matrix:
[[60000     0]
 [ 5000     0]]
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))


# Run


In [145]:
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import Subset, DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC  # Import Support Vector Classifier
import joblib

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = CIFAR10Classifier()
state_dict = torch.load("/kaggle/input/phase1/model_state_dict.pth", map_location=device)
new_state_dict = {key.replace('_module.', ''): value for key, value in state_dict.items()}
model.load_state_dict(new_state_dict)
model.to(device)
model.eval()

# Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

trainset = CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [146]:
# Select 10,000 random indices for the training subset
train_indices = torch.randperm(len(trainset))[:10000]

train_subset = torch.utils.data.Subset(trainset, train_indices)
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [147]:
import torch.optim as optim

# Initialize model, loss function, and optimizer
model = CIFAR10Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [148]:
# Train the model
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(train_loader)}')



Epoch 1/10, Loss: 2.0472362891883606
Epoch 2/10, Loss: 1.7581613329565449
Epoch 3/10, Loss: 1.6381397847157375
Epoch 4/10, Loss: 1.532221658214642
Epoch 5/10, Loss: 1.452655795653155
Epoch 6/10, Loss: 1.3773438649572385
Epoch 7/10, Loss: 1.3060610795476635
Epoch 8/10, Loss: 1.2386927050390062
Epoch 9/10, Loss: 1.1781830783862217
Epoch 10/10, Loss: 1.1277589657504088


In [149]:
# Evaluate the model on the test set
test_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        model.eval()
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Loss: {test_loss / len(test_loader)}')
print(f'Test Accuracy: {100 * correct / total}%')

Test Loss: 1.25662881363729
Test Accuracy: 55.52%


In [151]:
import pickle

# Save the train indices
with open('/kaggle/working/train_indices.pkl', 'wb') as f:
    pickle.dump(train_indices, f)

In [153]:
def filter_assured_indices(data_loader, model, device, batch_size, threshold=0.9):
    
    assured_indices = []
    batch_index = 0
    for inputs, labels in data_loader:
        batch_index += 1
        inputs, labels = inputs.to(device), labels.to(device)
        with torch.no_grad():
            outputs = model(inputs)
            probabilities = F.softmax(outputs, dim=1)
            highest_probabilities, predictions = torch.max(probabilities, dim=1)

            for i in range(len(inputs)):
                if highest_probabilities[i].item() >= threshold:
                    overall_index = (batch_index-1) * batch_size + i
                    assured_indices.append(overall_index)
                    
    return assured_indices

This subset of data where the model is most confident might reveal patterns or features that are most discriminative according to the model, thus providing insight into the model's internal mechanisms.

In [154]:
def get_labels_from_indices(dataset, indices):
    # Create a subset of the dataset using the specified indices
    subset = Subset(dataset, indices)

    # Create a DataLoader for the subset
    data_loader = DataLoader(subset, batch_size=len(subset), shuffle=False)

    # Get the labels of the subset
    for _, labels in data_loader:
        return labels.numpy()

In [155]:
import numpy as np
import random

def stratified_split(data_indices, dataset_targets, labels, num_splits = 5, overlap_percent = 0.1):

    stratified_splits = []
    remaining_splits = []
    unique_classes = np.unique(labels)

    # Prepare lists to hold split indices and remaining indices
    split_indices = [[] for _ in range(num_splits)]
    remaining_indices_per_split = [[] for _ in range(num_splits)]
    overlap_count = 0  # as per your original settings where overlap_count calculation was commented out

    # Split indices by class
    for cls in unique_classes:
        cls_indices = [i for i in data_indices if dataset_targets[i] == int(cls)]
        random.shuffle(cls_indices)
        split_cls_indices = np.array_split(cls_indices, num_splits)
        for i in range(num_splits):
            split_indices[i].extend(split_cls_indices[i])
            # Handle overlap if required
            if overlap_count > 0:
                remaining_indices = list(set(cls_indices) - set(split_cls_indices[i]))
                overlap_indices = random.sample(remaining_indices, min(overlap_count, len(remaining_indices)))
                split_indices[i].extend(overlap_indices)

    # Calculate indices not included in each split
    for i in range(num_splits):
        all_shadow_indices = set(split_indices[i])
        remaining_indices_per_split[i] = list(set(data_indices) - all_shadow_indices)

    return split_indices


This function underpins sophisticated attack strategies by enabling precise and stratified training of shadow models, which are instrumental in both model shadowing and membership inference attacks. This underscores the need for robust security measures and ethical considerations in machine learning applications.

In [156]:
import torch
from torch.utils.data import Subset, DataLoader
from torch.nn import CrossEntropyLoss
from torch.optim import Adam

def train_shadow_models(train_dataset, device, split_indices, num_models, epochs):

    shadow_models = []
    shadow_model_indices = []
    
    for model_index in range(num_models):
        indices_for_model = split_indices[model_index]
        shadow_model_indices.append(indices_for_model)
        model_specific_dataset = Subset(train_dataset, indices_for_model)
        
        model_loader = DataLoader(model_specific_dataset, batch_size=64, shuffle=True)

        # Initialize a new model for each set of indices
        model = CIFAR10Classifier().to(device)
        optimizer = Adam(model.parameters(), lr=0.001)
        criterion = CrossEntropyLoss()

        # Train the model
        for epoch in range(epochs):
            model.train()
            total_loss = 0
            for inputs, labels in model_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            print(f"Shadow model {model_index}, Epoch {epoch}, Loss: {total_loss/len(model_loader)}")

        shadow_models.append(model)

    return shadow_models, shadow_model_indices

Also known as model stealing, here the attacker aims to create a new model that replicates the functionality of the target model as closely as possible, without having access to the target model's actual data or architecture.

In [157]:
def update_shadow_model_list(shadow_model_list, model_index, new_model):

    if 0 <= model_index < len(shadow_model_list):
        shadow_model_list[model_index] = new_model
    else:
        raise IndexError("The specified index is out of the range of the shadow model list.")

In [158]:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset
import random

def prepare_membership_attack_data(shadow_models, shadow_indices_list, train_dataset, test_dataset, device, num_classes = 10):

    attack_outputs = [[] for _ in range(num_classes)]
    attack_labels = [[] for _ in range(num_classes)]

    for cls in range(num_classes):
        print("Class =", cls)
        for shadow_idx, shadow_model in enumerate(shadow_models):
            print("Shadow model =", shadow_idx + 1)
            shadow_model.eval()

            # Indices for shadow training data of the current class
            shadow_train_indices = [idx for idx in shadow_indices_list[shadow_idx] if train_dataset.targets[idx] == cls]
            shadow_train_loader = DataLoader(Subset(train_dataset, shadow_train_indices), batch_size=64, shuffle=False)

            # Indices for test data of the same class
            test_indices_cls = [idx for idx, label in enumerate(test_dataset.targets) if label == cls]
            random.shuffle(test_indices_cls)
            test_indices_cls = test_indices_cls[:len(shadow_train_indices)]
            test_data_loader = DataLoader(Subset(test_dataset, test_indices_cls), batch_size=64, shuffle=False)

            # Collect data from shadow model for training indices
            for inputs, _ in shadow_train_loader:
                inputs = inputs.to(device)
                with torch.no_grad():
                    outputs = shadow_model(inputs)
                    probabilities = F.softmax(outputs, dim=1)
                    for prob in probabilities:
                        attack_outputs[cls].append(prob.cpu().numpy())
                        attack_labels[cls].append(1)  # Label as in the training set

            # Collect data from shadow model for test indices
            for inputs, _ in test_data_loader:
                inputs = inputs.to(device)
                with torch.no_grad():
                    outputs = shadow_model(inputs)
                    probabilities = F.softmax(outputs, dim=1)
                    for prob in probabilities:
                        attack_outputs[cls].append(prob.cpu().numpy())
                        attack_labels[cls].append(0)  # Label as not in the training set

    return attack_outputs, attack_labels

This setup prepares data that can be used to analyze how well shadow models can differentiate between their training data and unseen test data, which is a crucial part of conducting effective membership inference attack

In [159]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def train_membership_attack_models(attack_data_by_class, attack_labels_by_class, num_classes=10, num_epochs=10):

    trained_attack_models = []
    for cls in range(num_classes):
        # Convert data for this class to numpy arrays for training
        class_attack_data = np.array(attack_data_by_class[cls])
        class_attack_labels = np.array(attack_labels_by_class[cls])

        # Initialize and train a random forest classifier as the attack model
        attack_model = RandomForestClassifier(n_estimators=100)
        attack_model.fit(class_attack_data.reshape(len(class_attack_data), -1), class_attack_labels)

        # Store the trained model
        trained_attack_models.append(attack_model)

    return trained_attack_models


Calling the function with prepared attack data and labels, which might come from a membership inference preparation process or other data gathering method designed to analyze model vulnerability.

In [160]:
import torch
import torch.nn.functional as F

def predict_membership_from_models(target_model, test_loader, attack_models, device):

    predictions = []
    target_model.eval()  # Move the model to the correct device and set to evaluation mode
    softmax_outputs = []

    # First, collect softmax outputs from the target model
    for inputs, _ in test_loader:
        inputs = inputs.to(device)
        with torch.no_grad():
            outputs = target_model(inputs)
            probabilities = F.softmax(outputs, dim=1)
            softmax_outputs.append(probabilities.cpu().numpy())

    # Second, predict membership using the softmax outputs
    for i, (_, labels) in enumerate(test_loader.dataset):
        cls = labels
        
        attack_model = attack_models[cls]
        input_data = softmax_outputs[i].reshape(1, -1)
        pred = attack_model.predict(input_data)
        predictions.append(pred[0])

    return predictions


This function is now independent and can be easily integrated into various machine learning pipelines or used for research purposes, especially in studies related to model privacy and security.

In [170]:
import torch
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

def evaluate_membership_attack_models(target_model, train_dataset, attack_models, train_indices, device):

    # Load the subset of training data for which to test membership inference
#     subset_loader = DataLoader(Subset(train_dataset, train_indices), batch_size=64, shuffle=False)
#     DataLoader(Subset(train_dataset, train_indices))
    
    # Predict membership using the provided attack models
    attack_predictions = predict_membership_from_models(target_model, DataLoader(Subset(train_dataset, train_indices)), attack_models, device)
    
    # All these data points are members of the training set
    true_labels = [1] * len(train_indices)
    
    # Calculate various metrics
    accuracy = accuracy_score(true_labels, attack_predictions)
    cm = confusion_matrix(true_labels, attack_predictions)
    precision = precision_score(true_labels, attack_predictions)
    recall = recall_score(true_labels, attack_predictions)
    f1 = f1_score(true_labels, attack_predictions)

    # Print the metrics
    print(f'Confusion Matrix:\n{cm}')
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Accuracy: {accuracy:.4f}")

    # Optionally, return the accuracy or all metrics
    return accuracy  # You can also return a dictionary of all metrics if needed


In [162]:
from torch.utils.data import DataLoader, Subset, TensorDataset

assured_indices = filter_assured_indices(train_loader, model, device, batch_size = 64, threshold=0.9)
labels = get_labels_from_indices(trainset, assured_indices)

split_indices = stratified_split(assured_indices, trainset.targets, labels, num_splits = 5, overlap_percent = 0.1)

In [163]:
shadow_models, shadow_model_indices = train_shadow_models(trainset, device, split_indices, num_models = 5, epochs = 10)

Shadow model 0, Epoch 0, Loss: 2.283633550008138
Shadow model 0, Epoch 1, Loss: 2.2224972248077393
Shadow model 0, Epoch 2, Loss: 2.126007596651713
Shadow model 0, Epoch 3, Loss: 2.017794370651245
Shadow model 0, Epoch 4, Loss: 1.997313400109609
Shadow model 0, Epoch 5, Loss: 1.7687914570172627
Shadow model 0, Epoch 6, Loss: 1.763733685016632
Shadow model 0, Epoch 7, Loss: 1.7548107107480366
Shadow model 0, Epoch 8, Loss: 1.626665512720744
Shadow model 0, Epoch 9, Loss: 1.7539088726043701
Shadow model 1, Epoch 0, Loss: 2.332929086685181
Shadow model 1, Epoch 1, Loss: 2.196032238006592
Shadow model 1, Epoch 2, Loss: 2.0960459232330324
Shadow model 1, Epoch 3, Loss: 1.9966496467590331
Shadow model 1, Epoch 4, Loss: 1.8836265802383423
Shadow model 1, Epoch 5, Loss: 1.8177685737609863
Shadow model 1, Epoch 6, Loss: 1.6711799144744872
Shadow model 1, Epoch 7, Loss: 1.60203857421875
Shadow model 1, Epoch 8, Loss: 1.4851868152618408
Shadow model 1, Epoch 9, Loss: 1.3801929235458374
Shadow mod

In [164]:
attack_outputs, attack_labels = prepare_membership_attack_data(shadow_models, shadow_model_indices, trainset, testset, device, num_classes = 10)

Class = 0
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5
Class = 1
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5
Class = 2
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5
Class = 3
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5
Class = 4
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5
Class = 5
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5
Class = 6
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5
Class = 7
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5
Class = 8
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5
Class = 9
Shadow model = 1
Shadow model = 2
Shadow model = 3
Shadow model = 4
Shadow model = 5


In [165]:
trained_attack_models = train_membership_attack_models(attack_outputs, attack_labels, num_classes=10, num_epochs=10)

In [128]:
from joblib import dump

for idx, model in enumerate(trained_attack_models):
    dump(model, f'/kaggle/working/trained_attack_model_{idx}.joblib')


In [None]:
from joblib import load

# If you saved each model separately:
trained_attack_models = []
for idx in range(number_of_models):  # Replace `number_of_models` with the actual number
    model = load(f'trained_attack_model_{idx}.joblib')
    trained_attack_models.append(model)


In [167]:
target_model = CIFAR10Classifier()
state_dict = torch.load("/kaggle/input/phase1/model_state_dict.pth", map_location=device)
new_state_dict = {key.replace('_module.', ''): value for key, value in state_dict.items()}
target_model.load_state_dict(new_state_dict)
target_model.to(device)

CIFAR10Classifier(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout2d(p=0.25, inplace=False)
  (dropout2): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=6272, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [171]:
train_indices = torch.randperm(len(trainset))[:10000]

# Evaluate attack models
attack_accuracy = evaluate_membership_attack_models(target_model, trainset, trained_attack_models, train_indices, device)



Confusion Matrix:
[[   0    0]
 [3498 6502]]
Precision: 1.0000
Recall: 0.6502
F1 Score: 0.7880
Accuracy: 0.6502
