In [1]:
# Load packages which are frequently used through experiments
import numpy as np

import torch.nn as nn
import torchvision
import whitebox_model_extractor, torch

import experiment_interface

In [2]:
# Create an experiment interface
interface = experiment_interface.ExperimentInterface()

# The following blocks are for generating baseline values of a random-picked black-box neural network 

Procedures:
1. Pick a black-box neural network randomly 
2. Generate a model without training but possess the same architecture and activation functions as the seleted black-box neural network & compute its accurancy
3. Prepare all data required for computation of the three measurements 
4. Calculate three measurements (AAE/AAPE/The difference between prediction capabiltiy)

In [3]:
# Procedure 1
num_of_model_extracted = 1

weights_dataset = interface.extract_whitebox_model_weights(num_of_model_extracted)
predictions_dataset = interface.extract_whitebox_model_predictions(num_of_model_extracted)

In [4]:
# Procedure 2
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        output = self.layer1(x)
        output = self.relu(output)
        output = self.layer2(output)
        return output 

input_size, hidden_size, output_size = 784, 64, 10
model = NeuralNet(input_size, hidden_size, output_size)

In [5]:
def compute_accurancy_for_randomly_generated_model(model):
    test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=torchvision.transforms.ToTensor(), download=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=10000, shuffle=False)
    input_size, device, accurancy = 784, 'cpu', None

    with torch.no_grad():
        correct = 0
        total = 0
        for _, (images, labels) in enumerate(test_loader):
            images = images.reshape(-1, input_size).to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predictions = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predictions == labels).sum().item()

            accurancy = 100 * correct / total
    
    return accurancy

def compute_accurancy_for_the_blackbox_model(predictions):
    test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=torchvision.transforms.ToTensor(), download=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=10000, shuffle=False)
    input_size, device, accurancy = 784, 'cpu', None

    with torch.no_grad():
        correct = 0
        total = 0
        for _, (_, labels) in enumerate(test_loader):
            total += labels.size(0)
            correct += (predictions == labels).sum().item()

            accurancy = 100 * correct / total
    
    return accurancy

# Procedure 3

# Extract weights and accurancy from the black-box neural network
weight_values = weights_dataset[0]
prediction_values = predictions_dataset[0]
prediction_values = torch.from_numpy(np.int64(prediction_values))
accurancy = compute_accurancy_for_the_blackbox_model(prediction_values)

# Extract weights and accurancy from the randomly generated model 
Extractor = whitebox_model_extractor.WhiteboxModelExtractor()
weight_values_prime = Extractor.parse_single_whitebox_model_weights(model.state_dict())
accurancy_prime = compute_accurancy_for_randomly_generated_model(model)

In [6]:
# Procedure 4
# a. AAE
print('Calculating AAE (Average Absolute Error) ...')
absolute_errors = np.abs(weight_values_prime - weight_values)
average_absolute_error = np.mean(absolute_errors)
print(average_absolute_error)

# b. AAPE
print('Calculating AAPE (Average Absolute Percentage Error) ...')
absolute_percentage_errors = np.abs((weight_values_prime - weight_values)/weight_values)
average_absolute_percentage_error = np.mean(absolute_percentage_errors)
print(average_absolute_percentage_error)

# c. The difference between prediction capabiltiy
print('Calculating The difference between prediction capabiltiy ...')
accurancy_diff = np.abs(accurancy_prime - accurancy)/100
print(accurancy_diff)


Calculating AAE (Average Absolute Error) ...
0.049205514084641924
Calculating AAPE (Average Absolute Percentage Error) ...
3.6545634568971144
Calculating The difference between prediction capabiltiy ...
0.8313000000000001
