In [1]:
import torch
import torchvision
from torch import nn
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names
from collections import OrderedDict
import numpy as np
from tqdm.auto import tqdm

In [2]:
# import dataset
# One ToTensor instance is shared by both splits; the transform is stateless.
mnist_transform = ToTensor()
train_dataset = datasets.MNIST(
    root="mnist/", train=True, download=True, transform=mnist_transform
)
test_dataset = datasets.MNIST(
    root="mnist/", train=False, transform=mnist_transform
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 89396481.92it/s]


Extracting mnist/MNIST/raw/train-images-idx3-ubyte.gz to mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 32748227.58it/s]


Extracting mnist/MNIST/raw/train-labels-idx1-ubyte.gz to mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 26549750.46it/s]


Extracting mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 18955750.02it/s]


Extracting mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist/MNIST/raw



In [3]:
# create dataloaders
# Only the training loader shuffles; the test loader keeps dataset order.
batch_size = 256
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [4]:
# define model
class LeNet5(nn.Module):
  """LeNet-5 style CNN: two conv/ReLU/avg-pool stages, then three linear layers.

  ``fc_1`` expects 256 input features, i.e. 16 channels of 4x4 maps, which is
  what the two stages produce for a 1x28x28 input. Note that there is no
  non-linearity between ``fc_1`` and ``fc_2``; the only ReLU in the head sits
  between ``fc_2`` and ``fc_3``.
  """

  @staticmethod
  def _conv_stage(in_channels, out_channels):
    """Build one Conv2d(5x5, stride 1) -> ReLU -> AvgPool2d(2x2, stride 2) stage."""
    return nn.Sequential(
      nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1),
      nn.ReLU(),
      nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
    )

  def __init__(self):
    super().__init__()
    # Attribute names and creation order are kept stable: they define the
    # state-dict keys and the node names reported by the feature extractor.
    self.conv_1 = self._conv_stage(1, 6)
    self.conv_2 = self._conv_stage(6, 16)
    self.fc_1 = nn.Linear(256, 120, bias=True)
    self.fc_2 = nn.Linear(120, 84, bias=True)
    self.relu = nn.ReLU()
    self.fc_3 = nn.Linear(84, 10, bias=True)

  def forward(self, x):
    """Return the 10 raw output scores (no softmax) for a batch ``x``."""
    features = self.conv_2(self.conv_1(x))
    hidden = self.fc_2(self.fc_1(torch.flatten(features, 1)))
    return self.fc_3(self.relu(hidden))

In [5]:
# import trained model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# map_location remaps tensors that were saved on another device (e.g. a GPU
# checkpoint opened on a CPU-only machine) instead of failing at load time.
# torch.load unpickles the file, so only load checkpoints you trust.
state_dict = torch.load('lenet5.pt', map_location=device)
model = LeNet5().to(device)
model.load_state_dict(state_dict)
# eval() is the last expression so the cell still displays the module summary.
model.eval()

LeNet5(
  (conv_1): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (conv_2): Sequential(
    (0): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (fc_1): Linear(in_features=256, out_features=120, bias=True)
  (fc_2): Linear(in_features=120, out_features=84, bias=True)
  (relu): ReLU()
  (fc_3): Linear(in_features=84, out_features=10, bias=True)
)

In [None]:
"""
x = torch.rand(256, 1, 28, 28)
nodes = [name for (name, _) in model.named_children()]
del nodes[-1]
feature_extractor = create_feature_extractor(model, return_nodes=get_graph_node_names(model)[0])
out_list = feature_extractor(x)
for name, layer in out_list.items():
  print(torch.flatten(layer))

node = get_graph_node_names(model)[0]
node.pop(0)
print(node)
"""

'\nx = torch.rand(256, 1, 28, 28)\nnodes = [name for (name, _) in model.named_children()]\ndel nodes[-1]\nfeature_extractor = create_feature_extractor(model, return_nodes=get_graph_node_names(model)[0])\nout_list = feature_extractor(x)\nfor name, layer in out_list.items():\n  print(torch.flatten(layer))\n\nnode = get_graph_node_names(model)[0]\nnode.pop(0)\nprint(node)\n'

In [6]:
# profiling function
def profileDNN(net, dataloader):
  """Record the activation range of every neuron of ``net`` over ``dataloader``.

  Every graph node reported by ``get_graph_node_names`` except the first one
  (presumably the input placeholder) is captured. Each captured output is
  flattened per sample, so every element of a convolutional feature map
  counts as its own neuron.

  Args:
    net: module to profile; it is switched to eval mode.
    dataloader: iterable of ``(data, label)`` batches; labels are ignored.

  Returns:
    OrderedDict mapping ``(node_name, neuron_id)`` to ``[lower, upper]``:
    two 0-dim CPU tensors with the smallest and largest activation observed
    for that neuron. Empty if ``dataloader`` yields no batches.
  """
  net.eval()
  batch_num = len(dataloader)
  nodes = get_graph_node_names(net)[0][1:]  # skip the first graph node
  # Trace the network that was passed in, not whatever global `model` is.
  feature_extractor = create_feature_extractor(net, return_nodes=nodes)
  # Inputs have to live on the same device as the weights.
  first_param = next(net.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  # Running per-layer bounds, one 1-D tensor per node name.
  lower_bounds = OrderedDict()
  upper_bounds = OrderedDict()

  for idx, (data, label) in enumerate(dataloader):
    with torch.no_grad():
      out_list = feature_extractor(data.to(device))

    if idx % 100 == 0:
      print("* finish {}/{} proportion".format(idx, batch_num))

    for name, layer in out_list.items():
      # (batch, ...) -> (batch, neurons); reduce over the batch dimension.
      layer = torch.flatten(layer, start_dim=1)
      batch_min = layer.min(dim=0).values
      batch_max = layer.max(dim=0).values
      if name in lower_bounds:
        lower_bounds[name] = torch.minimum(lower_bounds[name], batch_min)
        upper_bounds[name] = torch.maximum(upper_bounds[name], batch_max)
      else:
        lower_bounds[name] = batch_min
        upper_bounds[name] = batch_max

  # Expand to the per-neuron dictionary layout the coverage functions expect.
  cov_dict = OrderedDict()
  for name, lower in lower_bounds.items():
    lower = lower.cpu()
    upper = upper_bounds[name].cpu()
    for neuron_id in range(lower.shape[0]):
      cov_dict[(name, neuron_id)] = [lower[neuron_id], upper[neuron_id]]

  return cov_dict

In [7]:
# Profile the model on the training set: cov_dict maps (node name, neuron index)
# to the [min, max] activation observed for that neuron.
cov_dict = profileDNN(model, train_dataloader)

* finish 0/235 proportion
* finish 100/235 proportion
* finish 200/235 proportion


In [None]:
"""
import pickle 

with open('cov_dict.pkl', 'wb') as f:
  pickle.dump(cov_dict, f)

np.save('cov_dict.npy', cov_dict)

cov_dict = np.load('cov_dict.npy', allow_pickle='FALSE').item()
print(cov_dict)
"""

OrderedDict([(('conv_1.0', 0), [tensor(-0.2208), tensor(0.0559)]), (('conv_1.0', 1), [tensor(-0.1855), tensor(0.0593)]), (('conv_1.0', 2), [tensor(-0.3792), tensor(0.1787)]), (('conv_1.0', 3), [tensor(-0.7616), tensor(0.1292)]), (('conv_1.0', 4), [tensor(-0.9510), tensor(0.1383)]), (('conv_1.0', 5), [tensor(-0.9968), tensor(0.3645)]), (('conv_1.0', 6), [tensor(-1.0961), tensor(0.6898)]), (('conv_1.0', 7), [tensor(-1.0934), tensor(0.9997)]), (('conv_1.0', 8), [tensor(-1.0853), tensor(1.0398)]), (('conv_1.0', 9), [tensor(-1.1204), tensor(1.0668)]), (('conv_1.0', 10), [tensor(-1.1662), tensor(1.0712)]), (('conv_1.0', 11), [tensor(-1.1774), tensor(1.0954)]), (('conv_1.0', 12), [tensor(-1.1320), tensor(1.0966)]), (('conv_1.0', 13), [tensor(-1.1463), tensor(1.1257)]), (('conv_1.0', 14), [tensor(-1.1343), tensor(1.0897)]), (('conv_1.0', 15), [tensor(-1.0665), tensor(1.1293)]), (('conv_1.0', 16), [tensor(-1.0498), tensor(1.0752)]), (('conv_1.0', 17), [tensor(-1.0556), tensor(1.0683)]), (('conv

In [8]:
# neuron boundary coverage
def NBCoverage(net, dataloader, cov_dict):
  """Compute the neuron boundary coverage of ``dataloader`` on ``net``.

  A neuron's lower corner counts as covered once some input drives its
  activation strictly below the profiled minimum, and its upper corner once
  some input drives it strictly above the profiled maximum.

  Args:
    net: module under test; it is switched to eval mode.
    dataloader: iterable of ``(data, label)`` batches; labels are ignored.
    cov_dict: mapping ``(node_name, neuron_id) -> [lower, upper]`` as
      returned by ``profileDNN``.

  Returns:
    float: covered corners divided by ``2 * number of neurons``;
    ``0.0`` if ``dataloader`` yields no batches.

  Raises:
    KeyError: if ``cov_dict`` has no entry for a neuron of ``net``.
  """
  net.eval()
  batch_num = len(dataloader)
  nodes = get_graph_node_names(net)[0][1:]  # skip the first graph node
  # Trace the network that was passed in, not whatever global `model` is.
  feature_extractor = create_feature_extractor(net, return_nodes=nodes)
  # Inputs have to live on the same device as the weights.
  first_param = next(net.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  bounds = {}                # node name -> (lower, upper) 1-D tensors
  below_hit = OrderedDict()  # node name -> bool tensor, lower corner covered
  above_hit = OrderedDict()  # node name -> bool tensor, upper corner covered

  for idx, (data, label) in enumerate(dataloader):
    with torch.no_grad():
      out_list = feature_extractor(data.to(device))

    if idx % 100 == 0:
      print("* finish {}/{} proportion".format(idx, batch_num))

    for name, layer in out_list.items():
      # (batch, ...) -> (batch, neurons); every feature-map element is a neuron.
      layer = torch.flatten(layer, start_dim=1)
      if name not in bounds:
        # Stack the profiled bounds once per layer so each batch needs a
        # single vectorised comparison instead of one per neuron.
        neuron_num = layer.shape[1]
        lower = torch.stack(
          [torch.as_tensor(cov_dict[(name, i)][0]) for i in range(neuron_num)]
        ).to(layer.device)
        upper = torch.stack(
          [torch.as_tensor(cov_dict[(name, i)][1]) for i in range(neuron_num)]
        ).to(layer.device)
        bounds[name] = (lower, upper)
        below_hit[name] = torch.zeros(neuron_num, dtype=torch.bool, device=layer.device)
        above_hit[name] = torch.zeros(neuron_num, dtype=torch.bool, device=layer.device)

      lower, upper = bounds[name]
      below_hit[name] |= layer.min(dim=0).values < lower
      above_hit[name] |= layer.max(dim=0).values > upper

  total_neurons = sum(flags.numel() for flags in below_hit.values())
  if total_neurons == 0:
    return 0.0
  covered = sum(int(flags.sum()) for flags in below_hit.values())
  covered += sum(int(flags.sum()) for flags in above_hit.values())
  return covered / (2 * total_neurons)

In [9]:
# Neuron boundary coverage of the unmodified MNIST test set, measured against
# the bounds stored in cov_dict.
result = NBCoverage(model, test_dataloader, cov_dict)
result

* finish 0/40 proportion


0.09855181493323302

In [21]:
# import new tests created with ddpm
# NOTE(review): presumably a sequence of (image, label) pairs -- AugmentedMNIST
# returns items unchanged and the coverage loops unpack every batch as
# (data, label); confirm against the script that wrote this file.
new_tests = torch.load('ddpm_corrected_k_2.pt')

In [22]:
# define as a custom pytorch dataset
from torch.utils.data import Dataset

class AugmentedMNIST(Dataset):
  """Map-style dataset serving pre-built samples from an in-memory sequence."""

  def __init__(self, tests):
    """Keep a reference to ``tests``; items are handed out unchanged."""
    self.samples = tests

  def __len__(self) -> int:
    """Number of stored samples."""
    sample_count = len(self.samples)
    return sample_count

  def __getitem__(self, idx):
    """Return the sample stored at position ``idx``."""
    sample = self.samples[idx]
    return sample

In [23]:
# add new tests to the tests dataset
# `+` on datasets yields a ConcatDataset: the original test samples come
# first, followed by the new ones.
augmented_tests = test_dataset + AugmentedMNIST(new_tests)
type(augmented_tests)

torch.utils.data.dataset.ConcatDataset

In [24]:
# create new dataloader
# (no shuffle argument is passed; same batch_size as the other loaders)
augmented_dataloader = DataLoader(augmented_tests, batch_size=batch_size)

In [25]:
# Neuron boundary coverage of the test set extended with the DDPM-generated tests.
new_result = NBCoverage(model, augmented_dataloader, cov_dict)
new_result

* finish 0/59 proportion


0.1260579274026707

In [26]:
# import new tests created with ddim
new_tests_2 = torch.load('ddim_corrected_k_2.pt')

# Only the first DDIM_TEST_LIMIT generated samples are added to the test set.
DDIM_TEST_LIMIT = 2500

# add new tests to the tests dataset
augmented_tests_2 = test_dataset + AugmentedMNIST(new_tests_2[:DDIM_TEST_LIMIT])

# create new dataloader
augmented_dataloader_2 = DataLoader(augmented_tests_2, batch_size=batch_size)

In [27]:
# Neuron boundary coverage of the test set extended with the DDIM-generated tests.
new_result_2 = NBCoverage(model, augmented_dataloader_2, cov_dict)
new_result_2

* finish 0/49 proportion


0.15248260297160052

In [28]:
# strong neuron activation coverage
def SNACoverage(net, dataloader, cov_dict):
  """Compute the strong neuron activation coverage of ``dataloader`` on ``net``.

  A neuron counts as covered once some input drives its activation strictly
  above the profiled maximum stored in ``cov_dict``; the lower bounds are
  not used.

  Args:
    net: module under test; it is switched to eval mode.
    dataloader: iterable of ``(data, label)`` batches; labels are ignored.
    cov_dict: mapping ``(node_name, neuron_id) -> [lower, upper]`` as
      returned by ``profileDNN``.

  Returns:
    float: covered neurons divided by the number of neurons;
    ``0.0`` if ``dataloader`` yields no batches.

  Raises:
    KeyError: if ``cov_dict`` has no entry for a neuron of ``net``.
  """
  net.eval()
  batch_num = len(dataloader)
  nodes = get_graph_node_names(net)[0][1:]  # skip the first graph node
  # Trace the network that was passed in, not whatever global `model` is.
  feature_extractor = create_feature_extractor(net, return_nodes=nodes)
  # Inputs have to live on the same device as the weights.
  first_param = next(net.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  upper_bounds = {}          # node name -> 1-D tensor of profiled maxima
  above_hit = OrderedDict()  # node name -> bool tensor, neuron covered

  for idx, (data, label) in enumerate(dataloader):
    with torch.no_grad():
      out_list = feature_extractor(data.to(device))

    if idx % 100 == 0:
      print("* finish {}/{} proportion".format(idx, batch_num))

    for name, layer in out_list.items():
      # (batch, ...) -> (batch, neurons); every feature-map element is a neuron.
      layer = torch.flatten(layer, start_dim=1)
      if name not in upper_bounds:
        # Stack the profiled maxima once per layer so each batch needs a
        # single vectorised comparison instead of one per neuron.
        neuron_num = layer.shape[1]
        upper_bounds[name] = torch.stack(
          [torch.as_tensor(cov_dict[(name, i)][1]) for i in range(neuron_num)]
        ).to(layer.device)
        above_hit[name] = torch.zeros(neuron_num, dtype=torch.bool, device=layer.device)

      above_hit[name] |= layer.max(dim=0).values > upper_bounds[name]

  total_neurons = sum(flags.numel() for flags in above_hit.values())
  if total_neurons == 0:
    return 0.0
  covered = sum(int(flags.sum()) for flags in above_hit.values())
  return covered / total_neurons

In [29]:
# Strong neuron activation coverage of the unmodified MNIST test set.
snac_result = SNACoverage(model, test_dataloader, cov_dict)
snac_result

* finish 0/40 proportion


0.1356968215158924

In [30]:
# Strong neuron activation coverage of the test set extended with the DDPM-generated tests.
snac_result_2 = SNACoverage(model, augmented_dataloader, cov_dict)
snac_result_2

* finish 0/59 proportion


0.17199548617641527

In [31]:
# Strong neuron activation coverage of the test set extended with the DDIM-generated tests.
snac_result_3 = SNACoverage(model, augmented_dataloader_2, cov_dict)
snac_result_3

* finish 0/49 proportion


0.1807410193718262

In [None]:
# Mutator functionality: load the annotated diffusion samples and a fresh
# copy of the trained model.
diffusion_samples = torch.load('annotated_tests.pt')
diffusion_dataset = AugmentedMNIST(diffusion_samples)
# batch_size=1 so that every sample is judged on its own
diffusion_dataloader = DataLoader(diffusion_dataset, batch_size=1)
# map_location keeps the load working when the checkpoint was saved on
# another device than the one in use now.
state_dict = torch.load('lenet5.pt', map_location=device)
model = LeNet5().to(device)
model.load_state_dict(state_dict)
model.eval()
# Baseline for comparison: the neuron boundary coverage of the original test
# set. The profiled bounds in cov_dict are reused unchanged.
original_coverage = result

In [None]:
# Smoke test: run the feature extractor on the first diffusion sample only.
nodes = get_graph_node_names(model)[0]
nodes.pop(0)  # drop the first graph node
feature_extractor = create_feature_extractor(model, return_nodes=nodes)
# Take one batch directly instead of looping and breaking immediately.
data, label = next(iter(diffusion_dataloader))
with torch.no_grad():  # inspection only, no autograd graph needed
  out_list = feature_extractor(data.to(device))

0it [00:00, ?it/s]

11

In [None]:
out_list

{'conv_1.0': tensor([[[[-0.0335, -0.0359, -0.0366,  ..., -0.0373, -0.0366, -0.0386],
           [-0.0346, -0.0365, -0.0378,  ..., -0.1291,  0.0096,  0.1983],
           [-0.0354, -0.0365, -0.0375,  ..., -0.1766,  0.2335,  0.4899],
           ...,
           [-0.0343, -0.0531, -0.1997,  ..., -0.0386, -0.0385, -0.0379],
           [-0.0351, -0.0347, -0.0469,  ..., -0.0371, -0.0378, -0.0365],
           [-0.0335, -0.0356, -0.0359,  ..., -0.0367, -0.0371, -0.0377]],
 
          [[ 0.0604,  0.0524,  0.0500,  ...,  0.0479,  0.0460,  0.0486],
           [ 0.0682,  0.0622,  0.0594,  ..., -0.2326, -0.1208, -0.0653],
           [ 0.0653,  0.0581,  0.0550,  ...,  1.7024,  1.9161,  1.1059],
           ...,
           [ 0.0669,  0.0777,  0.3503,  ...,  0.0585,  0.0609,  0.0643],
           [ 0.0672,  0.0566,  0.0626,  ...,  0.0577,  0.0585,  0.0585],
           [ 0.0679,  0.0570,  0.0495,  ...,  0.0595,  0.0578,  0.0575]],
 
          [[ 0.9213,  0.9216,  0.9215,  ...,  0.9210,  0.9217,  0.9209],
 

In [None]:
cov_dict[('conv_1.0', 0)]

[tensor(-0.2208), tensor(0.0559)]

In [None]:
# neuron boundary coverage
def Mutator(net, dataloader, cov_dict):
  """Select the batches that push at least one neuron outside its profiled range.

  Each batch is run through ``net``; it is kept as soon as any neuron of any
  captured layer falls strictly below its profiled minimum or strictly above
  its profiled maximum. The check is made against ``cov_dict`` only, so
  already-selected batches do not influence later decisions.

  Args:
    net: module under test; it is switched to eval mode.
    dataloader: iterable of ``(data, label)`` batches. Batches are kept or
      dropped as a whole, so use ``batch_size=1`` to select single samples.
    cov_dict: mapping ``(node_name, neuron_id) -> [lower, upper]`` as
      returned by ``profileDNN``.

  Returns:
    list of ``(data, label)`` tuples, exactly as yielded by ``dataloader``,
    in iteration order.

  Raises:
    KeyError: if ``cov_dict`` has no entry for a neuron of ``net``.
  """
  samples = []
  net.eval()
  nodes = get_graph_node_names(net)[0][1:]  # skip the first graph node
  # Trace the network that was passed in, not whatever global `model` is.
  feature_extractor = create_feature_extractor(net, return_nodes=nodes)
  # Inputs have to live on the same device as the weights.
  first_param = next(net.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  bounds = {}  # node name -> (lower, upper) 1-D tensors, built lazily

  # Wrap the loader itself so tqdm knows the total and can show real progress.
  for data, label in tqdm(dataloader):
    with torch.no_grad():
      out_list = feature_extractor(data.to(device))

    for name, layer in out_list.items():
      # (batch, ...) -> (batch, neurons); every feature-map element is a neuron.
      layer = torch.flatten(layer, start_dim=1)
      if name not in bounds:
        # Stack the profiled bounds once per layer so each batch needs a
        # single vectorised comparison instead of one per neuron.
        neuron_num = layer.shape[1]
        lower = torch.stack(
          [torch.as_tensor(cov_dict[(name, i)][0]) for i in range(neuron_num)]
        ).to(layer.device)
        upper = torch.stack(
          [torch.as_tensor(cov_dict[(name, i)][1]) for i in range(neuron_num)]
        ).to(layer.device)
        bounds[name] = (lower, upper)

      lower, upper = bounds[name]
      below = layer.min(dim=0).values < lower
      above = layer.max(dim=0).values > upper
      if bool(below.any()) or bool(above.any()):
        # One out-of-range neuron is enough; skip the remaining layers.
        samples.append((data, label))
        break

  return samples

In [None]:
# Keep only the diffusion samples that drive at least one neuron outside the
# range stored in cov_dict.
samples = Mutator(model, diffusion_dataloader, cov_dict)

0it [00:00, ?it/s]

In [None]:
len(samples)

2582

In [None]:
len(samples[0])

1

In [None]:
samples[0]

tensor([[[[-6.6700e-03, -6.2546e-03, -8.2440e-03, -4.2537e-03, -6.6786e-03,
           -6.0290e-03, -3.9937e-03, -6.5246e-03, -6.1658e-03, -4.1811e-03,
           -5.9923e-03, -7.2734e-03, -5.3185e-03, -6.2914e-03, -7.5536e-03,
           -6.5497e-03, -7.1456e-03, -5.6699e-03, -8.2142e-03, -5.5356e-03,
           -4.5326e-03, -6.3286e-03, -6.6391e-03, -6.5911e-03, -5.4451e-03,
           -7.2296e-03, -6.9515e-03, -4.9046e-03],
          [-4.2092e-04, -1.1247e-03, -1.5521e-03, -2.1593e-03, -8.5930e-04,
           -2.7069e-03, -1.7560e-03,  8.9053e-04, -3.2000e-03, -1.8574e-03,
           -7.3442e-04, -2.8355e-03, -2.8703e-03, -7.3275e-04, -1.4678e-03,
           -2.5964e-04, -3.1593e-03, -2.1574e-03, -2.3029e-03, -1.7018e-03,
           -2.8648e-03, -9.9971e-04, -2.0156e-03, -3.0204e-03, -2.7584e-03,
           -6.7132e-04, -9.5399e-04, -4.2463e-03],
          [-1.2173e-03, -2.3932e-03,  3.9069e-04, -1.0910e-03, -2.4181e-03,
           -3.1565e-03, -1.8122e-03, -2.1449e-03, -1.6077e-03,