In [1]:
import sys
sys.path.append("..")
import torch
import torch.nn as nn
from torch.nn import functional as F

import pytorch_lightning as pl
from pl_examples.basic_examples.mnist_datamodule import MNISTDataModule

import GradCertModule

ALPHA = 0.5            # Regularization Parameter (Weights the Reg. Term)
EPSILON = 0.025          # Input Peturbation Budget at Training Time
GAMMA = 0.00            # Model Peturbation Budget at Training Time 
                        #(Changed to proportional budget rather than absolute)
    
LEARN_RATE = 0.001     # Learning Rate Hyperparameter
HIDDEN_DIM = 128       # Hidden Neurons Hyperparameter
HIDDEN_LAY = 1         # Hidden Layers Hyperparameter
MAX_EPOCHS = 5
BATCH_SIZE = 100

EPSILON_LINEAR = True   # Put Epsilon on a Linear Schedule?
GAMMA_LINEAR = True     # Put Gamma on a Linear Schedule?

In [2]:
import XAIArchitectures
model = XAIArchitectures.FullyConnected(mode='PGD')
model.set_params(alpha=ALPHA, epsilon=EPSILON, gamma=GAMMA,
                learn_rate=LEARN_RATE, max_epochs=MAX_EPOCHS,
                epsilon_linear=EPSILON_LINEAR,gamma_linear=GAMMA_LINEAR,
                mode="NONE")

SET MODE TO:  PGD


In [3]:
dm = MNISTDataModule(batch_size=BATCH_SIZE, num_workers=0)
trainer = pl.Trainer(max_epochs=MAX_EPOCHS, accelerator="cpu", devices=1)
trainer.fit(model, datamodule=dm)
result = trainer.test(model, datamodule=dm)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name | Type       | Params
------------------------------------
0 | lays | ModuleList | 101 K 
1 | l1   | Linear     | 100 K 
2 | lf   | Linear     | 1.3 K 
------------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.407     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

                not been set for this class (_ResultMetric). The property determines if `update` by
                default needs access to the full metric state. If this is not the case, significant speedups can be
                achieved and we recommend setting this to `False`.
                We provide an checking function
                `from torchmetrics.utilities import check_forward_no_full_state`
                that can be used to check if the `full_state_update=True` (old and potential slower behaviour,
                default for now) or if `full_state_update=False` can be used safely.
                


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]



Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.9700000286102295
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [4]:
ACC = round(result[0]['test_acc'],2)
print(ACC)

0.97


In [5]:

import torchvision.datasets as datasets

device = torch.device("cpu")
mnist_trainset = datasets.MNIST(root='./Datasets', train=True, download=True, transform=None)
mnist_testset = datasets.MNIST(root='./Datasets', train=False, download=True, transform=None)


In [272]:
import copy
import numpy as np
def init_dual_vars(model, inp, weights):
    h = inp
    duals = [torch.zeros_like(h, requires_grad=True)]
    for i in range(len(model.layers)):
        w = weights[2*(i)].double()
        h = torch.matmul(h, w.T)
        dual_i = torch.rand_like(h, requires_grad=True)
        dual_i.requires_grad = True 
        duals.append(dual_i.float())
    return duals

weights = [t for t in model.parameters()]
data = np.asarray(mnist_testset[0][0]).reshape(1, 28*28)/255.0
inp = torch.tensor(data)

duals = init_dual_vars(model, inp, weights)
#duals_lam = init_dual_vars(model, inp, weights)




In [271]:
def zeros_violation(W, b, x_l, x_u, dual_lam, dual_mus, i):
    
    return None

def affine_legrange(W, b, x_l, x_u, dual_lam, dual_mus, i):
    x_l = torch.maximum(x_l, x_l*0)
    
    x_mu = (x_u + x_l)/2
    x_r = (x_u - x_l)/2
    W_mu = W
    h_mu = torch.matmul(x_mu, W_mu.T)
    x_rad = torch.matmul(x_r, torch.abs(W_mu).T)
    h_u = torch.maximum(h_mu + x_rad, h_mu*0)
    h_l = torch.maximum(h_mu - x_rad, h_mu*0)
    
    
    # Legrange Slack computations
    elm_1 = torch.matmul(torch.maximum(duals[i] - torch.matmul(duals[i+1], W), 0.0*duals[i]).float(), x_u.T)
    elm_2 = torch.matmul(torch.minimum(duals[i] - torch.matmul(duals[i+1], W), 0.0*duals[i]).float(), x_l.T)
    elm_3 = torch.matmul(b, duals[i+1].T)
    #print(elm_1, elm_2, elm_3)
    violation = elm_1 + elm_2 + elm_3
    
    return h_u, h_l, violation


def LegrangeForward(model, weights, inp, duals, eps, gam=0.0):
    h_l = inp-eps; h_u = inp+eps
    h_nom = inp.float()
    assert((h_l <= h_u).all())
    layers = int(len(weights)/2); 
    total_violation = 0
    inter_upper = [h_u]
    inter_lower = [h_l]
    for i in range(len(model.layers)):
        w, b = weights[2*(i)], weights[(2*(i))+1]
        h_l, h_u, violation = affine_legrange(w, b, h_l, h_u, duals, i)
        h_l = model.activations[0](h_l) 
        h_u = model.activations[0](h_u)
        # Account for additional violation in activation
        violation += h_nom * duals[i]
        h_nom = torch.matmul(h_nom, w.T) + b
        violation -= h_nom * duals[i+1]
        inter_upper.append(h_u)
        inter_lower.append(h_l)
        total_violation += violation
    return total_violation, inter_lower, inter_upper



err, inter_l, inter_u = LegrangeForward(model, weights, inp.float(), duals, 0.001)
print(err)
for i in range(len(duals)):
    duals[i].retain_grad()
err.backward()


for i in range(len(duals)):
    duals[i] = duals[i] - (0.5 * duals[i].grad)
    duals[i] = torch.maximum(0*duals[i] + 1e-5, duals[i])
#    duals[i] = torch.clip(duals[i], inter_l[i], inter_u[i])
#duals[0] = 0.0 * duals[0]
#duals[-1] = 0.0 * duals[-1]
err, l, u = LegrangeForward(model, weights, inp.float(), duals, 0.001)
print(err)   

RuntimeError: output with shape [1, 1] doesn't match the broadcast shape [1, 784]

In [252]:
for i in duals:
    print(i)

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.