In [1]:
import sys
sys.path.append("..")
from models.Pruneable import Pruneable
import onnx
import numpy as np
import torch
import torch.nn as nn
from utils.model_utils import *
from utils.config_utils import *
from utils.system_utils import *
from models.statistics.Metrics import Metrics
from utils.system_utils import setup_directories
from verify_utils.onnx_translator import ONNXTranslator
from models.networks.MLP5 import MLP5
from models import GeneralModel

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


In [2]:
# Hand-written experiment configuration, grouped by concern.
# Later cells read every setting from this dict by key.
arguments = {
    # ---- device ----
    'device': "cuda",

    # ---- model ----
    # "ResNet18" was considered here too; it is noted as not supported for structured pruning.
    'model': "MLP5",
    'disable_masking': 1,       # 1 = masking on (unstructured), 0 = masking off
    'track_weights': 0,
    'enable_rewinding': 0,
    'growing_rate': 0.0000,
    'outer_layer_pruning': 0,

    # ---- pruning criterion and regularisation ----
    # "SNAPit" is the structured criterion; "SNIPit" would be the unstructured alternative.
    'prune_criterion': "SNAPit",
    'l0': 0,
    'l0_reg': 1.0,
    'l1_reg': 0,
    'lp_reg': 0,
    'l2_reg': 5e-5,
    'hoyer_reg': 0.001,
    'N': 6000,                  # dataset dependent
    'beta_ema': 0.999,

    # ---- criterion schedule ----
    'pruning_limit': 0.9,
    'snip_steps': 6,

    # ---- checkpoint (None: no pre-trained model is loaded) ----
    'checkpoint_name': None,
    'checkpoint_model': None,

    # ---- dataset ----
    'data_set': "MNIST",
    'batch_size': 512,
    'mean': (0.1307,),
    'std': (0.3081,),
    'tuning': 0,
    'preload_all_data': 0,
    'random_shuffle_labels': 0,

    # ---- loss ----
    'loss': "CrossEntropy",

    # ---- optimizer ----
    'optimizer': "ADAM",
    'learning_rate': 2e-3,

    # ---- training loop ----
    'save_freq': 10,
    'eval': 0,
    'train_scheme': "DefaultTrainer",
    'seed': 1234,
    'epochs': 20,

    'grad_noise': 0,
    'grad_clip': 10,
    'eval_freq': 1000,
    'max_training_minutes': 6120,
    'plot_weights_freq': 50,
    'prune_delay': 0,
    'prune_freq': 1,
    'rewind_to': 6,

    # ---- plotting / diagnostics switches ----
    'skip_first_plot': 0,
    'disable_histograms': 0,
    'disable_saliency': 0,
    'disable_confusion': 0,
    'disable_weightplot': 0,
    'disable_netplot': 0,
    'disable_activations': 0,

    # ---- pruning during training ----
    'pruning_rate': 0,
    'pruning_freq': 1,
}

In [3]:
# Set up the run-wide metrics object and route notebook logging through it.
metrics = Metrics()

# `out` is the notebook-level logging hook. It is already a module-level name,
# so no `global` declaration is needed (and one placed after this assignment
# is a SyntaxError when the cell is compiled as a single unit).
out = metrics.log_line

# NOTE: this deliberately rebinds the built-in `print` in the notebook
# namespace, so any later cell calling print(...) goes through
# metrics.log_line. Imported modules keep the real built-in.
print = out

ensure_current_directory()
out(f"starting at {get_date_stamp()}")

# Copy batch size and evaluation frequency from the configuration onto the
# metrics object.
metrics._batch_size = arguments['batch_size']
metrics._eval_freq = arguments['eval_freq']

starting at 2022-06-01_18.25.45


In [4]:
# Resolve the compute device from the configuration dict. The bare `device`
# on the last line makes the notebook display the resolved value.
device = configure_device(arguments)
device

device(type='cuda')

In [5]:
# Show the current working directory: the ONNX file loaded in the next cell is
# given as a relative path, so it is resolved against this directory.
!pwd

/nfs/homedirs/wangxun/robustness/SparseNetwork-Verification


In [6]:
# Load the ONNX network from a path relative to the working directory.
onnx_model=onnx.load("notebook/mnist_relu_5_100.onnx")

In [7]:
# Wrap the loaded ONNX model in the project's translator.
# NOTE(review): the meaning of the positional `True` is not visible here —
# consider passing it by keyword once confirmed against ONNXTranslator.
translator = ONNXTranslator(onnx_model, True)

In [8]:
# Translate the ONNX graph; the two returned values are handed to the model
# constructor below as `operation` and `resources`.
operations, resources = translator.translate()


Mean of [0.] extracted from network
Std of [1.] extracted from network


In [9]:
# Instantiate the network named by arguments['model'], passing it the
# operations/resources produced by the ONNX translator, then move it to the
# configured device.
model: GeneralModel = find_right_model(
    NETWORKS_DIR,
    arguments['model'],
    # placement and translated network description
    device=device,
    operation=operations,
    resources=resources,
    # pruning-related switches taken from the configuration
    is_maskable=arguments['disable_masking'],
    is_tracking_weights=arguments['track_weights'],
    is_rewindable=arguments['enable_rewinding'],
    is_growable=arguments['growing_rate'] > 0,
    outer_layer_pruning=arguments['outer_layer_pruning'],
    # true only when outer-layer pruning is off and the criterion name
    # contains "Structured"
    maintain_outer_mask_anyway=(
        not arguments['outer_layer_pruning']
        and "Structured" in arguments['prune_criterion']
    ),
    # regularisation settings
    l0=arguments['l0'],
    l0_reg=arguments['l0_reg'],
    N=arguments['N'],
    beta_ema=arguments['beta_ema'],
    l2_reg=arguments['l2_reg'],
).to(device)

In [10]:
# Display the model's repr to inspect the layer structure that was built.
model

MLP5(
  (layers): Sequential(
    (0): ContainerLinear(in_features=784, out_features=100, bias=True)
    (1): ReLU()
    (2): ContainerLinear(in_features=100, out_features=100, bias=True)
    (3): ReLU()
    (4): ContainerLinear(in_features=100, out_features=100, bias=True)
    (5): ReLU()
    (6): ContainerLinear(in_features=100, out_features=100, bias=True)
    (7): ReLU()
    (8): ContainerLinear(in_features=100, out_features=100, bias=True)
    (9): ReLU()
    (10): ContainerLinear(in_features=100, out_features=10, bias=True)
  )
)

# prune the baseline model

In [11]:
# ---- pruning criterion ----
# NOTE(review): the criterion is given the raw arguments['device'] string,
# whereas the loss and trainer below receive the configured `device` object —
# confirm this difference is intended.
criterion = find_right_model(
    CRITERION_DIR,
    arguments['prune_criterion'],
    model=model,
    limit=arguments['pruning_limit'],
    start=0.5,
    steps=arguments['snip_steps'],
    device=arguments['device'],
)

# ---- data loaders (train / test) ----
train_loader, test_loader = find_right_model(
    DATASETS,
    arguments['data_set'],
    arguments=arguments,
    mean=arguments['mean'],
    std=arguments['std'],
)

# ---- loss function ----
loss = find_right_model(
    LOSS_DIR,
    arguments['loss'],
    device=device,
    l1_reg=arguments['l1_reg'],
    lp_reg=arguments['lp_reg'],
    l0_reg=arguments['l0_reg'],
    hoyer_reg=arguments['hoyer_reg'],
)

# ---- optimizer ----
# Weight decay is set to 0 whenever the l0 option is enabled; otherwise the
# configured l2_reg value is used.
optimizer = find_right_model(
    OPTIMS,
    arguments['optimizer'],
    params=model.parameters(),
    lr=arguments['learning_rate'],
    weight_decay=0 if arguments['l0'] else arguments['l2_reg'],
)

# ---- trainer (only built when not in eval mode) ----
if not arguments['eval']:
    # The run name encodes the key settings of this experiment.
    run_name = (
        f"_model={arguments['model']}_dataset={arguments['data_set']}_prune-criterion={arguments['prune_criterion']}"
        f"_pruning-limit={arguments['pruning_limit']}_train-scheme={arguments['train_scheme']}_seed={arguments['seed']}"
    )
    trainer = find_right_model(
        TRAINERS_DIR,
        arguments['train_scheme'],
        model=model,
        loss=loss,
        optimizer=optimizer,
        device=device,
        arguments=arguments,
        train_loader=train_loader,
        test_loader=test_loader,
        metrics=metrics,
        criterion=criterion,
        run_name=run_name,
    )

Using mean (0.1307,)
Made datestamp: 2022-06-01_18.25.55_model=MLP5_dataset=MNIST_prune-criterion=SNAPit_pruning-limit=0.9_train-scheme=DefaultTrainer_seed=1234




In [12]:
# Snapshot the model before training, under the tag "original".
# setup_directories() runs first — presumably it prepares the output folders
# that save_models writes into (TODO confirm against utils.system_utils).
setup_directories()
save_models([model],"original")

In [13]:
# Run training. `trainer` is only defined when arguments['eval'] was falsy at
# the time the trainer cell ran; with eval enabled this line raises NameError.
trainer.train()

[1mStarted training[0m
Saved results/2022-06-01_18.25.55_model=MLP5_dataset=MNIST_prune-criterion=SNAPit_pruning-limit=0.9_train-scheme=DefaultTrainer_seed=1234/output/scores
pruning 34496 percentage 0.44 length_nonzero 78400
pruning 7480 percentage 0.748 length_nonzero 10000
pruning 7930 percentage 0.793 length_nonzero 10000
pruning 7470 percentage 0.747 length_nonzero 10000
pruning 7360 percentage 0.736 length_nonzero 10000
pruning 520 percentage 0.52 length_nonzero 1000
  (layers): Sequential(
    (0): ContainerLinear(in_features=784, out_features=56.0, bias=True)
    (2): ContainerLinear(in_features=56.0, out_features=45.0, bias=True)
    (4): ContainerLinear(in_features=45.0, out_features=46.0, bias=True)
    (6): ContainerLinear(in_features=46.0, out_features=55.0, bias=True)
    (8): ContainerLinear(in_features=55.0, out_features=48.0, bias=True)
    (10): ContainerLinear(in_features=48.0, out_features=10, bias=True)
final percentage after snap: 0.5465326633165829
Saved result

Training... 117/118

plotting..
finished plotting


[1mEPOCH 1 [0m 




Training... 0/118

Evaluating... 19/20

$  acc/train  |  loss/train  |  loss/test  |  acc/test  |  sparse/weight  |  sparse/node  |  sparse/hm  |  sparse/log_disk_size  |  time/gpu_time  |  time/flops_per_sample  |  time/flops_log_cum 
$  0.2089844  |  1.8764400   |  1.8421321  | 0.2212718  |    0.9173534    |   0.9020000   |  0.3565431  |       12.7237766       |    6.0786732    |      10319.0000000      |      8.7984545      
$ |  cuda/ram_footprint  |  time/batch_time  |  
$ |    227328.0000000    |     0.0131503     |
Training... 117/118



[1mEPOCH 2 [0m 




Training... 0/118

Evaluating... 19/20

$  acc/train  |  loss/train  |  loss/test  |  acc/test  |  sparse/weight  |  sparse/node  |  sparse/hm  |  sparse/log_disk_size  |  time/gpu_time  |  time/flops_per_sample  |  time/flops_log_cum 
$  0.3105469  |  1.6389621   |  1.6840366  | 0.2923713  |    0.9173534    |   0.9020000   |  0.4434196  |       12.72

Training... 117/118



[1mEPOCH 16 [0m 




Training... 0/118

Evaluating... 19/20

$  acc/train  |  loss/train  |  loss/test  |  acc/test  |  sparse/weight  |  sparse/node  |  sparse/hm  |  sparse/log_disk_size  |  time/gpu_time  |  time/flops_per_sample  |  time/flops_log_cum 
$  0.9511719  |  0.2062275   |  0.3299393  | 0.9189338  |    0.9173534    |   0.9020000   |  0.9181429  |       12.7237766       |    5.0909574    |      10319.0000000      |      9.9991395      
$ |  cuda/ram_footprint  |  time/batch_time  |  
$ |    227328.0000000    |     0.0171713     |
Training... 117/118



[1mEPOCH 17 [0m 




Training... 0/118

Evaluating... 19/20

$  acc/train  |  loss/train  |  loss/test  |  acc/test  |  sparse/weight  |  sparse/node  |  sparse/hm  |  sparse/log_disk_size  |  time/gpu_time  |  time/flops_per_sample  |  time/flops_log_cum 
$  0.9101562  |  0.2992357   |  0.3152551  | 0.9195944  |    0.9173534    |   0.9020000   |  0.9184726  |       12.7237766       |    6.0467673 

<Figure size 432x288 with 0 Axes>