#### Import modules

In [None]:
import torch
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F
from torch.nn.utils.prune import CustomFromMask
import pandas as pd
import numpy as nps
import matplotlib.pyplot as plt
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import random
import matplotlib.pyplot as plt
import torchvision.models as models
import time
from tqdm.autonotebook import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import inspect
import re

In [None]:
# Record which interpreter and Python version executed this notebook.
import sys
print(sys.executable)
print(sys.version)
print(sys.version_info)

/usr/bin/python3
3.7.13 (default, Apr 24 2022, 01:04:09) 
[GCC 7.5.0]
sys.version_info(major=3, minor=7, micro=13, releaselevel='final', serial=0)


In [None]:
import matplotlib

In [None]:
# Display the installed matplotlib version as the cell result.
matplotlib.__version__

'3.2.2'

#### Functions

In [None]:
def plot_kernels(tensor, num_cols=3):
    """Show each ``tensor[i]`` as a small image laid out on a grid.

    Args:
        tensor: Array-like with ``ndim == 4`` whose last dimension has size 3;
            every ``tensor[i]`` is handed to ``imshow`` as one image.
        num_cols: Number of columns in the grid.

    Raises:
        ValueError: If ``tensor`` is not 4-D or its last dimension is not 3.
    """
    if tensor.ndim != 4:
        raise ValueError("assumes a 4D tensor")
    if tensor.shape[-1] != 3:
        raise ValueError("last dim needs to be 3 to plot")

    num_kernels = tensor.shape[0]
    # Ceiling division so no empty trailing row is allocated when the kernel
    # count is an exact multiple of num_cols; keep at least one row so the
    # figure never gets a zero height.
    num_rows = max(1, -(-num_kernels // num_cols))
    fig = plt.figure(figsize=(num_cols, num_rows))
    for i in range(num_kernels):
        ax = fig.add_subplot(num_rows, num_cols, i + 1)
        ax.imshow(tensor[i])
        ax.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.show()


def resnet_training(train_loader, val_loader, model, epochs, loss_function, optimizer):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Every batch is moved to the module-level ``device`` before the forward
    pass. A tqdm progress bar shows the running mean loss of the epoch.

    Args:
        train_loader: Iterable of ``(inputs, labels)`` batches supporting ``len()``.
        val_loader: Accepted for call compatibility; not used here.
        model: Network to optimise; switched to training mode each epoch.
        epochs: Number of passes over ``train_loader``.
        loss_function: Callable ``loss_function(outputs, labels)`` returning
            a scalar loss tensor.
        optimizer: Optimizer whose ``step()`` runs after each backward pass.

    Returns:
        ``(losses, accs)``: per-batch loss values and per-batch accuracy
        fractions, accumulated over all epochs.
    """
    start_ts = time.time()

    losses = []
    accs = []
    batches = len(train_loader)

    # loop for every epoch (training only; evaluation is done elsewhere)
    for _ in range(epochs):
        total_loss = 0

        # progress bar (works in Jupyter notebook too!)
        progress = tqdm(enumerate(train_loader), desc="Loss: ", total=batches)

        # ----------------- TRAINING --------------------
        model.train()

        for i, data in progress:
            X, y = data[0].to(device), data[1].to(device)

            # training step for single batch
            model.zero_grad()
            outputs = model(X)
            loss = loss_function(outputs, y)
            loss.backward()
            optimizer.step()

            # Read the scalar once; each .item() call copies from the device.
            current_loss = loss.item()
            losses.append(current_loss)
            total_loss += current_loss

            # `y` already lives on `device`, so compare against it directly.
            _, predicted = torch.max(outputs.data, 1)
            correct = (predicted == y).sum().item()
            accs.append(correct / y.size(0))

            # updating progress bar with the running mean loss of this epoch
            progress.set_description("Loss: {:.4f}".format(total_loss/(i+1)))

        # releasing unnecessary memory in GPU
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    print(f"Training time: {time.time()-start_ts}s")

    return losses, accs


def resnet_eval(model, loader=None):
    """Evaluate ``model`` and return its mean per-batch accuracy.

    Batches are moved to the module-level ``device``; gradients are disabled.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable of ``(inputs, labels)`` batches supporting ``len()``.
            Defaults to the module-level ``val_loader``.

    Returns:
        The unweighted mean of the per-batch accuracies.
    """
    if loader is None:
        loader = val_loader

    val_losses = 0.0

    precision, recall, f1, accuracy = [], [], [], []

    loss_function = nn.CrossEntropyLoss()
    val_batches = len(loader)
    # set model to evaluating (testing)
    model.eval()
    with torch.no_grad():
        for data in loader:
            X, y = data[0].to(device), data[1].to(device)

            outputs = model(X)  # prediction scores from the network

            # Accumulated as a plain float; tracked but not reported below.
            val_losses += loss_function(outputs, y).item()

            predicted_classes = torch.max(outputs, 1)[1]  # index of the highest score

            # accuracy for this batch
            accuracy.append(
                calculate_metric(accuracy_score, y.cpu(), predicted_classes.cpu())
            )

    # precision/recall/f1 are never filled, so their averages are 0 and
    # print_scores skips them; only accuracy is printed.
    print_scores(precision, recall, f1, accuracy, val_batches)

    return sum(accuracy)/val_batches


def flatten(t):
    """Return ``t`` reshaped to a single row with size-1 dimensions squeezed out.

    The result is 1-D for inputs with more than one element and 0-D for a
    single-element input.
    """
    return t.reshape(1, -1).squeeze()


def get_new_model(learn_rate=0.001):
    """Create a fresh ``CifarResNet`` on ``device`` with its loss and optimizer.

    Args:
        learn_rate: Learning rate given to the Adam optimizer.

    Returns:
        ``(model, loss_func, optim_func)``: the new model, a
        ``CrossEntropyLoss`` instance and an Adam optimizer over the model's
        parameters.
    """
    model = CifarResNet().to(device)

    loss_func = nn.CrossEntropyLoss()
    # Honour the caller-supplied rate instead of the module-level
    # `learning_rate` global, so the parameter actually takes effect.
    optim_func = torch.optim.Adam(model.parameters(), lr=learn_rate)

    return model, loss_func, optim_func

def weight_reset(m):
    """Re-initialise ``m`` in place if it is a ``Conv2d`` or ``Linear`` layer.

    Any other module type is left untouched.
    """
    if not isinstance(m, (nn.Conv2d, nn.Linear)):
        return
    m.reset_parameters()

def add_masks(model,masks):
    """Attach the given pruning masks to the conv/linear layers of ``model``.

    Args:
        model: Module whose ``named_modules()`` are scanned.
        masks: Mapping from ``"<module_name>.weight_mask"`` to a mask tensor.
            Layers without a matching key are left unpruned.
    """
    maskable = (torch.nn.Conv2d, torch.nn.Linear)
    for layer_name, layer in model.named_modules():
        if not isinstance(layer, maskable):
            continue
        mask = masks.get(f"{layer_name}.weight_mask")
        if mask is None:
            continue
        # `apply` is a classmethod: it registers the mask on the layer's
        # 'weight' parameter.
        CustomFromMask.apply(layer, 'weight', mask)

def calculate_metric(metric_fn, true_y, pred_y):
    """Call ``metric_fn(true_y, pred_y)``, requesting macro averaging if supported.

    Args:
        metric_fn: Metric callable taking ``(true_y, pred_y)`` and optionally
            an ``average`` parameter (positional or keyword-only).
        true_y: Ground-truth labels, passed through unchanged.
        pred_y: Predicted labels, passed through unchanged.

    Returns:
        Whatever ``metric_fn`` returns.
    """
    try:
        # signature().parameters also lists keyword-only parameters, which
        # getfullargspec().args leaves out.
        accepts_average = "average" in inspect.signature(metric_fn).parameters
    except (TypeError, ValueError):
        # No introspectable signature: fall back to the plain two-argument call.
        accepts_average = False

    if accepts_average:
        return metric_fn(true_y, pred_y, average="macro")
    return metric_fn(true_y, pred_y)
    
def print_scores(p, r, f1, a, batch_size):
    """Print the mean of each score list, skipping means that are not above 0.001.

    Args:
        p, r, f1, a: Lists of precision, recall, F1 and accuracy scores.
        batch_size: Divisor used to average each list.
    """
    labelled = {"precision": p, "recall": r, "F1": f1, "accuracy": a}
    for name, scores in labelled.items():
        mean_score = sum(scores)/batch_size
        if mean_score > 0.001:
            print(f"\t{name.rjust(14, ' ')}: {mean_score:.4f}")

#### Other


In [None]:
def get_data_loaders(train_batch_size, val_batch_size):
    """Create the CIFAR-10 training and test data loaders.

    Images are converted to tensors and normalised per channel with mean 0.5
    and std 0.5. The dataset is stored under ``./data`` and downloaded when
    missing.

    Args:
        train_batch_size: Batch size of the shuffled training loader.
        val_batch_size: Batch size of the unshuffled test loader.

    Returns:
        ``(trainloader, testloader)``.
    """
    transform = transforms.Compose(
        [transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform)
    # Use the requested batch sizes rather than a fixed constant.
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=train_batch_size,
                                              shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                          download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=val_batch_size,
                                            shuffle=False, num_workers=2)
    return trainloader, testloader

def resnet_print_sparcity(model, print_spar=True):
    """Measure the share of exactly-zero weights in the conv/linear layers.

    Args:
        model: Module whose leaf layers (via ``get_children``) are inspected.
        print_spar: When true, print the sparsity of each layer and the
            global figure.

    Returns:
        Global sparsity in percent: zero weights over all weights of the
        inspected layers.
    """
    prunable = (torch.nn.modules.conv.Conv2d, torch.nn.modules.Linear)
    layers = [layer for layer in get_children(model) if isinstance(layer, prunable)]

    zero_total = 0
    weight_total = 0
    for layer in layers:
        zeros = torch.sum(layer.weight == 0)
        count = layer.weight.nelement()
        if print_spar:
            print("Sparsity in {}: {:.2f}%".format(
                layer, 100. * float(zeros) / float(count)
            ))
        zero_total += zeros
        weight_total += count

    glob_sparc = 100. * float(zero_total) / float(weight_total)

    if print_spar:
        print("Global sparsity: {:.2f}%".format(glob_sparc))

    return glob_sparc

def one_shot_resnet_prune(model_inp, percent):
    """Return a pruned copy of ``model_inp``; the input model is not modified.

    A new ``CifarResNet`` receives ``model_inp``'s state dict, then the
    weights of all its conv/linear layers are pruned together with global L1
    unstructured pruning.

    Args:
        model_inp: Model whose ``state_dict()`` is copied.
        percent: Passed to ``prune.global_unstructured`` as ``amount``.

    Returns:
        The newly created, pruned model.
    """
    pruned = CifarResNet().to(device)
    pruned.load_state_dict(model_inp.state_dict())

    prunable = (torch.nn.modules.conv.Conv2d, torch.nn.modules.Linear)
    targets = []
    for layer in get_children(pruned):
        if isinstance(layer, prunable):
            targets.append((layer, 'weight'))

    prune.global_unstructured(
        targets,
        pruning_method=prune.L1Unstructured,
        amount=percent,
    )
    return pruned

def get_children(model: torch.nn.Module):
    """Collect the leaf modules of ``model`` in depth-first order.

    Args:
        model: Module to walk.

    Returns:
        ``model`` itself when it has no child modules; otherwise a flat list
        of every descendant module that has no children of its own.
    """
    children = list(model.children())
    if not children:
        # A module without children is a leaf; it is returned bare (not
        # wrapped in a list), which the recursion below relies on.
        return model

    flatt_children = []
    for child in children:
        leaves = get_children(child)
        # Decide by return type rather than by catching TypeError from
        # list.extend: an iterable leaf (e.g. an empty Sequential or a
        # ParameterList) would otherwise be dropped or expanded into
        # non-module items.
        if isinstance(leaves, list):
            flatt_children.extend(leaves)
        else:
            flatt_children.append(leaves)
    return flatt_children

In [None]:
def get_resnet_modules_for_prune(model):
    """Return ``(layer, 'weight')`` pairs for every conv/linear leaf of ``model``."""
    prunable = (torch.nn.modules.conv.Conv2d, torch.nn.modules.Linear)
    pairs = []
    for layer in get_children(model):
        if isinstance(layer, prunable):
            pairs.append((layer, 'weight'))
    return pairs

def get_resnet_modules(model):
    """Return the conv/linear leaf layers of ``model`` in ``get_children`` order."""
    wanted = (torch.nn.modules.conv.Conv2d, torch.nn.modules.Linear)
    selected = []
    for layer in get_children(model):
        if isinstance(layer, wanted):
            selected.append(layer)
    return selected

In [None]:
class CifarResNet(nn.Module):
    """torchvision ResNet-18 with a configurable input stem and a 10-class head."""

    def __init__(self, in_channels=3):
        """Build the network.

        Args:
            in_channels: Number of channels of the input images
                (e.g. 1 for grayscale, 3 for RGB).
        """
        super().__init__()

        # ResNet-18 architecture without pretrained weights (pretrained=False).
        backbone = models.resnet18(pretrained=False)

        # Replace the first convolution so the input channel count is
        # configurable; the other settings match torchvision's definition:
        # nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        backbone.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)

        # Replace the classifier head so it outputs 10 classes.
        backbone.fc = nn.Linear(backbone.fc.in_features, 10)

        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped ResNet and return its output."""
        return self.model(x)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is read as a global by the training/evaluation helpers.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"[INFO]: Computation device: {device}")

# Hyper-parameters used by the cells below.
num_epochs = 5 
learning_rate = 0.001
batch_size=50

[INFO]: Computation device: cuda


In [None]:
# Build the CIFAR-10 loaders; `val_loader` wraps the test split (train=False).
train_loader, val_loader = get_data_loaders(batch_size, batch_size)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Keep an untrained twin of `model`: copying the state dict gives both
# networks identical initial weights.
untr_model = CifarResNet().to(device)
model, loss_func, optim_func = get_new_model()
untr_model.load_state_dict(model.state_dict())

<All keys matched successfully>

In [None]:
# Check on one layer: KL divergence between the weight distributions of the
# two models. The models hold identical weights at this point, so the result
# should be approximately zero.
indx = 0

# KLDivLoss expects log-probabilities as input, hence log_softmax here ...
untr_modules = get_resnet_modules(untr_model)
untr_1l = untr_modules[indx].weight.data
untr_1l_distr = F.log_softmax(flatten(untr_1l), -1)

# ... and plain probabilities as target, hence softmax here.
modules = get_resnet_modules(model)
model_1l = modules[indx].weight.data
model_1l_distr = F.softmax(flatten(model_1l), -1)

torch.nn.KLDivLoss(reduction="sum")(untr_1l_distr, model_1l_distr).item()

-4.4430566958908457e-07

In [None]:
# Per-layer KL distances, one list entry per experiment repetition.
# untrained weights vs. trained weights (before pruning)
results_bp = dict()
# masked untrained weights vs. masked + retrained weights
results_ap = dict()
# unmasked untrained weights vs. masked + retrained weights
results_ap_masked_weights = dict()

accs = []

# Names of the Conv2d / Linear layers in named_modules() order. They are used
# as result keys and indexed in parallel with get_resnet_modules(), so both
# traversals must yield these layers in the same order.
layer_names = [
    name for name, module in untr_model.named_modules()
    if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear))
]
for name in layer_names:
    results_bp[name] = []
    results_ap[name] = []
    results_ap_masked_weights[name] = []

# Input must be log-probabilities, target plain probabilities.
kl_div = torch.nn.KLDivLoss(reduction="sum")

for i in range(10):
    print(i)

    # creating untrained model and model to be trained (same initial weights)
    untr_model = CifarResNet().to(device)
    model, loss_func, optim_func = get_new_model()
    untr_model.load_state_dict(model.state_dict())

    resnet_training(train_loader, val_loader, model, num_epochs, loss_func, optim_func)

    untr_modules = get_resnet_modules(untr_model)
    modules = get_resnet_modules(model)

    # calculating information distances for layers between untrained and trained models
    for indx in range(len(untr_modules)):

        untr_1l = untr_modules[indx].weight.data
        untr_1l_distr = F.log_softmax(flatten(untr_1l), -1)

        model_1l = modules[indx].weight.data
        model_1l_distr = F.softmax(flatten(model_1l), -1)

        results_bp[layer_names[indx]].append(kl_div(untr_1l_distr, model_1l_distr).item())

    # creating model to be pruned with weights of trained model
    model_topr = CifarResNet().to(device)
    model_topr.load_state_dict(model.state_dict())

    model_topr_80 = one_shot_resnet_prune(model_topr, 0.8)

    # saving masks for layers of pruned model to apply them for the next models
    masks = dict()
    for name, mask in model_topr_80.named_buffers():
        # Keep only pruning masks; other buffers (e.g. BatchNorm running
        # statistics) are not masks and are never looked up by add_masks.
        if name.endswith("weight_mask"):
            masks[name] = mask.to(device)

    # create model with untrained model weights and apply pruning mask saved before
    reset_model = CifarResNet().to(device)
    reset_model.load_state_dict(untr_model.state_dict())
    add_masks(reset_model, masks)

    # train model with masked weights
    optim_func = torch.optim.Adam(reset_model.parameters(), lr=learning_rate)
    resnet_training(train_loader, val_loader, reset_model, num_epochs, loss_func, optim_func)

    # calculating information distances for layers between untrained and pruned + trained models
    # NOTE(review): despite the `_masked` names, `untr_modules` holds the
    # unmasked untrained weights -- confirm this is the intended comparison.
    reset_modules = get_resnet_modules(reset_model)
    for indx in range(len(untr_modules)):

        untr_1l_masked = untr_modules[indx].weight.data
        untr_1l_masked_distr = F.log_softmax(flatten(untr_1l_masked), -1)

        reset_model_1l = reset_modules[indx].weight.data
        reset_model_1l_distr = F.softmax(flatten(reset_model_1l), -1)

        results_ap_masked_weights[layer_names[indx]].append(kl_div(untr_1l_masked_distr, reset_model_1l_distr).item())

    # add mask to untrained model for calculating information
    untr_model_masked = CifarResNet().to(device)
    untr_model_masked.load_state_dict(untr_model.state_dict())
    add_masks(untr_model_masked, masks)

    untr_model_masked_modules = get_resnet_modules(untr_model_masked)
    for indx in range(len(untr_modules)):

        untr_1l = untr_model_masked_modules[indx].weight.data
        untr_1l_distr = F.log_softmax(flatten(untr_1l), -1)

        reset_model_1l = reset_modules[indx].weight.data
        reset_model_1l_distr = F.softmax(flatten(reset_model_1l), -1)

        results_ap[layer_names[indx]].append(kl_div(untr_1l_distr, reset_model_1l_distr).item())

    print('results_bp: ', results_bp)
    print('results_ap: ', results_ap)
    print('results_ap_masked_weights: ', results_ap_masked_weights)

    # From the third repetition on, write the accumulated results to CSV.
    if i > 1:
        results_bp_df = pd.DataFrame.from_dict(results_bp)
        results_ap_df = pd.DataFrame.from_dict(results_ap)
        results_ap_masked_weights_df = pd.DataFrame.from_dict(results_ap_masked_weights)

        results_bp_df.to_csv(f'results_bp_df_{i+1}(5ep)_3dec.csv')
        results_ap_df.to_csv(f'results_ap_df_{i+1}(5ep)_3dec.csv')
        results_ap_masked_weights_df.to_csv(f'results_ap_masked_weights_df_{i+1}(5ep)_3dec.csv')


0


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 387.2216103076935s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 403.5275721549988s
results_bp:  {'model.conv1': [0.11307872086763382], 'model.layer1.0.conv1': [0.06410016864538193], 'model.layer1.0.conv2': [0.07414640486240387], 'model.layer1.1.conv1': [0.06642788648605347], 'model.layer1.1.conv2': [0.07232633978128433], 'model.layer2.0.conv1': [0.10677914321422577], 'model.layer2.0.conv2': [0.1137218326330185], 'model.layer2.0.downsample.0': [0.15578684210777283], 'model.layer2.1.conv1': [0.042261216789484024], 'model.layer2.1.conv2': [0.03841032832860947], 'model.layer3.0.conv1': [0.10068082809448242], 'model.layer3.0.conv2': [0.04259492829442024], 'model.layer3.0.downsample.0': [0.07897263765335083], 'model.layer3.1.conv1': [0.0069086188450455666], 'model.layer3.1.conv2': [0.00597959291189909], 'model.layer4.0.conv1': [0.009717661887407303], 'model.layer4.0.conv2': [0.0015595073346048594], 'model.layer4.0.downsample.0': [0.030530819669365883], 'model.layer4.1.conv1': [0.00034805515315383673], 'model.layer4.1.conv2': [0.00035278307

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 388.55806970596313s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 404.8442041873932s
results_bp:  {'model.conv1': [0.11307872086763382, 0.11307399719953537], 'model.layer1.0.conv1': [0.06410016864538193, 0.0701621025800705], 'model.layer1.0.conv2': [0.07414640486240387, 0.08460833877325058], 'model.layer1.1.conv1': [0.06642788648605347, 0.05323853716254234], 'model.layer1.1.conv2': [0.07232633978128433, 0.058503128588199615], 'model.layer2.0.conv1': [0.10677914321422577, 0.10520866513252258], 'model.layer2.0.conv2': [0.1137218326330185, 0.11070676147937775], 'model.layer2.0.downsample.0': [0.15578684210777283, 0.170308917760849], 'model.layer2.1.conv1': [0.042261216789484024, 0.04595478996634483], 'model.layer2.1.conv2': [0.03841032832860947, 0.04019151255488396], 'model.layer3.0.conv1': [0.10068082809448242, 0.11203797161579132], 'model.layer3.0.conv2': [0.04259492829442024, 0.0491812601685524], 'model.layer3.0.downsample.0': [0.07897263765335083, 0.08299034833908081], 'model.layer3.1.conv1': [0.0069086188450455666, 0.0094403540715575

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 388.268253326416s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 405.37857723236084s
results_bp:  {'model.conv1': [0.11307872086763382, 0.11307399719953537, 0.11509300023317337], 'model.layer1.0.conv1': [0.06410016864538193, 0.0701621025800705, 0.05330277606844902], 'model.layer1.0.conv2': [0.07414640486240387, 0.08460833877325058, 0.07112552225589752], 'model.layer1.1.conv1': [0.06642788648605347, 0.05323853716254234, 0.05736185237765312], 'model.layer1.1.conv2': [0.07232633978128433, 0.058503128588199615, 0.05551191791892052], 'model.layer2.0.conv1': [0.10677914321422577, 0.10520866513252258, 0.11819308996200562], 'model.layer2.0.conv2': [0.1137218326330185, 0.11070676147937775, 0.12104196101427078], 'model.layer2.0.downsample.0': [0.15578684210777283, 0.170308917760849, 0.1624545156955719], 'model.layer2.1.conv1': [0.042261216789484024, 0.04595478996634483, 0.04992007836699486], 'model.layer2.1.conv2': [0.03841032832860947, 0.04019151255488396, 0.04281695559620857], 'model.layer3.0.conv1': [0.10068082809448242, 0.11203797161579132,

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 387.71938276290894s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 405.2718622684479s
results_bp:  {'model.conv1': [0.11307872086763382, 0.11307399719953537, 0.11509300023317337, 0.09346960484981537], 'model.layer1.0.conv1': [0.06410016864538193, 0.0701621025800705, 0.05330277606844902, 0.054324544966220856], 'model.layer1.0.conv2': [0.07414640486240387, 0.08460833877325058, 0.07112552225589752, 0.06467989832162857], 'model.layer1.1.conv1': [0.06642788648605347, 0.05323853716254234, 0.05736185237765312, 0.04322469234466553], 'model.layer1.1.conv2': [0.07232633978128433, 0.058503128588199615, 0.05551191791892052, 0.05260487645864487], 'model.layer2.0.conv1': [0.10677914321422577, 0.10520866513252258, 0.11819308996200562, 0.09693996608257294], 'model.layer2.0.conv2': [0.1137218326330185, 0.11070676147937775, 0.12104196101427078, 0.11286728084087372], 'model.layer2.0.downsample.0': [0.15578684210777283, 0.170308917760849, 0.1624545156955719, 0.1428021639585495], 'model.layer2.1.conv1': [0.042261216789484024, 0.04595478996634483, 0.04992007

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 391.8400077819824s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 403.5146162509918s
results_bp:  {'model.conv1': [0.11307872086763382, 0.11307399719953537, 0.11509300023317337, 0.09346960484981537, 0.1092023253440857], 'model.layer1.0.conv1': [0.06410016864538193, 0.0701621025800705, 0.05330277606844902, 0.054324544966220856, 0.0698663517832756], 'model.layer1.0.conv2': [0.07414640486240387, 0.08460833877325058, 0.07112552225589752, 0.06467989832162857, 0.08158469200134277], 'model.layer1.1.conv1': [0.06642788648605347, 0.05323853716254234, 0.05736185237765312, 0.04322469234466553, 0.0680694654583931], 'model.layer1.1.conv2': [0.07232633978128433, 0.058503128588199615, 0.05551191791892052, 0.05260487645864487, 0.07643716037273407], 'model.layer2.0.conv1': [0.10677914321422577, 0.10520866513252258, 0.11819308996200562, 0.09693996608257294, 0.11210925877094269], 'model.layer2.0.conv2': [0.1137218326330185, 0.11070676147937775, 0.12104196101427078, 0.11286728084087372, 0.11628363281488419], 'model.layer2.0.downsample.0': [0.1557868421077

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 389.8110315799713s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 407.2298491001129s
results_bp:  {'model.conv1': [0.11307872086763382, 0.11307399719953537, 0.11509300023317337, 0.09346960484981537, 0.1092023253440857, 0.10850817710161209], 'model.layer1.0.conv1': [0.06410016864538193, 0.0701621025800705, 0.05330277606844902, 0.054324544966220856, 0.0698663517832756, 0.05292492359876633], 'model.layer1.0.conv2': [0.07414640486240387, 0.08460833877325058, 0.07112552225589752, 0.06467989832162857, 0.08158469200134277, 0.06788699328899384], 'model.layer1.1.conv1': [0.06642788648605347, 0.05323853716254234, 0.05736185237765312, 0.04322469234466553, 0.0680694654583931, 0.06420949101448059], 'model.layer1.1.conv2': [0.07232633978128433, 0.058503128588199615, 0.05551191791892052, 0.05260487645864487, 0.07643716037273407, 0.06912991404533386], 'model.layer2.0.conv1': [0.10677914321422577, 0.10520866513252258, 0.11819308996200562, 0.09693996608257294, 0.11210925877094269, 0.11328472942113876], 'model.layer2.0.conv2': [0.1137218326330185, 0.1107

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 388.8594732284546s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 406.031619310379s
results_bp:  {'model.conv1': [0.11307872086763382, 0.11307399719953537, 0.11509300023317337, 0.09346960484981537, 0.1092023253440857, 0.10850817710161209, 0.10719019174575806], 'model.layer1.0.conv1': [0.06410016864538193, 0.0701621025800705, 0.05330277606844902, 0.054324544966220856, 0.0698663517832756, 0.05292492359876633, 0.04930983483791351], 'model.layer1.0.conv2': [0.07414640486240387, 0.08460833877325058, 0.07112552225589752, 0.06467989832162857, 0.08158469200134277, 0.06788699328899384, 0.061736252158880234], 'model.layer1.1.conv1': [0.06642788648605347, 0.05323853716254234, 0.05736185237765312, 0.04322469234466553, 0.0680694654583931, 0.06420949101448059, 0.04385153949260712], 'model.layer1.1.conv2': [0.07232633978128433, 0.058503128588199615, 0.05551191791892052, 0.05260487645864487, 0.07643716037273407, 0.06912991404533386, 0.04843108728528023], 'model.layer2.0.conv1': [0.10677914321422577, 0.10520866513252258, 0.11819308996200562, 0.09693996

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 385.3841574192047s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 405.4076340198517s
results_bp:  {'model.conv1': [0.11307872086763382, 0.11307399719953537, 0.11509300023317337, 0.09346960484981537, 0.1092023253440857, 0.10850817710161209, 0.10719019174575806, 0.09736466407775879], 'model.layer1.0.conv1': [0.06410016864538193, 0.0701621025800705, 0.05330277606844902, 0.054324544966220856, 0.0698663517832756, 0.05292492359876633, 0.04930983483791351, 0.0551266223192215], 'model.layer1.0.conv2': [0.07414640486240387, 0.08460833877325058, 0.07112552225589752, 0.06467989832162857, 0.08158469200134277, 0.06788699328899384, 0.061736252158880234, 0.07041127979755402], 'model.layer1.1.conv1': [0.06642788648605347, 0.05323853716254234, 0.05736185237765312, 0.04322469234466553, 0.0680694654583931, 0.06420949101448059, 0.04385153949260712, 0.05051403492689133], 'model.layer1.1.conv2': [0.07232633978128433, 0.058503128588199615, 0.05551191791892052, 0.05260487645864487, 0.07643716037273407, 0.06912991404533386, 0.04843108728528023, 0.0495059452950

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 390.43106603622437s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 407.7712097167969s
results_bp:  {'model.conv1': [0.11307872086763382, 0.11307399719953537, 0.11509300023317337, 0.09346960484981537, 0.1092023253440857, 0.10850817710161209, 0.10719019174575806, 0.09736466407775879, 0.10501189529895782], 'model.layer1.0.conv1': [0.06410016864538193, 0.0701621025800705, 0.05330277606844902, 0.054324544966220856, 0.0698663517832756, 0.05292492359876633, 0.04930983483791351, 0.0551266223192215, 0.0704306811094284], 'model.layer1.0.conv2': [0.07414640486240387, 0.08460833877325058, 0.07112552225589752, 0.06467989832162857, 0.08158469200134277, 0.06788699328899384, 0.061736252158880234, 0.07041127979755402, 0.08027549088001251], 'model.layer1.1.conv1': [0.06642788648605347, 0.05323853716254234, 0.05736185237765312, 0.04322469234466553, 0.0680694654583931, 0.06420949101448059, 0.04385153949260712, 0.05051403492689133, 0.04524550214409828], 'model.layer1.1.conv2': [0.07232633978128433, 0.058503128588199615, 0.05551191791892052, 0.05260487645864

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 388.77348828315735s


Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Loss:   0%|          | 0/1000 [00:00<?, ?it/s]

Training time: 405.4308624267578s
results_bp:  {'model.conv1': [0.11307872086763382, 0.11307399719953537, 0.11509300023317337, 0.09346960484981537, 0.1092023253440857, 0.10850817710161209, 0.10719019174575806, 0.09736466407775879, 0.10501189529895782, 0.09220292419195175], 'model.layer1.0.conv1': [0.06410016864538193, 0.0701621025800705, 0.05330277606844902, 0.054324544966220856, 0.0698663517832756, 0.05292492359876633, 0.04930983483791351, 0.0551266223192215, 0.0704306811094284, 0.057658061385154724], 'model.layer1.0.conv2': [0.07414640486240387, 0.08460833877325058, 0.07112552225589752, 0.06467989832162857, 0.08158469200134277, 0.06788699328899384, 0.061736252158880234, 0.07041127979755402, 0.08027549088001251, 0.07445858418941498], 'model.layer1.1.conv1': [0.06642788648605347, 0.05323853716254234, 0.05736185237765312, 0.04322469234466553, 0.0680694654583931, 0.06420949101448059, 0.04385153949260712, 0.05051403492689133, 0.04524550214409828, 0.0689506083726883], 'model.layer1.1.conv2