In [3]:
# Core dependencies for this notebook: SparseML (recipe-driven training and
# pruning, used in later cells) plus PyTorch/torchvision for the model.
# NOTE(review): sparsezoo is imported but not referenced in the visible cells.
import sparseml
import sparsezoo
import torch
import torchvision
# Record the PyTorch build in the cell output so the run can be reproduced.
print(torch.__version__)

1.12.1+cu116


In [4]:
# Number of output classes for the fine-tuning task
# (presumably the three beans classes loaded below -- confirm).
NUM_LABELS = 3

# Start from torchvision's default pretrained ResNet-50 weights.
pretrained_weights = torchvision.models.ResNet50_Weights.DEFAULT
model = torchvision.models.resnet50(weights=pretrained_weights)

# Replace the pretrained classification head with a fresh NUM_LABELS-way
# linear layer that keeps the same input width.
head_in_features = model.fc.in_features
model.fc = torch.nn.Linear(head_in_features, NUM_LABELS)
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
import datasets

# Fetch (or reuse the cached copy of) the "beans" dataset.
beans_dataset = datasets.load_dataset("beans")

# Print where the first image of each split lives on disk; the parent
# folders of these files are the per-split image directories.
for split_name in ("train", "validation"):
    print(beans_dataset[split_name][0]["image_file_path"])

Using custom data configuration default
Reusing dataset beans (/home/ubuntu/.cache/huggingface/datasets/beans/default/0.0.0/d5abfbb94de45599d871182cff389bc870bf145b4829c4b0fe20f0cccd637cbd)


  0%|          | 0/3 [00:00<?, ?it/s]

/home/ubuntu/.cache/huggingface/datasets/downloads/extracted/eeb026374cf5ecfd5f40131a3159be9b9055ac21a3da11690e7eb4d117c99eee/train/bean_rust/bean_rust_train.84.jpg
/home/ubuntu/.cache/huggingface/datasets/downloads/extracted/f287261265d2f9a3e8f87a5526a54d1847b17f7c3ec5714e5719432f2b3e4a73/validation/bean_rust/bean_rust_val.36.jpg


In [6]:
from pathlib import Path

from torchvision import transforms

# Preprocessing applied to every image: resize, centre-crop to 224x224,
# convert to a tensor, then normalise each channel with the mean/std below.
imagenet_transform = transforms.Compose([
    transforms.Resize(size=256, interpolation=transforms.InterpolationMode.BILINEAR, max_size=None, antialias=None),
    transforms.CenterCrop(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def _split_root(split):
    """Return the extracted image directory for ``split`` of ``beans_dataset``.

    The dataset reports files as ``<root>/<class_name>/<file>.jpg``, so the
    directory two levels above the first file is the folder that holds one
    sub-directory per class. Deriving it at run time avoids hard-coding a
    machine-specific, hash-named cache path.

    NOTE(review): assumes every image of a split sits under the same root as
    the first one -- this matches the paths printed by the dataset cell.
    """
    first_image = Path(beans_dataset[split][0]["image_file_path"])
    return str(first_image.parents[1])


train_dataset = torchvision.datasets.ImageFolder(
    root=_split_root("train"),
    transform=imagenet_transform
)

val_dataset = torchvision.datasets.ImageFolder(
    root=_split_root("validation"),
    transform=imagenet_transform
)

In [7]:
from tqdm.auto import tqdm
import math

def run_model_one_epoch(model, data_loader, criterion, device, train=False, optimizer=None):
    """Run one full pass over ``data_loader`` and report loss and top-1 accuracy.

    Args:
        model: module called as ``model(inputs)``; its output is treated as
            per-class scores (the prediction is ``argmax(dim=1)``).
        data_loader: sized iterable yielding ``(inputs, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        device: target handed to ``Tensor.to`` for every batch.
        train: when true, the model is put in train mode and every batch is
            followed by ``backward()`` and ``optimizer.step()``; otherwise the
            model is put in eval mode and autograd is disabled.
        optimizer: optimizer to step; required when ``train`` is true.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the mean of the per-batch
        losses (each batch weighted equally) and ``accuracy`` is the number
        of correct predictions divided by the number of samples seen.

    Raises:
        ValueError: if ``train`` is true without an optimizer, or if the
            loader yields no samples (the averages would be undefined).
    """
    if train and optimizer is None:
        raise ValueError("an optimizer is required when train=True")

    if train:
        model.train()
    else:
        model.eval()

    running_loss = 0.0
    total_correct = 0
    total_predictions = 0
    num_batches = 0

    # loop through batches
    for inputs, labels in tqdm(data_loader, total=len(data_loader)):
        inputs = inputs.to(device)
        labels = labels.to(device)

        if train:
            optimizer.zero_grad()

        # Only track gradients when they will be used: evaluation passes skip
        # building the autograd graph, which saves memory and time.
        with torch.set_grad_enabled(bool(train)):
            outputs = model(inputs)  # model returns logits
            loss = criterion(outputs, labels)

        if train:
            loss.backward()
            optimizer.step()

        running_loss += loss.item()

        # run evaluation
        predictions = outputs.argmax(dim=1)
        total_correct += torch.sum(predictions == labels).item()
        total_predictions += inputs.size(0)
        num_batches += 1

    if num_batches == 0 or total_predictions == 0:
        raise ValueError("data_loader produced no samples; cannot compute loss/accuracy")

    # return loss and evaluation metric
    loss = running_loss / num_batches
    accuracy = total_correct / total_predictions
    return loss, accuracy

In [8]:
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss
from torch.optim import Adam

# hyperparameters
BATCH_SIZE = 64

# pick the GPU when one is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
print(f"Using device: {device}")

# both loaders share every option except shuffling: training data is
# reshuffled each epoch, validation data keeps a fixed order
loader_options = dict(batch_size=BATCH_SIZE, pin_memory=True, num_workers=16)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

# loss function and base optimizer; the learning rate given here is a
# placeholder, the LR will be overridden by sparseml
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=8e-3)

Using device: cuda


In [9]:
!cat ./dense_model/dense-recipe.yaml


# Epoch and Learning-Rate variables
num_epochs: 10.0
init_lr: 0.0005

training_modifiers:
  - !EpochRangeModifier
    start_epoch: 0.0
    end_epoch: eval(num_epochs)

  - !LearningRateFunctionModifier
    final_lr: 0.0
    init_lr: eval(init_lr)
    lr_func: cosine
    start_epoch: 0.0
    end_epoch: eval(num_epochs)

# Phase 1 Sparse Transfer Learning / Recovery
sparse_transfer_learning_modifiers:
  - !ConstantPruningModifier
    start_epoch: 0.0
    params: __ALL_PRUNABLE__


In [10]:
from sparseml.pytorch.optim import ScheduledModifierManager

# Build the recipe manager from the dense fine-tuning recipe printed above,
# then let it wrap the optimizer. steps_per_epoch is the number of batches in
# one pass over the training loader.
dense_recipe_path = "./dense_model/dense-recipe.yaml"
manager = ScheduledModifierManager.from_yaml(dense_recipe_path)
batches_per_epoch = len(train_loader)
optimizer = manager.modify(model, optimizer, steps_per_epoch=batches_per_epoch)

In [11]:
from sparseml.pytorch.utils import get_prunable_layers, tensor_sparsity

# report the weight sparsity of every prunable layer, one layer per line
for name, layer in get_prunable_layers(model):
    layer_sparsity = tensor_sparsity(layer.weight).item()
    print(f"{name}.weight: {layer_sparsity:.4f}")

conv1.weight: 0.0000
layer1.0.conv1.weight: 0.0000
layer1.0.conv2.weight: 0.0000
layer1.0.conv3.weight: 0.0000
layer1.0.downsample.0.weight: 0.0000
layer1.1.conv1.weight: 0.0000
layer1.1.conv2.weight: 0.0000
layer1.1.conv3.weight: 0.0000
layer1.2.conv1.weight: 0.0000
layer1.2.conv2.weight: 0.0000
layer1.2.conv3.weight: 0.0000
layer2.0.conv1.weight: 0.0000
layer2.0.conv2.weight: 0.0000
layer2.0.conv3.weight: 0.0000
layer2.0.downsample.0.weight: 0.0000
layer2.1.conv1.weight: 0.0000
layer2.1.conv2.weight: 0.0000
layer2.1.conv3.weight: 0.0000
layer2.2.conv1.weight: 0.0000
layer2.2.conv2.weight: 0.0000
layer2.2.conv3.weight: 0.0000
layer2.3.conv1.weight: 0.0000
layer2.3.conv2.weight: 0.0000
layer2.3.conv3.weight: 0.0000
layer3.0.conv1.weight: 0.0000
layer3.0.conv2.weight: 0.0000
layer3.0.conv3.weight: 0.0000
layer3.0.downsample.0.weight: 0.0000
layer3.1.conv1.weight: 0.0000
layer3.1.conv2.weight: 0.0000
layer3.1.conv3.weight: 0.0000
layer3.2.conv1.weight: 0.0000
layer3.2.conv2.weight: 0.000

In [12]:
# run transfer learning: one training pass and one validation pass per epoch,
# for as many epochs as the recipe manager reports
epoch = 0
for epoch in range(manager.max_epochs):
    epoch_name = f"{epoch + 1}/{manager.max_epochs}"

    # training pass -- weights are updated through the manager-wrapped optimizer
    print(f"Running Training Epoch {epoch_name}")
    train_loss, train_acc = run_model_one_epoch(
        model,
        train_loader,
        criterion,
        device,
        train=True,
        optimizer=optimizer,
    )
    train_report = f"Training Epoch: {epoch_name}\nTraining Loss: {train_loss}\nTop 1 Acc: {train_acc}\n"
    print(train_report)

    # validation pass -- metrics only, no weight updates
    print(f"Running Validation Epoch {epoch_name}")
    val_loss, val_acc = run_model_one_epoch(
        model, val_loader, criterion, device, train=False
    )
    val_report = f"Validation Epoch: {epoch_name}\nVal Loss: {val_loss}\nTop 1 Acc: {val_acc}\n"
    print(val_report)

# let the manager finish up once the last epoch has run
manager.finalize(model)

Running Training Epoch 1/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 1/10
Training Loss: 0.4339083959074581
Top 1 Acc: 0.8404255319148937

Running Validation Epoch 1/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 1/10
Val Loss: 0.248091588417689
Top 1 Acc: 0.9548872180451128

Running Training Epoch 2/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 2/10
Training Loss: 0.06769604531719405
Top 1 Acc: 0.9816247582205029

Running Validation Epoch 2/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 2/10
Val Loss: 0.16374425676379664
Top 1 Acc: 0.924812030075188

Running Training Epoch 3/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 3/10
Training Loss: 0.02384376264971626
Top 1 Acc: 0.9932301740812379

Running Validation Epoch 3/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 3/10
Val Loss: 0.049745342917352296
Top 1 Acc: 0.9849624060150376

Running Training Epoch 4/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 4/10
Training Loss: 0.09208150484653957
Top 1 Acc: 0.9922630560928434

Running Validation Epoch 4/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 4/10
Val Loss: 0.0414201499883499
Top 1 Acc: 0.9774436090225563

Running Training Epoch 5/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 5/10
Training Loss: 0.05627283230697846
Top 1 Acc: 0.9835589941972921

Running Validation Epoch 5/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 5/10
Val Loss: 0.018425415522263695
Top 1 Acc: 0.9849624060150376

Running Training Epoch 6/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 6/10
Training Loss: 0.01825398319017361
Top 1 Acc: 0.995164410058027

Running Validation Epoch 6/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 6/10
Val Loss: 0.03305983874209536
Top 1 Acc: 0.9849624060150376

Running Training Epoch 7/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 7/10
Training Loss: 0.019919913084081867
Top 1 Acc: 0.9980657640232108

Running Validation Epoch 7/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 7/10
Val Loss: 0.009156467043794692
Top 1 Acc: 1.0

Running Training Epoch 8/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 8/10
Training Loss: 0.027912832608254737
Top 1 Acc: 0.9990328820116054

Running Validation Epoch 8/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 8/10
Val Loss: 0.008448541047982872
Top 1 Acc: 1.0

Running Training Epoch 9/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 9/10
Training Loss: 0.006125200964400873
Top 1 Acc: 1.0

Running Validation Epoch 9/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 9/10
Val Loss: 0.006565663294168189
Top 1 Acc: 1.0

Running Training Epoch 10/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 10/10
Training Loss: 0.003272765227045645
Top 1 Acc: 1.0

Running Validation Epoch 10/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 10/10
Val Loss: 0.007307609868197081
Top 1 Acc: 1.0



In [13]:
from sparseml.pytorch.utils import ModuleExporter

# Export the fine-tuned dense model. A later cell reloads this checkpoint
# from ./dense_model/training/resnet-50-dense-beans.pth.
save_dir = "dense_model"
checkpoint_name = "resnet-50-dense-beans.pth"
exporter = ModuleExporter(model, output_dir=save_dir)
exporter.export_pytorch(name=checkpoint_name)

In [36]:
%cat recipe-0.yaml

# Epoch and Learning-Rate variables
num_epochs: 10.0
init_lr: 0.00025
inter_func: cubic
mask_type: unstructured

training_modifiers:
  - !EpochRangeModifier
    start_epoch: 0.0
    end_epoch: eval(num_epochs)

  - !LearningRateFunctionModifier
    final_lr: 0.0
    init_lr: eval(init_lr)
    lr_func: cosine
    start_epoch: 0.0
    end_epoch: eval(num_epochs)

# Pruning
pruning_modifiers:
   - !GlobalMagnitudePruningModifier
     init_sparsity: 0.05
     final_sparsity: 0.90
     start_epoch: 0.0
     end_epoch: eval(num_epochs)
     update_frequency: 1.0
     params: 
        - 're:layer1.*.conv1.weight'
        - 're:layer1.*.conv2.weight'
        - 're:layer1.*.conv3.weight'
        - 're:layer1.0.downsample.0.weight'
        - 're:layer2.*.conv1.weight'
        - 're:layer2.*.conv2.weight'
        - 're:layer2.*.conv3.weight'
        - 're:layer2.0.downsample.0.weight'
        - 're:layer3.*.conv1.weight'
        - 're:layer3.*.conv2.weight'
  

In [37]:
# Reload the dense fine-tuned checkpoint exported earlier. map_location="cpu"
# makes the load work on hosts without CUDA and avoids materialising a second
# copy of the weights on the GPU; the model is moved to `device` afterwards.
checkpoint = torch.load(
    "./dense_model/training/resnet-50-dense-beans.pth", map_location="cpu"
)

# Rebuild the same architecture (randomly initialised ResNet-50 with a
# NUM_LABELS-way head) so the saved state dict fits, then restore the weights.
model = torchvision.models.resnet50()
model.fc = torch.nn.Linear(model.fc.in_features, NUM_LABELS)
model.load_state_dict(checkpoint['state_dict'])
model.to(device)

# setup loss function and optimizer, LR will be overridden by sparseml
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=8e-3)

In [38]:
# Show the architecture of the model rebuilt from the dense checkpoint.
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [39]:
# Build the recipe manager from the pruning recipe printed above, then let it
# wrap the fresh optimizer. steps_per_epoch is the number of batches in one
# pass over the training loader.
pruning_recipe_path = "./recipe-0.yaml"
manager = ScheduledModifierManager.from_yaml(pruning_recipe_path)
batches_per_epoch = len(train_loader)
optimizer = manager.modify(model, optimizer, steps_per_epoch=batches_per_epoch)

In [40]:
# weight sparsity of every prunable layer before the pruning run starts
for name, layer in get_prunable_layers(model):
    layer_sparsity = tensor_sparsity(layer.weight).item()
    print(f"{name}.weight: {layer_sparsity:.4f}")

conv1.weight: 0.0000
layer1.0.conv1.weight: 0.0000
layer1.0.conv2.weight: 0.0000
layer1.0.conv3.weight: 0.0000
layer1.0.downsample.0.weight: 0.0000
layer1.1.conv1.weight: 0.0000
layer1.1.conv2.weight: 0.0000
layer1.1.conv3.weight: 0.0000
layer1.2.conv1.weight: 0.0000
layer1.2.conv2.weight: 0.0000
layer1.2.conv3.weight: 0.0000
layer2.0.conv1.weight: 0.0000
layer2.0.conv2.weight: 0.0000
layer2.0.conv3.weight: 0.0000
layer2.0.downsample.0.weight: 0.0000
layer2.1.conv1.weight: 0.0000
layer2.1.conv2.weight: 0.0000
layer2.1.conv3.weight: 0.0000
layer2.2.conv1.weight: 0.0000
layer2.2.conv2.weight: 0.0000
layer2.2.conv3.weight: 0.0000
layer2.3.conv1.weight: 0.0000
layer2.3.conv2.weight: 0.0000
layer2.3.conv3.weight: 0.0000
layer3.0.conv1.weight: 0.0000
layer3.0.conv2.weight: 0.0000
layer3.0.conv3.weight: 0.0000
layer3.0.downsample.0.weight: 0.0000
layer3.1.conv1.weight: 0.0000
layer3.1.conv2.weight: 0.0000
layer3.1.conv3.weight: 0.0000
layer3.2.conv1.weight: 0.0000
layer3.2.conv2.weight: 0.000

In [41]:
# run the pruning recipe: one training pass, one validation pass and a
# per-layer sparsity report for every epoch the recipe manager asks for
epoch = 0
for epoch in range(manager.max_epochs):
    epoch_name = f"{epoch + 1}/{manager.max_epochs}"

    # training pass -- weights are updated through the manager-wrapped optimizer
    print(f"Running Training Epoch {epoch_name}")
    train_loss, train_acc = run_model_one_epoch(
        model,
        train_loader,
        criterion,
        device,
        train=True,
        optimizer=optimizer,
    )
    train_report = f"Training Epoch: {epoch_name}\nTraining Loss: {train_loss}\nTop 1 Acc: {train_acc}\n"
    print(train_report)

    # validation pass -- metrics only, no weight updates
    print(f"Running Validation Epoch {epoch_name}")
    val_loss, val_acc = run_model_one_epoch(
        model, val_loader, criterion, device, train=False
    )
    val_report = f"Validation Epoch: {epoch_name}\nVal Loss: {val_loss}\nTop 1 Acc: {val_acc}\n"
    print(val_report)

    # per-layer weight sparsity after this epoch
    print("\n\nChecking Sparsity Level:")
    for name, layer in get_prunable_layers(model):
        layer_sparsity = tensor_sparsity(layer.weight).item()
        print(f"{name}.weight: {layer_sparsity:.4f}")
    print("\n")

# let the manager finish up once the last epoch has run
manager.finalize(model)

Running Training Epoch 1/10


  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 1/10
Training Loss: 0.009745531282319194
Top 1 Acc: 0.9980657640232108

Running Validation Epoch 1/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 1/10
Val Loss: 0.03642222502579292
Top 1 Acc: 0.9849624060150376



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.0156
layer1.0.conv2.weight: 0.0299
layer1.0.conv3.weight: 0.0468
layer1.0.downsample.0.weight: 0.0452
layer1.1.conv1.weight: 0.0623
layer1.1.conv2.weight: 0.0414
layer1.1.conv3.weight: 0.0328
layer1.2.conv1.weight: 0.0204
layer1.2.conv2.weight: 0.0228
layer1.2.conv3.weight: 0.0385
layer2.0.conv1.weight: 0.0201
layer2.0.conv2.weight: 0.0337
layer2.0.conv3.weight: 0.0354
layer2.0.downsample.0.weight: 0.0452
layer2.1.conv1.weight: 0.0430
layer2.1.conv2.weight: 0.0410
layer2.1.conv3.weight: 0.0340
layer2.2.conv1.weight: 0.0350
layer2.2.conv2.weight: 0.0398
layer2.2.conv3.weight: 0.0384
layer2.3.conv1.weight: 0.0297
layer2.3.conv2.weight: 0.0348
layer2.3.conv3.weight: 0.0418
layer3.0.conv1.weight: 0.0280
layer3.0.conv2.weight: 0.0441
layer3.0.conv3.weight: 0.0345
layer3.0.downsample.0.weight: 0.0551
layer3.1.conv1.weight: 0.0492
layer3.

  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 2/10
Training Loss: 0.015113459074414363
Top 1 Acc: 0.9961315280464217

Running Validation Epoch 2/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 2/10
Val Loss: 0.09295554442602831
Top 1 Acc: 0.9774436090225563



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.1057
layer1.0.conv2.weight: 0.1675
layer1.0.conv3.weight: 0.1412
layer1.0.downsample.0.weight: 0.1352
layer1.1.conv1.weight: 0.1551
layer1.1.conv2.weight: 0.1612
layer1.1.conv3.weight: 0.1602
layer1.2.conv1.weight: 0.1138
layer1.2.conv2.weight: 0.1321
layer1.2.conv3.weight: 0.1959
layer2.0.conv1.weight: 0.1172
layer2.0.conv2.weight: 0.1901
layer2.0.conv3.weight: 0.1868
layer2.0.downsample.0.weight: 0.2431
layer2.1.conv1.weight: 0.2298
layer2.1.conv2.weight: 0.2323
layer2.1.conv3.weight: 0.1968
layer2.2.conv1.weight: 0.1970
layer2.2.conv2.weight: 0.2298
layer2.2.conv3.weight: 0.2151
layer2.3.conv1.weight: 0.1743
layer2.3.conv2.weight: 0.1960
layer2.3.conv3.weight: 0.2179
layer3.0.conv1.weight: 0.1623
layer3.0.conv2.weight: 0.2515
layer3.0.conv3.weight: 0.1914
layer3.0.downsample.0.weight: 0.3051
layer3.1.conv1.weight: 0.2738
layer3.

  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 3/10
Training Loss: 0.02089866736616172
Top 1 Acc: 0.9941972920696325

Running Validation Epoch 3/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 3/10
Val Loss: 0.08353681061453244
Top 1 Acc: 0.9624060150375939



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.1858
layer1.0.conv2.weight: 0.2866
layer1.0.conv3.weight: 0.2258
layer1.0.downsample.0.weight: 0.2141
layer1.1.conv1.weight: 0.2385
layer1.1.conv2.weight: 0.2649
layer1.1.conv3.weight: 0.2523
layer1.2.conv1.weight: 0.1957
layer1.2.conv2.weight: 0.2279
layer1.2.conv3.weight: 0.2909
layer2.0.conv1.weight: 0.2060
layer2.0.conv2.weight: 0.3258
layer2.0.conv3.weight: 0.3037
layer2.0.downsample.0.weight: 0.4047
layer2.1.conv1.weight: 0.3805
layer2.1.conv2.weight: 0.3882
layer2.1.conv3.weight: 0.3278
layer2.2.conv1.weight: 0.3349
layer2.2.conv2.weight: 0.3848
layer2.2.conv3.weight: 0.3596
layer2.3.conv1.weight: 0.2998
layer2.3.conv2.weight: 0.3358
layer2.3.conv3.weight: 0.3437
layer3.0.conv1.weight: 0.2781
layer3.0.conv2.weight: 0.4216
layer3.0.conv3.weight: 0.3248
layer3.0.downsample.0.weight: 0.5016
layer3.1.conv1.weight: 0.4533
layer3.

  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 4/10
Training Loss: 0.015480193025295567
Top 1 Acc: 0.9961315280464217

Running Validation Epoch 4/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 4/10
Val Loss: 0.014964782571648053
Top 1 Acc: 0.9924812030075187



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.2566
layer1.0.conv2.weight: 0.3874
layer1.0.conv3.weight: 0.3030
layer1.0.downsample.0.weight: 0.2864
layer1.1.conv1.weight: 0.3156
layer1.1.conv2.weight: 0.3574
layer1.1.conv3.weight: 0.3226
layer1.2.conv1.weight: 0.2751
layer1.2.conv2.weight: 0.3190
layer1.2.conv3.weight: 0.3697
layer2.0.conv1.weight: 0.2869
layer2.0.conv2.weight: 0.4424
layer2.0.conv3.weight: 0.4022
layer2.0.downsample.0.weight: 0.5349
layer2.1.conv1.weight: 0.5007
layer2.1.conv2.weight: 0.5141
layer2.1.conv3.weight: 0.4416
layer2.2.conv1.weight: 0.4510
layer2.2.conv2.weight: 0.5120
layer2.2.conv3.weight: 0.4785
layer2.3.conv1.weight: 0.4095
layer2.3.conv2.weight: 0.4572
layer2.3.conv3.weight: 0.4486
layer3.0.conv1.weight: 0.3802
layer3.0.conv2.weight: 0.5596
layer3.0.conv3.weight: 0.4387
layer3.0.downsample.0.weight: 0.6490
layer3.1.conv1.weight: 0.5910
layer3

  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 5/10
Training Loss: 0.006244629046277088
Top 1 Acc: 0.9980657640232108

Running Validation Epoch 5/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 5/10
Val Loss: 0.007931285625090823
Top 1 Acc: 1.0



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.3132
layer1.0.conv2.weight: 0.4789
layer1.0.conv3.weight: 0.3718
layer1.0.downsample.0.weight: 0.3507
layer1.1.conv1.weight: 0.3847
layer1.1.conv2.weight: 0.4396
layer1.1.conv3.weight: 0.3866
layer1.2.conv1.weight: 0.3459
layer1.2.conv2.weight: 0.3991
layer1.2.conv3.weight: 0.4357
layer2.0.conv1.weight: 0.3589
layer2.0.conv2.weight: 0.5409
layer2.0.conv3.weight: 0.4852
layer2.0.downsample.0.weight: 0.6344
layer2.1.conv1.weight: 0.5958
layer2.1.conv2.weight: 0.6136
layer2.1.conv3.weight: 0.5360
layer2.2.conv1.weight: 0.5475
layer2.2.conv2.weight: 0.6120
layer2.2.conv3.weight: 0.5734
layer2.3.conv1.weight: 0.5049
layer2.3.conv2.weight: 0.5596
layer2.3.conv3.weight: 0.5365
layer3.0.conv1.weight: 0.4702
layer3.0.conv2.weight: 0.6687
layer3.0.conv3.weight: 0.5361
layer3.0.downsample.0.weight: 0.7558
layer3.1.conv1.weight: 0.6969
layer3.1.conv2.weight

  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 6/10
Training Loss: 0.010191820685149115
Top 1 Acc: 0.9970986460348162

Running Validation Epoch 6/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 6/10
Val Loss: 0.013040244715133062
Top 1 Acc: 0.9924812030075187



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.3665
layer1.0.conv2.weight: 0.5532
layer1.0.conv3.weight: 0.4306
layer1.0.downsample.0.weight: 0.4067
layer1.1.conv1.weight: 0.4468
layer1.1.conv2.weight: 0.5105
layer1.1.conv3.weight: 0.4446
layer1.2.conv1.weight: 0.4129
layer1.2.conv2.weight: 0.4703
layer1.2.conv3.weight: 0.4930
layer2.0.conv1.weight: 0.4214
layer2.0.conv2.weight: 0.6199
layer2.0.conv3.weight: 0.5578
layer2.0.downsample.0.weight: 0.7127
layer2.1.conv1.weight: 0.6706
layer2.1.conv2.weight: 0.6892
layer2.1.conv3.weight: 0.6139
layer2.2.conv1.weight: 0.6265
layer2.2.conv2.weight: 0.6889
layer2.2.conv3.weight: 0.6487
layer2.3.conv1.weight: 0.5859
layer2.3.conv2.weight: 0.6432
layer2.3.conv3.weight: 0.6093
layer3.0.conv1.weight: 0.5457
layer3.0.conv2.weight: 0.7501
layer3.0.conv3.weight: 0.6161
layer3.0.downsample.0.weight: 0.8281
layer3.1.conv1.weight: 0.7758
layer3

  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 7/10
Training Loss: 0.028277927598751643
Top 1 Acc: 0.9970986460348162

Running Validation Epoch 7/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 7/10
Val Loss: 0.027645598476131756
Top 1 Acc: 1.0



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.4146
layer1.0.conv2.weight: 0.6099
layer1.0.conv3.weight: 0.4810
layer1.0.downsample.0.weight: 0.4547
layer1.1.conv1.weight: 0.4962
layer1.1.conv2.weight: 0.5668
layer1.1.conv3.weight: 0.4924
layer1.2.conv1.weight: 0.4672
layer1.2.conv2.weight: 0.5302
layer1.2.conv3.weight: 0.5394
layer2.0.conv1.weight: 0.4760
layer2.0.conv2.weight: 0.6813
layer2.0.conv3.weight: 0.6140
layer2.0.downsample.0.weight: 0.7674
layer2.1.conv1.weight: 0.7262
layer2.1.conv2.weight: 0.7451
layer2.1.conv3.weight: 0.6732
layer2.2.conv1.weight: 0.6854
layer2.2.conv2.weight: 0.7446
layer2.2.conv3.weight: 0.7042
layer2.3.conv1.weight: 0.6469
layer2.3.conv2.weight: 0.7082
layer2.3.conv3.weight: 0.6685
layer3.0.conv1.weight: 0.6048
layer3.0.conv2.weight: 0.8069
layer3.0.conv3.weight: 0.6779
layer3.0.downsample.0.weight: 0.8744
layer3.1.conv1.weight: 0.8290
layer3.1.conv2.weight

  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 8/10
Training Loss: 0.0318604385589852
Top 1 Acc: 1.0

Running Validation Epoch 8/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 8/10
Val Loss: 0.03692007825399438
Top 1 Acc: 1.0



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.4473
layer1.0.conv2.weight: 0.6496
layer1.0.conv3.weight: 0.5182
layer1.0.downsample.0.weight: 0.4882
layer1.1.conv1.weight: 0.5353
layer1.1.conv2.weight: 0.6064
layer1.1.conv3.weight: 0.5291
layer1.2.conv1.weight: 0.5075
layer1.2.conv2.weight: 0.5724
layer1.2.conv3.weight: 0.5753
layer2.0.conv1.weight: 0.5150
layer2.0.conv2.weight: 0.7218
layer2.0.conv3.weight: 0.6547
layer2.0.downsample.0.weight: 0.8039
layer2.1.conv1.weight: 0.7629
layer2.1.conv2.weight: 0.7809
layer2.1.conv3.weight: 0.7136
layer2.2.conv1.weight: 0.7243
layer2.2.conv2.weight: 0.7808
layer2.2.conv3.weight: 0.7411
layer2.3.conv1.weight: 0.6886
layer2.3.conv2.weight: 0.7515
layer2.3.conv3.weight: 0.7091
layer3.0.conv1.weight: 0.6459
layer3.0.conv2.weight: 0.8426
layer3.0.conv3.weight: 0.7206
layer3.0.downsample.0.weight: 0.9014
layer3.1.conv1.weight: 0.8622
layer3.1.conv2.weight:

  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 9/10
Training Loss: 0.036725726197747624
Top 1 Acc: 1.0

Running Validation Epoch 9/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 9/10
Val Loss: 0.046517357851068176
Top 1 Acc: 1.0



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.4712
layer1.0.conv2.weight: 0.6728
layer1.0.conv3.weight: 0.5400
layer1.0.downsample.0.weight: 0.5073
layer1.1.conv1.weight: 0.5588
layer1.1.conv2.weight: 0.6290
layer1.1.conv3.weight: 0.5516
layer1.2.conv1.weight: 0.5294
layer1.2.conv2.weight: 0.5957
layer1.2.conv3.weight: 0.5948
layer2.0.conv1.weight: 0.5384
layer2.0.conv2.weight: 0.7441
layer2.0.conv3.weight: 0.6780
layer2.0.downsample.0.weight: 0.8222
layer2.1.conv1.weight: 0.7831
layer2.1.conv2.weight: 0.8004
layer2.1.conv3.weight: 0.7358
layer2.2.conv1.weight: 0.7461
layer2.2.conv2.weight: 0.8004
layer2.2.conv3.weight: 0.7619
layer2.3.conv1.weight: 0.7125
layer2.3.conv2.weight: 0.7751
layer2.3.conv3.weight: 0.7315
layer3.0.conv1.weight: 0.6695
layer3.0.conv2.weight: 0.8614
layer3.0.conv3.weight: 0.7442
layer3.0.downsample.0.weight: 0.9151
layer3.1.conv1.weight: 0.8794
layer3.1.conv2.weight

  0%|          | 0/17 [00:00<?, ?it/s]

Training Epoch: 10/10
Training Loss: 0.032790856435894966
Top 1 Acc: 1.0

Running Validation Epoch 10/10


  0%|          | 0/3 [00:00<?, ?it/s]

Validation Epoch: 10/10
Val Loss: 0.0411220183596015
Top 1 Acc: 1.0



Checking Sparsity Level:
conv1.weight: 0.0000
layer1.0.conv1.weight: 0.4783
layer1.0.conv2.weight: 0.6819
layer1.0.conv3.weight: 0.5481
layer1.0.downsample.0.weight: 0.5146
layer1.1.conv1.weight: 0.5688
layer1.1.conv2.weight: 0.6389
layer1.1.conv3.weight: 0.5606
layer1.2.conv1.weight: 0.5388
layer1.2.conv2.weight: 0.6055
layer1.2.conv3.weight: 0.6023
layer2.0.conv1.weight: 0.5475
layer2.0.conv2.weight: 0.7528
layer2.0.conv3.weight: 0.6869
layer2.0.downsample.0.weight: 0.8294
layer2.1.conv1.weight: 0.7904
layer2.1.conv2.weight: 0.8076
layer2.1.conv3.weight: 0.7448
layer2.2.conv1.weight: 0.7550
layer2.2.conv2.weight: 0.8084
layer2.2.conv3.weight: 0.7696
layer2.3.conv1.weight: 0.7220
layer2.3.conv2.weight: 0.7845
layer2.3.conv3.weight: 0.7395
layer3.0.conv1.weight: 0.6785
layer3.0.conv2.weight: 0.8685
layer3.0.conv3.weight: 0.7536
layer3.0.downsample.0.weight: 0.9200
layer3.1.conv1.weight: 0.8859
layer3.1.conv2.weight:

In [42]:
# Export the pruned model produced by this experiment.
# NOTE(review): the dense export is reloaded from a `training/` sub-folder of
# its output dir, so this file presumably lands in experiment-0/training/ --
# confirm.
save_dir = "experiment-0"
checkpoint_name = "resnet-50-sparse-experiment-0.pth"
exporter = ModuleExporter(model, output_dir=save_dir)
exporter.export_pytorch(name=checkpoint_name)