In [3]:
import math
from pathlib import Path

import sparseml
import torch
import torchvision
from datasets import load_dataset
from sparseml.pytorch.optim import ScheduledModifierManager
from sparseml.pytorch.utils import ModuleExporter
from sparseml.pytorch.utils import get_prunable_layers, tensor_sparsity
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm.auto import tqdm

In [4]:
# Load the "beans" dataset through the Hugging Face `datasets` library and
# print its split summary (split names, feature columns, row counts)
dataset = load_dataset("beans")
print(dataset)

Using custom data configuration default
Reusing dataset beans (/home/ubuntu/.cache/huggingface/datasets/beans/default/0.0.0/d5abfbb94de45599d871182cff389bc870bf145b4829c4b0fe20f0cccd637cbd)


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['image_file_path', 'image', 'labels'],
        num_rows: 1034
    })
    validation: Dataset({
        features: ['image_file_path', 'image', 'labels'],
        num_rows: 133
    })
    test: Dataset({
        features: ['image_file_path', 'image', 'labels'],
        num_rows: 128
    })
})


In [6]:
# Show where the first image of each split lives on disk; the folder layout
# is <split>/<class_name>/<file>.jpg
for split in ("train", "validation"):
    print(dataset[split][0]["image_file_path"])

/home/ubuntu/.cache/huggingface/datasets/downloads/extracted/eeb026374cf5ecfd5f40131a3159be9b9055ac21a3da11690e7eb4d117c99eee/train/bean_rust/bean_rust_train.84.jpg
/home/ubuntu/.cache/huggingface/datasets/downloads/extracted/f287261265d2f9a3e8f87a5526a54d1847b17f7c3ec5714e5719432f2b3e4a73/validation/bean_rust/bean_rust_val.36.jpg


In [7]:
# Derive the split directories from the dataset itself instead of hard-coding
# machine-specific cache paths (the hash-named cache folders differ between
# machines and downloads). Every image is stored as
# <split_dir>/<class_name>/<file>.jpg, so the split directory is two levels
# above the image file.
train_path = str(Path(dataset["train"][0]["image_file_path"]).parents[1])
val_path = str(Path(dataset["validation"][0]["image_file_path"]).parents[1])

In [8]:
# Batch size shared by both loaders and the size of the classification head
BATCH_SIZE = 32
NUM_LABELS = 3

# Channel statistics used to normalize the input images
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize, take the central 224x224 crop, convert to a
# tensor and normalize per channel
imagenet_transform = transforms.Compose(
    [
        transforms.Resize(
            size=256,
            interpolation=transforms.InterpolationMode.BILINEAR,
            max_size=None,
            antialias=None,
        ),
        transforms.CenterCrop(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)

# One ImageFolder per split; both splits share the same preprocessing
train_dataset, val_dataset = (
    torchvision.datasets.ImageFolder(root=split_root, transform=imagenet_transform)
    for split_root in (train_path, val_path)
)

# Loaders differ only in shuffling: training batches are shuffled, validation is not
loader_options = dict(batch_size=BATCH_SIZE, pin_memory=True, num_workers=16)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

In [9]:
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [13]:
def run_model_one_epoch(model, data_loader, criterion, device, train=False, optimizer=None):
    """Run one full pass over ``data_loader`` and report mean loss and top-1 accuracy.

    Args:
        model: module called as ``model(inputs)``; its output is treated as
            per-class logits (``argmax(dim=1)`` picks the predicted class).
        data_loader: sized iterable yielding ``(inputs, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        device: device each batch is moved to before the forward pass.
        train: when True, put the model in train mode and update the weights
            with ``optimizer`` after every batch; otherwise run in eval mode
            with gradient tracking disabled.
        optimizer: optimizer stepped once per batch; required when ``train``
            is True.

    Returns:
        Tuple ``(loss, accuracy)``: the mean of the per-batch losses and the
        fraction of samples whose predicted class equals the label.

    Raises:
        ValueError: if ``train`` is True but no optimizer is given, or if
            ``data_loader`` yields no samples.
    """
    if train and optimizer is None:
        raise ValueError("an optimizer is required when train=True")

    if train:
        model.train()
    else:
        model.eval()

    running_loss = 0.0
    total_correct = 0
    total_predictions = 0
    num_batches = 0

    # Track gradients only while training; evaluation does not need the
    # autograd graph, so skipping it saves memory and time
    with torch.set_grad_enabled(bool(train)):
        # loop through batches
        for inputs, labels in tqdm(data_loader, total=len(data_loader)):
            inputs = inputs.to(device)
            labels = labels.to(device)

            if train:
                optimizer.zero_grad()

            # compute loss, run backpropagation
            outputs = model(inputs)  # model returns logits
            loss = criterion(outputs, labels)
            if train:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            num_batches += 1

            # run evaluation
            predictions = outputs.argmax(dim=1)
            total_correct += torch.sum(predictions == labels).item()
            total_predictions += inputs.size(0)

    # Fail with a clear message instead of an unbound-variable or
    # division-by-zero error when there was nothing to process
    if num_batches == 0 or total_predictions == 0:
        raise ValueError("data_loader produced no samples")

    # return loss and evaluation metric
    loss = running_loss / num_batches
    accuracy = total_correct / total_predictions
    return loss, accuracy

## Train Dense Model

In [12]:
# Start from the default pretrained MobileNetV2 weights and swap the last
# classifier layer for one with NUM_LABELS outputs
pretrained_weights = torchvision.models.MobileNet_V2_Weights.DEFAULT
model = torchvision.models.mobilenet_v2(weights=pretrained_weights)
head_in_features = model.classifier[1].in_features
model.classifier[1] = torch.nn.Linear(head_in_features, NUM_LABELS)
model.to(device)

print(model)
print(f"Using device: {device}")

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [18]:
# Optimizer and loss for fine-tuning; the learning rate passed here is only a
# starting value because the sparseml recipe overrides it
optimizer = Adam(params=model.parameters(), lr=8e-3)
criterion = CrossEntropyLoss()

In [19]:
# Print the recipe file that the manager in the next cell is built from
!cat ./dense_model/dense-recipe.yaml


# Epoch and Learning-Rate variables
num_epochs: 10.0
init_lr: 0.0005

training_modifiers:
  - !EpochRangeModifier
    start_epoch: 0.0
    end_epoch: eval(num_epochs)

  - !LearningRateFunctionModifier
    final_lr: 0.0
    init_lr: eval(init_lr)
    lr_func: cosine
    start_epoch: 0.0
    end_epoch: eval(num_epochs)

# Phase 1 Sparse Transfer Learning / Recovery
sparse_transfer_learning_modifiers:
  - !ConstantPruningModifier
    start_epoch: 0.0
    params: __ALL_PRUNABLE__


In [20]:
# Build the recipe manager from the dense recipe and let it wrap the optimizer
steps_per_epoch = len(train_loader)
manager = ScheduledModifierManager.from_yaml("./dense_model/dense-recipe.yaml")
optimizer = manager.modify(model, optimizer, steps_per_epoch=steps_per_epoch)

In [21]:
# Fine-tune for the number of epochs the recipe manager reports
epoch = 0
total_epochs = manager.max_epochs
for epoch in range(total_epochs):
    epoch_name = f"{epoch + 1}/{total_epochs}"

    # training pass: weights are updated through the manager-wrapped optimizer
    print(f"Running Training Epoch {epoch_name}")
    train_loss, train_acc = run_model_one_epoch(
        model=model,
        data_loader=train_loader,
        criterion=criterion,
        device=device,
        train=True,
        optimizer=optimizer,
    )
    train_summary = f"Training Epoch: {epoch_name}\nTraining Loss: {train_loss}\nTop 1 Acc: {train_acc}\n"
    print(train_summary)

    # validation pass: metrics only, no weight updates
    print(f"Running Validation Epoch {epoch_name}")
    val_loss, val_acc = run_model_one_epoch(
        model=model, data_loader=val_loader, criterion=criterion, device=device
    )
    val_summary = f"Validation Epoch: {epoch_name}\nVal Loss: {val_loss}\nTop 1 Acc: {val_acc}\n"
    print(val_summary)

# Tell the manager that training is over
manager.finalize(model)

Running Training Epoch 1/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 1/10
Training Loss: 0.49688769707625563
Top 1 Acc: 0.811411992263056

Running Validation Epoch 1/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 1/10
Val Loss: 0.10483174873515963
Top 1 Acc: 0.9624060150375939

Running Training Epoch 2/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 2/10
Training Loss: 0.13233664106916299
Top 1 Acc: 0.9709864603481625

Running Validation Epoch 2/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 2/10
Val Loss: 0.10690611051395535
Top 1 Acc: 0.9624060150375939

Running Training Epoch 3/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 3/10
Training Loss: 0.061133282936432144
Top 1 Acc: 0.9893617021276596

Running Validation Epoch 3/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 3/10
Val Loss: 0.05752026969566941
Top 1 Acc: 0.9774436090225563

Running Training Epoch 4/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 4/10
Training Loss: 0.03709277432327243
Top 1 Acc: 0.988394584139265

Running Validation Epoch 4/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 4/10
Val Loss: 0.09043196495622396
Top 1 Acc: 0.9699248120300752

Running Training Epoch 5/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 5/10
Training Loss: 0.011832633150995454
Top 1 Acc: 0.9990328820116054

Running Validation Epoch 5/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 5/10
Val Loss: 0.05438814461231232
Top 1 Acc: 0.9774436090225563

Running Training Epoch 6/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 6/10
Training Loss: 0.012920034544852873
Top 1 Acc: 0.9970986460348162

Running Validation Epoch 6/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 6/10
Val Loss: 0.05623044967651367
Top 1 Acc: 0.9699248120300752

Running Training Epoch 7/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 7/10
Training Loss: 0.008218912644114233
Top 1 Acc: 0.9980657640232108

Running Validation Epoch 7/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 7/10
Val Loss: 0.05101724807173014
Top 1 Acc: 0.9774436090225563

Running Training Epoch 8/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 8/10
Training Loss: 0.006811827950348908
Top 1 Acc: 1.0

Running Validation Epoch 8/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 8/10
Val Loss: 0.0494396741501987
Top 1 Acc: 0.9548872180451128

Running Training Epoch 9/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 9/10
Training Loss: 0.005579106432075302
Top 1 Acc: 0.9990328820116054

Running Validation Epoch 9/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 9/10
Val Loss: 0.050181417539715764
Top 1 Acc: 0.9624060150375939

Running Training Epoch 10/10


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 10/10
Training Loss: 0.006127264015517678
Top 1 Acc: 0.9980657640232108

Running Validation Epoch 10/10


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 10/10
Val Loss: 0.04676330639049411
Top 1 Acc: 0.9624060150375939



In [22]:
# Print the value tensor_sparsity reports for the weight of each prunable layer
for name, layer in get_prunable_layers(model):
    sparsity = tensor_sparsity(layer.weight).item()
    print(f"{name}.weight: {sparsity:.4f}")

features.0.0.weight: 0.0000
features.1.conv.0.0.weight: 0.0000
features.1.conv.1.weight: 0.0000
features.2.conv.0.0.weight: 0.0000
features.2.conv.1.0.weight: 0.0000
features.2.conv.2.weight: 0.0000
features.3.conv.0.0.weight: 0.0000
features.3.conv.1.0.weight: 0.0000
features.3.conv.2.weight: 0.0000
features.4.conv.0.0.weight: 0.0000
features.4.conv.1.0.weight: 0.0000
features.4.conv.2.weight: 0.0000
features.5.conv.0.0.weight: 0.0000
features.5.conv.1.0.weight: 0.0000
features.5.conv.2.weight: 0.0000
features.6.conv.0.0.weight: 0.0000
features.6.conv.1.0.weight: 0.0000
features.6.conv.2.weight: 0.0000
features.7.conv.0.0.weight: 0.0000
features.7.conv.1.0.weight: 0.0000
features.7.conv.2.weight: 0.0000
features.8.conv.0.0.weight: 0.0000
features.8.conv.1.0.weight: 0.0000
features.8.conv.2.weight: 0.0000
features.9.conv.0.0.weight: 0.0000
features.9.conv.1.0.weight: 0.0000
features.9.conv.2.weight: 0.0000
features.10.conv.0.0.weight: 0.0000
features.10.conv.1.0.weight: 0.0000
features

In [25]:
# Save the fine-tuned dense weights with sparseml's ModuleExporter; the
# pruning section reloads this checkpoint from ./dense_model/training/
save_dir = "dense_model"
exporter = ModuleExporter(model, output_dir=save_dir)
exporter.export_pytorch(name="mobilenet-v2-dense-beans.pth")

## Prune The Model

In [27]:
# Rebuild the architecture (no pretrained weights, NUM_LABELS-output head) and
# restore the dense fine-tuned weights exported earlier. map_location="cpu"
# lets a checkpoint holding CUDA tensors load on a CPU-only machine; the model
# is moved to `device` afterwards.
checkpoint = torch.load("./dense_model/training/mobilenet-v2-dense-beans.pth", map_location="cpu")
model = torchvision.models.mobilenet_v2()
model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, NUM_LABELS)
model.load_state_dict(checkpoint['state_dict'])
model.to(device)

# setup loss function and optimizer, LR will be overridden by sparseml
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=8e-3)

In [28]:
# Print the module structure of the current `model`
print(model)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [29]:
# Print the pruning recipe that the manager in the next cell is built from
!cat recipe-0.yaml

# Epoch and Learning-Rate variables
num_epochs: 13.0
pruning_epochs: 10.0
init_lr: 0.0005
inter_func: cubic
mask_type: unstructured

training_modifiers:
  - !EpochRangeModifier
    start_epoch: 0.0
    end_epoch: eval(num_epochs)

  - !LearningRateFunctionModifier
    final_lr: 0.0
    init_lr: eval(init_lr)
    lr_func: cosine
    start_epoch: 0.0
    end_epoch: eval(num_epochs)

# Pruning
pruning_modifiers:
  - !GlobalMagnitudePruningModifier
    init_sparsity: 0.05
    final_sparsity: 0.90
    start_epoch: 0.0
    end_epoch: eval(pruning_epochs)
    update_frequency: 1.0
    params: 
        - 'features.0.0.weight'
        - 'features.18.0.weight'
        - 're:features.*.conv.*.weight'
        - 're:features.*.conv.*.*.weight'
    leave_enabled: True
    inter_func: eval(inter_func)
    mask_type: eval(mask_type)

finetuning_modifiers:
  - !ConstantPruningModifier
    start_epoch: eval(pruning_epochs)
    params: __ALL_PRUNABLE__

In [30]:
# Build the recipe manager from the pruning recipe and let it wrap the optimizer
steps_per_epoch = len(train_loader)
manager = ScheduledModifierManager.from_yaml("./recipe-0.yaml")
optimizer = manager.modify(model, optimizer, steps_per_epoch=steps_per_epoch)

In [31]:
# Baseline before pruning: print the value tensor_sparsity reports for the
# weight of each prunable layer
for name, layer in get_prunable_layers(model):
    sparsity = tensor_sparsity(layer.weight).item()
    print(f"{name}.weight: {sparsity:.4f}")

features.0.0.weight: 0.0000
features.1.conv.0.0.weight: 0.0000
features.1.conv.1.weight: 0.0000
features.2.conv.0.0.weight: 0.0000
features.2.conv.1.0.weight: 0.0000
features.2.conv.2.weight: 0.0000
features.3.conv.0.0.weight: 0.0000
features.3.conv.1.0.weight: 0.0000
features.3.conv.2.weight: 0.0000
features.4.conv.0.0.weight: 0.0000
features.4.conv.1.0.weight: 0.0000
features.4.conv.2.weight: 0.0000
features.5.conv.0.0.weight: 0.0000
features.5.conv.1.0.weight: 0.0000
features.5.conv.2.weight: 0.0000
features.6.conv.0.0.weight: 0.0000
features.6.conv.1.0.weight: 0.0000
features.6.conv.2.weight: 0.0000
features.7.conv.0.0.weight: 0.0000
features.7.conv.1.0.weight: 0.0000
features.7.conv.2.weight: 0.0000
features.8.conv.0.0.weight: 0.0000
features.8.conv.1.0.weight: 0.0000
features.8.conv.2.weight: 0.0000
features.9.conv.0.0.weight: 0.0000
features.9.conv.1.0.weight: 0.0000
features.9.conv.2.weight: 0.0000
features.10.conv.0.0.weight: 0.0000
features.10.conv.1.0.weight: 0.0000
features

In [32]:
# Train under the pruning recipe and print per-layer sparsity after every epoch
epoch = 0
total_epochs = manager.max_epochs
for epoch in range(total_epochs):
    epoch_name = f"{epoch + 1}/{total_epochs}"

    # training pass: weights are updated through the manager-wrapped optimizer
    print(f"Running Training Epoch {epoch_name}")
    train_loss, train_acc = run_model_one_epoch(
        model=model,
        data_loader=train_loader,
        criterion=criterion,
        device=device,
        train=True,
        optimizer=optimizer,
    )
    train_summary = f"Training Epoch: {epoch_name}\nTraining Loss: {train_loss}\nTop 1 Acc: {train_acc}\n"
    print(train_summary)

    # validation pass: metrics only, no weight updates
    print(f"Running Validation Epoch {epoch_name}")
    val_loss, val_acc = run_model_one_epoch(
        model=model, data_loader=val_loader, criterion=criterion, device=device
    )
    val_summary = f"Validation Epoch: {epoch_name}\nVal Loss: {val_loss}\nTop 1 Acc: {val_acc}\n"
    print(val_summary)

    # per-layer sparsity after this epoch
    print(f"\n\nChecking Sparsity Level:")
    for name, layer in get_prunable_layers(model):
        sparsity = tensor_sparsity(layer.weight).item()
        print(f"{name}.weight: {sparsity:.4f}")
    print(f"\n")

# Tell the manager that training is over
manager.finalize(model)

Running Training Epoch 1/13


  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 1/13
Training Loss: 0.05181791849088657
Top 1 Acc: 0.9835589941972921

Running Validation Epoch 1/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 1/13
Val Loss: 0.08574707275256514
Top 1 Acc: 0.9624060150375939



Checking Sparsity Level:
features.0.0.weight: 0.0880
features.1.conv.0.0.weight: 0.0729
features.1.conv.1.weight: 0.0820
features.2.conv.0.0.weight: 0.0293
features.2.conv.1.0.weight: 0.0081
features.2.conv.2.weight: 0.0169
features.3.conv.0.0.weight: 0.0321
features.3.conv.1.0.weight: 0.0255
features.3.conv.2.weight: 0.0286
features.4.conv.0.0.weight: 0.0240
features.4.conv.1.0.weight: 0.0008
features.4.conv.2.weight: 0.0197
features.5.conv.0.0.weight: 0.0343
features.5.conv.1.0.weight: 0.0237
features.5.conv.2.weight: 0.0288
features.6.conv.0.0.weight: 0.0319
features.6.conv.1.0.weight: 0.0272
features.6.conv.2.weight: 0.0334
features.7.conv.0.0.weight: 0.0218
features.7.conv.1.0.weight: 0.0035
features.7.conv.2.weight: 0.0228
features.8.conv.0.0.weight: 0.0445
features.8.conv.1.0.weight: 0.0341
features.8.conv.2.weight: 0.0502
features.9.conv.0.0.weight: 0.0424
features.9.conv.1.0.weight: 0.0292
fe

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 2/13
Training Loss: 0.01928500584361962
Top 1 Acc: 0.9941972920696325

Running Validation Epoch 2/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 2/13
Val Loss: 0.04039517091587186
Top 1 Acc: 0.9849624060150376



Checking Sparsity Level:
features.0.0.weight: 0.1910
features.1.conv.0.0.weight: 0.1319
features.1.conv.1.weight: 0.1875
features.2.conv.0.0.weight: 0.1680
features.2.conv.1.0.weight: 0.0532
features.2.conv.2.weight: 0.1115
features.3.conv.0.0.weight: 0.1450
features.3.conv.1.0.weight: 0.1142
features.3.conv.2.weight: 0.1534
features.4.conv.0.0.weight: 0.1279
features.4.conv.1.0.weight: 0.0085
features.4.conv.2.weight: 0.1152
features.5.conv.0.0.weight: 0.1758
features.5.conv.1.0.weight: 0.1337
features.5.conv.2.weight: 0.1759
features.6.conv.0.0.weight: 0.1637
features.6.conv.1.0.weight: 0.1337
features.6.conv.2.weight: 0.1898
features.7.conv.0.0.weight: 0.1243
features.7.conv.1.0.weight: 0.0069
features.7.conv.2.weight: 0.1381
features.8.conv.0.0.weight: 0.2351
features.8.conv.1.0.weight: 0.1765
features.8.conv.2.weight: 0.2569
features.9.conv.0.0.weight: 0.2288
features.9.conv.1.0.weight: 0.1837
fe

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 3/13
Training Loss: 0.02513152473274562
Top 1 Acc: 0.9941972920696325

Running Validation Epoch 3/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 3/13
Val Loss: 0.04448752112220973
Top 1 Acc: 0.9924812030075187



Checking Sparsity Level:
features.0.0.weight: 0.2558
features.1.conv.0.0.weight: 0.1944
features.1.conv.1.weight: 0.2910
features.2.conv.0.0.weight: 0.2728
features.2.conv.1.0.weight: 0.0891
features.2.conv.2.weight: 0.1910
features.3.conv.0.0.weight: 0.2393
features.3.conv.1.0.weight: 0.1975
features.3.conv.2.weight: 0.2564
features.4.conv.0.0.weight: 0.2115
features.4.conv.1.0.weight: 0.0177
features.4.conv.2.weight: 0.2018
features.5.conv.0.0.weight: 0.2970
features.5.conv.1.0.weight: 0.2193
features.5.conv.2.weight: 0.2944
features.6.conv.0.0.weight: 0.2752
features.6.conv.1.0.weight: 0.2454
features.6.conv.2.weight: 0.3210
features.7.conv.0.0.weight: 0.2137
features.7.conv.1.0.weight: 0.0197
features.7.conv.2.weight: 0.2357
features.8.conv.0.0.weight: 0.3953
features.8.conv.1.0.weight: 0.2914
features.8.conv.2.weight: 0.4281
features.9.conv.0.0.weight: 0.3828
features.9.conv.1.0.weight: 0.3137
fe

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 4/13
Training Loss: 0.0277155696160414
Top 1 Acc: 0.9903288201160542

Running Validation Epoch 4/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 4/13
Val Loss: 0.05159115699352697
Top 1 Acc: 0.9849624060150376



Checking Sparsity Level:
features.0.0.weight: 0.3241
features.1.conv.0.0.weight: 0.2535
features.1.conv.1.weight: 0.3340
features.2.conv.0.0.weight: 0.3503
features.2.conv.1.0.weight: 0.1227
features.2.conv.2.weight: 0.2582
features.3.conv.0.0.weight: 0.3247
features.3.conv.1.0.weight: 0.2708
features.3.conv.2.weight: 0.3406
features.4.conv.0.0.weight: 0.2908
features.4.conv.1.0.weight: 0.0394
features.4.conv.2.weight: 0.2745
features.5.conv.0.0.weight: 0.3975
features.5.conv.1.0.weight: 0.2992
features.5.conv.2.weight: 0.4062
features.6.conv.0.0.weight: 0.3833
features.6.conv.1.0.weight: 0.3409
features.6.conv.2.weight: 0.4261
features.7.conv.0.0.weight: 0.2909
features.7.conv.1.0.weight: 0.0509
features.7.conv.2.weight: 0.3217
features.8.conv.0.0.weight: 0.5207
features.8.conv.1.0.weight: 0.3961
features.8.conv.2.weight: 0.5641
features.9.conv.0.0.weight: 0.5110
features.9.conv.1.0.weight: 0.4265
fe

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 5/13
Training Loss: 0.05216440672732212
Top 1 Acc: 0.9874274661508704

Running Validation Epoch 5/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 5/13
Val Loss: 0.033026803645771
Top 1 Acc: 0.9774436090225563



Checking Sparsity Level:
features.0.0.weight: 0.3889
features.1.conv.0.0.weight: 0.3368
features.1.conv.1.weight: 0.3809
features.2.conv.0.0.weight: 0.4062
features.2.conv.1.0.weight: 0.1551
features.2.conv.2.weight: 0.3242
features.3.conv.0.0.weight: 0.4144
features.3.conv.1.0.weight: 0.3341
features.3.conv.2.weight: 0.4149
features.4.conv.0.0.weight: 0.3539
features.4.conv.1.0.weight: 0.0725
features.4.conv.2.weight: 0.3409
features.5.conv.0.0.weight: 0.4842
features.5.conv.1.0.weight: 0.3750
features.5.conv.2.weight: 0.5052
features.6.conv.0.0.weight: 0.4787
features.6.conv.1.0.weight: 0.4265
features.6.conv.2.weight: 0.5290
features.7.conv.0.0.weight: 0.3649
features.7.conv.1.0.weight: 0.0943
features.7.conv.2.weight: 0.4023
features.8.conv.0.0.weight: 0.6228
features.8.conv.1.0.weight: 0.4951
features.8.conv.2.weight: 0.6655
features.9.conv.0.0.weight: 0.6145
features.9.conv.1.0.weight: 0.5255
feat

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 6/13
Training Loss: 0.08560408513541475
Top 1 Acc: 0.9806576402321083

Running Validation Epoch 6/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 6/13
Val Loss: 0.047504376620054245
Top 1 Acc: 0.9924812030075187



Checking Sparsity Level:
features.0.0.weight: 0.4282
features.1.conv.0.0.weight: 0.3854
features.1.conv.1.weight: 0.4277
features.2.conv.0.0.weight: 0.4473
features.2.conv.1.0.weight: 0.2002
features.2.conv.2.weight: 0.3728
features.3.conv.0.0.weight: 0.4884
features.3.conv.1.0.weight: 0.3904
features.3.conv.2.weight: 0.4818
features.4.conv.0.0.weight: 0.4141
features.4.conv.1.0.weight: 0.1258
features.4.conv.2.weight: 0.4039
features.5.conv.0.0.weight: 0.5547
features.5.conv.1.0.weight: 0.4421
features.5.conv.2.weight: 0.5843
features.6.conv.0.0.weight: 0.5542
features.6.conv.1.0.weight: 0.5098
features.6.conv.2.weight: 0.6082
features.7.conv.0.0.weight: 0.4259
features.7.conv.1.0.weight: 0.1609
features.7.conv.2.weight: 0.4735
features.8.conv.0.0.weight: 0.7039
features.8.conv.1.0.weight: 0.5929
features.8.conv.2.weight: 0.7467
features.9.conv.0.0.weight: 0.6944
features.9.conv.1.0.weight: 0.6045
f

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 7/13
Training Loss: 0.13826644358535609
Top 1 Acc: 0.9555125725338491

Running Validation Epoch 7/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 7/13
Val Loss: 0.06480909623205662
Top 1 Acc: 0.9924812030075187



Checking Sparsity Level:
features.0.0.weight: 0.4583
features.1.conv.0.0.weight: 0.4201
features.1.conv.1.weight: 0.4590
features.2.conv.0.0.weight: 0.4876
features.2.conv.1.0.weight: 0.2396
features.2.conv.2.weight: 0.4175
features.3.conv.0.0.weight: 0.5408
features.3.conv.1.0.weight: 0.4383
features.3.conv.2.weight: 0.5362
features.4.conv.0.0.weight: 0.4589
features.4.conv.1.0.weight: 0.1713
features.4.conv.2.weight: 0.4505
features.5.conv.0.0.weight: 0.6123
features.5.conv.1.0.weight: 0.4994
features.5.conv.2.weight: 0.6411
features.6.conv.0.0.weight: 0.6138
features.6.conv.1.0.weight: 0.5527
features.6.conv.2.weight: 0.6644
features.7.conv.0.0.weight: 0.4774
features.7.conv.1.0.weight: 0.2442
features.7.conv.2.weight: 0.5317
features.8.conv.0.0.weight: 0.7644
features.8.conv.1.0.weight: 0.6707
features.8.conv.2.weight: 0.8041
features.9.conv.0.0.weight: 0.7559
features.9.conv.1.0.weight: 0.6652
fe

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 8/13
Training Loss: 0.24723159487951885
Top 1 Acc: 0.9168278529980658

Running Validation Epoch 8/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 8/13
Val Loss: 0.12623528838157655
Top 1 Acc: 0.9699248120300752



Checking Sparsity Level:
features.0.0.weight: 0.4769
features.1.conv.0.0.weight: 0.4410
features.1.conv.1.weight: 0.4902
features.2.conv.0.0.weight: 0.5215
features.2.conv.1.0.weight: 0.2697
features.2.conv.2.weight: 0.4562
features.3.conv.0.0.weight: 0.5819
features.3.conv.1.0.weight: 0.4691
features.3.conv.2.weight: 0.5799
features.4.conv.0.0.weight: 0.4905
features.4.conv.1.0.weight: 0.2168
features.4.conv.2.weight: 0.4863
features.5.conv.0.0.weight: 0.6523
features.5.conv.1.0.weight: 0.5469
features.5.conv.2.weight: 0.6816
features.6.conv.0.0.weight: 0.6556
features.6.conv.1.0.weight: 0.5822
features.6.conv.2.weight: 0.7048
features.7.conv.0.0.weight: 0.5181
features.7.conv.1.0.weight: 0.3142
features.7.conv.2.weight: 0.5773
features.8.conv.0.0.weight: 0.8066
features.8.conv.1.0.weight: 0.7153
features.8.conv.2.weight: 0.8413
features.9.conv.0.0.weight: 0.7955
features.9.conv.1.0.weight: 0.7063
fe

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 9/13
Training Loss: 0.1767726969538313
Top 1 Acc: 0.9487427466150871

Running Validation Epoch 9/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 9/13
Val Loss: 0.14729779064655305
Top 1 Acc: 0.9849624060150376



Checking Sparsity Level:
features.0.0.weight: 0.4873
features.1.conv.0.0.weight: 0.4549
features.1.conv.1.weight: 0.5000
features.2.conv.0.0.weight: 0.5430
features.2.conv.1.0.weight: 0.2812
features.2.conv.2.weight: 0.4787
features.3.conv.0.0.weight: 0.6042
features.3.conv.1.0.weight: 0.4846
features.3.conv.2.weight: 0.6042
features.4.conv.0.0.weight: 0.5119
features.4.conv.1.0.weight: 0.2546
features.4.conv.2.weight: 0.5067
features.5.conv.0.0.weight: 0.6763
features.5.conv.1.0.weight: 0.5758
features.5.conv.2.weight: 0.7051
features.6.conv.0.0.weight: 0.6816
features.6.conv.1.0.weight: 0.6042
features.6.conv.2.weight: 0.7293
features.7.conv.0.0.weight: 0.5369
features.7.conv.1.0.weight: 0.3455
features.7.conv.2.weight: 0.6029
features.8.conv.0.0.weight: 0.8276
features.8.conv.1.0.weight: 0.7341
features.8.conv.2.weight: 0.8612
features.9.conv.0.0.weight: 0.8167
features.9.conv.1.0.weight: 0.7277
fe

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 10/13
Training Loss: 0.12266924277399525
Top 1 Acc: 0.9700193423597679

Running Validation Epoch 10/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 10/13
Val Loss: 0.104463392496109
Top 1 Acc: 0.9849624060150376



Checking Sparsity Level:
features.0.0.weight: 0.4919
features.1.conv.0.0.weight: 0.4618
features.1.conv.1.weight: 0.5098
features.2.conv.0.0.weight: 0.5475
features.2.conv.1.0.weight: 0.2951
features.2.conv.2.weight: 0.4852
features.3.conv.0.0.weight: 0.6137
features.3.conv.1.0.weight: 0.4915
features.3.conv.2.weight: 0.6126
features.4.conv.0.0.weight: 0.5171
features.4.conv.1.0.weight: 0.2639
features.4.conv.2.weight: 0.5163
features.5.conv.0.0.weight: 0.6842
features.5.conv.1.0.weight: 0.5856
features.5.conv.2.weight: 0.7145
features.6.conv.0.0.weight: 0.6914
features.6.conv.1.0.weight: 0.6146
features.6.conv.2.weight: 0.7373
features.7.conv.0.0.weight: 0.5449
features.7.conv.1.0.weight: 0.3640
features.7.conv.2.weight: 0.6121
features.8.conv.0.0.weight: 0.8355
features.8.conv.1.0.weight: 0.7399
features.8.conv.2.weight: 0.8680
features.9.conv.0.0.weight: 0.8251
features.9.conv.1.0.weight: 0.7355
fea

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 11/13
Training Loss: 0.08148531587512205
Top 1 Acc: 0.9787234042553191

Running Validation Epoch 11/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 11/13
Val Loss: 0.07646273151040077
Top 1 Acc: 0.9774436090225563



Checking Sparsity Level:
features.0.0.weight: 0.4919
features.1.conv.0.0.weight: 0.4618
features.1.conv.1.weight: 0.5156
features.2.conv.0.0.weight: 0.5495
features.2.conv.1.0.weight: 0.2951
features.2.conv.2.weight: 0.4852
features.3.conv.0.0.weight: 0.6152
features.3.conv.1.0.weight: 0.4915
features.3.conv.2.weight: 0.6137
features.4.conv.0.0.weight: 0.5185
features.4.conv.1.0.weight: 0.2670
features.4.conv.2.weight: 0.5167
features.5.conv.0.0.weight: 0.6847
features.5.conv.1.0.weight: 0.5880
features.5.conv.2.weight: 0.7153
features.6.conv.0.0.weight: 0.6929
features.6.conv.1.0.weight: 0.6169
features.6.conv.2.weight: 0.7378
features.7.conv.0.0.weight: 0.5461
features.7.conv.1.0.weight: 0.3646
features.7.conv.2.weight: 0.6134
features.8.conv.0.0.weight: 0.8370
features.8.conv.1.0.weight: 0.7410
features.8.conv.2.weight: 0.8693
features.9.conv.0.0.weight: 0.8263
features.9.conv.1.0.weight: 0.7367
f

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 12/13
Training Loss: 0.0749947436605439
Top 1 Acc: 0.9816247582205029

Running Validation Epoch 12/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 12/13
Val Loss: 0.07313197031617165
Top 1 Acc: 0.9774436090225563



Checking Sparsity Level:
features.0.0.weight: 0.4919
features.1.conv.0.0.weight: 0.4618
features.1.conv.1.weight: 0.5156
features.2.conv.0.0.weight: 0.5495
features.2.conv.1.0.weight: 0.2951
features.2.conv.2.weight: 0.4852
features.3.conv.0.0.weight: 0.6152
features.3.conv.1.0.weight: 0.4915
features.3.conv.2.weight: 0.6137
features.4.conv.0.0.weight: 0.5185
features.4.conv.1.0.weight: 0.2670
features.4.conv.2.weight: 0.5167
features.5.conv.0.0.weight: 0.6847
features.5.conv.1.0.weight: 0.5880
features.5.conv.2.weight: 0.7153
features.6.conv.0.0.weight: 0.6929
features.6.conv.1.0.weight: 0.6169
features.6.conv.2.weight: 0.7378
features.7.conv.0.0.weight: 0.5461
features.7.conv.1.0.weight: 0.3646
features.7.conv.2.weight: 0.6134
features.8.conv.0.0.weight: 0.8370
features.8.conv.1.0.weight: 0.7410
features.8.conv.2.weight: 0.8693
features.9.conv.0.0.weight: 0.8263
features.9.conv.1.0.weight: 0.7367
f

  0%|          | 0/33 [00:00<?, ?it/s]

Training Epoch: 13/13
Training Loss: 0.08235379680991173
Top 1 Acc: 0.97678916827853

Running Validation Epoch 13/13


  0%|          | 0/5 [00:00<?, ?it/s]

Validation Epoch: 13/13
Val Loss: 0.07470075637102128
Top 1 Acc: 0.9774436090225563



Checking Sparsity Level:
features.0.0.weight: 0.4919
features.1.conv.0.0.weight: 0.4618
features.1.conv.1.weight: 0.5156
features.2.conv.0.0.weight: 0.5495
features.2.conv.1.0.weight: 0.2951
features.2.conv.2.weight: 0.4852
features.3.conv.0.0.weight: 0.6152
features.3.conv.1.0.weight: 0.4915
features.3.conv.2.weight: 0.6137
features.4.conv.0.0.weight: 0.5185
features.4.conv.1.0.weight: 0.2670
features.4.conv.2.weight: 0.5167
features.5.conv.0.0.weight: 0.6847
features.5.conv.1.0.weight: 0.5880
features.5.conv.2.weight: 0.7153
features.6.conv.0.0.weight: 0.6929
features.6.conv.1.0.weight: 0.6169
features.6.conv.2.weight: 0.7378
features.7.conv.0.0.weight: 0.5461
features.7.conv.1.0.weight: 0.3646
features.7.conv.2.weight: 0.6134
features.8.conv.0.0.weight: 0.8370
features.8.conv.1.0.weight: 0.7410
features.8.conv.2.weight: 0.8693
features.9.conv.0.0.weight: 0.8263
features.9.conv.1.0.weight: 0.7367
f

In [33]:
# Export the pruned model twice into ./experiment-0: as a PyTorch checkpoint
# and as ONNX, using a random 1x3x224x224 tensor as the sample input
save_dir = "experiment-0"
exporter = ModuleExporter(model, output_dir=save_dir)
exporter.export_pytorch(name="mobilenet_v2-sparse-experiment-0.pth")
exporter.export_onnx(torch.randn(1, 3, 224, 224), name="sparse-model.onnx", convert_qat=True)



In [34]:
# Rebuild a second, dense copy of the network from the dense checkpoint so it
# can be exported for comparison with the pruned model. Everything here refers
# to checkpoint2/model2 only, so the cell does not depend on `model` or
# `checkpoint` left over from earlier cells. map_location="cpu" lets a
# checkpoint holding CUDA tensors load on a CPU-only machine.
checkpoint2 = torch.load("./dense_model/training/mobilenet-v2-dense-beans.pth", map_location="cpu")
model2 = torchvision.models.mobilenet_v2()
model2.classifier[1] = torch.nn.Linear(model2.classifier[1].in_features, NUM_LABELS)
model2.load_state_dict(checkpoint2['state_dict'])

<All keys matched successfully>

In [36]:
# Export the dense copy to ONNX under ./test, using a random 1x3x224x224
# tensor as the sample input
onnx_sample_input = torch.randn(1, 3, 224, 224)
exporter = ModuleExporter(model2, output_dir="./test")
exporter.export_onnx(onnx_sample_input, name="dense-model.onnx", convert_qat=True)