In [60]:
from ultralytics import YOLO
import torch
from torch.nn.utils import prune

In [56]:
weights_path = '/usr/src/ultralytics/runs/classify/train/weights/best.pt'

model = YOLO(weights_path)

In [3]:
metrics = model.val(data='imagewoof')

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA A100 80GB PCIe, 81093MiB)
YOLOv8l-cls summary (fused): 133 layers, 36197386 parameters, 0 gradients, 98.7 GFLOPs
[34m[1mtrain:[0m /usr/src/datasets/imagewoof/train... found 9025 images in 10 classes ✅ 
[34m[1mval:[0m /usr/src/datasets/imagewoof/val... found 3929 images in 10 classes ✅ 
[34m[1mtest:[0m None...
[34m[1mval: [0mScanning /usr/src/datasets/imagewoof/val... 3929 images, 0 corrupt: 100%|██████████| 3929/3929 [00:00<?, ?it/s][0m
               classes   top1_acc   top5_acc: 100%|██████████| 246/246 [00:02<00:00, 83.67it/s] 
                   all      0.925      0.995
Speed: 0.1ms preprocess, 0.4ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1m/usr/src/ultralytics/runs/classify/val2[0m


In [13]:
print(f'Top1 accuracy {metrics.top1:.3f}')
print(f'Top1 accuracy {metrics.top5:.3f}')
for stage, time in metrics.speed.items():
    print(f'{stage} speed {time:.3f} ms')

Top1 accuracy 0.925
Top1 accuracy 0.995
preprocess speed 0.088 ms
inference speed 0.410 ms
loss speed 0.001 ms
postprocess speed 0.001 ms


In [16]:
def check_model_size(model):
    buffer_size, param_size = 0, 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()

    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024**2

    return size_all_mb

In [57]:
check_model_size(model.model)

138.25603485107422

# Quantization

In [48]:
model_int8 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Conv2d},  # a set of layers to dynamically quantize
    dtype=torch.qint8)

In [49]:
ckpt = {
        'model': model_int8,
        'train_args': {},  # save as dict
}

print(f'Quant model size: {check_model_size(model_int8)}')

torch.save(ckpt, './model_quant.pt')

Quant model size: 138.25603485107422


In [50]:
quant_model = YOLO('./model_quant.pt')
quant_model.task = 'classify'

In [51]:
quant_model_metrics = quant_model.val(data='imagewoof')

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA A100 80GB PCIe, 81093MiB)
YOLOv8l-cls summary (fused): 133 layers, 36197386 parameters, 0 gradients, 98.7 GFLOPs
[34m[1mtrain:[0m /usr/src/datasets/imagewoof/train... found 9025 images in 10 classes ✅ 
[34m[1mval:[0m /usr/src/datasets/imagewoof/val... found 3929 images in 10 classes ✅ 
[34m[1mtest:[0m None...
[34m[1mval: [0mScanning /usr/src/datasets/imagewoof/val... 3929 images, 0 corrupt: 100%|██████████| 3929/3929 [00:00<?, ?it/s][0m
               classes   top1_acc   top5_acc: 100%|██████████| 246/246 [00:15<00:00, 16.27it/s]
                   all      0.707      0.974
Speed: 0.5ms preprocess, 2.1ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1m/usr/src/ultralytics/runs/detect/val6[0m


In [52]:
print(f'Top1 accuracy {quant_model_metrics.top1:.3f}')
print(f'Top1 accuracy {quant_model_metrics.top5:.3f}')
for stage, time in quant_model_metrics.speed.items():
    print(f'{stage} speed {time:.3f} ms')

Top1 accuracy 0.707
Top1 accuracy 0.974
preprocess speed 0.522 ms
inference speed 2.120 ms
loss speed 0.001 ms
postprocess speed 0.001 ms


In [54]:
check_model_size(model.model)

138.25603485107422

# Pruning

In [58]:
def sparsity(model):
    # Return global model sparsity
    a, b = 0, 0
    for p in model.parameters():
        a += p.numel()
        b += (p == 0).sum()
    return b / a

In [61]:
for name, m in model.model.named_modules():
    if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Linear):
        prune.ln_structured(m, name='weight', amount=0.05, n=2, dim=0)  # prune
        prune.remove(m, 'weight')  # make permanent
print(f'Model pruned to {sparsity(model.model):.3g} global sparsity')

Model pruned to 0.0502 global sparsity


In [62]:
ckpt = {
    'model': model.model,
    'train_args': {},  # save as dict
}

print(f'Pruned model size: {check_model_size(model.model)}')

torch.save(ckpt, './model_pruned.pt')

pruned_model = YOLO("./model_pruned.pt")

Pruned model size: 138.25603485107422


In [65]:
pruned_model.task = 'classify'

In [67]:
pruned_model_metrics = pruned_model.val(data='imagewoof')

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA A100 80GB PCIe, 81093MiB)
[34m[1mtrain:[0m /usr/src/datasets/imagewoof/train... found 9025 images in 10 classes ✅ 
[34m[1mval:[0m /usr/src/datasets/imagewoof/val... found 3929 images in 10 classes ✅ 
[34m[1mtest:[0m None...
[34m[1mval: [0mScanning /usr/src/datasets/imagewoof/val... 3929 images, 0 corrupt: 100%|██████████| 3929/3929 [00:00<?, ?it/s][0m
               classes   top1_acc   top5_acc: 100%|██████████| 246/246 [00:16<00:00, 15.29it/s]
                   all       0.63      0.952
Speed: 0.6ms preprocess, 2.1ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1m/usr/src/ultralytics/runs/detect/val8[0m


In [68]:
print(f'Top1 accuracy {pruned_model_metrics.top1:.3f}')
print(f'Top1 accuracy {pruned_model_metrics.top5:.3f}')
for stage, time in pruned_model_metrics.speed.items():
    print(f'{stage} speed {time:.3f} ms')

Top1 accuracy 0.630
Top1 accuracy 0.952
preprocess speed 0.631 ms
inference speed 2.086 ms
loss speed 0.002 ms
postprocess speed 0.001 ms
