In [None]:
import copy

import torch
import torch.quantization
from ultralytics import YOLO

# Load your pre-trained model
model = YOLO('AlertCycleV5.pt')
# Put the underlying torch module (not the YOLO wrapper) into eval mode.
# nn.Module.eval() is implemented as self.train(False), and the YOLO wrapper
# exposes its own train() entry point.
# NOTE(review): on some ultralytics versions calling eval() on the wrapper can
# therefore kick off a training run - confirm against the installed version.
model.model.eval()

# Define the quantization configuration
# We use dynamic quantization here, which is well-suited for CPUs (including ARM).
# Only torch.nn.Linear layers are converted to int8.
# NOTE(review): if this checkpoint is mostly convolutional, few layers will be
# affected and the size/speed gain may be small - verify on the saved file.
model_quantized = copy.deepcopy(model)  # keep the float model untouched
# Quantize the inner module in place on the copy; quantize_dynamic calls
# eval() internally, so it must be handed the inner module, not the wrapper.
model_quantized.model = torch.quantization.quantize_dynamic(
    model_quantized.model, {torch.nn.Linear}, dtype=torch.qint8, inplace=True
)

# Save the quantized model
torch.save(model_quantized, 'AlertCycleV5_quantized.pt')
print("Model quantized and saved as 'AlertCycleV5_quantized.pt'")


Model quantized and saved as 'AlertCycleV5_quantized.pt'


In [None]:
from torch.nn.utils import prune

# Function to prune the model
def prune_model(model, amount=0.3, make_permanent=False):
    """Apply L1 unstructured pruning to every Conv2d and Linear layer of ``model``.

    The model is modified in place; the same object is returned for convenience.

    Args:
        model: A ``torch.nn.Module`` whose submodules are scanned.
        amount: Forwarded unchanged to ``prune.l1_unstructured`` for each
            matching layer's ``weight`` (a float fraction or an int count).
        make_permanent: If True, ``prune.remove`` is called on each pruned
            layer so the mask is folded into ``weight`` and the
            ``weight_orig`` / ``weight_mask`` reparametrization is dropped.
            The default (False) keeps the reparametrization in place.

    Returns:
        The same ``model`` object that was passed in.
    """
    prunable_types = (torch.nn.Conv2d, torch.nn.Linear)
    for module in model.modules():
        if not isinstance(module, prunable_types):
            continue
        prune.l1_unstructured(module, name='weight', amount=amount)
        if make_permanent:
            # Bake the zeros into the weight tensor and drop the mask/hook.
            prune.remove(module, 'weight')
    return model

# Prune the model (with 30% of the weights pruned).
# prune_model modifies its argument in place, so work on a deep copy:
# model_quantized stays intact and re-running this cell gives the same result.
# NOTE(review): layers already swapped out by dynamic quantization are
# presumably no longer torch.nn.Linear instances and would be skipped by
# prune_model - confirm whether pruning should run before quantization instead.
model_pruned = prune_model(copy.deepcopy(model_quantized), amount=0.3)

# Make the pruning permanent before saving. Otherwise every pruned layer is
# stored with both `weight_orig` and `weight_mask` rather than a single
# `weight` tensor containing the zeros.
for module in model_pruned.modules():
    if hasattr(module, 'weight_orig'):
        prune.remove(module, 'weight')

# Save the pruned model
torch.save(model_pruned, 'AlertCycleV5_pruned.pt')
print("Model pruned and saved as 'AlertCycleV5_pruned.pt'")


Model pruned and saved as 'AlertCycleV5_pruned.pt'
