# Metrics

In this notebook we test any remaining metrics we need need after training

## Results

### ResNet

1. Total Parameters: 466906
2. MACs: 34879808
3. Energy FP32: 0.160 mJ
4. Energy FP16: 0.038 mJ
5. Energy INT8: 0.007 mJ
6. Energy reduction FP32: 4.2×
7. Energy reduction INT8: 23.0×

### SqueezeNet

1. Total Parameters: 734986
2. MACs: 26637312
3. Energy FP32: 0.123 mJ
4. Energy FP16: 0.029 mJ
5. Energy INT8: 0.005 mJ
6. Energy reduction FP32: 4.2×
7. Energy reduction INT8: 23.0×

### AlexNet

1. Total Parameters: 1048330
2. MACs: 33598720
3. Energy FP32: 0.155 mJ
4. Energy FP16: 0.037 mJ
5. Energy INT8: 0.007 mJ
6. Energy reduction FP32: 4.2×
7. Energy reduction INT8: 23.0×

## Energy vs Precision

In [4]:
import torch

import helper
from squeezenet_model import SqueezeNetCIFAR10, SqueezeNetCIFAR10_QAT
from alexnet_model import AlexNetCIFAR10, AlexNetCIFAR10_QAT
from resnet32_model import ResNet, ResNetQAT

In [34]:
mname = "squeezenet"
# mname = "alexnet"
# mname = "resnet"

if mname == "squeezenet":
    get_model = SqueezeNetCIFAR10
    get_model_qat = SqueezeNetCIFAR10_QAT
elif mname == "alexnet":
    get_model = AlexNetCIFAR10
    get_model_qat = AlexNetCIFAR10_QAT
elif mname == "resnet":
    get_model = ResNet
    get_model_qat = ResNetQAT

In [35]:
def estimate_energy(macs, precision="fp32"):
    energy_per_mac = {
        "fp32": 4.6e-12,
        "fp16": 1.1e-12,
        "int8": 0.2e-12
    }
    return macs * energy_per_mac[precision]  # Joules

def compute_sparsity(model):
    total = 0
    zeros = 0
    for m in model.modules():
        if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
            w = m.weight.detach()
            total += w.numel()
            zeros += (w == 0).sum().item()
    return zeros / total

In [None]:
from fvcore.nn import FlopCountAnalysis

def get_metrics(mname="squeezenet"):
    print(f"{mname} Metrics")

    model = get_model()
    model.load_model(f"../pth/{mname}_fp32.pth", device='cpu')
    x = torch.randn(1, 3, 32, 32)

    # total parameters
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Total Parameters: {total_params}")

    # total MACs
    flops = FlopCountAnalysis(model, x)
    macs = flops.total() // 2  # FLOPs = 2 × MACs
    print(f"MACs: {macs}")

    # energy vs precision
    energy_fp32 = estimate_energy(macs, "fp32")
    energy_fp16 = estimate_energy(macs, "fp16")
    energy_int8 = estimate_energy(macs, "int8")
    print(f"Energy FP32: {energy_fp32*1e3:.3f} mJ")
    print(f"Energy FP16: {energy_fp16*1e3:.3f} mJ")
    print(f"Energy INT8: {energy_int8*1e3:.3f} mJ")
    print(f"Energy reduction FP32: {energy_fp32 / energy_fp16:.1f}×")
    print(f"Energy reduction INT8: {energy_fp32 / energy_int8:.1f}×")

    #
    fnames = [f"{mname}_10", f"{mname}_30", f"{mname}_50", f"{mname}_70"]

    for fn in fnames:
        model_pruned = get_model()
        model_pruned.load_model(f"../pth/{fn}.pth", device='cpu')
        sparsity = compute_sparsity(model_pruned)
        eff_macs = macs * (1 - sparsity)
        energy = estimate_energy(eff_macs,"fp32")
        print(f"Sparsity: {sparsity*100:.1f}%")
        print(f"Energy: {energy*1e3:.3f}")

get_metrics(mname)

Unsupported operator aten::max_pool2d encountered 3 time(s)
Unsupported operator aten::avg_pool2d encountered 1 time(s)


squeezenet Metrics
Model loaded from ../pth/squeezenet_fp32.pth
Total Parameters: 734986
MACs: 26637312
Energy FP32: 0.123 mJ
Energy FP16: 0.029 mJ
Energy INT8: 0.005 mJ
Energy reduction FP32: 4.2×
Energy reduction INT8: 23.0×
Model loaded from ../pth/squeezenet_10.pth
Sparsity: 10.0%
Energy: 0.005
Model loaded from ../pth/squeezenet_30.pth
Sparsity: 30.0%
Energy: 0.004
Model loaded from ../pth/squeezenet_50.pth
Sparsity: 50.0%
Energy: 0.003
Model loaded from ../pth/squeezenet_70.pth
Sparsity: 70.0%
Energy: 0.002


In [14]:
from fvcore.nn import FlopCountAnalysis

model = get_model()
model.load_model(f"../pth/{mname}_fp32.pth", device='cpu')
x = torch.randn(1, 3, 32, 32)

flops = FlopCountAnalysis(model, x)
macs = flops.total() // 2  # FLOPs = 2 × MACs

print(f"MACs: {macs}")

Unsupported operator aten::max_pool2d encountered 3 time(s)
Unsupported operator aten::avg_pool2d encountered 1 time(s)


Model loaded from ../pth/squeezenet_fp32.pth
MACs: 26637312


In [15]:
def estimate_energy(macs, precision="fp32"):
    energy_per_mac = {
        "fp32": 4.6e-12,
        "fp16": 1.1e-12,
        "int8": 0.2e-12
    }
    return macs * energy_per_mac[precision]  # Joules

energy_fp32 = estimate_energy(macs, "fp32")
energy_fp16 = estimate_energy(macs, "fp16")
energy_int8 = estimate_energy(macs, "int8")

print(f"Energy FP32: {energy_fp32*1e3:.3f} mJ")
print(f"Energy FP16: {energy_fp16*1e3:.3f} mJ")
print(f"Energy INT8: {energy_int8*1e3:.3f} mJ")
print(f"Energy reduction FP32: {energy_fp32 / energy_fp16:.1f}×")
print(f"Energy reduction INT8: {energy_fp32 / energy_int8:.1f}×")

Energy FP32: 0.123 mJ
Energy FP16: 0.029 mJ
Energy INT8: 0.005 mJ
Energy reduction FP32: 4.2×
Energy reduction INT8: 23.0×


## Energy vs Sparsity

In [None]:

mtitle = "ResNet"
resnet_results = [{
    "name": f"{mtitle} FP32",
    "accuracy": 0,
    "energy": 0.123,
    "batch = 1 throughput": 3256.7,
    "batch = 64 throughput": 125515.1,
    "batch = 128 throughput": 176338,
    "batch = 1 latency": .307,
    "batch = 64 latency": .510,
    "batch = 128 latency": .726,
    "batch = 1 engine size": 2.0M,
    "batch = 64 engine size": 2.1M,
    "batch = 128 engine size": 2.1M,
    "batch = 1 accuracy": 89.74,
    "batch = 64 accuracy": 89.74,
    "batch = 128 accuracy": 89.74
    },
    {
        "name": f"{mtitle} FP16",
        "accuracy": 0,
        "energy": 0.029
        "batch = 1 throughput": 4069.9,
        "batch = 64 throughput": 195238.1,
        "batch = 128 throughput": 293951,
        "batch = 1 latency": .246,
        "batch = 64 latency": .328,
        "batch = 128 latency": .435,
        "batch = 1 engine size": 1.2M,
        "batch = 64 engine size": 1.2M,
        "batch = 128 engine size": 1.2M,
        "batch = 1 accuracy": 89.75,
        "batch = 64 accuracy": 89.74,
        "batch = 128 accuracy": 89.74
    },
    {
        "name": f"{mtitle} INT8",
        "accuracy": 0,
        "energy": 0.005,
        "batch = 1 throughput": 5264.5,
        "batch = 64 throughput": 209156.0,
        "batch = 128 throughput": 306946.6,
        "batch = 1 latency": .190,
        "batch = 64 latency": .306,
        "batch = 128 latency": .417,
        "batch = 1 engine size": .971M,
        "batch = 64 engine size": .821M,
        "batch = 128 engine size": .795M,
        "batch = 1 accuracy": 89.75,
        "batch = 64 accuracy": 89.74,
        "batch = 128 accuracy": 89.74
    },
    {
        "name": f"{mtitle} FP32 Pruned 10%",
        "accuracy": 0,
        "energy": 0,
        "batch = 1 throughput": 3229.6,
        "batch = 64 throughput": 125688.3,
        "batch = 128 throughput": 176323.8,
        "batch = 1 latency": .310,
        "batch = 64 latency": .509,
        "batch = 128 latency": .726,
        "batch = 1 engine size": 2.0M,
        "batch = 64 engine size": 2.1M,
        "batch = 128 engine size": 2.1M,
        "batch = 1 accuracy": 90.38,
        "batch = 64 accuracy": 90.37,
        "batch = 128 accuracy": 90.38
    },
    {
        "name": f"{mtitle} FP32 Pruned 30%",
        "accuracy": 0,
        "energy": 0,
        "batch = 1 throughput": 3197.4,
        "batch = 64 throughput": 124203.4,
        "batch = 128 throughput": 174183.0,
        "batch = 1 latency": .313,
        "batch = 64 latency": .515,
        "batch = 128 latency": .735,
        "batch = 1 engine size": 2.0M,
        "batch = 64 engine size": 2.1M,
        "batch = 128 engine size": 2.1M,
        "batch = 1 accuracy": 89.16,
        "batch = 64 accuracy": 89.18,
        "batch = 128 accuracy": 89.18
    },
    {
        "name": f"{mtitle} FP32 Pruned 50%",
        "accuracy": 0,
        "energy": 0,
        "batch = 1 throughput": 3346.9,
        "batch = 64 throughput": 127108.7,
        "batch = 128 throughput": 176151.6,
        "batch = 1 latency": .299,
        "batch = 64 latency": .504,
        "batch = 128 latency": .727,
        "batch = 1 engine size": 2.0M,
        "batch = 64 engine size": 2.1M,
        "batch = 128 engine size": 2.1M,
        "batch = 1 accuracy": 90.86,
        "batch = 64 accuracy": 90.86,
        "batch = 128 accuracy": 90.87
    },
    {
        "name": f"{mtitle} FP32 Pruned 70%",
        "accuracy": 0,
        "energy": 0,
        "batch = 1 throughput": 3284.5,
        "batch = 64 throughput": 127111.6,
        "batch = 128 throughput": 175969.6,
        "batch = 1 latency": .304,
        "batch = 64 latency": .503,
        "batch = 128 latency": .727,
        "batch = 1 engine size": 2.0M,
        "batch = 64 engine size": 2.1M,
        "batch = 128 engine size": 2.1M,
        "batch = 1 accuracy": 91.3,
        "batch = 64 accuracy": 91.31,
        "batch = 128 accuracy": 91.31
    },
    {
        "name": f"{mtitle} Pruned and Quantized",
        "accuracy": 0,
        "energy": 0,
        "batch = 1 throughput": 2658.1,
        "batch = 64 throughput": 123880.1,
        "batch = 128 throughput": 210303.2,
        "batch = 1 latency": .376,
        "batch = 64 latency": .517,
        "batch = 128 latency": .609,
        "batch = 1 engine size": .981M,
        "batch = 64 engine size": .833M,
        "batch = 128 engine size": .808M,
        "batch = 1 accuracy": 90.00,
        "batch = 64 accuracy": 88.66,
        "batch = 128 accuracy": 88.67
    },
]

In [None]:

mtitle = "SqueezeNet"
squeezenet_results = [{
    "name": f"{mtitle} FP32",
    "accuracy": 89.20,
    "energy": 0.123,
    "batch = 1 throughput": 4349.5,
    "batch = 64 throughput": 140871.4,
    "batch = 128 throughput": 173614.3,
    "batch = 1 latency": .230,
    "batch = 64 latency": .454,
    "batch = 128 latency": .737,
    "batch = 1 engine size": 3.1M,
    "batch = 64 engine size": 3.4M,
    "batch = 128 engine size": 3.3M,
    "batch = 1 accuracy": 89.21,
    "batch = 64 accuracy": 89.19,
    "batch = 128 accuracy": 89.19 
    },
    {
        "name": f"{mtitle} FP16",
        "accuracy": 0,
        "energy": 0.029,
        "batch = 1 throughput": 5050.5,
        "batch = 64 throughput": 210439.8,
        "batch = 128 throughput": 312212.0,
        "batch = 1 latency": .198,
        "batch = 64 latency": .304,
        "batch = 128 latency": .410,
        "batch = 1 engine size": 1.8M,
        "batch = 64 engine size": 1.9M,
        "batch = 128 engine size": 1.9M,
        "batch = 1 accuracy": 89.23,
        "batch = 64 accuracy": 89.20,
        "batch = 128 accuracy": 89.21
    },
    {
        "name": f"{mtitle} INT8",
        "accuracy": 0,
        "energy": 0.005,
        "batch = 1 throughput": 3968.1,
        "batch = 64 throughput": 228469.1,
        "batch = 128 throughput": 356741.3,
        "batch = 1 latency": .252,
        "batch = 64 latency": .280,
        "batch = 128 latency": .359,
        "batch = 1 engine size": 1.2M,
        "batch = 64 engine size": 1.4M,
        "batch = 128 engine size": 1.3M,
        "batch = 1 accuracy": 89.13,
        "batch = 64 accuracy": 89.22,
        "batch = 128 accuracy": 89.18
    },
    {
        "name": f"{mtitle} FP32 Pruned 10%",
        "accuracy": 88.18,
        "energy": 0.110,
        "batch = 1 throughput": 4249.6,
        "batch = 64 throughput": 138965.2,
        "batch = 128 throughput": 172100.4,
        "batch = 1 latency": .235,
        "batch = 64 latency": .461,
        "batch = 128 latency": .744,
        "batch = 1 engine size": 3.1M,
        "batch = 64 engine size": 3.4M,
        "batch = 128 engine size": 3.3M,
        "batch = 1 accuracy": 88.2,
        "batch = 64 accuracy": 88.2,
        "batch = 128 accuracy": 88.2
    },
    {
        "name": f"{mtitle} FP32 Pruned 30%",
        "accuracy": 89.20,
        "energy": 0.086,
        "batch = 1 throughput": 4321.8,
        "batch = 64 throughput": 139816.6,
        "batch = 128 throughput": 171848.2,
        "batch = 1 latency": .231,
        "batch = 64 latency": .458,
        "batch = 128 latency": .745,
        "batch = 1 engine size": 3.1M,
        "batch = 64 engine size": 3.4M,
        "batch = 128 engine size": 3.4M,
        "batch = 1 accuracy": 88.97,
        "batch = 64 accuracy": 88.98,
        "batch = 128 accuracy": 88.98
    },
    {
        "name": f"{mtitle} FP32 Pruned 50%",
        "accuracy": 88.97,
        "energy": 0.061,
        "batch = 1 throughput": 4235.8,
        "batch = 64 throughput": 139790.1,
        "batch = 128 throughput": 171286.6,
        "batch = 1 latency": .236,
        "batch = 64 latency": .458,
        "batch = 128 latency": .747,
        "batch = 1 engine size": 3.1M,
        "batch = 64 engine size": 3.4M,
        "batch = 128 engine size": 3.4M,
        "batch = 1 accuracy": 89.18,
        "batch = 64 accuracy": 89.19,
        "batch = 128 accuracy": 89.19
    },
    {
        "name": f"{mtitle} FP32 Pruned 70%",
        "accuracy": 89.66,
        "energy": 0.037,
        "batch = 1 throughput": 4421.9,
        "batch = 64 throughput": 139949.1,
        "batch = 128 throughput": 172244.6,
        "batch = 1 latency": .226,
        "batch = 64 latency": .457,
        "batch = 128 latency": .743,
        "batch = 1 engine size": 3.1M,
        "batch = 64 engine size": 3.4M,
        "batch = 128 engine size": 3.4M,
        "batch = 1 accuracy": 89.71,
        "batch = 64 accuracy": 89.72,
        "batch = 128 accuracy": 89.71
    },
    {
        "name": f"{mtitle} Pruned and Quantized",
        "accuracy": 88.71,
        "energy": 0.002,
        "batch = 1 throughput": 2711.8,
        "batch = 64 throughput": 130063.4,
        "batch = 128 throughput": 220532.5,
        "batch = 1 latency": .369,
        "batch = 64 latency": .492,
        "batch = 128 latency": .580,
        "batch = 1 engine size": 1.2M,
        "batch = 64 engine size": 1.4M,
        "batch = 128 engine size": 1.3M,
        "batch = 1 accuracy": 96.00,
        "batch = 64 accuracy": 88.59,
        "batch = 128 accuracy": 88.42
    },
]

In [None]:

mtitle = "AlexNet"

alexnet_results = [{
    "name": f"{mtitle} FP32",
    "accuracy": 0,
    "energy": 0.155,
    "batch = 1 throughput": 10807.8,
    "batch = 64 throughput": 311812.4,
    "batch = 128 throughput": 389893.9,
    "batch = 1 latency": .093,
    "batch = 64 latency": .205,
    "batch = 128 latency": .328,
    "batch = 1 engine size": 4.2M,
    "batch = 64 engine size": 4.2M,
    "batch = 128 engine size": 4.3M,
    "batch = 1 accuracy": 88.13,
    "batch = 64 accuracy": 88.13,
    "batch = 128 accuracy": 88.13
    },
    {
        "name": f"{mtitle} FP16",
        "accuracy": 0,
        "energy": 0.037,
        "batch = 1 throughput": 14351.5,
        "batch = 64 throughput": 505405.8,
        "batch = 128 throughput": 732974.5,
        "batch = 1 latency": .070,
        "batch = 64 latency": .127,
        "batch = 128 latency": .175,
        "batch = 1 engine size": 2.2M,
        "batch = 64 engine size": 2.2M,
        "batch = 128 engine size": 2.2M,
        "batch = 1 accuracy": 88.13,
        "batch = 64 accuracy": 88.14,
        "batch = 128 accuracy": 88.14
    },
    {
        "name": f"{mtitle} INT8",
        "accuracy": 0,
        "energy": 0.007,
        "batch = 1 throughput": 9867.3,
        "batch = 64 throughput": 573905.2,
        "batch = 128 throughput": 1062157.5,
        "batch = 1 latency": .101,
        "batch = 64 latency": .112,
        "batch = 128 latency": .121,
        "batch = 1 engine size": 1.5M,
        "batch = 64 engine size": 1.2M,
        "batch = 128 engine size": 1.2M,
        "batch = 1 accuracy": 88.17,
        "batch = 64 accuracy": 88.13,
        "batch = 128 accuracy": 88.16
    },
    {
        "name": f"{mtitle} FP32 Pruned 10%",
        "accuracy": 87.43,
        "energy": 0.139,
        "batch = 1 throughput": 10943.1,
        "batch = 64 throughput": 315314.2,
        "batch = 128 throughput": 389125.6,
        "batch = 1 latency": .091,
        "batch = 64 latency": .203,
        "batch = 128 latency": .329,
        "batch = 1 engine size": 4.2M,
        "batch = 64 engine size": 4.2M,
        "batch = 128 engine size": 4.2M,
        "batch = 1 accuracy": 88.58,
        "batch = 64 accuracy": 88.57,
        "batch = 128 accuracy": 88.57
    },
    {
        "name": f"{mtitle} FP32 Pruned 30%",
        "accuracy": 88.55,
        "energy": 0.108,
        "batch = 1 throughput": 10902.2,
        "batch = 64 throughput": 311635.2,
        "batch = 128 throughput": 388853.3,
        "batch = 1 latency": .092,
        "batch = 64 latency": .205,
        "batch = 128 latency": .329,
        "batch = 1 engine size": 4.2M,
        "batch = 64 engine size": 4.2M,
        "batch = 128 engine size": 4.3M,
        "batch = 1 accuracy": 88.49,
        "batch = 64 accuracy": 88.48,
        "batch = 128 accuracy": 88.48
    },
    {
        "name": f"{mtitle} FP32 Pruned 50%",
        "accuracy": 88.44,
        "energy": 0.077,
        "batch = 1 throughput": 10961.5,
        "batch = 64 throughput": 314282.0,
        "batch = 128 throughput": 390365.2,
        "batch = 1 latency": .091,
        "batch = 64 latency": .204,
        "batch = 128 latency": .328,
        "batch = 1 engine size": 4.1M,
        "batch = 64 engine size": 4.2M,
        "batch = 128 engine size": 4.2M,
        "batch = 1 accuracy": 87.98,
        "batch = 64 accuracy": 87.97,
        "batch = 128 accuracy": 87.97
    },
    {
        "name": f"{mtitle} FP32 Pruned 70%",
        "accuracy": 88.94,
        "energy": 0.046,
        "batch = 1 throughput": 11098.6,
        "batch = 64 throughput": 313402.6,
        "batch = 128 throughput": 390450.5,
        "batch = 1 latency": .090,
        "batch = 64 latency": .204,
        "batch = 128 latency": .328,
        "batch = 1 engine size": 4.2M,
        "batch = 64 engine size": 4.2M,
        "batch = 128 engine size": 4.2M,
        "batch = 1 accuracy": 88.27,
        "batch = 64 accuracy": 88.27,
        "batch = 128 accuracy": 88.27
    },
    {
        "name": f"{mtitle} Pruned 70% and Quantized",
        "accuracy": 76.95,
        "energy": 0.002,
        "batch = 1 throughput": 7384.2,
        "batch = 64 throughput": 324801.9,
        "batch = 128 throughput": 527815.9,
        "batch = 1 latency": .135,
        "batch = 64 latency": .197,
        "batch = 128 latency": .243,
        "batch = 1 engine size": 1.5M,
        "batch = 64 engine size": 1.2M,
        "batch = 128 engine size": 1.2M,
        "batch = 1 accuracy": 96.00,
        "batch = 64 accuracy": 88.66,
        "batch = 128 accuracy": 86.84
    },
]