## **Mount Google Drive And Load Deepsparse Engine**
Note: Restart the kernel after completing the `pip install` cells below.


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that files
# stored on Drive can be addressed by path from later cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip uninstall -y flask click

In [None]:
!pip install deepsparse

---

## **Set up a benchmarking exercise**

In [None]:
import numpy
from deepsparse import benchmark_model
from deepsparse.cpu import cpu_architecture

In [None]:
# Report whether this CPU exposes the VNNI extensions; without them the
# quantized model does not get its quantized speedups, which skews the
# benchmark comparison further down.
print(
    "VNNI extensions detected, model will run with quantized speedups\n"
    if cpu_architecture()["vnni"]
    else (
        "WARNING: No VNNI extensions detected. Your model will not run with "
        "quantized speedups which will affect benchmarking\n"
    )
)




In [None]:
# Print the host CPU description (CPU count, model name, instruction-set flags)
# so the benchmark numbers below can be read against the hardware they ran on.
!lscpu

Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              2
On-line CPU(s) list: 0,1
Thread(s) per core:  2
Core(s) per socket:  1
Socket(s):           1
NUMA node(s):        1
Vendor ID:           GenuineIntel
CPU family:          6
Model:               79
Model name:          Intel(R) Xeon(R) CPU @ 2.20GHz
Stepping:            0
CPU MHz:             2199.998
BogoMIPS:            4399.99
Hypervisor vendor:   KVM
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            256K
L3 cache:            56320K
NUMA node0 CPU(s):   0,1
Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_sin

In [13]:
# Benchmark configuration shared by the cells below.
NUM_WARMUP_ITERATIONS = 20  # passed to benchmark_model as num_warmup_iterations
NUM_ITERATIONS = 50  # passed to benchmark_model as num_iterations
NUM_CORES = 2  # maximum number of cores available
BATCH_SIZE = 16  # leading dimension of the random input batch

In [14]:
def benchmark_imagenette_model(
    model_name,
    model_path,
    batch_size=None,
    num_cores=None,
    num_iterations=None,
    num_warmup_iterations=None,
    input_shape=(3, 224, 224),
):
    """Benchmark a model on one batch of random float32 input and print the results.

    Args:
        model_name: Human-readable label used only in the printed banner.
        model_path: Model reference handed to ``benchmark_model`` as ``model``
            (the notebook passes either an ONNX file path or a ``zoo:`` stub).
        batch_size: Number of samples in the random batch. Defaults to the
            module-level ``BATCH_SIZE`` when ``None``.
        num_cores: Forwarded as ``num_cores``. Defaults to ``NUM_CORES``.
        num_iterations: Forwarded as ``num_iterations``. Defaults to
            ``NUM_ITERATIONS``.
        num_warmup_iterations: Forwarded as ``num_warmup_iterations``.
            Defaults to ``NUM_WARMUP_ITERATIONS``.
        input_shape: Per-sample shape appended after the batch dimension.
            Defaults to ``(3, 224, 224)``, the shape the notebook has always used.

    Returns:
        Whatever ``benchmark_model`` returns, unchanged.
    """
    # Resolve defaults at call time so edits to the config cell take effect
    # without having to re-run this definition.
    batch_size = BATCH_SIZE if batch_size is None else batch_size
    num_cores = NUM_CORES if num_cores is None else num_cores
    num_iterations = NUM_ITERATIONS if num_iterations is None else num_iterations
    num_warmup_iterations = (
        NUM_WARMUP_ITERATIONS
        if num_warmup_iterations is None
        else num_warmup_iterations
    )

    print(
        f"Benchmarking {model_name} for {num_iterations} iterations at batch "
        f"size {batch_size} with {num_cores} CPU cores"
    )

    # generates random data: a single-input list holding one contiguous
    # float32 array of shape (batch_size, *input_shape)
    sample_input = [
        numpy.ascontiguousarray(
            numpy.random.randn(batch_size, *input_shape).astype(numpy.float32)
        )
    ]

    results = benchmark_model(
        model=model_path,
        inp=sample_input,
        batch_size=batch_size,
        num_cores=num_cores,
        num_iterations=num_iterations,
        num_warmup_iterations=num_warmup_iterations,
        show_progress=True,
    )
    print(f"results:\n{results}")
    return results

In [15]:
# Pruned-quantized ResNet-50 Imagenette ONNX file stored on the mounted Drive
quantized_onnx_path = "/content/drive/MyDrive/nm/onboarding/pytorch_sparse_quantized_transfer_learning/resnet50_imagenette_pruned_quant.onnx"

# Baseline run: base ResNet-50 Imagenette model downloaded from SparseZoo
base_results = benchmark_imagenette_model(
    model_name="ResNet-50 Imagenette Base",
    model_path="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenette/base-none",
)

# Optimized run: the pruned-quantized model loaded from the Drive path above
optimized_results = benchmark_imagenette_model(
    model_name="ResNet-50 Imagenette pruned-quantized",
    model_path=quantized_onnx_path,
)

# Ratio of per-batch latencies; a value above 1 means the optimized model
# needs less time per batch than the baseline.
speed_up = base_results.ms_per_batch / optimized_results.ms_per_batch
print(f"Speed-up from sparse-quantized transfer learning: {speed_up}")

Benchmarking ResNet-50 Imagenette Base for 50 iterations at batch size 16 with 2 CPU cores


  0%|          | 0/50 [00:00<?, ?it/s]

results:
BenchmarkResults:
	items_per_second: 6.360972761482003
	ms_per_batch: 2515.3385496139526
	batch_times_mean: 2.5153385496139524
	batch_times_median: 2.474809169769287
	batch_times_std: 0.18760680151208822
Benchmarking ResNet-50 Imagenette pruned-quantized for 50 iterations at batch size 16 with 2 CPU cores


  0%|          | 0/50 [00:00<?, ?it/s]

results:
BenchmarkResults:
	items_per_second: 24.28960955147172
	ms_per_batch: 658.7178754806519
	batch_times_mean: 0.6587178754806519
	batch_times_median: 0.6160538196563721
	batch_times_std: 0.14224137377999602
Speed-up from sparse-quantized transfer learning: 3.8185369537430054
