### Install the required library

In [1]:
!pip3 install qai-hub

Collecting qai-hub
  Downloading qai_hub-0.27.0-py3-none-any.whl (102 kB)
     ---------------------------------------- 0.0/102.2 kB ? eta -:--:--
     ---------------------------------------- 102.2/102.2 kB ? eta 0:00:00
Collecting prettytable>=3.9.0
  Downloading prettytable-3.16.0-py3-none-any.whl (33 kB)
Collecting backoff>=2.2
  Using cached backoff-2.2.1-py3-none-any.whl (15 kB)
Collecting numpy<2,>=1.22.0
  Downloading numpy-1.26.4-cp310-cp310-win_amd64.whl (15.8 MB)
     ---------------------------------------- 0.0/15.8 MB ? eta -:--:--
     - -------------------------------------- 0.5/15.8 MB 15.9 MB/s eta 0:00:01
     -- ------------------------------------- 1.0/15.8 MB 12.9 MB/s eta 0:00:02
     ---- ----------------------------------- 1.8/15.8 MB 16.4 MB/s eta 0:00:01
     ------- -------------------------------- 2.9/15.8 MB 18.3 MB/s eta 0:00:01
     --------- ------------------------------ 3.9/15.8 MB 17.9 MB/s eta 0:00:01
     ----------- ---------------------------- 4.7


[notice] A new release of pip is available: 23.0.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Put the API token from the online dashboard

In [2]:
!qai-hub configure --api_token TOKEN

qai-hub configuration saved to C:\Users\m/.qai_hub/client.ini
[api]
api_token = TOKEN
api_url = https://app.aihub.qualcomm.com
web_url = https://app.aihub.qualcomm.com
verbose = True






In [4]:
!pip3 install "qai-hub[torch]"

Collecting torch>=1.13
  Downloading torch-2.6.0-cp310-cp310-win_amd64.whl (204.2 MB)
     ---------------------------------------- 0.0/204.2 MB ? eta -:--:--
     --------------------------------------- 0.3/204.2 MB 18.6 MB/s eta 0:00:11
     --------------------------------------- 0.9/204.2 MB 14.1 MB/s eta 0:00:15
     --------------------------------------- 1.7/204.2 MB 13.7 MB/s eta 0:00:15
     --------------------------------------- 2.5/204.2 MB 14.5 MB/s eta 0:00:14
      -------------------------------------- 3.2/204.2 MB 15.7 MB/s eta 0:00:13
      -------------------------------------- 4.1/204.2 MB 17.6 MB/s eta 0:00:12
      -------------------------------------- 4.9/204.2 MB 18.5 MB/s eta 0:00:11
     - ------------------------------------- 5.3/204.2 MB 17.8 MB/s eta 0:00:12
     - ------------------------------------- 6.2/204.2 MB 18.0 MB/s eta 0:00:11
     - ------------------------------------- 7.7/204.2 MB 18.9 MB/s eta 0:00:11
     - ----------------------------------


[notice] A new release of pip is available: 23.0.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Run below the mobilenet v2 model which is optimized for Snapdragon X Elite CRD. You can change the model type and device to your liking. After the job is triggered after executing below, you can see the details about the execution on the Qualcomm dashboard which also shows the model architecture diagram (quite cool!)

In [5]:
import qai_hub as hub
import torch
from torchvision.models import mobilenet_v2
import requests
import numpy as np
from PIL import Image

# Using pre-trained MobileNet
torch_model = mobilenet_v2(pretrained=True)
torch_model.eval()

# Step 1: Trace model
input_shape = (1, 3, 224, 224)
example_input = torch.rand(input_shape)
traced_torch_model = torch.jit.trace(torch_model, example_input)

# Step 2: Compile model
compile_job = hub.submit_compile_job(
    model=traced_torch_model,
    device=hub.Device("Snapdragon X Elite CRD"),
    input_specs=dict(image=input_shape),
    options="--target_runtime onnx",
)

# Step 3: Profile on cloud-hosted device
target_model = compile_job.get_target_model()
profile_job = hub.submit_profile_job(
    model=target_model,
    device=hub.Device("Snapdragon X Elite CRD"),
)

# Step 4: Run inference on cloud-hosted device
sample_image_url = (
    "https://qaihub-public-assets.s3.us-west-2.amazonaws.com/apidoc/input_image1.jpg"
)
response = requests.get(sample_image_url, stream=True)
response.raw.decode_content = True
image = Image.open(response.raw).resize((224, 224))
input_array = np.expand_dims(
    np.transpose(np.array(image, dtype=np.float32) / 255.0, (2, 0, 1)), axis=0
)

# Run inference using the on-device model on the input image
inference_job = hub.submit_inference_job(
    model=target_model,
    device=hub.Device("Snapdragon X Elite CRD"),
    inputs=dict(image=[input_array]),
)
on_device_output = inference_job.download_output_data()

# Step 5: Post-processing the on-device output
output_name = list(on_device_output.keys())[0]
out = on_device_output[output_name][0]
on_device_probabilities = np.exp(out) / np.sum(np.exp(out), axis=1)

# Read the class labels for imagenet
sample_classes = "https://qaihub-public-assets.s3.us-west-2.amazonaws.com/apidoc/imagenet_classes.txt"
response = requests.get(sample_classes, stream=True)
response.raw.decode_content = True
categories = [str(s.strip()) for s in response.raw]

# Print top five predictions for the on-device model
print("Top-5 On-Device predictions:")
top5_classes = np.argsort(on_device_probabilities[0], axis=0)[-5:]
for c in reversed(top5_classes):
    print(f"{c} {categories[c]:20s} {on_device_probabilities[0][c]:>6.1%}")

# Step 6: Download model
target_model = compile_job.get_target_model()
target_model.download("mobilenet_v2.onnx")


Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to C:\Users\m/.cache\torch\hub\checkpoints\mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 31.7MB/s]


Uploading tmp5og0lqd5.pt


100%|[34m██████████[0m| 13.9M/13.9M [00:02<00:00, 5.43MB/s]


Scheduled compile job (j56ee436g) successfully. To see the status and results:
    https://app.aihub.qualcomm.com/jobs/j56ee436g/

Waiting for compile job (j56ee436g) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          
Scheduled profile job (jp3vv043g) successfully. To see the status and results:
    https://app.aihub.qualcomm.com/jobs/jp3vv043g/



Uploading dataset: 154kB [00:00, 670kB/s]                    <?, ?B/s]


Scheduled inference job (jgokk61qp) successfully. To see the status and results:
    https://app.aihub.qualcomm.com/jobs/jgokk61qp/

Waiting for inference job (jgokk61qp) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          


tmp_m09pwup.h5: 100%|[34m██████████[0m| 14.5k/14.5k [00:00<?, ?B/s]


Top-5 On-Device predictions:
968 b'cup'                71.3%
504 b'coffee mug'         16.4%
967 b'espresso'            7.8%
809 b'soup bowl'           1.4%
659 b'mixing bowl'         1.3%


mobilenet_v2.onnx: 100%|[34m██████████[0m| 13.3M/13.3M [00:01<00:00, 10.8MB/s]

Downloaded model to mobilenet_v2.onnx





'mobilenet_v2.onnx'