# Setting Up SDK Artifacts

In [None]:
import os
# Paths, model files and target-device settings, exported as environment
# variables so that the %%bash cells of this notebook can read them.
os.environ.update({
    'SNPE_ROOT': "/local/mnt/workspace/snpe/2.29.0.241129",  # set this to your SNPE SDK path
    'RAW_FILE_FOLDER': "raw",
    'DLC32': "models/detr_resnet101_fp32.dlc",  # path to your non-quantized DLC
    'DLC8': "models/detr_resnet101_w8a8.dlc",   # path to your quantized DLC
    'TARGET_INPUT_LIST': "list.txt",            # name of the input list file
    'ONDEVICE_FOLDER': "detr",
    'DEVICE_HOST': "localhost",
    'DEVICE_ID': "728b7a92",  # your device id; run "adb devices" to list them, e.g. "e18d5d0"
    'SNPE_TARGET_ARCH': "aarch64-android",
    'SNPE_TARGET_STL': "libc++_shared.so",
    'SNPE_TARGET_DSPARCH': "hexagon-v79",
})

## Generate model

In [None]:
import torch
import os
import shutil
import torch.nn as nn
# Fetch the pretrained DETR-ResNet101 model through torch.hub and switch it to eval mode.
model = torch.hub.load('facebookresearch/detr', 'detr_resnet101', pretrained=True)
model.eval()

# Sanity-check forward pass on a random input of the shape used for export.
# It runs under no_grad because only the output shape is needed, so there is
# no reason to record an autograd graph for it.
dummy_input = torch.randn(1, 3, 800, 1066)
with torch.no_grad():
    output = model(dummy_input)
print(output['pred_logits'].shape)

class ModifiedModel(nn.Module):
    """Wrapper that folds the class-score post-processing into the model.

    The wrapped model is expected to return a dict containing ``pred_logits``.
    ``forward`` replaces that entry with its softmax over the last dimension,
    restricted to batch element 0 and with the last class column dropped
    (presumably the "no object" class -- TODO confirm). All other entries are
    passed through unchanged.

    Args:
        base_model: module to wrap. When omitted, the notebook-level ``model``
            is used, so ``ModifiedModel()`` behaves as before.
    """

    def __init__(self, base_model=None):
        super(ModifiedModel, self).__init__()
        self.model = model if base_model is None else base_model
        self.model.eval()

    def forward(self, pixel_values):
        """Run the wrapped model and return its outputs with processed ``pred_logits``."""
        # Shallow-copy so the dict object returned by the wrapped model is not modified.
        output = dict(self.model(pixel_values))
        output['pred_logits'] = output['pred_logits'].softmax(-1)[0, :, :-1]
        return output
# Wrap the pretrained model and check the shape of the post-processed logits.
customModel = ModifiedModel()
customModel.eval()
dummy_input = torch.randn(1, 3, 800, 1066)
# Only the output shape is inspected, so skip autograd bookkeeping.
with torch.no_grad():
    output = customModel(dummy_input)
print(output['pred_logits'].shape)

In [None]:
# Create the folder that receives the exported ONNX file and the converted DLCs;
# exist_ok=True makes this cell safe to re-run.
os.makedirs('models', exist_ok=True)

In [None]:
# Export the wrapped model to ONNX (opset 11) using a random 1x3x800x1066 example input.
dummy_input = torch.randn(1, 3, 800, 1066)

torch.onnx.export(
    customModel,
    dummy_input,
    "models/detr_resnet101.onnx",
    opset_version=11,
    verbose=False,
)

In [None]:
%%bash
# Convert the exported ONNX file to an FP32 DLC and save the snpe-dlc-info
# report for that DLC next to it.
source $SNPE_ROOT/bin/envsetup.sh
MODEL_STEM=models/detr_resnet101
snpe-onnx-to-dlc --input_network ${MODEL_STEM}.onnx --output_path ${MODEL_STEM}_fp32.dlc
snpe-dlc-info -i ${MODEL_STEM}_fp32.dlc > ${MODEL_STEM}_fp32.txt

## Import libraries

In [None]:
import math
import os
from PIL import Image
import requests
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output
import torch
import shutil
import numpy as np
from torch import nn
from torchvision.models import resnet50
import torchvision.transforms as T
torch.set_grad_enabled(False);
import os
import cv2
import numpy as np
from numpy import asarray
from PIL import Image
import glob
import torch.nn.functional as nnf
import subprocess
!pip3 install ipywidgets

## Getting the Dataset and Preparation

In [None]:
# User can download dataset of their choice for accuracy validation. 
# User needs to follow the pre/post processing steps prescribed in dataset (or) given below. 
# You can use coco val2017 or part of it.
!wget http://images.cocodataset.org/zips/val2017.zip -q --show-progress
!unzip val2017.zip

In [None]:
# val2017 is the dataset folder path. Keep only 15 images and delete the rest.
# os.listdir returns entries in arbitrary order, so sort first to keep the
# same 15 files on every run.
files = sorted(os.listdir('val2017'))
for file in files[15:]:
    os.remove(os.path.join('val2017', file))
    

In [None]:
%%bash
# Delete the downloaded archive; -f keeps the cell quiet if it is already gone.
rm -rf val2017.zip

### Pre-Processing Steps of DETR Model

In [None]:
# Input pipeline: resize to 800, convert to a tensor, then apply the standard
# PyTorch per-channel mean/std normalisation.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
transform = T.Compose([T.Resize(800), T.ToTensor(), T.Normalize(_NORM_MEAN, _NORM_STD)])

# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (centre_x, centre_y, width, height) to (xmin, ymin, xmax, ymax).

    Args:
        x: tensor whose dimension 1 holds the four box values.

    Returns:
        Tensor with the four corner coordinates stacked along dimension 1.
    """
    centre_x, centre_y, width, height = x.unbind(1)
    half_w = 0.5 * width
    half_h = 0.5 * height
    corners = [
        centre_x - half_w,
        centre_y - half_h,
        centre_x + half_w,
        centre_y + half_h,
    ]
    return torch.stack(corners, dim=1)

def rescale_bboxes(out_bbox, size):
    """Convert centre-format boxes to corner format and scale them by the image size.

    Args:
        out_bbox: boxes in (centre_x, centre_y, width, height) form.
        size: ``(width, height)`` pair used as the scale factors.

    Returns:
        Boxes as (xmin, ymin, xmax, ymax), with x values multiplied by the
        width and y values multiplied by the height.
    """
    width, height = size
    scale = torch.tensor([width, height, width, height], dtype=torch.float32)
    return box_cxcywh_to_xyxy(out_bbox) * scale

def plot_results(pil_img, prob, boxes,Image_count):
    """Draw the detections on an image, save the figure as a JPEG and show it.

    Args:
        pil_img: image to display (handed to ``imshow``).
        prob: per-detection class scores; the arg-max entry of each row gives
            the label (looked up in ``CLASSES``) and the score that is printed.
        boxes: tensor of ``(xmin, ymin, xmax, ymax)`` rows, one per detection.
        Image_count: running figure index. It names the file
            (``<Image_count>.jpg``) and selects the folder: even values go to
            ``output/CPU``, odd values to ``output/DSP``.
    """
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(2, 2, 3)
    ax.imshow(pil_img)
    colors = COLORS * 100  # repeat the palette so zip() does not run out of colours
    for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes.tolist(), colors):
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, color=c, linewidth=1))
        cl = int(p.argmax())
        text = f'{CLASSES[cl]}: {p[cl]:0.2f}'
        ax.text(xmin, ymin, text, fontsize=10,
                bbox=dict(alpha=0.5))
    # Save directly into the target folder. Saving to the working directory and
    # moving the file afterwards raises once a file of that name already exists
    # in the folder, i.e. whenever the notebook is run a second time.
    target_dir = "output/CPU" if Image_count % 2 == 0 else "output/DSP"
    os.makedirs(target_dir, exist_ok=True)
    fig.savefig(os.path.join(target_dir, str(Image_count) + ".jpg"))
    plt.show()
    # Release the figure; this function is called twice per image in a loop.
    plt.close(fig)

### Steps to create raw images

In [None]:
# Folder that receives the pre-processed .raw inputs. os.makedirs with
# exist_ok=True is portable and does not fail when the cell is re-run.
name="raw"
os.makedirs(name, exist_ok=True)
def detect(imgfile,i):
    """Pre-process one image and store it as a float32 raw file under ``raw/``.

    The image is passed through ``transform``, resized to 800x1066 with bicubic
    interpolation, rearranged so the channel axis comes last, and written to
    ``raw/<name>.raw`` where ``<name>`` is the file name of ``imgfile`` up to
    its first dot.

    Args:
        imgfile: path of the image to convert.
        i: index of the image in the caller's listing. Kept so existing calls
            keep working; the output name is derived from ``imgfile`` itself.
    """
    # Force three channels: `transform` normalises with 3-element mean/std,
    # which does not match single-channel (grayscale) or 4-channel inputs.
    origimg = Image.open(imgfile).convert("RGB")
    # Apply the transform and add a leading batch dimension
    img = transform(origimg).unsqueeze(0)

    img = nnf.interpolate(img, size=(800, 1066), mode='bicubic', align_corners=False)

    # Move the channel axis (dim 1) to the end before dumping the buffer
    img_to_save = img.numpy().transpose(0, 2, 3, 1).astype(np.float32)

    # Take the name from the path argument rather than a notebook-level list
    stem = os.path.basename(imgfile).split(".")[0]
    img_to_save.tofile("raw/" + stem + ".raw")
    
# Convert every entry of the dataset folder whose name contains "jpg".
# Change "val2017" to the folder name where you have your dataset images.
filenames = os.listdir("val2017")
for i, fname in enumerate(filenames):
    if "jpg" not in fname.lower():
        continue
    detect("val2017/" + fname, i)

In [None]:
%%bash
# The pattern is quoted so that find receives it literally; unquoted, the shell
# would expand *.raw itself if a matching file exists in the current directory.
# sort makes the order of list.txt the same on every run.
find ./raw -name "*.raw" | sort > list.txt

### Getting the Quantized Model

In [None]:
%%bash
# Quantize the FP32 DLC, passing list.txt as the input list, and write the
# result as the w8a8 DLC.
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-quantize \
    --input_dlc models/detr_resnet101_fp32.dlc \
    --input_list list.txt \
    --output_dlc models/detr_resnet101_w8a8.dlc

- For snpe-dlc-graph-prepare, set the value of `--htp_socs` explicitly.
- Choose the value of <b>--htp_socs</b> based on the device you will be running on, for example <b>sm8750, sm8650 or sm8550</b>.

In [None]:
%%bash
# Prepare the quantized DLC for the HTP of the chosen SoC.
# The output is named ..._w8a8_gp.dlc because that is the file the later cells
# push to the device and pass to snpe-net-run.
# NOTE(review): the tensor names given to --set_output_tensors depend on the
# exported graph -- confirm them against the snpe-dlc-info report.
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-graph-prepare \
    --input_dlc models/detr_resnet101_w8a8.dlc \
    --htp_socs=sm8750 \
    --set_output_tensors=5848,5856 \
    --output_dlc=models/detr_resnet101_w8a8_gp.dlc

**Optional Code blocks**
## Creating Bin and Lib Folder On Device

- <b>The blocks below are completely optional.</b>
- <b>The model is already prepared at this point.</b>
- <b>Run the code blocks below only if you want to try out the model by pushing it to a device.</b>

In [None]:
%%bash
#source throughput.sh >>dump.txt
# adb invocation addressing the configured host and device id
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Create the bin, lib and dsp/lib folders on the device; the && chain stops at the first failure
$DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin" && $DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib" && $DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/dsp/lib"
# Working folder for the model, input list and raw inputs (runs regardless of the chain above)
$DEVICE_SHELL shell "mkdir -p /data/local/tmp/$ONDEVICE_FOLDER"

# Pushing All Bin and Lib Files onto the Device
* use hexagon-v79 for sm8750
* use hexagon-v75 for sm8650
* use hexagon-v73 for sm8550

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Take the Hexagon architecture from SNPE_TARGET_DSPARCH (set in the first
# cell) so that changing the target device needs only one edit; fall back to
# hexagon-v79 when the variable is not set.
DSP_ARCH="${SNPE_TARGET_DSPARCH:-hexagon-v79}"
# C++ runtime and SNPE shared libraries for the target architecture
$DEVICE_SHELL push $SNPE_ROOT/lib/$SNPE_TARGET_ARCH/$SNPE_TARGET_STL /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
$DEVICE_SHELL push $SNPE_ROOT/lib/$SNPE_TARGET_ARCH/*.so /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
# Unsigned DSP libraries for the selected Hexagon architecture
$DEVICE_SHELL push $SNPE_ROOT/lib/$DSP_ARCH/unsigned/*.so /data/local/tmp/snpeexample/dsp/lib
# snpe-net-run executable
$DEVICE_SHELL push $SNPE_ROOT/bin/$SNPE_TARGET_ARCH/snpe-net-run /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin

# Pushing Artifacts onto Device

In [None]:
%%bash
# adb invocation addressing the configured host and device id
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Make sure the on-device working folder exists before artifacts are pushed (-p: no error if it does)
$DEVICE_SHELL shell "mkdir -p /data/local/tmp/$ONDEVICE_FOLDER"

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Push, in order: the raw input folder, the input list, the FP32 DLC, the
# graph-prepared DLC and the quantized DLC into the on-device working folder.
for artifact in $RAW_FILE_FOLDER $TARGET_INPUT_LIST $DLC32 models/detr_resnet101_w8a8_gp.dlc $DLC8
do
    $DEVICE_SHELL push $artifact /data/local/tmp/$ONDEVICE_FOLDER
done

# Running Inference with the 8-bit DLC on the DSP Runtime

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Everything between the EOF markers is handed to the device shell on its
# standard input, so these commands run on the device no matter how this cell
# is fed to bash. The delimiter is quoted: variables are expanded on the
# device, not on the host.
$DEVICE_SHELL shell <<'EOF'
export SNPE_TARGET_ARCH=aarch64-android
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
export PATH=$PATH:/data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin
export ADSP_LIBRARY_PATH="/data/local/tmp/snpeexample/dsp/lib;/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp"
export OUTPUT_FOLDER=OUTPUT_8b_DSP
export DLC8=detr_resnet101_w8a8_gp.dlc
export ONDEVICE_FOLDER="detr"
cd /data/local/tmp/$ONDEVICE_FOLDER &&
chmod -R 777 * &&
snpe-net-run --container $DLC8 --input_list list.txt  --set_unconsumed_as_output --output_dir=$OUTPUT_FOLDER --use_dsp
EOF

# Running Inference with the 32-bit DLC on the CPU Runtime

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Everything between the EOF markers is handed to the device shell on its
# standard input, so these commands run on the device no matter how this cell
# is fed to bash. The delimiter is quoted: variables are expanded on the
# device, not on the host.
$DEVICE_SHELL shell <<'EOF'
export SNPE_TARGET_ARCH=aarch64-android
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
export PATH=$PATH:/data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin
export OUTPUT_FOLDER=OUTPUT_32b_CPU
export DLC32=detr_resnet101_fp32.dlc
export ONDEVICE_FOLDER="detr"
cd /data/local/tmp/$ONDEVICE_FOLDER &&
snpe-net-run --container $DLC32 --input_list list.txt  --output_dir=$OUTPUT_FOLDER --set_unconsumed_as_output
EOF

# Pulling the Output Folders Generated for Each Precision and Runtime

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
# Copy both result folders (DSP run first, then CPU run) from the device into
# the current directory under the same names.
for result_dir in OUTPUT_8b_DSP OUTPUT_32b_CPU
do
    $DEVICE_SHELL pull /data/local/tmp/$ONDEVICE_FOLDER/$result_dir $result_dir
done

## Post-Processing the Inference Results

In [None]:
# Class-index -> label lookup used by plot_results (CLASSES[cl]).
# The list has 91 entries, matching the 91 score columns that the
# post-processing cell reshapes the class-score output to.
# 'N/A' marks indices without a label -- presumably gaps in the COCO category
# ids; TODO confirm against the dataset definition.
CLASSES = [
    'N/A', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack',
    'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A',
    'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush'
]

# RGB triples (components in 0..1) for the box outlines; plot_results repeats
# this six-colour palette and assigns one colour per detection in order.
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]

In [None]:
import matplotlib.pyplot as plt
# File names snpe-net-run wrote for the two model outputs inside each Result_<n> folder.
# NOTE(review): these names come from the exported graph -- confirm them
# against the snpe-dlc-info report if the model is re-exported.
LOGITS_RAW = '5867.raw'   # class scores, read as (100, 91)
BOXES_RAW = '5860.raw'    # boxes, read as (1, 100, 4)
SCORE_THRESHOLD = 0.9     # keep detections whose best class score exceeds this


def _load_predictions(result_dir):
    """Read one Result_<n> folder and return ``(class scores, boxes)`` as tensors."""
    scores = np.fromfile(os.path.join(result_dir, LOGITS_RAW), np.float32).reshape(100, 91)
    boxes = np.fromfile(os.path.join(result_dir, BOXES_RAW), np.float32).reshape(1, 100, 4)
    return torch.from_numpy(scores), torch.from_numpy(boxes)


# Image names (no folder, no extension) in the order of list.txt; Result_<n>
# is paired with the n-th entry of this list.
Image_Paths = []
with open('list.txt', 'r') as f:
    for line in f:
        line = line.strip()
        if line:  # ignore blank lines, e.g. a trailing newline at the end of the file
            Image_Paths.append(line.split("/")[-1].split(".")[0])

# Folders plot_results stores the figures in; safe to run repeatedly.
for folder in ("output/CPU", "output/DSP"):
    os.makedirs(folder, exist_ok=True)

# (printed title, folder pulled from the device) for each runtime. The CPU
# entry must stay first: plot_results files even Image_count values under
# output/CPU and odd ones under output/DSP.
RUNS = (
    ("CPU FP32 Inference Result", "OUTPUT_32b_CPU"),
    ("DSP INT8 Inference Result", "OUTPUT_8b_DSP"),
)

count = Image_count = 0
for image in Image_Paths:
    image_path = 'val2017/' + image + ".jpg"
    im = Image.open(image_path)
    for title, output_root in RUNS:
        probas, boxes = _load_predictions(os.path.join(output_root, 'Result_' + str(count)))
        keep = probas.max(-1).values > SCORE_THRESHOLD
        bboxes_scaled = rescale_bboxes(boxes[0, keep], im.size)
        print(title)
        plot_results(im, probas[keep], bboxes_scaled, Image_count)
        Image_count = Image_count + 1
    count = count + 1