# Setting Up All Artifact Details

In [None]:
## Environment variables consumed by the %%bash cells of this notebook (host paths, on-device folder, adb target).
## Give appropriate permission to the directory "FOLDER_WITH_ARTIFACTS" you are working with
import os
# NOTE(review): machine-specific absolute path; every user has to point this at their own SNPE SDK root.
os.environ['SNPE_ROOT']="/local/mnt/workspace/aditya/qaisw-v2.15.1.230926150623_62883"  # set up your snpe path here.
os.environ['RAW_FILE_FOLDER']="raw"  # host folder with the preprocessed .raw inputs; pushed to the device
os.environ['DLC32']="models/yolo_nas_fp32.dlc"  # FP32 DLC produced by snpe-onnx-to-dlc
os.environ['DLC8']="models/yolo_nas_w8a8.dlc"  # quantized DLC produced by snpe-dlc-quantize
os.environ['TARGET_INPUT_LIST']="input.txt"  # list of raw files; pushed to the device next to the DLCs
os.environ['ONDEVICE_FOLDER']="yolonas"  # working folder created under /data/local/tmp on the device
os.environ['DEVICE_HOST']="localhost"  # passed to adb via -H
os.environ['DEVICE_ID']="2dce6316"  # passed to adb via -s; change to your device id. Use command "adb devices" to get device names.
os.environ['SNPE_TARGET_ARCH']="aarch64-android"  # selects the lib/ and bin/ subfolders of SNPE_ROOT that are pushed
os.environ['SNPE_TARGET_STL']="libc++_shared.so"  # C++ runtime library pushed alongside the SNPE libraries

In [None]:
## Note- Use python3.8 or above for generating onnx
!pip install super-gradients==3.1.2
import torch
from super_gradients.training import models
from super_gradients.common.object_names import Models
import cv2
import numpy as np
import os

## Getting The dataset

In [None]:
# Download the dataset of your choice; the cells below expect its images in the "val2017/" folder.

## Getting the ONNX Model

In [None]:
# Folder that receives the exported ONNX file and the converted/quantized DLCs
os.makedirs('models', exist_ok=True)

In [None]:
# Load YOLO-NAS S with COCO-pretrained weights
model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco")
# Prepare model for conversion
# The export uses a 1 x 3 x 320 x 320 input, the same 320x320 size the preprocessing cell resizes images to
# (PyTorch tensors are normally [Batch x Channels x Height x Width]; the input is square here, so the order does not matter)
model.eval()
model.prep_model_for_conversion(input_size=[1, 3, 320, 320])
# Create dummy_input with the same shape that was passed to prep_model_for_conversion
dummy_input = torch.randn([1, 3, 320, 320], device="cpu")
# Convert model to onnx
torch.onnx.export(model, dummy_input, "models/yolo_nas_s.onnx", opset_version=11)

#### Getting the FP32 Model

In [None]:
%%bash
# Load the SNPE environment, then convert the exported ONNX model into the FP32 DLC
source $SNPE_ROOT/bin/envsetup.sh
snpe-onnx-to-dlc -i models/yolo_nas_s.onnx -o models/yolo_nas_fp32.dlc

## Preprocessing

In [None]:
def preprocess(original_image):
    """Resize an image to 320x320 and divide its pixel values by 255.

    NOTE(review): the channel order is left exactly as the caller supplies it
    (the caller in this notebook reads images with cv2.imread) - confirm this
    matches what the exported model expects.
    """
    shrunk = cv2.resize(original_image, (320, 320))
    return shrunk / 255
## Download the dataset and give the path to its image folder here
dataset_path = "val2017/"
# The .raw tensors are written to, listed from and pushed from "raw",
# so that is the directory that has to exist before the loop runs.
raw_dir = "raw"
os.makedirs(raw_dir, exist_ok=True)
filenames=[]
for path in os.listdir(dataset_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dataset_path, path)):
        filenames.append(os.path.join(dataset_path, path))
for filename in filenames:
    original_image = cv2.imread(filename)
    if original_image is None:
        # cv2.imread returns None for files it cannot decode; skip them instead of crashing in cv2.resize
        print("Skipping unreadable image:", filename)
        continue
    img = preprocess(original_image)
    img = img.astype(np.float32)
    # Raw file is named after the image stem (text before the first dot), e.g. val2017/0001.jpg -> raw/0001.raw
    img.tofile(os.path.join(raw_dir, filename.split("/")[-1].split(".")[0]+".raw"))

In [None]:
%%bash
# List every preprocessed tensor; the line order of input.txt defines the Result_<i> numbering later on.
# The pattern is quoted so the shell cannot expand it against .raw files in the current directory before find runs.
find raw -name "*.raw" > input.txt

## Quantize the DLC

In [None]:
%%bash
# Quantize the FP32 DLC into the w8a8 DLC, using the raw files listed in input.txt
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-quantize --input_dlc models/yolo_nas_fp32.dlc --input_list input.txt --use_enhanced_quantizer --use_adjusted_weights_quantizer --axis_quant --output_dlc models/yolo_nas_w8a8.dlc

In [None]:
%%bash
# List the attached devices; DEVICE_ID in the first cell must be one of the ids printed here
adb devices

In [None]:
%%bash
# Create the bin, lib and dsp/lib folders for the SNPE runtime under /data/local/tmp/snpeexample on the device
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin" && $DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib" && $DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/dsp/lib"

In [None]:
%%bash
# Push the C++ runtime, snpe-net-run, the DSP libraries and the SNPE libraries for the target arch to the device
# NOTE(review): the DSP library folder is fixed to hexagon-v75; presumably it has to match the DSP of the target device - confirm
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL push $SNPE_ROOT/lib/$SNPE_TARGET_ARCH/$SNPE_TARGET_STL /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
$DEVICE_SHELL push $SNPE_ROOT/bin/$SNPE_TARGET_ARCH/snpe-net-run /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin
$DEVICE_SHELL push $SNPE_ROOT/lib/hexagon-v75/unsigned/*.so /data/local/tmp/snpeexample/dsp/lib
$DEVICE_SHELL push $SNPE_ROOT/lib/$SNPE_TARGET_ARCH/*.so /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib

In [None]:
%%bash
# Create the on-device working folder that will hold the DLCs, the raw inputs and the input list
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL shell "mkdir -p /data/local/tmp/$ONDEVICE_FOLDER"

In [None]:
%%bash
# Push both DLCs, the folder of raw inputs and the input list into the on-device working folder
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL push $DLC32 /data/local/tmp/$ONDEVICE_FOLDER
$DEVICE_SHELL push $DLC8 /data/local/tmp/$ONDEVICE_FOLDER
$DEVICE_SHELL push $RAW_FILE_FOLDER /data/local/tmp/$ONDEVICE_FOLDER
$DEVICE_SHELL push $TARGET_INPUT_LIST /data/local/tmp/$ONDEVICE_FOLDER

In [None]:
%%bash
# Run the quantized DLC with snpe-net-run and --use_dsp; results go to OUTPUT_8b_DSP inside the on-device folder.
# Tensors 885 and 877 are the two outputs the post-processing cells later read back as boxes and class scores.
# NOTE(review): `adb shell` is started without a command, so every line after it looks intended as input
# to the device shell (the paths are on-device paths) - confirm the rest of the cell reaches it in your Jupyter setup.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL shell
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/snpeexample/aarch64-android/lib
export PATH=$PATH:/data/local/tmp/snpeexample/aarch64-android/bin
export OUTPUT_FOLDER=OUTPUT_8b_DSP
export OUTPUT_DLC_QUANTIZED8=yolo_nas_w8a8.dlc
export ADSP_LIBRARY_PATH="/data/local/tmp/snpeexample/dsp/lib;/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp"
export ONDEVICE_FOLDER="yolonas"
cd /data/local/tmp/$ONDEVICE_FOLDER &&
snpe-net-run --container $OUTPUT_DLC_QUANTIZED8 --input_list input.txt --set_output_tensors 885,877 --output_dir $OUTPUT_FOLDER --use_dsp

In [None]:
%%bash
# Run the FP32 DLC with snpe-net-run (no runtime flag is passed); results go to OUTPUT_32b_CPU inside the on-device folder.
# Tensors 885 and 877 are the two outputs the post-processing cells later read back as boxes and class scores.
# NOTE(review): `adb shell` is started without a command, so every line after it looks intended as input
# to the device shell (the paths are on-device paths) - confirm the rest of the cell reaches it in your Jupyter setup.
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL shell
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/snpeexample/aarch64-android/lib
export PATH=$PATH:/data/local/tmp/snpeexample/aarch64-android/bin
export OUTPUT_FOLDER=OUTPUT_32b_CPU
export OUTPUT_DLC_32=yolo_nas_fp32.dlc
export ONDEVICE_FOLDER="yolonas"
cd /data/local/tmp/$ONDEVICE_FOLDER &&
snpe-net-run --container $OUTPUT_DLC_32 --input_list input.txt --set_output_tensors 885,877 --output_dir $OUTPUT_FOLDER

In [None]:
# Local folder that receives the result directories pulled from the device
os.makedirs('output',exist_ok=True)

## Pull output from device

In [None]:
%%bash
# Copy both result folders (DSP run and CPU run) from the device into ./output
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL pull /data/local/tmp/$ONDEVICE_FOLDER/OUTPUT_8b_DSP output/OUTPUT_8b_DSP
$DEVICE_SHELL pull /data/local/tmp/$ONDEVICE_FOLDER/OUTPUT_32b_CPU output/OUTPUT_32b_CPU

In [None]:
def ImageNames():
    """Return the image names listed in input.txt, in file order.

    Each non-empty line is expected to look like "<folder>/<name>.raw". The
    leading folder component and everything from the first dot onwards are
    removed, which mirrors how the raw files were named from the images.
    Blank lines are ignored.

    Returns:
        list[str]: one name per listed raw file, in the same order as the file.
    """
    imageList = []
    # The context manager closes the file even if parsing fails
    with open('input.txt', 'r') as inputlist:
        for line in inputlist:
            line = line.strip()  # drop the trailing newline so it cannot leak into a name
            if not line:
                continue
            name = line.split("/", 1)[-1]  # strip the leading folder; keep the line as-is if it has none
            imageList.append(name.split('.')[0])
    return imageList
# Build the name list once; later cells pair imageList[i] with the Result_<i> output folder
imageList = ImageNames()
print((imageList))

In [None]:
# COCO class names keyed by the class index as a string.
# Multi-word names are spelled out in full so that e.g. classes 34 and 35
# ("baseball bat" / "baseball glove") no longer share the same label.
label2class = {'0': 'person', '1': 'bicycle', '2': 'car', '3': 'motorcycle', '4': 'airplane', '5': 'bus',
               '6': 'train', '7': 'truck', '8': 'boat', '9': 'traffic light', '10': 'fire hydrant',
               '11': 'stop sign', '12': 'parking meter',
               '13': 'bench', '14': 'bird', '15': 'cat', '16': 'dog', '17': 'horse', '18': 'sheep', '19': 'cow',
               '20': 'elephant', '21': 'bear', '22': 'zebra', '23': 'giraffe', '24': 'backpack', '25': 'umbrella',
               '26': 'handbag', '27': 'tie', '28': 'suitcase', '29': 'frisbee', '30': 'skis', '31': 'snowboard',
               '32': 'sports ball', '33': 'kite', '34': 'baseball bat', '35': 'baseball glove',
               '36': 'skateboard', '37': 'surfboard',
               '38': 'tennis racket', '39': 'bottle', '40': 'wine glass', '41': 'cup', '42': 'fork',
               '43': 'knife', '44': 'spoon',
               '45': 'bowl', '46': 'banana', '47': 'apple', '48': 'sandwich', '49': 'orange', '50': 'broccoli',
               '51': 'carrot', '52': 'hot dog', '53': 'pizza', '54': 'donut', '55': 'cake', '56': 'chair',
               '57': 'couch',
               '58': 'potted plant', '59': 'bed', '60': 'dining table', '61': 'toilet', '62': 'tv',
               '63': 'laptop', '64': 'mouse',
               '65': 'remote', '66': 'keyboard', '67': 'cell phone', '68': 'microwave', '69': 'oven',
               '70': 'toaster',
               '71': 'sink', '72': 'refrigerator', '73': 'book', '74': 'clock', '75': 'vase', '76': 'scissors',
               '77': 'teddy bear', '78': 'hair drier', '79': 'toothbrush'}

## Post Processing 

In [None]:
from matplotlib import pyplot as plt
import cv2.dnn
import numpy as np
# One random colour triple per entry of label2class
colors = np.random.uniform(0, 255, size=(len(label2class), 3))
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one labelled detection on img and return the image.

    The rectangle spans (x, y) to (x_plus_w, y_plus_h); the caption
    "<class name> (<confidence>)" is written just above the top-left corner
    in the colour assigned to class_id.
    """
    caption = f'{label2class[str(class_id)]} ({confidence:.2f})'
    box_color = colors[class_id]
    top_left = (x, y)
    bottom_right = (x_plus_w, y_plus_h)
    img = cv2.rectangle(img, top_left, bottom_right, box_color, 1)
    return cv2.putText(img, caption, (x +2, y -10), cv2.FONT_HERSHEY_TRIPLEX, 1, box_color, 4)

In [None]:
def postProc(filename, output1, output2, out_path):
    """Decode the raw network outputs for one image, draw the kept boxes and save the result.

    Args:
        filename: path of the original image the outputs belong to.
        output1: flat float array of box values, reshaped to (2100, 4). Each row is
            treated as corner coordinates x1, y1, x2, y2 in the 320x320 network
            input space, which is how the drawing step has always used them.
            NOTE(review): assumes the exported model emits corner-format boxes - confirm.
        output2: flat float array of class scores, reshaped to (2100, 80).
        out_path: file the annotated image is written to. It is written even when
            nothing is detected, so later cells can always load it.

    Returns:
        list[dict]: one entry per detection kept by NMS, with the keys
        'class_id', 'class_name', 'confidence' and 'box' (corner format, network space).

    Raises:
        FileNotFoundError: if the image at filename cannot be read.
    """
    output1_reshape = output1.reshape(2100,4)
    output2_reshape = output2.reshape(2100,80)

    original_image = cv2.imread(filename)
    if original_image is None:
        # cv2.imread signals failure by returning None rather than raising
        raise FileNotFoundError(f"Could not read image: {filename}")
    # Scale factors from the 320x320 network space back to the original image
    ratio_1 = original_image.shape[0]/320  # vertical
    ratio_2 = original_image.shape[1]/320  # horizontal

    boxes = []      # corner format [x1, y1, x2, y2], used for drawing and reporting
    nms_boxes = []  # [x, y, width, height], the rectangle layout cv2.dnn.NMSBoxes works on
    scores = []
    class_ids = []

    for i in range(0, output2_reshape.shape[0]):
        classes_scores = output2_reshape[i]
        # A 1-D array is seen by OpenCV as a single column, so the class index is the row coordinate
        (_, maxScore, _, (_, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
        if maxScore >= 0.05:
            x1 = int(round(float(output1_reshape[i][0])))
            y1 = int(round(float(output1_reshape[i][1])))
            x2 = int(round(float(output1_reshape[i][2])))
            y2 = int(round(float(output1_reshape[i][3])))
            boxes.append([x1, y1, x2, y2])
            nms_boxes.append([x1, y1, x2 - x1, y2 - y1])
            scores.append(float(maxScore))
            class_ids.append(int(maxClassIndex))

    result_boxes = cv2.dnn.NMSBoxes(nms_boxes, scores, 0.20, 0.5, 0.5)
    print("result_boxes :: ",result_boxes)
    # Depending on the OpenCV version the indices come back flat, as an Nx1 array, or as an empty tuple
    kept_indices = np.array(result_boxes, dtype=int).reshape(-1)

    detections = []
    img = original_image
    for index in kept_indices:
        index = int(index)
        box = boxes[index]
        detection = {
            'class_id': class_ids[index],
            'class_name': label2class[str(class_ids[index])],
            'confidence': scores[index],
            'box': box
             }
        detections.append(detection)
        scaled_box = (int(box[0]*ratio_2), int(box[1]*ratio_1), int(box[2]*ratio_2), int(box[3]*ratio_1))
        img = draw_bounding_box(img, class_ids[index], scores[index], *scaled_box)
        print(detection)
        print("boxcords::",int(box[0]), int(box[1]), int(box[2]), int(box[3]))
        print("boxcords::", *scaled_box)

    # cv2 images are BGR while matplotlib writes RGB; convert so the saved colours are not swapped.
    # Saving once, after the loop, also produces a file for images without any detection.
    plt.imsave(out_path, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    return detections
        

In [None]:
# Folders for the annotated images: one for the FP32 run, one for the quantized DSP run
os.makedirs('output/32b_arm', exist_ok=True)
os.makedirs('output/8b_dsp', exist_ok=True)

## Save results on CPU

In [None]:
import cv2
import os
import numpy as np
import torch
from PIL import Image

# Results pulled from the device for the FP32 run, and the folder with the original images
test_images_dir = "output/OUTPUT_32b_CPU/"
image_dir = 'val2017/'
# Result_<i> is paired with imageList[i]; both follow the line order of input.txt
# (assumes snpe-net-run numbers its result folders in input-list order - confirm)
for i, image_name in enumerate(imageList):
    img_path = image_dir+image_name+'.jpg'
    result_dir = os.path.join(test_images_dir, 'Result_'+str(i))
    output1 = np.fromfile(os.path.join(result_dir, '885.raw'),dtype="float32")  # box values
    output2 = np.fromfile(os.path.join(result_dir, '877.raw'),dtype="float32")  # class scores
    print(output1.shape)
    print(output2.shape)
    out_path = 'output/32b_arm/'+image_name+'_prediction_32b_arm.png'
    postProc(img_path,output1,output2,out_path)

## Save results on DSP

In [None]:
import cv2
import os
import numpy as np
import torch
from PIL import Image

# Results pulled from the device for the quantized DSP run, and the folder with the original images
test_images_dir = "output/OUTPUT_8b_DSP/"
image_dir = 'val2017/'
# Result_<i> is paired with imageList[i]; both follow the line order of input.txt
# (assumes snpe-net-run numbers its result folders in input-list order - confirm)
for i, image_name in enumerate(imageList):
    img_path = image_dir+image_name+'.jpg'
    result_dir = os.path.join(test_images_dir, 'Result_'+str(i))
    output1 = np.fromfile(os.path.join(result_dir, '885.raw'),dtype="float32")  # box values
    output2 = np.fromfile(os.path.join(result_dir, '877.raw'),dtype="float32")  # class scores
    print(output1.shape)
    print(output2.shape)
    out_path = 'output/8b_dsp/'+image_name+'_prediction_8b_dsp.png'
    postProc(img_path,output1,output2,out_path)

## Output on CPU

In [None]:
# Show at most the first five FP32 predictions; slicing avoids an IndexError when fewer images were processed
for image_name in imageList[:5]:
    prediction_path = 'output/32b_arm/'+image_name+'_prediction_32b_arm.png'
    if not os.path.isfile(prediction_path):
        # Report a missing prediction instead of aborting the whole cell
        print("No prediction image found for", image_name)
        continue
    img  = plt.imread(prediction_path)
    plt.imshow(img)
    plt.show()

## Output on DSP

In [None]:
# Show at most the first five DSP predictions; slicing avoids an IndexError when fewer images were processed
for image_name in imageList[:5]:
    prediction_path = 'output/8b_dsp/'+image_name+'_prediction_8b_dsp.png'
    if not os.path.isfile(prediction_path):
        # Report a missing prediction instead of aborting the whole cell
        print("No prediction image found for", image_name)
        continue
    img  = plt.imread(prediction_path)
    plt.imshow(img)
    plt.show()