## Tiny YOLO Loop Inference


In [None]:
import ctypes
import numpy as np
from datetime import datetime
import cv2
from postprocessor import postprocessing
import multiprocessing 
from IPython.display import Image, display  , clear_output
from PIL import Image as PILImage
import io

## Prepare Constants, Accelerator, and DMA Buffers


In [None]:
# Memory-mapped register window of the accelerator IP; both values are handed
# to pynq.MMIO further down in this cell.
IP_BASE_ADDRESS = 0x43C00000
ADDRESS_RANGE = 0x1000

# Register offsets inside that window.
ADDR_IP_CTRL = 0x00   # control/status word: written to start a run, polled for completion
ADDR_IN_LOW = 0x10    # input buffer physical address, low word
ADDR_IN_HIGH = 0x14   # input buffer physical address, high word (written as 0)
ADDR_OUT_LOW = 0x1C   # output buffer physical address, low word
ADDR_OUT_HIGH = 0x20  # output buffer physical address, high word (written as 0)
ADDR_MODE = 0x28      # mode register (written as 0 during setup)

# Channel-wise scale factors: one value per output channel (125 in total),
# multiplied into the raw accelerator output inside infer().
# The table is divided by 15 up front (4-bit scaling, per the original note).
wmeans = np.asarray([
    0.033435356, 0.040640667, 0.02830263, 0.028113252, 0.090943664,
    0.05413572, 0.04137789, 0.05721583, 0.054953724, 0.06515035,
    0.045479294, 0.061646923, 0.031853218, 0.057418287, 0.048105445,
    0.03305997, 0.040052418, 0.042445477, 0.041713506, 0.068666436,
    0.057845917, 0.05469403, 0.03162237, 0.03852496, 0.05589233,
    0.042851884, 0.044526305, 0.023378735, 0.024275787, 0.10315256,
    0.05207203, 0.054588277, 0.057972748, 0.053736657, 0.05872504,
    0.054859173, 0.05962007, 0.056790277, 0.06427645, 0.05288439,
    0.052599918, 0.054841787, 0.054693557, 0.054210298, 0.074391074,
    0.058028027, 0.054509915, 0.051626373, 0.05191318, 0.059774343,
    0.018797167, 0.019762855, 0.015595292, 0.010377877, 0.096230194,
    0.025394442, 0.037996702, 0.045422852, 0.03437901, 0.040340886,
    0.037080817, 0.032852493, 0.051562235, 0.04031235, 0.03565653,
    0.024606774, 0.049541403, 0.04454396, 0.039493755, 0.06044323,
    0.03889671, 0.036654513, 0.027156929, 0.037195265, 0.034402523,
    0.021786999, 0.035217866, 0.013877937, 0.02175496, 0.10512365,
    0.053382784, 0.04790908, 0.048428394, 0.04619976, 0.01883534,
    0.051533263, 0.055429906, 0.054879732, 0.041577768, 0.042774152,
    0.052682582, 0.051729254, 0.04758482, 0.049580473, 0.055306707,
    0.035829913, 0.04295467, 0.049666215, 0.05386947, 0.04572626,
    0.0018182033, 0.008441488, 0.004369467, 0.009968552, 0.08600053,
    0.039413314, 0.037624203, 0.031340715, 0.027673598, 0.015407145,
    0.03948128, 0.042174693, 0.045264073, 0.02087857, 0.027500201,
    0.031777654, 0.037840065, 0.034118634, 0.038781233, 0.04177563,
    0.025552304, 0.02757378, 0.03276727, 0.03945421, 0.0245141,
], dtype=np.float32) / 15.

# Anchor box priors forwarded to postprocessing(), one pair per anchor.
anchors = [
    (1.08, 1.19),
    (3.42, 4.41),
    (6.63, 11.38),
    (9.42, 5.11),
    (16.62, 10.52),
]

# Class labels forwarded to postprocessing().
classes = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

# Ten distinct colour triples, cycled twice so that each of the twenty
# class labels has an entry at the same index.
_base_palette = [
    (31, 119, 180),
    (255, 127, 14),
    (44, 160, 44),
    (214, 39, 40),
    (148, 103, 189),
    (140, 86, 75),
    (227, 119, 194),
    (127, 127, 127),
    (188, 189, 34),
    (23, 190, 207),
]
colors = _base_palette * 2

# Network input resolution and detection thresholds forwarded to postprocessing().
input_height, input_width = 416, 416
score_threshold = 0.5
iou_threshold = 0.3


import bnn
from pynq import MMIO, Xlnk

# Register window of the accelerator, used for all control accesses below
# and inside infer().
mmio = MMIO(IP_BASE_ADDRESS, ADDRESS_RANGE)

# NOTE(review): the classifier object is not referenced again in the visible
# code; constructing it presumably loads the 'tinyyolo' hardware design --
# confirm before removing.
hw_classifier = bnn.CnvClassifier(bnn.NETWORK_CNVW1A1, 'tinyyolo', bnn.RUNTIME_HW)

print('preparing buffers')
xlnk = Xlnk()

# Contiguous buffers shared with the accelerator through their physical
# addresses: one 416x416x3 uint8 frame in, 125*169 int64 words out.
input_buffer = xlnk.cma_array(shape=(416*416*3,), dtype=np.uint8)
output_buffer = xlnk.cma_array(shape=(125*169,), dtype=np.int64)
in_addr = input_buffer.physical_address
out_addr = output_buffer.physical_address

# Start from a zeroed output buffer.
output_buffer[:] = 0

# Program the mode and the input/output buffer addresses (high words are 0).
for _register, _value in (
    (ADDR_MODE, 0),
    (ADDR_IN_HIGH, 0),
    (ADDR_IN_LOW, in_addr),
    (ADDR_OUT_HIGH, 0),
    (ADDR_OUT_LOW, out_addr),
):
    mmio.write(_register, _value)


## Prepare List of Images

In [None]:
import os

# Directory scanned for benchmark images.
path = './voc_images/'

# PIL images consumed by infer(), in sorted filename order.
# os.listdir() returns names in arbitrary order, so the listing is sorted to
# make the run (and the order of displayed frames) reproducible.
imgList = []
for filename in sorted(os.listdir(path)):
    # Match on the real extension, case-insensitively; the previous substring
    # test accepted any name merely containing "jpg" and missed ".JPG"/".jpeg".
    if not filename.lower().endswith(('.jpg', '.jpeg')):
        continue
    # PILImage.open is lazy: pixel data is only decoded on first use.
    imgList.append(PILImage.open(os.path.join(path, filename)))

print(len(imgList))

## Define the Inference Worker

In [None]:
def _to_accelerator_input(pixels):
    """Flatten 8-bit pixel data and rescale it into the 0..127 range."""
    scaled = np.round(pixels.flatten() * .50196078431)  # value / 255 * 128
    # A pixel value of 255 lands on 128 after rounding; clamp it to 127.
    return np.clip(np.asarray(scaled, dtype=np.uint8), 0, 127)


def _run_accelerator():
    """Start one accelerator run and block until it reports completion."""
    # NOTE(review): register 0x5c has no named constant in this file; its
    # meaning is not visible here -- confirm against the IP documentation.
    mmio.write(0x5c, 1)
    mmio.write(ADDR_IP_CTRL, 1)
    # Poll bit 1 of the control word (presumably the "done" flag -- confirm).
    while (mmio.read(ADDR_IP_CTRL) & 0x02) == 0:
        pass


def _read_scaled_output():
    """Copy the raw output out of the shared buffer and scale it per channel.

    Returns a float32 array of shape (125, 13, 13).
    """
    raw = np.zeros(125*169, dtype=np.int64)
    np.copyto(raw, output_buffer)
    # The buffer is interpreted as 13x13 cells of 125 channels; move the
    # channel axis to the front before applying the per-channel scale.
    channels_first = np.transpose(np.reshape(raw, (13, 13, 125)), (2, 0, 1))
    return np.asarray(channels_first, dtype=np.float32) * wmeans[:, None, None]


def infer(flag, img_shared_array, lock):
    """Run every image in ``imgList`` through the accelerator and publish it.

    For each image: scale the pixels, copy them into ``input_buffer``, run
    the accelerator, scale the raw output with ``wmeans``, call
    ``postprocessing`` with the drawable copy of the frame, then hand that
    frame to the consumer through ``img_shared_array``.

    Handshake: ``flag`` is 0 while the shared slot is free and 1 while a
    frame is waiting to be consumed.

    Args:
        flag: ``multiprocessing.Value`` used as the slot-full indicator.
        img_shared_array: ``multiprocessing.Array`` that receives the frame
            as flat uint8 data.
        lock: ``multiprocessing.Lock`` guarding updates of ``flag``.
    """
    # One view onto the shared memory is enough for the whole run.
    shared_view = np.frombuffer(img_shared_array.get_obj(), dtype="uint8")

    for input_image in imgList:
        # The DMA input buffer and the shared frame are fixed-size RGB; a
        # mismatching image would make np.copyto raise in this process and
        # leave the consumer waiting forever, so normalise it first.
        if input_image.mode != 'RGB':
            input_image = input_image.convert('RGB')
        if input_image.size != (input_width, input_height):
            input_image = input_image.resize((input_width, input_height))

        imgDraw = np.copy(input_image)

        np.copyto(input_buffer, _to_accelerator_input(imgDraw))
        _run_accelerator()
        res = _read_scaled_output()

        # NOTE(review): the return value is unused, so postprocessing
        # presumably draws the detections onto imgDraw in place -- confirm.
        postprocessing(res[None, :], imgDraw, score_threshold, iou_threshold,
                       input_height, input_width, anchors, colors, classes)

        # Wait until the consumer has taken the previous frame.
        while flag.value == 1:
            pass
        np.copyto(shared_view, np.asarray(imgDraw).flatten())
        with lock:
            flag.value += 1

In [None]:
# Single-slot handshake with the worker process: flag == 1 means a finished
# frame is waiting in img_shared_array, flag == 0 means the slot is free.
flag = multiprocessing.Value('i', 0)
img_shared_array = multiprocessing.Array("c", 416*416*3)
lock = multiprocessing.Lock()
p1 = multiprocessing.Process(target=infer, args=(flag, img_shared_array, lock))

totalsum = 0  # seconds spent encoding and displaying frames

start = datetime.now()

p1.start()

imgcount = len(imgList)

for _ in range(imgcount):
    while flag.value == 0:
        # If the worker has died no frame will ever arrive; fail loudly
        # instead of spinning forever. The flag is re-read after the liveness
        # check so a frame published just before a clean exit is not missed.
        if not p1.is_alive() and flag.value == 0:
            raise RuntimeError(
                "inference process exited early (exit code {})".format(p1.exitcode))

    startx = datetime.now()
    imgDraw = np.frombuffer(img_shared_array.get_obj(), dtype="uint8")
    imgDraw = imgDraw.reshape((416, 416, 3))

    f = io.BytesIO()
    PILImage.fromarray(imgDraw).save(f, 'jpeg')
    display(Image(data=f.getvalue()))
    endx = datetime.now()

    totalsum += (endx - startx).total_seconds()
    # Release the slot so the worker can publish the next frame.
    with lock:
        flag.value -= 1

end = datetime.now()

# Reap the worker; it exits on its own once the last frame has been taken.
p1.join()

elapsed_ms = int((end - start).total_seconds() * 1000)
print("HW implementation took {} ms".format(elapsed_ms))
# Guard the division: the truncated elapsed time can be 0 (e.g. no images).
fps = 1000*imgcount/elapsed_ms if elapsed_ms else 0.0
print("FPS: {} ".format(fps))
print("HW implementation sum took {} ms".format(totalsum*1000))

