In [1]:
from PIL import Image, ImageDraw
from math import floor
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pynq import allocate, Overlay
from random import seed
from random import randint
from random import getrandbits
import glob
import cv2
import time
import os

In [2]:
def run_kernel_HW(original_image,in_buffer,out_buffers):
    """Stream an image through the convolution hardware, one colour channel at a time.

    Relies on the module-level ``dma`` object that the benchmark cell assigns
    after loading an overlay.

    Parameters
    ----------
    original_image : array-like
        Image convertible with ``np.array`` to a 3-D (rows, columns, channels)
        array, e.g. a PIL image.
    in_buffer : array-like buffer
        DMA input buffer; it is overwritten with each channel in turn, so its
        shape must accept a (rows, columns, 1) slice.
    out_buffers : sequence of array-like buffers
        One DMA output buffer per colour channel; ``out_buffers[k]`` receives
        the result for channel ``k``.
    """
    # Convert the image once instead of once per channel; for a lazily loaded
    # PIL image this is also the point where the pixel data is decoded.
    pixels = np.array(original_image)

    for channel, out_buffer in enumerate(out_buffers):
        # The list index [channel] keeps the trailing axis, so the slice has
        # the same rank as in_buffer.
        in_buffer[:] = pixels[:, :, [channel]]
        dma.sendchannel.transfer(in_buffer)
        dma.recvchannel.transfer(out_buffer)
        # Wait for both directions before in_buffer is reused for the next channel.
        dma.sendchannel.wait()
        dma.recvchannel.wait()

In [3]:
def process_dir(bit_precision,image_list):
    """Run every image in ``image_list`` through the hardware and time each run.

    Relies on the module-level ``controller`` object (assigned by the benchmark
    cell) and on ``run_kernel_HW``.

    For each image the function writes 1 to controller offset 0 and the image
    width/height to offsets 16/20, allocates the DMA buffers, times
    ``run_kernel_HW``, clips the three result planes to 0..255 and saves them
    as an RGB PNG under ``Output/Benchmark/``.

    Note: the output file name only contains the bit precision and the image
    index, so a later call with the same ``bit_precision`` overwrites the
    files written by an earlier call.

    Parameters
    ----------
    bit_precision : int
        Selects the input buffer element type: 0 -> uint8, 1 -> uint16,
        2 -> uint32.
    image_list : sequence
        Images exposing ``.size`` as ``(width, height)`` (e.g. PIL images).

    Returns
    -------
    numpy.ndarray
        Elapsed seconds of the ``run_kernel_HW`` call for each image.

    Raises
    ------
    ValueError
        If ``bit_precision`` is not 0, 1 or 2.
    """
    input_dtypes = {0: np.uint8, 1: np.uint16, 2: np.uint32}
    if bit_precision not in input_dtypes:
        # Previously this fell through and crashed later on an unbound in_buffer.
        raise ValueError("bit_precision must be 0, 1 or 2, got %r" % (bit_precision,))
    input_dtype = input_dtypes[bit_precision]

    # Upper bound of an 8 bit colour channel in the saved PNG.
    channel_max = 255

    elapse = np.zeros(len(image_list))
    for i, original_image in enumerate(image_list):
        print("#",end='')

        width,height = original_image.size
        controller.write(0,1)
        controller.write(16,width)
        controller.write(20,height)

        # Every buffer is recorded as soon as it exists so the finally clause
        # can release it even if a later allocation, the transfer or the save fails.
        allocated = []
        try:
            in_buffer = allocate(shape=(height, width, 1),dtype=input_dtype, cacheable=1)
            allocated.append(in_buffer)
            # A 3x3 kernel without padding shrinks each dimension by 2.
            out_buffer = allocate(shape=(height-2, width-2, 3),dtype=np.uint8, cacheable=1)
            allocated.append(out_buffer)

            out_buffers = []
            for _ in range(3):
                plane = allocate(shape=(height-2, width-2, 1),dtype=np.uint32, cacheable=1)
                allocated.append(plane)
                out_buffers.append(plane)

            # perf_counter is monotonic and high resolution, unlike time.time().
            start = time.perf_counter()
            run_kernel_HW(original_image,in_buffer,out_buffers)
            stop = time.perf_counter()

            # Clipping values to be in correct bit range for RGB png
            for channel, plane in enumerate(out_buffers):
                out_buffer[:,:,[channel]] = np.clip(plane, 0, channel_max)
            convolution_image = Image.fromarray(out_buffer)
            convolution_image.save("Output/Benchmark/convolution_bp"+str(bit_precision)+"_t"+str(i)+"_output.png", "PNG")
            elapse[i] = (stop-start)
        finally:
            for buffer in allocated:
                buffer.freebuffer()
    print()
    return elapse

In [4]:
def evaluate(im_dir,elapse):
    """Print timing statistics for one batch of processed images.

    Parameters
    ----------
    im_dir : str
        Directory label shown in the report header.
    elapse : sequence of float
        Per-image runtimes in seconds (a list or 1-D numpy array).

    Prints the number of images, the total runtime in seconds and the average
    runtime per image in milliseconds. When ``elapse`` is empty only the
    header, the count and a short notice are printed, because an average
    cannot be computed.
    """
    count = len(elapse)
    print("\nStats for processing images in",im_dir)
    print("-------------------------------------------------")
    print("Number of photos processed:",count)
    if count == 0:
        # Avoids the division by zero that an empty batch used to trigger.
        print("No images were processed, so no timing statistics are available")
        return
    # The builtin sum adds left to right from 0, like the loop it replaces.
    total = sum(elapse)
    avg = total/count
    print("Total hardware processing runtime is:",round(total,3),"seconds")
    print("Average hardware runtime per image is:",round(avg*1000,3),"millis-seconds")

In [5]:
# Byte offsets of the first filter coefficient register and the first data
# register in the controller's register space.
filterBase = 24
dataBase = 60
def output_registers():
    """Print a snapshot of the convolution controller's registers.

    Reads the six status/configuration registers at offsets 0..20, then the
    nine 32-bit-spaced registers starting at ``filterBase`` and the nine
    starting at ``dataBase``, using the module-level ``controller`` object.
    """
    status_rows = (
        ("Control Enable:         %d", 0),
        ("Reset:                  %d", 4),
        ("State Machine Register: %d", 8),
        ("Last Calculated Value:  %d", 12),
        ("Image Width:            %d", 16),
        ("Image Height:           %d", 20),
    )
    for template, offset in status_rows:
        print(template % (controller.read(offset)))

    def dump_bank(heading, template, base):
        # Registers are 4 bytes apart, nine per bank (one per 3x3 position).
        print(heading)
        for slot in range(9):
            value = controller.read(base + slot*4)
            print(template % (slot, value))

    dump_bank("\nFilter Set:", "filterSet[%d] = %d", filterBase)
    dump_bank("\nData Set:", "dataSet[%d] = %d", dataBase)
    print("\n")

In [6]:
# Every non-hidden sub-directory of 'Images' is treated as one batch of test images.
image_directories = [dI for dI in os.listdir('Images') if (os.path.isdir(os.path.join('Images',dI)) and (dI[0]!='.'))]
# Width label shown for each directory. It is derived from the directory name
# (e.g. 'w480' -> '480') so that widths[i] always belongs to
# image_directories[i]; os.listdir returns entries in arbitrary order, so a
# hand-written list can silently mislabel directories or be too short.
# NOTE(review): assumes directories are named 'w<width>'; other names are used as-is.
widths = [dI[1:] if dI.startswith('w') else dI for dI in image_directories]

In [7]:
# Display the list of image sub-directories found under 'Images'.
image_directories

['w480', 'w1080', 'w240', 'w720', 'w120']

In [9]:
# Side length of the square convolution kernel.
kernelsize = 3
# Identity (pass-through) kernel: only the centre coefficient is non-zero.
# NOTE(review): this was labelled "High-pass kernel", but with a lone centre 1
# a plain 3x3 multiply-accumulate reproduces the input pixel; replace the
# coefficients if an actual high-pass filter is intended.
kernel = [[ 0, 0, 0 ],
          [ 0, 1, 0 ],
          [ 0, 0, 0 ]]

In [10]:
# Max run for each directory
run_count = 15

# Banner text and bitstream file for each bit-precision code.
bitstreams = {
    0: ("Processing images at 8 bit precision", "Hardware-Files/Stable/PL_Convolution_8b.bit"),
    1: ("Processing images at 16 bit precision", "Hardware-Files/Stable/PL_Convolution_16b.bit"),
    2: ("Processing images at 32 bit precision", "Hardware-Files/Stable/PL_Convolution_32b.bit"),
}

start_time = time.time()
# Test performance at multiple bit precisions
# NOTE(review): the range starts at 1, so the 8 bit design (code 0) is never
# exercised; use range(0,3) if it should be benchmarked as well.
for bit_precision in range(1,3):
    banner, bitstream_path = bitstreams[bit_precision]
    print(banner)
    Convolution_design = Overlay(bitstream_path)

    # These two module-level names are read by run_kernel_HW, process_dir and
    # output_registers, so they must be (re)assigned after every overlay load.
    dma = Convolution_design.axi_dma_0
    controller = Convolution_design.Convolution_Controll_0

    #Enable IP control register
    controller.write(0,1)

    #Input Filter set
    # kernel[y][x] is written to slot x*kernelsize+y, i.e. the coefficients are
    # stored column by column.
    # NOTE(review): confirm this is the order the hardware expects; it makes no
    # difference for a symmetric kernel.
    for x in range(kernelsize):
        for y in range(kernelsize):
            controller.write(filterBase+((x*kernelsize)+y)*4,kernel[y][x])

    for i in range(len(image_directories)):
        # Pre-load image directory. Each pass uses its own directory; a fixed
        # debugging path here made every pass process the same folder while
        # reporting a different width.
        im_dir = 'Images/'+image_directories[i]+'/'
        image_list = []
        try:
            # Sorted so the same run_count images are picked on every run
            # (glob returns files in arbitrary order).
            for filename in sorted(glob.glob(im_dir+'*.jpg')):
                im=Image.open(filename)
                image_list.append(im)
                print("#",end='')
                if(len(image_list)==run_count): break

            # Process the pre-loaded images and report the timings
            print("\n# Now processing images in \'"+im_dir+"\' with a width of",widths[i])
            elapse = process_dir(bit_precision,image_list)
            evaluate(im_dir,elapse)
        finally:
            # Image.open keeps the underlying file open; release the handles
            # once the batch is done, even if processing failed.
            for im in image_list:
                im.close()
        if(i!=len(image_directories)-1): print("\n\n\n")

    if(bit_precision!=2): print("\n")
stop_time = time.time()

print("\nTotal test runtime:",round(stop_time-start_time,3),"seconds")

Processing images at 16 bit precision
###############
# Now processing images in 'Images/w480/' with a width of 480
###############

Stats for processing images in Images/w480/
-------------------------------------------------
Number of photos processed: 15
Total hardware processing runtime is: 2.234 seconds
Average hardware runtime per image is: 148.925 millis-seconds




###############
# Now processing images in 'Images/w1080/' with a width of 1080
###############

Stats for processing images in Images/w1080/
-------------------------------------------------
Number of photos processed: 15
Total hardware processing runtime is: 10.221 seconds
Average hardware runtime per image is: 681.378 millis-seconds




###############
# Now processing images in 'Images/w240/' with a width of 240
###############

Stats for processing images in Images/w240/
-------------------------------------------------
Number of photos processed: 15
Total hardware processing runtime is: 0.512 seconds
Average ha