In [1]:
from PIL import Image
from math import floor
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pynq import allocate, Overlay
from random import seed
from random import randint

In [2]:
# Load the convolution overlay and keep handles to the two IP blocks used below.
Convolution_design = Overlay("PL_Convolution.bit")

# DMA engine used for streaming, followed by the controller's register interface.
dma = Convolution_design.axi_dma_0
controller = Convolution_design.Convolution_Controll_0

# Register offsets (decimal) of the first filter value (0x14) and the first
# data value (0x40); subsequent values follow at 4-byte steps.
filterBase, dataBase = 20, 64

In [3]:
def run_kernel():
    """Run one DMA round trip using the notebook-level buffers.

    Starts a send transfer of ``in_buffer``, then a receive transfer into
    ``out_buffer``, and blocks until both channels have finished (send first,
    then receive). ``dma``, ``in_buffer`` and ``out_buffer`` are read from the
    notebook's global namespace.
    """
    tx = dma.sendchannel
    tx.transfer(in_buffer)

    rx = dma.recvchannel
    rx.transfer(out_buffer)

    # Wait in the same order the transfers were started.
    for channel in (tx, rx):
        channel.wait()

### Control Registers
The convolution controller has several control registers. This is a breakdown of the internal offsets to use and their corresponding purposes. All necessary control registers should be set before starting the data stream.

| Register Offset (Hex) | Register Offset (Decimal) | Register Name | Description |
| --------------------- | ------------------------- | ------------- | ----------- |
|0x00| 0 | Image Width | This is the control register holding image width |
|0x04| 4 | Image Height | This is the control register holding image height |
|0x08| 8 | Control Enable | Set this high before starting data stream |
|0x0c| 12 | State Machine Register | Bit 0 = RDst; Bit 1 = ADDst; Bit 2 = MULTIst |
|0x10| 16 | Last calculated value | Holds the last value that was calculated |
|0x14-0x34| 20-52 | Filter Set | Every 4 offset corresponds to one filter value |
|0x38| 56 | Reset Queue | Write a high to this to reset the controller. It should normally read low unless something is stopping the controller from self-resetting |
|0x3c| 60 | Convolution Count | Simply keeps track of the number of successful convolutions since the IP was loaded |
|0x40-0x60| 64-96 | Data Set | Last known data set, every 4 offset corresponds to one data value|
|0x64| 100 | rCount | Number of times the controller entered a read state |
|0x68| 104 | mCount | Number of times the controller entered a multiply state |
|0x6c| 108 | aCount | Number of times the controller entered an add state |
|0x70| 112 | resetCount | Number of times the controller state machine has been reset |
|0x74| 116 | current x | Current x position in total image map (x domain is 0 to Image Width-1) |
|0x78| 120 | current y | Current y position in total image map (y domain is 0 to Image Height-1) |
|0x7c| 124 | s_axis_ready |  |
|0x80| 128 | m_axis_valid |  |
|0x84| 132 | m_axis_ready |  |
|0x88| 136 | m_axis_last |  |
|0x8c| 140 | datapointer |  |

In [4]:
def output_registers():
    """Print a snapshot of the convolution controller's registers.

    Reads each register through ``controller.read(offset)`` and prints it in
    three groups: image geometry / position / last result, the nine filter
    and nine data registers (starting at ``filterBase`` and ``dataBase``,
    4 bytes apart), and finally the status flags and counters.
    """
    def show_all(rows):
        # Each row is (format string, register offset); read and print in order.
        for template, offset in rows:
            print(template % (controller.read(offset)))

    def show_bank(heading, template, base):
        # Nine consecutive 32-bit registers starting at ``base``.
        print(heading)
        for slot in range(9):
            print(template % (slot, controller.read(base + 4 * slot)))

    show_all((
        ("Image width:           %d", 0),
        ("Image height:          %d", 4),
        ("Current X position:    %d", 116),
        ("Current Y position:    %d", 120),
        ("Last calculated value: %d", 16),
    ))

    show_bank("\nFilter Set:", "filterSet[%d] = %d", filterBase)
    show_bank("\nLast Known Data Set:", "dataSet[%d] = %d", dataBase)

    show_all((
        ("\nController start signal:           %d", 8),
        ("Reset register:                    %d", 56),
        ("State Machine Register:            %d", 12),
        ("s_axis_ready status:               %d", 124),
        ("m_axis_valid status:               %d", 128),
        ("m_axis_ready status:               %d", 132),
        ("m_axis_last status:                %d", 136),
        ("data pointer:                      %d", 140),
        ("Number of times in read state:     %d", 100),
        ("Number of times in multiply state: %d", 104),
        ("Number of times in add state:      %d", 108),
        ("Completed convolution:             %d", 60),
        ("State machine reset count:         %d", 112),
    ))

In [5]:
def reset_ip(max_polls=None):
    """Request a controller reset and wait for the reset register to clear.

    Writes 1 to the Reset Queue register (offset 56) and then polls that
    register until it no longer reads 1, printing a short status message.

    Parameters
    ----------
    max_polls : int or None, optional
        Maximum number of polls that may still read 1 before giving up.
        ``None`` (the default) polls indefinitely, which is the original
        behaviour.

    Raises
    ------
    TimeoutError
        If ``max_polls`` is given and the register still reads 1 after that
        many polls. Without a bound, a controller that never clears the
        register would hang the notebook cell.
    """
    reset_offset = 56
    times = 0
    controller.write(reset_offset, 1)
    while controller.read(reset_offset) == 1:
        if times == 0:
            # Announce the wait only once, not on every poll.
            print("Waiting on reset queue.")
        times += 1
        if max_polls is not None and times >= max_polls:
            raise TimeoutError(
                "Controller reset register still set after %d polls." % (times)
            )

    if times == 0:
        print("Controller reset successful first try.")
    else:
        print("Controller reset successful after waiting %d times." % (times))

## After this point is just testing stuff

### Helping the controller out
This is a heavily assisted way of feeding data to the controller. Data is fed in chunks of 3 for the most part.

In [6]:
def load_full(current_x,current_y):
    """Fill ``full_buffer`` with nine random values and stream it to the controller.

    Parameters
    ----------
    current_x, current_y :
        Accepted for symmetry with ``load_line``; not used by this function.

    Returns
    -------
    list of int
        The nine values written, in the order they were drawn
        (``full_buffer[0][0]``, ``full_buffer[0][1]``, ...).

    Notes
    -----
    The values are returned as plain Python ints. Appending
    ``full_buffer[i][j]`` directly would store one-element array views that
    alias the DMA buffer, so the returned list would change whenever the
    buffer is rewritten.
    """
    dataSet = []
    for i in range(3):
        for j in range(3):
            # Draw once, then use the same int for both the buffer and the record.
            value = randint(0,50)
            full_buffer[i][j] = value
            dataSet.append(value)

    # Send the whole 3x3 block and wait for the DMA send channel to finish.
    dma.sendchannel.transfer(full_buffer)
    dma.sendchannel.wait()

    return dataSet

def load_line(current_x,current_y):
    """Stream three new random values and return the resulting nine-value data set.

    The first six entries are read back from the controller's data registers
    (offsets ``dataBase`` .. ``dataBase + 20``); the last three are freshly
    drawn values written to ``line_buffer`` and sent over the DMA send channel.

    Parameters
    ----------
    current_x, current_y :
        Accepted for symmetry with ``load_full``; not used by this function.

    Returns
    -------
    list of int
        Six register values followed by the three values just sent.

    Notes
    -----
    The new values are recorded as plain Python ints. Appending
    ``line_buffer[i][0]`` directly would store one-element array views that
    alias the DMA buffer and change on the next call.
    """
    # Registers are read before the new line is sent.
    dataSet = [controller.read((i*4)+dataBase) for i in range(6)]

    for i in range(3):
        value = randint(0,50)
        line_buffer[i][0] = value
        dataSet.append(value)

    dma.sendchannel.transfer(line_buffer)
    dma.sendchannel.wait()

    return dataSet

In [15]:
# Re-instantiate the overlay for the assisted test and refresh the handles to
# the DMA engine and the convolution controller.
Convolution_design = Overlay("PL_Convolution.bit")

dma = Convolution_design.axi_dma_0
controller = Convolution_design.Convolution_Controll_0

# Fixed seed so the randint() draws used for the filter and test data repeat
# from run to run.
seed(12256163123)

In [16]:
# Image dimensions for the assisted test: a single 3x3 kernel position.
width, height = 3, 3

# DMA buffers: a full 3x3 kernel load, a 3-value line load, and the output,
# which is two smaller than the image in each dimension.
buffer_options = dict(dtype=np.uint32, cacheable=1)
full_buffer = allocate(shape=(3, 3, 1), **buffer_options)
line_buffer = allocate(shape=(3, 1, 1), **buffer_options)
out_buffer = allocate(shape=(height - 2, width - 2, 1), **buffer_options)

# Nine random filter coefficients in [0, 50], drawn in register order.
filterSet = [randint(0, 50) for _ in range(9)]

# Same text as printing each coefficient followed by ", " inside "[ ... ]".
print("filterSet = [ " + "".join("%d, " % (coeff) for coeff in filterSet) + "]")

filterSet = [ 50, 8, 49, 31, 23, 6, 2, 43, 50, ]


In [17]:
# Set the Control Enable register (offset 8) before any data is streamed.
controller.write(8, 1)

# Load the nine filter coefficients into consecutive 4-byte registers
# starting at filterBase.
for slot in range(9):
    controller.write(filterBase + 4 * slot, filterSet[slot])

# Program the image geometry: width at offset 0, then height at offset 4.
controller.write(0, width)
controller.write(4, height)

In [18]:
# Assisted self-test: feed the controller one kernel (or one new line) at a
# time, recompute the expected convolution in Python, and compare it with the
# controller's "last calculated value" register.

# Snapshot of the resetCount register (offset 112); the loop below runs for as
# long as this value is unchanged.
start_reset = controller.read(112)
# Arm the receive channel before any input is sent.
dma.recvchannel.transfer(out_buffer)
print("Starting Test Convolution!")
print("==========================")
print("Test Width:  %d"%(controller.read(0)))
print("Test Height: %d"%(controller.read(4)))
print()

dataSet = []
testCnt = 0
failCnt = 0
passCnt = 0  # counted below but not included in the final summary
while(start_reset == controller.read(112)):    
    testCnt+=1
    print("######### Start of test %d #########"%(testCnt))
    
    # Current position registers: x at offset 116, y at offset 120.
    curr_x = controller.read(116)
    curr_y = controller.read(120)
    
    if( curr_x == 0 ):#Beginning of row means to load a whole kernel
        dataSet = load_full(curr_x,curr_y)
        # The nine values are displayed as three columns of three.
        print("              |1|%3d    |4|%3d    |7|%3d" % (dataSet[0],dataSet[3],dataSet[6]))
        print("              |2|%3d    |5|%3d    |8|%3d" % (dataSet[1],dataSet[4],dataSet[7]))
        print("Loaded Full:  |3|%3d    |6|%3d    |9|%3d" % (dataSet[2],dataSet[5],dataSet[8]))
        
    else:#Somewhere in the middle of row
        dataSet = load_line(curr_x,curr_y)
        # Only the three newly sent values (entries 6..8) are displayed.
        print("              |7|%3d" % (dataSet[6]))
        print("              |8|%3d" % (dataSet[7]))
        print("Loading Line: |9|%3d" % (dataSet[8]))
    
    #Print the filter set
    print("\nfilterSet              = [ ",end='')
    for i in range(9):
        print("%d" % (filterSet[i]),end=', ')
    print("]")
    
    #Calculate the expected value
    # Expected result is the sum of element-wise products of data and filter.
    expected_value = 0
    print("dataSet used           = [ ",end='')
    for i in range(9):
        print("%d" % (dataSet[i]),end=', ')
        expected_value+=dataSet[i]*filterSet[i]
    print("]")
        
    #Retrieve PL calculated value from controller
    # NOTE(review): this is read right after the send channel reports done;
    # nothing here checks that the controller has finished computing — confirm.
    calculated_value = controller.read(16)
    
    print("Expected Value         = %d" % (expected_value))
    print("Calculated Value       = %d" % (calculated_value))
    
    # Prints entries 3..8 followed by entries 6..8 again.
    # NOTE(review): presumably models the controller's data window after it
    # shifts by one column — confirm against the IP's behaviour.
    print("Expected PL dataSet    = [ ",end='')
    for i in range(3,9):
        print("%d" % (dataSet[i]),end=', ')
    for i in range(6,9):
        print("%d" % (dataSet[i]),end=', ')
    print("]")
    
    # Actual contents of the nine data registers starting at dataBase.
    print("PL dataSet             = [ ",end = '')
    for i in range(9):
        print("%d" % (controller.read((i*4)+dataBase)),end=', ')
    print("]")
    
    # Pass/fail is decided only on the calculated value, not on the data set.
    if(expected_value == calculated_value):
        passCnt+=1
        print("Test Result:           PASS\n")
    else:
        failCnt+=1
        print("Test Result:           FAIL\n")
# Block until the receive transfer armed at the top of the cell completes.
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt
# after test 1 — confirm whether this wait (or the loop condition) can hang.
dma.recvchannel.wait()

print("=============================\nTest Complete!\n")
if(failCnt == 0):
    print("All %d tests pass!" % (testCnt))
else:
    print("%d out of %d tests FAILED" % (failCnt,testCnt))

print("=============================\n")
    
# Finish with a full register dump for inspection.
print("\nControl Registers:")
print("==================")
output_registers()

Starting Test Convolution!
Test Width:  3
Test Height: 3

######### Start of test 1 #########
              |1| 24    |4| 32    |7| 32
              |2| 43    |5| 28    |8| 49
Loaded Full:  |3|  7    |6|  2    |9|  5

filterSet              = [ 50, 8, 49, 31, 23, 6, 2, 43, 50, ]
dataSet used           = [ 24, 43, 7, 32, 28, 2, 32, 49, 5, ]
Expected Value         = 5956
Calculated Value       = 5956
Expected PL dataSet    = [ 32, 28, 2, 32, 49, 5, 32, 49, 5, ]
PL dataSet             = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, ]
Test Result:           PASS



KeyboardInterrupt: 

In [None]:
# Hand calculation of one 3x3 convolution, for checking against the hardware.
# Note that this cell rebinds the notebook-level filterSet and dataSet.
filterSet = list(range(9))
dataSet = [1, 6, 11, 2, 7, 12, 3, 8, 13]
cSum = 0

for i, (sample, coeff) in enumerate(zip(dataSet, filterSet)):
    # Blank line before every group of three products.
    if not i % 3:
        print()

    product = sample * coeff
    print("%s x %s = %s = %d" % (hex(sample), hex(coeff), hex(product), product))
    cSum += product

print("\ncSum in hex:     %s" % (hex(cSum)))
print("cSum in decimal: %d" % (cSum))

In [19]:
# Dump the controller registers again to inspect their state after the test cell.
output_registers()

Image width:           3
Image height:          3
Current X position:    0
Current Y position:    0
Last calculated value: 5956

Filter Set:
filterSet[0] = 50
filterSet[1] = 8
filterSet[2] = 49
filterSet[3] = 31
filterSet[4] = 23
filterSet[5] = 6
filterSet[6] = 2
filterSet[7] = 43
filterSet[8] = 50

Last Known Data Set:
dataSet[0] = 0
dataSet[1] = 0
dataSet[2] = 0
dataSet[3] = 0
dataSet[4] = 0
dataSet[5] = 0
dataSet[6] = 0
dataSet[7] = 0
dataSet[8] = 0

Controller start signal:           1
Reset register:                    0
State Machine Register:            0
s_axis_ready status:               1
m_axis_valid status:               0
m_axis_ready status:               1
m_axis_last status:                0
data pointer:                      0
Number of times in read state:     9
Number of times in multiply state: 4
Number of times in add state:      5
Completed convolution:             1
State machine reset count:         1


### A little closer
In this case, a buffer of data is prepared, but it is limited in width to one kernel size. This is a restriction due to the current operation of the DMA.

In [None]:
# Re-instantiate the overlay for the buffered test and refresh the handles to
# the DMA engine and the convolution controller.
Convolution_design = Overlay("PL_Convolution.bit")

dma = Convolution_design.axi_dma_0
controller = Convolution_design.Convolution_Controll_0

# Fixed seed for the random module so any randint() draws are repeatable.
seed(2)

In [None]:
# One kernel wide, 1920 rows tall.
width, height = 3, 1920

# Input holds height x width values; output holds one value per three input rows.
in_buffer = allocate(shape=(height, width, 1), dtype=np.uint32, cacheable=1)
out_buffer = allocate(shape=(int(height / 3), 1, 1), dtype=np.uint32, cacheable=1)

# Deterministic test pattern: element (row, col) holds col*width + row.
for col in range(width):
    column_start = col * width
    for row in range(height):
        in_buffer[row][col] = column_start + row

In [None]:
# Set the Control Enable register (offset 8) before any data is streamed.
controller.write(8, 1)

# Filter coefficients 0..8 go into the nine filter registers at offsets 20..52.
for coeff, offset in enumerate(range(20, 56, 4)):
    controller.write(offset, coeff)

# Program the geometry: width at offset 0, and one third of the buffer height
# at offset 4.
controller.write(0, width)
controller.write(4, int(height / 3))

In [None]:
# Register snapshot after configuration, before the DMA transfer cell below.
output_registers()

In [None]:
# Stream in_buffer to the controller and receive the results into out_buffer.
# Both transfers are started before either wait, so the send and receive
# channels are active at the same time. This is the same sequence as the
# run_kernel() helper defined earlier in this notebook.
dma.sendchannel.transfer(in_buffer)
dma.recvchannel.transfer(out_buffer)
dma.sendchannel.wait()
dma.recvchannel.wait()

In [None]:
# Register snapshot after the DMA transfer cell above.
output_registers()

In [None]:
# Display the contents of the receive buffer.
out_buffer