## Get FGPU iPython Package

In [None]:
# Install (or upgrade to) the latest FGPU Python package directly from its
# GitHub repository. The command is run through sudo, presumably so that the
# package lands in the system-wide Python used by the notebook kernel.
# NOTE(review): the install is not pinned to a tag or commit, so re-running
# this cell later may pull a different version of the package.
!sudo -H pip install --upgrade 'git+https://github.com/malkadi/FGPU_Python'

## Kernel Code

In [5]:
%%writefile copy.cl

#include "FGPUlib.c"

/*
 * copy_word: element-wise copy of an int array.
 *
 * Every work-item copies exactly one element from `in` to `out`; the element
 * is selected by the work-item's global id in dimension 0. No bounds check is
 * performed, so the launch size must not exceed the length of either buffer.
 */
__kernel void copy_word(__global int *in, __global int *out) {
    int gid = get_global_id(0);  /* position of this work-item in the index space */
    out[gid] = in[gid];
}

Overwriting copy.cl


## Create Objects

In [1]:
from FGPU import FGPU
from pynq.drivers import xlnk

# FGPU driver object; the following cells use it to program the hardware,
# compile the kernel, configure the launch and run it.
fgpu = FGPU()

# Xlnk instance; used below to allocate and cast the CMA buffers that are
# handed to the kernel.
mem = xlnk.xlnk()

# Last expression of the cell: displays the current CMA statistics
# (buffer count, memory available, memory usage).
mem.cma_stats()

{'Buffer Count': 0, 'CMA Memory Available': 123211776, 'CMA Memory Usage': 0}

## Program Hardware

In [14]:
# Bitstream to load onto the FPGA.
# NOTE(review): judging by the file name this is the V2 design with 8 compute
# units, no atomics, sub-integer support, clocked at 235 MHz -- confirm.
BITFILE = "V2_8CUs_noAtomic_SubInteger_235MHz.bit"

# Select the bitstream, then download it to the device.
fgpu.set_bitFile(BITFILE)
fgpu.download_bitstream()

## Compile Kernel

In [2]:
# OpenCL source file produced by the %%writefile cell in "Kernel Code".
kernel_source = "copy.cl"

# Register the source file with the driver and compile it; the compiler
# status and the disassembly of the generated binary are printed as output.
fgpu.set_kernel_file(kernel_source)
fgpu.compile_kernel()

Compiling /home/muhammed/jupyter_notebooks/FGPU/copy.cl
Compiling succeeded!



code.bin:	file format ELF32-fgpu

Disassembly of section .text:
copy_word:
       0:	22 00 00 a8 	lp	r2, 1
       4:	03 00 00 a8 	lp	r3, 0
       8:	04 00 00 a0 	lid	r4, 0
       c:	05 00 00 a1 	wgoff	r5, 0
      10:	a1 10 00 10 	add	r1, r5, r4
      14:	23 0c 00 74 	lw	r3, r3[r1]
      18:	23 08 00 7c 	sw	r3, r2[r1]
      1c:	00 00 00 92 	ret



## Allocate Memory

In [3]:
# Number of elements in the input array and in the output array.
length = 256 * 1024

# One CMA buffer per array, both typed as "unsigned". The statistics shown
# below report 2097152 bytes in use for the two buffers, i.e. 4 bytes/element.
# NOTE(review): no matching free call is visible in this notebook, so
# re-running this cell presumably allocates two additional buffers -- confirm.
src = mem.cma_alloc(length, data_type="unsigned")
dst = mem.cma_alloc(length, data_type="unsigned")

# Display the CMA statistics after the allocation.
mem.cma_stats()

{'Buffer Count': 2,
 'CMA Memory Available': 121356288,
 'CMA Memory Usage': 2097152}

## Initialize Memory

In [4]:
# Fill the source buffer with a ramp (src[k] == k) and zero the destination,
# so that a successful copy is distinguishable from untouched output memory.
for idx in range(length):
    src[idx] = idx
    dst[idx] = 0

## Configure Kernel

In [13]:
# Bind the allocated buffers to the kernel parameters by position:
# parameter 0 is `in` (src), parameter 1 is `out` (dst).
# NOTE(review): `set_paramerter` is presumably the spelling used by the FGPU
# package itself -- check the library before "correcting" it.
for position, buffer in enumerate((src, dst)):
    fgpu.set_paramerter(position, buffer, mem)

# Index space: one dimension with `length` work-items in total, split into
# work-groups of 64, starting at global offset 0.
fgpu.set_num_dimensions(1)
fgpu.set_size(length)
fgpu.set_work_group_size(64)
fgpu.set_offset(0)

## Execute Kernel

In [16]:
# Transfer the compiled kernel to the device, then launch it.
fgpu.download_kernel()
execTime = fgpu.execute_kernel()

# The returned time is scaled by 1e6 and reported in microseconds
# (so execute_kernel() presumably returns seconds).
elapsed_us = int(execTime * 1000000)
print("Execution time =", elapsed_us, "us")

Execution time = 1405 us


## Check Results

In [20]:
# View both CMA buffers as unsigned int arrays for the element-wise comparison.
# NOTE(review): the type string used for dst carries a trailing space, unlike
# the one used for src; it is kept unchanged here -- confirm it is harmless.
src_buf = mem.cma_cast(src, "unsigned int")
dst_buf = mem.cma_cast(dst, "unsigned int ")

# Count the positions where the copied value differs from the original.
nErrors = sum(1 for pos in range(length) if src_buf[pos] != dst_buf[pos])

# Report the outcome of the verification.
if nErrors:
    print(nErrors, "Errors found!")
else:
    print("no Errors found!")

no Errors found!
