## Get FGPU IPython Package

In [1]:
# Install (or upgrade) the FGPU package directly from its GitHub repository.
# NOTE(review): the install is unpinned and runs under sudo; consider pinning a
# tag or commit so that re-running the notebook is reproducible.
!sudo -H pip install --upgrade 'git+https://github.com/malkadi/FGPU_IPython'

Collecting git+https://github.com/malkadi/FGPU_IPython
  Cloning https://github.com/malkadi/FGPU_IPython to /tmp/pip-8afn9bvh-build
Installing collected packages: FGPU
  Found existing installation: FGPU 0.1
    Uninstalling FGPU-0.1:
      Successfully uninstalled FGPU-0.1
  Running setup.py install for FGPU ... done
Successfully installed FGPU-0.1


## Kernel Code
This kernel copies an array of 32-bit words from one global-memory buffer to another.

In [4]:
%%writefile copy.cl

#include "FGPUlib.c"

/*
 * copy_word: every work-item copies exactly one int from `in` to `out`.
 * The element handled is selected by the work-item's global id in dimension 0.
 */
__kernel void copy_word(__global int *in, __global int *out) {
    int gid = get_global_id(0);
    out[gid] = in[gid];
}

Overwriting copy.cl


## Create Objects

In [1]:
from FGPU import FGPU, xlnk

# Handle used by the following cells to program, configure and run the FGPU.
fgpu = FGPU()
# Memory manager whose cma_* methods are used below for buffer allocation.
mem = xlnk.xlnk()
# Display the allocator statistics before any buffer has been requested.
mem.cma_stats()

{'Buffer Count': 0, 'CMA Memory Available': 127647744, 'CMA Memory Usage': 0}

## Program Hardware

In [2]:
# Bitstream file handed to the FGPU object before it is downloaded.
BITSTREAM_FILE = "V2_8CUs_SubInteger_2K_LMEM_240MHz.bit"

fgpu.set_bitFile(BITSTREAM_FILE)
fgpu.download_bitstream()

## Compile Kernel

In [3]:
# OpenCL source produced by the %%writefile cell above.
KERNEL_SOURCE = "copy.cl"

fgpu.set_kernel_file(KERNEL_SOURCE)
# NOTE(review): the positional True presumably enables the disassembly listing
# shown in this cell's output; confirm against the FGPU API.
fgpu.compile_kernel(True)

Compiling /home/muhammed/FGPU_IPython/notebooks/copy.cl
Compiling succeeded!



/usr/local/lib/python3.4/dist-packages/FGPU/code.bin:	file format ELF32-fgpu

Disassembly of section .text:
copy_word:
       0:	22 00 00 a8 	lp	r2, 1
       4:	03 00 00 a8 	lp	r3, 0
       8:	04 00 00 a0 	lid	r4, 0
       c:	05 00 00 a1 	wgoff	r5, 0
      10:	a1 10 00 10 	add	r1, r5, r4
      14:	23 0c 00 74 	lw	r3, r3[r1]
      18:	23 08 00 7c 	sw	r3, r2[r1]
      1c:	00 00 00 92 	ret



## Allocate Memory

In [4]:
# Number of elements in each of the input and output arrays.
length = 256 * 1024

# One buffer for the kernel input and one for its output, allocated in that order.
src = mem.cma_alloc(length, data_type="unsigned")
dst = mem.cma_alloc(length, data_type="unsigned")

# Display the allocator statistics after both allocations.
mem.cma_stats()

{'Buffer Count': 2,
 'CMA Memory Available': 125546496,
 'CMA Memory Usage': 2097152}

## Initialize Memory

In [5]:
# Fill the input with its own indices (0, 1, 2, ...) and clear the output,
# so that a successful copy is distinguishable from the initial state.
for idx in range(length):
    src[idx] = idx
    dst[idx] = 0

## Configure Kernel

In [6]:
# Bind the allocated buffers to the kernel parameters: position 0 -> src, position 1 -> dst.
# `set_paramerter` is spelled exactly as the FGPU object is called in this notebook;
# do not "correct" the name without checking the package.
for position, cma_buffer in enumerate((src, dst)):
    fgpu.set_paramerter(position, cma_buffer, mem)

# Set up the index space: one dimension, one work-item per array element,
# work-groups of 64 items, starting at offset 0.
fgpu.set_num_dimensions(1)
fgpu.set_size(length)
fgpu.set_work_group_size(64)
fgpu.set_offset(0)

## Execute on FGPU

In [7]:
# Download the compiled kernel, then run it and keep the reported execution time.
fgpu.download_kernel()
execTime = fgpu.execute_kernel()

# The value is scaled by 10**6 and labelled "us", so execTime is presumably in
# seconds (confirm against the FGPU API).
elapsed_us = int(execTime * 10**6)
print("Execution time =", elapsed_us, "us")

Execution time = 1403 us


## Execute with memcopy

In [10]:
import time

# Reference measurement: copy the same input with the allocator's own memcopy
# into a separate buffer, so the FGPU result in `dst` is left untouched.
dst2 = mem.cma_alloc(length, data_type="unsigned")

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() is a wall clock that can jump when the system
# time is adjusted and may have coarse resolution.
start = time.perf_counter()
# length * 4: presumably the byte count (4 bytes per element); confirm against
# the cma_memcopy signature.
mem.cma_memcopy(dst2, src, length * 4)
end = time.perf_counter()

print("Execution time =", int((end - start) * 1000000), "us")

Execution time = 28925 us


## Check Results

In [11]:
# Obtain indexable views of the input buffer and of the FGPU output buffer.
src_buf = mem.cma_cast(src, "unsigned int")
# NOTE(review): the trailing space in the type string below is kept exactly as
# in the original cell; it looks unintentional, so confirm before normalising.
dst_buf = mem.cma_cast(dst, "unsigned int ")

# Count the positions where the FGPU output differs from the input.
nErrors = sum(1 for pos in range(length) if src_buf[pos] != dst_buf[pos])

if nErrors:
    print(nErrors, "Errors found!")
else:
    print("no Errors found!")

no Errors found!
