Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

initial add

  • Loading branch information...
commit ec0a9b483b027c736b9bacee67b18fa69d91ad82 1 parent 26873f9
@tonyrog authored
View
8 Makefile
@@ -0,0 +1,8 @@
+
+# "all" and "doc" are commands, not files: declare them phony so a stray
+# file with either name can never make them look up to date.
+.PHONY: all doc
+
+# Build the C driver first, then the Erlang sources.  $(MAKE) -C keeps
+# -j/-n and the jobserver working in the sub-makes, and each line fails
+# the build as soon as its sub-make fails.
+all:
+	$(MAKE) -C c_src
+	$(MAKE) -C src
+
+# Generate the EDoc documentation from the Erlang sources.
+doc:
+	$(MAKE) -C src edoc
+
View
14 README
@@ -0,0 +1,14 @@
+Welcome to the Erlang OpenCL binding
+
+To get started you need Erlang, preferably R13. You also need
+GCC and a machine with OpenCL installed. Currently only Mac OS X 10.6.x
+is known to work; Linux support is on its way.
+
+To build: just run make
+
+To build the examples: go to the examples directory and run make.
+
+Make sure erlang finds the cl/ebin directory.
+
+
+
View
172 c_src/Makefile
@@ -0,0 +1,172 @@
+# Host identification; the platform and SIMD switches below are derived
+# from these two values.
+OSNAME := $(shell uname -s)
+MACHINE := $(shell uname -m)
+CC = gcc
+
+# Root of the Erlang installation.  ?= lets the environment or the command
+# line select another install, e.g. "make ERLDIR=/opt/erlang/lib/erlang".
+ERLDIR ?= /usr/local/lib/erlang
+ERL_C_INCLUDE_DIR := $(ERLDIR)/usr/include
+# Absolute path of the directory above c_src, computed without a shell.
+OCL_DIR := $(abspath ..)
+
+# Platform switches; exactly the ones matching the host are set to Yes by
+# the conditionals that follow.
+MAC_OS_X = No
+WIN32 = No
+LINUX = No
+
+# SIMD instruction-set switches; all off unless enabled for the host CPU.
+ALTIVEC = No
+MMX = No
+SSE = No
+SSE2 = No
+SSE3 = No
+SSE4 = No
+
+# Per-build-type output directories and compiler flags.  TYPE is supplied
+# by the recursive $(MAKE) invocations (TYPE=debug or TYPE=release); when
+# it is unset none of this applies.
+ifeq ($(TYPE), debug)
+OBJDIR=../obj/debug
+LIBDIR=../lib/debug
+CFLAGS += -Ddebug -DDEBUG -g
+endif
+
+ifeq ($(TYPE), release)
+OBJDIR=../obj/release
+LIBDIR=../lib/release
+CFLAGS += -O3
+endif
+
+# Flags common to both build types, kept in one place so the two
+# configurations cannot drift apart.
+ifneq ($(filter debug release,$(TYPE)),)
+CFLAGS += -fPIC -Wall -Wextra -Wswitch-default -Wswitch-enum -D_THREAD_SAFE -D_REENTRANT -fno-common
+# -no-cpp-precomp is an Apple GCC option; pass it only on Darwin so that
+# compilers on other systems are not handed a switch they do not know.
+ifeq ($(OSNAME), Darwin)
+CFLAGS += -no-cpp-precomp
+endif
+endif
+
+# Linux: shared object built with -shared.  When WORDSIZE is 32 or 64 the
+# same -m switch is used for compiling AND linking; without it on the link
+# line the objects and the output would target different architectures.
+ifeq ($(OSNAME), Linux)
+LINUX = Yes
+ifneq ($(filter 32 64,$(WORDSIZE)),)
+CFLAGS += -m$(WORDSIZE) -DWORDSIZE=$(WORDSIZE)
+LD_SHARED := $(CC) -m$(WORDSIZE) -shared
+else
+LD_SHARED := $(CC) -shared
+endif
+endif
+
+# Mac OS X: the driver is a loadable bundle; undefined symbols are left
+# for the host process to resolve at load time.  LD_SHARED is only defined
+# when WORDSIZE is 32 or 64.
+ifeq ($(OSNAME), Darwin)
+MAC_OS_X = Yes
+ifneq ($(filter 32 64,$(WORDSIZE)),)
+CFLAGS += -m$(WORDSIZE) -DWORDSIZE=$(WORDSIZE)
+LD_SHARED := $(CC) -m$(WORDSIZE) -bundle -flat_namespace -undefined suppress
+endif
+endif
+
+# PowerPC Macs report "Power Macintosh" from `uname -m`.  make compares
+# the two ifeq arguments as plain text, so the value must NOT be wrapped
+# in quotes (the quote characters would be part of the comparison and the
+# test could never succeed).
+ifeq ($(MACHINE),Power Macintosh)
+ALTIVEC = Yes
+endif
+
+# 32-bit x86 hosts (i386 / i686): enable SSE2 only.
+ifneq ($(filter i386 i686,$(MACHINE)),)
+MMX = No
+SSE = No
+SSE2 = Yes
+SSE3 = No
+endif
+
+# Translate the Yes/No switches into compiler and linker flags.
+
+ifeq ($(MAC_OS_X),Yes)
+  LDFLAGS += -framework OpenCL
+endif
+
+ifeq ($(WIN32),Yes)
+  CFLAGS += -DWIN32
+endif
+
+ifeq ($(ALTIVEC),Yes)
+  CFLAGS += -maltivec -DUSE_ALTIVEC
+endif
+
+ifeq ($(SSE),Yes)
+  CFLAGS += -msse
+endif
+
+# NOTE(review): BIT is not defined anywhere in this makefile, so the
+# SIMD_OBJS names below expand with an empty word-size part; confirm the
+# intended variable before these objects are wired into a rule.
+ifeq ($(SSE2),Yes)
+  CFLAGS += -msse2 -DUSE_SSE2
+  SIMD_OBJS += ocl_simd_sse2.$(BIT).o
+endif
+
+# The SSE3 and MMX switches used to append to MCFLAGS, a variable no rule
+# reads, so enabling them had no effect.  They now go to CFLAGS like the
+# other instruction-set switches.
+ifeq ($(SSE3),Yes)
+  CFLAGS += -msse3
+endif
+
+ifeq ($(MMX),Yes)
+  CFLAGS += -mmmx -DUSE_MMX
+  SIMD_OBJS += ocl_simd_mmx.$(BIT).o
+endif
+
+# The driver shared object, one per build type and word size.
+CL_DRV = $(LIBDIR)/$(WORDSIZE)/cl_drv.so
+
+# Object files that make up the driver.
+CL_OBJS = $(addprefix $(OBJDIR)/$(WORDSIZE)/,cl_drv.o cl_hash.o)
+
+# Assembly listings of the same translation units.
+CL_ASM = $(addprefix $(OBJDIR)/$(WORDSIZE)/,cl_drv.s cl_hash.s)
+
+# SIMD helper objects, placed directly under OBJDIR.
+OCL_SIMD_OBJS = $(addprefix $(OBJDIR)/,$(SIMD_OBJS))
+
+# Targets that name actions rather than files.
+.PHONY: all all32 all64 clean clean_debug clean_internal debug release driver depend
+
+# Default: release and debug drivers for both word sizes.
+all: all32 all64
+
+all32: config.32.h
+	$(MAKE) driver TYPE=release WORDSIZE=32
+	$(MAKE) driver TYPE=debug WORDSIZE=32
+
+all64: config.64.h
+	$(MAKE) driver TYPE=release WORDSIZE=64
+	$(MAKE) driver TYPE=debug WORDSIZE=64
+
+config.32.h: configure
+	./configure --with-wordsize=32
+
+config.64.h: configure
+	./configure --with-wordsize=64
+
+# && so that autoconf does not run after a failed autoheader.
+configure: configure.in
+	autoheader && autoconf
+
+clean:
+	$(MAKE) clean_internal TYPE=release
+
+# Make cl library
+debug:
+	$(MAKE) driver TYPE=debug
+
+clean_debug:
+	$(MAKE) clean_internal TYPE=debug
+
+release:
+	$(MAKE) driver TYPE=release
+
+driver: $(CL_DRV)
+
+cbuf_test: cbuf_test.o
+	$(CC) -o $@ cbuf_test.o
+
+cbuf_test.o: cbufv2.h
+
+depend:
+
+# Objects are written to $(OBJDIR)/$(WORDSIZE)/, so the per-word-size
+# sub-directories must be cleaned as well.  Refuse to run with an empty
+# OBJDIR: the pattern would otherwise expand to /*.o.
+clean_internal:
+	$(if $(strip $(OBJDIR)),,$(error OBJDIR is empty; run with TYPE=debug or TYPE=release))
+	$(RM) $(OBJDIR)/*.o $(OBJDIR)/*/*.o
+
+# Output directories are created on demand so a fresh checkout builds.
+$(CL_DRV): $(CL_OBJS)
+	@mkdir -p $(@D)
+	$(LD_SHARED) -o $@ $(CL_OBJS) $(LDFLAGS)
+
+$(OBJDIR)/$(WORDSIZE)/cl_drv.o: cbufv2.h cl_hash.h
+
+$(OBJDIR)/$(WORDSIZE)/%.o: %.c
+	@mkdir -p $(@D)
+	$(CC) -c -o $@ $(CFLAGS) -I$(ERL_C_INCLUDE_DIR) $<
+
+# Rule headers must start in column one; an indented header can be taken
+# for a recipe line of the rule above it.
+$(OBJDIR)/$(WORDSIZE)/%.s: %.c
+	@mkdir -p $(@D)
+	$(CC) -S -o $@ $(CFLAGS) -I$(ERL_C_INCLUDE_DIR) $<
+
+cbuf_test.o: cbuf_test.c
+	$(CC) -c -o $@ -g -I$(ERL_C_INCLUDE_DIR) $<
+
View
352 c_src/cbuf_test.c
@@ -0,0 +1,352 @@
+//
+// CBUF testing
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "erl_driver.h"
+#define driver_alloc(size) malloc((size))
+#define driver_realloc(ptr,size) realloc((ptr),(size))
+#define driver_free(ptr) free((ptr))
+
+// Test stand-ins for the emulator's binary allocation functions, built on
+// the malloc/realloc/free wrappers defined above.
+
+// Allocate an ErlDrvBinary with room for sz payload bytes.
+// Returns NULL if sz is negative or the allocation fails.
+ErlDrvBinary* driver_alloc_binary(int sz)
+{
+    ErlDrvBinary* bp;
+
+    if (sz < 0)
+        return NULL;
+    if (!(bp = driver_alloc(sizeof(ErlDrvBinary)+sz)))
+        return NULL;
+    bp->orig_size = sz;
+    return bp;
+}
+
+// Resize bp so that it has room for sz payload bytes.
+// The header must be included in the request, exactly as in
+// driver_alloc_binary; asking for sz bytes only would leave the payload
+// short by the size of the header.
+// Returns NULL (bp is left untouched) if sz is negative or the
+// reallocation fails.
+ErlDrvBinary* driver_realloc_binary(ErlDrvBinary* bp, int sz)
+{
+    ErlDrvBinary* np;
+
+    if (sz < 0)
+        return NULL;
+    if (!(np = driver_realloc(bp, sizeof(ErlDrvBinary)+sz)))
+        return NULL;
+    np->orig_size = sz;
+    return np;
+}
+
+// Release a binary obtained from the two functions above.
+void driver_free_binary(ErlDrvBinary* bp)
+{
+    driver_free(bp);
+}
+
+#include "cbufv2.h"
+
+// Ten consecutive values per element type; the value ranges do not
+// overlap, so the printed output shows which array a value came from.
+u_int8_t vu8[] = {1,2,3,4,5,6,7,8,9,10};
+u_int16_t vu16[] = {11,12,13,14,15,16,17,18,19,20};
+u_int32_t vu32[] = {21,22,23,24,25,26,27,28,29,30};
+u_int64_t vu64[] = {31,32,33,34,35,36,37,38,39,40};
+
+float vf32[] = {41.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,50.0};
+double vf64[] = {51.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0};
+
+// vec1: one I/O segment per array above, so every value lies entirely
+// inside a single segment (no value straddles a segment boundary).
+SysIOVec iov1[6] =
+{
+ { (char*)vu8, sizeof(vu8) },
+ { (char*)vu16, sizeof(vu16) },
+ { (char*)vu32, sizeof(vu32) },
+ { (char*)vu64, sizeof(vu64) },
+ { (char*)vf32, sizeof(vf32) },
+ { (char*)vf64, sizeof(vf64) }
+};
+
+// No segment of vec1 is backed by a driver binary.
+ErlDrvBinary* binv1[6] = { 0, 0, 0, 0, 0, 0};
+
+// Initialiser order: segment count, total byte size, segments, binaries.
+ErlIOVec vec1 =
+{
+ 6,
+ (sizeof(vu8)+sizeof(vu16)+
+ sizeof(vu32)+sizeof(vu64)+
+ sizeof(vf32)+sizeof(vf64)),
+ iov1,
+ binv1
+};
+
+// vec2: 23 bytes split over three segments of 10, 8 and 5 bytes.  Taken
+// as one little-endian byte stream they hold the 32-bit values 1..5, the
+// 16-bit value 6 and the 8-bit value 7; the third and fifth 32-bit values
+// straddle a segment boundary.
+u_int8_t vx[] = { 1, 0, 0, 0, 2, 0, 0, 0, 3, 0 };
+u_int8_t vy[] = { 0, 0, 4, 0, 0, 0, 5, 0 };
+u_int8_t vz[] = { 0, 0, 6, 0, 7 };
+
+SysIOVec iov2[3] =
+{
+ { (char*)vx, sizeof(vx) },
+ { (char*)vy, sizeof(vy) },
+ { (char*)vz, sizeof(vz) }
+};
+
+// No segment of vec2 is backed by a driver binary.
+ErlDrvBinary* binv2[3] = { 0, 0, 0};
+
+ErlIOVec vec2 =
+{
+ 3,
+ (sizeof(vx)+sizeof(vy)+sizeof(vz)),
+ iov2,
+ binv2
+};
+
+
+
+// The print_* helpers read n values of one element type from `in` and
+// print them on a single line, separated by spaces.  Each value starts
+// out as zero so that a read that stores nothing cannot make printf
+// format an indeterminate value.
+
+// Print n unsigned 8-bit values.
+void print_u8(cbuf_t* in, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        u_int8_t val = 0;
+        get_uint8(in, &val);
+        printf("%u ", val);
+    }
+    printf("\n");
+}
+
+// Print n unsigned 16-bit values.
+void print_u16(cbuf_t* in, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        u_int16_t val = 0;
+        get_uint16(in, &val);
+        printf("%u ", val);
+    }
+    printf("\n");
+}
+
+// Print n unsigned 32-bit values.
+void print_u32(cbuf_t* in, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        u_int32_t val = 0;
+        get_uint32(in, &val);
+        printf("%u ", val);
+    }
+    printf("\n");
+}
+
+// Print n unsigned 64-bit values.
+// u_int64_t is not unsigned long long on every platform, so the value is
+// cast to the type that %llu expects.
+void print_u64(cbuf_t* in, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        u_int64_t val = 0;
+        get_uint64(in, &val);
+        printf("%llu ", (unsigned long long) val);
+    }
+    printf("\n");
+}
+
+// Print n 32-bit floating point values.
+void print_f32(cbuf_t* in, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        float val = 0.0f;
+        get_float32(in, &val);
+        printf("%f ", val);
+    }
+    printf("\n");
+}
+
+
+// Print n 64-bit floating point values.
+void print_f64(cbuf_t* in, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        double val = 0.0;
+        get_float64(in, &val);
+        printf("%f ", val);
+    }
+    printf("\n");
+}
+
+// Read every test array back through a single-segment cbuf and print the
+// values.  Each section reports "(data not consumed)" on its END line
+// when the cursor is not at the end of the buffer after the ten reads.
+void read_buffer_test()
+{
+    cbuf_t in;
+
+    printf("read_buffer_test vu8: BEGIN\n");
+    cbuf_init(&in, &vu8, sizeof(vu8), 0, 0);
+    print_u8(&in, 10);
+    printf("read_buffer_test vu8: END%s\n",
+           !cbuf_eob(&in) ? " (data not consumed)" : "");
+    cbuf_final(&in);
+
+    printf("read_buffer_test vu16: BEGIN\n");
+    cbuf_init(&in, &vu16, sizeof(vu16), 0, 0);
+    print_u16(&in, 10);
+    printf("read_buffer_test vu16: END%s\n",
+           !cbuf_eob(&in) ? " (data not consumed)" : "");
+    cbuf_final(&in);
+
+    printf("read_buffer_test vu32: BEGIN\n");
+    cbuf_init(&in, &vu32, sizeof(vu32), 0, 0);
+    print_u32(&in, 10);
+    printf("read_buffer_test vu32: END%s\n",
+           !cbuf_eob(&in) ? " (data not consumed)" : "");
+    cbuf_final(&in);
+
+    printf("read_buffer_test vu64: BEGIN\n");
+    cbuf_init(&in, &vu64, sizeof(vu64), 0, 0);
+    print_u64(&in, 10);
+    printf("read_buffer_test vu64: END%s\n",
+           !cbuf_eob(&in) ? " (data not consumed)" : "");
+    cbuf_final(&in);
+
+    printf("read_buffer_test vf32: BEGIN\n");
+    cbuf_init(&in, &vf32, sizeof(vf32), 0, 0);
+    print_f32(&in, 10);
+    // The END line must carry the same label as its BEGIN line (vf32).
+    printf("read_buffer_test vf32: END%s\n",
+           !cbuf_eob(&in) ? " (data not consumed)" : "");
+    cbuf_final(&in);
+
+    printf("read_buffer_test vf64: BEGIN\n");
+    cbuf_init(&in, &vf64, sizeof(vf64), 0, 0);
+    print_f64(&in, 10);
+    printf("read_buffer_test vf64: END%s\n",
+           !cbuf_eob(&in) ? " (data not consumed)" : "");
+    cbuf_final(&in);
+}
+
+// Read the two I/O vectors back through a multi-segment cbuf.
+// vec1 has one segment per element type; vec2 has values that cross
+// segment boundaries.  The cbuf state is dumped before and after each run.
+void read_vector_test()
+{
+ cbuf_t in;
+
+ printf("read_vector_test1: BEGIN\n");
+ cbuf_initv(&in, &vec1);
+ cbuf_print(&in, "vec1");
+ // read order follows the segment order of vec1
+ print_u8(&in, 10);
+ print_u16(&in, 10);
+ print_u32(&in, 10);
+ print_u64(&in, 10);
+ print_f32(&in, 10);
+ print_f64(&in, 10);
+ printf("read_vector_test1: END%s\n",
+ !cbuf_eob(&in) ? " (data not consumed)" : "");
+ cbuf_print(&in, "vec1");
+ cbuf_final(&in);
+
+ printf("read_vector_test2: BEGIN\n");
+ cbuf_initv(&in, &vec2);
+ cbuf_print(&in, "vec2");
+ // 5*4 + 1*2 + 1*1 = 23 bytes, the total size of vec2
+ print_u32(&in, 5);
+ print_u16(&in, 1);
+ print_u8(&in, 1);
+ printf("read_vector_test2: END%s\n",
+ !cbuf_eob(&in) ? " (data not consumed)" : "");
+ cbuf_print(&in, "vec2");
+ cbuf_final(&in);
+}
+
+// Write values into a cbuf that starts on a 10-byte stack buffer (or on
+// no buffer at all), then rewind with cbuf_reset and read them back.
+// The sections differ in how much is written and in the allocation flags
+// passed to cbuf_init.
+void write_buffer_test()
+{
+ cbuf_t out;
+ u_int8_t small_buf[10];
+ u_int8_t i8;
+ u_int16_t i16;
+
+ // 10 bytes: exactly the size of small_buf
+ printf("write_buffer_test small_buf: BEGIN\n");
+ cbuf_init(&out, small_buf, sizeof(small_buf), 0, 0);
+ for (i8 = 1; i8 <= 10; i8++)
+ cbuf_write(&out, &i8, sizeof(i8));
+ cbuf_reset(&out, 0);
+ print_u8(&out, 10);
+ cbuf_print(&out, "out_i8");
+ cbuf_final(&out);
+ printf("write_buffer_test: small_buf: END\n");
+
+ // 20 bytes: twice the size of small_buf
+ printf("write_buffer_test alloc_buf: BEGIN\n");
+ cbuf_init(&out, small_buf, sizeof(small_buf), 0, 0);
+ for (i8 = 1; i8 <= 20; i8++)
+ cbuf_write(&out, &i8, sizeof(i8));
+ cbuf_reset(&out, 0);
+ print_u8(&out, 20);
+ cbuf_print(&out, "out_i8");
+ cbuf_final(&out);
+ printf("write_buffer_test: alloc_buf: END\n");
+
+ // 400 bytes (200 16-bit values) starting from small_buf
+ printf("write_buffer_test realloc_buf: BEGIN\n");
+ cbuf_init(&out, small_buf, sizeof(small_buf), 0, 0);
+ cbuf_print(&out, "out_i16");
+ for (i16 = 1; i16 <= 200; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_reset(&out, 0);
+ print_u16(&out, 200);
+ cbuf_print(&out, "out_i16");
+ cbuf_final(&out);
+ printf("write_buffer_test: realloc_buf: END\n");
+
+ // the same with an empty initial buffer
+ printf("write_buffer_test realloc_buf2: BEGIN\n");
+ cbuf_init(&out, 0, 0, 0, 0);
+ cbuf_print(&out, "out_i16");
+ for (i16 = 1; i16 <= 200; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_reset(&out, 0);
+ print_u16(&out, 200);
+ cbuf_print(&out, "out_i16");
+ cbuf_final(&out);
+ printf("write_buffer_test: realloc_buf2: END\n");
+
+ // the same starting from small_buf, with CBUF_FLAG_BINARY set
+ printf("write_buffer_test binary_realloc_buf: BEGIN\n");
+ cbuf_init(&out, small_buf, sizeof(small_buf), 0, CBUF_FLAG_BINARY);
+ cbuf_print(&out, "out_i16");
+ for (i16 = 1; i16 <= 200; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_reset(&out, 0);
+ print_u16(&out, 200);
+ cbuf_print(&out, "out_i16");
+ cbuf_final(&out);
+ printf("write_buffer_test: binary_realloc_buf: END\n");
+
+
+
+}
+
+// Write 16-bit values into several segments (a new segment is started
+// with cbuf_seg_add after every 20 values), then rewind and read all of
+// them back in one go.
+void write_vec_test()
+{
+ cbuf_t out;
+ u_int8_t small_buf[10];
+ u_int16_t i16;
+
+ // three segments, first segment initialised with CBUF_FLAG_BINARY
+ printf("write_vec_test1: binary BEGIN\n");
+ cbuf_init(&out, small_buf, sizeof(small_buf), 0, CBUF_FLAG_BINARY);
+ for (i16 = 1; i16 <= 20; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_seg_add(&out);
+ for (i16 = 21; i16 <= 40; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_seg_add(&out);
+ for (i16 = 41; i16 <= 60; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_reset(&out, 0);
+ print_u16(&out, 60);
+ cbuf_print(&out, "out_i16");
+ cbuf_final(&out);
+ printf("write_vec_test1: binary END\n");
+
+
+ // five segments: one more than CBUF_VEC_SIZE (4)
+ printf("write_vec_test2: BEGIN\n");
+ cbuf_init(&out, small_buf, sizeof(small_buf), 0, 0);
+ for (i16 = 1; i16 <= 20; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_seg_add(&out);
+ for (i16 = 21; i16 <= 40; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_seg_add(&out);
+ for (i16 = 41; i16 <= 60; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_seg_add(&out);
+ for (i16 = 61; i16 <= 80; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_seg_add(&out);
+ for (i16 = 81; i16 <= 100; i16++)
+ cbuf_write(&out, &i16, sizeof(i16));
+ cbuf_reset(&out, 0);
+ print_u16(&out, 100);
+ cbuf_print(&out, "out_i16");
+ cbuf_final(&out);
+ printf("write_vec_test2: END\n");
+}
+
+
+// Run the read tests followed by the write tests.  All results are
+// printed for inspection; the exit status is always 0.
+// The return type is spelled out: implicit int is not valid since C99.
+int main(void)
+{
+    read_buffer_test();
+    read_vector_test();
+
+    write_buffer_test();
+    write_vec_test();
+
+    return 0;
+}
View
1,250 c_src/cbufv2.h
@@ -0,0 +1,1250 @@
+/*
+ * control buffer management
+ *
+ */
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <memory.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <machine/endian.h>
+
+#define CBUF_USE_PUT_ETF // pack ETF
+#define CBUF_USE_PUT_CTI // pack CTI
+
+#define CBUF_FLAG_HEAP 0x01 // allocated heap memory
+#define CBUF_FLAG_BINARY 0x02 // ErlDrvBinary
+#define CBUF_FLAG_PUT_CTI 0x00 // Put CTI data (default)
+#define CBUF_FLAG_PUT_ETF 0x10 // Put ETF data
+#define CBUF_FLAG_PUT_TRM 0x20 // Put ErlDrvTerm data
+#define CBUF_FLAG_PUT_MASK 0x30 // put type value selection
+
+#define CBUF_VEC_SIZE 4 // static vector size
+#define CBUF_SEG_EXTRA 256
+//
+//
+//
+// One contiguous piece of buffer memory.
+typedef struct {
+ u_int8_t flags; // allocation status (HEAP|BINARY)
+ u_int8_t* base; // base pointer
+ size_t size; // allocated length of segment
+ size_t len; // used length of segment
+ ErlDrvBinary* bp; // reference when segment is in a binary
+} cbuf_segment_t;
+
+// A buffer made of a vector of segments plus a cursor (iv, ip) that is
+// shared by reading and writing.
+typedef struct
+{
+ u_int8_t flags; // allocation flags
+ size_t ip; // position in current segment
+ size_t iv; // index of the current segment in v
+ size_t vlen; // used length of v
+ size_t vsize; // actual length of v
+ cbuf_segment_t* v; // segment vector (dv, or separately allocated memory)
+ cbuf_segment_t dv[CBUF_VEC_SIZE]; // built-in storage for the first segments
+} cbuf_t;
+
+// CBUF Tag Interface (CTI) (always used by cl_drv)
+#define OK 1 // 'ok'
+#define ERROR 2 // 'error'
+#define EVENT 3 // 'event'
+#define INT8 4 // int8_t
+#define UINT8 5 // u_int8_t
+#define INT16 6 // int16_t
+#define UINT16 7 // u_int16_t
+#define INT32 8 // int32_t
+#define UINT32 9 // u_int32_t
+#define INT64 10 // int64_t
+#define UINT64 11 // u_int64_t
+#define BOOLEAN 12 // u_int8_t
+#define FLOAT32 13 // float
+#define FLOAT64 14 // double
+#define STRING1 15 // len byte followed by UTF-8 chars
+#define STRING4 16 // 4-byte len followed by UTF-8 string
+#define ATOM 17 // len bytes followed by ASCII chars
+#define BINARY 18 // binary 4-byte len followed by Octets
+#define LIST 19 // list begin
+#define LIST_END 20 // list end
+#define TUPLE 21 // tuple begin
+#define TUPLE_END 22 // tuple end
+#define ENUM 23 // Encoded as INT32
+#define BITFIELD 24 // Encoded as UINT64
+#define HANDLE 25 // Encoded pointer 32/64 bit
+
+// External Term Format (ETF)
+// Version 131
+#ifdef CBUF_USE_PUT_ETF
+
+#define VERSION_MAGIC 131
+#define SMALL_INTEGER_EXT 97 // 'a'
+#define INTEGER_EXT 98 // 'b'
+#define FLOAT_EXT 99 // 'c'
+#define ATOM_EXT 100 // 'd'
+#define SMALL_ATOM_EXT 115 // 's'
+#define REFERENCE_EXT 101 // 'e'
+#define NEW_REFERENCE_EXT 114 // 'r'
+#define PORT_EXT 102 // 'f'
+#define NEW_FLOAT_EXT 70 // 'F'
+#define PID_EXT 103 // 'g'
+#define SMALL_TUPLE_EXT 104 // 'h'
+#define LARGE_TUPLE_EXT 105 // 'i'
+#define NIL_EXT 106 // 'j'
+#define STRING_EXT 107 // 'k'
+#define LIST_EXT 108 // 'l'
+#define BINARY_EXT 109 // 'm'
+#define BIT_BINARY_EXT 77 // 'M'
+#define SMALL_BIG_EXT 110 // 'n'
+#define LARGE_BIG_EXT 111 // 'o'
+#define NEW_FUN_EXT 112 // 'p'
+#define EXPORT_EXT 113 // 'q'
+#define FUN_EXT 117 // 'u'
+#define DIST_HEADER 68 // 'D'
+#define ATOM_CACHE_REF 82 // 'R'
+#define COMPRESSED 80 // 'P'
+
+#endif
+
+// Debug aid: dump the state of cp to stderr, labelled with name.
+// Prints the buffer flags, the cursor (iv, ip), the vector sizes, whether
+// the built-in vector dv is in use, and every segment in use.
+// size_t values are printed with %zu and pointers are passed to %p as
+// void*, which is what the printf conversions require.
+void cbuf_print(cbuf_t* cp, const char* name)
+{
+    size_t i;
+    FILE* f = stderr;
+
+    fprintf(f,"cbuf %s = {\r\n", name);
+    fprintf(f," flags:");
+    if (cp->flags & CBUF_FLAG_BINARY) fprintf(f," binary");
+    if (cp->flags & CBUF_FLAG_HEAP) fprintf(f," heap");
+    fprintf(f,"\r\n");
+    fprintf(f," iv: %zu\r\n", cp->iv);
+    fprintf(f," ip: %zu\r\n", cp->ip);
+    fprintf(f," vsize: %zu\r\n", cp->vsize);
+    fprintf(f," vlen: %zu\r\n", cp->vlen);
+    fprintf(f," dv: %s\r\n", (cp->v == cp->dv) ? "true" : "false");
+    for (i = 0; i < cp->vlen; i++) {
+        fprintf(f," v[%zu].flags:", i);
+        if (cp->v[i].flags & CBUF_FLAG_BINARY) fprintf(f," binary");
+        if (cp->v[i].flags & CBUF_FLAG_HEAP) fprintf(f," heap");
+        fprintf(f,"\r\n");
+        fprintf(f," v[%zu].base = %p\r\n", i, (void*) cp->v[i].base);
+        fprintf(f," v[%zu].size = %zu\r\n", i, cp->v[i].size);
+        fprintf(f," v[%zu].len = %zu\r\n", i, cp->v[i].len);
+        fprintf(f," v[%zu].bp = %p\r\n", i, (void*) cp->v[i].bp);
+    }
+    fprintf(f,"};\r\n");
+}
+
+// Copy len bytes from src to dst, converting a number from native byte
+// order (src) to big-endian byte order (dst).
+// Returns dst.  src and dst must not overlap.
+//
+// The host byte order is probed at run time instead of being taken from
+// the BYTE_ORDER / BIG_ENDIAN macros: when no header defines them,
+// "#if BYTE_ORDER == BIG_ENDIAN" compares 0 with 0 and silently selects
+// the plain copy on little-endian hosts as well.
+static inline void* memcpy_n2b(void* dst, const void* src, size_t len)
+{
+    const u_int16_t probe = 1;
+    const u_int8_t* sp;
+    u_int8_t* dp;
+
+    // On a big-endian host the first byte of the value 1 is the high byte
+    if (*((const u_int8_t*) &probe) == 0)
+        return memcpy(dst, src, len);
+
+    // Little-endian host: copy the bytes in reverse order
+    sp = ((const u_int8_t*) src) + len;
+    dp = (u_int8_t*) dst;
+    while(len--)
+        *dp++ = *--sp;
+    return dst;
+}
+
+
+// Offset of the cursor inside the current segment, i.e. the number of
+// bytes already written to / read from that segment.
+static inline size_t cbuf_seg_used(cbuf_t* cp)
+{
+    size_t used = cp->ip;
+    return used;
+}
+
+// Address of the cursor: base of the current segment plus cp->ip.
+static inline u_int8_t* cbuf_seg_ptr(cbuf_t* cp)
+{
+    cbuf_segment_t* seg = &cp->v[cp->iv];
+    return seg->base + cp->ip;
+}
+
+// Number of bytes left to read in the current segment; 0 once the cursor
+// has moved past the last segment.
+static inline size_t cbuf_seg_r_avail(cbuf_t* cp)
+{
+    if (cp->iv >= cp->vlen)
+        return 0;
+    return cp->v[cp->iv].len - cp->ip;
+}
+
+// Total number of bytes left to read: the rest of the current segment
+// plus the used length of every segment after it.
+static size_t cbuf_r_avail(cbuf_t* cp)
+{
+    size_t total = cbuf_seg_r_avail(cp);
+    size_t k;
+
+    for (k = cp->iv + 1; k < cp->vlen; k++)
+        total += cp->v[k].len;
+    return total;
+}
+
+// Number of bytes that can still be written to the current segment
+// without growing it; 0 once the cursor is past the last segment.
+static inline size_t cbuf_seg_w_avail(cbuf_t* cp)
+{
+    if (cp->iv >= cp->vlen)
+        return 0;
+    return cp->v[cp->iv].size - cp->ip;
+}
+
+// End-of-buffer test for reading.
+// Returns 1 when the cursor is past the last segment, or in the last
+// segment with nothing left to read; 0 otherwise.
+static inline int cbuf_eob(cbuf_t* cp)
+{
+    if (cp->iv >= cp->vlen)
+        return 1;
+    if (cp->iv != cp->vlen-1)
+        return 0;
+    return cbuf_seg_r_avail(cp) == 0;
+}
+
+// Reading: when the cursor has reached the used length of the current
+// segment, move it to the start of the next segment.
+static inline void cbuf_adjust_r_ip(cbuf_t* cp)
+{
+    if (cp->ip < cp->v[cp->iv].len)
+        return;
+    cp->ip = 0;
+    cp->iv++;
+}
+
+// Writing: when the cursor has reached the allocated size of the current
+// segment, move it to the start of the next segment.
+static inline void cbuf_adjust_w_ip(cbuf_t* cp)
+{
+    if (cp->ip < cp->v[cp->iv].size)
+        return;
+    cp->ip = 0;
+    cp->iv++;
+}
+
+
+// Reset the cursor to the start of the buffer and then move it forward
+// by skip bytes, stepping over whole segments as needed.
+// If skip is larger than the data in the buffer the cursor ends up past
+// the last segment (end of buffer) instead of walking beyond the segment
+// vector.
+static inline void cbuf_reset(cbuf_t* cp, u_int32_t skip)
+{
+    cp->iv = 0;
+    cp->ip = 0;
+    while((cp->iv < cp->vlen) && (skip > cp->v[cp->iv].len)) {
+        skip -= cp->v[cp->iv].len;
+        cp->iv++;
+    }
+    // Only keep an offset when there is a segment for it to refer to
+    cp->ip = (cp->iv < cp->vlen) ? skip : 0;
+}
+
+// Resize (grow) the current segment so that `need` more bytes fit.
+// Returns a pointer to the cursor position inside the (possibly moved)
+// segment, or 0 when memory could not be allocated; on failure the
+// segment is left as it was.
+// How the memory is obtained depends on the segment flags:
+//   BINARY - the data lives in an ErlDrvBinary (allocated on first growth)
+//   HEAP   - the data is resized with driver_realloc
+//   none   - the data is copied from the caller's buffer into memory from
+//            driver_alloc and the segment is marked HEAP
+static u_int8_t* cbuf_seg_realloc(cbuf_t* cp, size_t need)
+{
+ cbuf_segment_t* sp = &cp->v[cp->iv];
+ size_t new_size;
+
+ // Enough room behind the used part: just extend the used length
+ if (sp->len + need <= sp->size) {
+ sp->len += need;
+ return sp->base + cp->ip;
+ }
+ // Over-allocate by CBUF_SEG_EXTRA to reduce the number of reallocations
+ new_size = sp->size + need + CBUF_SEG_EXTRA;
+ if (sp->flags & CBUF_FLAG_BINARY) {
+ // Data is allocated in ErlDrvBinary
+ if (sp->bp) {
+ ErlDrvBinary* bp;
+ // fprintf(stderr, "realloc_binary: %lu\r\n", new_size);
+ if (!(bp = driver_realloc_binary(sp->bp, new_size)))
+ return 0;
+ sp->bp = bp;
+ }
+ else {
+ // No binary yet: allocate one and copy the data used so far
+ // fprintf(stderr, "alloc_binary: %lu\r\n", new_size);
+ if (!(sp->bp = driver_alloc_binary(new_size)))
+ return 0;
+ memcpy(sp->bp->orig_bytes, sp->base, sp->len);
+ }
+ // The segment data is now the payload of the binary
+ sp->base = (u_int8_t*) sp->bp->orig_bytes;
+ }
+ else if (sp->flags & CBUF_FLAG_HEAP) {
+ // Data is already dynamic, binaries not used
+ u_int8_t* dp;
+ // fprintf(stderr, "realloc: %lu\r\n", new_size);
+ if (!(dp = driver_realloc(sp->base, new_size)))
+ return 0;
+ sp->base = dp;
+ }
+ else {
+ // Move data from static buffer to dynamic
+ u_int8_t* base = sp->base;
+ u_int8_t* dp;
+
+ // fprintf(stderr, "alloc: %lu\r\n", new_size);
+ if (!(dp = driver_alloc(new_size)))
+ return 0;
+ sp->base = dp;
+ memcpy(sp->base, base, sp->len);
+ // From now on the segment owns its memory (freed in cbuf_final)
+ sp->flags |= CBUF_FLAG_HEAP;
+ }
+ sp->size = new_size;
+ return sp->base + cp->ip;
+}
+
+// Grow the segment vector to twice its current capacity.
+// The first growth moves the segments from the built-in vector dv into
+// allocated memory; later growths reallocate that memory.
+// Returns 1 on success and 0 when memory could not be allocated (cp is
+// then unchanged).
+static int cbuf_vec_grow(cbuf_t* cp)
+{
+    size_t vsize = 2*cp->vsize;
+    cbuf_segment_t* sp;
+
+    if (cp->v == cp->dv) {
+        // All CBUF_VEC_SIZE entries of dv are copied below, so the new
+        // vector must have room for at least that many.  cbuf_initv can
+        // leave vsize smaller than CBUF_VEC_SIZE while dv is in use.
+        if (vsize < 2*CBUF_VEC_SIZE)
+            vsize = 2*CBUF_VEC_SIZE;
+        if (!(sp = driver_alloc(sizeof(cbuf_segment_t)*vsize)))
+            return 0;
+        memcpy(sp,cp->dv,CBUF_VEC_SIZE*sizeof(cbuf_segment_t));
+    }
+    else {
+        if (!(sp = driver_realloc(cp->v,sizeof(cbuf_segment_t)*vsize)))
+            return 0;
+    }
+    cp->v = sp;
+    cp->vsize = vsize;
+    return 1;
+}
+
+// Terminate the current segment (its used length becomes the cursor
+// offset) and move the cursor to the start of the next segment.  When
+// there is no next segment an empty one is appended, growing the segment
+// vector if needed.
+// Returns 1 on success.  Returns 0 when the vector could not be grown; in
+// that case cp is left exactly as it was, so the buffer is still
+// consistent and can be finalised safely.
+static int cbuf_seg_add(cbuf_t* cp)
+{
+    size_t next = cp->iv + 1;
+    int append = (next >= cp->vlen);
+
+    // Make room first: nothing has been modified yet if this fails
+    if (append && (cp->vlen + 1 >= cp->vsize)) {
+        if (!cbuf_vec_grow(cp))
+            return 0;
+    }
+    // cp->v may have moved while growing, so index it only from here on
+    cp->v[cp->iv].len = cp->ip;
+    cp->iv = next;
+    cp->ip = 0;
+    if (append) {
+        cp->vlen++;
+        memset(&cp->v[cp->iv], 0, sizeof(cbuf_segment_t));
+    }
+    return 1;
+}
+
+// Reserve len contiguous bytes at the cursor of the current segment,
+// growing the segment when it is too small.
+// Returns a pointer to the reserved bytes and advances the cursor (and
+// the used length of the segment) past them; returns 0 if the segment
+// could not be grown.
+static inline u_int8_t* cbuf_seg_alloc(cbuf_t* cp, size_t len)
+{
+    u_int8_t* start;
+
+    if ((cbuf_seg_w_avail(cp) < len) && !cbuf_seg_realloc(cp, len))
+        return 0;
+    start = cbuf_seg_ptr(cp);
+    cp->ip += len;
+    cp->v[cp->iv].len = cp->ip;
+    return start;
+}
+
+// Read len bytes into ptr when the data may span several segments.
+// Segments that hold fewer bytes than are still wanted are consumed
+// completely and the cursor moves on to the next one.
+// Returns 1 on success and 0 when the buffer holds fewer than len bytes
+// (bytes consumed before the shortage was detected stay consumed).
+static int cbuf_seg_read(cbuf_t* cp, void* ptr, size_t len)
+{
+    size_t n;
+    while((cp->iv < cp->vlen) && len && ((n=cbuf_seg_r_avail(cp)) < len)) {
+        memcpy(ptr, cp->v[cp->iv].base + cp->ip, n);
+        ptr = ((u_int8_t*) ptr) + n;
+        len -= n;
+        cp->iv++;
+        cp->ip = 0;
+    }
+    // Past the last segment there is no v[iv] to look at: only an empty
+    // read can succeed there.
+    if (cp->iv >= cp->vlen)
+        return (len == 0);
+    if (cbuf_seg_r_avail(cp) < len)
+        return 0;
+    memcpy(ptr, cp->v[cp->iv].base + cp->ip, len);
+    cp->ip += len;
+    cbuf_adjust_r_ip(cp);
+    return 1;
+}
+
+// Read len bytes from the buffer into ptr.
+// Returns 1 on success and 0 when fewer than len bytes are available.
+static inline int cbuf_read(cbuf_t* cp, void* ptr, size_t len)
+{
+    // Slow path: the request reaches the end of the current segment or
+    // goes beyond it
+    if (cbuf_seg_r_avail(cp) <= len)
+        return cbuf_seg_read(cp, ptr, len);
+
+    // Fast path: everything is inside the current segment
+    memcpy(ptr, cp->v[cp->iv].base + cp->ip, len);
+    cp->ip += len;
+    return 1;
+}
+
+//
+// Write len bytes from ptr into the existing segments, filling each
+// segment up to its allocated size before moving on to the next.
+// Segments are never grown here.
+// Returns 1 on success and 0 when the remaining segments are too small
+// (bytes written before the shortage was detected stay written).
+// FIXME: add code to expand segments
+//
+static int cbuf_seg_write(cbuf_t* cp, void* ptr, size_t len)
+{
+    size_t n;
+    while((cp->iv < cp->vlen) && len && ((n=cbuf_seg_w_avail(cp)) < len)) {
+        memcpy(cp->v[cp->iv].base+cp->ip, ptr, n);
+        ptr = ((u_int8_t*) ptr) + n;
+        cp->v[cp->iv].len = cp->ip + n;
+        len -= n;
+        cp->iv++;
+        cp->ip = 0;
+    }
+    // Past the last segment there is no v[iv] to write to: only an empty
+    // write can succeed there.
+    if (cp->iv >= cp->vlen)
+        return (len == 0);
+    if (cbuf_seg_w_avail(cp) < len)
+        return 0;
+    memcpy(cp->v[cp->iv].base + cp->ip, ptr, len);
+    cp->ip += len;
+    cp->v[cp->iv].len = cp->ip;
+    cbuf_adjust_w_ip(cp);
+    return 1;
+}
+
+
+// Append a one-byte tag followed by len bytes from ptr, all inside the
+// current segment (which is grown when needed).
+// ptr is not read when len is 0.
+// Returns 1 on success, 0 if no room could be made.
+static inline int cbuf_twrite(cbuf_t* cp, u_int8_t tag, void* ptr, size_t len)
+{
+    u_int8_t* out = cbuf_seg_alloc(cp, 1+len);
+
+    if (!out)
+        return 0;
+    *out = tag;
+    if (len > 0)
+        memcpy(out+1, ptr, len);
+    return 1;
+}
+
+// Append len bytes from ptr to the current segment (which is grown when
+// needed).  ptr is not read when len is 0.
+// Returns 1 on success, 0 if no room could be made.
+static inline int cbuf_write(cbuf_t* cp, void* ptr, size_t len)
+{
+    u_int8_t* out = cbuf_seg_alloc(cp, len);
+
+    if (!out)
+        return 0;
+    if (len > 0)
+        memcpy(out, ptr, len);
+    return 1;
+}
+
+//
+// Initialise cp as a buffer with a single segment covering buf[0..len).
+// The segment is marked completely used (len == size) and the cursor is
+// placed skip bytes into it.
+//
+// flags is stored unchanged on the segment, where the allocation bits
+// decide how the segment is grown:
+//   0       => data is moved to memory from driver_alloc
+//   HEAP    => data is resized with driver_realloc
+//   BINARY  => data is kept in an ErlDrvBinary
+// Only the PUT_* bits of flags are kept on the buffer itself:
+//   PUT_CTI => put CTI format
+//   PUT_ETF => put ETF format
+//
+static void cbuf_init(cbuf_t* cp, void* buf, size_t len,
+                      size_t skip, u_int8_t flags)
+{
+    cbuf_segment_t* seg = &cp->dv[0];
+
+    // the one and only segment
+    seg->flags = flags;
+    seg->base = buf;
+    seg->size = len;
+    seg->len = len;
+    seg->bp = 0;
+
+    // the vector starts out in the built-in storage
+    cp->v = cp->dv;
+    cp->vsize = CBUF_VEC_SIZE;
+    cp->vlen = 1;
+    cp->flags = (flags & CBUF_FLAG_PUT_MASK);
+
+    // cursor: segment index and offset inside that segment
+    cp->iv = 0;
+    cp->ip = skip;
+}
+
+// Initialise cp for reading the data of an I/O vector: one segment per
+// element of vec, none of them owned by the buffer (segment flags are 0,
+// so cbuf_final frees neither the data nor the binaries).
+// IOV read only (or copy on write?)
+//
+// The capacity recorded in vsize is never smaller than the built-in
+// vector that is actually in use, and a negative vec->vsize is treated as
+// an empty vector.  If the segment vector cannot be allocated the buffer
+// is left valid but empty.
+static void cbuf_initv(cbuf_t* cp, ErlIOVec* vec)
+{
+    int i;
+
+    // Start as a valid, empty buffer on the built-in vector
+    cp->flags = 0;
+    cp->iv = 0;
+    cp->ip = 0;
+    cp->v = cp->dv;
+    cp->vsize = CBUF_VEC_SIZE;
+    cp->vlen = 0;
+
+    if (vec->vsize > CBUF_VEC_SIZE) {
+        cbuf_segment_t* sp = driver_alloc(sizeof(cbuf_segment_t)*vec->vsize);
+        if (!sp)
+            return;
+        cp->v = sp;
+        cp->vsize = vec->vsize;
+    }
+    if (vec->vsize > 0)
+        cp->vlen = vec->vsize;
+    for (i = 0; i < vec->vsize; i++) {
+        cp->v[i].flags = 0;
+        cp->v[i].base = (u_int8_t*) vec->iov[i].iov_base;
+        cp->v[i].size = vec->iov[i].iov_len;
+        cp->v[i].len = vec->iov[i].iov_len;
+        cp->v[i].bp = vec->binv[i];
+    }
+}
+
+
+// Allocate a cbuf_t whose single segment is the payload of a freshly
+// allocated ErlDrvBinary of len bytes.  When buf is given, len bytes are
+// copied from it into the binary.  The cursor starts skip bytes in.
+// Returns the new buffer, or 0 if either allocation fails.
+static cbuf_t* cbuf_new_bin(u_int8_t* buf,size_t len,size_t skip)
+{
+    cbuf_t* cb = (cbuf_t*) driver_alloc(sizeof(cbuf_t));
+    ErlDrvBinary* bin;
+
+    if (cb == NULL)
+        return 0;
+    bin = driver_alloc_binary(len);
+    if (bin == NULL) {
+        driver_free(cb);
+        return 0;
+    }
+    cbuf_init(cb, bin->orig_bytes, len, skip, CBUF_FLAG_BINARY);
+    // cbuf_init clears the binary reference, so set it afterwards
+    cb->v[0].bp = bin;
+    // the cbuf_t itself is heap memory (released by cbuf_free)
+    cb->flags = CBUF_FLAG_HEAP;
+    if (buf != NULL)
+        memcpy(cb->v[0].base, buf, len);
+    return cb;
+}
+
+/* Allocate a cbuf_t together with a data buffer of len bytes.
+ * When buf is given, len bytes are copied from it into the new buffer.
+ * The cursor starts skip bytes in.
+ * Returns the new buffer, or 0 if either allocation fails.
+ */
+static cbuf_t* cbuf_new(u_int8_t* buf, u_int32_t len, u_int32_t skip)
+{
+    cbuf_t* cb = (cbuf_t*) driver_alloc(sizeof(cbuf_t));
+    char* data;
+
+    if (cb == NULL)
+        return 0;
+    data = driver_alloc(len);
+    if (data == NULL) {
+        driver_free(cb);
+        return 0;
+    }
+    cbuf_init(cb, data, len, skip, CBUF_FLAG_HEAP);
+    /* the cbuf_t itself is heap memory (released by cbuf_free) */
+    cb->flags = CBUF_FLAG_HEAP;
+    if (buf != NULL)
+        memcpy(cb->v[0].base, buf, len);
+    return cb;
+}
+
+//
+// Release everything the buffer owns: for every segment in use either
+// its binary (BINARY segments that have one) or its heap data (HEAP
+// segments), and the segment vector itself when it is not the built-in
+// one.  The cbuf_t is not freed.
+//
+static void cbuf_final(cbuf_t* cp)
+{
+    cbuf_segment_t* seg = cp->v;
+    cbuf_segment_t* end = seg + cp->vlen;
+
+    for (; seg != end; seg++) {
+        if (seg->flags & CBUF_FLAG_BINARY) {
+            if (seg->bp != NULL)
+                driver_free_binary(seg->bp);
+            continue;
+        }
+        if (seg->flags & CBUF_FLAG_HEAP)
+            driver_free(seg->base);
+    }
+    if (cp->v != cp->dv)
+        driver_free(cp->v);
+}
+
+// Finalise the buffer and, when the cbuf_t itself is marked HEAP, free
+// it as well.
+static inline void cbuf_free(cbuf_t* cp)
+{
+    int on_heap;
+
+    cbuf_final(cp);
+    on_heap = (cp->flags & CBUF_FLAG_HEAP) != 0;
+    if (on_heap)
+        driver_free(cp);
+}
+
+// Trim the binary of the current segment to the used size by overwriting
+// its orig_size with the cursor offset.  Does nothing when the segment
+// has no binary.
+// The control interface won't use the size returned in the case of an
+// allocated binary.  THIS IS A BUG (I think)
+// FIXME: a bit dangerous since the original orig_size is lost;
+// the real fix is to reallocate!
+static inline void cbuf_trim(cbuf_t* cp)
+{
+    ErlDrvBinary* bin = cp->v[cp->iv].bp;
+
+    if (bin)
+        bin->orig_size = cbuf_seg_used(cp);
+}
+
+/* Append len "raw" bytes from buf to the current segment.
+ * Nothing is copied when len is 0 or when no room could be made; in the
+ * latter case the buffer is left unchanged instead of copying through a
+ * null pointer.
+ */
+static inline void cbuf_add(cbuf_t* cp, u_int8_t* buf, u_int32_t len)
+{
+    u_int8_t* ptr = cbuf_seg_alloc(cp, len);
+
+    if ((ptr != NULL) && (len != 0))
+        memcpy(ptr, buf, len);
+}
+
+// Move the read cursor len bytes forward, stepping over segment
+// boundaries.  If fewer than len bytes remain, the cursor ends up past
+// the last segment.
+static void cbuf_forward(cbuf_t* cp, size_t len)
+{
+    for (; cp->iv < cp->vlen; cp->iv++, cp->ip = 0) {
+        size_t room = cbuf_seg_r_avail(cp);
+
+        if (len <= room) {
+            cp->ip += len;
+            cbuf_adjust_r_ip(cp);
+            return;
+        }
+        len -= room;
+    }
+}
+
+// Move the cursor len bytes backward, stepping over segment boundaries.
+// The cursor stops at the very start of the buffer when len is larger
+// than the distance to it.
+static void cbuf_backward(cbuf_t* cp, size_t len)
+{
+ while(len) {
+ // bytes between the start of the current segment and the cursor
+ size_t n = cbuf_seg_used(cp);
+ if (n >= len) {
+ cp->ip -= len;
+ return;
+ }
+ len -= n;
+ // already in the first segment: clamp to the start of the buffer
+ if (cp->iv == 0) {
+ cp->ip = 0;
+ return;
+ }
+ // continue from the end of the used part of the previous segment
+ cp->iv--;
+ cp->ip = cp->v[cp->iv].len;
+ }
+ // reached only when len was 0 on entry (every other path returns above)
+ cbuf_adjust_r_ip(cp);
+}
+
+/*****************************************************************************
+ *
+ * PUT tagged data
+ *
+ *****************************************************************************/
+
+// Size in bytes of the value that follows a CTI tag.
+// Returns 0 for tags with a variable-length or empty payload and for
+// unknown tags.
+static inline size_t cbuf_sizeof(u_int8_t tag)
+{
+    switch (tag) {
+    // unsigned integers
+    case BOOLEAN:
+    case UINT8:     return sizeof(u_int8_t);
+    case UINT16:    return sizeof(u_int16_t);
+    case UINT32:    return sizeof(u_int32_t);
+    case UINT64:    return sizeof(u_int64_t);
+    // signed integers
+    case INT8:      return sizeof(int8_t);
+    case INT16:     return sizeof(int16_t);
+    case ENUM:
+    case INT32:     return sizeof(int32_t);
+    case BITFIELD:
+    case INT64:     return sizeof(int64_t);
+    // floating point
+    case FLOAT32:   return sizeof(float);
+    case FLOAT64:   return sizeof(double);
+    // pointer-sized
+    case HANDLE:    return sizeof(intptr_t);
+    // variable
+    case STRING1:
+    case STRING4:
+    case ATOM:
+    case BINARY:
+    case LIST:
+    case LIST_END:
+    case TUPLE:
+    case TUPLE_END:
+        return 0;
+    default:
+        return 0;
+    }
+}
+
+#ifdef CBUF_USE_PUT_CTI
+
+static inline int cbuf_cti_put_boolean(cbuf_t* cp, u_int8_t value)
+{
+ return cbuf_twrite(cp, BOOLEAN, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_int8(cbuf_t* cp, int8_t value)
+{
+ return cbuf_twrite(cp, INT8, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_int16(cbuf_t* cp, int16_t value)
+{
+ return cbuf_twrite(cp, INT16, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_int32(cbuf_t* cp, int32_t value)
+{
+ return cbuf_twrite(cp, INT32, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_int64(cbuf_t* cp, int64_t value)
+{
+ return cbuf_twrite(cp, INT64, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_float32(cbuf_t* cp, float value)
+{
+ return cbuf_twrite(cp, FLOAT32, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_float64(cbuf_t* cp, double value)
+{
+ return cbuf_twrite(cp, FLOAT64, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_uint8(cbuf_t* cp, u_int8_t value)
+{
+ return cbuf_twrite(cp, UINT8, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_uint16(cbuf_t* cp, u_int16_t value)
+{
+ return cbuf_twrite(cp, UINT16, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_uint32(cbuf_t* cp, u_int32_t value)
+{
+ return cbuf_twrite(cp, UINT32, &value, sizeof(value));
+}
+
+static inline int cbuf_cti_put_uint64(cbuf_t* cp, u_int64_t value)
+{
+ return cbuf_twrite(cp, UINT64, &value, sizeof(value));
+}
+
+/* Structural and reply tags: TUPLE/TUPLE_END, LIST/LIST_END and
+ * OK/ERROR/EVENT.  Each of these is a single tag byte with no payload.
+ * All functions return 1 on success and 0 if no room could be made.
+ */
+
+/* Start of a tuple; the element count n is not part of the CTI format. */
+static inline int cbuf_cti_put_tuple_begin(cbuf_t* cp, size_t n)
+{
+    (void) n;
+    return cbuf_twrite(cp, TUPLE, NULL, 0);
+}
+
+/* End of a tuple. */
+static inline int cbuf_cti_put_tuple_end(cbuf_t* cp)
+{
+    return cbuf_twrite(cp, TUPLE_END, NULL, 0);
+}
+
+/* Start of a list; the element count n is not part of the CTI format. */
+static inline int cbuf_cti_put_list_begin(cbuf_t* cp, size_t n)
+{
+    (void) n;
+    return cbuf_twrite(cp, LIST, NULL, 0);
+}
+
+/* End of a list. */
+static inline int cbuf_cti_put_list_end(cbuf_t* cp)
+{
+    return cbuf_twrite(cp, LIST_END, NULL, 0);
+}
+
+/* CTI data has no header: nothing is written, always succeeds. */
+static inline int cbuf_cti_put_begin(cbuf_t* cp)
+{
+    (void) cp;
+    return 1;
+}
+
+/* CTI data has no trailer: nothing is written, always succeeds. */
+static inline int cbuf_cti_put_end(cbuf_t* cp)
+{
+    (void) cp;
+    return 1;
+}
+
+/* Reply tag 'ok'. */
+static inline int cbuf_cti_put_tag_ok(cbuf_t* cp)
+{
+    return cbuf_twrite(cp, OK, NULL, 0);
+}
+
+/* Reply tag 'error'. */
+static inline int cbuf_cti_put_tag_error(cbuf_t* cp)
+{
+    return cbuf_twrite(cp, ERROR, NULL, 0);
+}
+
+/* Reply tag 'event'. */
+static inline int cbuf_cti_put_tag_event(cbuf_t* cp)
+{
+    return cbuf_twrite(cp, EVENT, NULL, 0);
+}
+
+// Append an atom: the ATOM tag, a one-byte length and the characters of
+// the NUL-terminated string atom.  Names longer than 255 characters are
+// silently truncated to 255.
+// Returns 1 on success, 0 if no room could be made.
+static inline int cbuf_cti_put_atom(cbuf_t* cp, const char* atom)
+{
+    u_int32_t alen = strlen(atom);
+    u_int8_t* out;
+
+    alen = (alen > 0xff) ? 0xff : alen;   // truncate error?
+    out = cbuf_seg_alloc(cp, alen+2);
+    if (!out)
+        return 0;
+    *out++ = ATOM;
+    *out++ = alen;
+    memcpy(out, atom, alen);
+    return 1;
+}
+
+// Append a string of n bytes.
+// Up to 255 bytes are written as STRING1 (one-byte length), longer
+// strings as STRING4 (four-byte length, stored as the bytes of a
+// u_int32_t in memory order).  A NULL string, or an n that is zero or
+// negative, is written as an empty STRING1; a negative n must never reach
+// the size calculations below.
+// Returns 1 on success, 0 if no room could be made.
+static inline int cbuf_cti_put_string(cbuf_t* cp, const char* string, int n)
+{
+    u_int8_t* ptr;
+
+    if ((string == NULL) || (n <= 0)) {
+        if (!(ptr = cbuf_seg_alloc(cp, 2)))
+            return 0;
+        ptr[0] = STRING1;
+        ptr[1] = 0;
+    }
+    else {
+        if (n <= 0xff) {
+            if (!(ptr = cbuf_seg_alloc(cp, n+2)))
+                return 0;
+            ptr[0] = STRING1;
+            ptr[1] = n;
+            memcpy(&ptr[2], string, n);
+        }
+        else {
+            u_int32_t len = n;
+            if (!(ptr = cbuf_seg_alloc(cp, (size_t) n + 5)))
+                return 0;
+            ptr[0] = STRING4;
+            memcpy(&ptr[1], &len, sizeof(len));
+            memcpy(&ptr[5], string, n);
+        }
+    }
+    return 1;
+}
+
+// Append a binary: the BINARY tag, a four-byte length (the bytes of len
+// in memory order) and len bytes from buf.
+// Returns 1 on success.  Returns 0 if no room could be made, if buf is
+// NULL while len is not 0, or if len is so large that the five header
+// bytes no longer fit in a 32-bit size.
+static inline int cbuf_cti_put_binary(cbuf_t* cp, const u_int8_t* buf, u_int32_t len)
+{
+    u_int8_t* ptr;
+
+    if ((buf == NULL) && (len != 0))
+        return 0;
+    // len+5 is computed in 32 bits and would wrap around
+    if (len > UINT32_MAX - 5)
+        return 0;
+    if (!(ptr = cbuf_seg_alloc(cp, (size_t) len + 5)))
+        return 0;
+    ptr[0] = BINARY;
+    memcpy(ptr+1, &len, sizeof(len));
+    if (len != 0)
+        memcpy(ptr+5, buf, len);
+    return 1;
+}
+#endif // CBUF_USE_PUT_CTI
+
+
+#ifdef CBUF_USE_PUT_ETF
+//
+// ETF implementation of reply data
+//
+// ETF: write value as a two-byte term: SMALL_INTEGER_EXT tag + the byte.
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int etf_put_uint8(cbuf_t* cp, u_int8_t value)
+{
+    u_int8_t* out = cbuf_seg_alloc(cp, 2);
+
+    if (!out)
+        return 0;
+    out[0] = SMALL_INTEGER_EXT;
+    out[1] = value;
+    return 1;
+}
+
+// ETF: write value as INTEGER_EXT tag followed by 4 bytes produced by
+// memcpy_n2b (presumably native-to-big-endian; defined elsewhere).
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int etf_put_int32(cbuf_t* cp, int32_t value)
+{
+    u_int8_t* out = cbuf_seg_alloc(cp, 5);
+
+    if (!out)
+        return 0;
+    out[0] = INTEGER_EXT;
+    memcpy_n2b(out+1, &value, 4);
+    return 1;
+}
+
+// ETF: write a 64-bit magnitude with an explicit sign as SMALL_BIG_EXT.
+// Term layout (11 bytes): tag, digit count (8), sign (0 = positive,
+// 1 = negative), then the 8 magnitude bytes least significant first, as
+// the external term format requires for bignum digits.
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int etf_put_u64(cbuf_t* cp,u_int8_t sign,u_int64_t value)
+{
+    u_int8_t* p;
+    int i;
+
+    // Reserve exactly what is written. The previous size of 13 left two
+    // unwritten bytes behind the term (assuming cbuf_seg_alloc advances
+    // the write position by the requested size).
+    if (!(p = cbuf_seg_alloc(cp, 11)))
+        return 0;
+    p[0] = SMALL_BIG_EXT;
+    p[1] = 8;
+    p[2] = sign;
+    // Explicit little-endian digit order, independent of host byte order.
+    for (i = 0; i < 8; i++)
+        p[3+i] = (u_int8_t) (value >> (8*i));
+    return 1;
+}
+
+// ETF: write a signed 64-bit integer as sign + magnitude via etf_put_u64.
+// Returns the result of etf_put_u64.
+static inline int etf_put_int64(cbuf_t* cp, int64_t value)
+{
+    if (value < 0) {
+        // Negate in unsigned arithmetic: "-value" overflows (undefined
+        // behaviour) when value is the most negative int64_t.
+        return etf_put_u64(cp, 1, (u_int64_t) 0 - (u_int64_t) value);
+    }
+    return etf_put_u64(cp, 0, (u_int64_t) value);
+}
+
+// ETF: write a double as NEW_FLOAT_EXT tag followed by 8 bytes produced
+// by memcpy_n2b (presumably native-to-big-endian; defined elsewhere).
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int etf_put_float(cbuf_t* cp, double value)
+{
+    u_int8_t* out = cbuf_seg_alloc(cp, 9);
+
+    if (!out)
+        return 0;
+    out[0] = NEW_FLOAT_EXT;
+    memcpy_n2b(out+1, &value, 8);
+    return 1;
+}
+
+
+// ETF: write len bytes of atom as SMALL_ATOM_EXT tag, one length byte,
+// then the name. Lengths above 255 are silently truncated to 255.
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int etf_put_atom(cbuf_t* cp, const char* atom, size_t len)
+{
+    size_t used = (len > 255) ? 255 : len;
+    u_int8_t* out = cbuf_seg_alloc(cp, used+2);
+
+    if (!out)
+        return 0;
+    out[0] = SMALL_ATOM_EXT;
+    out[1] = used;
+    memcpy(out+2, atom, used);
+    return 1;
+}
+
+// ETF: write a NUL-terminated atom name; the length is taken with strlen.
+static inline int cbuf_etf_put_atom(cbuf_t* cp, const char* atom)
+{
+    return etf_put_atom(cp, atom, strlen(atom));
+}
+
+// ETF: write the atom "true" for a non-zero value, otherwise "false".
+static inline int cbuf_etf_put_boolean(cbuf_t* cp, u_int8_t value)
+{
+    const char* name = value ? "true" : "false";
+    size_t len = value ? 4 : 5;
+
+    return etf_put_atom(cp, name, len);
+}
+
+// ETF: write an int8. Negative values go through etf_put_int32, all
+// others through etf_put_uint8.
+static inline int cbuf_etf_put_int8(cbuf_t* cp, int8_t value)
+{
+    if (value < 0)
+        return etf_put_int32(cp, (int32_t) value);
+    return etf_put_uint8(cp, (u_int8_t) value);
+}
+
+// ETF: write an int16, widened to int32 and written by etf_put_int32.
+static inline int cbuf_etf_put_int16(cbuf_t* cp, int16_t value)
+{
+    int32_t wide = (int32_t) value;
+
+    return etf_put_int32(cp, wide);
+}
+
+// ETF: write an int32; forwards to etf_put_int32.
+static inline int cbuf_etf_put_int32(cbuf_t* cp, int32_t value)
+{
+    int ok = etf_put_int32(cp, value);
+
+    return ok;
+}
+
+// ETF: write an int64; forwards to etf_put_int64.
+static inline int cbuf_etf_put_int64(cbuf_t* cp, int64_t value)
+{
+    int ok = etf_put_int64(cp, value);
+
+    return ok;
+}
+
+// ETF: write a double; forwards to etf_put_float.
+static inline int cbuf_etf_put_float64(cbuf_t* cp, double value)
+{
+    int ok = etf_put_float(cp, value);
+
+    return ok;
+}
+
+// ETF: write a float, widened to double and written by etf_put_float.
+static inline int cbuf_etf_put_float32(cbuf_t* cp, float value)
+{
+    double wide = (double) value;
+
+    return etf_put_float(cp, wide);
+}
+
+// ETF: write a uint8; forwards to etf_put_uint8.
+static inline int cbuf_etf_put_uint8(cbuf_t* cp, u_int8_t value)
+{
+    int ok = etf_put_uint8(cp, value);
+
+    return ok;
+}
+
+// ETF: write a uint16, widened to int32 and written by etf_put_int32.
+static inline int cbuf_etf_put_uint16(cbuf_t* cp, u_int16_t value)
+{
+    int32_t wide = (int32_t) value;
+
+    return etf_put_int32(cp, wide);
+}
+
+// ETF: write a uint32. Values that do not fit in a signed 32-bit integer
+// are written as a positive 64-bit term via etf_put_u64; all others go
+// through etf_put_int32.
+static inline int cbuf_etf_put_uint32(cbuf_t* cp, u_int32_t value)
+{
+    if (value > 0x7fffffff) {
+        // u_int64_t matches etf_put_u64's parameter type and the naming
+        // used throughout this header (no dependency on <stdint.h>).
+        return etf_put_u64(cp, 0, (u_int64_t) value);
+    }
+    return etf_put_int32(cp, (int32_t) value);
+}
+
+// ETF: write a uint64 as a positive (sign 0) term via etf_put_u64.
+static inline int cbuf_etf_put_uint64(cbuf_t* cp, u_int64_t value)
+{
+    const u_int8_t positive = 0;
+
+    return etf_put_u64(cp, positive, value);
+}
+
+// ETF: start a reply by writing the single VERSION_MAGIC byte.
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int cbuf_etf_put_begin(cbuf_t* cp)
+{
+    u_int8_t* out = cbuf_seg_alloc(cp, 1);
+
+    if (!out)
+        return 0;
+    out[0] = VERSION_MAGIC;
+    return 1;
+}
+
+// ETF: end of a reply. Nothing is written; always reports success (1).
+static inline int cbuf_etf_put_end(cbuf_t* cp)
+{
+    return ((void) cp, 1);
+}
+
+
+// ETF: write a tuple header for n elements; the elements themselves are
+// written by the caller afterwards.
+//  - n <= 255 : SMALL_TUPLE_EXT tag + one arity byte
+//  - n  > 255 : LARGE_TUPLE_EXT tag + 4 arity bytes via memcpy_n2b
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int cbuf_etf_put_tuple_begin(cbuf_t* cp, size_t n)
+{
+    u_int8_t* p;
+
+    if (n > 0xFF) {
+        // Narrow to 32 bits first: copying 4 bytes straight out of a
+        // size_t picks the wrong half when size_t is 8 bytes on a
+        // big-endian host.
+        u_int32_t arity = (u_int32_t) n;
+
+        if (!(p = cbuf_seg_alloc(cp, 5)))
+            return 0;
+        p[0] = LARGE_TUPLE_EXT;
+        memcpy_n2b(&p[1], &arity, 4);
+    }
+    else {
+        if (!(p = cbuf_seg_alloc(cp, 2)))
+            return 0;
+        p[0] = SMALL_TUPLE_EXT;
+        p[1] = (u_int8_t) n;
+    }
+    return 1;
+}
+
+// ETF: end of a tuple. Nothing is written; always reports success (1).
+static inline int cbuf_etf_put_tuple_end(cbuf_t* cp)
+{
+    return ((void) cp, 1);
+}
+
+// ETF: write a list header: LIST_EXT tag + 4 length bytes via memcpy_n2b.
+// The n elements and the tail are written by the caller afterwards.
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int cbuf_etf_put_list_begin(cbuf_t* cp, size_t n)
+{
+    u_int8_t* p;
+    // Narrow to 32 bits first: copying 4 bytes straight out of a size_t
+    // picks the wrong half when size_t is 8 bytes on a big-endian host.
+    u_int32_t count = (u_int32_t) n;
+
+    if (!(p = cbuf_seg_alloc(cp, 5)))
+        return 0;
+    p[0] = LIST_EXT;
+    memcpy_n2b(&p[1], &count, 4);
+    return 1;
+}
+
+// ETF: terminate a proper list by writing the single NIL_EXT tail byte.
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int cbuf_etf_put_list_end(cbuf_t* cp)
+{
+    u_int8_t* out = cbuf_seg_alloc(cp, 1);
+
+    if (!out)
+        return 0;
+    out[0] = NIL_EXT;
+    return 1;
+}
+
+// ETF: write the atom "ok".
+static inline int cbuf_etf_put_tag_ok(cbuf_t* cp)
+{
+    static const char tag[] = "ok";
+
+    return etf_put_atom(cp, tag, 2);
+}
+
+static inline int cbuf_etf_put_tag_error(cbuf_t* cp)
+{
+ return etf_put_atom(cp, "error", 5);
+}
+
+// ETF: write the atom "event".
+static inline int cbuf_etf_put_tag_event(cbuf_t* cp)
+{
+    static const char tag[] = "event";
+
+    return etf_put_atom(cp, tag, 5);
+}
+
+// ETF: write n bytes of string.
+//  - NULL or non-positive n : a single NIL_EXT byte (empty list)
+//  - otherwise              : STRING_EXT tag, 16-bit big-endian length,
+//                             data; n above 0xFFFF is silently truncated
+// Returns 1 on success, 0 if no buffer space could be reserved.
+static inline int cbuf_etf_put_string(cbuf_t* cp, const char* string, int n)
+{
+    u_int8_t* p;
+
+    // A negative n used to reach memcpy as a huge size; treat it like an
+    // empty string instead.
+    if ((string == NULL) || (n <= 0)) {
+        if (!(p = cbuf_seg_alloc(cp, 1)))
+            return 0;
+        p[0] = NIL_EXT;
+    }
+    else {
+        if (n > 0xFFFF) n = 0xFFFF; // warn?
+        if (!(p = cbuf_seg_alloc(cp, n+3)))
+            return 0;
+        p[0] = STRING_EXT;
+        p[1] = n>>8;
+        p[2] = n;
+        memcpy(&p[3], string, n);
+    }
+    return 1;
+}
+
+// ETF: write a binary as BINARY_EXT tag, 4 length bytes via memcpy_n2b,
+// then len bytes from buf.
+// Returns 1 on success, 0 if the size is unrepresentable or no buffer
+// space could be reserved.
+// FIXME - if vectored interface add as binary part
+static inline int cbuf_etf_put_binary(cbuf_t* cp, const u_int8_t* buf,
+                                      u_int32_t len)
+{
+    u_int8_t* p;
+
+    // len+5 is computed in 32 bits; refuse lengths for which it would wrap
+    // and yield an allocation smaller than the data copied below.
+    if (len > 0xFFFFFFFFu - 5)
+        return 0;
+    if (!(p = cbuf_seg_alloc(cp, len+5)))
+        return 0;
+    p[0] = BINARY_EXT;
+    memcpy_n2b(&p[1], &len, 4);
+    memcpy(&p[5], buf, len);
+    return 1;
+}
+
+#endif // CBUF_USE_PUT_ETF
+
+// Select ETF or CTI both in runtime and compile time
+#if defined(CBUF_USE_PUT_ETF) && defined(CBUF_USE_PUT_CTI)
+
+// Both encoders compiled in: pick one at run time from cp->flags.
+// Each macro pastes `what` onto cbuf_etf_put_ / cbuf_cti_put_ and yields
+// that function's result, or 0 when the flags select neither encoding.
+// NOTE: cp is evaluated more than once, so it must be free of side effects.
+
+// cbuf_put: writer that takes only the buffer.
+#define cbuf_put(what,cp) ( \
+ (((cp)->flags & CBUF_FLAG_PUT_MASK) == CBUF_FLAG_PUT_ETF) ? \
+ (cbuf_etf_put_##what((cp))) : \
+ ((((cp)->flags & CBUF_FLAG_PUT_MASK) == CBUF_FLAG_PUT_CTI) ? \
+ (cbuf_cti_put_##what((cp))) : 0))
+
+// cbuf_put_value: writer that takes the buffer and one argument.
+#define cbuf_put_value(what,cp,arg) ( \
+ (((cp)->flags & CBUF_FLAG_PUT_MASK) == CBUF_FLAG_PUT_ETF) ? \
+ (cbuf_etf_put_##what((cp),(arg))) : \
+ ((((cp)->flags & CBUF_FLAG_PUT_MASK) == CBUF_FLAG_PUT_CTI) ? \
+ (cbuf_cti_put_##what((cp),(arg))) : 0))
+
+// cbuf_put_value2: writer that takes the buffer and two arguments.
+#define cbuf_put_value2(what,cp,arg1,arg2) ( \
+ (((cp)->flags & CBUF_FLAG_PUT_MASK) == CBUF_FLAG_PUT_ETF) ? \
+ (cbuf_etf_put_##what((cp),(arg1),(arg2))) : \
+ ((((cp)->flags & CBUF_FLAG_PUT_MASK) == CBUF_FLAG_PUT_CTI) ? \
+ (cbuf_cti_put_##what((cp),(arg1),(arg2))) : 0))
+
+#elif defined(CBUF_USE_PUT_ETF)
+
+// Only the ETF encoder is compiled in: dispatch statically to the
+// cbuf_etf_put_<what> function with zero, one or two extra arguments.
+#define cbuf_put(what,cp) \
+ (cbuf_etf_put_##what((cp)))
+#define cbuf_put_value(what,cp,arg) \
+ (cbuf_etf_put_##what((cp),(arg)))
+#define cbuf_put_value2(what,cp,arg1,arg2) \
+ (cbuf_etf_put_##what((cp),(arg1),(arg2)))
+
+#elif defined(CBUF_USE_PUT_CTI)
+
+// Only the CTI encoder is compiled in: dispatch statically to the
+// cbuf_cti_put_<what> function that takes just the buffer.
+// (The prefix was previously misspelled "cbuf_cit_put_", which names no
+// existing function and broke every CTI-only build.)
+#define cbuf_put(what,cp) \
+ (cbuf_cti_put_##what((cp)))
+// CTI-only build: static dispatch to cbuf_cti_put_<what> with one or two
+// extra arguments.
+#define cbuf_put_value(what,cp,arg) \
+ (cbuf_cti_put_##what((cp),(arg)))
+#define cbuf_put_value2(what,cp,arg1,arg2) \
+ (cbuf_cti_put_##what((cp),(arg1),(arg2)))
+
+#else
+#error "must use either CTI or ETF"
+#endif
+
+// Write a boolean to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_boolean(cbuf_t* cp, u_int8_t value)
+{
+    int ok = cbuf_put_value(boolean, cp, value);
+
+    return ok;
+}
+
+// Write an int8 to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_int8(cbuf_t* cp, int8_t value)
+{
+    int ok = cbuf_put_value(int8, cp, value);
+
+    return ok;
+}
+
+// Write an int16 to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_int16(cbuf_t* cp, int16_t value)
+{
+    int ok = cbuf_put_value(int16, cp, value);
+
+    return ok;
+}
+
+// Write an int32 to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_int32(cbuf_t* cp, int32_t value)
+{
+    int ok = cbuf_put_value(int32, cp, value);
+
+    return ok;
+}
+
+// Write an int64 to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_int64(cbuf_t* cp, int64_t value)
+{
+    int ok = cbuf_put_value(int64, cp, value);
+
+    return ok;
+}
+// Write a float to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_float32(cbuf_t* cp, float value)
+{
+    int ok = cbuf_put_value(float32, cp, value);
+
+    return ok;
+}
+// Write a double to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_float64(cbuf_t* cp, double value)
+{
+    int ok = cbuf_put_value(float64, cp, value);
+
+    return ok;
+}
+// Write a uint8 to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_uint8(cbuf_t* cp, u_int8_t value)
+{
+    int ok = cbuf_put_value(uint8, cp, value);
+
+    return ok;
+}
+// Write a uint16 to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_uint16(cbuf_t* cp, u_int16_t value)
+{
+    int ok = cbuf_put_value(uint16, cp, value);
+
+    return ok;
+}
+
+// Write a uint32 to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_uint32(cbuf_t* cp, u_int32_t value)
+{
+    int ok = cbuf_put_value(uint32, cp, value);
+
+    return ok;
+}
+
+// Write a uint64 to cp using the encoder chosen by cbuf_put_value.
+static inline int cbuf_put_uint64(cbuf_t* cp, u_int64_t value)
+{
+    int ok = cbuf_put_value(uint64, cp, value);
+
+    return ok;
+}
+
+// Write a NUL-terminated atom name to cp using the encoder chosen by
+// cbuf_put_value.
+static inline int cbuf_put_atom(cbuf_t* cp, const char* value)
+{
+    int ok = cbuf_put_value(atom, cp, value);
+
+    return ok;
+}
+
+// Open a tuple of n elements on cp using the encoder chosen by
+// cbuf_put_value.
+static inline int cbuf_put_tuple_begin(cbuf_t* cp, size_t n)
+{
+    int ok = cbuf_put_value(tuple_begin, cp, n);
+
+    return ok;
+}
+
+// Close a tuple on cp using the encoder chosen by cbuf_put.
+static inline int cbuf_put_tuple_end(cbuf_t* cp)
+{
+    int ok = cbuf_put(tuple_end, cp);
+
+    return ok;
+}
+
+// Open a list of n elements on cp using the encoder chosen by
+// cbuf_put_value.
+static inline int cbuf_put_list_begin(cbuf_t* cp, size_t n)
+{
+    int ok = cbuf_put_value(list_begin, cp, n);
+
+    return ok;
+}
+
+// Close a list on cp using the encoder chosen by cbuf_put.
+static inline int cbuf_put_list_end(cbuf_t* cp)
+{
+    int ok = cbuf_put(list_end, cp);
+
+    return ok;
+}
+
+// Start a reply on cp using the encoder chosen by cbuf_put.
+static inline int cbuf_put_begin(cbuf_t* cp)
+{
+    int ok = cbuf_put(begin, cp);
+
+    return ok;
+}
+
+// Finish a reply on cp using the encoder chosen by cbuf_put.
+static inline int cbuf_put_end(cbuf_t* cp)
+{
+    int ok = cbuf_put(end, cp);
+
+    return ok;
+}
+
+// Write the "ok" tag to cp using the encoder chosen by cbuf_put.
+static inline int cbuf_put_tag_ok(cbuf_t* cp)
+{
+    int ok = cbuf_put(tag_ok, cp);
+
+    return ok;
+}
+
+static inline int cbuf_put_tag_error(cbuf_t* cp)
+{
+ return cbuf_put(tag_error, cp);
+}
+// Write the "event" tag to cp using the encoder chosen by cbuf_put.
+static inline int cbuf_put_tag_event(cbuf_t* cp)
+{
+    int ok = cbuf_put(tag_event, cp);
+
+    return ok;
+}
+
+// Write n bytes of value as a string to cp using the encoder chosen by
+// cbuf_put_value2.
+static inline int cbuf_put_string(cbuf_t* cp, const char* value, int n)
+{
+    int ok = cbuf_put_value2(string, cp, value, n);
+
+    return ok;
+}
+
+// Write len bytes of buf as a binary to cp using the encoder chosen by
+// cbuf_put_value2.
+static inline int cbuf_put_binary(cbuf_t* cp, const u_int8_t* buf, u_int32_t len)
+{
+    int ok = cbuf_put_value2(binary, cp, buf, len);
+
+    return ok;
+}
+
+
+/*****************************************************************************
+ *
+ * GET untagged data
+ *
+ *****************************************************************************/
+
+// Read one untagged byte from cp and store it in *val normalised to 0/1.
+// Returns 1 on success; returns 0 and leaves *val untouched when
+// cbuf_read fails.
+static inline int get_boolean(cbuf_t* cp, u_int8_t* val)
+{
+    u_int8_t raw;
+
+    if (!cbuf_read(cp, &raw, sizeof(*val)))
+        return 0;
+    *val = (raw != 0);
+    return 1;
+}
+
+// Read sizeof(u_int8_t) untagged bytes from cp into *val; no conversion
+// is done here. Returns the result of cbuf_read.
+static inline int get_uint8(cbuf_t* cp, u_int8_t* val)
+{
+    size_t nbytes = sizeof(u_int8_t);
+
+    return cbuf_read(cp, val, nbytes);
+}
+
+// Read sizeof(u_int16_t) untagged bytes from cp into *val; no byte-order
+// conversion is done here. Returns the result of cbuf_read.
+static inline int get_uint16(cbuf_t* cp, u_int16_t* val)
+{
+    size_t nbytes = sizeof(u_int16_t);
+
+    return cbuf_read(cp, val, nbytes);
+}
+
+// Read sizeof(int32_t) untagged bytes from cp into *val; no byte-order
+// conversion is done here. Returns the result of cbuf_read.
+static inline int get_int32(cbuf_t* cp, int32_t* val)
+{
+    size_t nbytes = sizeof(int32_t);
+
+    return cbuf_read(cp, val, nbytes);
+}
+
+// Read sizeof(u_int32_t) untagged bytes from cp into *val; no byte-order
+// conversion is done here. Returns the result of cbuf_read.
+static inline int get_uint32(cbuf_t* cp, u_int32_t* val)
+{
+    size_t nbytes = sizeof(u_int32_t);
+
+    return cbuf_read(cp, val, nbytes);
+}
+
+
+// Read sizeof(u_int64_t) untagged bytes from cp into *val; no byte-order
+// conversion is done here. Returns the result of cbuf_read.
+static inline int get_uint64(cbuf_t* cp, u_int64_t* val)
+{
+    size_t nbytes = sizeof(u_int64_t);
+
+    return cbuf_read(cp, val, nbytes);
+}
+
+// Read sizeof(float) untagged bytes from cp into *val; no conversion is
+// done here. Returns the result of cbuf_read.
+static inline int get_float32(cbuf_t* cp, float* val)
+{
+    size_t nbytes = sizeof(float);
+
+    return cbuf_read(cp, val, nbytes);
+}
+
+// Read sizeof(double) untagged bytes from cp into *val; no conversion is
+// done here. Returns the result of cbuf_read.
+static inline int get_float64(cbuf_t* cp, double* val)
+{
+    size_t nbytes = sizeof(double);
+
+    return cbuf_read(cp, val, nbytes);
+}
View
92 c_src/cl_cpuid.h
@@ -0,0 +1,92 @@
+
+/* CPUID FUNCTION 1, edx info */
+#define CPUID_FPU 0x00000001
+#define CPUID_VME 0x00000002
+#define CPUID_DE 0x00000004
+#define CPUID_PSE 0x00000008
+#define CPUID_TSC 0x00000010
+#define CPUID_MSR 0x00000020
+#define CPUID_PAE 0x00000040
+#define CPUID_MCE 0x00000080
+#define CPUID_CX8 0x00000100
+#define CPUID_APIC 0x00000200
+#define CPUID_B10 0x00000400
+#define CPUID_SEP 0x00000800
+#define CPUID_MTRR 0x00001000
+#define CPUID_PGE 0x00002000
+#define CPUID_MCA 0x00004000
+#define CPUID_CMOV 0x00008000
+#define CPUID_PAT 0x00010000
+#define CPUID_PSE36 0x00020000
+#define CPUID_PSN 0x00040000
+#define CPUID_CLFSH 0x00080000
+#define CPUID_B20 0x00100000
+#define CPUID_DS 0x00200000
+#define CPUID_ACPI 0x00400000
+#define CPUID_MMX 0x00800000
+#define CPUID_FXSR 0x01000000
+#define CPUID_SSE 0x02000000
+#define CPUID_XMM 0x02000000
+#define CPUID_SSE2 0x04000000
+#define CPUID_SS 0x08000000
+#define CPUID_HTT 0x10000000 // Hyper-Threading support
+#define CPUID_TM 0x20000000
+#define CPUID_IA64 0x40000000
+#define CPUID_PBE 0x80000000
+
+/* CPUID FUNCTION 1 ebx info */
+#define CPUID_BRAND_INDEX 0x000000ff
+#define CPUID_CLFUSH_SIZE 0x0000ff00
+#define CPUID_HTT_CORES 0x00ff0000
+#define CPUID_LOCAL_APIC_ID 0xff000000
+
+/* CPUID FUNCTION 4 eax info */
+#define CPUID_CORES_PER_PROCPAK 0xFC000000
+
+
+/* FUNCTION 4,0 edx? */
+#define CPUID2_SSE3 0x00000001
+#define CPUID2_MON 0x00000008
+#define CPUID2_DS_CPL 0x00000010
+#define CPUID2_VMX 0x00000020
+#define CPUID2_EST 0x00000080
+#define CPUID2_TM2 0x00000100
+#define CPUID2_SSSE3 0x00000200
+#define CPUID2_CNXTID 0x00000400
+#define CPUID2_CX16 0x00002000
+#define CPUID2_XTPR 0x00004000
+
+#define AMDID_SYSCALL 0x00000800
+#define AMDID_MP 0x00080000
+#define AMDID_NX 0x00100000
+#define AMDID_EXT_MMX 0x00400000
+#define AMDID_FFXSR 0x01000000
+#define AMDID_RDTSCP 0x08000000
+#define AMDID_LM 0x20000000
+#define AMDID_EXT_3DNOW 0x40000000
+#define AMDID_3DNOW 0x80000000
+
+#define AMDID2_LAHF 0x00000001
+#define AMDID2_CMP 0x00000002
+#define AMDID2_SVM 0x00000004
+#define AMDID2_EXT_APIC 0x00000008
+#define AMDID2_CR8 0x00000010
+#define AMDID2_PREFETCH 0x00000100
+
+/*
+ * AMD extended function 8000_0008h ecx info
+ */
+#define AMDID_CMP_CORES 0x000000ff
+
+
+#define EXT_CPUID_3DNOW 0x80000000
+#define EXT_CPUID_AMD_3DNOWEXT 0x40000000
+#define EXT_CPUID_AMD_MMXEXT 0x00400000
+
+#define FF_CPU_MMX 0x00000001
+#define FF_CPU_MMXEXT 0x00000002
+#define FF_CPU_SSE 0x00000004
+#define FF_CPU_SSE2 0x00000008
+#define FF_CPU_3DNOW 0x00000010
+#define FF_CPU_3DNOWEXT 0x00000020
+#define FF_CPU_TSC 0x00000040
View
3,264 c_src/cl_drv.c
@@ -0,0 +1,3264 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <OpenCL/opencl.h>
+
+#if WORDSIZE==32
+#include "config.32.h"
+#elif WORDSIZE==64
+#include "config.64.h"
+#else
+#error "WORDSIZE not defined"
+#endif
+
+#include "erl_driver.h"
+
+#include "cbufv2.h"
+#include "cl_hash.h"
+
+#define ECL_REPLY_TYPE CBUF_FLAG_PUT_ETF
+
+#ifdef DEBUG
+#define DBG(fmt,...) \
+ fprintf(stderr, fmt "\r\n", __VA_ARGS__)
+#define CBUF_DBG(buf,msg)
+// #define CBUF_DBG(buf,msg) cbuf_print((buf),(msg))
+#else
+#define DBG(fmt,...)
+#define CBUF_DBG(buf,msg)
+#endif
+
+// debug async events
+#define A_DBG(fmt,...) DBG(fmt,__VA_ARGS__)
+
+
+typedef int (*get_fn_t)(cbuf_t*,void*,void*);
+
+// Type map to external communication
+#if SIZEOF_SHORT == 2
+#define SHORT INT16
+#define USHORT UINT16
+#elif SIZEOF_SHORT == 4
+#define SHORT INT32
+#define USHORT UINT32
+#endif
+
+#if SIZEOF_INT == 4
+#define INT INT32
+#define UINT UINT32
+#elif SIZEOF_INT == 8
+#define INT INT64
+#define UINT UINT64
+#endif
+
+// Map C long / unsigned long to wire type tags by size. LONG is signed,
+// so it uses the INT tags, matching the SHORT and INT mappings above
+// (it was previously mapped to the unsigned tags).
+#if SIZEOF_LONG == 4
+#define LONG INT32
+#define ULONG UINT32
+#elif SIZEOF_LONG == 8
+#define LONG INT64
+#define ULONG UINT64
+#endif
+
+#if SIZEOF_SIZE_T == 4
+#define SIZE_T UINT32
+#define get_size(in, ptr) get_uint32((in),(u_int32_t*)(ptr))
+#define get_size_fn (get_fn_t) get_uint32
+#elif SIZEOF_SIZE_T == 8
+#define SIZE_T UINT64
+#define get_size(in, ptr) get_uint64((in),(u_int64_t*)(ptr))
+#define get_size_fn (get_fn_t) get_uint64
+#endif
+
+#if SIZEOF_VOID_P == 4
+#define POINTER_T UINT32
+typedef u_int32_t pointer_t;
+#define put_pointer(out, ptr) cbuf_put_uint32((out),(u_int32_t)(ptr))
+#define get_pointer(in, ptr) get_uint32((in),(u_int32_t*)(ptr))
+#define get_pointer_fn get_uint32
+#elif SIZEOF_VOID_P == 8
+#define POINTER_T UINT64
+typedef u_int64_t pointer_t;
+#define put_pointer(out, ptr) cbuf_put_uint64((out),(u_int64_t)(ptr))
+#define get_pointer(in, ptr) get_uint64((in),(u_int64_t*)(ptr))
+#define get_pointer_fn get_uint64
+#else
+#error "check configure, unable to determine SIZEOF_VOID_P"
+#endif
+
+/* convert object to handle (just cast to handle setKernelArg) */
+#define EPTR_HANDLE(ptr) ((pointer_t)(ptr))
+
+#define STRING STRING4
+
+#define OCL_CHAR INT8
+#define OCL_UCHAR UINT8
+#define OCL_SHORT INT16
+#define OCL_USHORT UINT16
+#define OCL_INT INT32
+#define OCL_UINT UINT32
+#define OCL_LONG INT64
+#define OCL_ULONG UINT64
+#define OCL_HALF UINT16
+#define OCL_FLOAT FLOAT32
+#define OCL_DOUBLE FLOAT64
+#define OCL_BOOL OCL_UINT // not always same size as in kernel
+#define OCL_STRING STRING4
+#define OCL_BITFIELD OCL_ULONG
+#define OCL_POINTER POINTER_T
+#define OCL_SIZE SIZE_T
+#define OCL_HANDLE HANDLE
+
+// Wire types of individual OpenCL info values.
+// cl_bitfield-based OpenCL types map to OCL_BITFIELD; enumerated OpenCL
+// types (cl_uint typedefs in cl.h) map to OCL_UINT.
+// cl_device_mem_cache_type and cl_device_local_mem_type are cl_uint
+// enums, so they are mapped to OCL_UINT here (previously OCL_BITFIELD,
+// i.e. a 64-bit value). Duplicate definitions were removed.
+// NOTE(review): confirm against the info decoding code, which is not
+// visible from this part of the file.
+#define OCL_DEVICE_TYPE OCL_BITFIELD
+#define OCL_DEVICE_FP_CONFIG OCL_BITFIELD
+#define OCL_DEVICE_GLOBAL_MEM_CACHE_TYPE OCL_UINT
+#define OCL_PLATFORM_INFO OCL_UINT
+#define OCL_DEVICE_INFO OCL_UINT
+#define OCL_DEVICE_EXEC_CAPABILITIES OCL_BITFIELD
+#define OCL_QUEUE_PROPERTIES OCL_BITFIELD
+#define OCL_DEVICE_LOCAL_MEM_TYPE OCL_UINT
+#define OCL_PLATFORM_ID OCL_POINTER
+#define OCL_MEM_OBJECT_TYPE OCL_UINT
+#define OCL_MEM_FLAGS OCL_BITFIELD
+#define OCL_SAMPLER_ADDRESSING_MODE OCL_UINT
+#define OCL_SAMPLER_FILTER_MODE OCL_UINT
+#define OCL_BUILD_STATUS OCL_INT
+
+#define MAX_INFO_SIZE 256 // ulong (2K or 4K buffer)
+#define MAX_DEVICES 128
+#define MAX_PLATFORMS 128
+#define MAX_OPTION_LIST 1024
+#define MAX_KERNEL_NAME 1024
+#define MAX_KERNELS 1024
+#define MAX_SOURCES 128
+#define MAX_WAIT_LIST 128
+#define MAX_WORK_SIZE 3
+
+// COMMANDS (cl_drv_ctl)
+#define ECL_NOOP 0x01
+#define ECL_GET_PLATFORM_IDS 0x02
+#define ECL_GET_DEVICE_IDS 0x03
+#define ECL_GET_PLATFORM_INFO 0x04
+#define ECL_GET_DEVICE_INFO 0x05
+#define ECL_CREATE_CONTEXT 0x06
+#define ECL_RELEASE_CONTEXT 0x07
+#define ECL_RETAIN_CONTEXT 0x08
+#define ECL_GET_CONTEXT_INFO 0x09
+#define ECL_CREATE_QUEUE 0x0A
+#define ECL_RETAIN_QUEUE 0x0B
+#define ECL_RELEASE_QUEUE 0x0C
+#define ECL_GET_QUEUE_INFO 0x0D
+#define ECL_SET_QUEUE_PROPERTY 0x0E
+#define ECL_CREATE_BUFFER 0x0F
+#define ECL_ENQUEUE_READ_BUFFER 0x10
+#define ECL_ENQUEUE_WRITE_BUFFER 0x11
+#define ECL_ENQUEUE_COPY_BUFFER 0x12
+#define ECL_RETAIN_MEM_OBJECT 0x13
+#define ECL_RELEASE_MEM_OBJECT 0x14
+#define ECL_CREATE_IMAGE2D 0x15
+#define ECL_CREATE_IMAGE3D 0x16
+#define ECL_GET_SUPPORTED_IMAGE_FORMATS 0x17
+#define ECL_ENQUEUE_READ_IMAGE 0x18
+#define ECL_ENQUEUE_WRITE_IMAGE 0x19
+#define ECL_ENQUEUE_COPY_IMAGE 0x1A
+#define ECL_ENQUEUE_COPY_IMAGE_TO_BUFFER 0x1B
+#define ECL_ENQUEUE_COPY_BUFFER_TO_IMAGE 0x1C
+#define ECL_ENQUEUE_MAP_BUFFER 0x1D
+#define ECL_ENQUEUE_MAP_IMAGE 0x1E
+#define ECL_ENQUEUE_UNMAP_MEM_OBEJCT 0x1F
+#define ECL_GET_MEM_OBJECT_INFO 0x20
+#define ECL_GET_IMAGE_INFO 0x21
+#define ECL_CREATE_SAMPLER 0x22
+#define ECL_RETAIN_SAMPLER 0x23
+#define ECL_RELEASE_SAMPLER 0x24
+#define ECL_GET_SAMPLER_INFO 0x25
+#define ECL_CREATE_PROGRAM_WITH_SOURCE 0x26
+#define ECL_CREATE_PROGRAM_WITH_BINARY 0x27
+#define ECL_RELEASE_PROGRAM 0x28
+#define ECL_RETAIN_PROGRAM 0x29
+#define ECL_BUILD_PROGRAM 0x2A
+#define ECL_UNLOAD_COMPILER 0x2B
+#define ECL_GET_PROGRAM_INFO 0x2C
+#define ECL_CREATE_KERNEL 0x2D
+#define ECL_CREATE_KERNELS_IN_PROGRAM 0x2E
+#define ECL_RETAIN_KERNEL 0x2F
+#define ECL_RELEASE_KERNEL 0x30
+#define ECL_SET_KERNEL_ARG 0x31
+#define ECL_GET_KERNEL_INFO 0x32
+#define ECL_GET_PROGRAM_BUILD_INFO 0x33
+#define ECL_RETAIN_EVENT 0x34
+#define ECL_RELEASE_EVENT 0x35
+#define ECL_GET_EVENT_INFO 0x36
+#define ECL_GET_KERNEL_WORKGROUP_INFO 0x37
+#define ECL_ENQUEUE_ND_RANGE_KERNEL 0x38
+#define ECL_ENQUEUE_TASK 0x39
+#define ECL_FLUSH 0x3A
+#define ECL_FINISH 0x3B
+#define ECL_ENQUEUE_MARKER 0x3C
+#define ECL_ENQUEUE_WAIT_FOR_EVENT 0x3D
+#define ECL_ENQUEUE_BARRIER 0x3E
+
+/*
+ * The environment keeps track of all allocated objects
+ * and has a protective hash layer to check the validity
+ * of object pointers. The driver will return a slightly
+ * changed pointer (shifted down 2 bits, which are zero anyway)
+ * as an integer reference. The object is then stored in a hash
+ * table. The native OpenCL object pointer is used as the
+ * key and the handle.
+ *
+ */
+
+// Describes one queryable info item (see the device_info table below for
+// how entries are filled in).
+typedef struct {
+ char* info_name; // Display name
+ cl_uint info_id; // OpenCL info parameter id (e.g. CL_DEVICE_TYPE)
+ bool is_array; // return type is a vector of data
+ unsigned char info_type; // type tag of the value (OCL_* type)
+ unsigned char extern_type; // type tag used externally (e.g. BITFIELD, UINT)
+ void* extern_info; // Encode/Decode enum/bitfields (an ecl_kv_t table, or 0)
+} ecl_info_t;
+
+// Name/value pair for enum and bitfield tables. The kv_* tables in this
+// file are arrays of these, terminated by a { 0, 0 } entry.
+typedef struct {
+ char* key; // name of the value; 0 marks the end of a table
+ u_int64_t value; // OpenCL constant the name stands for
+} ecl_kv_t;
+
+// Kinds of command stored in ecl_command_t.type. The values line up with
+// the indices of ecl_command_name.
+typedef enum {
+ ECL_COMMAND_WAIT_STATUS=1, // wait for completion
+ ECL_COMMAND_WAIT_BIN=2, // wait for completion, return binary
+ ECL_COMMAND_FINISH=3 // wait for all events to complete
+} ecl_command_type_t;
+
+// Display names for ecl_command_type_t values; entry i names enum value i.
+// Index 0 and indices 4..7 are placeholders (presumably so that indexing
+// with a masked value stays in bounds -- confirm at the call sites).
+const char* ecl_command_name[8] =
+{ "null",
+ "wait_status",
+ "wait_bin",
+ "finish",
+ "???",
+ "???",
+ "???",
+ "???"
+};
+
+
+// A command record: what to wait for and who gets the answer.
+typedef struct {
+ ecl_command_type_t type; // command type
+ ErlDrvTermData caller; // The caller that needs the response
+ u_int32_t eref; // Event reference (finish)
+ union { // which member is valid presumably depends on type
+ cl_event event; // Event argument
+ cl_command_queue queue; // Queue argument
+ };
+ ErlDrvBinary* bin; // optional binary argument
+} ecl_command_t;
+
+// Kinds of response stored in ecl_response_t.type. The values line up
+// with the indices of ecl_response_name.
+typedef enum {
+ ECL_RESPONSE_EVENT_STATUS=1,
+ ECL_RESPONSE_EVENT_BIN=2,
+ ECL_RESPONSE_FINISH=3,
+ ECL_RESPONSE_BUILD=4,
+ ECL_RESPONSE_CONTEXT=5
+} ecl_response_type_t;
+
+// Display names for ecl_response_type_t values; entry i names enum value i.
+// Index 0 and indices 6..7 are placeholders.
+const char* ecl_response_name[8] =
+{ "null",
+ "event_status",
+ "event_bin",
+ "finish",
+ "build",
+ "context",
+ "???",
+ "???"
+};
+
+// A response record: the outcome to report back to a caller.
+typedef struct {
+ ecl_response_type_t type; // Response type
+ ErlDrvTermData caller; // The caller that needs the response
+ u_int32_t eref; // async reference
+ int err; // reply error
+ cl_int status; // execution status
+ union { // which member is valid presumably depends on type
+ cl_event event; // EVENT argument
+ cl_command_queue queue; // QUEUE argument
+ cl_program program; // PROGRAM argument
+ char* errinfo; // Context error
+ };
+ ErlDrvBinary* bin; // optional binary argument data
+} ecl_response_t;
+
+
+/* environment */
+// Per-port driver state.
+typedef struct ocl_env {
+ ErlDrvPort port; // Port reference
+ lhash_t ref; // NativePointer => EclObject -> NativePointer
+ ErlDrvTid tid; // Event thread dispatcher
+ ErlDrvEvent evt[2]; // Thread events evt[0]=main side, evt[1]=thread side
+ u_int32_t eref; // event reference for event replies
+} ecl_env_t;
+
+// Kind of OpenCL object held by an ecl_object_t / described by an
+// ecl_class_t. The values line up with the indices of ecl_type_name.
+typedef enum
+{
+ NO_TYPE = 0,
+ PLATFORM_TYPE = 1, // special
+ DEVICE_TYPE = 2, // special
+ CONTEXT_TYPE = 3,
+ QUEUE_TYPE = 4,
+ MEM_TYPE = 5,
+ SAMPLER_TYPE = 6,
+ PROGRAM_TYPE = 7,
+ KERNEL_TYPE = 8,
+ EVENT_TYPE = 9,
+} ecl_object_type_t;
+
+// Display names for ecl_object_type_t values; entry i names enum value i
+// (NO_TYPE .. EVENT_TYPE). Keep in sync with the enum.
+const char* ecl_type_name[] =
+{ "NONE",
+ "PLATFORM",
+ "DEVICE",
+ "CONTEXT",
+ "QUEUE",
+ "MEMOBJECT",
+ "SAMPLER",
+ "PROGRAM",
+ "KERNEL",
+ "EVENT"
+};
+
+struct _ecl_object_t;
+
+// Generic signatures for the per-class operations stored in ecl_class_t.
+// The object is passed as void* so that one table type can serve every
+// OpenCL object kind.
+typedef cl_int (*retain_fn)(void*);
+typedef cl_int (*release_fn)(void*);
+// Same parameter layout as the OpenCL clGet*Info functions: object,
+// parameter id, output buffer size, output buffer, returned size.
+typedef cl_int (*info_fn)(void* ptr, cl_uint param_name,
+ size_t param_value_size,
+ void* param_value, size_t* param_value_size_ret);
+
+// Operations and info table for one kind of OpenCL object.
+typedef struct _ecl_class_t {
+ ecl_object_type_t type; // object kind this class describes
+ retain_fn retain; // retain operation for this kind
+ release_fn release; // release operation for this kind
+ info_fn info; // info query operation for this kind
+ cl_uint info_len; // presumably the number of entries in info_vec
+ ecl_info_t* info_vec; // info item descriptions
+} ecl_class_t;
+
+/* generic object */
+typedef struct _ecl_object_t {
+ lhash_bucket_t hbucket;
+ ecl_class_t* cl;
+ unsigned int refc;
+ ecl_env_t* env;
+ union {
+ cl_platform_id platform;
+ cl_device_id device;
+ cl_context context;
+ cl_command_queue queue;
+ cl_mem mem;
+ cl_sampler sampler;
+ cl_program program;
+ cl_kernel kernel;
+ cl_event event;
+ void* opaque;
+ };
+} ecl_object_t;
+
+
+#define ECL_DEVICE_TYPE_DEFAULT 0x00000000
+#define ECL_DEVICE_TYPE_CPU 0x00000001
+#define ECL_DEVICE_TYPE_GPU 0x00000002
+#define ECL_DEVICE_TYPE_ACCELERATOR 0x00000004
+#define ECL_DEVICE_TYPE_ALL 0xFFFFFFFF
+
+// Names for the device type bits (bitfield); terminated by a zero entry.
+ecl_kv_t kv_device_type[] = {
+    { .key = "cpu",         .value = CL_DEVICE_TYPE_CPU },
+    { .key = "gpu",         .value = CL_DEVICE_TYPE_GPU },
+    { .key = "accelerator", .value = CL_DEVICE_TYPE_ACCELERATOR },
+    { .key = 0,             .value = 0 }
+};
+
+// Names for the floating-point config bits (bitfield); terminated by a
+// zero entry.
+ecl_kv_t kv_fp_config[] = {
+    { .key = "denorm",           .value = CL_FP_DENORM },
+    { .key = "inf_nan",          .value = CL_FP_INF_NAN },
+    { .key = "round_to_nearest", .value = CL_FP_ROUND_TO_NEAREST },
+    { .key = "round_to_zero",    .value = CL_FP_ROUND_TO_ZERO },
+    { .key = "round_to_inf",     .value = CL_FP_ROUND_TO_INF },
+    { .key = "fma",              .value = CL_FP_FMA },
+    { .key = 0,                  .value = 0 }
+};
+
+// Names for the global memory cache type (enum); terminated by a zero
+// entry.
+ecl_kv_t kv_mem_cache_type[] = {
+    { .key = "none",       .value = CL_NONE },
+    { .key = "read_only",  .value = CL_READ_ONLY_CACHE },
+    { .key = "read_write", .value = CL_READ_WRITE_CACHE },
+    { .key = 0,            .value = 0 }
+};
+
+// Names for the local memory type (enum); terminated by a zero entry.
+ecl_kv_t kv_local_mem_type[] = {
+    { .key = "local",  .value = CL_LOCAL },
+    { .key = "global", .value = CL_GLOBAL },
+    { .key = 0,        .value = 0 }
+};
+
+// Names for the execution capability bits (bitfield); terminated by a
+// zero entry.
+ecl_kv_t kv_exec_capabilities[] = {
+    { .key = "kernel",        .value = CL_EXEC_KERNEL },
+    { .key = "native_kernel", .value = CL_EXEC_NATIVE_KERNEL },
+    { .key = 0,               .value = 0 }
+};
+
+#define ECL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE 0x01
+#define ECL_QUEUE_PROFILING_ENABLE 0x02
+
+// Names for the command queue property bits (bitfield); terminated by a
+// zero entry.
+ecl_kv_t kv_command_queue_properties[] = {
+    { .key = "out_of_order_exec_mode_enable",
+      .value = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE },
+    { .key = "profiling_enable",
+      .value = CL_QUEUE_PROFILING_ENABLE },
+    { .key = 0, .value = 0 }
+};
+
+#define ECL_MEM_READ_WRITE 0x01
+#define ECL_MEM_WRITE_ONLY 0x02
+#define ECL_MEM_READ_ONLY 0x04
+#define ECL_MEM_USE_HOST_PTR 0x08
+#define ECL_MEM_ALLOC_HOST_PTR 0x10
+#define ECL_MEM_COPY_HOST_PTR 0x20
+
+// Names for the memory object flag bits (bitfield); terminated by a zero
+// entry.
+ecl_kv_t kv_mem_flags[] = {
+    { .key = "read_write",     .value = CL_MEM_READ_WRITE },
+    { .key = "write_only",     .value = CL_MEM_WRITE_ONLY },
+    { .key = "read_only",      .value = CL_MEM_READ_ONLY },
+    { .key = "use_host_ptr",   .value = CL_MEM_USE_HOST_PTR },
+    { .key = "alloc_host_ptr", .value = CL_MEM_ALLOC_HOST_PTR },
+    { .key = "copy_host_ptr",  .value = CL_MEM_COPY_HOST_PTR },
+    { .key = 0,                .value = 0 }
+};
+
+// Names for the memory object type (enum); terminated by a zero entry.
+ecl_kv_t kv_mem_object_type[] = {
+    { .key = "buffer",  .value = CL_MEM_OBJECT_BUFFER },
+    { .key = "image2d", .value = CL_MEM_OBJECT_IMAGE2D },
+    { .key = "image3d", .value = CL_MEM_OBJECT_IMAGE3D },
+    { .key = 0,         .value = 0 }
+};
+
+// Names for the sampler addressing mode (enum); terminated by a zero
+// entry. ADDRESSING_MODE_NUM is derived from this table's size.
+// "clamp_to_edge" was previously misspelled "clamp_to_eded".
+ecl_kv_t kv_addressing_mode[] = { // enum
+    { "none", CL_ADDRESS_NONE },
+    { "clamp_to_edge", CL_ADDRESS_CLAMP_TO_EDGE },
+    { "clamp", CL_ADDRESS_CLAMP },
+    { "repeat", CL_ADDRESS_REPEAT },
+    { 0, 0 }
+};
+
+#define ADDRESSING_MODE_NUM ((int)(sizeof(kv_addressing_mode)/sizeof(ecl_kv_t))-1)
+
+
+// Names for the sampler filter mode (enum); terminated by a zero entry.
+ecl_kv_t kv_filter_mode[] = {
+    { .key = "nearest", .value = CL_FILTER_NEAREST },
+    { .key = "linear",  .value = CL_FILTER_LINEAR },
+    { .key = 0,         .value = 0 }
+};
+
+#define FILTER_MODE_NUM ((int)(sizeof(kv_filter_mode)/sizeof(ecl_kv_t))-1)
+
+// Names for the map flag bits (bitfield); terminated by a zero entry.
+ecl_kv_t kv_map_flags[] = {
+    { .key = "read",  .value = CL_MAP_READ },
+    { .key = "write", .value = CL_MAP_WRITE },
+    { .key = 0,       .value = 0 }
+};
+
+// Names for the program build status (enum); terminated by a zero entry.
+ecl_kv_t kv_build_status[] = {
+    { .key = "success",     .value = CL_BUILD_SUCCESS },
+    { .key = "none",        .value = CL_BUILD_NONE },
+    { .key = "error",       .value = CL_BUILD_ERROR },
+    { .key = "in_progress", .value = CL_BUILD_IN_PROGRESS },
+    { .key = 0,             .value = 0 }
+};
+
+// Names for the command type of an event (enum); terminated by a zero
+// entry. "acquire_gl_objects" was previously misspelled
+// "aquire_gl_objects".
+ecl_kv_t kv_command_type[] = { // enum
+    { "ndrange_kernel", CL_COMMAND_NDRANGE_KERNEL },
+    { "task", CL_COMMAND_TASK },
+    { "native_kernel", CL_COMMAND_NATIVE_KERNEL },
+    { "read_buffer", CL_COMMAND_READ_BUFFER },
+    { "write_buffer", CL_COMMAND_WRITE_BUFFER },
+    { "copy_buffer", CL_COMMAND_COPY_BUFFER },
+    { "read_image", CL_COMMAND_READ_IMAGE },
+    { "write_image", CL_COMMAND_WRITE_IMAGE },
+    { "copy_image", CL_COMMAND_COPY_IMAGE },
+    { "copy_image_to_buffer", CL_COMMAND_COPY_IMAGE_TO_BUFFER },
+    { "copy_buffer_to_image", CL_COMMAND_COPY_BUFFER_TO_IMAGE },
+    { "map_buffer", CL_COMMAND_MAP_BUFFER },
+    { "map_image", CL_COMMAND_MAP_IMAGE },
+    { "unmap_mem_object", CL_COMMAND_UNMAP_MEM_OBJECT },
+    { "marker", CL_COMMAND_MARKER },
+    { "acquire_gl_objects", CL_COMMAND_ACQUIRE_GL_OBJECTS },
+    { "release_gl_objects", CL_COMMAND_RELEASE_GL_OBJECTS },
+    { 0, 0}
+};
+
+ecl_kv_t kv_execution_status[] = { // enum
+ { "complete", CL_COMPLETE }, // same as CL_SUCCESS
+ { "running", CL_RUNNING },
+ { "submitted", CL_SUBMITTED },
+ { "queued", CL_QUEUED },
+ // the error codes (negative values)
+ { "device_not_found", CL_DEVICE_NOT_FOUND },
+ { "device_not_available", CL_DEVICE_NOT_AVAILABLE },
+ { "compiler_not_available", CL_COMPILER_NOT_AVAILABLE },
+ { "mem_object_allocation_failure", CL_MEM_OBJECT_ALLOCATION_FAILURE },
+ { "out_of_resources", CL_OUT_OF_RESOURCES },
+ { "out_of_host_memory", CL_OUT_OF_HOST_MEMORY },
+ { "profiling_info_not_available", CL_PROFILING_INFO_NOT_AVAILABLE },
+ { "mem_copy_overlap", CL_MEM_COPY_OVERLAP },
+ { "image_format_mismatch", CL_IMAGE_FORMAT_MISMATCH },
+ { "image_format_not_supported", CL_IMAGE_FORMAT_NOT_SUPPORTED },
+ { "build_program_failure", CL_BUILD_PROGRAM_FAILURE },
+ { "map_failure", CL_MAP_FAILURE },
+ { "invalid_value", CL_INVALID_VALUE },
+ { "invalid_device type", CL_INVALID_DEVICE_TYPE },
+ { "invalid_platform", CL_INVALID_PLATFORM },
+ { "invalid_device", CL_INVALID_DEVICE },
+ { "invalid_context", CL_INVALID_CONTEXT },
+ { "invalid_queue_properties", CL_INVALID_QUEUE_PROPERTIES },
+ { "invalid_command_queue", CL_INVALID_COMMAND_QUEUE },
+ { "invalid_host_ptr", CL_INVALID_HOST_PTR },
+ { "invalid_mem_object", CL_INVALID_MEM_OBJECT },
+ { "invalid_image_format_descriptor", CL_INVALID_IMAGE_FORMAT_DESCRIPTOR },
+ { "invalid_image_size", CL_INVALID_IMAGE_SIZE },
+ { "invalid_sampler", CL_INVALID_SAMPLER },
+ { "invalid_binary", CL_INVALID_BINARY },
+ { "invalid_build_options", CL_INVALID_BUILD_OPTIONS },
+ { "invalid_program", CL_INVALID_PROGRAM },
+ { "invalid_program_executable", CL_INVALID_PROGRAM_EXECUTABLE },
+ { "invalid_kernel_name", CL_INVALID_KERNEL_NAME },
+ { "invalid_kernel_definition", CL_INVALID_KERNEL_DEFINITION },
+ { "invalid_kernel", CL_INVALID_KERNEL },
+ { "invalid_arg_index", CL_INVALID_ARG_INDEX },
+ { "invalid_arg_value", CL_INVALID_ARG_VALUE },
+ { "invalid_arg_size", CL_INVALID_ARG_SIZE },
+ { "invalid_kernel_args", CL_INVALID_KERNEL_ARGS },
+ { "invalid_work_dimension", CL_INVALID_WORK_DIMENSION },
+ { "invalid_work_group_size", CL_INVALID_WORK_GROUP_SIZE },
+ { "invalid_work_item size", CL_INVALID_WORK_ITEM_SIZE },
+ { "invalid_global_offset", CL_INVALID_GLOBAL_OFFSET },
+ { "invalid_event_wait_list", CL_INVALID_EVENT_WAIT_LIST },
+ { "invalid_event", CL_INVALID_EVENT },
+ { "invalid_operation", CL_INVALID_OPERATION },
+ { "invalid_gl_object", CL_INVALID_GL_OBJECT },
+ { "invalid_buffer_size", CL_INVALID_BUFFER_SIZE },
+ { "invalid_mip_level", CL_INVALID_MIP_LEVEL },
+ { 0, 0 }
+};
+
+// Map device info index 0...N => cl_device_info x Data type
+ecl_info_t device_info[] =
+{
+ /* 00 */ { "type", CL_DEVICE_TYPE, false, OCL_DEVICE_TYPE, BITFIELD, kv_device_type },
+ /* 01 */ { "vendor_id", CL_DEVICE_VENDOR_ID, false, OCL_UINT, UINT, 0 },
+ /* 02 */ { "max_compute_units", CL_DEVICE_MAX_COMPUTE_UNITS, false, OCL_UINT, UINT, 0 },
+ /* 03 */ { "max_work_item_dimensions", CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, false, OCL_UINT, UINT, 0 },
+ /* 04 */ { "max_work_group_size", CL_DEVICE_MAX_WORK_GROUP_SIZE, false, OCL_SIZE, SIZE_T, 0 },
+ /* 05 */ { "max_work_item_sizes", CL_DEVICE_MAX_WORK_ITEM_SIZES, true, OCL_SIZE, SIZE_T, 0 },
+ /* 06 */ { "preferred_vector_width_char", CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, false, OCL_UINT, UINT, 0 },
+ /* 07 */ { "preferred_vector_width_short", CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, false, OCL_UINT, UINT, 0 },
+ /* 08 */ { "preferred_vector_width_int", CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, false, OCL_UINT, UINT, 0 },
+ /* 09 */ { "preferred_vector_width_long", CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, false,OCL_UINT, UINT, 0 },
+ /* 0A */ { "preferred_vector_width_float", CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, false, OCL_UINT, UINT, 0 },
+ /* 0B */ { "preferred_vector_width_double", CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, false, OCL_UINT, UINT, 0 },
+ /* 0C */ { "max_clock_frequency", CL_DEVICE_MAX_CLOCK_FREQUENCY, false, OCL_UINT, UINT, 0 },
+ /* 0D */ { "address_bits", CL_DEVICE_ADDRESS_BITS, fa