diff --git a/clients/spatter/json.c b/clients/spatter/json.c index 679120d..7d8ad75 100644 --- a/clients/spatter/json.c +++ b/clients/spatter/json.c @@ -28,913 +28,984 @@ */ #include "json.h" +#include "unused.h" #ifdef _MSC_VER -#ifndef _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_WARNINGS -#endif + #ifndef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif #endif const struct _json_value json_value_none; -#include -#include #include #include +#include +#include typedef unsigned int json_uchar; -static unsigned char hex_value(json_char c) { - if (isdigit(c)) - return c - '0'; - - switch (c) { - case 'a': - case 'A': - return 0x0A; - case 'b': - case 'B': - return 0x0B; - case 'c': - case 'C': - return 0x0C; - case 'd': - case 'D': - return 0x0D; - case 'e': - case 'E': - return 0x0E; - case 'f': - case 'F': - return 0x0F; - default: - return 0xFF; - } +static unsigned char hex_value (json_char c) +{ + if (isdigit(c)) + return c - '0'; + + switch (c) { + case 'a': case 'A': return 0x0A; + case 'b': case 'B': return 0x0B; + case 'c': case 'C': return 0x0C; + case 'd': case 'D': return 0x0D; + case 'e': case 'E': return 0x0E; + case 'f': case 'F': return 0x0F; + default: return 0xFF; + } } -typedef struct { - unsigned long used_memory; +typedef struct +{ + unsigned long used_memory; - unsigned int uint_max; - unsigned long ulong_max; + unsigned int uint_max; + unsigned long ulong_max; - json_settings settings; - int first_pass; + json_settings settings; + int first_pass; - const json_char *ptr; - unsigned int cur_line, cur_col; + const json_char * ptr; + unsigned int cur_line, cur_col; } json_state; -static void *default_alloc(size_t size, int zero, void *user_data) { - return zero ? calloc(1, size) : malloc(size); +static void * default_alloc (size_t size, int zero, void * UNUSED(user_data)) +{ + return zero ? calloc (1, size) : malloc (size); } -static void default_free(void *ptr, void *user_data) { free(ptr); } +static void default_free (void * ptr, void * UNUSED(user_data)) +{ + free (ptr); +} -static void *json_alloc(json_state *state, unsigned long size, int zero) { - if ((state->ulong_max - state->used_memory) < size) - return 0; +static void * json_alloc (json_state * state, unsigned long size, int zero) +{ + if ((state->ulong_max - state->used_memory) < size) + return 0; - if (state->settings.max_memory && - (state->used_memory += size) > state->settings.max_memory) { - return 0; - } + if (state->settings.max_memory + && (state->used_memory += size) > state->settings.max_memory) + { + return 0; + } - return state->settings.mem_alloc(size, zero, state->settings.user_data); + return state->settings.mem_alloc (size, zero, state->settings.user_data); } -static int new_value(json_state *state, json_value **top, json_value **root, - json_value **alloc, json_type type) { - json_value *value; - int values_size; +static int new_value (json_state * state, + json_value ** top, json_value ** root, json_value ** alloc, + json_type type) +{ + json_value * value; + int values_size; - if (!state->first_pass) { - value = *top = *alloc; - *alloc = (*alloc)->_reserved.next_alloc; + if (!state->first_pass) + { + value = *top = *alloc; + *alloc = (*alloc)->_reserved.next_alloc; - if (!*root) - *root = value; + if (!*root) + *root = value; - switch (value->type) { - case json_array: + switch (value->type) + { + case json_array: - if (value->u.array.length == 0) - break; + if (value->u.array.length == 0) + break; - if (!(value->u.array.values = (json_value **)json_alloc( - state, value->u.array.length * sizeof(json_value *), 0))) { - return 0; - } + if (! (value->u.array.values = (json_value **) json_alloc + (state, value->u.array.length * sizeof (json_value *), 0)) ) + { + return 0; + } - value->u.array.length = 0; - break; + value->u.array.length = 0; + break; - case json_object: + case json_object: - if (value->u.object.length == 0) - break; + if (value->u.object.length == 0) + break; - values_size = sizeof(*value->u.object.values) * value->u.object.length; + values_size = sizeof (*value->u.object.values) * value->u.object.length; - if (!(value->u.object.values = (json_object_entry *)json_alloc( - state, values_size + ((unsigned long)value->u.object.values), - 0))) { - return 0; - } + if (! (value->u.object.values = (json_object_entry *) json_alloc + (state, values_size + ((unsigned long) value->u.object.values), 0)) ) + { + return 0; + } - value->_reserved.object_mem = - (*(char **)&value->u.object.values) + values_size; + value->_reserved.object_mem = (*(char **) &value->u.object.values) + values_size; - value->u.object.length = 0; - break; + value->u.object.length = 0; + break; - case json_string: + case json_string: - if (!(value->u.string.ptr = (json_char *)json_alloc( - state, (value->u.string.length + 1) * sizeof(json_char), 0))) { - return 0; - } + if (! (value->u.string.ptr = (json_char *) json_alloc + (state, (value->u.string.length + 1) * sizeof (json_char), 0)) ) + { + return 0; + } - value->u.string.length = 0; - break; + value->u.string.length = 0; + break; - default: - break; - }; + default: + break; + }; - return 1; - } + return 1; + } - if (!(value = (json_value *)json_alloc( - state, sizeof(json_value) + state->settings.value_extra, 1))) { - return 0; - } + if (! (value = (json_value *) json_alloc + (state, sizeof (json_value) + state->settings.value_extra, 1))) + { + return 0; + } - if (!*root) - *root = value; + if (!*root) + *root = value; - value->type = type; - value->parent = *top; + value->type = type; + value->parent = *top; -#ifdef JSON_TRACK_SOURCE - value->line = state->cur_line; - value->col = state->cur_col; -#endif + #ifdef JSON_TRACK_SOURCE + value->line = state->cur_line; + value->col = state->cur_col; + #endif - if (*alloc) - (*alloc)->_reserved.next_alloc = value; + if (*alloc) + (*alloc)->_reserved.next_alloc = value; - *alloc = *top = value; + *alloc = *top = value; - return 1; + return 1; } -#define whitespace \ - case '\n': \ - ++state.cur_line; \ - state.cur_col = 0; \ - case ' ': \ - case '\t': \ - case '\r' - -#define string_add(b) \ - do { \ - if (!state.first_pass) \ - string[string_length] = b; \ - ++string_length; \ - } while (0); - -#define line_and_col state.cur_line, state.cur_col - -static const long flag_next = 1 << 0, flag_reproc = 1 << 1, - flag_need_comma = 1 << 2, flag_seek_value = 1 << 3, - flag_escaped = 1 << 4, flag_string = 1 << 5, - flag_need_colon = 1 << 6, flag_done = 1 << 7, - flag_num_negative = 1 << 8, flag_num_zero = 1 << 9, - flag_num_e = 1 << 10, flag_num_e_got_sign = 1 << 11, - flag_num_e_negative = 1 << 12, flag_line_comment = 1 << 13, - flag_block_comment = 1 << 14; - -json_value *json_parse_ex(json_settings *settings, const json_char *json, - size_t length, char *error_buf) { - json_char error[json_error_max]; - const json_char *end; - json_value *top, *root, *alloc = 0; - json_state state = {0}; - long flags; - long num_digits = 0, num_e = 0; - json_int_t num_fraction = 0; - - /* Skip UTF-8 BOM - */ - if (length >= 3 && ((unsigned char)json[0]) == 0xEF && - ((unsigned char)json[1]) == 0xBB && ((unsigned char)json[2]) == 0xBF) { - json += 3; - length -= 3; - } - - error[0] = '\0'; - end = (json + length); - - memcpy(&state.settings, settings, sizeof(json_settings)); - - if (!state.settings.mem_alloc) - state.settings.mem_alloc = default_alloc; - - if (!state.settings.mem_free) - state.settings.mem_free = default_free; - - memset(&state.uint_max, 0xFF, sizeof(state.uint_max)); - memset(&state.ulong_max, 0xFF, sizeof(state.ulong_max)); - - state.uint_max -= 8; /* limit of how much can be added before next check */ - state.ulong_max -= 8; - - for (state.first_pass = 1; state.first_pass >= 0; --state.first_pass) { - json_uchar uchar; - unsigned char uc_b1, uc_b2, uc_b3, uc_b4; - json_char *string = 0; - unsigned int string_length = 0; - - top = root = 0; - flags = flag_seek_value; - - state.cur_line = 1; - - for (state.ptr = json;; ++state.ptr) { - json_char b = (state.ptr == end ? 0 : *state.ptr); - - if (flags & flag_string) { - if (!b) { - sprintf(error, "Unexpected EOF in string (at %d:%d)", line_and_col); - goto e_failed; - } - - if (string_length > state.uint_max) - goto e_overflow; - - if (flags & flag_escaped) { - flags &= ~flag_escaped; - - switch (b) { - case 'b': - string_add('\b'); - break; - case 'f': - string_add('\f'); - break; - case 'n': - string_add('\n'); - break; - case 'r': - string_add('\r'); - break; - case 't': - string_add('\t'); - break; - case 'u': - - if (end - state.ptr <= 4 || - (uc_b1 = hex_value(*++state.ptr)) == 0xFF || - (uc_b2 = hex_value(*++state.ptr)) == 0xFF || - (uc_b3 = hex_value(*++state.ptr)) == 0xFF || - (uc_b4 = hex_value(*++state.ptr)) == 0xFF) { - sprintf(error, "Invalid character value `%c` (at %d:%d)", b, - line_and_col); - goto e_failed; - } - - uc_b1 = (uc_b1 << 4) | uc_b2; - uc_b2 = (uc_b3 << 4) | uc_b4; - uchar = (uc_b1 << 8) | uc_b2; - - if ((uchar & 0xF800) == 0xD800) { - json_uchar uchar2; - - if (end - state.ptr <= 6 || (*++state.ptr) != '\\' || - (*++state.ptr) != 'u' || - (uc_b1 = hex_value(*++state.ptr)) == 0xFF || - (uc_b2 = hex_value(*++state.ptr)) == 0xFF || - (uc_b3 = hex_value(*++state.ptr)) == 0xFF || - (uc_b4 = hex_value(*++state.ptr)) == 0xFF) { - sprintf(error, "Invalid character value `%c` (at %d:%d)", b, - line_and_col); - goto e_failed; - } - - uc_b1 = (uc_b1 << 4) | uc_b2; - uc_b2 = (uc_b3 << 4) | uc_b4; - uchar2 = (uc_b1 << 8) | uc_b2; - - uchar = 0x010000 | ((uchar & 0x3FF) << 10) | (uchar2 & 0x3FF); - } - - if (sizeof(json_char) >= sizeof(json_uchar) || (uchar <= 0x7F)) { - string_add((json_char)uchar); - break; +#define whitespace \ + case '\n': ++ state.cur_line; state.cur_col = 0; \ + case ' ': case '\t': case '\r' + +#define string_add(b) \ + do { if (!state.first_pass) string [string_length] = b; ++ string_length; } while (0); + +#define line_and_col \ + state.cur_line, state.cur_col + +static const long + flag_next = 1 << 0, + flag_reproc = 1 << 1, + flag_need_comma = 1 << 2, + flag_seek_value = 1 << 3, + flag_escaped = 1 << 4, + flag_string = 1 << 5, + flag_need_colon = 1 << 6, + flag_done = 1 << 7, + flag_num_negative = 1 << 8, + flag_num_zero = 1 << 9, + flag_num_e = 1 << 10, + flag_num_e_got_sign = 1 << 11, + flag_num_e_negative = 1 << 12, + flag_line_comment = 1 << 13, + flag_block_comment = 1 << 14; + +json_value * json_parse_ex (json_settings * settings, + const json_char * json, + size_t length, + char * error_buf) +{ + json_char error [json_error_max]; + const json_char * end; + json_value * top, * root, * alloc = 0; + json_state state = { 0 }; + long flags; + long num_digits = 0, num_e = 0; + json_int_t num_fraction = 0; + + /* Skip UTF-8 BOM + */ + if (length >= 3 && ((unsigned char) json [0]) == 0xEF + && ((unsigned char) json [1]) == 0xBB + && ((unsigned char) json [2]) == 0xBF) + { + json += 3; + length -= 3; + } + + error[0] = '\0'; + end = (json + length); + + memcpy (&state.settings, settings, sizeof (json_settings)); + + if (!state.settings.mem_alloc) + state.settings.mem_alloc = default_alloc; + + if (!state.settings.mem_free) + state.settings.mem_free = default_free; + + memset (&state.uint_max, 0xFF, sizeof (state.uint_max)); + memset (&state.ulong_max, 0xFF, sizeof (state.ulong_max)); + + state.uint_max -= 8; /* limit of how much can be added before next check */ + state.ulong_max -= 8; + + for (state.first_pass = 1; state.first_pass >= 0; -- state.first_pass) + { + json_uchar uchar; + unsigned char uc_b1, uc_b2, uc_b3, uc_b4; + json_char * string = 0; + unsigned int string_length = 0; + + top = root = 0; + flags = flag_seek_value; + + state.cur_line = 1; + + for (state.ptr = json ;; ++ state.ptr) + { + json_char b = (state.ptr == end ? 0 : *state.ptr); + + if (flags & flag_string) + { + if (!b) + { sprintf (error, "Unexpected EOF in string (at %d:%d)", line_and_col); + goto e_failed; } - if (uchar <= 0x7FF) { - if (state.first_pass) - string_length += 2; - else { - string[string_length++] = 0xC0 | (uchar >> 6); - string[string_length++] = 0x80 | (uchar & 0x3F); - } + if (string_length > state.uint_max) + goto e_overflow; + + if (flags & flag_escaped) + { + flags &= ~ flag_escaped; + + switch (b) + { + case 'b': string_add ('\b'); break; + case 'f': string_add ('\f'); break; + case 'n': string_add ('\n'); break; + case 'r': string_add ('\r'); break; + case 't': string_add ('\t'); break; + case 'u': + + if (end - state.ptr <= 4 || + (uc_b1 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b2 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b3 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b4 = hex_value (*++ state.ptr)) == 0xFF) + { + sprintf (error, "Invalid character value `%c` (at %d:%d)", b, line_and_col); + goto e_failed; + } + + uc_b1 = (uc_b1 << 4) | uc_b2; + uc_b2 = (uc_b3 << 4) | uc_b4; + uchar = (uc_b1 << 8) | uc_b2; + + if ((uchar & 0xF800) == 0xD800) { + json_uchar uchar2; + + if (end - state.ptr <= 6 || (*++ state.ptr) != '\\' || (*++ state.ptr) != 'u' || + (uc_b1 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b2 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b3 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b4 = hex_value (*++ state.ptr)) == 0xFF) + { + sprintf (error, "Invalid character value `%c` (at %d:%d)", b, line_and_col); + goto e_failed; + } + + uc_b1 = (uc_b1 << 4) | uc_b2; + uc_b2 = (uc_b3 << 4) | uc_b4; + uchar2 = (uc_b1 << 8) | uc_b2; + + uchar = 0x010000 | ((uchar & 0x3FF) << 10) | (uchar2 & 0x3FF); + } + + if (sizeof (json_char) >= sizeof (json_uchar) || (uchar <= 0x7F)) + { + string_add ((json_char) uchar); + break; + } + + if (uchar <= 0x7FF) + { + if (state.first_pass) + string_length += 2; + else + { string [string_length ++] = 0xC0 | (uchar >> 6); + string [string_length ++] = 0x80 | (uchar & 0x3F); + } + + break; + } + + if (uchar <= 0xFFFF) { + if (state.first_pass) + string_length += 3; + else + { string [string_length ++] = 0xE0 | (uchar >> 12); + string [string_length ++] = 0x80 | ((uchar >> 6) & 0x3F); + string [string_length ++] = 0x80 | (uchar & 0x3F); + } + + break; + } + + if (state.first_pass) + string_length += 4; + else + { string [string_length ++] = 0xF0 | (uchar >> 18); + string [string_length ++] = 0x80 | ((uchar >> 12) & 0x3F); + string [string_length ++] = 0x80 | ((uchar >> 6) & 0x3F); + string [string_length ++] = 0x80 | (uchar & 0x3F); + } - break; - } + break; - if (uchar <= 0xFFFF) { - if (state.first_pass) - string_length += 3; - else { - string[string_length++] = 0xE0 | (uchar >> 12); - string[string_length++] = 0x80 | ((uchar >> 6) & 0x3F); - string[string_length++] = 0x80 | (uchar & 0x3F); - } + default: + string_add (b); + }; - break; + continue; } - if (state.first_pass) - string_length += 4; - else { - string[string_length++] = 0xF0 | (uchar >> 18); - string[string_length++] = 0x80 | ((uchar >> 12) & 0x3F); - string[string_length++] = 0x80 | ((uchar >> 6) & 0x3F); - string[string_length++] = 0x80 | (uchar & 0x3F); + if (b == '\\') + { + flags |= flag_escaped; + continue; } - break; - - default: - string_add(b); - }; - - continue; - } + if (b == '"') + { + if (!state.first_pass) + string [string_length] = 0; - if (b == '\\') { - flags |= flag_escaped; - continue; - } + flags &= ~ flag_string; + string = 0; - if (b == '"') { - if (!state.first_pass) - string[string_length] = 0; + switch (top->type) + { + case json_string: - flags &= ~flag_string; - string = 0; + top->u.string.length = string_length; + flags |= flag_next; - switch (top->type) { - case json_string: + break; - top->u.string.length = string_length; - flags |= flag_next; + case json_object: - break; + if (state.first_pass) + (*(json_char **) &top->u.object.values) += string_length + 1; + else + { + top->u.object.values [top->u.object.length].name + = (json_char *) top->_reserved.object_mem; - case json_object: + top->u.object.values [top->u.object.length].name_length + = string_length; - if (state.first_pass) - (*(json_char **)&top->u.object.values) += string_length + 1; - else { - top->u.object.values[top->u.object.length].name = - (json_char *)top->_reserved.object_mem; + (*(json_char **) &top->_reserved.object_mem) += string_length + 1; + } - top->u.object.values[top->u.object.length].name_length = - string_length; + flags |= flag_seek_value | flag_need_colon; + continue; - (*(json_char **)&top->_reserved.object_mem) += string_length + 1; + default: + break; + }; } + else + { + string_add (b); + continue; + } + } + + if (state.settings.settings & json_enable_comments) + { + if (flags & (flag_line_comment | flag_block_comment)) + { + if (flags & flag_line_comment) + { + if (b == '\r' || b == '\n' || !b) + { + flags &= ~ flag_line_comment; + -- state.ptr; /* so null can be reproc'd */ + } - flags |= flag_seek_value | flag_need_colon; - continue; - - default: - break; - }; - } else { - string_add(b); - continue; - } - } + continue; + } - if (state.settings.settings & json_enable_comments) { - if (flags & (flag_line_comment | flag_block_comment)) { - if (flags & flag_line_comment) { - if (b == '\r' || b == '\n' || !b) { - flags &= ~flag_line_comment; - --state.ptr; /* so null can be reproc'd */ - } + if (flags & flag_block_comment) + { + if (!b) + { sprintf (error, "%d:%d: Unexpected EOF in block comment", line_and_col); + goto e_failed; + } - continue; - } + if (b == '*' && state.ptr < (end - 1) && state.ptr [1] == '/') + { + flags &= ~ flag_block_comment; + ++ state.ptr; /* skip closing sequence */ + } - if (flags & flag_block_comment) { - if (!b) { - sprintf(error, "%d:%d: Unexpected EOF in block comment", - line_and_col); - goto e_failed; + continue; + } } + else if (b == '/') + { + if (! (flags & (flag_seek_value | flag_done)) && top->type != json_object) + { sprintf (error, "%d:%d: Comment not allowed here", line_and_col); + goto e_failed; + } - if (b == '*' && state.ptr < (end - 1) && state.ptr[1] == '/') { - flags &= ~flag_block_comment; - ++state.ptr; /* skip closing sequence */ + if (++ state.ptr == end) + { sprintf (error, "%d:%d: EOF unexpected", line_and_col); + goto e_failed; + } + + switch (b = *state.ptr) + { + case '/': + flags |= flag_line_comment; + continue; + + case '*': + flags |= flag_block_comment; + continue; + + default: + sprintf (error, "%d:%d: Unexpected `%c` in comment opening sequence", line_and_col, b); + goto e_failed; + }; } + } - continue; - } - } else if (b == '/') { - if (!(flags & (flag_seek_value | flag_done)) && - top->type != json_object) { - sprintf(error, "%d:%d: Comment not allowed here", line_and_col); - goto e_failed; - } - - if (++state.ptr == end) { - sprintf(error, "%d:%d: EOF unexpected", line_and_col); - goto e_failed; - } - - switch (b = *state.ptr) { - case '/': - flags |= flag_line_comment; - continue; + if (flags & flag_done) + { + if (!b) + break; - case '*': - flags |= flag_block_comment; - continue; + switch (b) + { + whitespace: + continue; - default: - sprintf(error, "%d:%d: Unexpected `%c` in comment opening sequence", - line_and_col, b); - goto e_failed; - }; - } - } + default: - if (flags & flag_done) { - if (!b) - break; + sprintf (error, "%d:%d: Trailing garbage: `%c`", + state.cur_line, state.cur_col, b); - switch (b) { - whitespace: - continue; + goto e_failed; + }; + } + + if (flags & flag_seek_value) + { + switch (b) + { + whitespace: + continue; + + case ']': + + if (top && top->type == json_array) + flags = (flags & ~ (flag_need_comma | flag_seek_value)) | flag_next; + else + { sprintf (error, "%d:%d: Unexpected ]", line_and_col); + goto e_failed; + } - default: + break; - sprintf(error, "%d:%d: Trailing garbage: `%c`", state.cur_line, - state.cur_col, b); + default: - goto e_failed; - }; - } + if (flags & flag_need_comma) + { + if (b == ',') + { flags &= ~ flag_need_comma; + continue; + } + else + { + sprintf (error, "%d:%d: Expected , before %c", + state.cur_line, state.cur_col, b); - if (flags & flag_seek_value) { - switch (b) { - whitespace: - continue; + goto e_failed; + } + } - case ']': + if (flags & flag_need_colon) + { + if (b == ':') + { flags &= ~ flag_need_colon; + continue; + } + else + { + sprintf (error, "%d:%d: Expected : before %c", + state.cur_line, state.cur_col, b); + + goto e_failed; + } + } - if (top && top->type == json_array) - flags = (flags & ~(flag_need_comma | flag_seek_value)) | flag_next; - else { - sprintf(error, "%d:%d: Unexpected ]", line_and_col); - goto e_failed; - } + flags &= ~ flag_seek_value; - break; + switch (b) + { + case '{': - default: + if (!new_value (&state, &top, &root, &alloc, json_object)) + goto e_alloc_failure; - if (flags & flag_need_comma) { - if (b == ',') { - flags &= ~flag_need_comma; - continue; - } else { - sprintf(error, "%d:%d: Expected , before %c", state.cur_line, - state.cur_col, b); + continue; - goto e_failed; - } - } + case '[': - if (flags & flag_need_colon) { - if (b == ':') { - flags &= ~flag_need_colon; - continue; - } else { - sprintf(error, "%d:%d: Expected : before %c", state.cur_line, - state.cur_col, b); + if (!new_value (&state, &top, &root, &alloc, json_array)) + goto e_alloc_failure; - goto e_failed; - } - } + flags |= flag_seek_value; + continue; - flags &= ~flag_seek_value; + case '"': - switch (b) { - case '{': + if (!new_value (&state, &top, &root, &alloc, json_string)) + goto e_alloc_failure; - if (!new_value(&state, &top, &root, &alloc, json_object)) - goto e_alloc_failure; + flags |= flag_string; - continue; + string = top->u.string.ptr; + string_length = 0; - case '[': + continue; - if (!new_value(&state, &top, &root, &alloc, json_array)) - goto e_alloc_failure; + case 't': - flags |= flag_seek_value; - continue; + if ((end - state.ptr) < 3 || *(++ state.ptr) != 'r' || + *(++ state.ptr) != 'u' || *(++ state.ptr) != 'e') + { + goto e_unknown_value; + } - case '"': + if (!new_value (&state, &top, &root, &alloc, json_boolean)) + goto e_alloc_failure; - if (!new_value(&state, &top, &root, &alloc, json_string)) - goto e_alloc_failure; + top->u.boolean = 1; - flags |= flag_string; + flags |= flag_next; + break; - string = top->u.string.ptr; - string_length = 0; + case 'f': - continue; + if ((end - state.ptr) < 4 || *(++ state.ptr) != 'a' || + *(++ state.ptr) != 'l' || *(++ state.ptr) != 's' || + *(++ state.ptr) != 'e') + { + goto e_unknown_value; + } - case 't': + if (!new_value (&state, &top, &root, &alloc, json_boolean)) + goto e_alloc_failure; - if ((end - state.ptr) < 3 || *(++state.ptr) != 'r' || - *(++state.ptr) != 'u' || *(++state.ptr) != 'e') { - goto e_unknown_value; - } + flags |= flag_next; + break; - if (!new_value(&state, &top, &root, &alloc, json_boolean)) - goto e_alloc_failure; + case 'n': - top->u.boolean = 1; + if ((end - state.ptr) < 3 || *(++ state.ptr) != 'u' || + *(++ state.ptr) != 'l' || *(++ state.ptr) != 'l') + { + goto e_unknown_value; + } - flags |= flag_next; - break; + if (!new_value (&state, &top, &root, &alloc, json_null)) + goto e_alloc_failure; - case 'f': + flags |= flag_next; + break; - if ((end - state.ptr) < 4 || *(++state.ptr) != 'a' || - *(++state.ptr) != 'l' || *(++state.ptr) != 's' || - *(++state.ptr) != 'e') { - goto e_unknown_value; - } + default: - if (!new_value(&state, &top, &root, &alloc, json_boolean)) - goto e_alloc_failure; + if (isdigit (b) || b == '-') + { + if (!new_value (&state, &top, &root, &alloc, json_integer)) + goto e_alloc_failure; - flags |= flag_next; - break; + if (!state.first_pass) + { + while (isdigit (b) || b == '+' || b == '-' + || b == 'e' || b == 'E' || b == '.') + { + if ( (++ state.ptr) == end) + { + b = 0; + break; + } - case 'n': + b = *state.ptr; + } - if ((end - state.ptr) < 3 || *(++state.ptr) != 'u' || - *(++state.ptr) != 'l' || *(++state.ptr) != 'l') { - goto e_unknown_value; - } + flags |= flag_next | flag_reproc; + break; + } - if (!new_value(&state, &top, &root, &alloc, json_null)) - goto e_alloc_failure; + flags &= ~ (flag_num_negative | flag_num_e | + flag_num_e_got_sign | flag_num_e_negative | + flag_num_zero); - flags |= flag_next; - break; - - default: - - if (isdigit(b) || b == '-') { - if (!new_value(&state, &top, &root, &alloc, json_integer)) - goto e_alloc_failure; + num_digits = 0; + num_fraction = 0; + num_e = 0; - if (!state.first_pass) { - while (isdigit(b) || b == '+' || b == '-' || b == 'e' || - b == 'E' || b == '.') { - if ((++state.ptr) == end) { - b = 0; - break; + if (b != '-') + { + flags |= flag_reproc; + break; + } + + flags |= flag_num_negative; + continue; + } + else + { sprintf (error, "%d:%d: Unexpected %c when seeking value", line_and_col, b); + goto e_failed; + } + }; + }; + } + else + { + switch (top->type) + { + case json_object: + + switch (b) + { + whitespace: + continue; + + case '"': + + if (flags & flag_need_comma) + { sprintf (error, "%d:%d: Expected , before \"", line_and_col); + goto e_failed; + } + + flags |= flag_string; + + string = (json_char *) top->_reserved.object_mem; + string_length = 0; + + break; + + case '}': + + flags = (flags & ~ flag_need_comma) | flag_next; + break; + + case ',': + + if (flags & flag_need_comma) + { + flags &= ~ flag_need_comma; + break; + } + + default: + sprintf (error, "%d:%d: Unexpected `%c` in object", line_and_col, b); + goto e_failed; + }; + + break; + + case json_integer: + case json_double: + + if (isdigit (b)) + { + ++ num_digits; + + if (top->type == json_integer || flags & flag_num_e) + { + if (! (flags & flag_num_e)) + { + if (flags & flag_num_zero) + { sprintf (error, "%d:%d: Unexpected `0` before `%c`", line_and_col, b); + goto e_failed; + } + + if (num_digits == 1 && b == '0') + flags |= flag_num_zero; + } + else + { + flags |= flag_num_e_got_sign; + num_e = (num_e * 10) + (b - '0'); + continue; + } + + top->u.integer = (top->u.integer * 10) + (b - '0'); + continue; } - b = *state.ptr; - } - - flags |= flag_next | flag_reproc; - break; - } + num_fraction = (num_fraction * 10) + (b - '0'); + continue; + } - flags &= ~(flag_num_negative | flag_num_e | flag_num_e_got_sign | - flag_num_e_negative | flag_num_zero); + if (b == '+' || b == '-') + { + if ( (flags & flag_num_e) && !(flags & flag_num_e_got_sign)) + { + flags |= flag_num_e_got_sign; - num_digits = 0; - num_fraction = 0; - num_e = 0; - - if (b != '-') { - flags |= flag_reproc; - break; - } - - flags |= flag_num_negative; - continue; - } else { - sprintf(error, "%d:%d: Unexpected %c when seeking value", - line_and_col, b); - goto e_failed; - } - }; - }; - } else { - switch (top->type) { - case json_object: - - switch (b) { - whitespace: - continue; + if (b == '-') + flags |= flag_num_e_negative; - case '"': - - if (flags & flag_need_comma) { - sprintf(error, "%d:%d: Expected , before \"", line_and_col); - goto e_failed; - } - - flags |= flag_string; - - string = (json_char *)top->_reserved.object_mem; - string_length = 0; + continue; + } + } + else if (b == '.' && top->type == json_integer) + { + if (!num_digits) + { sprintf (error, "%d:%d: Expected digit before `.`", line_and_col); + goto e_failed; + } - break; + top->type = json_double; + top->u.dbl = (double) top->u.integer; - case '}': + num_digits = 0; + continue; + } - flags = (flags & ~flag_need_comma) | flag_next; - break; + if (! (flags & flag_num_e)) + { + if (top->type == json_double) + { + if (!num_digits) + { sprintf (error, "%d:%d: Expected digit after `.`", line_and_col); + goto e_failed; + } - case ',': - - if (flags & flag_need_comma) { - flags &= ~flag_need_comma; - break; - } + top->u.dbl += ((double) num_fraction) / (pow (10.0, (double) num_digits)); + } - default: - sprintf(error, "%d:%d: Unexpected `%c` in object", line_and_col, b); - goto e_failed; - }; + if (b == 'e' || b == 'E') + { + flags |= flag_num_e; - break; + if (top->type == json_integer) + { + top->type = json_double; + top->u.dbl = (double) top->u.integer; + } - case json_integer: - case json_double: + num_digits = 0; + flags &= ~ flag_num_zero; - if (isdigit(b)) { - ++num_digits; + continue; + } + } + else + { + if (!num_digits) + { sprintf (error, "%d:%d: Expected digit after `e`", line_and_col); + goto e_failed; + } - if (top->type == json_integer || flags & flag_num_e) { - if (!(flags & flag_num_e)) { - if (flags & flag_num_zero) { - sprintf(error, "%d:%d: Unexpected `0` before `%c`", - line_and_col, b); - goto e_failed; - } - - if (num_digits == 1 && b == '0') - flags |= flag_num_zero; - } else { - flags |= flag_num_e_got_sign; - num_e = (num_e * 10) + (b - '0'); - continue; - } - - top->u.integer = (top->u.integer * 10) + (b - '0'); - continue; + top->u.dbl *= pow (10.0, (double) + (flags & flag_num_e_negative ? - num_e : num_e)); + } + + if (flags & flag_num_negative) + { + if (top->type == json_integer) + top->u.integer = - top->u.integer; + else + top->u.dbl = - top->u.dbl; + } + + flags |= flag_next | flag_reproc; + break; + + default: + break; + }; + } + + if (flags & flag_reproc) + { + flags &= ~ flag_reproc; + -- state.ptr; + } + + if (flags & flag_next) + { + flags = (flags & ~ flag_next) | flag_need_comma; + + if (!top->parent) + { + /* root value done */ + + flags |= flag_done; + continue; } - num_fraction = (num_fraction * 10) + (b - '0'); - continue; - } - - if (b == '+' || b == '-') { - if ((flags & flag_num_e) && !(flags & flag_num_e_got_sign)) { - flags |= flag_num_e_got_sign; + if (top->parent->type == json_array) + flags |= flag_seek_value; - if (b == '-') - flags |= flag_num_e_negative; + if (!state.first_pass) + { + json_value * parent = top->parent; - continue; - } - } else if (b == '.' && top->type == json_integer) { - if (!num_digits) { - sprintf(error, "%d:%d: Expected digit before `.`", line_and_col); - goto e_failed; - } - - top->type = json_double; - top->u.dbl = (double)top->u.integer; - - num_digits = 0; - continue; - } + switch (parent->type) + { + case json_object: - if (!(flags & flag_num_e)) { - if (top->type == json_double) { - if (!num_digits) { - sprintf(error, "%d:%d: Expected digit after `.`", line_and_col); - goto e_failed; - } + parent->u.object.values + [parent->u.object.length].value = top; - top->u.dbl += - ((double)num_fraction) / (pow(10.0, (double)num_digits)); - } + break; - if (b == 'e' || b == 'E') { - flags |= flag_num_e; + case json_array: - if (top->type == json_integer) { - top->type = json_double; - top->u.dbl = (double)top->u.integer; - } + parent->u.array.values + [parent->u.array.length] = top; - num_digits = 0; - flags &= ~flag_num_zero; + break; - continue; - } - } else { - if (!num_digits) { - sprintf(error, "%d:%d: Expected digit after `e`", line_and_col); - goto e_failed; + default: + break; + }; } - top->u.dbl *= pow( - 10.0, (double)(flags & flag_num_e_negative ? -num_e : num_e)); - } + if ( (++ top->parent->u.array.length) > state.uint_max) + goto e_overflow; - if (flags & flag_num_negative) { - if (top->type == json_integer) - top->u.integer = -top->u.integer; - else - top->u.dbl = -top->u.dbl; - } + top = top->parent; - flags |= flag_next | flag_reproc; - break; - - default: - break; - }; - } - - if (flags & flag_reproc) { - flags &= ~flag_reproc; - --state.ptr; - } - - if (flags & flag_next) { - flags = (flags & ~flag_next) | flag_need_comma; - - if (!top->parent) { - /* root value done */ - - flags |= flag_done; - continue; - } - - if (top->parent->type == json_array) - flags |= flag_seek_value; - - if (!state.first_pass) { - json_value *parent = top->parent; - - switch (parent->type) { - case json_object: - - parent->u.object.values[parent->u.object.length].value = top; - - break; - - case json_array: - - parent->u.array.values[parent->u.array.length] = top; - - break; - - default: - break; - }; - } - - if ((++top->parent->u.array.length) > state.uint_max) - goto e_overflow; - - top = top->parent; - - continue; + continue; + } } - } - alloc = root; - } + alloc = root; + } - return root; + return root; e_unknown_value: - sprintf(error, "%d:%d: Unknown value", line_and_col); - goto e_failed; + sprintf (error, "%d:%d: Unknown value", line_and_col); + goto e_failed; e_alloc_failure: - strcpy(error, "Memory allocation failure"); - goto e_failed; + strcpy (error, "Memory allocation failure"); + goto e_failed; e_overflow: - sprintf(error, "%d:%d: Too long (caught overflow)", line_and_col); - goto e_failed; + sprintf (error, "%d:%d: Too long (caught overflow)", line_and_col); + goto e_failed; e_failed: - if (error_buf) { - if (*error) - strcpy(error_buf, error); - else - strcpy(error_buf, "Unknown error"); - } + if (error_buf) + { + if (*error) + strcpy (error_buf, error); + else + strcpy (error_buf, "Unknown error"); + } - if (state.first_pass) - alloc = root; + if (state.first_pass) + alloc = root; - while (alloc) { - top = alloc->_reserved.next_alloc; - state.settings.mem_free(alloc, state.settings.user_data); - alloc = top; - } + while (alloc) + { + top = alloc->_reserved.next_alloc; + state.settings.mem_free (alloc, state.settings.user_data); + alloc = top; + } - if (!state.first_pass) - json_value_free_ex(&state.settings, root); + if (!state.first_pass) + json_value_free_ex (&state.settings, root); - return 0; + return 0; } -json_value *json_parse(const json_char *json, size_t length) { - json_settings settings = {0}; - return json_parse_ex(&settings, json, length, 0); +json_value * json_parse (const json_char * json, size_t length) +{ + json_settings settings = { 0 }; + return json_parse_ex (&settings, json, length, 0); } -void json_value_free_ex(json_settings *settings, json_value *value) { - json_value *cur_value; +void json_value_free_ex (json_settings * settings, json_value * value) +{ + json_value * cur_value; - if (!value) - return; + if (!value) + return; - value->parent = 0; + value->parent = 0; - while (value) { - switch (value->type) { - case json_array: + while (value) + { + switch (value->type) + { + case json_array: - if (!value->u.array.length) { - settings->mem_free(value->u.array.values, settings->user_data); - break; - } + if (!value->u.array.length) + { + settings->mem_free (value->u.array.values, settings->user_data); + break; + } - value = value->u.array.values[--value->u.array.length]; - continue; + value = value->u.array.values [-- value->u.array.length]; + continue; - case json_object: + case json_object: - if (!value->u.object.length) { - settings->mem_free(value->u.object.values, settings->user_data); - break; - } + if (!value->u.object.length) + { + settings->mem_free (value->u.object.values, settings->user_data); + break; + } - value = value->u.object.values[--value->u.object.length].value; - continue; + value = value->u.object.values [-- value->u.object.length].value; + continue; - case json_string: + case json_string: - settings->mem_free(value->u.string.ptr, settings->user_data); - break; + settings->mem_free (value->u.string.ptr, settings->user_data); + break; - default: - break; - }; + default: + break; + }; - cur_value = value; - value = value->parent; - settings->mem_free(cur_value, settings->user_data); - } + cur_value = value; + value = value->parent; + settings->mem_free (cur_value, settings->user_data); + } } -void json_value_free(json_value *value) { - json_settings settings = {0}; - settings.mem_free = default_free; - json_value_free_ex(&settings, value); -} +void json_value_free (json_value * value) +{ + json_settings settings = { 0 }; + settings.mem_free = default_free; + json_value_free_ex (&settings, value); +} \ No newline at end of file diff --git a/clients/spatter/json.h b/clients/spatter/json.h index 25628c3..065dd89 100644 --- a/clients/spatter/json.h +++ b/clients/spatter/json.h @@ -32,212 +32,248 @@ #define _JSON_H #ifndef json_char -#define json_char char + #define json_char char #endif #ifndef json_int_t -#ifndef _MSC_VER -#include -#define json_int_t int64_t -#else -#define json_int_t __int64 -#endif + #ifndef _MSC_VER + #include + #define json_int_t int64_t + #else + #define json_int_t __int64 + #endif #endif #include #ifdef __cplusplus -#include + #include -extern "C" { + extern "C" + { #endif -typedef struct { - unsigned long max_memory; - int settings; +typedef struct +{ + unsigned long max_memory; + int settings; - /* Custom allocator support (leave null to use malloc/free) - */ + /* Custom allocator support (leave null to use malloc/free) + */ - void *(*mem_alloc)(size_t, int zero, void *user_data); - void (*mem_free)(void *, void *user_data); + void * (* mem_alloc) (size_t, int zero, void * user_data); + void (* mem_free) (void *, void * user_data); - void *user_data; /* will be passed to mem_alloc and mem_free */ + void * user_data; /* will be passed to mem_alloc and mem_free */ - size_t value_extra; /* how much extra space to allocate for values? */ + size_t value_extra; /* how much extra space to allocate for values? */ } json_settings; -#define json_enable_comments 0x01 +#define json_enable_comments 0x01 -typedef enum { - json_none, - json_object, - json_array, - json_integer, - json_double, - json_string, - json_boolean, - json_null +typedef enum +{ + json_none, + json_object, + json_array, + json_integer, + json_double, + json_string, + json_boolean, + json_null } json_type; extern const struct _json_value json_value_none; -typedef struct _json_object_entry { - json_char *name; - unsigned int name_length; +typedef struct _json_object_entry +{ + json_char * name; + unsigned int name_length; - struct _json_value *value; + struct _json_value * value; } json_object_entry; -typedef struct _json_value { - struct _json_value *parent; +typedef struct _json_value +{ + struct _json_value * parent; - json_type type; + json_type type; - union { - int boolean; - json_int_t integer; - double dbl; + union + { + int boolean; + json_int_t integer; + double dbl; - struct { - unsigned int length; - json_char *ptr; /* null terminated */ + struct + { + unsigned int length; + json_char * ptr; /* null terminated */ - } string; + } string; - struct { - unsigned int length; + struct + { + unsigned int length; - json_object_entry *values; + json_object_entry * values; -#if defined(__cplusplus) && __cplusplus >= 201103L - decltype(values) begin() const { return values; } - decltype(values) end() const { return values + length; } -#endif + #if defined(__cplusplus) && __cplusplus >= 201103L + decltype(values) begin () const + { return values; + } + decltype(values) end () const + { return values + length; + } + #endif - } object; + } object; - struct { - unsigned int length; - struct _json_value **values; + struct + { + unsigned int length; + struct _json_value ** values; -#if defined(__cplusplus) && __cplusplus >= 201103L - decltype(values) begin() const { return values; } - decltype(values) end() const { return values + length; } -#endif + #if defined(__cplusplus) && __cplusplus >= 201103L + decltype(values) begin () const + { return values; + } + decltype(values) end () const + { return values + length; + } + #endif - } array; + } array; - } u; + } u; - union { - struct _json_value *next_alloc; - void *object_mem; + union + { + struct _json_value * next_alloc; + void * object_mem; - } _reserved; + } _reserved; -#ifdef JSON_TRACK_SOURCE + #ifdef JSON_TRACK_SOURCE - /* Location of the value in the source JSON - */ - unsigned int line, col; + /* Location of the value in the source JSON + */ + unsigned int line, col; -#endif + #endif - /* Some C++ operator sugar */ + /* Some C++ operator sugar */ -#ifdef __cplusplus + #ifdef __cplusplus -public: - inline _json_value() { memset(this, 0, sizeof(_json_value)); } + public: - inline const struct _json_value &operator[](int index) const { - if (type != json_array || index < 0 || - ((unsigned int)index) >= u.array.length) { - return json_value_none; - } + inline _json_value () + { memset (this, 0, sizeof (_json_value)); + } - return *u.array.values[index]; - } + inline const struct _json_value &operator [] (int index) const + { + if (type != json_array || index < 0 + || ((unsigned int) index) >= u.array.length) + { + return json_value_none; + } - inline const struct _json_value &operator[](const char *index) const { - if (type != json_object) - return json_value_none; + return *u.array.values [index]; + } - for (unsigned int i = 0; i < u.object.length; ++i) - if (!strcmp(u.object.values[i].name, index)) - return *u.object.values[i].value; + inline const struct _json_value &operator [] (const char * index) const + { + if (type != json_object) + return json_value_none; - return json_value_none; - } + for (unsigned int i = 0; i < u.object.length; ++ i) + if (!strcmp (u.object.values [i].name, index)) + return *u.object.values [i].value; - inline operator const char *() const { - switch (type) { - case json_string: - return u.string.ptr; + return json_value_none; + } - default: - return ""; - }; - } + inline operator const char * () const + { + switch (type) + { + case json_string: + return u.string.ptr; - inline operator json_int_t() const { - switch (type) { - case json_integer: - return u.integer; + default: + return ""; + }; + } - case json_double: - return (json_int_t)u.dbl; + inline operator json_int_t () const + { + switch (type) + { + case json_integer: + return u.integer; - default: - return 0; - }; - } + case json_double: + return (json_int_t) u.dbl; - inline operator bool() const { - if (type != json_boolean) - return false; + default: + return 0; + }; + } - return u.boolean != 0; - } + inline operator bool () const + { + if (type != json_boolean) + return false; - inline operator double() const { - switch (type) { - case json_integer: - return (double)u.integer; + return u.boolean != 0; + } - case json_double: - return u.dbl; + inline operator double () const + { + switch (type) + { + case json_integer: + return (double) u.integer; - default: - return 0; - }; - } + case json_double: + return u.dbl; -#endif + default: + return 0; + }; + } + + #endif } json_value; -json_value *json_parse(const json_char *json, size_t length); +json_value * json_parse (const json_char * json, + size_t length); #define json_error_max 128 -json_value *json_parse_ex(json_settings *settings, const json_char *json, - size_t length, char *error); +json_value * json_parse_ex (json_settings * settings, + const json_char * json, + size_t length, + char * error); -void json_value_free(json_value *); +void json_value_free (json_value *); /* Not usually necessary, unless you used a custom mem_alloc and now want to * use a custom mem_free. */ -void json_value_free_ex(json_settings *settings, json_value *); +void json_value_free_ex (json_settings * settings, + json_value *); + #ifdef __cplusplus -} /* extern "C" */ + } /* extern "C" */ #endif #endif diff --git a/clients/spatter/parse-args.c b/clients/spatter/parse-args.c index f419298..43b7573 100644 --- a/clients/spatter/parse-args.c +++ b/clients/spatter/parse-args.c @@ -34,23 +34,23 @@ LANL and GT), this list of conditions and the following disclaimer. this software without specific prior written permission. */ -#include "parse-args.h" -#include "argtable3.h" -#include "backend-support-tests.h" -#include "json.h" -#include "pcg_basic.h" -#include "sp_alloc.h" #include -#include #include +#include #include #include +#include #include #include -#include +#include "parse-args.h" +#include "backend-support-tests.h" +#include "sp_alloc.h" +#include "json.h" +#include "pcg_basic.h" +#include "argtable3.h" #ifdef USE_CUDA -#include "../src/cuda/cuda-backend.h" +#include "../src/cuda/cuda-backend.cu" #endif #ifdef USE_OPENMP @@ -86,1379 +86,1399 @@ int verbose; FILE *err_file; void safestrcopy(char *dest, const char *src); -void parse_p(char *, struct run_config *, int mode); -ssize_t setincludes(size_t key, size_t *set, size_t set_len); -void xkp_pattern(size_t *pat, size_t dim); +void parse_p(char*, struct run_config *, int mode); +ssize_t setincludes(size_t key, size_t* set, size_t set_len); +void xkp_pattern(ssize_t *pat, ptrdiff_t dim); void parse_backend(int argc, char **argv); -void **argtable; -unsigned int number_of_arguments = 35; +void** argtable; +unsigned int number_of_arguments = 36; struct arg_lit *verb, *help, *interactive, *validate, *aggregate, *compress; -struct arg_str *backend_arg, *cl_platform, *cl_device, *pattern, - *pattern_gather, *pattern_scatter, *kernelName, *delta, *delta_gather, - *delta_scatter, *name, *papi, *op; -struct arg_int *count, *wrap, *runs, *omp_threads, *vector_len, - *local_work_size, *shared_memory, *morton, *hilbert, *roblock, *stride, - *random_arg, *no_print_header; +struct arg_str *backend_arg, *cl_platform, *cl_device, *pattern, *pattern_gather, *pattern_scatter, *kernelName, *delta, *delta_gather, *delta_scatter, *name, *papi, *op; +struct arg_int *boundary, *pattern_size, *count, *wrap, *runs, *omp_threads, *vector_len, *local_work_size, *shared_memory, *morton, *hilbert, *roblock, *stride, *random_arg, *no_print_header; struct arg_file *kernelFile; struct arg_end *end; -void initialize_argtable() { - // Initialize the argtable on the stack just because it is easier and how the - // documentation handles it - void **malloc_argtable = - (void **)malloc(sizeof(void *) * number_of_arguments); - - // Arguments that do not take parameters - malloc_argtable[0] = help = arg_litn( - NULL, "help", 0, 1, "Displays info about commands and then exits."); - malloc_argtable[1] = verb = arg_litn( - NULL, "verbose", 0, 1, - "Print info about default arguments that you have not overridden."); - malloc_argtable[2] = no_print_header = arg_intn( - "q", "no-print-header", "", 0, 1, "Do not print header information."); - malloc_argtable[3] = interactive = - arg_litn("i", "interactive", 0, 1, - "Pick the platform and the device interactively."); - malloc_argtable[4] = validate = - arg_litn(NULL, "validate", 0, 1, - "Perform extra validation checks to ensure data validity"); - malloc_argtable[5] = aggregate = - arg_litn("a", "aggregate", 0, 1, - "Report a minimum time for all runs of a given configuration " - "for 2 or more runs. [Default 1] (Do not use with PAPI)"); - malloc_argtable[6] = compress = arg_litn("c", "compress", 0, 1, "TODO"); - // Benchmark Configuration - malloc_argtable[7] = pattern = arg_strn( - "p", "pattern", "", 0, 1, - "Specify either a built-in pattern (i.e. UNIFORM), a custom pattern " - "(i.e. 1,2,3,4), or a path to a json file with a run-configuration."); - malloc_argtable[8] = pattern_gather = - arg_strn("g", "pattern-gather", "", 0, 1, - "Valid wtih [kernel-name: GS, MultiGather]. Specify either a " - "built-in pattern (i.e. UNIFORM), a custom pattern (i.e. " - "1,2,3,4), or a path to a json file with a run-configuration."); - malloc_argtable[9] = pattern_scatter = - arg_strn("h", "pattern-scatter", "", 0, 1, - "Valid with [kernel-name: GS, MultiScatter]. Specify either a " - "built-in pattern (i.e. UNIFORM), a custom pattern (i.e. " - "1,2,3,4), or a path to a json file with a run-configuration."); - malloc_argtable[10] = kernelName = - arg_strn("k", "kernel-name", "", 0, 1, - "Specify the kernel you want to run. [Default: Gather, Options: " - "Gather, Scatter, GS, MultiGather, MultiScatter]"); - malloc_argtable[11] = op = arg_strn("o", "op", "", 0, 1, "TODO"); - malloc_argtable[12] = delta = - arg_strn("d", "delta", "", 0, 1, - "Specify one or more deltas. [Default: 8]"); - malloc_argtable[13] = delta_gather = - arg_strn("x", "delta gather", "", 0, 1, - "Specify one or more deltas. [Default: 8]"); - malloc_argtable[14] = delta_scatter = - arg_strn("y", "delta scatter", "", 0, 1, - "Specify one or more deltas. [Default: 8]"); - malloc_argtable[15] = count = arg_intn( - "l", "count", "", 0, 1, "Number of Gathers or Scatters to perform."); - malloc_argtable[16] = wrap = - arg_intn("w", "wrap", "", 0, 1, - "Number of independent slots in the small buffer (source buffer " - "if Scatter, Target buffer if Gather. [Default: 1]"); - malloc_argtable[17] = runs = arg_intn( - "R", "runs", "", 0, 1, - "Number of times to repeat execution of the kernel. [Default: 10]"); - malloc_argtable[18] = omp_threads = - arg_intn("t", "omp-threads", "", 0, 1, - "Number of OpenMP threads. [Default: OMP_MAX_THREADS]"); - malloc_argtable[19] = vector_len = - arg_intn("v", "vector-len", "", 0, 1, "TODO"); - malloc_argtable[20] = local_work_size = arg_intn( - "z", "local-work-size", "", 0, 1, - "Numer of Gathers or Scatters performed by each thread on a GPU."); - malloc_argtable[21] = shared_memory = - arg_intn("m", "shared-memory", "", 0, 1, - "Amount of dummy shared memory to allocate on GPUs (used for " - "occupancy control)."); - malloc_argtable[22] = name = - arg_strn("n", "name", "", 0, 1, - "Specify and name this configuration in the output."); - malloc_argtable[23] = random_arg = - arg_intn("s", "random", "", 0, 1, - "Sets the seed, or uses a random one if no seed is specified."); - malloc_argtable[24] = backend_arg = - arg_strn("b", "backend", "", 0, 1, - "Specify a backend: OpenCL, OpenMP, CUDA, or Serial."); - malloc_argtable[25] = cl_platform = arg_strn( - NULL, "cl-platform", "", 0, 1, - "Specify platform if using OpenCL (case-insensitive, fuzzy matching)."); - malloc_argtable[26] = cl_device = arg_strn( - NULL, "cl-device", "", 0, 1, - "Specify device if using OpenCL (case-insensitive, fuzzy matching)."); - malloc_argtable[27] = kernelFile = - arg_filen("f", "kernel-file", "", 0, 1, - "Specify the location of an OpenCL kernel file."); - // Other Configurations - malloc_argtable[28] = morton = arg_intn(NULL, "morton", "", 0, 1, "TODO"); - malloc_argtable[29] = hilbert = - arg_intn(NULL, "hilbert", "", 0, 1, "TODO"); - malloc_argtable[30] = roblock = - arg_intn(NULL, "roblock", "", 0, 1, "TODO"); - malloc_argtable[31] = stride = arg_intn(NULL, "stride", "", 0, 1, "TODO"); - malloc_argtable[32] = papi = arg_strn(NULL, "papi", "", 0, 1, "TODO"); - malloc_argtable[33] = end = arg_end(20); - - // Random has an option to provide an argument. Default its value to -1. - random_arg->hdr.flag |= ARG_HASOPTVALUE; - random_arg->ival[0] = -1; - - // Set default values - kernelName->sval[0] = "Gather\0"; - delta->sval[0] = "8\0"; - delta_gather->sval[0] = "8\0"; - delta_scatter->sval[0] = "8\0"; - wrap->ival[0] = 1; - runs->ival[0] = 10; - - // Set the global argtable equal to the malloc argtable - argtable = malloc_argtable; +void initialize_argtable() +{ + // Initialize the argtable on the stack just because it is easier and how the documentation handles it + void** malloc_argtable = (void**) malloc(sizeof(void*) * number_of_arguments); + + // Arguments that do not take parameters + malloc_argtable[0] = help = arg_litn(NULL, "help", 0, 1, "Displays info about commands and then exits."); + malloc_argtable[1] = verb = arg_litn(NULL, "verbose", 0, 1, "Print info about default arguments that you have not overridden."); + malloc_argtable[2] = no_print_header = arg_intn("q", "no-print-header", "", 0, 1, "Do not print header information."); + malloc_argtable[3] = interactive = arg_litn("i", "interactive", 0, 1, "Pick the platform and the device interactively."); + malloc_argtable[4] = validate = arg_litn(NULL, "validate", 0, 1, "Perform extra validation checks to ensure data validity"); + malloc_argtable[5] = aggregate = arg_litn("a", "aggregate", 0, 1, "Report a minimum time for all runs of a given configuration for 2 or more runs. [Default 1] (Do not use with PAPI)"); + malloc_argtable[6] = compress = arg_litn("c", "compress", 0, 1, "TODO"); + // Benchmark Configuration + malloc_argtable[7] = pattern = arg_strn("p", "pattern", "", 0, 1, "Specify either a built-in pattern (i.e. UNIFORM), a custom pattern (i.e. 1,2,3,4), or a path to a json file with a run-configuration."); + malloc_argtable[8] = pattern_gather = arg_strn("g", "pattern-gather", "", 0, 1, "Valid wtih [kernel-name: GS, MultiGather]. Specify either a built-in pattern (i.e. UNIFORM), a custom pattern (i.e. 1,2,3,4), or a path to a json file with a run-configuration."); + malloc_argtable[9] = pattern_scatter = arg_strn("h", "pattern-scatter", "", 0, 1, "Valid with [kernel-name: GS, MultiScatter]. Specify either a built-in pattern (i.e. UNIFORM), a custom pattern (i.e. 1,2,3,4), or a path to a json file with a run-configuration."); + malloc_argtable[10] = kernelName = arg_strn("k", "kernel-name", "", 0, 1, "Specify the kernel you want to run. [Default: Gather, Options: Gather, Scatter, GS, MultiGather, MultiScatter]"); + malloc_argtable[11] = op = arg_strn("o", "op", "", 0, 1, "TODO"); + malloc_argtable[12] = delta = arg_strn("d", "delta", "", 0, 1, "Specify one or more deltas. [Default: 8]"); + malloc_argtable[13] = delta_gather = arg_strn("x", "delta-gather", "", 0, 1, "Specify one or more deltas. [Default: 8]"); + malloc_argtable[14] = delta_scatter = arg_strn("y", "delta-scatter", "", 0, 1, "Specify one or more deltas. [Default: 8]"); + malloc_argtable[15] = boundary = arg_intn("e", "boundary", "", 0, 1, "Specify the boundary to mod pattern indices with to limit data array size."); + malloc_argtable[16] = pattern_size = arg_intn("j", "pattern-size", "", 0, 1, "Valid with [kernel-name: Gather, Scatter] and custom patterns (i.e. not UNIFORM, MS1, LAPLACIAN, etc.). Size of Gather/Scatter pattern. Pattern will be truncated to size if used."); + malloc_argtable[17] = count = arg_intn("l", "count", "", 0, 1, "Number of Gathers or Scatters to perform."); + malloc_argtable[18] = wrap = arg_intn("w", "wrap", "", 0, 1, "Number of independent slots in the small buffer (source buffer if Scatter, Target buffer if Gather. [Default: 1]"); + malloc_argtable[19] = runs = arg_intn("R", "runs", "", 0, 1, "Number of times to repeat execution of the kernel. [Default: 10]"); + malloc_argtable[20] = omp_threads = arg_intn("t", "omp-threads", "", 0, 1, "Number of OpenMP threads. [Default: OMP_MAX_THREADS]"); + malloc_argtable[21] = vector_len = arg_intn("v", "vector-len", "", 0, 1, "TODO"); + malloc_argtable[22] = local_work_size = arg_intn("z", "local-work-size", "", 0, 1, "Numer of Gathers or Scatters performed by each thread on a GPU."); + malloc_argtable[23] = shared_memory = arg_intn("m", "shared-memory", "", 0, 1, "Amount of dummy shared memory to allocate on GPUs (used for occupancy control)."); + malloc_argtable[24] = name = arg_strn("n", "name", "", 0, 1, "Specify and name this configuration in the output."); + malloc_argtable[25] = random_arg = arg_intn("s", "random", "", 0, 1, "Sets the seed, or uses a random one if no seed is specified."); + malloc_argtable[26] = backend_arg = arg_strn("b", "backend", "", 0, 1, "Specify a backend: OpenCL, OpenMP, CUDA, or Serial."); + malloc_argtable[27] = cl_platform = arg_strn(NULL, "cl-platform", "", 0, 1, "Specify platform if using OpenCL (case-insensitive, fuzzy matching)."); + malloc_argtable[28] = cl_device = arg_strn(NULL, "cl-device", "", 0, 1, "Specify device if using OpenCL (case-insensitive, fuzzy matching)."); + malloc_argtable[29] = kernelFile = arg_filen("f", "kernel-file", "", 0, 1, "Specify the location of an OpenCL kernel file."); + // Other Configurations + malloc_argtable[30] = morton = arg_intn(NULL, "morton", "", 0, 1, "TODO"); + malloc_argtable[31] = hilbert = arg_intn(NULL, "hilbert", "", 0, 1, "TODO"); + malloc_argtable[32] = roblock = arg_intn(NULL, "roblock", "", 0, 1, "TODO"); + malloc_argtable[33] = stride = arg_intn(NULL, "stride", "", 0, 1, "TODO"); + malloc_argtable[34] = papi = arg_strn(NULL, "papi", "", 0, 1, "TODO"); + malloc_argtable[35] = end = arg_end(20); + + // Random has an option to provide an argument. Default its value to -1. + random_arg->hdr.flag |= ARG_HASOPTVALUE; + random_arg->ival[0] = -1; + + // Set default values + kernelName->sval[0] = "Gather\0"; + delta->sval[0] = "8\0"; + delta_gather->sval[0] = "8\0"; + delta_scatter->sval[0] = "8\0"; + wrap->ival[0] = 1; + runs->ival[0] = 10; + + // Set the global argtable equal to the malloc argtable + argtable = malloc_argtable; } -void copy_str_ignore_leading_space(char *dest, const char *source) { - if (source[0] == ' ') - safestrcopy(dest, &source[1]); - else - safestrcopy(dest, source); + +void copy_str_ignore_leading_space(char* dest, const char* source) +{ + if (source[0] == ' ') + safestrcopy(dest, &source[1]); + else + safestrcopy(dest, source); } -int get_num_configs(json_value *value) { - if (value->type != json_array) { - error("get_num_configs was not passed an array", ERROR); - } +int get_num_configs(json_value* value) +{ + if (value->type != json_array) { + error("get_num_configs was not passed an array", ERROR); + } - return value->u.array.length; + return value->u.array.length; } -void parse_json_kernel(json_object_entry cur, char **argv, int i) { - if (!strcasecmp(cur.value->u.string.ptr, "SCATTER") || - !strcasecmp(cur.value->u.string.ptr, "GATHER") || - !strcasecmp(cur.value->u.string.ptr, "GS") || - !strcasecmp(cur.value->u.string.ptr, "MULTISCATTER") || - !strcasecmp(cur.value->u.string.ptr, "MULTIGATHER")) { - error("Ambiguous Kernel Type: Assuming kernel-name option.", WARN); - snprintf(argv[i + 1], STRING_SIZE, "--kernel-name=%s", - cur.value->u.string.ptr); - } else { - error("Ambigous Kernel Type: Assuming kernel-file option.", WARN); - snprintf(argv[i + 1], STRING_SIZE, "--kernel-file=%s", - cur.value->u.string.ptr); - } +void parse_json_kernel(json_object_entry cur, char** argv, int i) +{ + if (!strcasecmp(cur.value->u.string.ptr, "SCATTER") || !strcasecmp(cur.value->u.string.ptr, "GATHER") || !strcasecmp(cur.value->u.string.ptr, "GS") || !strcasecmp(cur.value->u.string.ptr, "MULTISCATTER") || !strcasecmp(cur.value->u.string.ptr, "MULTIGATHER")) + { + error("Ambiguous Kernel Type: Assuming kernel-name option.", WARN); + snprintf(argv[i+1], STRING_SIZE, "--kernel-name=%s", cur.value->u.string.ptr); + } + else + { + error("Ambigous Kernel Type: Assuming kernel-file option.", WARN); + snprintf(argv[i+1], STRING_SIZE, "--kernel-file=%s", cur.value->u.string.ptr); + } } -void parse_json_array(json_object_entry cur, char **argv, int i) { - int index = 0; - index += snprintf(argv[i + 1], STRING_SIZE, "--%s=", cur.name); - printf("argv[%d]: %s\n", i + 1, argv[i + 1]); +void parse_json_array(json_object_entry cur, char** argv, int i) +{ + int index = 0; + index += snprintf(argv[i+1], STRING_SIZE, "--%s=", cur.name); + printf("argv[%d]: %s\n", i+1, argv[i+1]); - for (int j = 0; j < cur.value->u.array.length; j++) { - if (cur.value->u.array.values[j]->type != json_integer) { - error("Encountered non-integer json type while parsing array", ERROR); - } + for (int j = 0; j < cur.value->u.array.length; j++) { + if (cur.value->u.array.values[j]->type != json_integer) { + error ("Encountered non-integer json type while parsing array", ERROR); + } - char buffer[STRING_SIZE]; - int check = snprintf(buffer, STRING_SIZE, "%zd", - cur.value->u.array.values[j]->u.integer); - int added = snprintf(buffer, STRING_SIZE - index, "%zd", - cur.value->u.array.values[j]->u.integer); + char buffer[STRING_SIZE]; + int check = snprintf(buffer, STRING_SIZE, "%zd", cur.value->u.array.values[j]->u.integer); + int added = snprintf(buffer, STRING_SIZE-index, "%zd", cur.value->u.array.values[j]->u.integer); - if (check == added) { - index += snprintf(&argv[i + 1][index], STRING_SIZE - index, "%zd", - cur.value->u.array.values[j]->u.integer); + if (check == added) { + index += snprintf(&argv[i+1][index], STRING_SIZE-index, "%zd", cur.value->u.array.values[j]->u.integer); - if (index >= STRING_SIZE - 1) { - break; - } else if (j != cur.value->u.array.length - 1 && - index < STRING_SIZE - 1) { - index += snprintf(&argv[i + 1][index], STRING_SIZE - index, ","); - } + if (index >= STRING_SIZE-1) { + break; + } else if (j != cur.value->u.array.length-1 && index < STRING_SIZE-1) { + index += snprintf(&argv[i+1][index], STRING_SIZE-index, ","); + } - } else { - index--; - argv[i + 1][index] = '\0'; - break; + } else { + index--; + argv[i+1][index] = '\0'; + break; + } } - } } -struct run_config *parse_json_config(json_value *value) { +struct run_config *parse_json_config(json_value *value) +{ - struct run_config *rc = - (struct run_config *)calloc(1, sizeof(struct run_config)); + struct run_config *rc = (struct run_config *)calloc(1,sizeof(struct run_config)); - if (!value) - error("parse_json_config passed NULL pointer", ERROR); - - if (value->type != json_object) - error("parse_json_config should only be passed json_objects", ERROR); + if (!value) + error ("parse_json_config passed NULL pointer", ERROR); - int argc = value->u.object.length + 1; - char **argv = (char **)sp_malloc(sizeof(char *), argc * 2, ALIGN_CACHE); + if (value->type != json_object) + error ("parse_json_config should only be passed json_objects", ERROR); - for (int i = 0; i < argc; i++) - argv[i] = (char *)sp_malloc(1, STRING_SIZE * 2, ALIGN_CACHE); + int argc = value->u.object.length + 1; + char **argv = (char **)sp_malloc(sizeof(char*), argc*2, ALIGN_CACHE); - for (int i = 0; i < argc - 1; i++) { - json_object_entry cur = value->u.object.values[i]; + for (int i = 0; i < argc; i++) + argv[i] = (char *)sp_malloc(1, STRING_SIZE*2, ALIGN_CACHE); - if (cur.value->type == json_string) { - if (!strcasecmp(cur.name, "kernel")) { - parse_json_kernel(cur, argv, i); - } else { - snprintf(argv[i + 1], STRING_SIZE, "--%s=%s", cur.name, - cur.value->u.string.ptr); - } - } else if (cur.value->type == json_integer) { - snprintf(argv[i + 1], STRING_SIZE, "--%s=%zd", cur.name, - cur.value->u.integer); - } else if (cur.value->type == json_array) { - parse_json_array(cur, argv, i); - } else { - error("Unexpected json type", ERROR); + for (int i = 0; i < argc-1; i++) + { + json_object_entry cur = value->u.object.values[i]; + + if (cur.value->type == json_string) + { + if (!strcasecmp(cur.name, "kernel")) + { + parse_json_kernel(cur, argv, i); + } + else + { + snprintf(argv[i+1], STRING_SIZE, "--%s=%s", cur.name, cur.value->u.string.ptr); + } + } + else if (cur.value->type == json_integer) + { + snprintf(argv[i+1], STRING_SIZE, "--%s=%zd", cur.name, cur.value->u.integer); + } + else if (cur.value->type == json_array) + { + parse_json_array(cur, argv, i); + } + else + { + error ("Unexpected json type", ERROR); + } } - } - // yeah its hacky - parse_args ignores the first arg - safestrcopy(argv[0], argv[1]); + //yeah its hacky - parse_args ignores the first arg + safestrcopy(argv[0], argv[1]); - int nerrors = arg_parse(argc, argv, argtable); + int nerrors = arg_parse(argc, argv, argtable); - if (nerrors > 0) { - arg_print_errors(stdout, end, "Spatter"); - printf("Error while parsing json file.\n"); - exit(0); - } + if (nerrors > 0) + { + arg_print_errors(stdout, end, "Spatter"); + printf("Error while parsing json file.\n"); + exit(0); + } - rc = parse_runs(argc, argv); + rc = parse_runs(argc, argv); - for (int i = 0; i < argc; i++) - free(argv[i]); + for (int i = 0; i < argc; i++) + free(argv[i]); - free(argv); + free(argv); - return rc; + return rc; } -void parse_args(int argc, char **argv, int *nrc, struct run_config **rc) { - initialize_argtable(); - int nerrors = arg_parse(argc, argv, argtable); +void parse_args(int argc, char **argv, int *nrc, struct run_config **rc) +{ + initialize_argtable(); + int nerrors = arg_parse(argc, argv, argtable); + + if (help->count > 0) + { + printf("Usage:\n"); + arg_print_syntax(stdout, argtable, "\n"); + arg_print_glossary(stdout, argtable, " %-28s %s\n"); + exit(0); + } - if (help->count > 0) { - printf("Usage:\n"); - arg_print_syntax(stdout, argtable, "\n"); - arg_print_glossary(stdout, argtable, " %-28s %s\n"); - exit(0); - } + if (nerrors > 0) + { + arg_print_errors(stdout, end, "Spatter"); + printf("Try './spatter --help' for more information.\n"); + exit(0); + } - if (nerrors > 0) { - arg_print_errors(stdout, end, "Spatter"); - printf("Try './spatter --help' for more information.\n"); - exit(0); - } + parse_backend(argc, argv); - parse_backend(argc, argv); - // Parse command-line arguments to in case of specified json file. - int json = 0; + // Parse command-line arguments to in case of specified json file. + int json = 0; - if (pattern->count > 0) { - if (strstr(pattern->sval[0], "FILE")) { - safestrcopy(jsonfilename, strchr(pattern->sval[0], '=') + 1); - printf("Reading patterns from %s.\n", jsonfilename); - json = 1; + if (pattern->count > 0) + { + if (strstr(pattern->sval[0], "FILE")) + { + safestrcopy(jsonfilename, strchr(pattern->sval[0], '=') + 1); + printf("Reading patterns from %s.\n", jsonfilename); + json = 1; + } } - } - if (json) { - FILE *fp; - struct stat filestatus; - int file_size; - char *file_contents; - json_char *json; - json_value *value; + if (json) + { + FILE *fp; + struct stat filestatus; + int file_size; + char *file_contents; + json_char *json; + json_value *value; + + if (stat(jsonfilename, &filestatus) != 0) + error ("Json file not found", ERROR); + + file_size = filestatus.st_size; + file_contents = (char *)sp_malloc(file_size, 1+1, ALIGN_CACHE); + + fp = fopen(jsonfilename, "rt"); + if (!fp) + error ("Unable to open Json file", ERROR); + + if (fread(file_contents, file_size, 1, fp) != 1) + { + fclose(fp); + error ("Unable to read content of Json file", ERROR); + } + fclose(fp); - if (stat(jsonfilename, &filestatus) != 0) - error("Json file not found", ERROR); + json = (json_char*)file_contents; + value = json_parse(json, file_size); - file_size = filestatus.st_size; - file_contents = (char *)sp_malloc(file_size, 1 + 1, ALIGN_CACHE); + if (!value) + error ("Unable to parse Json file", ERROR); - fp = fopen(jsonfilename, "rt"); - if (!fp) - error("Unable to open Json file", ERROR); + // This is the number of specified runs in the json file. + *nrc = get_num_configs(value); - if (fread(file_contents, file_size, 1, fp) != 1) { - fclose(fp); - error("Unable to read content of Json file", ERROR); - } - fclose(fp); + *rc = (struct run_config*)sp_calloc(sizeof(struct run_config), *nrc, ALIGN_CACHE); - json = (json_char *)file_contents; - value = json_parse(json, file_size); - if (!value) - error("Unable to parse Json file", ERROR); - - // This is the number of specified runs in the json file. - *nrc = get_num_configs(value); - - *rc = (struct run_config *)sp_calloc(sizeof(struct run_config), *nrc, - ALIGN_CACHE); + for (int i = 0; i < *nrc; i++){ + struct run_config *rctemp = parse_json_config(value->u.array.values[i]); + rc[0][i] = *rctemp; + free(rctemp); + } - for (int i = 0; i < *nrc; i++) { - struct run_config *rctemp = parse_json_config(value->u.array.values[i]); - rc[0][i] = *rctemp; - free(rctemp); + json_value_free(value); + free(file_contents); + } + else + { + *rc = (struct run_config*)sp_calloc(sizeof(struct run_config), 1, ALIGN_CACHE); + rc[0][0] = *parse_runs(argc, argv); + *nrc = 1; } - json_value_free(value); - free(file_contents); - } else { - *rc = (struct run_config *)sp_calloc(sizeof(struct run_config), 1, - ALIGN_CACHE); - rc[0][0] = *parse_runs(argc, argv); - *nrc = 1; - } - - free(argtable); + free(argtable); - return; + return; } -struct run_config *parse_runs(int argc, char **argv) { - int pattern_found = 0; - int pattern_scatter_found = 0; - int pattern_gather_found = 0; - - struct run_config *rc = - (struct run_config *)calloc(1, sizeof(struct run_config)); - rc->delta = -1; - rc->delta_gather = -1; - rc->delta_scatter = -1; - rc->stride_kernel = -1; - rc->ro_block = 1; - rc->ro_order = NULL; +struct run_config *parse_runs(int argc, char **argv) +{ + int pattern_found = 0; + int pattern_scatter_found = 0; + int pattern_gather_found = 0; + + struct run_config *rc = (struct run_config *)calloc(1, sizeof(struct run_config)); + rc->pattern_size = 0; + rc->delta = -1; + rc->delta_gather = -1; + rc->delta_scatter = -1; + rc->stride_kernel = -1; + rc->ro_block = 1; + rc->ro_order = NULL; #ifdef USE_OPENMP - rc->omp_threads = omp_get_max_threads(); + rc->omp_threads = omp_get_max_threads(); #else - rc->omp_threads = 1; + rc->omp_threads = 1; #endif - rc->kernel = INVALID_KERNEL; - safestrcopy(rc->name, "NONE"); - - if (kernelName->count > 0) { - copy_str_ignore_leading_space(kernel_name, kernelName->sval[0]); - if (!strcasecmp("MULTISCATTER", kernel_name)) - rc->kernel = MULTISCATTER; - else if (!strcasecmp("MULTIGATHER", kernel_name)) - rc->kernel = MULTIGATHER; - else if (!strcasecmp("GS", kernel_name)) - rc->kernel = GS; - else if (!strcasecmp("SCATTER", kernel_name)) - rc->kernel = SCATTER; - else if (!strcasecmp("GATHER", kernel_name)) - rc->kernel = GATHER; - else { - char output[STRING_SIZE]; - sprintf(output, "Invalid kernel %s\n", kernel_name); - error(output, ERROR); - } - } - - if (op->count > 0) { - copy_str_ignore_leading_space(op_string, op->sval[0]); - if (!strcasecmp("COPY", op_string)) - rc->op = OP_COPY; - else if (!strcasecmp("ACCUM", op_string)) - rc->op = OP_ACCUM; - else - error("Unrecognzied op type", ERROR); - } - - if (random_arg->count > 0) { - // Parsing the seed parameter - // If no argument was passed, use the current time in seconds since the - // epoch as the random seed - if (random_arg->ival[0] == -1) - rc->random_seed = time(NULL); - else - // sscanf(optarg, "%zu", rc->random_seed); - rc->random_seed = random_arg->ival[0]; - } - - if (omp_threads->count > 0) - rc->omp_threads = omp_threads->ival[0]; - - if (vector_len->count > 0) { - rc->vector_len = vector_len->ival[0]; - if (rc->vector_len < 1) - error("Invalid vector len!", ERROR); - } - - if (runs->count > 0) - rc->nruns = runs->ival[0]; - - if (wrap->count > 0) - rc->wrap = wrap->ival[0]; - - if (count->count > 0) - rc->generic_len = count->ival[0]; - - if (local_work_size->count > 0) - rc->local_work_size = local_work_size->ival[0]; - - if (shared_memory->count > 0) - rc->shmem = shared_memory->ival[0]; - - if (name->count > 0) - copy_str_ignore_leading_space(rc->name, name->sval[0]); - - if (pattern->count > 0) { - copy_str_ignore_leading_space(rc->generator, pattern->sval[0]); - // char* filePtr = strstr(rc->generator, "FILE"); - // if (filePtr) - // safestrcopy(rc->generator, filePtr); - parse_p(rc->generator, rc, 0); - pattern_found = 1; - } - - if (pattern_gather->count > 0) { - copy_str_ignore_leading_space(rc->generator, pattern_gather->sval[0]); - parse_p(rc->generator, rc, 1); - pattern_gather_found = 1; - } - - if (pattern_scatter->count > 0) { - copy_str_ignore_leading_space(rc->generator, pattern_scatter->sval[0]); - parse_p(rc->generator, rc, 2); - pattern_scatter_found = 1; - } - - if (delta->count > 0) { - char delta_temp[STRING_SIZE]; - copy_str_ignore_leading_space(delta_temp, delta->sval[0]); - char *delim = ","; - char *ptr = strtok(delta_temp, delim); - if (!ptr) - error("Pattern not found", ERROR); - - spIdx_t *mydeltas; - spIdx_t *mydeltas_ps; - - mydeltas = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); - mydeltas_ps = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); - - size_t read = 0; - if (sscanf(ptr, "%zu", &(mydeltas[read++])) < 1) - error("Failed to parse first pattern element in deltas", ERROR); - - while ((ptr = strtok(NULL, delim)) && read < MAX_PATTERN_LEN) { - if (sscanf(ptr, "%zu", &(mydeltas[read++])) < 1) - error("Failed to parse pattern", ERROR); - } - - rc->deltas = mydeltas; - rc->deltas_ps = mydeltas_ps; - rc->deltas_len = read; - - // rotate - for (size_t i = 0; i < rc->deltas_len; i++) - rc->deltas_ps[i] = - rc->deltas[((i - 1) + rc->deltas_len) % rc->deltas_len]; - - // compute prefix-sum - for (size_t i = 1; i < rc->deltas_len; i++) - rc->deltas_ps[i] += rc->deltas_ps[i - 1]; - - // compute max - size_t m = rc->deltas_ps[0]; - for (size_t i = 1; i < rc->deltas_len; i++) { - if (rc->deltas_ps[i] > m) - m = rc->deltas_ps[i]; - } - rc->delta = m; - } - - if (delta_gather->count > 0) { - char delta_gather_temp[STRING_SIZE]; - copy_str_ignore_leading_space(delta_gather_temp, delta_gather->sval[0]); - char *delim_gather = ","; - char *ptr_gather = strtok(delta_gather_temp, delim_gather); - if (!ptr_gather) - error("Pattern not found", ERROR); - - spIdx_t *mydeltas_gather; - spIdx_t *mydeltas_gather_ps; - - mydeltas_gather = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); - mydeltas_gather_ps = - sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); - - size_t read_gather = 0; - if (sscanf(ptr_gather, "%zu", &(mydeltas_gather[read_gather++])) < 1) - error("Failed to parse first pattern element in deltas", ERROR); - - while ((ptr_gather = strtok(NULL, delim_gather)) && - read_gather < MAX_PATTERN_LEN) { - if (sscanf(ptr_gather, "%zu", &(mydeltas_gather[read_gather++])) < 1) - error("Failed to parse pattern", ERROR); - } - - rc->deltas_gather = mydeltas_gather; - rc->deltas_gather_ps = mydeltas_gather_ps; - rc->deltas_gather_len = read_gather; - - // rotate - for (size_t i = 0; i < rc->deltas_gather_len; i++) - rc->deltas_gather_ps[i] = - rc->deltas_gather[((i - 1) + rc->deltas_gather_len) % - rc->deltas_gather_len]; - - // compute prefix-sum - for (size_t i = 1; i < rc->deltas_gather_len; i++) - rc->deltas_gather_ps[i] += rc->deltas_gather_ps[i - 1]; - - // compute max - size_t m = rc->deltas_gather_ps[0]; - for (size_t i = 1; i < rc->deltas_gather_len; i++) { - if (rc->deltas_gather_ps[i] > m) - m = rc->deltas_gather_ps[i]; - } - rc->delta_gather = m; - } - - if (delta_scatter->count > 0) { - char delta_scatter_temp[STRING_SIZE]; - copy_str_ignore_leading_space(delta_scatter_temp, delta_scatter->sval[0]); - char *delim_scatter = ","; - char *ptr_scatter = strtok(delta_scatter_temp, delim_scatter); - if (!ptr_scatter) - error("Pattern not found", ERROR); - - spIdx_t *mydeltas_scatter; - spIdx_t *mydeltas_scatter_ps; - - mydeltas_scatter = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); - mydeltas_scatter_ps = - sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); - - size_t read_scatter = 0; - if (sscanf(ptr_scatter, "%zu", &(mydeltas_scatter[read_scatter++])) < 1) - error("Failed to parse first pattern element in deltas", ERROR); - - while ((ptr_scatter = strtok(NULL, delim_scatter)) && - read_scatter < MAX_PATTERN_LEN) { - if (sscanf(ptr_scatter, "%zu", &(mydeltas_scatter[read_scatter++])) < 1) - error("Failed to parse pattern", ERROR); - } - - rc->deltas_scatter = mydeltas_scatter; - rc->deltas_scatter_ps = mydeltas_scatter_ps; - rc->deltas_scatter_len = read_scatter; - - // rotate - for (size_t i = 0; i < rc->deltas_scatter_len; i++) - rc->deltas_scatter_ps[i] = - rc->deltas_scatter[((i - 1) + rc->deltas_scatter_len) % - rc->deltas_scatter_len]; - - // compute prefix-sum - for (size_t i = 1; i < rc->deltas_scatter_len; i++) - rc->deltas_scatter_ps[i] += rc->deltas_scatter_ps[i - 1]; - - // compute max - size_t m = rc->deltas_scatter_ps[0]; - for (size_t i = 1; i < rc->deltas_scatter_len; i++) { - if (rc->deltas_scatter_ps[i] > m) - m = rc->deltas_scatter_ps[i]; - } - rc->delta_scatter = m; - } - - if (morton->count > 0) - rc->ro_morton = morton->ival[0]; - - if (hilbert->count > 0) - rc->ro_hilbert = hilbert->ival[0]; - - if (roblock->count > 0) - rc->ro_block = roblock->ival[0]; - - if (stride->count > 0) - rc->stride_kernel = stride->ival[0]; - - // VALIDATE ARGUMENTS - if (rc->kernel != GS && !pattern_found) - error("Please specify a pattern", ERROR); - - if ((rc->kernel == MULTISCATTER && !pattern_scatter_found) || - (rc->kernel == MULTISCATTER && !pattern_found)) - error("Please specify an inner scatter pattern (scatter pattern -h) and an " - "outer scatter pattern (pattern -p", - ERROR); - - if ((rc->kernel == MULTIGATHER && !pattern_gather_found) || - (rc->kernel == MULTIGATHER && !pattern_found)) - error("Please specify an inner gather pattern (gather pattern -g) and an " - "outer gather pattern (pattern -p", - ERROR); - - if ((rc->kernel == GS && !pattern_scatter_found) || - (rc->kernel == GS && !pattern_gather_found)) - error("Please specify a gather pattern and a scatter pattern for an GS " - "kernel", - ERROR); - - if (rc->kernel == GS && (rc->pattern_gather_len != rc->pattern_scatter_len)) - error("Gather pattern and scatter pattern must have the same length", - ERROR); - - if (rc->vector_len == 0) { - error("Vector length not set. Default is 1", WARN); - rc->vector_len = 1; - } - - if (rc->wrap == 0) { - error("length of smallbuf not specified. Default is 1 (slot of size " - "pattern_len elements)", - WARN); - rc->wrap = 1; - } - - if (rc->nruns == 0) { - error("Number of runs not specified. Default is 10 ", WARN); - rc->nruns = 10; - } - - if (rc->generic_len == 0) { - error("Length not specified. Default is 1024 (gathers/scatters)", WARN); - rc->generic_len = 1024; - } - - if (rc->kernel == INVALID_KERNEL) { - error("Kernel unspecified, guess GATHER", WARN); - rc->kernel = GATHER; - safestrcopy(kernel_name, "gather"); - } - - if (rc->kernel == SCATTER) - sprintf(kernel_name, "%s%zu", "scatter", rc->vector_len); - else if (rc->kernel == GATHER) - sprintf(kernel_name, "%s%zu", "gather", rc->vector_len); - else if (rc->kernel == GS) - sprintf(kernel_name, "%s%zu", "sg", rc->vector_len); - else if (rc->kernel == MULTISCATTER) - sprintf(kernel_name, "%s%zu", "multiscatter", rc->vector_len); - else if (rc->kernel == MULTIGATHER) - sprintf(kernel_name, "%s%zu", "multigather", rc->vector_len); - - if (pattern_found) { - if (rc->delta <= -1) { - error("delta not specified, default is 8\n", WARN); - rc->delta = 8; - rc->deltas_len = 1; - } - } - - if (pattern_gather_found) { - if (rc->delta_gather <= -1) { - error("delta gather not specified, default is 8\n", WARN); - rc->delta_gather = 8; - rc->deltas_gather_len = 1; - } - } - - if (pattern_scatter_found) { - if (rc->delta_scatter <= -1) { - error("delta scatter not specified, default is 8\n", WARN); - rc->delta_scatter = 8; - rc->deltas_scatter_len = 1; - } - } - - if (rc->op != OP_COPY) - error("OP must be OP_COPY", WARN); - - if (!strcasecmp(rc->name, "NONE")) { - if (rc->type != CUSTOM) - safestrcopy(rc->name, rc->generator); + rc->kernel = INVALID_KERNEL; + safestrcopy(rc->name,"NONE"); + + if (kernelName->count > 0) + { + copy_str_ignore_leading_space(kernel_name, kernelName->sval[0]); + if (!strcasecmp("MULTISCATTER", kernel_name)) + rc->kernel = MULTISCATTER; + else if (!strcasecmp("MULTIGATHER", kernel_name)) + rc->kernel = MULTIGATHER; + else if (!strcasecmp("GS", kernel_name)) + rc->kernel=GS; + else if (!strcasecmp("SCATTER", kernel_name)) + rc->kernel=SCATTER; + else if (!strcasecmp("GATHER", kernel_name)) + rc->kernel=GATHER; + else + { + char output[STRING_SIZE]; + sprintf(output, "Invalid kernel %s\n", kernel_name); + error(output, ERROR); + } + } + + if (op->count > 0) + { + copy_str_ignore_leading_space(op_string, op->sval[0]); + if (!strcasecmp("COPY", op_string)) + rc->op = OP_COPY; + else if (!strcasecmp("ACCUM", op_string)) + rc->op = OP_ACCUM; + else + error("Unrecognzied op type", ERROR); + } + + if (random_arg->count > 0) + { + // Parsing the seed parameter + // If no argument was passed, use the current time in seconds since the epoch as the random seed + if (random_arg->ival[0] == -1) + rc->random_seed = time(NULL); + else + //sscanf(optarg, "%zu", rc->random_seed); + rc->random_seed = random_arg->ival[0]; + } + + if (omp_threads->count > 0) + rc->omp_threads = omp_threads->ival[0]; + + if (vector_len->count > 0) + { + rc->vector_len = vector_len->ival[0]; + if (rc->vector_len < 1) + error("Invalid vector len!", ERROR); + } + + if (runs->count > 0) + rc->nruns = runs->ival[0]; + + if (wrap->count > 0) + rc->wrap = wrap->ival[0]; + + if (boundary->count > 0) + rc->boundary = (spIdx_t)boundary->ival[0]; else - safestrcopy(rc->name, "CUSTOM"); - } + rc->boundary = -1; + + if (pattern_size->count > 0) + rc->pattern_size = pattern_size->ival[0]; + + if (count->count > 0) + rc->generic_len = count->ival[0]; + + if (local_work_size->count > 0) + rc->local_work_size = local_work_size->ival[0]; + + if (shared_memory->count > 0) + rc->shmem = shared_memory->ival[0]; + + if (name->count > 0) + copy_str_ignore_leading_space(rc->name, name->sval[0]); + + if (pattern->count > 0) + { + copy_str_ignore_leading_space(rc->generator, pattern->sval[0]); + //char* filePtr = strstr(rc->generator, "FILE"); + //if (filePtr) + // safestrcopy(rc->generator, filePtr); + parse_p(rc->generator, rc, 0); + pattern_found = 1; + } + + if (pattern_gather->count > 0) + { + copy_str_ignore_leading_space(rc->generator, pattern_gather->sval[0]); + parse_p(rc->generator, rc, 1); + pattern_gather_found = 1; + } + + if (pattern_scatter->count > 0) + { + copy_str_ignore_leading_space(rc->generator, pattern_scatter->sval[0]); + parse_p(rc->generator, rc, 2); + pattern_scatter_found = 1; + } + + if (delta->count > 0) + { + char delta_temp[STRING_SIZE]; + copy_str_ignore_leading_space(delta_temp, delta->sval[0]); + char *delim = ","; + char *ptr = strtok(delta_temp, delim); + if (!ptr) + error("Pattern not found", ERROR); + + spIdx_t *mydeltas; + spIdx_t *mydeltas_ps; + + mydeltas = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); + mydeltas_ps = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); + + size_t read = 0; + if (sscanf(ptr, "%zu", &(mydeltas[read++])) < 1) + error("Failed to parse first pattern element in deltas", ERROR); + + while ((ptr = strtok(NULL, delim)) && read < MAX_PATTERN_LEN) + { + if (sscanf(ptr, "%zu", &(mydeltas[read++])) < 1) + error("Failed to parse pattern", ERROR); + } + + rc->deltas = mydeltas; + rc->deltas_ps = mydeltas_ps; + rc->deltas_len = read; + + // rotate + for (size_t i = 0; i < rc->deltas_len; i++) + rc->deltas_ps[i] = rc->deltas[((i-1)+rc->deltas_len)%rc->deltas_len]; + + // compute prefix-sum + for (size_t i = 1; i < rc->deltas_len; i++) + rc->deltas_ps[i] += rc->deltas_ps[i-1]; + + // compute max + size_t m = rc->deltas_ps[0]; + for (size_t i = 1; i < rc->deltas_len; i++) + { + if (rc->deltas_ps[i] > m) + m = rc->deltas_ps[i]; + } + rc->delta = m; + } + + if (delta_gather->count > 0) + { + char delta_gather_temp[STRING_SIZE]; + copy_str_ignore_leading_space(delta_gather_temp, delta_gather->sval[0]); + char *delim_gather = ","; + char *ptr_gather = strtok(delta_gather_temp, delim_gather); + if (!ptr_gather) + error("Pattern not found", ERROR); + + spIdx_t *mydeltas_gather; + spIdx_t *mydeltas_gather_ps; + + mydeltas_gather = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); + mydeltas_gather_ps = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); + + size_t read_gather = 0; + if (sscanf(ptr_gather, "%zu", &(mydeltas_gather[read_gather++])) < 1) + error("Failed to parse first pattern element in deltas", ERROR); + + while ((ptr_gather = strtok(NULL, delim_gather)) && read_gather < MAX_PATTERN_LEN) + { + if (sscanf(ptr_gather, "%zu", &(mydeltas_gather[read_gather++])) < 1) + error("Failed to parse pattern", ERROR); + } + + rc->deltas_gather = mydeltas_gather; + rc->deltas_gather_ps = mydeltas_gather_ps; + rc->deltas_gather_len = read_gather; + + // rotate + for (size_t i = 0; i < rc->deltas_gather_len; i++) + rc->deltas_gather_ps[i] = rc->deltas_gather[((i-1)+rc->deltas_gather_len)%rc->deltas_gather_len]; + + // compute prefix-sum + for (size_t i = 1; i < rc->deltas_gather_len; i++) + rc->deltas_gather_ps[i] += rc->deltas_gather_ps[i-1]; + + // compute max + size_t m = rc->deltas_gather_ps[0]; + for (size_t i = 1; i < rc->deltas_gather_len; i++) + { + if (rc->deltas_gather_ps[i] > m) + m = rc->deltas_gather_ps[i]; + } + rc->delta_gather = m; + } + + if (delta_scatter->count > 0) + { + char delta_scatter_temp[STRING_SIZE]; + copy_str_ignore_leading_space(delta_scatter_temp, delta_scatter->sval[0]); + char *delim_scatter = ","; + char *ptr_scatter = strtok(delta_scatter_temp, delim_scatter); + if (!ptr_scatter) + error("Pattern not found", ERROR); + + spIdx_t *mydeltas_scatter; + spIdx_t *mydeltas_scatter_ps; + + mydeltas_scatter = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); + mydeltas_scatter_ps = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); + + size_t read_scatter = 0; + if (sscanf(ptr_scatter, "%zu", &(mydeltas_scatter[read_scatter++])) < 1) + error("Failed to parse first pattern element in deltas", ERROR); + + while ((ptr_scatter = strtok(NULL, delim_scatter)) && read_scatter < MAX_PATTERN_LEN) + { + if (sscanf(ptr_scatter, "%zu", &(mydeltas_scatter[read_scatter++])) < 1) + error("Failed to parse pattern", ERROR); + } + + rc->deltas_scatter = mydeltas_scatter; + rc->deltas_scatter_ps = mydeltas_scatter_ps; + rc->deltas_scatter_len = read_scatter; + + // rotate + for (size_t i = 0; i < rc->deltas_scatter_len; i++) + rc->deltas_scatter_ps[i] = rc->deltas_scatter[((i-1)+rc->deltas_scatter_len)%rc->deltas_scatter_len]; + + // compute prefix-sum + for (size_t i = 1; i < rc->deltas_scatter_len; i++) + rc->deltas_scatter_ps[i] += rc->deltas_scatter_ps[i-1]; + + // compute max + size_t m = rc->deltas_scatter_ps[0]; + for (size_t i = 1; i < rc->deltas_scatter_len; i++) + { + if (rc->deltas_scatter_ps[i] > m) + m = rc->deltas_scatter_ps[i]; + } + rc->delta_scatter = m; + } + + + if (morton->count > 0) + rc->ro_morton = morton->ival[0]; + + if (hilbert->count > 0) + rc->ro_hilbert = hilbert->ival[0]; + + if (roblock->count > 0) + rc->ro_block = roblock->ival[0]; + + if (stride->count > 0) + rc->stride_kernel = stride->ival[0]; + + // VALIDATE ARGUMENTS + if (rc->kernel != GS && !pattern_found) + error ("Please specify a pattern", ERROR); + + if ((rc->kernel == MULTISCATTER && !pattern_scatter_found) || (rc->kernel == MULTISCATTER && !pattern_found)) + error ("Please specify an inner scatter pattern (scatter pattern -h) and an outer scatter pattern (pattern -p", ERROR); + + if ((rc->kernel == MULTIGATHER && !pattern_gather_found) || (rc->kernel == MULTIGATHER && !pattern_found)) + error ("Please specify an inner gather pattern (gather pattern -g) and an outer gather pattern (pattern -p", ERROR); + + if ((rc->kernel == GS && !pattern_scatter_found) || (rc->kernel == GS && !pattern_gather_found)) + error ("Please specify a gather pattern and a scatter pattern for an GS kernel", ERROR); + + if (rc->kernel == GS && (rc->pattern_gather_len != rc->pattern_scatter_len)) + error ("Gather pattern and scatter pattern must have the same length", ERROR); + + if (rc->vector_len == 0) + { + error ("Vector length not set. Default is 1", WARN); + rc->vector_len = 1; + } + + if (rc->wrap == 0) + { + error ("length of smallbuf not specified. Default is 1 (slot of size pattern_len elements)", WARN); + rc->wrap = 1; + } + + if (rc->nruns == 0) + { + error ("Number of runs not specified. Default is 10 ", WARN); + rc->nruns = 10; + } + + if (rc->generic_len == 0) + { + error ("Length not specified. Default is 1024 (gathers/scatters)", WARN); + rc->generic_len = 1024; + } + + if (rc->kernel == INVALID_KERNEL) + { + error("Kernel unspecified, guess GATHER", WARN); + rc->kernel = GATHER; + safestrcopy(kernel_name, "gather"); + } + + if (rc->kernel == SCATTER) + sprintf(kernel_name, "%s%zu", "scatter", rc->vector_len); + else if (rc->kernel == GATHER) + sprintf(kernel_name, "%s%zu", "gather", rc->vector_len); + else if (rc->kernel == GS) + sprintf(kernel_name, "%s%zu", "sg", rc->vector_len); + else if (rc->kernel == MULTISCATTER) + sprintf(kernel_name, "%s%zu", "multiscatter", rc->vector_len); + else if (rc->kernel == MULTIGATHER) + sprintf(kernel_name, "%s%zu", "multigather", rc->vector_len); + + if (pattern_found) + { + if (rc->delta <= -1) + { + error("delta not specified, default is 8\n", WARN); + rc->delta = 8; + rc->deltas_len = 1; + } + } + + if (pattern_gather_found) + { + if (rc->delta_gather <= -1) + { + error("delta gather not specified, default is 8\n", WARN); + rc->delta_gather = 8; + rc->deltas_gather_len = 1; + } + } + + if (pattern_scatter_found) + { + if (rc->delta_scatter <= -1) + { + error("delta scatter not specified, default is 8\n", WARN); + rc->delta_scatter = 8; + rc->deltas_scatter_len = 1; + } + } + + if (rc->op != OP_COPY) + error("OP must be OP_COPY", WARN); + + if (!strcasecmp(rc->name, "NONE")) + { + if (rc->type != CUSTOM) + safestrcopy(rc->name, rc->generator); + else + safestrcopy(rc->name, "CUSTOM"); + } #ifdef USE_OPENMP - int max_threads = omp_get_max_threads(); - if (rc->omp_threads > max_threads) { - error("Too many OpenMP threads requested, using the max instead", WARN); - rc->omp_threads = max_threads; - } - if (rc->omp_threads == 0) { - error("Number of OpenMP threads not specified, using the max", WARN); - rc->omp_threads = max_threads; - } + int max_threads = omp_get_max_threads(); + if (rc->omp_threads > max_threads) + { + error ("Too many OpenMP threads requested, using the max instead", WARN); + rc->omp_threads = max_threads; + } + if (rc->omp_threads == 0) + { + error ("Number of OpenMP threads not specified, using the max", WARN); + rc->omp_threads = max_threads; + } #else - if (rc->omp_threads > 1) - error("Compiled without OpenMP support but requsted more than 1 thread, " - "using 1 instead", - WARN); + if (rc->omp_threads > 1) + error ("Compiled without OpenMP support but requsted more than 1 thread, using 1 instead", WARN); #endif #if defined USE_CUDA || defined USE_OPENCL - if (rc->local_work_size == 0) { - error("Local_work_size not set. Default is 1", WARN); - rc->local_work_size = 1; - } + if (rc->local_work_size == 0) + { + error ("Local_work_size not set. Default is 1", WARN); + rc->local_work_size = 1; + } #endif - return rc; + return rc; } ssize_t power(int base, int exp) { - int i, result = 1; - for (i = 0; i < exp; i++) - result *= base; - return result; + int i, result = 1; + for (i = 0; i < exp; i++) + result *= base; + return result; } // Yes, there is no need for recursion here but I did this in python first. I'll // update this later with a cleaner implementation -void static laplacian_branch(int depth, int order, int n, int **pos, - int *pos_len) { - *pos = (int *)realloc(*pos, ((*pos_len) + order) * sizeof(int)); +void static laplacian_branch(int depth, int order, int n, int **pos, int *pos_len) +{ + *pos = (int*)realloc(*pos, ((*pos_len)+order) * sizeof(int)); - for (int i = 0; i < order; i++) { - (*pos)[i + *pos_len] = (i + 1) * power(n, depth); - } + for (int i = 0; i < order; i++) { + (*pos)[i+*pos_len] = (i+1) * power(n, depth); + } - *pos_len += order; - return; + *pos_len += order; + return; } -void static laplacian(int dim, int order, int n, struct run_config *rc, - int mode) { - spIdx_t **pattern; - spSize_t *pattern_len; +void static laplacian(int dim, int order, int n, struct run_config *rc, int mode) +{ + ssize_t **pattern; + spSize_t *pattern_len; + + if (mode == 0) { // Normal pattern + pattern = &rc->pattern; + pattern_len = &rc->pattern_len; + } + else if (mode == 1) { // Gather pattern (GS Kernel) + pattern = &rc->pattern_gather; + pattern_len = &rc->pattern_gather_len; + } + else if (mode == 2) { // Scatter pattern (GS Kernel) + pattern = &rc->pattern_scatter; + pattern_len = &rc->pattern_scatter_len; + } + else { + printf("laplacian: invalid mode %d\n", mode); + exit(1); + } - if (mode == 0) { // Normal pattern - pattern = &rc->pattern; - pattern_len = &rc->pattern_len; - } else if (mode == 1) { // Gather pattern (GS Kernel) - pattern = &rc->pattern_gather; - pattern_len = &rc->pattern_gather_len; - } else if (mode == 2) { // Scatter pattern (GS Kernel) - pattern = &rc->pattern_scatter; - pattern_len = &rc->pattern_scatter_len; - } else { - printf("laplacian: invalid mode %d\n", mode); - exit(1); - } - if (dim < 1) { - error("laplacian: dim must be positive", ERROR); - } + if (dim < 1) { + error("laplacian: dim must be positive", ERROR); + } - int final_len = dim * order * 2 + 1; - if (final_len > MAX_PATTERN_LEN) { - error("laplacian: resulting pattern too long", ERROR); - } + int final_len = dim * order * 2 + 1; + if (final_len > MAX_PATTERN_LEN) { + error("laplacian: resulting pattern too long", ERROR); + } - int pos_len = 0; - int *pos = NULL; + int pos_len = 0; + int *pos = NULL; - for (int i = 0; i < dim; i++) { - laplacian_branch(i, order, n, &pos, &pos_len); - } + for (int i = 0; i < dim; i++) { + laplacian_branch(i, order, n, &pos, &pos_len); + } - *pattern_len = final_len; + *pattern_len = final_len; - *pattern = sp_calloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); + *pattern = sp_calloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); - int max = pos[pos_len - 1]; + int max = pos[pos_len-1]; - for (int i = 0; i < *pattern_len; i++) { - (*pattern)[i] = 2; - } + for (int i = 0; i < *pattern_len; i++) { + (*pattern)[i] = 2; + } - // populate rc->pattern - for (int i = 0; i < pos_len; i++) { - (*pattern)[i] = (-pos[pos_len - i - 1] + max); - } + //populate rc->pattern + for(int i = 0; i < pos_len; i++) { + (*pattern)[i] = (-pos[pos_len - i - 1] + max); + } - (*pattern)[pos_len] = max; + (*pattern)[pos_len] = max; - for (int i = 0; i < pos_len; i++) { - (*pattern)[pos_len + 1 + i] = pos[i] + max; - } + for(int i = 0; i < pos_len; i++) { + (*pattern)[pos_len+1+i] = pos[i] + max; + } - free(pos); - return; + free(pos); + return; } -void parse_backend(int argc, char **argv) { - err_file = stderr; - - safestrcopy(platform_string, "NONE"); - safestrcopy(device_string, "NONE"); - safestrcopy(kernel_file, "NONE"); - safestrcopy(kernel_name, "NONE"); - - if (backend_arg->count > 0) { - if (!strcasecmp("OPENCL", backend_arg->sval[0])) - backend = OPENCL; - else if (!strcasecmp("OPENMP", backend_arg->sval[0])) - backend = OPENMP; - else if (!strcasecmp("CUDA", backend_arg->sval[0])) - backend = CUDA; - else if (!strcasecmp("SERIAL", backend_arg->sval[0])) - backend = SERIAL; - else - error("Unrecognized Backend", ERROR); - } +void parse_backend(int argc, char **argv) +{ + err_file = stderr; + + safestrcopy(platform_string, "NONE"); + safestrcopy(device_string, "NONE"); + safestrcopy(kernel_file, "NONE"); + safestrcopy(kernel_name, "NONE"); + + if (backend_arg->count > 0) + { + if(!strcasecmp("OPENCL", backend_arg->sval[0])) + backend = OPENCL; + else if(!strcasecmp("OPENMP", backend_arg->sval[0])) + backend = OPENMP; + else if(!strcasecmp("CUDA", backend_arg->sval[0])) + backend = CUDA; + else if(!strcasecmp("SERIAL", backend_arg->sval[0])) + backend = SERIAL; + else + error ("Unrecognized Backend", ERROR); + } - if (cl_platform->count > 0) - copy_str_ignore_leading_space(platform_string, cl_platform->sval[0]); + if (cl_platform->count > 0) + copy_str_ignore_leading_space(platform_string, cl_platform->sval[0]); - if (cl_device->count > 0) - copy_str_ignore_leading_space(device_string, cl_device->sval[0]); + if (cl_device->count > 0) + copy_str_ignore_leading_space(device_string, cl_device->sval[0]); - if (interactive->count > 0) { - safestrcopy(platform_string, INTERACTIVE); - safestrcopy(device_string, INTERACTIVE); - } + if (interactive->count > 0) + { + safestrcopy(platform_string, INTERACTIVE); + safestrcopy(device_string, INTERACTIVE); + } - if (kernelFile->count > 0) - copy_str_ignore_leading_space(kernel_file, kernelFile->filename[0]); + if (kernelFile->count > 0) + copy_str_ignore_leading_space(kernel_file, kernelFile->filename[0]); - if (no_print_header->count > 0) - quiet_flag = no_print_header->ival[0]; + if (no_print_header->count > 0) + quiet_flag = no_print_header->ival[0]; - if (validate->count > 0) - validate_flag++; + if (validate->count > 0) + validate_flag++; - if (aggregate->count > 0) - aggregate_flag = 1; + if (aggregate->count > 0) + aggregate_flag = 1; - if (compress->count > 0) - compress_flag = 1; + if (compress->count > 0) + compress_flag = 1; - if (papi->count > 0) { -#ifdef USE_PAPI + if (papi->count > 0) { - char *pch = strtok(papi->sval[0], ","); - while (pch != NULL) { - safestrcopy(papi_event_names[papi_nevents++], pch); - pch = strtok(NULL, ","); - if (papi_nevents == PAPI_MAX_COUNTERS) - break; - } + #ifdef USE_PAPI + { + char *pch = strtok(papi->sval[0], ","); + while (pch != NULL) + { + safestrcopy(papi_event_names[papi_nevents++], pch); + pch = strtok (NULL, ","); + if (papi_nevents == PAPI_MAX_COUNTERS) + break; + } + } + #endif } -#endif - } - - /* Check argument coherency */ - if (backend == INVALID_BACKEND) { - if (sg_cuda_support()) { - backend = CUDA; - error("No backend specified, guessing CUDA", WARN); - } else if (sg_opencl_support()) { - backend = OPENCL; - error("No backend specified, guessing OpenCL", WARN); - } else if (sg_openmp_support()) { - backend = OPENMP; - error("No backend specified, guessing OpenMP", WARN); - } else if (sg_serial_support()) { - backend = SERIAL; - error("No backend specified, guessing Serial", WARN); - } else - error("No backends available! Please recompile spatter with at least one " - "backend.", - ERROR); - } - - // Check to see if they compiled with support for their requested backend - if (backend == OPENCL) { - if (!sg_opencl_support()) - error("You did not compile with support for OpenCL", ERROR); - } else if (backend == OPENMP) { - if (!sg_openmp_support()) - error("You did not compile with support for OpenMP", ERROR); - } else if (backend == CUDA) { - if (!sg_cuda_support()) - error("You did not compile with support for CUDA", ERROR); - } else if (backend == SERIAL) { - if (!sg_serial_support()) - error("You did not compile with support for serial execution", ERROR); - } - - if (backend == OPENCL) { - if (!strcasecmp(platform_string, "NONE")) { - safestrcopy(platform_string, INTERACTIVE); - safestrcopy(device_string, INTERACTIVE); - } - if (!strcasecmp(device_string, "NONE")) { - safestrcopy(platform_string, INTERACTIVE); - safestrcopy(device_string, INTERACTIVE); - } - } -#ifdef USE_CUDA - if (backend == CUDA) { - int dev = find_device_cuda(device_string); - if (dev == -1) { - error("Specified CUDA device not found or no device specified. Using " - "device 0", - WARN); - dev = 0; - } - cuda_dev = dev; - cudaSetDevice(dev); - } -#endif + /* Check argument coherency */ + if (backend == INVALID_BACKEND){ + if (sg_cuda_support()) + { + backend = CUDA; + error ("No backend specified, guessing CUDA", WARN); + } + else if (sg_opencl_support()) + { + backend = OPENCL; + error ("No backend specified, guessing OpenCL", WARN); + } + else if (sg_openmp_support()) + { + backend = OPENMP; + error ("No backend specified, guessing OpenMP", WARN); + } + else if (sg_serial_support()) + { + backend = SERIAL; + error ("No backend specified, guessing Serial", WARN); + } + else + error ("No backends available! Please recompile spatter with at least one backend.", ERROR); + } - if (!strcasecmp(kernel_file, "NONE") && backend == OPENCL) { - error("Kernel file unspecified, guessing kernels/kernels_vector.cl", WARN); - safestrcopy(kernel_file, "kernels/kernels_vector.cl"); - } + // Check to see if they compiled with support for their requested backend + if (backend == OPENCL) + { + if (!sg_opencl_support()) + error("You did not compile with support for OpenCL", ERROR); + } + else if (backend == OPENMP) + { + if (!sg_openmp_support()) + error("You did not compile with support for OpenMP", ERROR); + } + else if (backend == CUDA) + { + if (!sg_cuda_support()) + error("You did not compile with support for CUDA", ERROR); + } + else if (backend == SERIAL) + { + if (!sg_serial_support()) + error("You did not compile with support for serial execution", ERROR); + } - return; -} + if (backend == OPENCL) + { + if (!strcasecmp(platform_string, "NONE")) + { + safestrcopy(platform_string, INTERACTIVE); + safestrcopy(device_string, INTERACTIVE); + } + if (!strcasecmp(device_string, "NONE")) + { + safestrcopy(platform_string, INTERACTIVE); + safestrcopy(device_string, INTERACTIVE); + } + } -void parse_p(char *optarg, struct run_config *rc, int mode) { - spIdx_t **pattern; - spSize_t *pattern_len; - ssize_t *delta; - size_t **deltas; - size_t *deltas_len; - - if (mode == 0) { // Normal pattern - pattern = &rc->pattern; - pattern_len = &rc->pattern_len; - delta = &rc->delta; - deltas = &rc->deltas_gather; - deltas_len = &rc->deltas_len; - } else if (mode == 1) { // Gather pattern (GS Kernel) - pattern = &rc->pattern_gather; - pattern_len = &rc->pattern_gather_len; - delta = &rc->delta_gather; - deltas = &rc->deltas_gather; - deltas_len = &rc->deltas_gather_len; - } else if (mode == 2) { // Scatter pattern (GS Kernel) - pattern = &rc->pattern_scatter; - pattern_len = &rc->pattern_scatter_len; - delta = &rc->delta_scatter; - deltas = &rc->deltas_scatter; - deltas_len = &rc->deltas_scatter_len; - } else { - printf("parse_p: invalid mode %d\n", mode); - exit(1); - } - - rc->type = INVALID_IDX; - char *arg = 0; - if ((arg = strchr(optarg, ':'))) { - *arg = '\0'; - arg++; // arg now points to arguments to the pattern type - - // FILE mode indicates that we will load a - // config from a file - if (!strcmp(optarg, "FILE")) { - // TODO - // safestrcopy(idx_pattern_file, arg); - rc->type = CONFIG_FILE; - } - - // The Exxon Kernel Proxy-derived stencil - // It used to be called HYDRO so we will accept that too - // XKP:dim - else if (!strcmp(optarg, "XKP") || !strcmp(optarg, "HYDRO")) { - rc->type = XKP; - - size_t dim = 0; - char *dim_char = strtok(arg, ":"); - if (!dim_char) - error("XKP: size not found", 1); - if (sscanf(dim_char, "%zu", &dim) < 1) - error("XKP: Dimension not parsed", 1); - - *pattern_len = 73; - - *pattern = sp_malloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); - - // The default delta is 1 - *delta = 1; - - if (!(*deltas)) { - *deltas = sp_malloc(sizeof(size_t), 1, ALIGN_CACHE); - } - *deltas[0] = *delta; - *deltas_len = 1; - - xkp_pattern(*pattern, dim); - } - - // Parse Uniform Stride Arguments, which are - // UNIFORM:index_length:stride - else if (!strcmp(optarg, "UNIFORM")) { - rc->type = UNIFORM; - - // Read the length - char *len = strtok(arg, ":"); - if (!len) - error("UNIFORM: Index Length not found", 1); - if (sscanf(len, "%zu", &(*pattern_len)) < 1) - error("UNIFORM: Length not parsed", 1); - - *pattern = sp_malloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); - - // Read the stride - char *stride = strtok(NULL, ":"); - ssize_t strideval = 0; - if (!stride) - error("UNIFORM: Stride not found", 1); - if (sscanf(stride, "%zd", &strideval) < 1) - error("UNIFORM: Stride not parsed", 1); - - // Fill the pattern buffer - for (int i = 0; i < *pattern_len; i++) - (*pattern)[i] = i * strideval; - - char *delta2 = strtok(NULL, ":"); - if (delta2) { - if (!*deltas) { - *deltas = sp_malloc(sizeof(size_t), 1, ALIGN_CACHE); + #ifdef USE_CUDA + if (backend == CUDA) + { + int dev = find_device_cuda(device_string); + if (dev == -1) + { + error("Specified CUDA device not found or no device specified. Using device 0", WARN); + dev = 0; } - *deltas_len = 1; + cuda_dev = dev; + cudaSetDevice(dev); + } + #endif - if (!strcmp(delta2, "NR")) { - *delta = strideval * (*pattern_len); - (*deltas)[0] = *delta; - } else { - if (sscanf(delta2, "%zd", &(*delta)) < 1) - error("UNIFORM: delta not parsed", 1); - (*deltas)[0] = *delta; + if (!strcasecmp(kernel_file, "NONE") && backend == OPENCL) + { + error("Kernel file unspecified, guessing kernels/kernels_vector.cl", WARN); + safestrcopy(kernel_file, "kernels/kernels_vector.cl"); + } + + return; +} + +void parse_p(char* optarg, struct run_config *rc, int mode) +{ + ssize_t **pattern; + spSize_t *pattern_len; + ssize_t *delta; + size_t **deltas; + size_t *deltas_len; + + if (mode == 0) { // Normal pattern + pattern = &rc->pattern; + pattern_len = &rc->pattern_len; + delta = &rc->delta; + deltas = &rc->deltas_gather; + deltas_len = &rc->deltas_len; + } + else if (mode == 1) { // Gather pattern (GS Kernel) + pattern = &rc->pattern_gather; + pattern_len = &rc->pattern_gather_len; + delta = &rc->delta_gather; + deltas = &rc->deltas_gather; + deltas_len = &rc->deltas_gather_len; + } + else if (mode == 2) { // Scatter pattern (GS Kernel) + pattern = &rc->pattern_scatter; + pattern_len = &rc->pattern_scatter_len; + delta = &rc->delta_scatter; + deltas = &rc->deltas_scatter; + deltas_len = &rc->deltas_scatter_len; + } + else { + printf("parse_p: invalid mode %d\n", mode); + exit(1); + } + + rc->type = INVALID_IDX; + char *arg = 0; + if ((arg=strchr(optarg, ':'))) + { + *arg = '\0'; + arg++; //arg now points to arguments to the pattern type + + // FILE mode indicates that we will load a + // config from a file + if (!strcmp(optarg, "FILE")) + { + //TODO + //safestrcopy(idx_pattern_file, arg); + rc->type = CONFIG_FILE; } - } - - } - - // LAPLACIAN:DIM:ORDER:N - else if (!strcmp(optarg, "LAPLACIAN")) { - int dim_val, order_val, problem_size_val; - - *pattern = sp_malloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); - rc->type = LAPLACIAN; - - // Read the dimension - char *dim = strtok(arg, ":"); - if (!dim) - error("LAPLACIAN: Dimension not found", 1); - if (sscanf(dim, "%d", &dim_val) < 1) - error("LAPLACIAN: Dimension not parsed", 1); - - // Read the order - char *order = strtok(NULL, ":"); - if (!order) - error("LAPLACIAN: Order not found", 1); - if (sscanf(order, "%d", &order_val) < 1) - error("LAPLACIAN: Order not parsed", 1); - - // Read the problem size - char *problem_size = strtok(NULL, ":"); - if (!problem_size) - error("LAPLACIAN: Problem size not found", 1); - if (sscanf(problem_size, "%d", &problem_size_val) < 1) - error("LAPLACIAN: Problem size not parsed", 1); - - *delta = 1; - if (!(*deltas)) { - *deltas = sp_malloc(sizeof(spIdx_t), *delta, ALIGN_CACHE); - } - (*deltas)[0] = *delta; - *deltas_len = 1; - - laplacian(dim_val, order_val, problem_size_val, rc, mode); - } - - // Mostly Stride 1 Mode - // Arguments: index_length:list_of_breaks:list_of_deltas - // list_of_deltas should be length 1 or the same length as - // list_of_breaks. - // The elements of both lists should be nonnegative and - // the the elements of list_of_breaks should be strictly less - // than index_length - else if (!strcmp(optarg, "MS1")) { - rc->type = MS1; - - char *len = strtok(arg, ":"); - char *breaks = strtok(NULL, ":"); - char *gaps = strtok(NULL, ":"); - - size_t *ms1_breaks = - sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); - size_t *ms1_deltas = - sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); - size_t ms1_breaks_len = 0; - size_t ms1_deltas_len = 0; - - // Parse index length - sscanf(len, "%zu", &(*pattern_len)); - *pattern = sp_malloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); - - // Parse breaks - char *ptr = strtok(breaks, ","); - size_t read = 0; - if (!ptr) - error("MS1: Breaks missing", 1); - if (sscanf(ptr, "%zu", &(ms1_breaks[read++])) < 1) - error("MS1: Failed to parse first break", 1); - - while ((ptr = strtok(NULL, ",")) && read < MAX_PATTERN_LEN) { - if (sscanf(ptr, "%zu", &(ms1_breaks[read++])) < 1) - error("MS1: Failed to parse breaks", 1); - } - - ms1_breaks_len = read; - - if (!gaps) { - printf("1\n"); - error("error", ERROR); - } - - ptr = strtok(gaps, ","); - read = 0; - if (ptr) { - if (sscanf(ptr, "%zu", &(ms1_deltas[read++])) < 1) - error("Failed to parse first delta", 1); - - while ((ptr = strtok(NULL, ",")) && read < MAX_PATTERN_LEN) { - if (sscanf(ptr, "%zu", &(ms1_deltas[read++])) < 1) - error("Failed to parse deltas", 1); + + // The Exxon Kernel Proxy-derived stencil + // It used to be called HYDRO so we will accept that too + // XKP:dim + else if (!strcmp(optarg, "XKP") || !strcmp(optarg, "HYDRO")) + { + rc->type = XKP; + + size_t dim = 0; + char *dim_char = strtok(arg, ":"); + if (!dim_char) + error("XKP: size not found", 1); + if (sscanf(dim_char, "%zu", &dim) < 1) + error("XKP: Dimension not parsed", 1); + + *pattern_len = 73; + + *pattern = sp_malloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); + + // The default delta is 1 + *delta = 1; + + if (!(*deltas)) { + *deltas = sp_malloc(sizeof(size_t), 1, ALIGN_CACHE); + } + *deltas[0] = *delta; + *deltas_len = 1; + + xkp_pattern(*pattern, dim); } - } else - error("MS1: deltas missing", 1); - ms1_deltas_len = read; + // Parse Uniform Stride Arguments, which are + // UNIFORM:index_length:stride + else if (!strcmp(optarg, "UNIFORM")) + { + rc->type = UNIFORM; + + // Read the length + char *len = strtok(arg,":"); + if (!len) + error("UNIFORM: Index Length not found", 1); + if (sscanf(len, "%zu", &(*pattern_len)) < 1) + error("UNIFORM: Length not parsed", 1); + + *pattern = sp_malloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); + + // Read the stride + char *stride = strtok(NULL, ":"); + ssize_t strideval = 0; + if (!stride) + error("UNIFORM: Stride not found", 1); + if (sscanf(stride, "%zd", &strideval) < 1) + error("UNIFORM: Stride not parsed", 1); + + // Fill the pattern buffer + for (int i = 0; i < *pattern_len; i++) + (*pattern)[i] = i*strideval; + + char *delta2 = strtok(NULL, ":"); + if (delta2) + { + if (!*deltas) { + *deltas = sp_malloc(sizeof(size_t), 1, ALIGN_CACHE); + } + *deltas_len = 1; + + if (!strcmp(delta2, "NR")) + { + *delta = strideval*(*pattern_len); + (*deltas)[0] = *delta; + } + else + { + if (sscanf(delta2, "%zd", &(*delta)) < 1) + error("UNIFORM: delta not parsed", 1); + (*deltas)[0] = *delta; + } + } - (*pattern)[0] = -1; - size_t last = -1; - ssize_t j; - for (int i = 0; i < *pattern_len; i++) { - if ((j = setincludes(i, ms1_breaks, ms1_breaks_len)) != -1) - (*pattern)[i] = last + ms1_deltas[ms1_deltas_len > 1 ? j : 0]; - else - (*pattern)[i] = last + 1; - last = (*pattern)[i]; - } + } - free(ms1_breaks); - free(ms1_deltas); - } else - error("Unrecognized mode in -p argument", 1); - } + //LAPLACIAN:DIM:ORDER:N + else if (!strcmp(optarg, "LAPLACIAN")) + { + int dim_val, order_val, problem_size_val; + + *pattern = sp_malloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); + rc->type = LAPLACIAN; + + // Read the dimension + char *dim = strtok(arg,":"); + if (!dim) + error("LAPLACIAN: Dimension not found", 1); + if (sscanf(dim, "%d", &dim_val) < 1) + error("LAPLACIAN: Dimension not parsed", 1); + + // Read the order + char *order = strtok(NULL, ":"); + if (!order) + error("LAPLACIAN: Order not found", 1); + if (sscanf(order, "%d", &order_val) < 1) + error("LAPLACIAN: Order not parsed", 1); + + // Read the problem size + char *problem_size = strtok(NULL, ":"); + if (!problem_size) + error("LAPLACIAN: Problem size not found", 1); + if (sscanf(problem_size, "%d", &problem_size_val) < 1) + error("LAPLACIAN: Problem size not parsed", 1); + + *delta = 1; + if (!(*deltas)) { + *deltas = sp_malloc(sizeof(spIdx_t), *delta, ALIGN_CACHE); + } + (*deltas)[0] = *delta; + *deltas_len = 1; + + laplacian(dim_val, order_val, problem_size_val, rc, mode); + } - // CUSTOM mode means that the user supplied a single index buffer on the - // command line - else { - if (quiet_flag > 3) { - printf("Parse P Custom Pattern: %s\n", optarg); + // Mostly Stride 1 Mode + // Arguments: index_length:list_of_breaks:list_of_deltas + // list_of_deltas should be length 1 or the same length as + // list_of_breaks. + // The elements of both lists should be nonnegative and + // the the elements of list_of_breaks should be strictly less + // than index_length + else if (!strcmp(optarg, "MS1")) + { + rc->type = MS1; + + char *len = strtok(arg,":"); + char *breaks = strtok(NULL,":"); + char *gaps = strtok(NULL,":"); + + size_t *ms1_breaks = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); + size_t *ms1_deltas = sp_malloc(sizeof(size_t), MAX_PATTERN_LEN, ALIGN_CACHE); + size_t ms1_breaks_len = 0; + size_t ms1_deltas_len = 0; + + // Parse index length + sscanf(len, "%zu", &(*pattern_len)); + *pattern = sp_malloc(sizeof(spIdx_t), *pattern_len, ALIGN_CACHE); + + // Parse breaks + char *ptr = strtok(breaks, ","); + size_t read = 0; + if (!ptr) + error("MS1: Breaks missing", 1); + if (sscanf(ptr, "%zu", &(ms1_breaks[read++])) < 1) + error("MS1: Failed to parse first break", 1); + + while ((ptr = strtok(NULL, ",")) && read < MAX_PATTERN_LEN) + { + if (sscanf(ptr, "%zu", &(ms1_breaks[read++])) < 1) + error("MS1: Failed to parse breaks", 1); + } + + ms1_breaks_len = read; + + if(!gaps) + { + printf("1\n"); + error("error", ERROR); + } + + ptr = strtok(gaps, ","); + read = 0; + if (ptr) + { + if (sscanf(ptr, "%zu", &(ms1_deltas[read++])) < 1) + error("Failed to parse first delta", 1); + + while ((ptr = strtok(NULL, ",")) && read < MAX_PATTERN_LEN) + { + if (sscanf(ptr, "%zu", &(ms1_deltas[read++])) < 1) + error("Failed to parse deltas", 1); + } + } + else + error("MS1: deltas missing",1); + + ms1_deltas_len = read; + + (*pattern)[0] = -1; + ssize_t last = -1; + ssize_t j; + for (int i = 0; i < *pattern_len; i++) + { + if ((j=setincludes(i, ms1_breaks, ms1_breaks_len))!=-1) + (*pattern)[i] = last+ms1_deltas[ms1_deltas_len>1?j:0]; + else + (*pattern)[i] = last + 1; + last = (*pattern)[i]; + } + + free(ms1_breaks); + free(ms1_deltas); + } + else + error("Unrecognized mode in -p argument", 1); } - rc->type = CUSTOM; - char *delim = ","; - char *ptr = strtok(optarg, delim); - size_t read = 0; - if (!ptr) - error("Pattern not found", 1); - spIdx_t *mypat; + // CUSTOM mode means that the user supplied a single index buffer on the command line + else + { + if (quiet_flag > 3) { + printf("Parse P Custom Pattern: %s\n", optarg); + } + rc->type = CUSTOM; + char *delim = ","; + char *ptr = strtok(optarg, delim); + size_t read = 0; + if (!ptr) + error("Pattern not found", 1); + + ssize_t *mypat; + + size_t psize; + if (rc->pattern_size > 0) + psize = rc->pattern_size; + else + psize = MAX_PATTERN_LEN; - mypat = sp_malloc(sizeof(spIdx_t), MAX_PATTERN_LEN, ALIGN_CACHE); + mypat = sp_malloc(sizeof(spIdx_t), psize, ALIGN_CACHE); - if (sscanf(ptr, "%zu", &(mypat[read++])) < 1) - error("Failed to parse first pattern element in custom mode", 1); + if (sscanf(ptr, "%zu", &(mypat[read++])) < 1) + error("Failed to parse first pattern element in custom mode", 1); - while ((ptr = strtok(NULL, delim)) && read < MAX_PATTERN_LEN) { - if (sscanf(ptr, "%zu", &(mypat[read++])) < 1) - error("Failed to parse pattern", 1); + while ((ptr = strtok(NULL, delim)) && read < psize) + { + if (sscanf(ptr, "%zu", &(mypat[read++])) < 1) + error("Failed to parse pattern", 1); + } + *pattern = mypat; + *pattern_len = read; } - *pattern = mypat; - *pattern_len = read; - } - if (*pattern_len == 0) - error("Pattern length of 0", ERROR); + if (*pattern_len == 0) + error("Pattern length of 0", ERROR); - if (rc->type == INVALID_IDX) - error("No pattern type set", ERROR); + if (rc->type == INVALID_IDX) + error("No pattern type set", ERROR); } -ssize_t setincludes(size_t key, size_t *set, size_t set_len) { - for (size_t i = 0; i < set_len; i++) { - if (set[i] == key) - return i; - } - return -1; +ssize_t setincludes(size_t key, size_t* set, size_t set_len) +{ + for (size_t i = 0; i < set_len; i++) + { + if (set[i] == key) + return i; + } + return -1; } -void print_run_config(struct run_config rc) { - printf("Index: %zu ", rc.pattern_len); - printf("["); - for (size_t i = 0; i < rc.pattern_len; i++) { - printf("%zu", rc.pattern[i]); - if (i != rc.pattern_len - 1) - printf(" "); - } - printf("]\n"); - if (rc.deltas_len > 0) { - printf("Deltas: %zu ", rc.deltas_len); +void print_run_config(struct run_config rc) +{ + printf("Index: %zu ", rc.pattern_len); printf("["); - for (size_t i = 0; i < rc.deltas_len; i++) { - printf("%zu", rc.deltas[i]); - if (i != rc.deltas_len - 1) - printf(" "); + for (size_t i = 0; i < rc.pattern_len; i++) + { + printf("%zu", rc.pattern[i]); + if (i != rc.pattern_len-1) + printf(" "); } printf("]\n"); - printf("Deltas_ps: %zu ", rc.deltas_len); - printf("["); - for (size_t i = 0; i < rc.deltas_len; i++) { - printf("%zu", rc.deltas_ps[i]); - if (i != rc.deltas_len - 1) - printf(" "); + if (rc.deltas_len > 0) + { + printf("Deltas: %zu ", rc.deltas_len); + printf("["); + for (size_t i = 0; i < rc.deltas_len; i++) + { + printf("%zu", rc.deltas[i]); + if (i != rc.deltas_len-1) + printf(" "); + } + printf("]\n"); + printf("Deltas_ps: %zu ", rc.deltas_len); + printf("["); + for (size_t i = 0; i < rc.deltas_len; i++) + { + printf("%zu", rc.deltas_ps[i]); + if (i != rc.deltas_len-1) + printf(" "); + } + printf("] (%zu)\n", rc.delta); } - printf("] (%zu)\n", rc.delta); - } else - printf("Delta: %zu\n", rc.delta); + else + printf("Delta: %zu\n", rc.delta); - printf("kern: %s\n", kernel_name); - printf("genlen: %zu\n", rc.generic_len); + printf("kern: %s\n", kernel_name); + printf("genlen: %zu\n", rc.generic_len); } -void error(char *what, int code) { - if (code == ERROR) - fprintf(err_file, "Error: "); - else if (code == WARN) { - if (verbose) - fprintf(err_file, "Warning: "); - } - - if (verbose || code) { - fprintf(err_file, "%s", what); - fprintf(err_file, "\n"); - } - - if (code) - exit(code); +void error(char *what, int code) +{ + if (code == ERROR) + fprintf(err_file, "Error: "); + else if (code == WARN) + { + if (verbose) + fprintf(err_file, "Warning: "); + } + + if (verbose || code) + { + fprintf(err_file, "%s", what); + fprintf(err_file, "\n"); + } + + if(code) + exit(code); } -void safestrcopy(char *dest, const char *src) { - dest[0] = '\0'; - strncat(dest, src, STRING_SIZE - 1); +void safestrcopy(char *dest, const char *src) +{ + dest[0] = '\0'; + strncat(dest, src, STRING_SIZE-1); } -int compare_ssizet(const void *a, const void *b) { - if (*(ssize_t *)a > *(ssize_t *)b) - return 1; - else if (*(ssize_t *)a < *(ssize_t *)b) - return -1; - else - return 0; +int compare_ptrdiff_t(const void *a, const void *b) +{ + if (*(ptrdiff_t*)a > *(ptrdiff_t*)b) return 1; + else if (*(ptrdiff_t*)a < *(ptrdiff_t*)b) return -1; + else return 0; } -void copy4(ssize_t *dest, ssize_t *a, int *off) { - for (int i = 0; i < 4; i++) { - dest[i + *off] = a[i]; - } - *off += 4; +void copy4(ptrdiff_t *dest, ptrdiff_t *a, int *off) +{ + for (int i = 0; i < 4; i++) { + dest[i + *off] = a[i]; + } + *off += 4; } -void add4(ssize_t *dest, ssize_t *a, ssize_t *b, int *off) { - for (int i = 0; i < 4; i++) { - dest[i + *off] = a[i] + b[i]; - } - *off += 4; +void add4(ptrdiff_t *dest, ptrdiff_t *a, ptrdiff_t *b, int *off) +{ + for (int i = 0; i < 4; i++) { + dest[i + *off] = a[i] + b[i]; + } + *off += 4; } -void xkp_pattern(size_t *pat_, size_t dim) { - ssize_t pat[73]; - for (int i = 0; i < 73; i++) { - pat[i] = i; - } - - ssize_t Xp[4]; - ssize_t Xn[4]; - ssize_t Yp[4]; - ssize_t Yn[4]; - ssize_t Zp[4]; - ssize_t Zn[4]; - - Xp[0] = 1; - Xp[1] = 2; - Xp[2] = 3; - Xp[3] = 4; - Xn[0] = -1; - Xn[1] = -2; - Xn[2] = -3; - Xn[3] = -4; - Yp[0] = dim; - Yp[1] = 2 * dim; - Yp[2] = 3 * dim; - Yp[3] = 4 * dim; - Yn[0] = -dim; - Yn[1] = -2 * dim; - Yn[2] = -3 * dim; - Yn[3] = -4 * dim; - Zp[0] = dim * dim; - Zp[1] = 2 * dim * dim; - Zp[2] = 3 * dim * dim; - Zp[3] = 4 * dim * dim; - Zn[0] = -dim * dim; - Zn[1] = -2 * dim * dim; - Zn[2] = -3 * dim * dim; - Zn[3] = -4 * dim * dim; - - int idx = 0; - pat[idx++] = 0; - copy4(pat, Xp, &idx); - copy4(pat, Xn, &idx); - copy4(pat, Yp, &idx); - copy4(pat, Yn, &idx); - copy4(pat, Zp, &idx); - copy4(pat, Zn, &idx); - - add4(pat, Xp, Yp, &idx); - add4(pat, Xp, Zp, &idx); - add4(pat, Xp, Yn, &idx); - add4(pat, Xp, Zn, &idx); - - add4(pat, Xn, Yp, &idx); - add4(pat, Xn, Zp, &idx); - add4(pat, Xn, Yn, &idx); - add4(pat, Xn, Zn, &idx); - - add4(pat, Yp, Zp, &idx); - add4(pat, Yp, Zn, &idx); - add4(pat, Yn, Zp, &idx); - add4(pat, Yn, Zn, &idx); - - qsort(pat, 73, sizeof(ssize_t), compare_ssizet); - - ssize_t min = pat[0]; - for (int i = 1; i < 73; i++) { - if (pat[i] < min) { - min = pat[i]; - } - } - - for (int i = 0; i < 73; i++) { - pat[i] -= min; - } - - for (int i = 0; i < 73; i++) { - pat_[i] = pat[i]; - } +void xkp_pattern(ssize_t *pat_, ptrdiff_t dim) +{ + ptrdiff_t pat[73]; + for (int i = 0; i < 73; i++) { + pat[i] = i; + } + + ptrdiff_t Xp[4]; + ptrdiff_t Xn[4]; + ptrdiff_t Yp[4]; + ptrdiff_t Yn[4]; + ptrdiff_t Zp[4]; + ptrdiff_t Zn[4]; + + Xp[0] = 1; Xp[1] = 2; Xp[2] = 3; Xp[3] = 4; + Xn[0] = -1; Xn[1] = -2; Xn[2] = -3; Xn[3] = -4; + Yp[0] = dim; Yp[1] = 2*dim; Yp[2] = 3*dim; Yp[3] = 4*dim; + Yn[0] = -dim; Yn[1] = -2*dim; Yn[2] = -3*dim; Yn[3] = -4*dim; + Zp[0] = dim*dim; Zp[1] = 2*dim*dim; Zp[2] = 3*dim*dim; Zp[3] = 4*dim*dim; + Zn[0] = -dim*dim; Zn[1] = -2*dim*dim; Zn[2] = -3*dim*dim; Zn[3] = -4*dim*dim; + + int idx = 0; + pat[idx++] = 0; + copy4(pat, Xp, &idx); + copy4(pat, Xn, &idx); + copy4(pat, Yp, &idx); + copy4(pat, Yn, &idx); + copy4(pat, Zp, &idx); + copy4(pat, Zn, &idx); + + add4(pat, Xp, Yp, &idx); + add4(pat, Xp, Zp, &idx); + add4(pat, Xp, Yn, &idx); + add4(pat, Xp, Zn, &idx); + + add4(pat, Xn, Yp, &idx); + add4(pat, Xn, Zp, &idx); + add4(pat, Xn, Yn, &idx); + add4(pat, Xn, Zn, &idx); + + add4(pat, Yp, Zp, &idx); + add4(pat, Yp, Zn, &idx); + add4(pat, Yn, Zp, &idx); + add4(pat, Yn, Zn, &idx); + + qsort(pat, 73, sizeof(ptrdiff_t), compare_ptrdiff_t); + + ptrdiff_t min = pat[0]; + for (int i = 1; i < 73; i++) { + if (pat[i] < min) { + min = pat[i]; + } + } + + for (int i = 0; i < 73; i++) { + pat[i] -= min; + } + + for (int i = 0; i < 73; i++) { + pat_[i] = pat[i]; + } } diff --git a/clients/spatter/parse-args.h b/clients/spatter/parse-args.h index 9757806..2d1d738 100644 --- a/clients/spatter/parse-args.h +++ b/clients/spatter/parse-args.h @@ -45,6 +45,7 @@ LANL and GT), this list of conditions and the following disclaimer. #define WARN 0 #define ERROR 1 + #define STRING_SIZE 1000000 #define MAX_PATTERN_LEN 1048576 @@ -54,34 +55,42 @@ LANL and GT), this list of conditions and the following disclaimer. /** @brief Supported benchmark backends */ -enum sg_backend { - OPENCL, /**< OpenCL Backend */ - OPENMP, /**< OpenMP CPU Backend */ - CUDA, /**< CUDA Backend */ - SERIAL, /**< SERIAL Backend */ - INVALID_BACKEND /**< Used as a default backend */ +enum sg_backend +{ + OPENCL, /**< OpenCL Backend */ + OPENMP, /**< OpenMP CPU Backend */ + CUDA, /**< CUDA Backend */ + SERIAL, /**< SERIAL Backend */ + INVALID_BACKEND /**< Used as a default backend */ +}; + +enum sg_kernel +{ + INVALID_KERNEL=0, + SCATTER, + GATHER, + GS, + MULTISCATTER, + MULTIGATHER }; -enum sg_kernel { - INVALID_KERNEL = 0, - SCATTER, - GATHER, - GS, - MULTISCATTER, - MULTIGATHER +enum sg_op +{ + OP_COPY, + OP_ACCUM, + INVALID_OP }; -enum sg_op { OP_COPY, OP_ACCUM, INVALID_OP }; - -// Specifies the indexing or offset type -enum idx_type { - UNIFORM, - MS1, - LAPLACIAN, - CUSTOM, - CONFIG_FILE, - XKP, - INVALID_IDX +//Specifies the indexing or offset type +enum idx_type +{ + UNIFORM, + MS1, + LAPLACIAN, + CUSTOM, + CONFIG_FILE, + XKP, + INVALID_IDX }; /* @@ -93,63 +102,68 @@ enum state }; */ -struct run_config { - // keep arrays at top so they are aligned - spIdx_t *pattern; - spIdx_t *pattern_gather; - spIdx_t *pattern_scatter; - size_t *deltas; - size_t *deltas_ps; - size_t *deltas_gather; - size_t *deltas_gather_ps; - size_t *deltas_scatter; - size_t *deltas_scatter_ps; - spSize_t pattern_len; - spSize_t pattern_gather_len; - spSize_t pattern_scatter_len; - ssize_t delta; - size_t deltas_len; - ssize_t delta_gather; - size_t deltas_gather_len; - ssize_t delta_scatter; - size_t deltas_scatter_len; - enum sg_kernel kernel; - enum idx_type type; - enum idx_type type_gather; - enum idx_type type_scatter; - spSize_t generic_len; - size_t wrap; - size_t nruns; - char pattern_file[STRING_SIZE]; - char generator[STRING_SIZE]; - char name[STRING_SIZE]; - size_t random_seed; - size_t omp_threads; - enum sg_op op; - size_t vector_len; - unsigned int shmem; - size_t local_work_size; - double *time_ms; - long long **papi_ctr; - int papi_counters; - int stride_kernel; - // Reorder based kernels - int ro_morton; - int ro_hilbert; - int ro_block; - uint32_t *ro_order; - uint32_t *ro_order_dev; +struct run_config +{ + // keep arrays at top so they are aligned + ssize_t *pattern; + ssize_t *pattern_gather; + ssize_t *pattern_scatter; + size_t *deltas; + size_t *deltas_ps; + size_t *deltas_gather; + size_t *deltas_gather_ps; + size_t *deltas_scatter; + size_t *deltas_scatter_ps; + ssize_t boundary; + spSize_t pattern_size; + spSize_t pattern_len; + spSize_t pattern_gather_len; + spSize_t pattern_scatter_len; + ssize_t delta; + size_t deltas_len; + ssize_t delta_gather; + size_t deltas_gather_len; + ssize_t delta_scatter; + size_t deltas_scatter_len; + enum sg_kernel kernel; + enum idx_type type; + enum idx_type type_gather; + enum idx_type type_scatter; + spSize_t generic_len; + size_t wrap; + size_t nruns; + char pattern_file[STRING_SIZE]; + char generator[STRING_SIZE]; + char name[STRING_SIZE]; + size_t random_seed; + size_t omp_threads; + enum sg_op op; + size_t vector_len; + unsigned int shmem; + size_t local_work_size; + double *time_ms; + long long **papi_ctr; + int papi_counters; + int stride_kernel; + // Reorder based kernels + int ro_morton; + int ro_hilbert; + int ro_block; + uint32_t *ro_order; + uint32_t *ro_order_dev; }; -struct backend_config { - enum sg_backend backend; - enum sg_kernel kernel; - enum sg_op op; +struct backend_config +{ + enum sg_backend backend; + enum sg_kernel kernel; + enum sg_op op; + + char platform_string[STRING_SIZE]; + char device_string[STRING_SIZE]; + char kernel_file[STRING_SIZE]; + char kernel_name[STRING_SIZE]; - char platform_string[STRING_SIZE]; - char device_string[STRING_SIZE]; - char kernel_file[STRING_SIZE]; - char kernel_name[STRING_SIZE]; }; /** @brief Read command-line arguments and populate global variables. @@ -158,6 +172,6 @@ struct backend_config { */ void parse_args(int argc, char **argv, int *nrc, struct run_config **rc); struct run_config *parse_runs(int arrr, char **argv); -void error(char *what, int code); +void error (char* what, int code); void print_run_config(struct run_config rc); #endif diff --git a/clients/spatter/pcg_basic.c b/clients/spatter/pcg_basic.c index 86e4ff3..9de611a 100644 --- a/clients/spatter/pcg_basic.c +++ b/clients/spatter/pcg_basic.c @@ -39,68 +39,77 @@ static pcg32_random_t pcg32_global = PCG32_INITIALIZER; // Seed the rng. Specified in two parts, state initializer and a // sequence selection constant (a.k.a. stream id) -void pcg32_srandom_r(pcg32_random_t *rng, uint64_t initstate, - uint64_t initseq) { - rng->state = 0U; - rng->inc = (initseq << 1u) | 1u; - pcg32_random_r(rng); - rng->state += initstate; - pcg32_random_r(rng); +void pcg32_srandom_r(pcg32_random_t* rng, uint64_t initstate, uint64_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg32_random_r(rng); + rng->state += initstate; + pcg32_random_r(rng); } -void pcg32_srandom(uint64_t seed, uint64_t seq) { - pcg32_srandom_r(&pcg32_global, seed, seq); +void pcg32_srandom(uint64_t seed, uint64_t seq) +{ + pcg32_srandom_r(&pcg32_global, seed, seq); } // pcg32_random() // pcg32_random_r(rng) // Generate a uniformly distributed 32-bit random number -uint32_t pcg32_random_r(pcg32_random_t *rng) { - uint64_t oldstate = rng->state; - rng->state = oldstate * 6364136223846793005ULL + rng->inc; - uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; - uint32_t rot = oldstate >> 59u; - return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); +uint32_t pcg32_random_r(pcg32_random_t* rng) +{ + uint64_t oldstate = rng->state; + rng->state = oldstate * 6364136223846793005ULL + rng->inc; + uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; + uint32_t rot = oldstate >> 59u; + return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); +} + +uint32_t pcg32_random() +{ + return pcg32_random_r(&pcg32_global); } -uint32_t pcg32_random() { return pcg32_random_r(&pcg32_global); } // pcg32_boundedrand(bound): // pcg32_boundedrand_r(rng, bound): // Generate a uniformly distributed number, r, where 0 <= r < bound -uint32_t pcg32_boundedrand_r(pcg32_random_t *rng, uint32_t bound) { - // To avoid bias, we need to make the range of the RNG a multiple of - // bound, which we do by dropping output less than a threshold. - // A naive scheme to calculate the threshold would be to do - // - // uint32_t threshold = 0x100000000ull % bound; - // - // but 64-bit div/mod is slower than 32-bit div/mod (especially on - // 32-bit platforms). In essence, we do - // - // uint32_t threshold = (0x100000000ull-bound) % bound; - // - // because this version will calculate the same modulus, but the LHS - // value is less than 2^32. - - uint32_t threshold = -bound % bound; - - // Uniformity guarantees that this loop will terminate. In practice, it - // should usually terminate quickly; on average (assuming all bounds are - // equally likely), 82.25% of the time, we can expect it to require just - // one iteration. In the worst case, someone passes a bound of 2^31 + 1 - // (i.e., 2147483649), which invalidates almost 50% of the range. In - // practice, bounds are typically small and only a tiny amount of the range - // is eliminated. - for (;;) { - uint32_t r = pcg32_random_r(rng); - if (r >= threshold) - return r % bound; - } +uint32_t pcg32_boundedrand_r(pcg32_random_t* rng, uint32_t bound) +{ + // To avoid bias, we need to make the range of the RNG a multiple of + // bound, which we do by dropping output less than a threshold. + // A naive scheme to calculate the threshold would be to do + // + // uint32_t threshold = 0x100000000ull % bound; + // + // but 64-bit div/mod is slower than 32-bit div/mod (especially on + // 32-bit platforms). In essence, we do + // + // uint32_t threshold = (0x100000000ull-bound) % bound; + // + // because this version will calculate the same modulus, but the LHS + // value is less than 2^32. + + uint32_t threshold = -bound % bound; + + // Uniformity guarantees that this loop will terminate. In practice, it + // should usually terminate quickly; on average (assuming all bounds are + // equally likely), 82.25% of the time, we can expect it to require just + // one iteration. In the worst case, someone passes a bound of 2^31 + 1 + // (i.e., 2147483649), which invalidates almost 50% of the range. In + // practice, bounds are typically small and only a tiny amount of the range + // is eliminated. + for (;;) { + uint32_t r = pcg32_random_r(rng); + if (r >= threshold) + return r % bound; + } } -uint32_t pcg32_boundedrand(uint32_t bound) { - return pcg32_boundedrand_r(&pcg32_global, bound); + +uint32_t pcg32_boundedrand(uint32_t bound) +{ + return pcg32_boundedrand_r(&pcg32_global, bound); } diff --git a/clients/spatter/pcg_basic.h b/clients/spatter/pcg_basic.h index 6a47067..e2b526a 100644 --- a/clients/spatter/pcg_basic.h +++ b/clients/spatter/pcg_basic.h @@ -37,17 +37,16 @@ extern "C" { #endif -struct pcg_state_setseq_64 { // Internals are *Private*. - uint64_t state; // RNG state. All values are possible. - uint64_t inc; // Controls which RNG sequence (stream) is - // selected. Must *always* be odd. +struct pcg_state_setseq_64 { // Internals are *Private*. + uint64_t state; // RNG state. All values are possible. + uint64_t inc; // Controls which RNG sequence (stream) is + // selected. Must *always* be odd. }; typedef struct pcg_state_setseq_64 pcg32_random_t; // If you *must* statically initialize it, here's one. -#define PCG32_INITIALIZER \ - { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL } +#define PCG32_INITIALIZER { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL } // pcg32_srandom(initstate, initseq) // pcg32_srandom_r(rng, initstate, initseq): @@ -55,21 +54,22 @@ typedef struct pcg_state_setseq_64 pcg32_random_t; // sequence selection constant (a.k.a. stream id) void pcg32_srandom(uint64_t initstate, uint64_t initseq); -void pcg32_srandom_r(pcg32_random_t *rng, uint64_t initstate, uint64_t initseq); +void pcg32_srandom_r(pcg32_random_t* rng, uint64_t initstate, + uint64_t initseq); // pcg32_random() // pcg32_random_r(rng) // Generate a uniformly distributed 32-bit random number uint32_t pcg32_random(void); -uint32_t pcg32_random_r(pcg32_random_t *rng); +uint32_t pcg32_random_r(pcg32_random_t* rng); // pcg32_boundedrand(bound): // pcg32_boundedrand_r(rng, bound): // Generate a uniformly distributed number, r, where 0 <= r < bound uint32_t pcg32_boundedrand(uint32_t bound); -uint32_t pcg32_boundedrand_r(pcg32_random_t *rng, uint32_t bound); +uint32_t pcg32_boundedrand_r(pcg32_random_t* rng, uint32_t bound); #if __cplusplus } diff --git a/clients/spatter/sgtype.h b/clients/spatter/sgtype.h index 1459d19..fa7e795 100644 --- a/clients/spatter/sgtype.h +++ b/clients/spatter/sgtype.h @@ -52,21 +52,13 @@ LANL and GT), this list of conditions and the following disclaimer. #ifdef USE_OPENCL #include "cl-helper.h" -static_assert(sizeof(cl_ulong) == sizeof(unsigned long), - "Due to size differences between cl_ulong and unsigned long, we " - "cannot compile with OpenCL support on your system"); -static_assert(sizeof(cl_double) == sizeof(double), - "Due to size differences between cl_double and double, we cannot " - "compile with OpenCL support on your system"); -static_assert(sizeof(cl_uint) == sizeof(unsigned int), - "Due to size differences between cl_uint and unsigned int, we " - "cannot compile with OpenCL support on your system"); -static_assert(sizeof(cl_float) == sizeof(float), - "Due to size differences between cl_double and double, we cannot " - "compile with OpenCL support on your system"); +static_assert(sizeof(cl_ulong) == sizeof(unsigned long), "Due to size differences between cl_ulong and unsigned long, we cannot compile with OpenCL support on your system"); +static_assert(sizeof(cl_double) == sizeof(double), "Due to size differences between cl_double and double, we cannot compile with OpenCL support on your system"); +static_assert(sizeof(cl_uint) == sizeof(unsigned int), "Due to size differences between cl_uint and unsigned int, we cannot compile with OpenCL support on your system"); +static_assert(sizeof(cl_float) == sizeof(float), "Due to size differences between cl_double and double, we cannot compile with OpenCL support on your system"); #endif -typedef double sgData_t; +typedef double sgData_t; #define SGD "%lf" typedef unsigned long sgIdx_t; typedef unsigned long spIdx_t; @@ -77,4 +69,4 @@ typedef long sgsIdx_t; typedef size_t spSize_t; #define SPS "%zu" -#endif // endif SGTYPE +#endif //endif SGTYPE \ No newline at end of file diff --git a/clients/spatter/sp_alloc.c b/clients/spatter/sp_alloc.c index 1741e29..12a6a69 100644 --- a/clients/spatter/sp_alloc.c +++ b/clients/spatter/sp_alloc.c @@ -40,58 +40,62 @@ LANL and GT), this list of conditions and the following disclaimer. this software without specific prior written permission. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include //memset +#include //exit +#include #include "sp_alloc.h" #include "parse-args.h" //error #include -#include //exit -#include //memset + long long total_mem_used = 0; -long long get_mem_used() { return total_mem_used; } +long long get_mem_used() { + return total_mem_used; +} void check_size(size_t size) { - total_mem_used += size; - // printf("size: %zu\n", size); - if (total_mem_used > SP_MAX_ALLOC) { - error("Too much memory used.", ERROR); - } + total_mem_used += size; + //printf("size: %zu\n", size); + if (total_mem_used > SP_MAX_ALLOC) { + error("Too much memory used.", ERROR); + } } void check_safe_mult(size_t a, size_t b) { - int hi_bit_a = 0; - int hi_bit_b = 0; + unsigned int hi_bit_a = 0; + unsigned int hi_bit_b = 0; + + while (a >>= 1) hi_bit_a++; + while (b >>= 1) hi_bit_b++; - while (a >>= 1) - hi_bit_a++; - while (b >>= 1) - hi_bit_b++; + if (hi_bit_a + hi_bit_b > sizeof(size_t) * 8) { + error("Error: Multiplication would overflow.", ERROR); + } - if (hi_bit_a + hi_bit_b > sizeof(size_t) * 8) { - error("Error: Multiplication would overflow.", ERROR); - } } -void *sp_malloc(size_t size, size_t count, size_t align) { - check_safe_mult(size, count); - check_size(size * count); +void *sp_malloc (size_t size, size_t count, size_t align) { + check_safe_mult(size, count); + check_size(size*count); #ifdef USE_POSIX_MEMALIGN - void *ptr = NULL; - int ret = posix_memalign(&ptr, align, size * count); - if (ret != 0) - ptr = NULL; + void *ptr = NULL; + int ret = posix_memalign (&ptr,align,size*count); + if (ret!=0) ptr = NULL; #else - void *ptr = aligned_alloc(align, size * count); + void *ptr = aligned_alloc (align, size*count); #endif - if (!ptr) { - printf("Attempted to allocate %zu bytes (%zu * %zu)\n", size * count, size, - count); - error("Error: failed to allocate memory", ERROR); - } - return ptr; + if (!ptr) { + printf("Attempted to allocate %zu bytes (%zu * %zu)\n", size*count, size , count); + error("Error: failed to allocate memory", ERROR); + } + return ptr; } -void *sp_calloc(size_t size, size_t count, size_t align) { - void *ptr = sp_malloc(size, count, align); - memset(ptr, 0, size * count); - return ptr; +void *sp_calloc (size_t size, size_t count, size_t align) { + void *ptr = sp_malloc(size, count, align); + memset(ptr, 0, size*count); + return ptr; } diff --git a/clients/spatter/sp_alloc.h b/clients/spatter/sp_alloc.h index 4af6090..15a80fd 100644 --- a/clients/spatter/sp_alloc.h +++ b/clients/spatter/sp_alloc.h @@ -42,16 +42,13 @@ LANL and GT), this list of conditions and the following disclaimer. #ifndef SP_ALLOC_H #define SP_ALLOC_H - -#include - #ifndef SP_MAX_ALLOC -// 65GB -#define SP_MAX_ALLOC (65ll * 1000 * 1000 * 1000) + //65GB + #define SP_MAX_ALLOC (65ll * 1000 * 1000 * 1000) #endif #define ALIGN_CACHE 64 -#define ALIGN_PAGE 4096 -void *sp_malloc(size_t size, size_t count, size_t align); -void *sp_calloc(size_t size, size_t count, size_t align); +#define ALIGN_PAGE 4096 +void *sp_malloc (size_t size, size_t count, size_t align); +void *sp_calloc (size_t size, size_t count, size_t align); long long get_mem_used(); #endif diff --git a/clients/spatter/unused.h b/clients/spatter/unused.h new file mode 100644 index 0000000..52641b6 --- /dev/null +++ b/clients/spatter/unused.h @@ -0,0 +1,12 @@ +#ifndef UNUSED_H +#define UNUSED_H +// Use this for specifying unused variables in a function declaration +#ifdef __GNUC__ +# define UNUSED(x) UNUSED_ ## x __attribute__((__unused__)) +#else +# define UNUSED(x) UNUSED_ ## x +#endif + +// Use this for suppressing warnings when variables are sometimes used +#define UNUSED_VAR(expr) do { (void)(expr); } while (0) +#endif \ No newline at end of file