Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Bug fixes
----------
- Incorrect calculations for non-native endian data [#191]
- Workaround for GNU Assembler bug causing incorrect calculations [#196]
- Only report runtime ISA support if we also have compiler support [#200]



Expand Down
2 changes: 2 additions & 0 deletions common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,8 @@ ifeq ($(DO_CHECKS), 1)
CFLAGS += -xCORE-AVX2
endif

CFLAGS += -DGAS_BUG_DISABLE_AVX512

ifneq ($(GAS_BUG_WARNING_PRINTED),1)
$(warning $(ccred)DISABLING AVX-512 SUPPORT DUE TO GNU ASSEMBLER BUG. UPGRADE TO BINUTILS >=2.32 TO FIX THIS.$(ccreset))
endif
Expand Down
79 changes: 40 additions & 39 deletions mocks/DDrppi_mocks/countpairs_rp_pi_mocks_impl.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -114,63 +114,63 @@ countpairs_mocks_func_ptr_DOUBLE countpairs_rp_pi_mocks_driver_DOUBLE(const stru

/* Array of function pointers */
countpairs_mocks_func_ptr_DOUBLE allfunctions[] = {
#ifdef __AVX512F__
countpairs_rp_pi_mocks_avx512_intrinsics_DOUBLE,
countpairs_rp_pi_mocks_fallback_DOUBLE,
#ifdef __SSE4_2__
countpairs_rp_pi_mocks_sse_intrinsics_DOUBLE,
#endif
#ifdef __AVX__
countpairs_rp_pi_mocks_avx_intrinsics_DOUBLE,
countpairs_rp_pi_mocks_avx_intrinsics_DOUBLE,
#endif
#ifdef __SSE4_2__
countpairs_rp_pi_mocks_sse_intrinsics_DOUBLE,
#ifdef __AVX512F__
countpairs_rp_pi_mocks_avx512_intrinsics_DOUBLE,
#endif
countpairs_rp_pi_mocks_fallback_DOUBLE
};

const int num_functions = sizeof(allfunctions)/sizeof(void *);
const int fallback_offset = num_functions - 1;
const int fallback_index = 0;
#if defined(__AVX512F__) || defined(__AVX__) || defined(__SSE4_2__)
const int highest_isa = instrset_detect();
const int highest_isa = get_max_usable_isa();
#endif
int curr_offset = 0;
int curr_index = 0;

/* Check for AVX512F support */
int avx512_offset = fallback_offset;
#ifdef __AVX512F__
avx512_offset = highest_isa >= 9 ? curr_offset:fallback_offset;
curr_offset++;
/* Is the SSE function supported at runtime and enabled at compile-time?*/
int sse_index = curr_index;
#ifdef __SSE4_2__
curr_index++;
if(highest_isa >= SSE42) sse_index = curr_index;
#endif

/* Now check if AVX is supported by the CPU */
int avx_offset = fallback_offset;
int avx_index = curr_index;
#ifdef __AVX__
avx_offset = highest_isa >= 7 ? curr_offset:fallback_offset;
curr_offset++;
curr_index++;
if(highest_isa >= AVX) avx_index = curr_index;
#endif

/* Is the SSE function supported at runtime and enabled at compile-time?*/
int sse_offset = fallback_offset;
#ifdef __SSE4_2__
sse_offset = highest_isa >= 6 ? curr_offset:fallback_offset;
curr_offset++;
/* Check for AVX512F support */
int avx512_index = curr_index;
#ifdef __AVX512F__
curr_index++;
if(highest_isa >= AVX512F) avx512_index = curr_index;
#endif
if( curr_offset != fallback_offset) {
fprintf(stderr,"ERROR: Bug in code (current offset = %d *should equal* fallback function offset = %d)\n",
curr_offset, fallback_offset);

if( curr_index != num_functions-1) {
fprintf(stderr,"ERROR: Bug in code (current index = %d *should equal* num_functions-1 = %d-1)\n",
curr_index, num_functions);
return NULL;
}

int function_dispatch=0;
int function_dispatch = num_functions-1; //Set default to fastest available
/* Check that cpu supports feature */
if(options->instruction_set >= 0) {
switch(options->instruction_set) {
case(AVX512F):function_dispatch=avx512_offset;break;
case(AVX512F):function_dispatch=avx512_index;break;
case(AVX2):
case(AVX):function_dispatch=avx_offset;break;
case(SSE42): function_dispatch=sse_offset;break;
default:function_dispatch=fallback_offset;break;
case(AVX):function_dispatch=avx_index;break;
case(SSE42):function_dispatch=sse_index;break;
default:function_dispatch=fallback_index;break;
}
}

if(function_dispatch >= num_functions) {
fprintf(stderr,"In %s> ERROR: Could not resolve the correct function.\n Function index = %d must lie between [0, %d)\n",
__FUNCTION__, function_dispatch, num_functions);
Expand All @@ -180,17 +180,18 @@ countpairs_mocks_func_ptr_DOUBLE countpairs_rp_pi_mocks_driver_DOUBLE(const stru
old_isa = options->instruction_set;

if(options->verbose){
// This must be first (AVX/SSE may be aliased to fallback)
if(function_dispatch == fallback_offset){
// Must be ordered low to high, since higher ISA may be aliased to lower ones
if(function_dispatch == fallback_index){
fprintf(stderr,"Using fallback kernel\n");
} else if(function_dispatch == avx512_offset){
fprintf(stderr,"Using AVX512 kernel\n");
} else if(function_dispatch == avx_offset){
fprintf(stderr,"Using AVX kernel\n");
} else if(function_dispatch == sse_offset){
} else if(function_dispatch == sse_index){
fprintf(stderr,"Using SSE kernel\n");
} else if(function_dispatch == avx_index){
fprintf(stderr,"Using AVX kernel\n");
} else if(function_dispatch == avx512_index){
fprintf(stderr,"Using AVX512 kernel\n");
} else {
printf("Unknown kernel!\n");
fprintf(stderr,"Unknown kernel!\n");
return NULL;
}
}

Expand Down
74 changes: 37 additions & 37 deletions mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -115,63 +115,63 @@ countpairs_mocks_func_ptr_DOUBLE countpairs_s_mu_mocks_driver_DOUBLE(const struc

/* Array of function pointers */
countpairs_mocks_func_ptr_DOUBLE allfunctions[] = {
#ifdef __AVX512F__
countpairs_s_mu_mocks_avx512_intrinsics_DOUBLE,
countpairs_s_mu_mocks_fallback_DOUBLE,
#ifdef __SSE4_2__
countpairs_s_mu_mocks_sse_intrinsics_DOUBLE,
#endif
#ifdef __AVX__
countpairs_s_mu_mocks_avx_intrinsics_DOUBLE,
#endif
#ifdef __SSE4_2__
countpairs_s_mu_mocks_sse_intrinsics_DOUBLE,
#ifdef __AVX512F__
countpairs_s_mu_mocks_avx512_intrinsics_DOUBLE,
#endif
countpairs_s_mu_mocks_fallback_DOUBLE
};

const int num_functions = sizeof(allfunctions)/sizeof(void *);
const int fallback_offset = num_functions - 1;
const int fallback_index = 0;
#if defined(__AVX512F__) || defined(__AVX__) || defined(__SSE4_2__)
const int highest_isa = instrset_detect();
const int highest_isa = get_max_usable_isa();
#endif
int curr_offset = 0;
int curr_index = 0;

/* Check for AVX512F support */
int avx512_offset = fallback_offset;
#ifdef __AVX512F__
avx512_offset = highest_isa >= 9 ? curr_offset:fallback_offset;
curr_offset++;
/* Is the SSE function supported at runtime and enabled at compile-time?*/
int sse_index = curr_index;
#ifdef __SSE4_2__
curr_index++;
if(highest_isa >= SSE42) sse_index = curr_index;
#endif

/* Now check if AVX is supported by the CPU */
int avx_offset = fallback_offset;
int avx_index = curr_index;
#ifdef __AVX__
avx_offset = highest_isa >= 7 ? curr_offset:fallback_offset;
curr_offset++;
curr_index++;
if(highest_isa >= AVX) avx_index = curr_index;
#endif

/* Is the SSE function supported at runtime and enabled at compile-time?*/
int sse_offset = fallback_offset;
#ifdef __SSE4_2__
sse_offset = highest_isa >= 6 ? curr_offset:fallback_offset;
curr_offset++;
/* Check for AVX512F support */
int avx512_index = curr_index;
#ifdef __AVX512F__
curr_index++;
if(highest_isa >= AVX512F) avx512_index = curr_index;
#endif
if( curr_offset != fallback_offset) {
fprintf(stderr,"ERROR: Bug in code (current offset = %d *should equal* fallback function offset = %d)\n",
curr_offset, fallback_offset);

if( curr_index != num_functions-1) {
fprintf(stderr,"ERROR: Bug in code (current index = %d *should equal* num_functions-1 = %d-1)\n",
curr_index, num_functions);
return NULL;
}

int function_dispatch=0;
int function_dispatch = num_functions-1; //Set default to fastest available
/* Check that cpu supports feature */
if(options->instruction_set >= 0) {
switch(options->instruction_set) {
case(AVX512F):function_dispatch=avx512_offset;break;
case(AVX512F):function_dispatch=avx512_index;break;
case(AVX2):
case(AVX):function_dispatch=avx_offset;break;
case(SSE42): function_dispatch=sse_offset;break;
default:function_dispatch=fallback_offset;break;
case(AVX):function_dispatch=avx_index;break;
case(SSE42):function_dispatch=sse_index;break;
default:function_dispatch=fallback_index;break;
}
}

if(function_dispatch >= num_functions) {
fprintf(stderr,"In %s> ERROR: Could not resolve the correct function.\n Function index = %d must lie between [0, %d)\n",
__FUNCTION__, function_dispatch, num_functions);
Expand All @@ -181,15 +181,15 @@ countpairs_mocks_func_ptr_DOUBLE countpairs_s_mu_mocks_driver_DOUBLE(const struc
old_isa = options->instruction_set;

if(options->verbose){
// This must be first (AVX/SSE may be aliased to fallback)
if(function_dispatch == fallback_offset){
// Must be ordered low to high, since higher ISA may be aliased to lower ones
if(function_dispatch == fallback_index){
fprintf(stderr,"Using fallback kernel\n");
} else if(function_dispatch == avx512_offset){
fprintf(stderr,"Using AVX512 kernel\n");
} else if(function_dispatch == avx_offset){
fprintf(stderr,"Using AVX kernel\n");
} else if(function_dispatch == sse_offset){
} else if(function_dispatch == sse_index){
fprintf(stderr,"Using SSE kernel\n");
} else if(function_dispatch == avx_index){
fprintf(stderr,"Using AVX kernel\n");
} else if(function_dispatch == avx512_index){
fprintf(stderr,"Using AVX512 kernel\n");
} else {
fprintf(stderr,"Unknown kernel!\n");
return NULL;
Expand Down
78 changes: 38 additions & 40 deletions mocks/DDtheta_mocks/countpairs_theta_mocks_impl.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -94,63 +94,61 @@ countpairs_theta_mocks_func_ptr_DOUBLE countpairs_theta_mocks_driver_DOUBLE(cons

/* Array of function pointers */
countpairs_theta_mocks_func_ptr_DOUBLE allfunctions[] = {
#ifdef __AVX512F__
countpairs_theta_mocks_avx512_instrinsics_DOUBLE,
countpairs_theta_mocks_fallback_DOUBLE,
#ifdef __SSE4_2__
countpairs_theta_mocks_sse_intrinsics_DOUBLE,
#endif
#ifdef __AVX__
countpairs_theta_mocks_avx_instrinsics_DOUBLE,
countpairs_theta_mocks_avx_intrinsics_DOUBLE,
#endif
#ifdef __SSE4_2__
countpairs_theta_mocks_sse_instrinsics_DOUBLE,
#ifdef __AVX512F__
countpairs_theta_mocks_avx512_intrinsics_DOUBLE,
#endif
countpairs_theta_mocks_fallback_DOUBLE
};

const int num_functions = sizeof(allfunctions)/sizeof(void *);
const int fallback_offset = num_functions - 1;
const int fallback_index = 0;
#if defined(__AVX512F__) || defined(__AVX__) || defined(__SSE4_2__)
/* highest_isa is only used when at least one of SSE4.2, AVX, or AVX512F is
enabled at compile time; without this guard there would be an unnecessary
function call and an unused-variable compiler warning. */
const int highest_isa = instrset_detect();
const int highest_isa = get_max_usable_isa();
#endif
int curr_offset = 0;
int curr_index = 0;

/* Check for AVX512F support */
int avx512_offset = fallback_offset;
#ifdef __AVX512F__
avx512_offset = highest_isa >= 9 ? curr_offset:fallback_offset;
curr_offset++;
/* Is the SSE function supported at runtime and enabled at compile-time?*/
int sse_index = curr_index;
#ifdef __SSE4_2__
curr_index++;
if(highest_isa >= SSE42) sse_index = curr_index;
#endif

/* Now check if AVX is supported by the CPU */
int avx_offset = fallback_offset;
int avx_index = curr_index;
#ifdef __AVX__
avx_offset = highest_isa >= 7 ? curr_offset:fallback_offset;
curr_offset++;
curr_index++;
if(highest_isa >= AVX) avx_index = curr_index;
#endif

/* Is the SSE function supported at runtime and enabled at compile-time?*/
int sse_offset = fallback_offset;
#ifdef __SSE4_2__
sse_offset = highest_isa >= 6 ? curr_offset:fallback_offset;
curr_offset++;
/* Check for AVX512F support */
int avx512_index = curr_index;
#ifdef __AVX512F__
curr_index++;
if(highest_isa >= AVX512F) avx512_index = curr_index;
#endif
if( curr_offset != fallback_offset) {
fprintf(stderr,"ERROR: Bug in code (current offset = %d *should equal* fallback function offset = %d)\n",
curr_offset, fallback_offset);

if( curr_index != num_functions-1) {
fprintf(stderr,"ERROR: Bug in code (current index = %d *should equal* num_functions-1 = %d-1)\n",
curr_index, num_functions);
return NULL;
}

int function_dispatch=0;
int function_dispatch = num_functions-1; //Set default to fastest available
/* Check that cpu supports feature */
if(options->instruction_set >= 0) {
switch(options->instruction_set) {
case(AVX512F):function_dispatch=avx512_offset;break;
case(AVX512F):function_dispatch=avx512_index;break;
case(AVX2):
case(AVX):function_dispatch=avx_offset;break;
case(SSE42):function_dispatch=sse_offset;break;
default:function_dispatch=fallback_offset;break;
case(AVX):function_dispatch=avx_index;break;
case(SSE42):function_dispatch=sse_index;break;
default:function_dispatch=fallback_index;break;
}
}
if(function_dispatch >= num_functions) {
Expand All @@ -162,15 +160,15 @@ countpairs_theta_mocks_func_ptr_DOUBLE countpairs_theta_mocks_driver_DOUBLE(cons
old_isa = options->instruction_set;

if(options->verbose){
// This must be first (AVX/SSE may be aliased to fallback)
if(function_dispatch == fallback_offset){
// Must be ordered low to high, since higher ISA may be aliased to lower ones
if(function_dispatch == fallback_index){
fprintf(stderr,"Using fallback kernel\n");
} else if(function_dispatch == avx512_offset){
fprintf(stderr,"Using AVX512 kernel\n");
} else if(function_dispatch == avx_offset){
fprintf(stderr,"Using AVX kernel\n");
} else if(function_dispatch == sse_offset){
} else if(function_dispatch == sse_index){
fprintf(stderr,"Using SSE kernel\n");
} else if(function_dispatch == avx_index){
fprintf(stderr,"Using AVX kernel\n");
} else if(function_dispatch == avx512_index){
fprintf(stderr,"Using AVX512 kernel\n");
} else {
fprintf(stderr,"Unknown kernel!\n");
return NULL;
Expand Down
Loading