Skip to content

Commit

Permalink
Distinct between LIBXSMM_X86_AVX512_MIC and LIBXSMM_X86_AVX512_CORE, …
Browse files Browse the repository at this point in the history
…and removed LIBXSMM_X86_AVX512. We do not need this (at the moment) for the actual code generation (AFV512F level), however we already use it for certain platform defaults. Adjusted some functions to rely on integer value ("enumeration") in order to distinct the platform rather than comparing the platform name (string).
  • Loading branch information
hfp committed May 15, 2016
1 parent 2e7ff0b commit 3610639
Show file tree
Hide file tree
Showing 14 changed files with 75 additions and 51 deletions.
3 changes: 2 additions & 1 deletion include/libxsmm_typedefs.h
Expand Up @@ -44,7 +44,8 @@
#define LIBXSMM_X86_SSE4_2 1004
#define LIBXSMM_X86_AVX 1005
#define LIBXSMM_X86_AVX2 1006
#define LIBXSMM_X86_AVX512 1007
#define LIBXSMM_X86_AVX512_MIC 1007
#define LIBXSMM_X86_AVX512_CORE 1008


/** Flag enumeration which can be binary ORed. */
Expand Down
14 changes: 9 additions & 5 deletions src/generator_gemm_common.c
Expand Up @@ -173,8 +173,10 @@ void libxsmm_generator_gemm_init_micro_kernel_config_fullvector( libxsmm_micro_k
(strcmp( i_arch, "skx" ) == 0) ) {
if ((strcmp( i_arch, "knc" ) == 0)) {
io_micro_kernel_config->instruction_set = LIBXSMM_X86_IMCI;
} else if ((strcmp( i_arch, "knl" ) == 0)) {
io_micro_kernel_config->instruction_set = LIBXSMM_X86_AVX512_MIC;
} else {
io_micro_kernel_config->instruction_set = LIBXSMM_X86_AVX512;
io_micro_kernel_config->instruction_set = LIBXSMM_X86_AVX512_CORE;
}
io_micro_kernel_config->vector_reg_count = 32;
io_micro_kernel_config->use_masking_a_c = i_use_masking_a_c;
Expand Down Expand Up @@ -592,8 +594,9 @@ void libxsmm_generator_gemm_load_C( libxsmm_generated_code* io_gener
libxsmm_handle_error( io_generated_code, LIBXSMM_ERR_REG_BLOCK );
return;
}
} else if (i_micro_kernel_config->instruction_set == LIBXSMM_X86_IMCI ||
i_micro_kernel_config->instruction_set == LIBXSMM_X86_AVX512 ) {
} else if (i_micro_kernel_config->instruction_set == LIBXSMM_X86_IMCI ||
i_micro_kernel_config->instruction_set == LIBXSMM_X86_AVX512_MIC ||
i_micro_kernel_config->instruction_set == LIBXSMM_X86_AVX512_CORE ) {
if ( (i_n_blocking > 30) || (i_n_blocking < 1) || (i_m_blocking != i_micro_kernel_config->vector_length) ) {
libxsmm_handle_error( io_generated_code, LIBXSMM_ERR_REG_BLOCK );
return;
Expand Down Expand Up @@ -661,8 +664,9 @@ void libxsmm_generator_gemm_store_C( libxsmm_generated_code* io_gene
libxsmm_handle_error( io_generated_code, LIBXSMM_ERR_REG_BLOCK );
return;
}
} else if (i_micro_kernel_config->instruction_set == LIBXSMM_X86_IMCI ||
i_micro_kernel_config->instruction_set == LIBXSMM_X86_AVX512 ) {
} else if (i_micro_kernel_config->instruction_set == LIBXSMM_X86_IMCI ||
i_micro_kernel_config->instruction_set == LIBXSMM_X86_AVX512_MIC ||
i_micro_kernel_config->instruction_set == LIBXSMM_X86_AVX512_CORE ) {
if ( (i_n_blocking > 30) || (i_n_blocking < 1) || (i_m_blocking != i_micro_kernel_config->vector_length) ) {
libxsmm_handle_error( io_generated_code, LIBXSMM_ERR_REG_BLOCK );
return;
Expand Down
3 changes: 2 additions & 1 deletion src/generator_gemm_imci_avx512.c
Expand Up @@ -69,7 +69,8 @@ LIBXSMM_INLINE void libxsmm_generator_gemm_imci_avx512_kernel_initialize_mask( l
LIBXSMM_X86_INSTR_KMOV,
i_gp_reg_mapping->gp_reg_help_5,
LIBXSMM_X86_IMCI_AVX512_MASK );
} else if ( i_micro_kernel_config->instruction_set == LIBXSMM_X86_AVX512 ) {
} else if ( i_micro_kernel_config->instruction_set == LIBXSMM_X86_AVX512_MIC ||
i_micro_kernel_config->instruction_set == LIBXSMM_X86_AVX512_CORE ) {
libxsmm_x86_instruction_mask_move( io_generated_code,
LIBXSMM_X86_INSTR_KMOVW,
i_gp_reg_mapping->gp_reg_help_5,
Expand Down
9 changes: 5 additions & 4 deletions src/generator_x86_instructions.c
Expand Up @@ -316,7 +316,7 @@ void libxsmm_x86_instruction_vec_move( libxsmm_generated_code* io_generated_code
libxsmm_get_x86_gp_reg_name( i_gp_reg_base, l_gp_reg_base_name, 3 );
libxsmm_get_x86_instr_name( i_vmove_instr, l_instr_name, 15 );

if ( (i_instruction_set == LIBXSMM_X86_AVX512) && (i_use_masking != 0) ) {
if ( (i_instruction_set == LIBXSMM_X86_AVX512_MIC || i_instruction_set == LIBXSMM_X86_AVX512_CORE) && (i_use_masking != 0) ) {
/* build vmovpd/ps/sd/ss instruction, load use */
if ( i_is_store == 0 ) {
if ( io_generated_code->code_type == 0 ) {
Expand Down Expand Up @@ -784,9 +784,10 @@ void libxsmm_x86_instruction_vec_compute_mem( libxsmm_generated_code* io_generat
const unsigned int i_vec_reg_number_0,
const unsigned int i_vec_reg_number_1 ) {
/* @TODO add checks in debug mode */
if ( (i_instruction_set != LIBXSMM_X86_IMCI) &&
(i_instruction_set != LIBXSMM_X86_AVX512) &&
(i_use_broadcast != 0) ) {
if ( (i_instruction_set != LIBXSMM_X86_IMCI) &&
(i_instruction_set != LIBXSMM_X86_AVX512_MIC) &&
(i_instruction_set != LIBXSMM_X86_AVX512_CORE) &&
(i_use_broadcast != 0) ) {
libxsmm_handle_error( io_generated_code, LIBXSMM_ERR_NO_IMCI_AVX512_BCAST );
return;
}
Expand Down
50 changes: 29 additions & 21 deletions src/libxsmm.c
Expand Up @@ -573,7 +573,7 @@ LIBXSMM_INLINE LIBXSMM_RETARGETABLE internal_regentry* internal_init(void)
}
libxsmm_hash_init(internal_target_arch);
libxsmm_gemm_diff_init(internal_target_arch);
init_code = libxsmm_gemm_init(internal_target_archid, internal_prefetch);
init_code = libxsmm_gemm_init(internal_target_arch, internal_prefetch);
#if defined(__TRACE)
const char *const env_trace_init = getenv("LIBXSMM_TRACE");
if (EXIT_SUCCESS == init_code && 0 != env_trace_init) {
Expand Down Expand Up @@ -817,38 +817,42 @@ LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE int libxsmm_get_target_arch()
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_set_target_arch(int archid)
{
switch (archid) {
case LIBXSMM_X86_AVX512: {
internal_target_arch = LIBXSMM_X86_AVX512;
internal_target_archid = "knl"; /* "skx" is fine too */
case LIBXSMM_X86_AVX512_CORE: {
internal_target_arch = archid;
internal_target_archid = "skx";
} break;
case LIBXSMM_X86_AVX512_MIC: {
internal_target_arch = archid;
internal_target_archid = "knl";
} break;
case LIBXSMM_X86_AVX2: {
internal_target_arch = LIBXSMM_X86_AVX2;
internal_target_arch = archid;
internal_target_archid = "hsw";
} break;
case LIBXSMM_X86_AVX: {
internal_target_arch = LIBXSMM_X86_AVX;
internal_target_arch = archid;
internal_target_archid = "snb";
} break;
case LIBXSMM_X86_SSE4_2: {
internal_target_arch = LIBXSMM_X86_SSE4_2;
internal_target_arch = archid;
internal_target_archid = "wsm";
} break;
case LIBXSMM_X86_SSE4_1: {
internal_target_arch = LIBXSMM_X86_SSE4_2;
internal_target_archid = "sse4_1";
internal_target_arch = archid;
internal_target_archid = "sse4";
} break;
case LIBXSMM_X86_SSE3: {
internal_target_arch = LIBXSMM_X86_SSE3;
internal_target_arch = archid;
internal_target_archid = "sse3";
} break;
case LIBXSMM_TARGET_ARCH_GENERIC: {
internal_target_arch = archid;
internal_target_archid = "generic";
} break;
default: if (LIBXSMM_X86_GENERIC <= archid) {
internal_target_arch = LIBXSMM_X86_GENERIC;
internal_target_archid = "x86";
}
else if (LIBXSMM_TARGET_ARCH_GENERIC == 1) {
internal_target_arch = LIBXSMM_TARGET_ARCH_GENERIC;
internal_target_archid = "generic";
}
else {
internal_target_arch = LIBXSMM_TARGET_ARCH_UNKNOWN;
internal_target_archid = "unknown";
Expand Down Expand Up @@ -898,9 +902,13 @@ LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_set_target_archid(const char*
else if (1 < jit) { /* suppress libxsmm_cpuid_x86 and override archid */
libxsmm_set_target_arch(LIBXSMM_X86_GENERIC + jit);
}
else if (0 == strcmp("knl", name) || 0 == strcmp("skx", name) || 0 == strcmp("avx3", name) || 0 == strcmp("avx512", name) || 0 == strcmp("avx-512", name)) {
internal_target_arch = LIBXSMM_X86_AVX512;
internal_target_archid = name;
else if (0 == strcmp("skx", name) || 0 == strcmp("avx3", name) || 0 == strcmp("avx512", name)) {
internal_target_arch = LIBXSMM_X86_AVX512_CORE;
internal_target_archid = "skx";
}
else if (0 == strcmp("knl", name) || 0 == strcmp("mic2", name)) {
internal_target_arch = LIBXSMM_X86_AVX512_MIC;
internal_target_archid = "knl";
}
else if (0 == strcmp("hsw", name) || 0 == strcmp("avx2", name)) {
internal_target_arch = LIBXSMM_X86_AVX2;
Expand All @@ -916,19 +924,19 @@ LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_set_target_archid(const char*
}
else if (0 == strcmp("sse4_1", name)) {
internal_target_arch = LIBXSMM_X86_SSE4_1;
internal_target_archid = name;
internal_target_archid = "sse4";
}
else if (0 == strcmp("sse3", name)) {
internal_target_arch = LIBXSMM_X86_SSE3;
internal_target_archid = name;
internal_target_archid = "sse3";
}
else if (0 == strcmp("x86", name)) {
internal_target_arch = LIBXSMM_X86_GENERIC;
internal_target_archid = name;
internal_target_archid = "x86";
}
else if (0 == strcmp("generic", name)) {
internal_target_arch = LIBXSMM_TARGET_ARCH_GENERIC;
internal_target_archid = name;
internal_target_archid = "generic";
}
}
if (0 == internal_target_archid) {
Expand Down
3 changes: 2 additions & 1 deletion src/libxsmm.template.f
Expand Up @@ -122,7 +122,8 @@ MODULE LIBXSMM
& LIBXSMM_X86_SSE4_2 = 1004, &
& LIBXSMM_X86_AVX = 1005, &
& LIBXSMM_X86_AVX2 = 1006, &
& LIBXSMM_X86_AVX512 = 1007
& LIBXSMM_X86_AVX512_MIC = 1007, &
& LIBXSMM_X86_AVX512_CORE = 1008

! Type of a function specialized for a given parameter set.
ABSTRACT INTERFACE
Expand Down
4 changes: 2 additions & 2 deletions src/libxsmm_cpuid_x86.c
Expand Up @@ -67,13 +67,13 @@ LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE int libxsmm_cpuid_x86(const char** archid)
/* AVX512F(0x00010000), AVX512CD(0x10000000), AVX512PF(0x04000000),
AVX512ER(0x08000000) */
if (0x1C010000 == (0x1C010000 & ebx)) {
target_arch = LIBXSMM_X86_AVX512;
target_arch = LIBXSMM_X86_AVX512_MIC;
if (archid) *archid = "knl";
}
/* AVX512F(0x00010000), AVX512CD(0x10000000), AVX512DQ(0x00020000),
AVX512BW(0x40000000), AVX512VL(0x80000000) */
else if (0xD0030000 == (0xD0030000 & ebx)) {
target_arch = LIBXSMM_X86_AVX512;
target_arch = LIBXSMM_X86_AVX512_CORE;
if (archid) *archid = "skx";
}
}
Expand Down
7 changes: 3 additions & 4 deletions src/libxsmm_gemm.c
Expand Up @@ -41,7 +41,6 @@
#endif
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#if defined(LIBXSMM_OFFLOAD_TARGET)
# pragma offload_attribute(pop)
#endif
Expand All @@ -68,7 +67,7 @@ LIBXSMM_RETARGETABLE int libxsmm_internal_gemm_omp = 2;
LIBXSMM_RETARGETABLE int libxsmm_internal_gemm = 0;


LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_gemm_configure(const char* archid, int prefetch,
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_gemm_configure(int archid, int prefetch,
libxsmm_sgemm_function sgemm_function, libxsmm_dgemm_function dgemm_function)
{
int config = 0;
Expand All @@ -79,7 +78,7 @@ LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_gemm_configure(const char* ar
#if defined(__MIC__)
LIBXSMM_UNUSED(archid);
#else
if (0 == strcmp("knl", archid))
if (LIBXSMM_X86_AVX512_MIC == archid)
#endif
{
libxsmm_internal_gemm_nthreads_per_core = 4;
Expand Down Expand Up @@ -137,7 +136,7 @@ LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_gemm_configure(const char* ar
}


LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE LIBXSMM_GEMM_WEAK_DLIB int libxsmm_gemm_init(const char* archid, int prefetch)
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE LIBXSMM_GEMM_WEAK_DLIB int libxsmm_gemm_init(int archid, int prefetch)
{
/* internal pre-initialization step */
libxsmm_gemm_configure(archid, prefetch, 0/*auto-discovered*/, 0/*auto-discovered*/);
Expand Down
4 changes: 2 additions & 2 deletions src/libxsmm_gemm.h
Expand Up @@ -53,15 +53,15 @@ typedef LIBXSMM_RETARGETABLE void (*libxsmm_dgemm_function)(
* INTERNAL pre-initialization step called by libxsmm_gemm_init,
* e.g. configures the tile sizes for multithreaded GEMM functions.
*/
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_gemm_configure(const char* archid, int prefetch,
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_gemm_configure(int archid, int prefetch,
/** If NULL is given, the routine attempts to find the SGEMM function. */
libxsmm_sgemm_function sgemm_function,
/** If NULL is given, the routine attempts to find the DGEMM function. */
libxsmm_dgemm_function dgemm_function);

/** Provides GEMM functions available via BLAS; NOT thread-safe. */
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE int libxsmm_gemm_init(
const char* archid, int prefetch/*default prefetch strategy*/);
int archid, int prefetch/*default prefetch strategy*/);

/** Finalizes the gemm facility; NOT thread-safe. */
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_gemm_finalize(void);
Expand Down
16 changes: 12 additions & 4 deletions src/libxsmm_gemm_diff.c
Expand Up @@ -45,8 +45,10 @@
#endif

#if defined(LIBXSMM_MAX_STATIC_TARGET_ARCH) && (28 == LIBXSMM_GEMM_DESCRIPTOR_SIZE /*|| any other implemented size*/)
# if (LIBXSMM_X86_AVX512 <= LIBXSMM_MAX_STATIC_TARGET_ARCH)
# define LIBXSMM_GEMM_DIFF_AVX512
# if (LIBXSMM_X86_AVX512_CORE <= LIBXSMM_MAX_STATIC_TARGET_ARCH)
# define LIBXSMM_GEMM_DIFF_AVX512_CORE
# elif (LIBXSMM_X86_AVX512_MIC <= LIBXSMM_MAX_STATIC_TARGET_ARCH)
# define LIBXSMM_GEMM_DIFF_AVX512_MIC
# elif (LIBXSMM_X86_AVX2 <= LIBXSMM_MAX_STATIC_TARGET_ARCH)
# define LIBXSMM_GEMM_DIFF_AVX2
# elif (LIBXSMM_X86_AVX <= LIBXSMM_MAX_STATIC_TARGET_ARCH)
Expand All @@ -68,7 +70,11 @@ LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void libxsmm_gemm_diff_init(int target_arc
internal_gemm_diffn_function = libxsmm_gemm_diffn_imci;
internal_gemm_diff_function = libxsmm_gemm_diff_imci;
#else
if (LIBXSMM_X86_AVX512 <= target_arch) {
if (LIBXSMM_X86_AVX512_CORE <= target_arch) {
internal_gemm_diffn_function = libxsmm_gemm_diffn_avx512;
internal_gemm_diff_function = libxsmm_gemm_diff_avx2;
}
else if (LIBXSMM_X86_AVX512_MIC <= target_arch) {
internal_gemm_diffn_function = libxsmm_gemm_diffn_avx512;
internal_gemm_diff_function = libxsmm_gemm_diff_avx2;
}
Expand Down Expand Up @@ -240,7 +246,9 @@ unsigned int libxsmm_gemm_diffn(const libxsmm_gemm_descriptor* reference, const
/* attempt to rely on static code path avoids to rely on capability of inlining pointer-based function call */
#if defined(LIBXSMM_GEMM_DIFF_SW) && (0 != LIBXSMM_GEMM_DIFF_SW)
return libxsmm_gemm_diffn_sw(reference, descs, hint, ndescs, nbytes);
#elif defined(LIBXSMM_STATIC_TARGET_ARCH) && (LIBXSMM_X86_AVX512 <= LIBXSMM_STATIC_TARGET_ARCH)
#elif defined(LIBXSMM_STATIC_TARGET_ARCH) && (LIBXSMM_X86_AVX512_CORE <= LIBXSMM_STATIC_TARGET_ARCH)
return libxsmm_gemm_diffn_avx512(reference, descs, hint, ndescs, nbytes);
#elif defined(LIBXSMM_STATIC_TARGET_ARCH) && (LIBXSMM_X86_AVX512_MIC <= LIBXSMM_STATIC_TARGET_ARCH)
return libxsmm_gemm_diffn_avx512(reference, descs, hint, ndescs, nbytes);
#elif defined(LIBXSMM_STATIC_TARGET_ARCH) && (LIBXSMM_X86_AVX2 <= LIBXSMM_STATIC_TARGET_ARCH)
return libxsmm_gemm_diffn_avx2(reference, descs, hint, ndescs, nbytes);
Expand Down
2 changes: 1 addition & 1 deletion src/libxsmm_gemm_extomp.c
Expand Up @@ -196,7 +196,7 @@
#if defined(LIBXSMM_GEMM_EXTWRAP) && !defined(__STATIC)

/* implementation variant for non-static linkage; overrides weak libxsmm_gemm_init in libxsmm_gemm.c */
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE int libxsmm_gemm_init(const char* archid, int prefetch)
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE int libxsmm_gemm_init(int archid, int prefetch)
{
union { const void* pv; libxsmm_sgemm_function pf; } internal_sgemm = { NULL };
union { const void* pv; libxsmm_dgemm_function pf; } internal_dgemm = { NULL };
Expand Down
7 changes: 4 additions & 3 deletions src/libxsmm_intrinsics_x86.h
Expand Up @@ -37,7 +37,8 @@
# define LIBXSMM_STATIC_TARGET_ARCH LIBXSMM_X86_IMCI
#else
# if defined(__AVX512F__)
# define LIBXSMM_STATIC_TARGET_ARCH LIBXSMM_X86_AVX512
# define LIBXSMM_STATIC_TARGET_ARCH LIBXSMM_X86_AVX512_CORE
# define LIBXSMM_STATIC_TARGET_ARCH LIBXSMM_X86_AVX512_MIC
# elif defined(__AVX2__)
# define LIBXSMM_STATIC_TARGET_ARCH LIBXSMM_X86_AVX2
# elif defined(__AVX__)
Expand All @@ -52,7 +53,7 @@
# define LIBXSMM_STATIC_TARGET_ARCH LIBXSMM_X86_GENERIC
# endif
# if defined(__INTEL_COMPILER) /*TODO: version check*/
# define LIBXSMM_MAX_STATIC_TARGET_ARCH LIBXSMM_X86_AVX512
# define LIBXSMM_MAX_STATIC_TARGET_ARCH LIBXSMM_X86_AVX512_CORE
# include <immintrin.h>
# elif defined(_MSC_VER) /*TODO: version check*/
# define LIBXSMM_MAX_STATIC_TARGET_ARCH LIBXSMM_X86_AVX2
Expand Down Expand Up @@ -106,7 +107,7 @@
# if (LIBXSMM_VERSION3(4, 9, 0) <= LIBXSMM_VERSION3(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)) \
&& 0 /*AVX-512 support in GCC appears to be incomplete (missing at least _mm512_mask_reduce_or_epi32)*/
# define LIBXSMM_INTRINSICS LIBXSMM_ATTRIBUTE(target("sse3,sse4.1,sse4.2,avx,avx2,avx512f"))
# define LIBXSMM_MAX_STATIC_TARGET_ARCH LIBXSMM_X86_AVX512
# define LIBXSMM_MAX_STATIC_TARGET_ARCH LIBXSMM_X86_AVX512_CORE
# pragma GCC push_options
# pragma GCC target("sse3,sse4.1,sse4.2,avx,avx2,avx512f")
# include <immintrin.h>
Expand Down
2 changes: 1 addition & 1 deletion tests/descriptor.c
Expand Up @@ -160,7 +160,7 @@ int main()
result = 26;
}
}
if (EXIT_SUCCESS == result && LIBXSMM_X86_AVX512 <= cpuid) {
if (EXIT_SUCCESS == result && LIBXSMM_X86_AVX512_MIC/*incl. LIBXSMM_X86_AVX512_CORE*/) {
if (1 != libxsmm_gemm_diffn_avx512(&a.descriptor, &descs[0].desc, 0/*hint*/,
sizeof(descs) / sizeof(*descs), sizeof(*descs)))
{
Expand Down
2 changes: 1 addition & 1 deletion version.txt
@@ -1 +1 @@
master-1.4.1-54
master-1.4.1-55

0 comments on commit 3610639

Please sign in to comment.