diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 6ae91f5cc32d6..6c7af9bf2e245 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -200,6 +200,7 @@ XX(jl_get_binding_wr) \ XX(jl_get_cpu_name) \ XX(jl_get_cpu_features) \ + XX(jl_cpu_has_fma) \ XX(jl_get_current_task) \ XX(jl_get_default_sysimg_path) \ XX(jl_get_excstack) \ diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 033a43720c96d..2539c5cd2e37c 100644 --- a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -59,7 +59,7 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString(); - SmallVector Features; + SmallVector Features; FS.split(Features, ','); for (StringRef Feature : Features) if (TT.isARM()) { @@ -67,7 +67,7 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS return typ == "f32" || typ == "f64"; else if (Feature == "+vfp4sp") return typ == "f32"; - } else { + } else if (TT.isX86()) { if (Feature == "+fma" || Feature == "+fma4") return typ == "f32" || typ == "f64"; } diff --git a/src/processor.h b/src/processor.h index c22f8cff34a63..a4c8deb9a8796 100644 --- a/src/processor.h +++ b/src/processor.h @@ -226,6 +226,8 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void); // Return the features of the host CPU as a julia string. JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void); // Dump the name and feature set of the host CPU +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits); +// Check if the CPU has native FMA instructions; // For debugging only JL_DLLEXPORT void jl_dump_host_cpu(void); JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data); diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 1cb209dbbcd62..6a09e00cf23e7 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1808,6 +1808,22 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); } +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) +{ +#ifdef _CPU_AARCH64_ + return jl_true; +#else + TargetData target = jit_targets.front(); + FeatureList features = target.en.features; + if (bits == 32 && test_nbit(features, Feature::vfp4sp)) + return jl_true; + else if ((bits == 64 || bits == 32) && test_nbit(features, Feature::vfp4)) + return jl_true; + else + return jl_false; +#endif +} + jl_image_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp index 8c343aa982470..399d31af05ebf 100644 --- a/src/processor_fallback.cpp +++ b/src/processor_fallback.cpp @@ -172,6 +172,11 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); } +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) +{ + return jl_false; // Match behaviour of have_fma in src/llvm-cpufeatures.cpp (assume false) +} + JL_DLLEXPORT void jl_dump_host_cpu(void) { jl_safe_printf("CPU: %s\n", host_cpu_name().c_str()); diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index 361a8c6167541..3f8c62acaed11 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -4,6 +4,7 @@ // CPUID +#include "julia.h" extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType) { asm volatile ( @@ -1062,6 +1063,16 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); } +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) +{ + TargetData target = jit_targets.front(); + FeatureList features = target.en.features; + if ((bits == 32 || bits == 64) && (test_nbit(features, Feature::fma) || test_nbit(features, Feature::fma4))) + return jl_true; + else + return jl_false; +} + jl_image_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 7b4ddcd6e8b29..4569c2081ae88 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -1671,10 +1671,15 @@ un_fintrinsic(trunc_float,trunc_llvm) un_fintrinsic(rint_float,rint_llvm) un_fintrinsic(sqrt_float,sqrt_llvm) un_fintrinsic(sqrt_float,sqrt_llvm_fast) +jl_value_t *jl_cpu_has_fma(int bits); JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ) { - JL_TYPECHK(have_fma, datatype, typ); - // TODO: run-time feature check? - return jl_false; + JL_TYPECHK(have_fma, datatype, typ); // TODO what about float16/bfloat16? + if (typ == (jl_value_t*)jl_float32_type) + return jl_cpu_has_fma(32); + else if (typ == (jl_value_t*)jl_float64_type) + return jl_cpu_has_fma(64); + else + return jl_false; } diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll index 5125743248fec..323f5e24015e9 100644 --- a/test/llvmpasses/cpu-features.ll +++ b/test/llvmpasses/cpu-features.ll @@ -3,6 +3,8 @@ ; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s ; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-linux-gnu" declare i1 @julia.cpu.have_fma.f64() declare double @with_fma(double %0, double %1, double %2) diff --git a/test/sysinfo.jl b/test/sysinfo.jl index e8194b113ba24..8864e3a48efc7 100644 --- a/test/sysinfo.jl +++ b/test/sysinfo.jl @@ -12,6 +12,8 @@ Base.Sys.loadavg() @test length(ccall(:jl_get_cpu_name, String, ())) != 0 @test length(ccall(:jl_get_cpu_features, String, ())) >= 0 +foo_fma() = Core.Intrinsics.have_fma(Int64) +@test ccall(:jl_cpu_has_fma, Bool, (Cint,), 64) == foo_fma() if Sys.isunix() mktempdir() do tempdir