From d8706d621ecd2f84ba645152d3ab015ce2d73553 Mon Sep 17 00:00:00 2001
From: Tim Corringham
Date: Wed, 1 Oct 2025 17:48:36 +0100
Subject: [PATCH 1/4] [HLSL] Implement the f16tof32() intrinsic

Implement the f16tof32() intrinsic, including DXIL and SPIRV codegen,
and associated tests.
---
 clang/include/clang/Basic/Builtins.td         |   6 +
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          |  16 +++
 clang/lib/CodeGen/CGHLSLRuntime.h             |   1 +
 .../lib/Headers/hlsl/hlsl_alias_intrinsics.h  |  21 +++
 clang/lib/Sema/SemaHLSL.cpp                   |  57 ++++++--
 .../builtins/f16tof32-builtin.hlsl            |  30 ++++
 clang/test/CodeGenHLSL/builtins/f16tof32.hlsl |  30 ++++
 .../SemaHLSL/BuiltIns/f16tof32-errors.hlsl    | 134 ++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   3 +
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |   4 +
 llvm/lib/Target/DirectX/DXIL.td               |   9 ++
 .../DirectX/DirectXTargetTransformInfo.cpp    |   8 +-
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |  90 ++++++++++++
 llvm/test/CodeGen/DirectX/f16tof32.ll         |  57 ++++++++
 .../CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll |  76 ++++++++++
 15 files changed, 527 insertions(+), 15 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/f16tof32.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index a350acdf146ab..8bdff10d57745 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5222,6 +5222,12 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate {
   let Prototype = "T(unsigned int, T)";
 }
 
+def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_f16tof32"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 384bd59e7533a..d5aebd6d64817 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -560,6 +560,22 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
         ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    llvm::Type *Xty = Op0->getType();
+    llvm::Type *retType = llvm::Type::getFloatTy(this->getLLVMContext());
+    if (Xty->isVectorTy()) {
+      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
+      retType = llvm::VectorType::get(
+          retType, ElementCount::getFixed(XVecTy->getNumElements()));
+    }
+    if (!E->getArg(0)->getType()->hasUnsignedIntegerRepresentation())
+      llvm_unreachable(
+          "f16tof32 operand must have an unsigned int representation");
+    return Builder.CreateIntrinsic(
+        retType, CGM.getHLSLRuntime().getLegacyF16ToF32Intrinsic(),
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+  }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 103b4a98f6c26..e36e89fe16125 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -96,6 +96,7 @@ class CGHLSLRuntime {
                                    flattened_thread_id_in_group)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsInf, isinf)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsNaN, isnan)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(LegacyF16ToF32, legacyf16tof32)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index d973371312701..a85accdcc4549 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1052,6 +1052,27 @@ float3 exp2(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
 float4 exp2(float4);
 
+//===----------------------------------------------------------------------===//
+// f16tof32 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn float f16tof32(uint x)
+/// \brief Returns the half value stored in the low 16 bits of the uint arg
+/// converted to a float.
+/// \param x The uint containing two half values.
+///
+/// \returns The float value of the half value found in the low 16 bits of the
+/// \a x parameter.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float f16tof32(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float2 f16tof32(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float3 f16tof32(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float4 f16tof32(uint4);
+
 //===----------------------------------------------------------------------===//
 // firstbithigh builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 5b3e89f936327..be2a85442c9b6 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2738,6 +2738,23 @@ static bool CheckUnsignedIntRepresentation(Sema *S, SourceLocation Loc,
   return false;
 }
 
+static bool CheckExpectedBitWidth(Sema *S, CallExpr *TheCall,
+                                  unsigned ArgOrdinal, unsigned Width) {
+  QualType ArgTy = TheCall->getArg(ArgOrdinal)->getType();
+  if (auto *VTy = ArgTy->getAs<VectorType>())
+    ArgTy = VTy->getElementType();
+  // ensure arg type has expected bit width
+  uint64_t ElementBitCount =
+      S->getASTContext().getTypeSizeInChars(ArgTy).getQuantity() * 8;
+  if (ElementBitCount != Width) {
+    S->Diag(TheCall->getArg(ArgOrdinal)->getBeginLoc(),
+            diag::err_integer_incorrect_bit_count)
+        << Width << ElementBitCount;
+    return true;
+  }
+  return false;
+}
+
 static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
                                        QualType ReturnType) {
   auto *VecTyA = TheCall->getArg(0)->getType()->getAs<VectorType>();
@@ -2897,24 +2914,16 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
                                     CheckUnsignedIntVecRepresentation))
       return true;
 
-    auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
     // ensure arg integers are 32-bits
-    uint64_t ElementBitCount = getASTContext()
-                                   .getTypeSizeInChars(VTy->getElementType())
-                                   .getQuantity() *
-                               8;
-    if (ElementBitCount != 32) {
-      SemaRef.Diag(TheCall->getBeginLoc(),
-                   diag::err_integer_incorrect_bit_count)
-          << 32 << ElementBitCount;
+    if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
       return true;
-    }
 
     // ensure both args are vectors of total bit size of a multiple of 64
+    auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
     int NumElementsArg = VTy->getNumElements();
     if (NumElementsArg != 2 && NumElementsArg != 4) {
       SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
-          << 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
+          << 1 /*a multiple of*/ << 64 << NumElementsArg * 32;
       return true;
     }
 
@@ -3230,7 +3239,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     break;
   }
   // Note these are llvm builtins that we want to catch invalid intrinsic
-  // generation. Normal handling of these builitns will occur elsewhere.
+  // generation. Normal handling of these builtins will occur elsewhere.
  case Builtin::BI__builtin_elementwise_bitreverse: {
    // does not include a check for number of arguments
    // because that is done previously
@@ -3340,6 +3349,30 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
    }
    break;
  }
+  case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
+                                   CheckUnsignedIntRepresentation))
+      return true;
+    // ensure arg integers are 32 bits
+    if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
+      return true;
+    // check it wasn't a bool type
+    QualType ArgTy = TheCall->getArg(0)->getType();
+    if (auto *VTy = ArgTy->getAs<VectorType>())
+      ArgTy = VTy->getElementType();
+    if (ArgTy->isBooleanType()) {
+      SemaRef.Diag(TheCall->getArg(0)->getBeginLoc(),
+                   diag::err_builtin_invalid_arg_type)
+          << 1 << /* scalar or vector of */ 5 << /* unsigned int */ 3
+          << /* no fp */ 0 << TheCall->getArg(0)->getType();
+      return true;
+    }
+
+    SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
+    break;
+  }
  }
  return false;
}
diff --git a/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
new file mode 100644
index 0000000000000..65dba664bb5ea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+
+
diff --git a/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
new file mode 100644
index 0000000000000..b68bc197f16c5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck
%s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return f16tof32(p0); }
+
+
+
diff --git a/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
new file mode 100644
index 0000000000000..2c4baae524977
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+float builtin_f16tof32_too_few_arg() {
+  return __builtin_hlsl_elementwise_f16tof32();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+float builtin_f16tof32_too_many_arg(uint p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+float builtin_f16tof32_bool(bool p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float builtin_f16tof32_bool4(bool4 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool4' (aka 'vector<bool, 4>')}}
+}
+
+float builtin_f16tof32_int16_t(int16_t p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+}
+
+float builtin_f16tof32_int16_t(unsigned short p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float builtin_f16tof32_int(int p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was
'int')}}
+}
+
+float builtin_f16tof32_int64_t(long p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 builtin_f16tof32_int2_to_float2_promotion(int2 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+float builtin_f16tof32_half(half p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float builtin_f16tof32_half4(half4 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half4' (aka 'vector<half, 4>'))}}
+}
+
+float builtin_f16tof32_float(float p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float builtin_f16tof32_double(double p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
+
+float f16tof32_too_few_arg() {
+  return f16tof32();
+  // expected-error@-1 {{no matching function for call to 'f16tof32'}}
+}
+
+float f16tof32_too_many_arg(uint p0) {
+  return f16tof32(p0, p0);
+  // expected-error@-1 {{no matching function for call to 'f16tof32'}}
+}
+
+float f16tof32_bool(bool p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float f16tof32_bool3(bool3 p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>')}}
+}
+
+
+float f16tof32_int16_t(int16_t p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+}
+
+float f16tof32_int16_t(unsigned short p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float f16tof32_int(int p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
+}
+
+float f16tof32_int64_t(long p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 f16tof32_int2_to_float2_promotion(int3 p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int3' (aka 'vector<int, 3>'))}}
+}
+
+float f16tof32_half(half p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float f16tof32_half2(half2 p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half2' (aka 'vector<half, 2>'))}}
+}
+
+float f16tof32_float(float p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float f16tof32_double(double p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td
b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 3b7077c52db21..74a6bc41f8da7 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -140,6 +140,9 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
 def int_dx_isnan : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                                          [llvm_anyfloat_ty], [IntrNoMem]>;
 
+def int_dx_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
+                                                  [llvm_anyint_ty], [IntrNoMem]>;
+
 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
                                         [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
                                         [IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 49a182be98acd..655b1aa1dde21 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -198,4 +198,8 @@ def int_spv_resource_nonuniformindex
   def int_spv_generic_cast_to_ptr_explicit
       : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
                               [IntrNoMem, NoUndef]>;
+
+  def int_spv_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
+      [llvm_anyint_ty], [IntrNoMem]>;
+
 }
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 44c48305f2832..3ae4e09b2f45d 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1069,6 +1069,15 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
   let attributes = [Attributes];
 }
 
+def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> {
+  let Doc = "returns the float16 stored in the low-half of the uint converted "
+            "to a float";
+  let intrinsics = [IntrinSelect<int_dx_legacyf16tof32>];
+  let arguments = [Int32Ty];
+  let result = FloatTy;
+  let stages = [Stages<DXIL1_0, all_stages>];
+}
+
 def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let Doc = "returns the count of bits set to 1 across the wave";
   let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>];
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 68fd3e0bc74c7..614a4bab36e67 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -29,11 +29,12 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                             int OpdIdx) const {
   switch (ID) {
   case Intrinsic::dx_asdouble:
-  case Intrinsic::dx_isinf:
-  case Intrinsic::dx_isnan:
   case Intrinsic::dx_firstbitlow:
-  case Intrinsic::dx_firstbituhigh:
   case Intrinsic::dx_firstbitshigh:
+  case Intrinsic::dx_firstbituhigh:
+  case Intrinsic::dx_isinf:
+  case Intrinsic::dx_isnan:
+  case Intrinsic::dx_legacyf16tof32:
     return OpdIdx == 0;
   default:
     return OpdIdx == -1;
@@ -50,6 +51,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_frac:
   case Intrinsic::dx_isinf:
   case Intrinsic::dx_isnan:
+  case Intrinsic::dx_legacyf16tof32:
   case Intrinsic::dx_rsqrt:
   case Intrinsic::dx_saturate:
   case Intrinsic::dx_splitdouble:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 021353ab716f7..12ae8ff2d0478 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -212,6 +212,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectOpIsNan(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I) const;
 
+  bool selectF16ToF32(Register ResVReg, const SPIRVType *ResType,
+                      MachineInstr &I) const;
+
   template <bool Signed>
   bool
selectDot4AddPacked(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I) const;
@@ -3472,6 +3475,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
  case Intrinsic::spv_resource_nonuniformindex: {
    return selectResourceNonUniformIndex(ResVReg, ResType, I);
  }
+  case Intrinsic::spv_legacyf16tof32: {
+    return selectF16ToF32(ResVReg, ResType, I);
+  }
+
  default: {
    std::string DiagMsg;
    raw_string_ostream OS(DiagMsg);
@@ -3744,6 +3751,89 @@ bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
  return true;
}

+bool SPIRVInstructionSelector::selectF16ToF32(Register ResVReg,
+                                              const SPIRVType *ResType,
+                                              MachineInstr &I) const {
+  assert(I.getNumOperands() == 3);
+  assert(I.getOperand(0).isReg());
+  assert(I.getOperand(2).isReg());
+  Register SrcReg = I.getOperand(2).getReg();
+  const SPIRVType *SrcRegType = GR.getSPIRVTypeForVReg(SrcReg);
+  LLT SrcType = MRI->getType(SrcReg);
+  SPIRVType *SrcEltType = GR.getScalarOrVectorComponentType(SrcRegType);
+  SPIRVType *ResEltType = GR.getScalarOrVectorComponentType(ResType);
+  const TargetRegisterClass *SrcRegClass = GR.getRegClass(SrcEltType);
+  const TargetRegisterClass *ResRegClass = GR.getRegClass(ResEltType);
+  MachineIRBuilder MIRBuilder(I);
+  const SPIRVType *Vec2ResType =
+      GR.getOrCreateSPIRVVectorType(ResEltType, 2, MIRBuilder, false);
+  const TargetRegisterClass *Vec2RegClass = GR.getRegClass(Vec2ResType);
+
+  bool Result = true;
+  MachineBasicBlock &BB = *I.getParent();
+  if (SrcType.isVector()) {
+    // We have a vector of uints to convert elementwise
+    uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
+    SmallVector<Register> ComponentRegisters;
+    for (uint64_t Idx = 0; Idx < ResultSize; Idx++) {
+      Register EltReg = MRI->createVirtualRegister(SrcRegClass);
+      Register FReg = MRI->createVirtualRegister(ResRegClass);
+      Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
+
+      Result =
+          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+              .addDef(EltReg)
+              .addUse(GR.getSPIRVTypeID(SrcEltType))
+              .addUse(SrcReg)
+              .addImm(Idx)
+              .constrainAllUses(TII, TRI, RBI);
+
+      Result &=
+          BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+              .addDef(Vec2Reg)
+              .addUse(GR.getSPIRVTypeID(Vec2ResType))
+              .addImm(
+                  static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+              .addImm(GL::UnpackHalf2x16)
+              .addUse(EltReg)
+              .constrainAllUses(TII, TRI, RBI);
+
+      Result &=
+          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+              .addDef(FReg)
+              .addUse(GR.getSPIRVTypeID(ResEltType))
+              .addUse(Vec2Reg)
+              .addImm(0)
+              .constrainAllUses(TII, TRI, RBI);
+
+      ComponentRegisters.emplace_back(FReg);
+    }
+
+    MachineInstrBuilder MIB =
+        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeConstruct))
+            .addDef(ResVReg)
+            .addUse(GR.getSPIRVTypeID(ResType));
+
+    for (Register ComponentReg : ComponentRegisters)
+      MIB.addUse(ComponentReg);
+    return Result && MIB.constrainAllUses(TII, TRI, RBI);
+
+  } else if (SrcType.isScalar()) {
+    // just a scalar uint to convert
+    Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
+    Result &= selectExtInst(Vec2Reg, Vec2ResType, I, GL::UnpackHalf2x16);
+    Result &=
+        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+            .addDef(ResVReg)
+            .addUse(GR.getSPIRVTypeID(ResType))
+            .addUse(Vec2Reg)
+            .addImm(0)
+            .constrainAllUses(TII, TRI, RBI);
+    return Result;
+  }
+  return false;
+}
+
 void SPIRVInstructionSelector::decorateUsesAsNonUniform(
     Register &NonUniformReg) const {
   llvm::SmallVector<Register> WorkList = {NonUniformReg};
diff --git
a/llvm/test/CodeGen/DirectX/f16tof32.ll b/llvm/test/CodeGen/DirectX/f16tof32.ll
new file mode 100644
index 0000000000000..edc5c1942e8bd
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/f16tof32.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s
+
+define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 %p0)
+  ; CHECK: ret float [[UINT]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %p0)
+  ret float %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT2_0:%.*]] = extractelement <2 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_0]])
+  ; CHECK: [[UINT2_1:%.*]] = extractelement <2 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_1]])
+  ; CHECK: [[FLOAT2_0:%.*]] = insertelement <2 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT2_1:%.*]] = insertelement <2 x float> [[FLOAT2_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: ret <2 x float> [[FLOAT2_1]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %p0)
+  ret <2 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT3_0:%.*]] = extractelement <3 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_0]])
+  ; CHECK: [[UINT3_1:%.*]] = extractelement <3 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_1]])
+  ; CHECK: [[UINT3_2:%.*]] = extractelement <3 x i32> %p0, i64 2
+  ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_2]])
+  ; CHECK: [[FLOAT3_0:%.*]] = insertelement <3 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT3_1:%.*]] = insertelement <3 x float> [[FLOAT3_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: [[FLOAT3_2:%.*]] = insertelement <3 x float> [[FLOAT3_1]], float [[FLOAT_2]], i64 2
+  ; CHECK: ret <3 x float> [[FLOAT3_2]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %p0)
+  ret <3 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT4_0:%.*]] = extractelement <4 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_0]])
+  ; CHECK: [[UINT4_1:%.*]] = extractelement <4 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_1]])
+  ; CHECK: [[UINT4_2:%.*]] = extractelement <4 x i32> %p0, i64 2
+  ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_2]])
+  ; CHECK: [[UINT4_3:%.*]] = extractelement <4 x i32> %p0, i64 3
+  ; CHECK: [[FLOAT_3:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_3]])
+  ; CHECK: [[FLOAT4_0:%.*]] = insertelement <4 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT4_1:%.*]] = insertelement <4 x float> [[FLOAT4_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: [[FLOAT4_2:%.*]] = insertelement <4 x
float> [[FLOAT4_1]], float [[FLOAT_2]], i64 2
+  ; CHECK: [[FLOAT4_3:%.*]] = insertelement <4 x float> [[FLOAT4_2]], float [[FLOAT_3]], i64 3
+  ; CHECK: ret <4 x float> [[FLOAT4_3]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %p0)
+  ret <4 x float> %hlsl.f16tof32
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
new file mode 100644
index 0000000000000..883fcbd02e769
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
@@ -0,0 +1,76 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0
+; CHECK-DAG: [[UINT2:%.*]] = OpTypeVector [[UINT]] 2
+; CHECK-DAG: [[UINT3:%.*]] = OpTypeVector [[UINT]] 3
+; CHECK-DAG: [[UINT4:%.*]] = OpTypeVector [[UINT]] 4
+; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32
+; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2
+; CHECK-DAG: [[FLOAT3:%.*]] = OpTypeVector [[FLOAT]] 3
+; CHECK-DAG: [[FLOAT4:%.*]] = OpTypeVector [[FLOAT]] 4
+
+; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT]]
+; CHECK: [[UNPACK2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0]]
+; CHECK: [[UNPACK:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2]] 0
+; CHECK: OpReturnValue [[UNPACK]]
+define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 {
+entry:
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.spv.legacyf16tof32.i32(i32 %p0)
+  ret float %hlsl.f16tof32
+}
+
+; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT2]]
+; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0
+; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1
+; CHECK-DAG: [[UNPACK2_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]]
+; CHECK-DAG: [[UNPACK2_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]]
+; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2_0]] 0
+; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2_1]] 0
+; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT2]] [[RESULT_0]] [[RESULT_1]]
+; CHECK: OpReturnValue [[RESULT]]
+define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.spv.legacyf16tof32.v2i32(<2 x i32> %p0)
+  ret <2 x float> %hlsl.f16tof32
+}
+
+; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT3]]
+; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0
+; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1
+; CHECK-DAG: [[P0_2:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 2
+; CHECK-DAG: [[UNPACK3_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]]
+; CHECK-DAG: [[UNPACK3_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]]
+; CHECK-DAG: [[UNPACK3_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_2]]
+; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_0]] 0
+; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_1]] 0
+; CHECK-DAG: [[RESULT_2:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_2]] 0
+; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT3]] [[RESULT_0]] [[RESULT_1]] [[RESULT_2]]
+; CHECK: OpReturnValue
[[RESULT]] +define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 { +entry: + %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.spv.legacyf16tof32.v3i32(<3 x i32> %p0) + ret <3 x float> %hlsl.f16tof32 +} + +; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT4]] +; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0 +; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1 +; CHECK-DAG: [[P0_2:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 2 +; CHECK-DAG: [[P0_3:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 3 +; CHECK-DAG: [[UNPACK4_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]] +; CHECK-DAG: [[UNPACK4_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]] +; CHECK-DAG: [[UNPACK4_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_2]] +; CHECK-DAG: [[UNPACK4_3:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_3]] +; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_0]] 0 +; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_1]] 0 +; CHECK-DAG: [[RESULT_2:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_2]] 0 +; CHECK-DAG: [[RESULT_3:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_3]] 0 +; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT4]] [[RESULT_0]] [[RESULT_1]] [[RESULT_2]] [[RESULT_3]] +; CHECK: OpReturnValue [[RESULT]] +define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 { +entry: + %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.spv.legacyf16tof32.v4i32(<4 x i32> %p0) + ret <4 x float> %hlsl.f16tof32 +} From da0a2143d099ac2a66b6b655f5d8c17a10111630 Mon Sep 17 00:00:00 2001 From: Tim Corringham Date: Mon, 3 Nov 2025 18:57:56 +0000 Subject: [PATCH 2/4] Changes from review comments Move the unrolling of the UnpackHalf2x16 from the SPIRV codegen to Clang IR CodeGen, and replace the spv_legacyf16tof32 intrinsic with a spv_unpackhalf2x16 intrinsic. This greatly simplifies the SPIRV codegen, at the expense of slightly complicating the Clang IR codegen. 
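As a rough illustration only (the IR value names below are invented for this
note, not taken from the patch or its tests), the unrolling now done in Clang
IR CodeGen emits one unpackhalf2x16 call per element and keeps the low half of
each unpacked pair:

  ; scalar: f16tof32(uint p0), SPIRV target
  %unpack = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %p0)
  %lo     = extractelement <2 x float> %unpack, i64 0

  ; vector: f16tof32(uint2 p0), SPIRV target, unrolled elementwise
  %e0  = extractelement <2 x i32> %p0, i64 0
  %u0  = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %e0)
  %f0  = extractelement <2 x float> %u0, i64 0
  %v0  = insertelement <2 x float> poison, float %f0, i64 0
  %e1  = extractelement <2 x i32> %p0, i64 1
  %u1  = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %e1)
  %f1  = extractelement <2 x float> %u1, i64 0
  %res = insertelement <2 x float> %v0, float %f1, i64 1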
---
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          | 64 ++++++++++---
 clang/lib/CodeGen/CGHLSLRuntime.h             |  1 -
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |  3 +-
 .../Target/SPIRV/SPIRVInstructionSelector.cpp | 90 +------------------
 4 files changed, 53 insertions(+), 105 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index d5aebd6d64817..d7e69eb816873 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,6 +160,55 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   return LastInst;
 }
 
+static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.FloatTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasUnsignedIntegerRepresentation())
+    llvm_unreachable(
+        "f16tof32 operand must have an unsigned int representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(
+        ResType, Intrinsic::dx_legacyf16tof32,
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto UnpackType = llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    if (NumElements == 0) {
+      // a scalar input - simply extract the first element of the unpacked vector
+      Value *Unpack = CGF.Builder.CreateIntrinsic(
+          UnpackType, Intrinsic::spv_unpackhalf2x16,
+          ArrayRef<Value *>{Op0});
+      return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+    } else {
+      // a vector input - build a congruent output vector by iterating through
+      // the input vector calling unpackhalf2x16 for each element
+      Value *Result = PoisonValue::get(ResType);
+      for (uint64_t i = 0; i < NumElements; i++) {
+        Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
+        Value *Unpack = CGF.Builder.CreateIntrinsic(UnpackType,
+            Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
+        Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+        Result = CGF.Builder.CreateInsertElement(Result, Res, i);
+      }
+      return Result;
+    }
+  }
+
+  llvm_unreachable(
+      "Intrinsic F16ToF32 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -561,20 +610,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
   }
   case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
-    Value *Op0 = EmitScalarExpr(E->getArg(0));
-    llvm::Type *Xty = Op0->getType();
-    llvm::Type *retType = llvm::Type::getFloatTy(this->getLLVMContext());
-    if (Xty->isVectorTy()) {
-      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
-      retType = llvm::VectorType::get(
-          retType, ElementCount::getFixed(XVecTy->getNumElements()));
-    }
-    if (!E->getArg(0)->getType()->hasUnsignedIntegerRepresentation())
-      llvm_unreachable(
-          "f16tof32 operand must have an unsigned int representation");
-    return Builder.CreateIntrinsic(
-        retType, CGM.getHLSLRuntime().getLegacyF16ToF32Intrinsic(),
-        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+    return handleElementwiseF16ToF32(*this, E);
   }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index e36e89fe16125..103b4a98f6c26 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -96,7 +96,6 @@ class CGHLSLRuntime {
                                    flattened_thread_id_in_group)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsInf, isinf)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsNaN, isnan)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(LegacyF16ToF32, legacyf16tof32)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 655b1aa1dde21..4ecc69122abdd 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -199,7 +199,6 @@ def int_spv_resource_nonuniformindex
       : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
                               [IntrNoMem, NoUndef]>;
 
-  def int_spv_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
-      [llvm_anyint_ty], [IntrNoMem]>;
+  def int_spv_unpackhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i32_ty], [IntrNoMem]>;
 
 }
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 12ae8ff2d0478..97b19bdd35ea5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -212,9 +212,6 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectOpIsNan(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I) const;
 
-  bool selectF16ToF32(Register ResVReg, const SPIRVType *ResType,
-                      MachineInstr &I) const;
-
   template <bool Signed>
   bool selectDot4AddPacked(Register ResVReg, const SPIRVType *ResType,
                            MachineInstr &I) const;
@@ -3475,8 +3472,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_resource_nonuniformindex: {
     return selectResourceNonUniformIndex(ResVReg, ResType, I);
   }
-  case Intrinsic::spv_legacyf16tof32: {
-    return selectF16ToF32(ResVReg, ResType, I);
+  case Intrinsic::spv_unpackhalf2x16: {
+    return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
   }
 
   default: {
@@ -3751,89 +3748,6 @@ bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
   return true;
 }
 
-bool SPIRVInstructionSelector::selectF16ToF32(Register ResVReg,
-                                              const SPIRVType *ResType,
-                                              MachineInstr &I) const {
-  assert(I.getNumOperands() == 3);
-  assert(I.getOperand(0).isReg());
-  assert(I.getOperand(2).isReg());
-  Register SrcReg = I.getOperand(2).getReg();
-
  const SPIRVType *SrcRegType = GR.getSPIRVTypeForVReg(SrcReg);
-  LLT SrcType = MRI->getType(SrcReg);
-  SPIRVType *SrcEltType = GR.getScalarOrVectorComponentType(SrcRegType);
-  SPIRVType *ResEltType = GR.getScalarOrVectorComponentType(ResType);
-  const TargetRegisterClass *SrcRegClass = GR.getRegClass(SrcEltType);
-  const TargetRegisterClass *ResRegClass = GR.getRegClass(ResEltType);
-  MachineIRBuilder MIRBuilder(I);
-  const SPIRVType *Vec2ResType =
-      GR.getOrCreateSPIRVVectorType(ResEltType, 2, MIRBuilder, false);
-  const TargetRegisterClass *Vec2RegClass = GR.getRegClass(Vec2ResType);
-
-  bool Result = true;
-  MachineBasicBlock &BB = *I.getParent();
-  if (SrcType.isVector()) {
-    // We have a vector of uints to convert elementwise
-    uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
-    SmallVector<Register> ComponentRegisters;
-    for (uint64_t Idx = 0; Idx < ResultSize; Idx++) {
-      Register EltReg = MRI->createVirtualRegister(SrcRegClass);
-      Register FReg = MRI->createVirtualRegister(ResRegClass);
-      Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
-
-      Result =
-          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-              .addDef(EltReg)
-              .addUse(GR.getSPIRVTypeID(SrcEltType))
-              .addUse(SrcReg)
-              .addImm(Idx)
-              .constrainAllUses(TII, TRI, RBI);
-
-      Result &=
-          BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
-              .addDef(Vec2Reg)
-              .addUse(GR.getSPIRVTypeID(Vec2ResType))
-              .addImm(
-                  static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
-              .addImm(GL::UnpackHalf2x16)
-              .addUse(EltReg)
-              .constrainAllUses(TII, TRI, RBI);
-
-      Result &=
-          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-              .addDef(FReg)
-              .addUse(GR.getSPIRVTypeID(ResEltType))
-              .addUse(Vec2Reg)
-              .addImm(0)
-              .constrainAllUses(TII, TRI, RBI);
-
-      ComponentRegisters.emplace_back(FReg);
-    }
-
-    MachineInstrBuilder MIB =
-        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeConstruct))
-            .addDef(ResVReg)
-            .addUse(GR.getSPIRVTypeID(ResType));
-
-    for (Register ComponentReg : ComponentRegisters)
-      MIB.addUse(ComponentReg);
-    return Result && MIB.constrainAllUses(TII, TRI, RBI);
-
-  } else if (SrcType.isScalar()) {
-    // just a scalar uint to convert
-    Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
-    Result &= selectExtInst(Vec2Reg, Vec2ResType, I, GL::UnpackHalf2x16);
-    Result &=
-        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-            .addDef(ResVReg)
-            .addUse(GR.getSPIRVTypeID(ResType))
-            .addUse(Vec2Reg)
-            .addImm(0)
-            .constrainAllUses(TII, TRI, RBI);
-    return Result;
-  }
-  return false;
-}
-
 void SPIRVInstructionSelector::decorateUsesAsNonUniform(
     Register &NonUniformReg) const {
   llvm::SmallVector<Register> WorkList = {NonUniformReg};

From 1ea977b8407a2e149bac216cc77641fd976d591c Mon Sep 17 00:00:00 2001
From: Tim Corringham
Date: Mon, 3 Nov 2025 20:51:26 +0000
Subject: [PATCH 3/4] Fix clang-format issue

Apply clang-format
---
 clang/lib/CodeGen/CGHLSLBuiltins.cpp | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 16a929234396f..b6928ce7d9c44 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,7 +160,8 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   return LastInst;
 }
 
-static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E) {
+static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
+                                        const CallExpr *E) {
   Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
   QualType Op0Ty = E->getArg(0)->getType();
   llvm::Type *ResType = CGF.FloatTy;
@@ -176,19 +177,20 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E)
         "f16tof32 operand must have an unsigned int representation");
 
   if (CGF.CGM.getTriple().isDXIL())
-    return CGF.Builder.CreateIntrinsic(
-        ResType, Intrinsic::dx_legacyf16tof32,
-        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf16tof32,
+                                       ArrayRef<Value *>{Op0}, nullptr,
+                                       "hlsl.f16tof32");
 
   if (CGF.CGM.getTriple().isSPIRV()) {
     // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
     // Int16 and Float16 capabilities
-    auto UnpackType = llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    auto UnpackType =
+        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
     if (NumElements == 0) {
-      // a scalar input - simply extract the first element of the unpacked vector
-      Value *Unpack = CGF.Builder.CreateIntrinsic(
-          UnpackType, Intrinsic::spv_unpackhalf2x16,
-          ArrayRef<Value *>{Op0});
+      // a scalar input - simply extract the first element of the unpacked
+      // vector
+      Value *Unpack = CGF.Builder.CreateIntrinsic(
+          UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{Op0});
       return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
     } else {
       // a vector input - build a congruent output vector by iterating through
@@ -196,8 +198,9 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E)
       Value *Result = PoisonValue::get(ResType);
       for (uint64_t i = 0; i < NumElements; i++) {
         Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
-        Value *Unpack = CGF.Builder.CreateIntrinsic(UnpackType,
-            Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
+        Value *Unpack = CGF.Builder.CreateIntrinsic(
+            UnpackType, Intrinsic::spv_unpackhalf2x16,
+            ArrayRef<Value *>{InVal});
         Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
         Result = CGF.Builder.CreateInsertElement(Result, Res, i);
       }
@@ -205,8 +208,7 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E)
     }
   }
 
-  llvm_unreachable(
-      "Intrinsic F16ToF32 not supported by target architecture");
+  llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
 }
 
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,

From a617a49c78e5faded640f072f35d4cac275e55b8 Mon Sep 17 00:00:00 2001
From: Tim Corringham
Date: Tue, 4 Nov 2025 13:05:31 +0000
Subject: [PATCH 4/4] Update tests for f16tof32()

Replace the f16tof32 test with an unpackhalf2x16 test, to match changes
to the spirv codegen for f16tof32 support. Adjust the
f16tof32-errors.hlsl diagnostics test.
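For reference, a minimal sketch of the IR shape the replacement test
exercises, and the instruction it is expected to select to (value names here
are invented for this note; the selector maps the intrinsic onto the
GLSL.std.450 UnpackHalf2x16 extended instruction):

  %unpack = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %p0)
  %lo     = extractelement <2 x float> %unpack, i64 0
  ; expected SPIR-V: OpExtInst %v2float %set UnpackHalf2x16 %p0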
---
 .../SemaHLSL/BuiltIns/f16tof32-errors.hlsl    | 14 ++--
 .../CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll | 76 -------------------
 llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll    | 18 +++++
 3 files changed, 25 insertions(+), 83 deletions(-)
 delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll

diff --git a/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
index 2c4baae524977..8f2f9308ed966 100644
--- a/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
 
 float builtin_f16tof32_too_few_arg() {
   return __builtin_hlsl_elementwise_f16tof32();
@@ -22,12 +22,12 @@ float builtin_f16tof32_bool4(bool4 p0) {
   // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool4' (aka 'vector<bool, 4>')}}
 }
 
-float builtin_f16tof32_int16_t(int16_t p0) {
+float builtin_f16tof32_short(short p0) {
   return __builtin_hlsl_elementwise_f16tof32(p0);
-  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
 }
 
-float builtin_f16tof32_int16_t(unsigned short p0) {
+float builtin_f16tof32_unsigned_short(unsigned short p0) {
   return __builtin_hlsl_elementwise_f16tof32(p0);
   // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
 }
@@ -84,13 +84,13 @@ float f16tof32_bool(bool p0) {
 
 float f16tof32_bool3(bool3 p0) {
   return f16tof32(p0);
-  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>')}}
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>'))}}
 }
 
 
-float f16tof32_int16_t(int16_t p0) {
+float f16tof32_int16_t(short p0) {
   return f16tof32(p0);
-  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
 }
 
 float f16tof32_int16_t(unsigned short p0) {
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
deleted file mode 100644
index 883fcbd02e769..0000000000000
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
-
-; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450"
-; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0
-; CHECK-DAG: [[UINT2:%.*]] = OpTypeVector [[UINT]] 2
-; CHECK-DAG: [[UINT3:%.*]] = OpTypeVector [[UINT]] 3
-; CHECK-DAG: [[UINT4:%.*]] = OpTypeVector [[UINT]] 4
-; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32
-; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2
-; CHECK-DAG: [[FLOAT3:%.*]] = OpTypeVector [[FLOAT]] 3
-; CHECK-DAG: [[FLOAT4:%.*]] = OpTypeVector [[FLOAT]] 4
-
-; CHECK: [[P0:%.*]] =
OpFunctionParameter [[UINT]] -; CHECK: [[UNPACK2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0]] -; CHECK: [[UNPACK:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2]] 0 -; CHECK: OpReturnValue [[UNPACK]] -define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 { -entry: - %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.spv.legacyf16tof32.i32(i32 %p0) - ret float %hlsl.f16tof32 -} - -; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT2]] -; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0 -; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1 -; CHECK-DAG: [[UNPACK2_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]] -; CHECK-DAG: [[UNPACK2_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]] -; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2_0]] 0 -; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2_1]] 0 -; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT2]] [[RESULT_0]] [[RESULT_1]] -; CHECK: OpReturnValue [[RESULT]] -define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 { -entry: - %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.spv.legacyf16tof32.v2i32(<2 x i32> %p0) - ret <2 x float> %hlsl.f16tof32 -} - -; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT3]] -; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0 -; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1 -; CHECK-DAG: [[P0_2:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 2 -; CHECK-DAG: [[UNPACK3_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]] -; CHECK-DAG: [[UNPACK3_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]] -; CHECK-DAG: [[UNPACK3_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_2]] -; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_0]] 0 -; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_1]] 0 -; CHECK-DAG: [[RESULT_2:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_2]] 0 -; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT3]] [[RESULT_0]] [[RESULT_1]] [[RESULT_2]] -; CHECK: OpReturnValue [[RESULT]] -define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 { -entry: - %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.spv.legacyf16tof32.v3i32(<3 x i32> %p0) - ret <3 x float> %hlsl.f16tof32 -} - -; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT4]] -; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0 -; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1 -; CHECK-DAG: [[P0_2:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 2 -; CHECK-DAG: [[P0_3:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 3 -; CHECK-DAG: [[UNPACK4_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]] -; CHECK-DAG: [[UNPACK4_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]] -; CHECK-DAG: [[UNPACK4_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_2]] -; CHECK-DAG: [[UNPACK4_3:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_3]] -; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_0]] 0 -; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_1]] 0 -; CHECK-DAG: [[RESULT_2:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_2]] 0 -; CHECK-DAG: [[RESULT_3:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_3]] 0 -; CHECK: [[RESULT:%.*]] = 
OpCompositeConstruct [[FLOAT4]] [[RESULT_0]] [[RESULT_1]] [[RESULT_2]] [[RESULT_3]] -; CHECK: OpReturnValue [[RESULT]] -define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 { -entry: - %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.spv.legacyf16tof32.v4i32(<4 x i32> %p0) - ret <4 x float> %hlsl.f16tof32 -} diff --git a/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll b/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll new file mode 100644 index 0000000000000..6a9ce4515f5c0 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll @@ -0,0 +1,18 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0 +; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32 +; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2 + +; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT]] +; CHECK: [[UNPACK2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0]] +; CHECK: [[UNPACK:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2]] 0 +; CHECK: OpReturnValue [[UNPACK]] +define hidden spir_func noundef nofpclass(nan inf) float @_Z9test_funcj(i32 noundef %0) local_unnamed_addr #0 { + %2 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %0) + %3 = extractelement <2 x float> %2, i64 0 + ret float %3 +} +