From d8706d621ecd2f84ba645152d3ab015ce2d73553 Mon Sep 17 00:00:00 2001
From: Tim Corringham
Date: Wed, 1 Oct 2025 17:48:36 +0100
Subject: [PATCH 1/4] [HLSL] Implement the f16tof32() intrinsic

Implement the f16tof32() intrinsic, including DXIL and SPIRV codegen,
and associated tests.
---
 clang/include/clang/Basic/Builtins.td         |   6 +
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          |  16 +++
 clang/lib/CodeGen/CGHLSLRuntime.h             |   1 +
 .../lib/Headers/hlsl/hlsl_alias_intrinsics.h  |  21 +++
 clang/lib/Sema/SemaHLSL.cpp                   |  57 ++++++--
 .../builtins/f16tof32-builtin.hlsl            |  30 ++++
 clang/test/CodeGenHLSL/builtins/f16tof32.hlsl |  30 ++++
 .../SemaHLSL/BuiltIns/f16tof32-errors.hlsl    | 134 ++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   3 +
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |   4 +
 llvm/lib/Target/DirectX/DXIL.td               |   9 ++
 .../DirectX/DirectXTargetTransformInfo.cpp    |   8 +-
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |  90 ++++++++++++
 llvm/test/CodeGen/DirectX/f16tof32.ll         |  57 ++++++++
 .../CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll |  76 ++++++++++
 15 files changed, 527 insertions(+), 15 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/f16tof32.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index a350acdf146ab..8bdff10d57745 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5222,6 +5222,12 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate {
   let Prototype = "T(unsigned int, T)";
 }
 
+def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_f16tof32"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 384bd59e7533a..d5aebd6d64817 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -560,6 +560,22 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
         ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    llvm::Type *Xty = Op0->getType();
+    llvm::Type *retType = llvm::Type::getFloatTy(this->getLLVMContext());
+    if (Xty->isVectorTy()) {
+      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
+      retType = llvm::VectorType::get(
+          retType, ElementCount::getFixed(XVecTy->getNumElements()));
+    }
+    if (!E->getArg(0)->getType()->hasUnsignedIntegerRepresentation())
+      llvm_unreachable(
+          "f16tof32 operand must have an unsigned int representation");
+    return Builder.CreateIntrinsic(
+        retType, CGM.getHLSLRuntime().getLegacyF16ToF32Intrinsic(),
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+  }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 103b4a98f6c26..e36e89fe16125 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -96,6 +96,7 @@ class CGHLSLRuntime {
                                    flattened_thread_id_in_group)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsInf, isinf)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsNaN, isnan)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(LegacyF16ToF32, legacyf16tof32)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index d973371312701..a85accdcc4549 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1052,6 +1052,27 @@ float3 exp2(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
 float4 exp2(float4);
 
+//===----------------------------------------------------------------------===//
+// f16tof32 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn float f16tof32(uint x)
+/// \brief Returns the half value stored in the low 16 bits of the uint arg
+/// converted to a float.
+/// \param x The uint containing two half values.
+///
+/// \returns The float value of the half value found in the low 16 bits of the
+/// \a x parameter.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float f16tof32(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float2 f16tof32(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float3 f16tof32(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float4 f16tof32(uint4);
+
 //===----------------------------------------------------------------------===//
 // firstbithigh builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 5b3e89f936327..be2a85442c9b6 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2738,6 +2738,23 @@ static bool CheckUnsignedIntRepresentation(Sema *S, SourceLocation Loc,
   return false;
 }
 
+static bool CheckExpectedBitWidth(Sema *S, CallExpr *TheCall,
+                                  unsigned ArgOrdinal, unsigned Width) {
+  QualType ArgTy = TheCall->getArg(ArgOrdinal)->getType();
+  if (auto *VTy = ArgTy->getAs<VectorType>())
+    ArgTy = VTy->getElementType();
+  // ensure arg type has expected bit width
+  uint64_t ElementBitCount =
+      S->getASTContext().getTypeSizeInChars(ArgTy).getQuantity() * 8;
+  if (ElementBitCount != Width) {
+    S->Diag(TheCall->getArg(ArgOrdinal)->getBeginLoc(),
+            diag::err_integer_incorrect_bit_count)
+        << Width << ElementBitCount;
+    return true;
+  }
+  return false;
+}
+
 static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
                                        QualType ReturnType) {
   auto *VecTyA = TheCall->getArg(0)->getType()->getAs<VectorType>();
@@ -2897,24 +2914,16 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
                                     CheckUnsignedIntVecRepresentation))
       return true;
 
-    auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
     // ensure arg integers are 32-bits
-    uint64_t ElementBitCount = getASTContext()
-                                   .getTypeSizeInChars(VTy->getElementType())
-                                   .getQuantity() *
-                               8;
-    if (ElementBitCount != 32) {
-      SemaRef.Diag(TheCall->getBeginLoc(),
-                   diag::err_integer_incorrect_bit_count)
-          << 32 << ElementBitCount;
+    if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
       return true;
-    }
 
     // ensure both args are vectors of total bit size of a multiple of 64
+    auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
     int NumElementsArg = VTy->getNumElements();
     if (NumElementsArg != 2 && NumElementsArg != 4) {
       SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
-          << 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
+          << 1 /*a multiple of*/ << 64 << NumElementsArg * 32;
       return true;
     }
 
@@ -3230,7 +3239,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     break;
   }
   // Note these are llvm builtins that we want to catch invalid intrinsic
-  // generation. Normal handling of these builitns will occur elsewhere.
+  // generation. Normal handling of these builtins will occur elsewhere.
  case Builtin::BI__builtin_elementwise_bitreverse: {
    // does not include a check for number of arguments
    // because that is done previously
@@ -3340,6 +3349,30 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
    }
    break;
  }
+  case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
+                                   CheckUnsignedIntRepresentation))
+      return true;
+    // ensure arg integers are 32 bits
+    if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
+      return true;
+    // check it wasn't a bool type
+    QualType ArgTy = TheCall->getArg(0)->getType();
+    if (auto *VTy = ArgTy->getAs<VectorType>())
+      ArgTy = VTy->getElementType();
+    if (ArgTy->isBooleanType()) {
+      SemaRef.Diag(TheCall->getArg(0)->getBeginLoc(),
+                   diag::err_builtin_invalid_arg_type)
+          << 1 << /* scalar or vector of */ 5 << /* unsigned int */ 3
+          << /* no fp */ 0 << TheCall->getArg(0)->getType();
+      return true;
+    }
+
+    SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
+    break;
+  }
  }
  return false;
}
diff --git a/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
new file mode 100644
index 0000000000000..65dba664bb5ea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+
+
diff --git a/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
new file mode 100644
index 0000000000000..b68bc197f16c5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck
%s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return f16tof32(p0); }
+
+
+
diff --git a/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
new file mode 100644
index 0000000000000..2c4baae524977
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+float builtin_f16tof32_too_few_arg() {
+  return __builtin_hlsl_elementwise_f16tof32();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+float builtin_f16tof32_too_many_arg(uint p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+float builtin_f16tof32_bool(bool p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float builtin_f16tof32_bool4(bool4 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool4' (aka 'vector<bool, 4>')}}
+}
+
+float builtin_f16tof32_int16_t(int16_t p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+}
+
+float builtin_f16tof32_int16_t(unsigned short p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float builtin_f16tof32_int(int p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was
'int')}}
+}
+
+float builtin_f16tof32_int64_t(long p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 builtin_f16tof32_int2_to_float2_promotion(int2 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+float builtin_f16tof32_half(half p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float builtin_f16tof32_half4(half4 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half4' (aka 'vector<half, 4>'))}}
+}
+
+float builtin_f16tof32_float(float p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float builtin_f16tof32_double(double p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
+
+float f16tof32_too_few_arg() {
+  return f16tof32();
+  // expected-error@-1 {{no matching function for call to 'f16tof32'}}
+}
+
+float f16tof32_too_many_arg(uint p0) {
+  return f16tof32(p0, p0);
+  // expected-error@-1 {{no matching function for call to 'f16tof32'}}
+}
+
+float f16tof32_bool(bool p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float f16tof32_bool3(bool3 p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>')}}
+}
+
+
+float f16tof32_int16_t(int16_t p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+}
+
+float f16tof32_int16_t(unsigned short p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float f16tof32_int(int p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
+}
+
+float f16tof32_int64_t(long p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 f16tof32_int2_to_float2_promotion(int3 p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int3' (aka 'vector<int, 3>'))}}
+}
+
+float f16tof32_half(half p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float f16tof32_half2(half2 p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half2' (aka 'vector<half, 2>'))}}
+}
+
+float f16tof32_float(float p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float f16tof32_double(double p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td
b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 3b7077c52db21..74a6bc41f8da7 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -140,6 +140,9 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
 def int_dx_isnan : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                                          [llvm_anyfloat_ty], [IntrNoMem]>;
 
+def int_dx_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
+                                                  [llvm_anyint_ty], [IntrNoMem]>;
+
 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
                                         [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
                                         [IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 49a182be98acd..655b1aa1dde21 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -198,4 +198,8 @@ def int_spv_resource_nonuniformindex
   def int_spv_generic_cast_to_ptr_explicit
       : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
                               [IntrNoMem, NoUndef]>;
+
+  def int_spv_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
+      [llvm_anyint_ty], [IntrNoMem]>;
+
 }
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 44c48305f2832..3ae4e09b2f45d 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1069,6 +1069,15 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
   let attributes = [Attributes];
 }
 
+def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> {
+  let Doc = "returns the float16 stored in the low-half of the uint converted "
+            "to a float";
+  let intrinsics = [IntrinSelect<int_dx_legacyf16tof32>];
+  let arguments = [Int32Ty];
+  let result = FloatTy;
+  let stages = [Stages<DXIL1_0, all_stages>];
+}
+
 def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let Doc = "returns the count of bits set to 1 across the wave";
   let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>];
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 68fd3e0bc74c7..614a4bab36e67 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -29,11 +29,12 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                             int OpdIdx) const {
   switch (ID) {
   case Intrinsic::dx_asdouble:
-  case Intrinsic::dx_isinf:
-  case Intrinsic::dx_isnan:
   case Intrinsic::dx_firstbitlow:
-  case Intrinsic::dx_firstbituhigh:
   case Intrinsic::dx_firstbitshigh:
+  case Intrinsic::dx_firstbituhigh:
+  case Intrinsic::dx_isinf:
+  case Intrinsic::dx_isnan:
+  case Intrinsic::dx_legacyf16tof32:
     return OpdIdx == 0;
   default:
     return OpdIdx == -1;
@@ -50,6 +51,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_frac:
   case Intrinsic::dx_isinf:
   case Intrinsic::dx_isnan:
+  case Intrinsic::dx_legacyf16tof32:
   case Intrinsic::dx_rsqrt:
   case Intrinsic::dx_saturate:
   case Intrinsic::dx_splitdouble:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 021353ab716f7..12ae8ff2d0478 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -212,6 +212,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectOpIsNan(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I) const;
 
+  bool selectF16ToF32(Register ResVReg, const SPIRVType *ResType,
+                      MachineInstr &I) const;
+
   template <bool Signed>
   bool
selectDot4AddPacked(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I) const;
@@ -3472,6 +3475,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
  case Intrinsic::spv_resource_nonuniformindex: {
    return selectResourceNonUniformIndex(ResVReg, ResType, I);
  }
+  case Intrinsic::spv_legacyf16tof32: {
+    return selectF16ToF32(ResVReg, ResType, I);
+  }
+
  default: {
    std::string DiagMsg;
    raw_string_ostream OS(DiagMsg);
@@ -3744,6 +3751,89 @@ bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
  return true;
}

+bool SPIRVInstructionSelector::selectF16ToF32(Register ResVReg,
+                                              const SPIRVType *ResType,
+                                              MachineInstr &I) const {
+  assert(I.getNumOperands() == 3);
+  assert(I.getOperand(0).isReg());
+  assert(I.getOperand(2).isReg());
+  Register SrcReg = I.getOperand(2).getReg();
+  const SPIRVType *SrcRegType = GR.getSPIRVTypeForVReg(SrcReg);
+  LLT SrcType = MRI->getType(SrcReg);
+  SPIRVType *SrcEltType = GR.getScalarOrVectorComponentType(SrcRegType);
+  SPIRVType *ResEltType = GR.getScalarOrVectorComponentType(ResType);
+  const TargetRegisterClass *SrcRegClass = GR.getRegClass(SrcEltType);
+  const TargetRegisterClass *ResRegClass = GR.getRegClass(ResEltType);
+  MachineIRBuilder MIRBuilder(I);
+  const SPIRVType *Vec2ResType =
+      GR.getOrCreateSPIRVVectorType(ResEltType, 2, MIRBuilder, false);
+  const TargetRegisterClass *Vec2RegClass = GR.getRegClass(Vec2ResType);
+
+  bool Result = true;
+  MachineBasicBlock &BB = *I.getParent();
+  if (SrcType.isVector()) {
+    // We have a vector of uints to convert elementwise
+    uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
+    SmallVector<Register> ComponentRegisters;
+    for (uint64_t Idx = 0; Idx < ResultSize; Idx++) {
+      Register EltReg = MRI->createVirtualRegister(SrcRegClass);
+      Register FReg = MRI->createVirtualRegister(ResRegClass);
+      Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
+
+      Result =
+          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+              .addDef(EltReg)
+              .addUse(GR.getSPIRVTypeID(SrcEltType))
+              .addUse(SrcReg)
+              .addImm(Idx)
+              .constrainAllUses(TII, TRI, RBI);
+
+      Result &=
+          BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+              .addDef(Vec2Reg)
+              .addUse(GR.getSPIRVTypeID(Vec2ResType))
+              .addImm(
+                  static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+              .addImm(GL::UnpackHalf2x16)
+              .addUse(EltReg)
+              .constrainAllUses(TII, TRI, RBI);
+
+      Result &=
+          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+              .addDef(FReg)
+              .addUse(GR.getSPIRVTypeID(ResEltType))
+              .addUse(Vec2Reg)
+              .addImm(0)
+              .constrainAllUses(TII, TRI, RBI);
+
+      ComponentRegisters.emplace_back(FReg);
+    }
+
+    MachineInstrBuilder MIB =
+        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeConstruct))
+            .addDef(ResVReg)
+            .addUse(GR.getSPIRVTypeID(ResType));
+
+    for (Register ComponentReg : ComponentRegisters)
+      MIB.addUse(ComponentReg);
+    return Result && MIB.constrainAllUses(TII, TRI, RBI);
+
+  } else if (SrcType.isScalar()) {
+    // just a scalar uint to convert
+    Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
+    Result &= selectExtInst(Vec2Reg, Vec2ResType, I, GL::UnpackHalf2x16);
+    Result &=
+        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+            .addDef(ResVReg)
+            .addUse(GR.getSPIRVTypeID(ResType))
+            .addUse(Vec2Reg)
+            .addImm(0)
+            .constrainAllUses(TII, TRI, RBI);
+    return Result;
+  }
+  return false;
+}
+
 void SPIRVInstructionSelector::decorateUsesAsNonUniform(
     Register &NonUniformReg) const {
   llvm::SmallVector<Register> WorkList = {NonUniformReg};
diff --git
a/llvm/test/CodeGen/DirectX/f16tof32.ll b/llvm/test/CodeGen/DirectX/f16tof32.ll
new file mode 100644
index 0000000000000..edc5c1942e8bd
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/f16tof32.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s
+
+define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 %p0)
+  ; CHECK: ret float [[UINT]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %p0)
+  ret float %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT2_0:%.*]] = extractelement <2 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_0]])
+  ; CHECK: [[UINT2_1:%.*]] = extractelement <2 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_1]])
+  ; CHECK: [[FLOAT2_0:%.*]] = insertelement <2 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT2_1:%.*]] = insertelement <2 x float> [[FLOAT2_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: ret <2 x float> [[FLOAT2_1]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %p0)
+  ret <2 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT3_0:%.*]] = extractelement <3 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_0]])
+  ; CHECK: [[UINT3_1:%.*]] = extractelement <3 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_1]])
+  ; CHECK: [[UINT3_2:%.*]] = extractelement <3 x i32> %p0, i64 2
+  ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_2]])
+  ; CHECK: [[FLOAT3_0:%.*]] = insertelement <3 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT3_1:%.*]] = insertelement <3 x float> [[FLOAT3_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: [[FLOAT3_2:%.*]] = insertelement <3 x float> [[FLOAT3_1]], float [[FLOAT_2]], i64 2
+  ; CHECK: ret <3 x float> [[FLOAT3_2]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %p0)
+  ret <3 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT4_0:%.*]] = extractelement <4 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_0]])
+  ; CHECK: [[UINT4_1:%.*]] = extractelement <4 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_1]])
+  ; CHECK: [[UINT4_2:%.*]] = extractelement <4 x i32> %p0, i64 2
+  ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_2]])
+  ; CHECK: [[UINT4_3:%.*]] = extractelement <4 x i32> %p0, i64 3
+  ; CHECK: [[FLOAT_3:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_3]])
+  ; CHECK: [[FLOAT4_0:%.*]] = insertelement <4 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT4_1:%.*]] = insertelement <4 x float> [[FLOAT4_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: [[FLOAT4_2:%.*]] = insertelement <4 x
float> [[FLOAT4_1]], float [[FLOAT_2]], i64 2
+  ; CHECK: [[FLOAT4_3:%.*]] = insertelement <4 x float> [[FLOAT4_2]], float [[FLOAT_3]], i64 3
+  ; CHECK: ret <4 x float> [[FLOAT4_3]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %p0)
+  ret <4 x float> %hlsl.f16tof32
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
new file mode 100644
index 0000000000000..883fcbd02e769
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
@@ -0,0 +1,76 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0
+; CHECK-DAG: [[UINT2:%.*]] = OpTypeVector [[UINT]] 2
+; CHECK-DAG: [[UINT3:%.*]] = OpTypeVector [[UINT]] 3
+; CHECK-DAG: [[UINT4:%.*]] = OpTypeVector [[UINT]] 4
+; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32
+; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2
+; CHECK-DAG: [[FLOAT3:%.*]] = OpTypeVector [[FLOAT]] 3
+; CHECK-DAG: [[FLOAT4:%.*]] = OpTypeVector [[FLOAT]] 4
+
+; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT]]
+; CHECK: [[UNPACK2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0]]
+; CHECK: [[UNPACK:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2]] 0
+; CHECK: OpReturnValue [[UNPACK]]
+define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 {
+entry:
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.spv.legacyf16tof32.i32(i32 %p0)
+  ret float %hlsl.f16tof32
+}
+
+; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT2]]
+; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0
+; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1
+; CHECK-DAG: [[UNPACK2_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]]
+; CHECK-DAG: [[UNPACK2_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]]
+; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2_0]] 0
+; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2_1]] 0
+; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT2]] [[RESULT_0]] [[RESULT_1]]
+; CHECK: OpReturnValue [[RESULT]]
+define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.spv.legacyf16tof32.v2i32(<2 x i32> %p0)
+  ret <2 x float> %hlsl.f16tof32
+}
+
+; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT3]]
+; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0
+; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1
+; CHECK-DAG: [[P0_2:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 2
+; CHECK-DAG: [[UNPACK3_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]]
+; CHECK-DAG: [[UNPACK3_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]]
+; CHECK-DAG: [[UNPACK3_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_2]]
+; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_0]] 0
+; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_1]] 0
+; CHECK-DAG: [[RESULT_2:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_2]] 0
+; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT3]] [[RESULT_0]] [[RESULT_1]] [[RESULT_2]]
+; CHECK: OpReturnValue
[[RESULT]] +define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 { +entry: + %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.spv.legacyf16tof32.v3i32(<3 x i32> %p0) + ret <3 x float> %hlsl.f16tof32 +} + +; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT4]] +; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0 +; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1 +; CHECK-DAG: [[P0_2:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 2 +; CHECK-DAG: [[P0_3:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 3 +; CHECK-DAG: [[UNPACK4_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]] +; CHECK-DAG: [[UNPACK4_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]] +; CHECK-DAG: [[UNPACK4_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_2]] +; CHECK-DAG: [[UNPACK4_3:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_3]] +; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_0]] 0 +; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_1]] 0 +; CHECK-DAG: [[RESULT_2:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_2]] 0 +; CHECK-DAG: [[RESULT_3:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_3]] 0 +; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT4]] [[RESULT_0]] [[RESULT_1]] [[RESULT_2]] [[RESULT_3]] +; CHECK: OpReturnValue [[RESULT]] +define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 { +entry: + %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.spv.legacyf16tof32.v4i32(<4 x i32> %p0) + ret <4 x float> %hlsl.f16tof32 +} From da0a2143d099ac2a66b6b655f5d8c17a10111630 Mon Sep 17 00:00:00 2001 From: Tim Corringham Date: Mon, 3 Nov 2025 18:57:56 +0000 Subject: [PATCH 2/4] Changes from review comments Move the unrolling of the UnpackHalf2x16 from the SPIRV codegen to Clang IR CodeGen, and replace the spv_legacyf16tof32 intrinsic with a spv_unpackhalf2x16 intrinsic. This greatly simplifies the SPIRV codegen, at the expense of slightly complicating the Clang IR codegen. 
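As a rough illustration only (the IR value names below are invented for this
note, not taken from the patch or its tests), the unrolling now done in Clang
IR CodeGen emits one unpackhalf2x16 call per element and keeps the low half of
each unpacked pair:

  ; scalar: f16tof32(uint p0), SPIRV target
  %unpack = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %p0)
  %lo     = extractelement <2 x float> %unpack, i64 0

  ; vector: f16tof32(uint2 p0), SPIRV target, unrolled elementwise
  %e0  = extractelement <2 x i32> %p0, i64 0
  %u0  = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %e0)
  %f0  = extractelement <2 x float> %u0, i64 0
  %v0  = insertelement <2 x float> poison, float %f0, i64 0
  %e1  = extractelement <2 x i32> %p0, i64 1
  %u1  = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %e1)
  %f1  = extractelement <2 x float> %u1, i64 0
  %res = insertelement <2 x float> %v0, float %f1, i64 1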
---
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          | 64 ++++++++++---
 clang/lib/CodeGen/CGHLSLRuntime.h             |  1 -
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |  3 +-
 .../Target/SPIRV/SPIRVInstructionSelector.cpp | 90 +------------------
 4 files changed, 53 insertions(+), 105 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index d5aebd6d64817..d7e69eb816873 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,6 +160,55 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   return LastInst;
 }
 
+static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.FloatTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasUnsignedIntegerRepresentation())
+    llvm_unreachable(
+        "f16tof32 operand must have an unsigned int representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(
+        ResType, Intrinsic::dx_legacyf16tof32,
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto UnpackType = llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    if (NumElements == 0) {
+      // a scalar input - simply extract the first element of the unpacked vector
+      Value *Unpack = CGF.Builder.CreateIntrinsic(
+          UnpackType, Intrinsic::spv_unpackhalf2x16,
+          ArrayRef<Value *>{Op0});
+      return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+    } else {
+      // a vector input - build a congruent output vector by iterating through
+      // the input vector calling unpackhalf2x16 for each element
+      Value *Result = PoisonValue::get(ResType);
+      for (uint64_t i = 0; i < NumElements; i++) {
+        Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
+        Value *Unpack = CGF.Builder.CreateIntrinsic(UnpackType,
+            Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
+        Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+        Result = CGF.Builder.CreateInsertElement(Result, Res, i);
+      }
+      return Result;
+    }
+  }
+
+  llvm_unreachable(
+      "Intrinsic F16ToF32 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -561,20 +610,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
   }
   case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
-    Value *Op0 = EmitScalarExpr(E->getArg(0));
-    llvm::Type *Xty = Op0->getType();
-    llvm::Type *retType = llvm::Type::getFloatTy(this->getLLVMContext());
-    if (Xty->isVectorTy()) {
-      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
-      retType = llvm::VectorType::get(
-          retType, ElementCount::getFixed(XVecTy->getNumElements()));
-    }
-    if (!E->getArg(0)->getType()->hasUnsignedIntegerRepresentation())
-      llvm_unreachable(
-          "f16tof32 operand must have an unsigned int representation");
-    return Builder.CreateIntrinsic(
-        retType, CGM.getHLSLRuntime().getLegacyF16ToF32Intrinsic(),
-        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+    return handleElementwiseF16ToF32(*this, E);
   }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index e36e89fe16125..103b4a98f6c26 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -96,7 +96,6 @@ class CGHLSLRuntime {
                                    flattened_thread_id_in_group)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsInf, isinf)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsNaN, isnan)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(LegacyF16ToF32, legacyf16tof32)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 655b1aa1dde21..4ecc69122abdd 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -199,7 +199,6 @@ def int_spv_resource_nonuniformindex
       : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
                               [IntrNoMem, NoUndef]>;
 
-  def int_spv_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
-      [llvm_anyint_ty], [IntrNoMem]>;
+  def int_spv_unpackhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i32_ty], [IntrNoMem]>;
 
 }
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 12ae8ff2d0478..97b19bdd35ea5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -212,9 +212,6 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectOpIsNan(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I) const;
 
-  bool selectF16ToF32(Register ResVReg, const SPIRVType *ResType,
-                      MachineInstr &I) const;
-
   template <bool Signed>
   bool selectDot4AddPacked(Register ResVReg, const SPIRVType *ResType,
                            MachineInstr &I) const;
@@ -3475,8 +3472,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_resource_nonuniformindex: {
     return selectResourceNonUniformIndex(ResVReg, ResType, I);
   }
-  case Intrinsic::spv_legacyf16tof32: {
-    return selectF16ToF32(ResVReg, ResType, I);
+  case Intrinsic::spv_unpackhalf2x16: {
+    return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
   }
 
   default: {
@@ -3751,89 +3748,6 @@ bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
   return true;
 }
 
-bool SPIRVInstructionSelector::selectF16ToF32(Register ResVReg,
-                                              const SPIRVType *ResType,
-                                              MachineInstr &I) const {
-  assert(I.getNumOperands() == 3);
-  assert(I.getOperand(0).isReg());
-  assert(I.getOperand(2).isReg());
-  Register SrcReg = I.getOperand(2).getReg();
-
  const SPIRVType *SrcRegType = GR.getSPIRVTypeForVReg(SrcReg);
-  LLT SrcType = MRI->getType(SrcReg);
-  SPIRVType *SrcEltType = GR.getScalarOrVectorComponentType(SrcRegType);
-  SPIRVType *ResEltType = GR.getScalarOrVectorComponentType(ResType);
-  const TargetRegisterClass *SrcRegClass = GR.getRegClass(SrcEltType);
-  const TargetRegisterClass *ResRegClass = GR.getRegClass(ResEltType);
-  MachineIRBuilder MIRBuilder(I);
-  const SPIRVType *Vec2ResType =
-      GR.getOrCreateSPIRVVectorType(ResEltType, 2, MIRBuilder, false);
-  const TargetRegisterClass *Vec2RegClass = GR.getRegClass(Vec2ResType);
-
-  bool Result = true;
-  MachineBasicBlock &BB = *I.getParent();
-  if (SrcType.isVector()) {
-    // We have a vector of uints to convert elementwise
-    uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
-    SmallVector<Register> ComponentRegisters;
-    for (uint64_t Idx = 0; Idx < ResultSize; Idx++) {
-      Register EltReg = MRI->createVirtualRegister(SrcRegClass);
-      Register FReg = MRI->createVirtualRegister(ResRegClass);
-      Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
-
-      Result =
-          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-              .addDef(EltReg)
-              .addUse(GR.getSPIRVTypeID(SrcEltType))
-              .addUse(SrcReg)
-              .addImm(Idx)
-              .constrainAllUses(TII, TRI, RBI);
-
-      Result &=
-          BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
-              .addDef(Vec2Reg)
-              .addUse(GR.getSPIRVTypeID(Vec2ResType))
-              .addImm(
-                  static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
-              .addImm(GL::UnpackHalf2x16)
-              .addUse(EltReg)
-              .constrainAllUses(TII, TRI, RBI);
-
-      Result &=
-          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-              .addDef(FReg)
-              .addUse(GR.getSPIRVTypeID(ResEltType))
-              .addUse(Vec2Reg)
-              .addImm(0)
-              .constrainAllUses(TII, TRI, RBI);
-
-      ComponentRegisters.emplace_back(FReg);
-    }
-
-    MachineInstrBuilder MIB =
-        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeConstruct))
-            .addDef(ResVReg)
-            .addUse(GR.getSPIRVTypeID(ResType));
-
-    for (Register ComponentReg : ComponentRegisters)
-      MIB.addUse(ComponentReg);
-    return Result && MIB.constrainAllUses(TII, TRI, RBI);
-
-  } else if (SrcType.isScalar()) {
-    // just a scalar uint to convert
-    Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
-    Result &= selectExtInst(Vec2Reg, Vec2ResType, I, GL::UnpackHalf2x16);
-    Result &=
-        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-            .addDef(ResVReg)
-            .addUse(GR.getSPIRVTypeID(ResType))
-            .addUse(Vec2Reg)
-            .addImm(0)
-            .constrainAllUses(TII, TRI, RBI);
-    return Result;
-  }
-  return false;
-}
-
 void SPIRVInstructionSelector::decorateUsesAsNonUniform(
     Register &NonUniformReg) const {
   llvm::SmallVector<Register> WorkList = {NonUniformReg};

From 1ea977b8407a2e149bac216cc77641fd976d591c Mon Sep 17 00:00:00 2001
From: Tim Corringham
Date: Mon, 3 Nov 2025 20:51:26 +0000
Subject: [PATCH 3/4] Fix clang-format issue

Apply clang-format
---
 clang/lib/CodeGen/CGHLSLBuiltins.cpp | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 16a929234396f..b6928ce7d9c44 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,7 +160,8 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   return LastInst;
 }
 
-static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E) {
+static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
+                                        const CallExpr *E) {
   Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
   QualType Op0Ty = E->getArg(0)->getType();
   llvm::Type *ResType = CGF.FloatTy;
@@ -176,19 +177,20 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E)
         "f16tof32 operand must have an unsigned int representation");
 
   if (CGF.CGM.getTriple().isDXIL())
-    return CGF.Builder.CreateIntrinsic(
-        ResType, Intrinsic::dx_legacyf16tof32,
-        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf16tof32,
+                                       ArrayRef<Value *>{Op0}, nullptr,
+                                       "hlsl.f16tof32");
 
   if (CGF.CGM.getTriple().isSPIRV()) {
     // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
     // Int16 and Float16 capabilities
-    auto UnpackType = llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    auto UnpackType =
+        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
     if (NumElements == 0) {
-      // a scalar input - simply extract the first element of the unpacked vector
-      Value *Unpack = CGF.Builder.CreateIntrinsic(
-          UnpackType, Intrinsic::spv_unpackhalf2x16,
-          ArrayRef<Value *>{Op0});
+      // a scalar input - simply extract the first element of the unpacked
+      // vector
+      Value *Unpack = CGF.Builder.CreateIntrinsic(
+          UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{Op0});
       return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
     } else {
       // a vector input - build a congruent output vector by iterating through
@@ -196,8 +198,9 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E)
       Value *Result = PoisonValue::get(ResType);
       for (uint64_t i = 0; i < NumElements; i++) {
         Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
-        Value *Unpack = CGF.Builder.CreateIntrinsic(UnpackType,
-            Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
+        Value *Unpack = CGF.Builder.CreateIntrinsic(
+            UnpackType, Intrinsic::spv_unpackhalf2x16,
+            ArrayRef<Value *>{InVal});
         Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
         Result = CGF.Builder.CreateInsertElement(Result, Res, i);
       }
@@ -205,8 +208,7 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E)
     }
   }
 
-  llvm_unreachable(
-      "Intrinsic F16ToF32 not supported by target architecture");
+  llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
 }
 
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,

From a617a49c78e5faded640f072f35d4cac275e55b8 Mon Sep 17 00:00:00 2001
From: Tim Corringham
Date: Tue, 4 Nov 2025 13:05:31 +0000
Subject: [PATCH 4/4] Update tests for f16tof32()

Replace the f16tof32 test with an unpackhalf2x16 test, to match changes
to the spirv codegen for f16tof32 support. Adjust the
f16tof32-errors.hlsl diagnostics test.
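For reference, a minimal sketch of the IR shape the replacement test
exercises, and the instruction it is expected to select to (value names here
are invented for this note; the selector maps the intrinsic onto the
GLSL.std.450 UnpackHalf2x16 extended instruction):

  %unpack = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %p0)
  %lo     = extractelement <2 x float> %unpack, i64 0
  ; expected SPIR-V: OpExtInst %v2float %set UnpackHalf2x16 %p0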
---
 .../SemaHLSL/BuiltIns/f16tof32-errors.hlsl    | 14 ++--
 .../CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll | 76 -------------------
 llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll    | 18 +++++
 3 files changed, 25 insertions(+), 83 deletions(-)
 delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll

diff --git a/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
index 2c4baae524977..8f2f9308ed966 100644
--- a/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
 
 float builtin_f16tof32_too_few_arg() {
   return __builtin_hlsl_elementwise_f16tof32();
@@ -22,12 +22,12 @@ float builtin_f16tof32_bool4(bool4 p0) {
   // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool4' (aka 'vector<bool, 4>')}}
 }
 
-float builtin_f16tof32_int16_t(int16_t p0) {
+float builtin_f16tof32_short(short p0) {
   return __builtin_hlsl_elementwise_f16tof32(p0);
-  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
 }
 
-float builtin_f16tof32_int16_t(unsigned short p0) {
+float builtin_f16tof32_unsigned_short(unsigned short p0) {
   return __builtin_hlsl_elementwise_f16tof32(p0);
   // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
 }
@@ -84,13 +84,13 @@ float f16tof32_bool(bool p0) {
 
 float f16tof32_bool3(bool3 p0) {
   return f16tof32(p0);
-  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>')}}
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>'))}}
 }
 
 
-float f16tof32_int16_t(int16_t p0) {
+float f16tof32_int16_t(short p0) {
   return f16tof32(p0);
-  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
 }
 
 float f16tof32_int16_t(unsigned short p0) {
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
deleted file mode 100644
index 883fcbd02e769..0000000000000
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
-
-; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450"
-; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0
-; CHECK-DAG: [[UINT2:%.*]] = OpTypeVector [[UINT]] 2
-; CHECK-DAG: [[UINT3:%.*]] = OpTypeVector [[UINT]] 3
-; CHECK-DAG: [[UINT4:%.*]] = OpTypeVector [[UINT]] 4
-; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32
-; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2
-; CHECK-DAG: [[FLOAT3:%.*]] = OpTypeVector [[FLOAT]] 3
-; CHECK-DAG: [[FLOAT4:%.*]] = OpTypeVector [[FLOAT]] 4
-
-; CHECK: [[P0:%.*]] =
OpFunctionParameter [[UINT]] -; CHECK: [[UNPACK2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0]] -; CHECK: [[UNPACK:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2]] 0 -; CHECK: OpReturnValue [[UNPACK]] -define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 { -entry: - %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.spv.legacyf16tof32.i32(i32 %p0) - ret float %hlsl.f16tof32 -} - -; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT2]] -; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0 -; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1 -; CHECK-DAG: [[UNPACK2_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]] -; CHECK-DAG: [[UNPACK2_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]] -; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2_0]] 0 -; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2_1]] 0 -; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT2]] [[RESULT_0]] [[RESULT_1]] -; CHECK: OpReturnValue [[RESULT]] -define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 { -entry: - %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.spv.legacyf16tof32.v2i32(<2 x i32> %p0) - ret <2 x float> %hlsl.f16tof32 -} - -; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT3]] -; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0 -; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1 -; CHECK-DAG: [[P0_2:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 2 -; CHECK-DAG: [[UNPACK3_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]] -; CHECK-DAG: [[UNPACK3_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]] -; CHECK-DAG: [[UNPACK3_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_2]] -; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_0]] 0 -; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_1]] 0 -; CHECK-DAG: [[RESULT_2:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK3_2]] 0 -; CHECK: [[RESULT:%.*]] = OpCompositeConstruct [[FLOAT3]] [[RESULT_0]] [[RESULT_1]] [[RESULT_2]] -; CHECK: OpReturnValue [[RESULT]] -define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 { -entry: - %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.spv.legacyf16tof32.v3i32(<3 x i32> %p0) - ret <3 x float> %hlsl.f16tof32 -} - -; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT4]] -; CHECK-DAG: [[P0_0:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 0 -; CHECK-DAG: [[P0_1:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 1 -; CHECK-DAG: [[P0_2:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 2 -; CHECK-DAG: [[P0_3:%.*]] = OpCompositeExtract [[UINT]] [[P0]] 3 -; CHECK-DAG: [[UNPACK4_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_0]] -; CHECK-DAG: [[UNPACK4_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_1]] -; CHECK-DAG: [[UNPACK4_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_2]] -; CHECK-DAG: [[UNPACK4_3:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0_3]] -; CHECK-DAG: [[RESULT_0:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_0]] 0 -; CHECK-DAG: [[RESULT_1:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_1]] 0 -; CHECK-DAG: [[RESULT_2:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_2]] 0 -; CHECK-DAG: [[RESULT_3:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK4_3]] 0 -; CHECK: [[RESULT:%.*]] = 
OpCompositeConstruct [[FLOAT4]] [[RESULT_0]] [[RESULT_1]] [[RESULT_2]] [[RESULT_3]] -; CHECK: OpReturnValue [[RESULT]] -define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 { -entry: - %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.spv.legacyf16tof32.v4i32(<4 x i32> %p0) - ret <4 x float> %hlsl.f16tof32 -} diff --git a/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll b/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll new file mode 100644 index 0000000000000..6a9ce4515f5c0 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll @@ -0,0 +1,18 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0 +; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32 +; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2 + +; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT]] +; CHECK: [[UNPACK2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0]] +; CHECK: [[UNPACK:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2]] 0 +; CHECK: OpReturnValue [[UNPACK]] +define hidden spir_func noundef nofpclass(nan inf) float @_Z9test_funcj(i32 noundef %0) local_unnamed_addr #0 { + %2 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %0) + %3 = extractelement <2 x float> %2, i64 0 + ret float %3 +} +