[SPIRV] Addition of @llvm.lround.* and @llvm.llround.* intrinsic (#129240)

aadeshps-mcw · web-flow · commit 78428cec39b3 · 2025-09-25T17:32:39.000+02:00
--Added legalizer for @llvm.lround.* and @llvm.llround.* intrinsic
--Added Instruction Selector for @llvm.lround.* and @llvm.llround.*
intrinsic
--Added tests for @llvm.lround.* and @llvm.llround.* intrinsic
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -278,6 +278,12 @@ class SPIRVInstructionSelector : public InstructionSelector {
                      GL::GLSLExtInst GLInst) const;
   bool selectExtInst(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I, const ExtInstList &ExtInsts) const;
+  bool selectExtInstForLRound(Register ResVReg, const SPIRVType *ResType,
+                              MachineInstr &I, CL::OpenCLExtInst CLInst,
+                              GL::GLSLExtInst GLInst) const;
+  bool selectExtInstForLRound(Register ResVReg, const SPIRVType *ResType,
+                              MachineInstr &I,
+                              const ExtInstList &ExtInsts) const;
 
   bool selectLog10(Register ResVReg, const SPIRVType *ResType,
                    MachineInstr &I) const;
@@ -708,7 +714,22 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
     return selectSUCmp(ResVReg, ResType, I, true);
   case TargetOpcode::G_UCMP:
     return selectSUCmp(ResVReg, ResType, I, false);
-
+  case TargetOpcode::G_LROUND:
+  case TargetOpcode::G_LLROUND: {
+    // Lower to an extended-set round() on the source type, then OpConvertFToS.
+    Register Rounded =
+        MRI->createVirtualRegister(&SPIRV::iIDRegClass, "lround");
+    const SPIRVType *SrcType = GR.getSPIRVTypeForVReg(I.getOperand(1).getReg());
+    GR.assignSPIRVTypeToVReg(SrcType, Rounded, *(I.getParent()->getParent()));
+    if (!selectExtInstForLRound(Rounded, SrcType, I, CL::round, GL::Round))
+      return false; // No usable instruction set: Rounded was never defined.
+    MachineBasicBlock &BB = *I.getParent();
+    return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConvertFToS))
+        .addDef(ResVReg)
+        .addUse(GR.getSPIRVTypeID(ResType))
+        .addUse(Rounded)
+        .constrainAllUses(TII, TRI, RBI);
+  }
   case TargetOpcode::G_STRICT_FMA:
   case TargetOpcode::G_FMA:
     return selectExtInst(ResVReg, ResType, I, CL::fma, GL::Fma);
@@ -1047,6 +1068,41 @@ bool SPIRVInstructionSelector::selectExtInst(Register ResVReg,
   }
   return false;
 }
+// Convenience overload: tries CLInst (OpenCL_std), then GLInst (GLSL_std_450).
+bool SPIRVInstructionSelector::selectExtInstForLRound(
+    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+    CL::OpenCLExtInst CLInst, GL::GLSLExtInst GLInst) const {
+  const ExtInstList Sets = {{SPIRV::InstructionSet::OpenCL_std, CLInst},
+                            {SPIRV::InstructionSet::GLSL_std_450, GLInst}};
+  return selectExtInstForLRound(ResVReg, ResType, I, Sets);
+}
+
+// Emits an OpExtInst defining ResVReg for the first entry of Insts whose set
+// STI can use, and returns the result of constrainAllUses on it. Returns false
+// without emitting anything when none of the listed sets is usable.
+bool SPIRVInstructionSelector::selectExtInstForLRound(
+    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+    const ExtInstList &Insts) const {
+  for (const auto &Ex : Insts) {
+    const SPIRV::InstructionSet::InstructionSet Set = Ex.first;
+    const uint32_t Opcode = Ex.second;
+    if (!STI.canUseExtInstSet(Set))
+      continue;
+    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                       TII.get(SPIRV::OpExtInst))
+                   .addDef(ResVReg)
+                   .addUse(GR.getSPIRVTypeID(ResType))
+                   .addImm(static_cast<uint32_t>(Set))
+                   .addImm(Opcode);
+    // Forward I's operands from index 1, or 2 when slot 1 is an intrinsic ID.
+    const unsigned NumOps = I.getNumOperands();
+    const bool SkipID = NumOps > 1 && I.getOperand(1).isIntrinsicID();
+    for (unsigned Idx = SkipID ? 2 : 1; Idx < NumOps; ++Idx)
+      MIB.add(I.getOperand(Idx));
+    return MIB.constrainAllUses(TII, TRI, RBI);
+  }
+  return false;
+}
 
 bool SPIRVInstructionSelector::selectOpWithSrcs(Register ResVReg,
                                                 const SPIRVType *ResType,
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -276,6 +276,10 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
       {G_UADDO, G_SADDO, G_USUBO, G_SSUBO, G_UMULO, G_SMULO})
       .alwaysLegal();
 
+  getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
+      .legalForCartesianProduct(allFloatScalarsAndVectors,
+                                allIntScalarsAndVectors);
+
   // FP conversions.
   getActionDefinitionsBuilder({G_FPTRUNC, G_FPEXT})
       .legalForCartesianProduct(allFloatScalarsAndVectors);
diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/llround.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/llround.ll
@@ -0,0 +1,87 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK:          [[opencl:%[0-9]+]] = OpExtInstImport "OpenCL.std"
+; CHECK-DAG:      [[f32:%[0-9]+]] = OpTypeFloat 32
+; CHECK-DAG:      [[i32:%[0-9]+]] = OpTypeInt 32 0
+; CHECK-DAG:      [[f64:%[0-9]+]] = OpTypeFloat 64
+; CHECK-DAG:      [[i64:%[0-9]+]] = OpTypeInt 64 0
+; CHECK-DAG:      [[vecf32:%[0-9]+]] = OpTypeVector [[f32]]
+; CHECK-DAG:      [[veci32:%[0-9]+]] = OpTypeVector [[i32]]
+; CHECK-DAG:      [[vecf64:%[0-9]+]] = OpTypeVector [[f64]]
+; CHECK-DAG:      [[veci64:%[0-9]+]] = OpTypeVector [[i64]]
+
+; CHECK:      [[rounded_i32_f32:%[0-9]+]] = OpExtInst [[f32]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[i32]] [[rounded_i32_f32]]
+; CHECK:      [[rounded_i32_f64:%[0-9]+]] = OpExtInst [[f64]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[i32]] [[rounded_i32_f64]]
+; CHECK:      [[rounded_i64_f32:%[0-9]+]] = OpExtInst [[f32]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[i64]] [[rounded_i64_f32]]
+; CHECK:      [[rounded_i64_f64:%[0-9]+]] = OpExtInst [[f64]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[i64]] [[rounded_i64_f64]]
+; CHECK:      [[rounded_v4i32_f32:%[0-9]+]] = OpExtInst [[vecf32]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[veci32]] [[rounded_v4i32_f32]]
+; CHECK:      [[rounded_v4i32_f64:%[0-9]+]] = OpExtInst [[vecf64]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[veci32]] [[rounded_v4i32_f64]]
+; CHECK:      [[rounded_v4i64_f32:%[0-9]+]] = OpExtInst [[vecf32]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[veci64]] [[rounded_v4i64_f32]]
+; CHECK:      [[rounded_v4i64_f64:%[0-9]+]] = OpExtInst [[vecf64]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[veci64]] [[rounded_v4i64_f64]]
+
+define spir_func i32 @test_llround_i32_f32(float %arg0) {
+entry:
+  %0 = call i32 @llvm.llround.i32.f32(float %arg0)
+  ret i32 %0
+}
+
+define spir_func i32 @test_llround_i32_f64(double %arg0) {
+entry:
+  %0 = call i32 @llvm.llround.i32.f64(double %arg0)
+  ret i32 %0
+}
+
+define spir_func i64 @test_llround_i64_f32(float %arg0) {
+entry:
+  %0 = call i64 @llvm.llround.i64.f32(float %arg0)
+  ret i64 %0
+}
+
+define spir_func i64 @test_llround_i64_f64(double %arg0) {
+entry:
+  %0 = call i64 @llvm.llround.i64.f64(double %arg0)
+  ret i64 %0
+}
+
+define spir_func <4 x i32> @test_llround_v4i32_f32(<4 x float> %arg0) {
+entry:
+  %0 = call <4 x i32> @llvm.llround.v4i32.f32(<4 x float> %arg0)
+  ret <4 x i32> %0
+}
+
+define spir_func <4 x i32> @test_llround_v4i32_f64(<4 x double> %arg0) {
+entry:
+  %0 = call <4 x i32> @llvm.llround.v4i32.f64(<4 x double> %arg0)
+  ret <4 x i32> %0
+}
+
+define spir_func <4 x i64> @test_llround_v4i64_f32(<4 x float> %arg0) {
+entry:
+  %0 = call <4 x i64> @llvm.llround.v4i64.f32(<4 x float> %arg0)
+  ret <4 x i64> %0
+}
+
+define spir_func <4 x i64> @test_llround_v4i64_f64(<4 x double> %arg0) {
+entry:
+  %0 = call <4 x i64> @llvm.llround.v4i64.f64(<4 x double> %arg0)
+  ret <4 x i64> %0
+}
+
+declare i32 @llvm.llround.i32.f32(float)
+declare i32 @llvm.llround.i32.f64(double)
+declare i64 @llvm.llround.i64.f32(float)
+declare i64 @llvm.llround.i64.f64(double)
+
+declare <4 x i32> @llvm.llround.v4i32.f32(<4 x float>)
+declare <4 x i32> @llvm.llround.v4i32.f64(<4 x double>)
+declare <4 x i64> @llvm.llround.v4i64.f32(<4 x float>)
+declare <4 x i64> @llvm.llround.v4i64.f64(<4 x double>)
diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lround.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lround.ll
@@ -0,0 +1,87 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK:          [[opencl:%[0-9]+]] = OpExtInstImport "OpenCL.std"
+; CHECK-DAG:      [[f32:%[0-9]+]] = OpTypeFloat 32
+; CHECK-DAG:      [[i32:%[0-9]+]] = OpTypeInt 32 0
+; CHECK-DAG:      [[f64:%[0-9]+]] = OpTypeFloat 64
+; CHECK-DAG:      [[i64:%[0-9]+]] = OpTypeInt 64 0
+; CHECK-DAG:      [[vecf32:%[0-9]+]] = OpTypeVector [[f32]]
+; CHECK-DAG:      [[veci32:%[0-9]+]] = OpTypeVector [[i32]]
+; CHECK-DAG:      [[vecf64:%[0-9]+]] = OpTypeVector [[f64]]
+; CHECK-DAG:      [[veci64:%[0-9]+]] = OpTypeVector [[i64]]
+
+; CHECK:      [[rounded_i32_f32:%[0-9]+]] = OpExtInst [[f32]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[i32]] [[rounded_i32_f32]]
+; CHECK:      [[rounded_i32_f64:%[0-9]+]] = OpExtInst [[f64]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[i32]] [[rounded_i32_f64]]
+; CHECK:      [[rounded_i64_f32:%[0-9]+]] = OpExtInst [[f32]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[i64]] [[rounded_i64_f32]]
+; CHECK:      [[rounded_i64_f64:%[0-9]+]] = OpExtInst [[f64]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[i64]] [[rounded_i64_f64]]
+; CHECK:      [[rounded_v4i32_f32:%[0-9]+]] = OpExtInst [[vecf32]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[veci32]] [[rounded_v4i32_f32]]
+; CHECK:      [[rounded_v4i32_f64:%[0-9]+]] = OpExtInst [[vecf64]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[veci32]] [[rounded_v4i32_f64]]
+; CHECK:      [[rounded_v4i64_f32:%[0-9]+]] = OpExtInst [[vecf32]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[veci64]] [[rounded_v4i64_f32]]
+; CHECK:      [[rounded_v4i64_f64:%[0-9]+]] = OpExtInst [[vecf64]] [[opencl]] round %[[#]]
+; CHECK-NEXT:      %[[#]] = OpConvertFToS [[veci64]] [[rounded_v4i64_f64]]
+
+define spir_func i32 @test_lround_i32_f32(float %arg0) {
+entry:
+  %0 = call i32 @llvm.lround.i32.f32(float %arg0)
+  ret i32 %0
+}
+
+define spir_func i32 @test_lround_i32_f64(double %arg0) {
+entry:
+  %0 = call i32 @llvm.lround.i32.f64(double %arg0)
+  ret i32 %0
+}
+
+define spir_func i64 @test_lround_i64_f32(float %arg0) {
+entry:
+  %0 = call i64 @llvm.lround.i64.f32(float %arg0)
+  ret i64 %0
+}
+
+define spir_func i64 @test_lround_i64_f64(double %arg0) {
+entry:
+  %0 = call i64 @llvm.lround.i64.f64(double %arg0)
+  ret i64 %0
+}
+
+define spir_func <4 x i32> @test_lround_v4i32_f32(<4 x float> %arg0) {
+entry:
+  %0 = call <4 x i32> @llvm.lround.v4i32.f32(<4 x float> %arg0)
+  ret <4 x i32> %0
+}
+
+define spir_func <4 x i32> @test_lround_v4i32_f64(<4 x double> %arg0) {
+entry:
+  %0 = call <4 x i32> @llvm.lround.v4i32.f64(<4 x double> %arg0)
+  ret <4 x i32> %0
+}
+
+define spir_func <4 x i64> @test_lround_v4i64_f32(<4 x float> %arg0) {
+entry:
+  %0 = call <4 x i64> @llvm.lround.v4i64.f32(<4 x float> %arg0)
+  ret <4 x i64> %0
+}
+
+define spir_func <4 x i64> @test_lround_v4i64_f64(<4 x double> %arg0) {
+entry:
+  %0 = call <4 x i64> @llvm.lround.v4i64.f64(<4 x double> %arg0)
+  ret <4 x i64> %0
+}
+
+declare i32 @llvm.lround.i32.f32(float)
+declare i32 @llvm.lround.i32.f64(double)
+declare i64 @llvm.lround.i64.f32(float)
+declare i64 @llvm.lround.i64.f64(double)
+
+declare <4 x i32> @llvm.lround.v4i32.f32(<4 x float>)
+declare <4 x i32> @llvm.lround.v4i32.f64(<4 x double>)
+declare <4 x i64> @llvm.lround.v4i64.f32(<4 x float>)
+declare <4 x i64> @llvm.lround.v4i64.f64(<4 x double>)