Skip to content

Commit 78428ce

Browse files
authored
[SPIRV] Addition of @llvm.lround.* and @llvm.llround.* intrinsic (#129240)
--Added legalizer for @llvm.lround.* and @llvm.llround.* intrinsic --Added Instruction Selector for @llvm.lround.* and @llvm.llround.* intrinsic --Added tests for @llvm.lround.* and @llvm.llround.* intrinsic
1 parent ca9fbb5 commit 78428ce

File tree

4 files changed

+235
-1
lines changed

4 files changed

+235
-1
lines changed

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,12 @@ class SPIRVInstructionSelector : public InstructionSelector {
278278
GL::GLSLExtInst GLInst) const;
279279
bool selectExtInst(Register ResVReg, const SPIRVType *ResType,
280280
MachineInstr &I, const ExtInstList &ExtInsts) const;
281+
bool selectExtInstForLRound(Register ResVReg, const SPIRVType *ResType,
282+
MachineInstr &I, CL::OpenCLExtInst CLInst,
283+
GL::GLSLExtInst GLInst) const;
284+
bool selectExtInstForLRound(Register ResVReg, const SPIRVType *ResType,
285+
MachineInstr &I,
286+
const ExtInstList &ExtInsts) const;
281287

282288
bool selectLog10(Register ResVReg, const SPIRVType *ResType,
283289
MachineInstr &I) const;
@@ -708,7 +714,22 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
708714
return selectSUCmp(ResVReg, ResType, I, true);
709715
case TargetOpcode::G_UCMP:
710716
return selectSUCmp(ResVReg, ResType, I, false);
711-
717+
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND: {
  // lround/llround are selected in two steps: the source value is rounded
  // with the extended instruction `round` (CL::round / GL::Round) into a
  // temporary register, and that temporary is then converted to the result
  // type with OpConvertFToS.
  //
  // The temporary holds the rounded value, so it is assigned the SPIR-V type
  // of operand 1 of I rather than ResType.
  Register regForLround =
      MRI->createVirtualRegister(MRI->getRegClass(ResVReg), "lround");
  MRI->setRegClass(regForLround, &SPIRV::iIDRegClass);
  GR.assignSPIRVTypeToVReg(GR.getSPIRVTypeForVReg(I.getOperand(1).getReg()),
                           regForLround, *(I.getParent()->getParent()));
  // If no usable extended instruction set is available, nothing defines
  // regForLround. Fail the selection instead of emitting an OpConvertFToS
  // that reads an undefined register.
  if (!selectExtInstForLRound(regForLround,
                              GR.getSPIRVTypeForVReg(regForLround), I,
                              CL::round, GL::Round))
    return false;
  MachineBasicBlock &BB = *I.getParent();
  auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConvertFToS))
                 .addDef(ResVReg)
                 .addUse(GR.getSPIRVTypeID(ResType))
                 .addUse(regForLround);
  return MIB.constrainAllUses(TII, TRI, RBI);
}
712733
case TargetOpcode::G_STRICT_FMA:
713734
case TargetOpcode::G_FMA:
714735
return selectExtInst(ResVReg, ResType, I, CL::fma, GL::Fma);
@@ -1047,6 +1068,41 @@ bool SPIRVInstructionSelector::selectExtInst(Register ResVReg,
10471068
}
10481069
return false;
10491070
}
1071+
/// Convenience overload of selectExtInstForLRound that takes one OpenCL.std
/// and one GLSL.std.450 extended instruction instead of an explicit list.
///
/// The two candidates are packed into an ExtInstList (OpenCL.std first, then
/// GLSL.std.450) and forwarded to the list-based overload, whose result is
/// returned unchanged.
bool SPIRVInstructionSelector::selectExtInstForLRound(
    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
    CL::OpenCLExtInst CLInst, GL::GLSLExtInst GLInst) const {
  const ExtInstList Candidates = {
      {SPIRV::InstructionSet::OpenCL_std, CLInst},
      {SPIRV::InstructionSet::GLSL_std_450, GLInst}};
  return selectExtInstForLRound(ResVReg, ResType, I, Candidates);
}
1078+
1079+
/// Emits an OpExtInst that defines \p ResVReg with type \p ResType, using the
/// first entry of \p Insts whose instruction set the subtarget can use.
///
/// The operands of \p I are forwarded to the new instruction starting at
/// operand 1, or at operand 2 when operand 1 is an intrinsic ID; operand 0 is
/// never forwarded.
///
/// \returns false if none of the listed instruction sets is usable (in which
/// case nothing is emitted); otherwise the result of constraining the
/// register operands of the emitted instruction.
bool SPIRVInstructionSelector::selectExtInstForLRound(
    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
    const ExtInstList &Insts) const {
  for (const auto &Ex : Insts) {
    const SPIRV::InstructionSet::InstructionSet Set = Ex.first;
    if (!STI.canUseExtInstSet(Set))
      continue;

    const uint32_t Opcode = Ex.second;
    MachineBasicBlock &BB = *I.getParent();
    auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
                   .addDef(ResVReg)
                   .addUse(GR.getSPIRVTypeID(ResType))
                   .addImm(static_cast<uint32_t>(Set))
                   .addImm(Opcode);

    // Skip the intrinsic ID operand, if present, so that only the actual
    // arguments are copied onto the extended instruction.
    const unsigned NumOps = I.getNumOperands();
    unsigned Index = 1;
    if (Index < NumOps &&
        I.getOperand(Index).getType() ==
            MachineOperand::MachineOperandType::MO_IntrinsicID)
      Index = 2;
    for (; Index < NumOps; ++Index)
      MIB.add(I.getOperand(Index));

    // Report a failure to constrain the operands instead of dropping it.
    return MIB.constrainAllUses(TII, TRI, RBI);
  }
  return false;
}
10501106

10511107
bool SPIRVInstructionSelector::selectOpWithSrcs(Register ResVReg,
10521108
const SPIRVType *ResType,

llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,10 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
276276
{G_UADDO, G_SADDO, G_USUBO, G_SSUBO, G_UMULO, G_SMULO})
277277
.alwaysLegal();
278278

279+
getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
280+
.legalForCartesianProduct(allFloatScalarsAndVectors,
281+
allIntScalarsAndVectors);
282+
279283
// FP conversions.
280284
getActionDefinitionsBuilder({G_FPTRUNC, G_FPEXT})
281285
.legalForCartesianProduct(allFloatScalarsAndVectors);
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
2+
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
3+
4+
; CHECK: [[opencl:%[0-9]+]] = OpExtInstImport "OpenCL.std"
5+
; CHECK-DAG: [[f32:%[0-9]+]] = OpTypeFloat 32
6+
; CHECK-DAG: [[i32:%[0-9]+]] = OpTypeInt 32 0
7+
; CHECK-DAG: [[f64:%[0-9]+]] = OpTypeFloat 64
8+
; CHECK-DAG: [[i64:%[0-9]+]] = OpTypeInt 64 0
9+
; CHECK-DAG: [[vecf32:%[0-9]+]] = OpTypeVector [[f32]]
10+
; CHECK-DAG: [[veci32:%[0-9]+]] = OpTypeVector [[i32]]
11+
; CHECK-DAG: [[vecf64:%[0-9]+]] = OpTypeVector [[f64]]
12+
; CHECK-DAG: [[veci64:%[0-9]+]] = OpTypeVector [[i64]]
13+
14+
; CHECK: [[rounded_i32_f32:%[0-9]+]] = OpExtInst [[f32]] [[opencl]] round %[[#]]
15+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[i32]] [[rounded_i32_f32]]
16+
; CHECK: [[rounded_i32_f64:%[0-9]+]] = OpExtInst [[f64]] [[opencl]] round %[[#]]
17+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[i32]] [[rounded_i32_f64]]
18+
; CHECK: [[rounded_i64_f32:%[0-9]+]] = OpExtInst [[f32]] [[opencl]] round %[[#]]
19+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[i64]] [[rounded_i64_f32]]
20+
; CHECK: [[rounded_i64_f64:%[0-9]+]] = OpExtInst [[f64]] [[opencl]] round %[[#]]
21+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[i64]] [[rounded_i64_f64]]
22+
; CHECK: [[rounded_v4i32_f32:%[0-9]+]] = OpExtInst [[vecf32]] [[opencl]] round %[[#]]
23+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[veci32]] [[rounded_v4i32_f32]]
24+
; CHECK: [[rounded_v4i32_f64:%[0-9]+]] = OpExtInst [[vecf64]] [[opencl]] round %[[#]]
25+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[veci32]] [[rounded_v4i32_f64]]
26+
; CHECK: [[rounded_v4i64_f32:%[0-9]+]] = OpExtInst [[vecf32]] [[opencl]] round %[[#]]
27+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[veci64]] [[rounded_v4i64_f32]]
28+
; CHECK: [[rounded_v4i64_f64:%[0-9]+]] = OpExtInst [[vecf64]] [[opencl]] round %[[#]]
29+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[veci64]] [[rounded_v4i64_f64]]
30+
31+
define spir_func i32 @test_llround_i32_f32(float %arg0) {
32+
entry:
33+
%0 = call i32 @llvm.llround.i32.f32(float %arg0)
34+
ret i32 %0
35+
}
36+
37+
define spir_func i32 @test_llround_i32_f64(double %arg0) {
38+
entry:
39+
%0 = call i32 @llvm.llround.i32.f64(double %arg0)
40+
ret i32 %0
41+
}
42+
43+
define spir_func i64 @test_llround_i64_f32(float %arg0) {
44+
entry:
45+
%0 = call i64 @llvm.llround.i64.f32(float %arg0)
46+
ret i64 %0
47+
}
48+
49+
define spir_func i64 @test_llround_i64_f64(double %arg0) {
50+
entry:
51+
%0 = call i64 @llvm.llround.i64.f64(double %arg0)
52+
ret i64 %0
53+
}
54+
55+
define spir_func <4 x i32> @test_llround_v4i32_f32(<4 x float> %arg0) {
56+
entry:
57+
%0 = call <4 x i32> @llvm.llround.v4i32.f32(<4 x float> %arg0)
58+
ret <4 x i32> %0
59+
}
60+
61+
define spir_func <4 x i32> @test_llround_v4i32_f64(<4 x double> %arg0) {
62+
entry:
63+
%0 = call <4 x i32> @llvm.llround.v4i32.f64(<4 x double> %arg0)
64+
ret <4 x i32> %0
65+
}
66+
67+
define spir_func <4 x i64> @test_llround_v4i64_f32(<4 x float> %arg0) {
68+
entry:
69+
%0 = call <4 x i64> @llvm.llround.v4i64.f32(<4 x float> %arg0)
70+
ret <4 x i64> %0
71+
}
72+
73+
define spir_func <4 x i64> @test_llround_v4i64_f64(<4 x double> %arg0) {
74+
entry:
75+
%0 = call <4 x i64> @llvm.llround.v4i64.f64(<4 x double> %arg0)
76+
ret <4 x i64> %0
77+
}
78+
79+
declare i32 @llvm.llround.i32.f32(float)
80+
declare i32 @llvm.llround.i32.f64(double)
81+
declare i64 @llvm.llround.i64.f32(float)
82+
declare i64 @llvm.llround.i64.f64(double)
83+
84+
declare <4 x i32> @llvm.llround.v4i32.f32(<4 x float>)
85+
declare <4 x i32> @llvm.llround.v4i32.f64(<4 x double>)
86+
declare <4 x i64> @llvm.llround.v4i64.f32(<4 x float>)
87+
declare <4 x i64> @llvm.llround.v4i64.f64(<4 x double>)
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
2+
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
3+
4+
; CHECK: [[opencl:%[0-9]+]] = OpExtInstImport "OpenCL.std"
5+
; CHECK-DAG: [[f32:%[0-9]+]] = OpTypeFloat 32
6+
; CHECK-DAG: [[i32:%[0-9]+]] = OpTypeInt 32 0
7+
; CHECK-DAG: [[f64:%[0-9]+]] = OpTypeFloat 64
8+
; CHECK-DAG: [[i64:%[0-9]+]] = OpTypeInt 64 0
9+
; CHECK-DAG: [[vecf32:%[0-9]+]] = OpTypeVector [[f32]]
10+
; CHECK-DAG: [[veci32:%[0-9]+]] = OpTypeVector [[i32]]
11+
; CHECK-DAG: [[vecf64:%[0-9]+]] = OpTypeVector [[f64]]
12+
; CHECK-DAG: [[veci64:%[0-9]+]] = OpTypeVector [[i64]]
13+
14+
; CHECK: [[rounded_i32_f32:%[0-9]+]] = OpExtInst [[f32]] [[opencl]] round %[[#]]
15+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[i32]] [[rounded_i32_f32]]
16+
; CHECK: [[rounded_i32_f64:%[0-9]+]] = OpExtInst [[f64]] [[opencl]] round %[[#]]
17+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[i32]] [[rounded_i32_f64]]
18+
; CHECK: [[rounded_i64_f32:%[0-9]+]] = OpExtInst [[f32]] [[opencl]] round %[[#]]
19+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[i64]] [[rounded_i64_f32]]
20+
; CHECK: [[rounded_i64_f64:%[0-9]+]] = OpExtInst [[f64]] [[opencl]] round %[[#]]
21+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[i64]] [[rounded_i64_f64]]
22+
; CHECK: [[rounded_v4i32_f32:%[0-9]+]] = OpExtInst [[vecf32]] [[opencl]] round %[[#]]
23+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[veci32]] [[rounded_v4i32_f32]]
24+
; CHECK: [[rounded_v4i32_f64:%[0-9]+]] = OpExtInst [[vecf64]] [[opencl]] round %[[#]]
25+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[veci32]] [[rounded_v4i32_f64]]
26+
; CHECK: [[rounded_v4i64_f32:%[0-9]+]] = OpExtInst [[vecf32]] [[opencl]] round %[[#]]
27+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[veci64]] [[rounded_v4i64_f32]]
28+
; CHECK: [[rounded_v4i64_f64:%[0-9]+]] = OpExtInst [[vecf64]] [[opencl]] round %[[#]]
29+
; CHECK-NEXT: %[[#]] = OpConvertFToS [[veci64]] [[rounded_v4i64_f64]]
30+
31+
define spir_func i32 @test_lround_i32_f32(float %arg0) {
32+
entry:
33+
%0 = call i32 @llvm.lround.i32.f32(float %arg0)
34+
ret i32 %0
35+
}
36+
37+
define spir_func i32 @test_lround_i32_f64(double %arg0) {
38+
entry:
39+
%0 = call i32 @llvm.lround.i32.f64(double %arg0)
40+
ret i32 %0
41+
}
42+
43+
define spir_func i64 @test_lround_i64_f32(float %arg0) {
44+
entry:
45+
%0 = call i64 @llvm.lround.i64.f32(float %arg0)
46+
ret i64 %0
47+
}
48+
49+
define spir_func i64 @test_lround_i64_f64(double %arg0) {
50+
entry:
51+
%0 = call i64 @llvm.lround.i64.f64(double %arg0)
52+
ret i64 %0
53+
}
54+
55+
define spir_func <4 x i32> @test_lround_v4i32_f32(<4 x float> %arg0) {
56+
entry:
57+
%0 = call <4 x i32> @llvm.lround.v4i32.f32(<4 x float> %arg0)
58+
ret <4 x i32> %0
59+
}
60+
61+
define spir_func <4 x i32> @test_lround_v4i32_f64(<4 x double> %arg0) {
62+
entry:
63+
%0 = call <4 x i32> @llvm.lround.v4i32.f64(<4 x double> %arg0)
64+
ret <4 x i32> %0
65+
}
66+
67+
define spir_func <4 x i64> @test_lround_v4i64_f32(<4 x float> %arg0) {
68+
entry:
69+
%0 = call <4 x i64> @llvm.lround.v4i64.f32(<4 x float> %arg0)
70+
ret <4 x i64> %0
71+
}
72+
73+
define spir_func <4 x i64> @test_lround_v4i64_f64(<4 x double> %arg0) {
74+
entry:
75+
%0 = call <4 x i64> @llvm.lround.v4i64.f64(<4 x double> %arg0)
76+
ret <4 x i64> %0
77+
}
78+
79+
declare i32 @llvm.lround.i32.f32(float)
80+
declare i32 @llvm.lround.i32.f64(double)
81+
declare i64 @llvm.lround.i64.f32(float)
82+
declare i64 @llvm.lround.i64.f64(double)
83+
84+
declare <4 x i32> @llvm.lround.v4i32.f32(<4 x float>)
85+
declare <4 x i32> @llvm.lround.v4i32.f64(<4 x double>)
86+
declare <4 x i64> @llvm.lround.v4i64.f32(<4 x float>)
87+
declare <4 x i64> @llvm.lround.v4i64.f64(<4 x double>)

0 commit comments

Comments
 (0)