diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1b559a628be08..7f942fdd95211 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -502,9 +502,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   // The hardware supports 32-bit FSHR, but not FSHL.
   setOperationAction(ISD::FSHR, MVT::i32, Legal);
 
-  // The hardware supports 32-bit ROTR, but not ROTL.
-  setOperationAction(ISD::ROTL, {MVT::i32, MVT::i64}, Expand);
-  setOperationAction(ISD::ROTR, MVT::i64, Expand);
+  setOperationAction({ISD::ROTL, ISD::ROTR}, {MVT::i32, MVT::i64}, Expand);
 
   setOperationAction({ISD::MULHU, ISD::MULHS}, MVT::i16, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index bd443b5b6f1e6..ddcb431f39a87 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -806,12 +806,6 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
   (vt rc:$addr)
 >;
 
-// rotr pattern
-class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
-  (rotr i32:$src0, i32:$src1),
-  (BIT_ALIGN $src0, $src0, $src1)
->;
-
 // Special conversion patterns
 
 def cvt_rpi_i32_f32 : PatFrag <
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index dadc7dcd7054a..a2e3ecef1c206 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -505,7 +505,6 @@ def : AMDGPUPat <
   (fshr i32:$src0, i32:$src1, i32:$src2),
   (BIT_ALIGN_INT_eg $src0, $src1, $src2)
 >;
-def : ROTRPattern <BIT_ALIGN_INT_eg>;
 def MULADD_eg : MULADD_Common<0x14>;
 def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
 def FMA_eg : FMA_Common<0x7>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 6f1feb1dc2996..a432d297da595 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2663,8 +2663,6 @@ def : AMDGPUPat <
 let True16Predicate = NotHasTrue16BitInsts in {
 let SubtargetPredicate = isNotGFX9Plus in {
-def : ROTRPattern <V_ALIGNBIT_B32_e64>;
-
 def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (and i32:$src1, (i32 31))))),
           (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
                               (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
@@ -2675,14 +2673,6 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
 } // isNotGFX9Plus
 
 let SubtargetPredicate = isGFX9GFX10 in {
-def : GCNPat <
-  (rotr i32:$src0, i32:$src1),
-  (V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
-                            /* src1_modifiers */ 0, $src0,
-                            /* src2_modifiers */ 0,
-                            $src1, /* clamp */ 0, /* op_sel */ 0)
->;
-
 foreach pat = [(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (and i32:$src1, (i32 31))))),
                (i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:$src1))))] in
 def : GCNPat<pat,
@@ -2704,14 +2694,5 @@ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
 } // isGFX9GFX10
 
 let True16Predicate = UseRealTrue16Insts in {
-def : GCNPat <
-  (rotr i32:$src0, i32:$src1),
-  (V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
-                          /* src1_modifiers */ 0, $src0,
-                          /* src2_modifiers */ 0,
-                          (i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
-                          /* clamp */ 0, /* op_sel */ 0)
->;
-
 def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
     (V_ALIGNBIT_B32_t16_e64 0, /* src0_modifiers */ (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
@@ -2731,14 +2712,6 @@ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
 } // end True16Predicate = UseRealTrue16Insts
 
 let True16Predicate = UseFakeTrue16Insts in {
-def : GCNPat <
-  (rotr i32:$src0, i32:$src1),
-  (V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
-                             /* src1_modifiers */ 0, $src0,
-                             /* src2_modifiers */ 0,
-                             $src1, /* clamp */ 0, /* op_sel */ 0)
->;
-
 def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (and i32:$src1, (i32 31))))),
     (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */ (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
diff --git a/llvm/test/CodeGen/AMDGPU/packetizer.ll b/llvm/test/CodeGen/AMDGPU/packetizer.ll
index b9bf13886d366..9d620d671dd8a 100644
--- a/llvm/test/CodeGen/AMDGPU/packetizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/packetizer.ll
@@ -5,43 +5,37 @@
 define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %x_arg, i32 %y_arg, i32 %z_arg, i32 %w_arg, i32 %e) {
 ; R600-LABEL: test:
 ; R600:       ; %bb.0: ; %entry
-; R600-NEXT:    ALU 12, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
 ; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
 ; R600-NEXT:    CF_END
 ; R600-NEXT:    PAD
 ; R600-NEXT:    ALU clause starting at 4:
-; R600-NEXT:     ADD_INT T0.Y, KC0[3].X, 1,
-; R600-NEXT:     ADD_INT T0.Z, KC0[3].Y, 1,
-; R600-NEXT:     ADD_INT T0.W, KC0[2].Z, 1,
-; R600-NEXT:     ADD_INT * T1.W, KC0[2].W, 1,
-; R600-NEXT:     BIT_ALIGN_INT T0.X, PS, PS, KC0[3].Z,
-; R600-NEXT:     BIT_ALIGN_INT T1.Y, PV.W, PV.W, KC0[3].Z,
-; R600-NEXT:     BIT_ALIGN_INT T0.Z, PV.Z, PV.Z, KC0[3].Z,
-; R600-NEXT:     BIT_ALIGN_INT * T0.W, PV.Y, PV.Y, KC0[3].Z,
-; R600-NEXT:     OR_INT T0.W, PV.W, PV.Z,
-; R600-NEXT:     OR_INT * T1.W, PV.Y, PV.X,
-; R600-NEXT:     OR_INT T0.X, PS, PV.W,
+; R600-NEXT:     ADD_INT T0.Y, KC0[2].W, 1,
+; R600-NEXT:     ADD_INT T0.Z, KC0[2].Z, 1,
+; R600-NEXT:     ADD_INT T0.W, KC0[3].Y, 1,
+; R600-NEXT:     ADD_INT * T1.W, KC0[3].X, 1,
+; R600-NEXT:     OR_INT T0.W, PS, PV.W,
+; R600-NEXT:     OR_INT * T1.W, PV.Z, PV.Y,
+; R600-NEXT:     OR_INT * T0.W, PS, PV.W,
+; R600-NEXT:     BIT_ALIGN_INT T0.X, PV.W, PV.W, KC0[3].Z,
 ; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
 ; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
 ; CM-LABEL: test:
 ; CM:       ; %bb.0: ; %entry
-; CM-NEXT:    ALU 12, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
 ; CM-NEXT:    ALU clause starting at 4:
-; CM-NEXT:     ADD_INT T0.X, KC0[3].X, 1,
-; CM-NEXT:     ADD_INT T0.Y, KC0[3].Y, 1,
-; CM-NEXT:     ADD_INT T0.Z, KC0[2].Z, 1,
-; CM-NEXT:     ADD_INT * T0.W, KC0[2].W, 1,
-; CM-NEXT:     BIT_ALIGN_INT T1.X, PV.W, PV.W, KC0[3].Z,
-; CM-NEXT:     BIT_ALIGN_INT T1.Y, PV.Z, PV.Z, KC0[3].Z,
-; CM-NEXT:     BIT_ALIGN_INT T0.Z, PV.Y, PV.Y, KC0[3].Z,
-; CM-NEXT:     BIT_ALIGN_INT * T0.W, PV.X, PV.X, KC0[3].Z,
+; CM-NEXT:     ADD_INT T0.X, KC0[2].W, 1,
+; CM-NEXT:     ADD_INT T0.Y, KC0[2].Z, 1,
+; CM-NEXT:     ADD_INT T0.Z, KC0[3].Y, 1,
+; CM-NEXT:     ADD_INT * T0.W, KC0[3].X, 1,
 ; CM-NEXT:     OR_INT T0.Z, PV.W, PV.Z,
 ; CM-NEXT:     OR_INT * T0.W, PV.Y, PV.X,
-; CM-NEXT:     OR_INT * T0.X, PV.W, PV.Z,
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     BIT_ALIGN_INT * T0.X, PV.W, PV.W, KC0[3].Z,
 ; CM-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index 0741cb256cc24..75263683371be 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -353,7 +353,7 @@ define hidden void @shuffle5341ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v0, v[0:1], off
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_alignbit_b32 v0, v0, v0, 16
+; GFX10-NEXT:    v_perm_b32 v0, v0, v0, 0x5040706
 ; GFX10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -361,8 +361,9 @@ define hidden void @shuffle5341ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    s_mov_b32 s4, 0x5040706
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_alignbit_b32 v0, v0, v0, 16
+; GFX9-NEXT:    v_perm_b32 v0, v0, v0, s4
 ; GFX9-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index 28330bfc9bb69..acf999e586a68 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -1470,21 +1470,20 @@ define amdgpu_kernel void @s_shl_inline_imm_1_i64(ptr addrspace(1) %out, ptr add
 ;
 ; EG-LABEL: s_shl_inline_imm_1_i64:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 11, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 10, @4, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    ALU clause starting at 4:
 ; EG-NEXT:     AND_INT T0.W, KC0[2].W, literal.x,
-; EG-NEXT:     LSHL * T1.W, KC0[2].W, literal.y,
-; EG-NEXT:    31(4.344025e-44), 26(3.643376e-44)
-; EG-NEXT:     ASHR T1.W, PS, literal.x,
-; EG-NEXT:     LSHL * T0.W, 1, PV.W,
+; EG-NEXT:     NOT_INT * T1.W, KC0[2].W,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T0.Y, PV.W, PS,
-; EG-NEXT:     AND_INT * T1.W, KC0[2].W, literal.x,
+; EG-NEXT:     BIT_ALIGN_INT T0.Z, 0.0, 0.0, PS,
+; EG-NEXT:     AND_INT T1.W, KC0[2].W, literal.x,
+; EG-NEXT:     LSHL * T0.W, 1, PV.W,
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T0.X, PV.W, T0.W, 0.0,
+; EG-NEXT:     CNDE_INT * T0.Y, PV.W, PV.Z, PS,
+; EG-NEXT:     CNDE_INT T0.X, T1.W, T0.W, 0.0,
 ; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %shl = shl i64 1, %a
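Note on why dropping the dedicated rotr patterns is safe: rotr(x, s) is just a funnel shift with both inputs equal, fshr(x, x, s), and ISD::FSHR stays Legal for i32, so the generic expansion of a now-illegal ROTR should reach the same BIT_ALIGN_INT / v_alignbit_b32 through the surviving fshr patterns. Going through the generic node also frees later combines to pick better forms, as the new v_perm_b32 in permute_i8.ll shows (the extra s_mov_b32 in the GFX9 check appears because VOP3 instructions cannot encode a 32-bit literal before GFX10). A minimal IR sketch of the equivalence the Expand action relies on (a hypothetical standalone test, not part of this patch):

; 32-bit rotate right spelled as a funnel shift with both inputs equal.
; After this change, (rotr i32 %x, %s) legalizes to exactly this node,
; so both forms should select the same alignbit instruction.
define i32 @rotr_i32(i32 %x, i32 %s) {
  %r = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %s)
  ret i32 %r
}
declare i32 @llvm.fshr.i32(i32, i32, i32)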