diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 15d5bf2c704929..7c4fd8124e7e53 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1245,7 +1245,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( const LLT AddrTy = MRI->getType(MI.getOperand(VAddrIdx).getReg()); const bool IsA16 = AddrTy.getScalarType() == S16; - Register VData; + Register VDataIn, VDataOut; LLT VDataTy; int NumVDataDwords = -1; bool IsD16 = false; @@ -1271,7 +1271,24 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( unsigned DMaskLanes = 0; if (BaseOpcode->Atomic) { - return false; // TODO + VDataOut = MI.getOperand(0).getReg(); + VDataIn = MI.getOperand(2).getReg(); + LLT Ty = MRI->getType(VDataIn); + + // Be careful to allow atomic swap on 16-bit element vectors. + const bool Is64Bit = BaseOpcode->AtomicX2 ? + Ty.getSizeInBits() == 128 : + Ty.getSizeInBits() == 64; + + if (BaseOpcode->AtomicX2) { + assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister); + + DMask = Is64Bit ? 0xf : 0x3; + NumVDataDwords = Is64Bit ? 4 : 2; + } else { + DMask = Is64Bit ? 0x3 : 0x1; + NumVDataDwords = Is64Bit ? 2 : 1; + } } else { const int DMaskIdx = 2; // Input/output + intrinsic ID. @@ -1279,12 +1296,12 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask); if (BaseOpcode->Store) { - VData = MI.getOperand(1).getReg(); - VDataTy = MRI->getType(VData); + VDataIn = MI.getOperand(1).getReg(); + VDataTy = MRI->getType(VDataIn); NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32; } else { - VData = MI.getOperand(0).getReg(); - VDataTy = MRI->getType(VData); + VDataOut = MI.getOperand(0).getReg(); + VDataTy = MRI->getType(VDataOut); NumVDataDwords = DMaskLanes; // One memoperand is mandatory, except for getresinfo. @@ -1386,11 +1403,25 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opcode)) .cloneMemRefs(MI); - if (!BaseOpcode->Store || BaseOpcode->Atomic) - MIB.addDef(VData); // vdata output + if (VDataOut) { + if (BaseOpcode->AtomicX2) { + const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64; + + Register TmpReg = MRI->createVirtualRegister( + Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass); + unsigned SubReg = Is64 ? 
AMDGPU::sub0_sub1 : AMDGPU::sub0; + + MIB.addDef(TmpReg); + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut) + .addReg(TmpReg, RegState::Kill, SubReg); + + } else { + MIB.addDef(VDataOut); // vdata output + } + } - if (BaseOpcode->Store || BaseOpcode->Atomic) - MIB.addReg(VData); // vdata input + if (VDataIn) + MIB.addReg(VDataIn); // vdata input for (int i = 0; i != NumVAddrRegs; ++i) { MachineOperand &SrcOp = MI.getOperand(VAddrIdx + i); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 8de8b337428e41..e8c950cfecc2f7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3698,6 +3698,24 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( MI.getOperand(DMaskIdx).setImm(DMask); } + if (BaseOpcode->Atomic) { + Register VData0 = MI.getOperand(2).getReg(); + LLT Ty = MRI->getType(VData0); + + // TODO: Allow atomic swap and bit ops for v2s16/v4s16 + if (Ty.isVector()) + return false; + + if (BaseOpcode->AtomicX2) { + Register VData1 = MI.getOperand(3).getReg(); + // The two values are packed in one register. + LLT PackedTy = LLT::vector(2, Ty); + auto Concat = B.buildBuildVector(PackedTy, {VData0, VData1}); + MI.getOperand(2).setReg(Concat.getReg(0)); + MI.getOperand(3).setReg(AMDGPU::NoRegister); + } + } + int CorrectedNumVAddrs = NumVAddrs; // Optimize _L to _LZ when _L is zero @@ -3785,6 +3803,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( convertImageAddrToPacked(B, MI, AddrIdx, NumVAddrs); } + if (BaseOpcode->Store) { // No TFE for stores? // TODO: Handle dmask trim Register VData = MI.getOperand(1).getReg(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll index 7f40f1c6651042..a0a1366f7408c5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll @@ -536,7 +536,8 @@ define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[COPY8]](s32), [[COPY9]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_1d @@ -555,7 +556,8 @@ define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[COPY8]](s32), [[COPY9]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -1014,10 +1016,11 @@ define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_2d @@ -1036,10 +1039,11 @@ define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -1066,6 +1070,7 @@ define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) @@ -1073,7 +1078,7 @@ define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[COPY8]](s32), [[COPY9]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_3d @@ -1093,13 +1098,14 @@ define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom 
"TargetCustom8") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -1127,6 +1133,7 @@ define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %c ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) @@ -1134,7 +1141,7 @@ define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %c ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[COPY8]](s32), [[COPY9]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa @@ -1155,13 +1162,14 @@ define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %c ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), 
[[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll new file mode 100644 index 00000000000000..304dee10c66ce1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll @@ -0,0 +1,1637 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s + +define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_swap_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_swap_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_add_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, 
v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_sub_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_sub_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_smin_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_smin_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_umin_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_umin_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; 
GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_smax_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_smax_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_umax_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_umax_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_and_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: 
s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_and_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_or_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_or_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_xor_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_xor_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_inc_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, 
s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_inc_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_dec_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_dec_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) { +; GFX9-LABEL: atomic_cmpswap_i32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_cmpswap_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + 
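A note on the cmpswap checks above, before the multi-coordinate tests that follow: they exercise the AtomicX2 path added in the selector hunk at the top of this patch, where both the compare and swap values occupy the vdata slots and only the low half of the wider destination holds the returned previous value (hence the sub0/sub0_sub1 copy out of the temporary register). The dmask/dword pairing can be cross-checked with the minimal standalone C++ sketch below; the helper name and shape are illustrative only, not code from this patch:

#include <cassert>
#include <utility>

// Mirrors the DMask/NumVDataDwords choice for image atomics: a plain atomic
// reads and writes one value (1 dword for i32, 2 for i64); cmpswap (AtomicX2)
// carries the compare and swap values packed together, so it needs twice the
// dwords, and only the low half of the destination is the loaded old value.
static std::pair<unsigned, int> atomicDMaskAndDwords(bool IsX2,
                                                     unsigned EltBits) {
  const bool Is64 = EltBits == 64;
  if (IsX2)
    return {Is64 ? 0xFu : 0x3u, Is64 ? 4 : 2};
  return {Is64 ? 0x3u : 0x1u, Is64 ? 2 : 1};
}

int main() {
  // atomic_add_i32_1d above: single dword, dmask 0x1.
  assert(atomicDMaskAndDwords(false, 32) == std::make_pair(0x1u, 1));
  // atomic_cmpswap_i32_1d above: v[0:1] in, dmask 0x3.
  assert(atomicDMaskAndDwords(true, 32) == std::make_pair(0x3u, 2));
  // i64 swap/add later in this file: v[0:1], dmask 0x3.
  assert(atomicDMaskAndDwords(false, 64) == std::make_pair(0x3u, 2));
  // atomic_cmpswap_i64_1d later in this file: v[0:3], dmask 0xf.
  assert(atomicDMaskAndDwords(true, 64) == std::make_pair(0xFu, 4));
}

This is also why the legalizer hunk builds a <2 x sN> G_BUILD_VECTOR for the two cmpswap data operands and nulls out operand 3: the selector then sees one packed vdata input whose total width (64 or 128 bits) drives the table above.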
+define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) { +; GFX9-LABEL: atomic_add_i32_2d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2 +; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, v2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) { +; GFX9-LABEL: atomic_add_i32_3d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: s_lshl_b32 s8, s0, 16 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8 +; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: s_lshl_b32 s8, s0, 16 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) { +; GFX9-LABEL: atomic_add_i32_cube: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-NEXT: 
v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: s_lshl_b32 s8, s0, 16 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8 +; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: s_lshl_b32 s8, s0, 16 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) { +; GFX9-LABEL: atomic_add_i32_1darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2 +; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 da +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, v2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) { +; GFX9-LABEL: atomic_add_i32_2darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: s_lshl_b32 s8, s0, 16 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8 +; GFX9-NEXT: 
image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: s_lshl_b32 s8, s0, 16 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) { +; GFX9-LABEL: atomic_add_i32_2dmsaa: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: s_lshl_b32 s8, s0, 16 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8 +; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_2dmsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: s_lshl_b32 s8, s0, 16 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) { +; GFX9-LABEL: atomic_add_i32_2darraymsaa: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_and_or_b32 v1, v1, v5, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 
unorm glc a16 da +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_2darraymsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: v_and_or_b32 v1, v1, v5, v2 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: v_and_or_b32 v2, v3, v5, v4 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { +; GFX9-LABEL: atomic_add_i32_1d_slc: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_1d_slc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { +; GFX9-LABEL: atomic_swap_i64_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_swap_i64_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64 %data, i16 %s, <8 
x i32> %rsrc, i32 0, i32 0) + %out = bitcast i64 %v to <2 x float> + ret <2 x float> %out +} + +define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { +; GFX9-LABEL: atomic_add_i64_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i64_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i64 %v to <2 x float> + ret <2 x float> %out +} + +define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { +; GFX9-LABEL: atomic_sub_i64_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_sub_i64_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i64 %v to <2 x float> + ret <2 x float> %out +} + +define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { +; GFX9-LABEL: atomic_smin_i64_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b32 s0, s2 +; GFX9-NEXT: s_mov_b32 s1, s3 +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s7, s9 +; GFX9-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_smin_i64_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; 
GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
+; GFX9-LABEL: atomic_umin_i64_1d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_umin_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
+; GFX9-LABEL: atomic_smax_i64_1d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_smax_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
+; GFX9-LABEL: atomic_umax_i64_1d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_umax_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
+; GFX9-LABEL: atomic_and_i64_1d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_and_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
+; GFX9-LABEL: atomic_or_i64_1d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_or_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
+; GFX9-LABEL: atomic_xor_i64_1d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_xor_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
+; GFX9-LABEL: atomic_inc_i64_1d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_inc_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
+; GFX9-LABEL: atomic_dec_i64_1d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_dec_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i16 %s) {
+; GFX9-LABEL: atomic_cmpswap_i64_1d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_cmpswap_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64 %cmp, i64 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t) {
+; GFX9-LABEL: atomic_add_i64_2d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3
+; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: v_and_or_b32 v2, v2, 0xffff, v3
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %r) {
+; GFX9-LABEL: atomic_add_i64_3d:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX9-NEXT: s_lshl_b32 s8, s0, 16
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8
+; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_3d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: s_lshl_b32 s8, s0, 16
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %face) {
+; GFX9-LABEL: atomic_add_i64_cube:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX9-NEXT: s_lshl_b32 s8, s0, 16
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8
+; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_cube:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: s_lshl_b32 s8, s0, 16
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64 %data, i16 %s, i16 %t, i16 %face , <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %slice) {
+; GFX9-LABEL: atomic_add_i64_1darray:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3
+; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 da
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_1darray:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: v_and_or_b32 v2, v2, 0xffff, v3
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice) {
+; GFX9-LABEL: atomic_add_i64_2darray:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX9-NEXT: s_lshl_b32 s8, s0, 16
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8
+; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_2darray:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: s_lshl_b32 s8, s0, 16
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %fragid) {
+; GFX9-LABEL: atomic_add_i64_2dmsaa:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX9-NEXT: s_lshl_b32 s8, s0, 16
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8
+; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_2dmsaa:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: s_lshl_b32 s8, s0, 16
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
+; GFX9-LABEL: atomic_add_i64_2darraymsaa:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX9-NEXT: v_and_or_b32 v2, v2, v6, v3
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v5
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: v_and_or_b32 v3, v4, v6, v3
+; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_2darraymsaa:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: v_and_or_b32 v2, v2, v6, v3
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: v_and_or_b32 v3, v4, v6, v5
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
+; GFX9-LABEL: atomic_add_i64_1d_slc:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc a16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_1d_slc:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+
+declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
new file mode 100644
index 00000000000000..48dbdf42480957
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
@@ -0,0 +1,2107 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
+
+
+define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX6-LABEL: atomic_swap_i32_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_swap_i32_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_swap_i32_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out =
bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_add_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_add_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_sub_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_sub_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_sub_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 
%data, i32 %s) { +; GFX6-LABEL: atomic_smin_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_smin_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_smin_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_umin_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_umin_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_umin_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_smax_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: 
s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_smax_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_smax_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_umax_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_umax_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_umax_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_and_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 
s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_and_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_and_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_or_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_or_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_or_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_xor_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 
s7, s9 +; GFX6-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_xor_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_xor_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_inc_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_inc_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_inc_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX6-LABEL: atomic_dec_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
+; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_dec_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_dec_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) { +; GFX6-LABEL: atomic_cmpswap_i32_1d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_cmpswap_i32_1d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_cmpswap_i32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) { +; GFX6-LABEL: atomic_add_i32_2d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part 
epilog +; +; GFX8-LABEL: atomic_add_i32_2d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) { +; GFX6-LABEL: atomic_add_i32_3d: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_add_i32_3d: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) { +; GFX6-LABEL: atomic_add_i32_cube: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: 
atomic_add_i32_cube: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) { +; GFX6-LABEL: atomic_add_i32_1darray: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_add_i32_1darray: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) { +; GFX6-LABEL: atomic_add_i32_2darray: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to 
shader part epilog +; +; GFX8-LABEL: atomic_add_i32_2darray: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) { +; GFX6-LABEL: atomic_add_i32_2dmsaa: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: atomic_add_i32_2dmsaa: +; GFX8: ; %bb.0: ; %main_body +; GFX8-NEXT: s_mov_b32 s0, s2 +; GFX8-NEXT: s_mov_b32 s1, s3 +; GFX8-NEXT: s_mov_b32 s2, s4 +; GFX8-NEXT: s_mov_b32 s3, s5 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s7 +; GFX8-NEXT: s_mov_b32 s6, s8 +; GFX8-NEXT: s_mov_b32 s7, s9 +; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_add_i32_2dmsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s0, s2 +; GFX10-NEXT: s_mov_b32 s1, s3 +; GFX10-NEXT: s_mov_b32 s2, s4 +; GFX10-NEXT: s_mov_b32 s3, s5 +; GFX10-NEXT: s_mov_b32 s4, s6 +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: s_mov_b32 s6, s8 +; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +; GFX6-LABEL: atomic_add_i32_2darraymsaa: +; GFX6: ; %bb.0: ; %main_body +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, s4 +; GFX6-NEXT: s_mov_b32 s3, s5 +; GFX6-NEXT: s_mov_b32 s4, s6 +; GFX6-NEXT: s_mov_b32 s5, s7 +; GFX6-NEXT: s_mov_b32 s6, s8 +; GFX6-NEXT: s_mov_b32 s7, s9 +; GFX6-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc 
da
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i32_2darraymsaa:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i32_2darraymsaa:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i32 %v to float
+ ret float %out
+}
+
+define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX6-LABEL: atomic_add_i32_1d_slc:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i32_1d_slc:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i32_1d_slc:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+ %out = bitcast i32 %v to float
+ ret float %out
+}
+
+define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_swap_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_swap_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_swap_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_add_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_sub_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_sub_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_sub_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_smin_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_smin_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_smin_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_umin_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_umin_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_umin_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_smax_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_smax_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_smax_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_umax_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_umax_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_umax_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_and_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_and_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_and_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_or_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_or_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_or_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_xor_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_xor_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_xor_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_inc_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_inc_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_inc_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_dec_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_dec_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_dec_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
+; GFX6-LABEL: atomic_cmpswap_i64_1d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_cmpswap_i64_1d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_cmpswap_i64_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t) {
+; GFX6-LABEL: atomic_add_i64_2d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i64_2d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %r) {
+; GFX6-LABEL: atomic_add_i64_3d:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i64_3d:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_3d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %face) {
+; GFX6-LABEL: atomic_add_i64_cube:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i64_cube:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_cube:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %slice) {
+; GFX6-LABEL: atomic_add_i64_1darray:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i64_1darray:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_1darray:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice) {
+; GFX6-LABEL: atomic_add_i64_2darray:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i64_2darray:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_2darray:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %fragid) {
+; GFX6-LABEL: atomic_add_i64_2dmsaa:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i64_2dmsaa:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_2dmsaa:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+; GFX6-LABEL: atomic_add_i64_2darraymsaa:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i64_2darraymsaa:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_2darraymsaa:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX6-LABEL: atomic_add_i64_1d_slc:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: s_mov_b32 s3, s5
+; GFX6-NEXT: s_mov_b32 s4, s6
+; GFX6-NEXT: s_mov_b32 s5, s7
+; GFX6-NEXT: s_mov_b32 s6, s8
+; GFX6-NEXT: s_mov_b32 s7, s9
+; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: atomic_add_i64_1d_slc:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_mov_b32 s0, s2
+; GFX8-NEXT: s_mov_b32 s1, s3
+; GFX8-NEXT: s_mov_b32 s2, s4
+; GFX8-NEXT: s_mov_b32 s3, s5
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s7
+; GFX8-NEXT: s_mov_b32 s6, s8
+; GFX8-NEXT: s_mov_b32 s7, s9
+; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: atomic_add_i64_1d_slc:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+ %out = bitcast i64 %v to <2 x float>
+ ret <2 x float> %out
+}
+
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+
+declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+
+attributes #0 = { nounwind }