diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index b064ec2b3c9a6..6f3a4908623da 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -32,9 +32,10 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" + "32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" - "-ni:7:8"; + "-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index c5cb922576dd4..acff367d50eb9 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" // Test default -target-cpu // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SIDefault -// R600SIDefault: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=AARCH64 diff --git a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl index a085c3dd70ebc..bb52f87615214 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s -// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" void foo(void) {} diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 5fcf651046943..d49d1cd381251 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -703,23 +703,24 @@ supported for the ``amdgcn`` target. .. table:: AMDGPU Address Spaces :name: amdgpu-address-spaces-table - ================================= =============== =========== ================ ======= ============================ - .. 
64-Bit Process Address Space - --------------------------------- --------------- ----------- ---------------- ------------------------------------ - Address Space Name LLVM IR Address HSA Segment Hardware Address NULL Value - Space Number Name Name Size - ================================= =============== =========== ================ ======= ============================ - Generic 0 flat flat 64 0x0000000000000000 - Global 1 global global 64 0x0000000000000000 - Region 2 N/A GDS 32 *not implemented for AMDHSA* - Local 3 group LDS 32 0xFFFFFFFF - Constant 4 constant *same as global* 64 0x0000000000000000 - Private 5 private scratch 32 0xFFFFFFFF - Constant 32-bit 6 *TODO* 0x00000000 - Buffer Fat Pointer (experimental) 7 *TODO* - Buffer Resource (experimental) 8 *TODO* - Streamout Registers 128 N/A GS_REGS - ================================= =============== =========== ================ ======= ============================ + ===================================== =============== =========== ================ ======= ============================ + .. 
64-Bit Process Address Space + ------------------------------------- --------------- ----------- ---------------- ------------------------------------ + Address Space Name LLVM IR Address HSA Segment Hardware Address NULL Value + Space Number Name Name Size + ===================================== =============== =========== ================ ======= ============================ + Generic 0 flat flat 64 0x0000000000000000 + Global 1 global global 64 0x0000000000000000 + Region 2 N/A GDS 32 *not implemented for AMDHSA* + Local 3 group LDS 32 0xFFFFFFFF + Constant 4 constant *same as global* 64 0x0000000000000000 + Private 5 private scratch 32 0xFFFFFFFF + Constant 32-bit 6 *TODO* 0x00000000 + Buffer Fat Pointer (experimental) 7 *TODO* + Buffer Resource (experimental) 8 *TODO* + Buffer Strided Pointer (experimental) 9 *TODO* + Streamout Registers 128 N/A GS_REGS + ===================================== =============== =========== ================ ======= ============================ **Generic** The generic address space is supported unless the *Target Properties* column @@ -836,7 +837,7 @@ supported for the ``amdgcn`` target. the backend. The buffer descriptor used to construct a buffer fat pointer must be *raw*: - the stride must be 0, the "add tid" flag bust be 0, the swizzle enable bits + the stride must be 0, the "add tid" flag must be 0, the swizzle enable bits must be off, and the extent must be measured in bytes. (On subtargets where bounds checking may be disabled, buffer fat pointers may choose to enable it or not). @@ -864,6 +865,18 @@ supported for the ``amdgcn`` target. (bits `127:96`). The specific interpretation of these fields varies by the target architecture and is detailed in the ISA descriptions. +**Buffer Strided Pointer** + The buffer strided pointer is an experimental address space. It represents + a 128-bit buffer descriptor and a 32-bit offset, like the **Buffer Fat + Pointer**.
Additionally, it contains an index into the buffer, which + allows the direct addressing of structured elements. These components appear + in that order, i.e., the descriptor comes first, then the 32-bit offset + followed by the 32-bit index. + + The bits in the buffer descriptor must meet the following requirements: + the stride is the size of a structured element, the "add tid" flag must be 0, + and the swizzle enable bits must be off. + **Streamout Registers** Dedicated registers used by the GS NGG Streamout Instructions. The register file is modelled as a memory in a distinct address space because it is indexed diff --git a/llvm/include/llvm/Support/AMDGPUAddrSpace.h b/llvm/include/llvm/Support/AMDGPUAddrSpace.h index 72caf8d458c7a..c9d9bdd2f2fa3 100644 --- a/llvm/include/llvm/Support/AMDGPUAddrSpace.h +++ b/llvm/include/llvm/Support/AMDGPUAddrSpace.h @@ -25,7 +25,7 @@ namespace llvm { namespace AMDGPUAS { enum : unsigned { // The maximum value for flat, generic, local, private, constant and region. - MAX_AMDGPU_ADDRESS = 8, + MAX_AMDGPU_ADDRESS = 9, FLAT_ADDRESS = 0, ///< Address space for flat memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). @@ -42,6 +42,9 @@ enum : unsigned { BUFFER_RESOURCE = 8, ///< Address space for 128-bit buffer resources. + BUFFER_STRIDED_POINTER = 9, ///< Address space for 192-bit fat buffer + ///< pointers with an additional index. + /// Internal address spaces. Can be freely renumbered. STREAMOUT_REGISTER = 128, ///< Address space for GS NGG Streamout registers. /// end Internal address spaces. diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index eeac80c013a1f..738ec301d1479 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5207,10 +5207,12 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { // This goes before adding new address spaces to prevent incoherent string // values. 
if (!DL.contains("-ni") && !DL.starts_with("ni")) - Res.append("-ni:7:8"); - // Update ni:7 to ni:7:8. + Res.append("-ni:7:8:9"); + // Update ni:7 to ni:7:8:9. if (DL.ends_with("ni:7")) - Res.append(":8"); + Res.append(":8:9"); + if (DL.ends_with("ni:7:8")) + Res.append(":9"); // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer // resources) An empty data layout has already been upgraded to G1 by now. @@ -5218,6 +5220,8 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { Res.append("-p7:160:256:256:32"); if (!DL.contains("-p8") && !DL.starts_with("p8")) Res.append("-p8:128:128"); + if (!DL.contains("-p9") && !DL.starts_with("p9")) + Res.append("-p9:192:256:256:32"); return Res; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 89319527c410a..35d33cb60bc47 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -410,24 +410,25 @@ inline bool isExtendedGlobalAddrSpace(unsigned AS) { } static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) { - static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 8, "Addr space out of range"); + static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range"); if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) return true; - // This array is indexed by address space value enum elements 0 ... to 8 + // This array is indexed by address space value enum elements 0 ...
to 9 // clang-format off - static const bool ASAliasRules[9][9] = { - /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc */ - /* Flat */ {true, true, false, true, true, true, true, true, true}, - /* Global */ {true, true, false, false, true, false, true, true, true}, - /* Region */ {false, false, true, false, false, false, false, false, false}, - /* Group */ {true, false, false, true, false, false, false, false, false}, - /* Constant */ {true, true, false, false, false, false, true, true, true}, - /* Private */ {true, false, false, false, false, true, false, false, false}, - /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true}, - /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true}, - /* Buffer Resource */ {true, true, false, false, true, false, true, true, true}, + static const bool ASAliasRules[10][10] = { + /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */ + /* Flat */ {true, true, false, true, true, true, true, true, true, true}, + /* Global */ {true, true, false, false, true, false, true, true, true, true}, + /* Region */ {false, false, true, false, false, false, false, false, false, false}, + /* Group */ {true, false, false, true, false, false, false, false, false, false}, + /* Constant */ {true, true, false, false, false, false, true, true, true, true}, + /* Private */ {true, false, false, false, false, true, false, false, false, false}, + /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true}, + /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true}, + /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true}, + /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true}, }; // clang-format on diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 
d4a50403cee0c..52d531319005d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -633,6 +633,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS); const LLT BufferFatPtr = GetAddrSpacePtr(AMDGPUAS::BUFFER_FAT_POINTER); const LLT RsrcPtr = GetAddrSpacePtr(AMDGPUAS::BUFFER_RESOURCE); + const LLT BufferStridedPtr = + GetAddrSpacePtr(AMDGPUAS::BUFFER_STRIDED_POINTER); const LLT CodePtr = FlatPtr; @@ -1113,7 +1115,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, } getActionDefinitionsBuilder(G_PTR_ADD) - .unsupportedFor({BufferFatPtr, RsrcPtr}) + .unsupportedFor({BufferFatPtr, BufferStridedPtr, RsrcPtr}) .legalIf(all(isPointer(0), sameSize(0, 1))) .scalarize(0) .scalarSameSizeAs(1, 0); @@ -1403,7 +1405,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // The custom pointers (fat pointers, buffer resources) don't work with load // and store at this level. Fat pointers should have been lowered to // intrinsics before the translation to MIR. - Actions.unsupportedIf(typeInSet(1, {BufferFatPtr, RsrcPtr})); + Actions.unsupportedIf( + typeInSet(1, {BufferFatPtr, BufferStridedPtr, RsrcPtr})); // Address space 8 pointers are handled by a 4xs32 load, bitcast, and // ptrtoint. This is needed to account for the fact that we can't have i128 diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0e0094cb9cd6e..e8c04ecf39ba0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -539,9 +539,10 @@ static StringRef computeDataLayout(const Triple &TT) { // space 8) which cannot be non-trivilally accessed by LLVM memory operations // like getelementptr. 
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:" + "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-" + "v32:32-v48:64-v96:" "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-" - "G1-ni:7:8"; + "G1-ni:7:8:9"; } LLVM_READNONE diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 53a9fc2d8e7f6..f1da1a61bf4dd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -368,7 +368,8 @@ unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { AddrSpace == AMDGPUAS::CONSTANT_ADDRESS || AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT || AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER || - AddrSpace == AMDGPUAS::BUFFER_RESOURCE) { + AddrSpace == AMDGPUAS::BUFFER_RESOURCE || + AddrSpace == AMDGPUAS::BUFFER_STRIDED_POINTER) { return 512; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index aced46bac9e51..dddfebfd4e357 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1046,12 +1046,20 @@ static EVT memVTFromLoadIntrReturn(Type *Ty, unsigned MaxNumLanes) { MVT SITargetLowering::getPointerTy(const DataLayout &DL, unsigned AS) const { if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160) return MVT::v5i32; + if (AMDGPUAS::BUFFER_STRIDED_POINTER == AS && + DL.getPointerSizeInBits(AS) == 192) + return MVT::v6i32; return AMDGPUTargetLowering::getPointerTy(DL, AS); } /// Similarly, the in-memory representation of a p7 is {p8, i32}, aka /// v8i32 when padding is added. +/// The in-memory representation of a p9 is {p8, i32, i32}, which is +/// also v8i32 with padding. 
MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const { - if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160) + if ((AMDGPUAS::BUFFER_FAT_POINTER == AS && + DL.getPointerSizeInBits(AS) == 160) || + (AMDGPUAS::BUFFER_STRIDED_POINTER == AS && + DL.getPointerSizeInBits(AS) == 192)) return MVT::v8i32; return AMDGPUTargetLowering::getPointerMemTy(DL, AS); } @@ -1418,7 +1426,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, if (AS == AMDGPUAS::CONSTANT_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || - AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE) { + AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE || + AS == AMDGPUAS::BUFFER_STRIDED_POINTER) { // If the offset isn't a multiple of 4, it probably isn't going to be // correctly aligned. // FIXME: Can we get the real alignment here? diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll index 2453b1f415ec3..a13eb5c6d085f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll @@ -248,3 +248,73 @@ define void @test_8_5(ptr %p) { load i8, ptr addrspace(3) @shm ret void } + +; CHECK: MayAlias: i8 addrspace(9)* %p, i8* %p1 +define void @test_9_0(ptr addrspace(9) %p, ptr addrspace(0) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(0) %p1 + ret void +} + +; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(1)* %p1 +define void @test_9_1(ptr addrspace(9) %p, ptr addrspace(1) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(1) %p1 + ret void +} + +; CHECK: NoAlias: i8 addrspace(9)* %p, i8 addrspace(2)* %p1 +define void @test_9_2(ptr addrspace(9) %p, ptr addrspace(2) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(2) %p1 + ret void +} + +; CHECK: NoAlias: i8 addrspace(9)* %p, i8 addrspace(3)* %p1 +define void @test_9_3(ptr addrspace(9) %p, ptr 
addrspace(3) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(3) %p1 + ret void +} + +; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(4)* %p1 +define void @test_9_4(ptr addrspace(9) %p, ptr addrspace(4) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(4) %p1 + ret void +} + +; CHECK: NoAlias: i8 addrspace(9)* %p, i8 addrspace(5)* %p1 +define void @test_9_5(ptr addrspace(9) %p, ptr addrspace(5) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(5) %p1 + ret void +} + +; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(6)* %p1 +define void @test_9_6(ptr addrspace(9) %p, ptr addrspace(6) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(6) %p1 + ret void +} + +; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(7)* %p1 +define void @test_9_7(ptr addrspace(9) %p, ptr addrspace(7) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(7) %p1 + ret void +} + +; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(8)* %p1 +define void @test_9_8(ptr addrspace(9) %p, ptr addrspace(8) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(8) %p1 + ret void +} + +; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(9)* %p1 +define void @test_9_9(ptr addrspace(9) %p, ptr addrspace(9) %p1) { + load i8, ptr addrspace(9) %p + load i8, ptr addrspace(9) %p1 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll index eac3f4f1dbcb3..ad543c0d2338a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4ceilf(float) declare <2 x float> @_Z4ceilDv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll index 9058f101b78f4..5ab12f53a3b5c 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z8copysignff(float, float) declare <2 x float> @_Z8copysignDv2_fS_(<2 x float>, <2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll index f34e6031effb7..0da0acc2030a1 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z3expf(float) declare <2 x float> @_Z3expDv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll index eddf5ff602dfb..96db9c65959d8 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4exp2f(float) declare <2 x float> @_Z4exp2Dv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll index 9e10d68ab6f31..74b867e93ca18 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll @@ -1,7 +1,7 
@@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4fabsf(float) declare <2 x float> @_Z4fabsDv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll index ed4c49fb0b13f..6b3b4cc95d149 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z5floorf(float) declare <2 x float> @_Z5floorDv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll index cc068ac5c443b..93c223f342a1d 100644 --- 
a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z3fmafff(float, float, float) declare <2 x float> @_Z3fmaDv2_fS_S_(<2 x float>, <2 x float>, <2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax-splat.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax-splat.ll index 0d058f113a541..439864ec48ab7 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax-splat.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax-splat.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare <2 x float> @_Z4fmaxDv2_ff(<2 x float>, float) declare <2 x float> @_Z4fmaxDv2_fS_(<2 x float>, <2 x 
float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll index 153f608070284..a6ff5c9984ea8 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4fmaxff(float, float) declare <2 x float> @_Z4fmaxDv2_fS_(<2 x float>, <2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin-splat.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin-splat.ll index e54676615c7e1..36f65c1aa9a71 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin-splat.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin-splat.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare <2 x float> @_Z4fminDv2_ff(<2 x float>, float) declare <2 x float> @_Z4fminDv2_fS_(<2 x float>, <2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll index 3b680d8699020..c9b1112ba3981 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4fminff(float, float) declare <2 x float> @_Z4fminDv2_fS_(<2 x float>, <2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp-splat.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp-splat.ll index 1f6b8ef6a3266..ca6e94a8523c6 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp-splat.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp-splat.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare <2 x float> @_Z5ldexpDv2_fi(<2 x float>, i32) declare <3 x float> @_Z5ldexpDv3_fi(<3 x float>, i32) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll index 9eaf10933d4d2..24082b8c66611 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z5ldexpfi(float, i32) declare <2 x float> @_Z5ldexpDv2_fDv2_i(<2 x float>, <2 x i32>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll index eebdb1a35001b..ec0b2283aef02 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z3logf(float) declare <2 x float> @_Z3logDv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll index 35c2cc77d91cd..1a03f9c420099 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z5log10f(float) declare <2 x float> @_Z5log10Dv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll index af9048873b9dd..a6b3265d92d2c 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll +++ 
b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4log2f(float) declare <2 x float> @_Z4log2Dv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll index 0aace74dbeb2c..ad7402c9f3a84 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z3madfff(float, float, float) declare <2 x float> @_Z3madDv2_fS_S_(<2 x float>, <2 x float>, <2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll 
b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll index 0fc0a40c7f8e6..c4bd4bc126f73 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib,instcombine -amdgpu-prelink %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z3powff(float, float) declare <2 x float> @_Z3powDv2_fS_(<2 x float>, <2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll index 85e14048f3947..942f459ea6b8c 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib,instcombine -amdgpu-prelink %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4pownfi(float, i32) declare <2 x float> @_Z4pownDv2_fDv2_i(<2 x float>, <2 x i32>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll index 0d9f3e2de416f..dc4cf1d067ef1 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib,instcombine -amdgpu-prelink %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4powrff(float, float) declare <2 x float> @_Z4powrDv2_fS_(<2 x float>, <2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll index 5843c0e5506ae..534a42b039790 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4rintf(float) declare <2 x float> @_Z4rintDv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll index 421b2d88dcfe9..2ffa647d1869a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib,instcombine -amdgpu-prelink %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z5rootnfi(float, i32) declare <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float>, <2 x i32>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll index 70fd1cb579f39..8a4697983bb1e 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated 
by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z5roundf(float) declare <2 x float> @_Z5roundDv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll index a36bd0782431a..a7a68d6780605 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplifylib < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" ; sin, cos, and sincos are already defined in the module. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.sin.cos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.sin.cos.ll index 1f952f8a0c44c..960019c277a73 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.sin.cos.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.sin.cos.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplifylib < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" ; sin and cos are already defined in the module but sincos isn't. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll index d71ca9e448ac6..4061857789ed6 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall=1 -amdgpu-prelink < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z3sinf(float) #0 declare float @_Z3cosf(float) #0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll index 6adc536c1dbbe..5d765b614db36 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplifylib -amdgpu-prelink < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z3sinf(float noundef) declare float @_Z3cosf(float noundef) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.weak.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.weak.ll index ea67756010358..ef76ca4979237 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.weak.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.weak.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplifylib < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare extern_weak float @_Z3sinf(float noundef) declare extern_weak float @_Z3cosf(float noundef) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll index d1a58a7a0148d..5b57778d5fdcd 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z4sqrtf(float) declare <2 x float> @_Z4sqrtDv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll index f7e273c92dd05..27b6adc1a9c28 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" declare float @_Z5truncf(float) declare <2 x float> @_Z5truncDv2_f(<2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index 1c75a2fc3dce6..0f2b2aa4d3562 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -3,7 +3,7 @@ ; ModuleID = 'kernel_round1_passing.bc' 
source_filename = "/tmp/comgr-295d04/input/CompileSource" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" @kernel_round1.first_words_data = external hidden unnamed_addr addrspace(3) global [896 x i8], align 1 diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll index 6ee6bbbff69f9..1d0533ca07b3e 100644 --- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" @_RSENC_gDcd_______________________________ = external protected addrspace(1) externally_initialized global [4096 x i8], align 16 diff --git a/llvm/test/CodeGen/AMDGPU/nullptr.ll b/llvm/test/CodeGen/AMDGPU/nullptr.ll index eab5cc82ee6e3..513332a07331b 100644 --- a/llvm/test/CodeGen/AMDGPU/nullptr.ll +++ b/llvm/test/CodeGen/AMDGPU/nullptr.ll @@ -38,9 +38,9 @@ ; FIXME-R600-NEXT: .long 0 ; FIXME @nullptr8 = global ptr addrspace(8) addrspacecast (ptr null to ptr addrspace(8)) -; CHECK-LABEL: nullptr9: -; R600-NEXT: .long 0 -@nullptr9 = 
global ptr addrspace(9) addrspacecast (ptr null to ptr addrspace(9)) +; FIXME-LABEL: nullptr9: +; FIXME-R600-NEXT: .long 0 +; FIXME @nullptr9 = global ptr addrspace(9) addrspacecast (ptr null to ptr addrspace(9)) ; CHECK-LABEL: nullptr10: ; R600-NEXT: .long 0 diff --git a/llvm/test/CodeGen/AMDGPU/vectorize-buffer-fat-pointer.ll b/llvm/test/CodeGen/AMDGPU/vectorize-buffer-fat-pointer.ll index c109d38b9cb2f..4aab097229a47 100644 --- a/llvm/test/CodeGen/AMDGPU/vectorize-buffer-fat-pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/vectorize-buffer-fat-pointer.ll @@ -1,7 +1,7 @@ ; RUN: opt -S -mtriple=amdgcn-- -passes=load-store-vectorizer < %s | FileCheck -check-prefix=OPT %s -; OPT-LABEL: @func( -define void @func(ptr addrspace(7) %out) { +; OPT-LABEL: @buffer_fat_ptrs( +define void @buffer_fat_ptrs(ptr addrspace(7) %out) { entry: %a1 = getelementptr i32, ptr addrspace(7) %out, i32 1 %a2 = getelementptr i32, ptr addrspace(7) %out, i32 2 @@ -14,3 +14,18 @@ entry: store i32 3, ptr addrspace(7) %a3 ret void } + +; OPT-LABEL: @buffer_strided_ptrs( +define void @buffer_strided_ptrs(ptr addrspace(9) %out) { +entry: + %a1 = getelementptr i32, ptr addrspace(9) %out, i32 1 + %a2 = getelementptr i32, ptr addrspace(9) %out, i32 2 + %a3 = getelementptr i32, ptr addrspace(9) %out, i32 3 + +; OPT: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr addrspace(9) %out, align 4 + store i32 0, ptr addrspace(9) %out + store i32 1, ptr addrspace(9) %a1 + store i32 2, ptr addrspace(9) %a2 + store i32 3, ptr addrspace(9) %a3 + ret void +} diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/adaptive_constant_global_redzones.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/adaptive_constant_global_redzones.ll index c4d21b059558b..08a78c6d32f1c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/adaptive_constant_global_redzones.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/adaptive_constant_global_redzones.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -passes=asan -S | FileCheck %s -target
datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" ; Here we check that the global redzone sizes grow with the object size diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/adaptive_global_redzones.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/adaptive_global_redzones.ll index 5b7821c2c146b..4959b4c4ca1e8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/adaptive_global_redzones.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/adaptive_global_redzones.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -passes=asan -S | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" ; Here we check that the global redzone sizes grow with the object size diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll index 8b0fa816721bb..44149b28fd9f9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll @@ -1,5 +1,5 @@ ; RUN: opt 
< %s -passes=asan -S | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" ; Memory access to lds are not instrumented diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_scratch.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_scratch.ll index c614c9621a761..6cc15f04a473a 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_scratch.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_scratch.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -passes=asan -S | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" ; Memory access to scratch are not instrumented diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll index 47b289ba32b80..4ce337e2b68ef 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll +++ 
b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -passes=asan -S | FileCheck %s ; RUN: opt < %s -passes=asan -asan-recover -S | FileCheck %s --check-prefix=RECOV -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" @x = addrspace(4) global [2 x i32] zeroinitializer, align 4 diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll index 58af1eafa1801..f86a5722c0006 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -passes=asan -S | FileCheck %s ; RUN: opt < %s -passes=asan -asan-recover -S | FileCheck %s --check-prefix=RECOV -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i) sanitize_address { diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll index e792c453a723e..6b39ff6ff8462 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -passes=asan -S | FileCheck %s ; RUN: opt < %s -passes=asan -asan-recover -S | FileCheck %s --check-prefix=RECOV -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @global_store(ptr addrspace(1) %p, i32 %i) sanitize_address { diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/global_metadata_addrspacecasts.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/global_metadata_addrspacecasts.ll index 53331a6bbe49b..3fd8dcfa11320 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/global_metadata_addrspacecasts.ll +++ 
b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/global_metadata_addrspacecasts.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -passes=asan -S | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" @g = addrspace(1) global [1 x i32] zeroinitializer, align 4 diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_lds_globals.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_lds_globals.ll index fca6c24ba31ad..8ec097b4a4e27 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_lds_globals.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_lds_globals.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -passes=asan -S | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" @G10 = addrspace(3) global [10 x i8] zeroinitializer, align 1 diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_scratch_globals.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_scratch_globals.ll index daf9aa48b96f0..433e4b98e37f9 100644 --- 
a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_scratch_globals.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_scratch_globals.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -passes=asan -S | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" @G10 = addrspace(5) global [10 x i8] zeroinitializer, align 1 diff --git a/llvm/test/Transforms/GlobalOpt/global_alloca_diff_addrspaces.ll b/llvm/test/Transforms/GlobalOpt/global_alloca_diff_addrspaces.ll index f3e0a3e78bee4..0e37df359e613 100644 --- a/llvm/test/Transforms/GlobalOpt/global_alloca_diff_addrspaces.ll +++ b/llvm/test/Transforms/GlobalOpt/global_alloca_diff_addrspaces.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -passes=globalopt < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" ; Check that we don't convert the global into an alloca if their respective address ; spaces differ, and the alloca addrspace is non-zero. 
diff --git a/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll b/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll index 98d7b46c0e898..f66b4d1a0f54f 100644 --- a/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll +++ b/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -passes=indvars -S < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn--amdpal" define void @f(ptr addrspace(7) %arg) { diff --git a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll index be1d3b09b768c..9a2bfac0feb02 100644 --- a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll +++ b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll @@ -3,7 +3,7 @@ ; Gracefully handle the alloca that is not in the alloca AS (=5) -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" declare void @use(ptr) diff --git 
a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll index 2d365ef527e93..5f58e7614e65e 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll @@ -5,7 +5,7 @@ ; Test that LSR does not attempt to extend a pointer type to an integer type, ; which causes a SCEV analysis assertion. -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" diff --git a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll index d2ac2e6e30a40..4dfa8cc828248 100644 --- a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll +++ b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll @@ -2,7 +2,7 @@ ; Verify the address space cast doesn't cause a crash -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, ptr } %"struct.(anonymous namespace)::ICVStateTy" 
= type { i32, i32, i32, i32, i32, i32 } diff --git a/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll b/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll index 1b6273e8f60f0..19d41f9d1e301 100644 --- a/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll +++ b/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -passes=openmp-opt < %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" %"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr } diff --git a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll index 6b15dd4f6b9b1..75e01f3295fe2 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll @@ -8,7 +8,7 @@ ; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode ; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode ; -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" %struct.ident_t = type { i32, i32, i32, i32, ptr } diff --git 
a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll index 0120cb16cf09a..7a632dc0a968d 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" %struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy.8, ptr, ptr } diff --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll index a933b7d393eef..74871a2babcb1 100644 --- a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll +++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" @.__omp_offloading_heavyComputation.region_id = weak constant i8 0 @.offload_maptypes. = private unnamed_addr constant [2 x i64] [i64 35, i64 35] diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index 5d798daf66271..4865616e3e2ba 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -33,10 +33,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { // Check that AMDGPU targets add -G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1"); // and that ANDGCN adds p7 and p8 as well. - EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"), - "e-p:64:64-G1-ni:7:8-p7:160:256:256:32-p8:128:128"); - EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), - "e-p:64:64-G1-ni:7:8-p7:160:256:256:32-p8:128:128"); + EXPECT_EQ( + UpgradeDataLayoutString("e-p:64:64", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ( + UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); // but that r600 does not. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G1", "r600"), "e-p:32:32-G1"); @@ -50,7 +52,8 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { "amdgcn"), "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-" "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:" - "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8-p7:160:256:256:32-p8:128:128"); + "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-" + "p9:192:256:256:32"); // Check that RISCV64 upgrades -n64 to -n32:64. 
EXPECT_EQ(UpgradeDataLayoutString("e-m:e-p:64:64-i64:64-i128:128-n64-S128", @@ -80,20 +83,23 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { // Check that AMDGPU targets don't add -G1 if there is already a -G flag. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2"); EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2"); - EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), - "e-p:64:64-G2-ni:7:8-p7:160:256:256:32-p8:128:128"); - EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), - "G2-e-p:64:64-ni:7:8-p7:160:256:256:32-p8:128:128"); - EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), - "e-p:64:64-G0-ni:7:8-p7:160:256:256:32-p8:128:128"); + EXPECT_EQ( + UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), + "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ( + UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), + "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ( + UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), + "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); // Check that AMDGCN targets don't add already declared address space 7. EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8-p8:128:128"); + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); EXPECT_EQ(UpgradeDataLayoutString("p7:64:64-G2-e-p:64:64", "amdgcn"), - "p7:64:64-G2-e-p:64:64-ni:7:8-p8:128:128"); + "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32"); EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64-G1", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8-p8:128:128"); + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); } TEST(DataLayoutUpgradeTest, EmptyDataLayout) { @@ -106,7 +112,7 @@ TEST(DataLayoutUpgradeTest, EmptyDataLayout) { // Check that AMDGPU targets add G1 if it's not present. 
EXPECT_EQ(UpgradeDataLayoutString("", "r600"), "G1"); EXPECT_EQ(UpgradeDataLayoutString("", "amdgcn"), - "G1-ni:7:8-p7:160:256:256:32-p8:128:128"); + "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); } } // end namespace diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index 528d332393326..046010716862f 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -560,7 +560,7 @@ TEST(CodeExtractor, OpenMPAggregateArgs) { LLVMContext Ctx; SMDiagnostic Err; std::unique_ptr M(parseAssemblyString(R"ir( - target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" define void @foo(ptr %0) { diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir index c99f2954d7613..a21e6d61a5618 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the LLVM IR codegen for the device // for omp target parallel construct -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, 
omp.is_target_device = true, omp.target = #omp.target} { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.target = #omp.target} { llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QQmain"} { %0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"} omp.target map_entries(%0 -> %arg2 : !llvm.ptr) {