From d0d754330cf44f22db57c1e8367ff20aebc3e312 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes
Date: Tue, 24 Oct 2023 13:18:17 -0700
Subject: [PATCH] [ValueTracking] Allow tracking values through Integral AddrSpaceCasts

Compute known bits through an addrspacecast when both the source and
destination address spaces are integral. The known bits of the source
pointer are truncated or extended to the width of the destination
pointer, so alignment information on the underlying object is no longer
lost at the cast. This lets InferAlignment raise the alignment of loads
and stores through flat/generic casts and enables vectorization in the
new PhaseOrdering test.

Change-Id: Ia9aa4cebb99e8f5631b8ecd69367b1f1cf07c50c
---
 llvm/include/llvm/Support/KnownBits.h         |   8 +
 llvm/lib/Analysis/ValueTracking.cpp           |  28 +
 .../CodeGen/AMDGPU/memcpy-crash-issue63986.ll | 162 +++---
 .../InferAlignment/addrspacecast_amdgpu.ll    | 523 ++++++++++++++++++
 .../InferAlignment/addrspacecast_x86.ll       | 394 +++++++++++++
 .../InstCombine/memcpy-from-global.ll         |   4 +-
 .../AMDGPU/align-before-vectorize.ll          |  46 ++
 7 files changed, 1081 insertions(+), 84 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAlignment/addrspacecast_amdgpu.ll
 create mode 100644 llvm/test/Transforms/InferAlignment/addrspacecast_x86.ll
 create mode 100644 llvm/test/Transforms/PhaseOrdering/AMDGPU/align-before-vectorize.ll

diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index fb034e0b9e3ba..545a694f612fa 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -148,6 +148,14 @@ struct KnownBits {
     return Max;
   }
 
+  KnownBits clearLowBits(unsigned BitWidth) const {
+    APInt NewZero = Zero;
+    APInt NewOne = One;
+    NewZero.clearLowBits(BitWidth);
+    NewOne.clearLowBits(BitWidth);
+    return KnownBits(NewZero, NewOne);
+  }
+
   /// Return known bits for a truncation of the value we're tracking.
   KnownBits trunc(unsigned BitWidth) const {
     return KnownBits(Zero.trunc(BitWidth), One.trunc(BitWidth));
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index c303d261107eb..db0f2e7203841 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1775,6 +1775,34 @@ static void computeKnownBitsFromOperator(const Operator *I,
                           Depth + 1))
       computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
     break;
+  case Instruction::AddrSpaceCast: {
+    auto *ASC = cast<AddrSpaceCastOperator>(I);
+    unsigned SrcAS = ASC->getSrcAddressSpace();
+    unsigned DestAS = ASC->getDestAddressSpace();
+
+    const DataLayout &DL = Q.DL;
+    if (DL.isNonIntegralAddressSpace(SrcAS) ||
+        DL.isNonIntegralAddressSpace(DestAS))
+      break;
+
+    auto SrcSize = DL.getPointerSizeInBits(SrcAS);
+    auto DstSize = DL.getPointerSizeInBits(DestAS);
+
+    if (DstSize > SrcSize) {
+      Known2 = Known;
+      Known2 = Known2.clearLowBits(SrcSize);
+      Known = Known.trunc(SrcSize);
+      computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+      Known = Known.anyext(DstSize);
+      Known = Known.unionWith(Known2);
+    }
+
+    else { // DstSize <= SrcSize
+      Known = Known.anyext(SrcSize);
+      computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+      Known = Known.trunc(DstSize);
+    }
+  }
   }
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
index 0e6c1aecb6774..73dbb4b72f03f 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
@@ -9,70 +9,68 @@ define void @issue63986(i64 %0, i64 %idxprom) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_lshlrev_b64 v[4:5], 6, v[2:3]
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
-; CHECK-NEXT:    ; %bb.1: ; %loop-memcpy-expansion.preheader
-; CHECK-NEXT:    v_lshlrev_b64 v[6:7], 6, v[2:3]
 ; CHECK-NEXT:    s_mov_b64 s[6:7], 0
-; CHECK-NEXT:    .LBB0_2: ; %loop-memcpy-expansion
+; CHECK-NEXT:    .LBB0_1: ; %loop-memcpy-expansion
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:
v_mov_b32_e32 v9, s7 -; CHECK-NEXT: v_mov_b32_e32 v8, s6 -; CHECK-NEXT: flat_load_ubyte v10, v[8:9] offset:5 -; CHECK-NEXT: flat_load_ubyte v11, v[8:9] offset:6 -; CHECK-NEXT: flat_load_ubyte v12, v[8:9] offset:7 -; CHECK-NEXT: flat_load_ubyte v13, v[8:9] offset:3 -; CHECK-NEXT: flat_load_ubyte v14, v[8:9] offset:2 -; CHECK-NEXT: flat_load_ubyte v15, v[8:9] offset:1 -; CHECK-NEXT: flat_load_ubyte v16, v[8:9] -; CHECK-NEXT: flat_load_ubyte v17, v[8:9] offset:4 -; CHECK-NEXT: flat_load_ubyte v18, v[8:9] offset:13 -; CHECK-NEXT: flat_load_ubyte v19, v[8:9] offset:14 -; CHECK-NEXT: flat_load_ubyte v20, v[8:9] offset:15 -; CHECK-NEXT: flat_load_ubyte v21, v[8:9] offset:11 -; CHECK-NEXT: flat_load_ubyte v22, v[8:9] offset:10 -; CHECK-NEXT: flat_load_ubyte v23, v[8:9] offset:9 -; CHECK-NEXT: flat_load_ubyte v24, v[8:9] offset:8 -; CHECK-NEXT: flat_load_ubyte v25, v[8:9] offset:12 +; CHECK-NEXT: v_mov_b32_e32 v6, s6 +; CHECK-NEXT: v_mov_b32_e32 v7, s7 +; CHECK-NEXT: flat_load_ubyte v8, v[6:7] offset:5 +; CHECK-NEXT: flat_load_ubyte v9, v[6:7] offset:6 +; CHECK-NEXT: flat_load_ubyte v10, v[6:7] offset:7 +; CHECK-NEXT: flat_load_ubyte v11, v[6:7] offset:3 +; CHECK-NEXT: flat_load_ubyte v12, v[6:7] offset:2 +; CHECK-NEXT: flat_load_ubyte v13, v[6:7] offset:1 +; CHECK-NEXT: flat_load_ubyte v14, v[6:7] +; CHECK-NEXT: flat_load_ubyte v15, v[6:7] offset:4 +; CHECK-NEXT: flat_load_ubyte v16, v[6:7] offset:13 +; CHECK-NEXT: flat_load_ubyte v17, v[6:7] offset:14 +; CHECK-NEXT: flat_load_ubyte v18, v[6:7] offset:15 +; CHECK-NEXT: flat_load_ubyte v19, v[6:7] offset:11 +; CHECK-NEXT: flat_load_ubyte v20, v[6:7] offset:10 +; CHECK-NEXT: flat_load_ubyte v21, v[6:7] offset:9 +; CHECK-NEXT: flat_load_ubyte v22, v[6:7] offset:8 +; CHECK-NEXT: flat_load_ubyte v23, v[6:7] offset:12 ; CHECK-NEXT: s_add_u32 s4, s4, 1 ; CHECK-NEXT: s_addc_u32 s5, s5, 0 -; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s6, v6 +; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s6, v4 ; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[4:5], 2 -; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v7, vcc +; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v5, vcc ; CHECK-NEXT: s_add_u32 s6, s6, 16 ; CHECK-NEXT: s_addc_u32 s7, s7, 0 ; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9] ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: flat_store_byte v[8:9], v13 offset:3 -; CHECK-NEXT: flat_store_byte v[8:9], v14 offset:2 -; CHECK-NEXT: flat_store_byte v[8:9], v15 offset:1 -; CHECK-NEXT: flat_store_byte v[8:9], v16 -; CHECK-NEXT: flat_store_byte v[8:9], v12 offset:7 -; CHECK-NEXT: flat_store_byte v[8:9], v11 offset:6 -; CHECK-NEXT: flat_store_byte v[8:9], v10 offset:5 -; CHECK-NEXT: flat_store_byte v[8:9], v17 offset:4 -; CHECK-NEXT: flat_store_byte v[8:9], v21 offset:11 -; CHECK-NEXT: flat_store_byte v[8:9], v22 offset:10 -; CHECK-NEXT: flat_store_byte v[8:9], v23 offset:9 -; CHECK-NEXT: flat_store_byte v[8:9], v24 offset:8 -; CHECK-NEXT: flat_store_byte v[8:9], v20 offset:15 -; CHECK-NEXT: flat_store_byte v[8:9], v19 offset:14 -; CHECK-NEXT: flat_store_byte v[8:9], v18 offset:13 -; CHECK-NEXT: flat_store_byte v[8:9], v25 offset:12 -; CHECK-NEXT: s_cbranch_vccz .LBB0_2 -; CHECK-NEXT: ; %bb.3: ; %loop-memcpy-residual-header +; CHECK-NEXT: flat_store_byte v[6:7], v11 offset:3 +; CHECK-NEXT: flat_store_byte v[6:7], v12 offset:2 +; CHECK-NEXT: flat_store_byte v[6:7], v13 offset:1 +; CHECK-NEXT: flat_store_byte v[6:7], v14 +; CHECK-NEXT: flat_store_byte v[6:7], v10 offset:7 +; CHECK-NEXT: flat_store_byte v[6:7], v9 offset:6 +; CHECK-NEXT: flat_store_byte v[6:7], v8 offset:5 +; CHECK-NEXT: 
flat_store_byte v[6:7], v15 offset:4 +; CHECK-NEXT: flat_store_byte v[6:7], v19 offset:11 +; CHECK-NEXT: flat_store_byte v[6:7], v20 offset:10 +; CHECK-NEXT: flat_store_byte v[6:7], v21 offset:9 +; CHECK-NEXT: flat_store_byte v[6:7], v22 offset:8 +; CHECK-NEXT: flat_store_byte v[6:7], v18 offset:15 +; CHECK-NEXT: flat_store_byte v[6:7], v17 offset:14 +; CHECK-NEXT: flat_store_byte v[6:7], v16 offset:13 +; CHECK-NEXT: flat_store_byte v[6:7], v23 offset:12 +; CHECK-NEXT: s_cbranch_vccz .LBB0_1 +; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header ; CHECK-NEXT: s_and_b32 s4, 32, 15 ; CHECK-NEXT: s_mov_b32 s5, 0 -; CHECK-NEXT: s_cbranch_scc0 .LBB0_5 -; CHECK-NEXT: ; %bb.4: +; CHECK-NEXT: s_cbranch_scc0 .LBB0_4 +; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 -; CHECK-NEXT: s_branch .LBB0_6 -; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge +; CHECK-NEXT: s_branch .LBB0_5 +; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge ; CHECK-NEXT: v_lshlrev_b64 v[2:3], 6, v[2:3] -; CHECK-NEXT: s_cbranch_execnz .LBB0_9 -; CHECK-NEXT: .LBB0_6: ; %loop-memcpy-residual.preheader +; CHECK-NEXT: s_cbranch_execnz .LBB0_8 +; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader ; CHECK-NEXT: v_or_b32_e32 v2, 32, v4 ; CHECK-NEXT: v_mov_b32_e32 v3, v5 ; CHECK-NEXT: s_mov_b64 s[6:7], 0 -; CHECK-NEXT: .LBB0_7: ; %loop-memcpy-residual +; CHECK-NEXT: .LBB0_6: ; %loop-memcpy-residual ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_add_u32 s8, 32, s6 ; CHECK-NEXT: s_addc_u32 s9, 0, s7 @@ -85,15 +83,15 @@ define void @issue63986(i64 %0, i64 %idxprom) { ; CHECK-NEXT: s_add_u32 s6, s6, 1 ; CHECK-NEXT: v_mov_b32_e32 v6, s4 ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v3, v9, vcc -; CHECK-NEXT: s_addc_u32 s7, s7, 0 +; CHECK-NEXT: s_addc_u32 s7, 0, s7 ; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7] ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: flat_store_byte v[8:9], v10 -; CHECK-NEXT: s_cbranch_vccnz .LBB0_7 -; CHECK-NEXT: ; %bb.8: +; CHECK-NEXT: s_cbranch_vccnz .LBB0_6 +; CHECK-NEXT: ; %bb.7: ; CHECK-NEXT: v_mov_b32_e32 v2, v4 ; CHECK-NEXT: v_mov_b32_e32 v3, v5 -; CHECK-NEXT: .LBB0_9: ; %post-loop-memcpy-expansion +; CHECK-NEXT: .LBB0_8: ; %post-loop-memcpy-expansion ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 4, v[0:1] ; CHECK-NEXT: v_and_b32_e32 v6, 15, v0 ; CHECK-NEXT: v_mov_b32_e32 v7, 0 @@ -102,28 +100,28 @@ define void @issue63986(i64 %0, i64 %idxprom) { ; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[6:7] ; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, v2, v0 ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v3, v1, vcc -; CHECK-NEXT: s_branch .LBB0_12 -; CHECK-NEXT: .LBB0_10: ; %Flow19 -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: s_branch .LBB0_11 +; CHECK-NEXT: .LBB0_9: ; %Flow19 +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[10:11] ; CHECK-NEXT: s_mov_b64 s[8:9], 0 -; CHECK-NEXT: .LBB0_11: ; %Flow21 -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: .LBB0_10: ; %Flow21 +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; CHECK-NEXT: s_cbranch_vccz .LBB0_20 -; CHECK-NEXT: .LBB0_12: ; %while.cond +; CHECK-NEXT: s_cbranch_vccz .LBB0_19 +; CHECK-NEXT: .LBB0_11: ; %while.cond ; CHECK-NEXT: ; =>This Loop Header: Depth=1 -; CHECK-NEXT: ; Child Loop BB0_14 Depth 2 -; CHECK-NEXT: ; Child Loop BB0_18 Depth 2 +; CHECK-NEXT: ; Child Loop BB0_13 Depth 2 +; CHECK-NEXT: ; Child Loop BB0_17 Depth 2 ; CHECK-NEXT: 
s_and_saveexec_b64 s[8:9], s[4:5] -; CHECK-NEXT: s_cbranch_execz .LBB0_15 -; CHECK-NEXT: ; %bb.13: ; %loop-memcpy-expansion2.preheader -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: s_cbranch_execz .LBB0_14 +; CHECK-NEXT: ; %bb.12: ; %loop-memcpy-expansion2.preheader +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_mov_b64 s[10:11], 0 ; CHECK-NEXT: s_mov_b64 s[12:13], 0 ; CHECK-NEXT: s_mov_b64 s[14:15], 0 -; CHECK-NEXT: .LBB0_14: ; %loop-memcpy-expansion2 -; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1 +; CHECK-NEXT: .LBB0_13: ; %loop-memcpy-expansion2 +; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1 ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v10, s10 ; CHECK-NEXT: v_mov_b32_e32 v11, s11 @@ -169,23 +167,23 @@ define void @issue63986(i64 %0, i64 %idxprom) { ; CHECK-NEXT: flat_store_byte v[10:11], v20 offset:13 ; CHECK-NEXT: flat_store_byte v[10:11], v27 offset:12 ; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13] -; CHECK-NEXT: s_cbranch_execnz .LBB0_14 -; CHECK-NEXT: .LBB0_15: ; %Flow20 -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: s_cbranch_execnz .LBB0_13 +; CHECK-NEXT: .LBB0_14: ; %Flow20 +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] ; CHECK-NEXT: s_mov_b64 s[8:9], -1 -; CHECK-NEXT: s_cbranch_execz .LBB0_11 -; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual-header5 -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: s_cbranch_execz .LBB0_10 +; CHECK-NEXT: ; %bb.15: ; %loop-memcpy-residual-header5 +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7] ; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9] -; CHECK-NEXT: s_cbranch_execz .LBB0_10 -; CHECK-NEXT: ; %bb.17: ; %loop-memcpy-residual4.preheader -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: s_cbranch_execz .LBB0_9 +; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual4.preheader +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_mov_b64 s[12:13], 0 ; CHECK-NEXT: s_mov_b64 s[14:15], 0 -; CHECK-NEXT: .LBB0_18: ; %loop-memcpy-residual4 -; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1 +; CHECK-NEXT: .LBB0_17: ; %loop-memcpy-residual4 +; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1 ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v12, s15 ; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s14, v0 @@ -200,12 +198,12 @@ define void @issue63986(i64 %0, i64 %idxprom) { ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: flat_store_byte v[10:11], v13 ; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13] -; CHECK-NEXT: s_cbranch_execnz .LBB0_18 -; CHECK-NEXT: ; %bb.19: ; %Flow -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: s_cbranch_execnz .LBB0_17 +; CHECK-NEXT: ; %bb.18: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[12:13] -; CHECK-NEXT: s_branch .LBB0_10 -; CHECK-NEXT: .LBB0_20: ; %DummyReturnBlock +; CHECK-NEXT: s_branch .LBB0_9 +; CHECK-NEXT: .LBB0_19: ; %DummyReturnBlock ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/Transforms/InferAlignment/addrspacecast_amdgpu.ll b/llvm/test/Transforms/InferAlignment/addrspacecast_amdgpu.ll new file mode 100644 index 0000000000000..dff71098ae236 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/addrspacecast_amdgpu.ll @@ -0,0 +1,523 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -passes='infer-alignment' 
-mtriple=amdgcn -mcpu=gfx90a -S < %s | FileCheck -check-prefix=AMDGPU %s +; RUN: opt -passes='infer-alignment' -mcpu=corei7 -mtriple=x86_64-linux -S < %s | FileCheck -check-prefix=X86 %s + +$globalArrayAS0 = comdat any +$globalArrayAS1 = comdat any +$globalArrayAS2 = comdat any +$globalArrayAS3 = comdat any +$globalArrayAS4 = comdat any +$globalArrayAS5 = comdat any +$globalArrayAS6 = comdat any +$globalArrayAS7 = comdat any +$globalArrayAS8 = comdat any +@globalArrayAS0 = linkonce_odr hidden addrspace(0) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS1 = linkonce_odr hidden addrspace(1) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS2 = linkonce_odr hidden addrspace(2) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS3 = linkonce_odr hidden addrspace(3) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS4 = linkonce_odr hidden addrspace(4) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS5 = linkonce_odr hidden addrspace(5) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS6 = linkonce_odr hidden addrspace(6) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS7 = linkonce_odr hidden addrspace(7) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS8 = linkonce_odr hidden addrspace(8) global [4096 x i8] undef, comdat, align 16 + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS10(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS10( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(1) @globalArrayAS1 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS10( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(1) @globalArrayAS1 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(1) @globalArrayAS1 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS20(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS20( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(2) 
@globalArrayAS2 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS20( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(2) @globalArrayAS2 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(2) @globalArrayAS2 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS30(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS30( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @globalArrayAS3 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS30( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @globalArrayAS3 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @globalArrayAS3 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS40(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS40( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(4) @globalArrayAS4 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS40( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: 
entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(4) @globalArrayAS4 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(4) @globalArrayAS4 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS50(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS50( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(5) @globalArrayAS5 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS50( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(5) @globalArrayAS5 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(5) @globalArrayAS5 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS60(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS60( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(6) @globalArrayAS6 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS60( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast 
(ptr addrspace(6) @globalArrayAS6 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(6) @globalArrayAS6 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS70(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS70( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(7) @globalArrayAS7 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 4 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS70( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(7) @globalArrayAS7 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(7) @globalArrayAS7 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS80(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS80( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(8) @globalArrayAS8 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 4 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS80( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(8) @globalArrayAS8 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 
%add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(8) @globalArrayAS8 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS01(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS01( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(1) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(1)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS01( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(1) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(1)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(1) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(1)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(1) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS02(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS02( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(2) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(2)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS02( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(2) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(2)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(2) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(2)), i64 
%idxprom37.i21 + %l1 = load float, ptr addrspace(2) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS03(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS03( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(3) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(3)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS03( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(3) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(3)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(3) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(3)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(3) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS04(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS04( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(4) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(4)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS04( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(4) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(4)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(4) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(4)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(4) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline 
convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS05(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS05( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(5) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(5)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(5) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS05( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(5) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(5)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(5) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(5) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(5)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(5) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS06(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS06( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(6) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(6)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(6) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS06( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(6) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(6)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(6) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(6) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(6)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(6) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS07(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: 
define amdgpu_kernel void @infer_AS07( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(7) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(7)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(7) [[ARRAYIDX38_I22]], align 4 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS07( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(7) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(7)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(7) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(7) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(7)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(7) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS08(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS08( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(8) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(8)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(8) [[ARRAYIDX38_I22]], align 4 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS08( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(8) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(8)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(8) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(8) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(8)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(8) %arrayidx38.i22, align 4 + ret void +} + + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/Transforms/InferAlignment/addrspacecast_x86.ll b/llvm/test/Transforms/InferAlignment/addrspacecast_x86.ll new file mode 
100644 index 0000000000000..37914bca04c8b --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/addrspacecast_x86.ll @@ -0,0 +1,394 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -passes='infer-alignment' -mtriple=amdgcn -mcpu=gfx90a -S < %s | FileCheck -check-prefix=AMDGPU %s +; RUN: opt -passes='infer-alignment' -mcpu=corei7 -mtriple=x86_64-linux -S < %s | FileCheck -check-prefix=X86 %s + +$globalArrayAS0 = comdat any +$globalArrayAS256 = comdat any +$globalArrayAS257 = comdat any +$globalArrayAS258 = comdat any +$globalArrayAS270 = comdat any +$globalArrayAS271 = comdat any +$globalArrayAS272 = comdat any +@globalArrayAS0 = linkonce_odr hidden addrspace(0) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS256 = linkonce_odr hidden addrspace(256) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS257 = linkonce_odr hidden addrspace(257) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS258 = linkonce_odr hidden addrspace(258) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS270 = linkonce_odr hidden addrspace(270) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS271 = linkonce_odr hidden addrspace(271) global [4096 x i8] undef, comdat, align 16 +@globalArrayAS272 = linkonce_odr hidden addrspace(272) global [4096 x i8] undef, comdat, align 16 + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS10(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS10( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(256) @globalArrayAS256 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS10( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(256) @globalArrayAS256 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(256) @globalArrayAS256 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS20(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS20( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] 
= getelementptr inbounds float, ptr addrspacecast (ptr addrspace(257) @globalArrayAS257 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS20( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(257) @globalArrayAS257 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(257) @globalArrayAS257 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS30(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS30( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(258) @globalArrayAS258 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS30( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(258) @globalArrayAS258 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(258) @globalArrayAS258 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS40(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS40( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(270) @globalArrayAS270 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel 
void @infer_AS40( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(270) @globalArrayAS270 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(270) @globalArrayAS270 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS50(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS50( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(271) @globalArrayAS271 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS50( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(271) @globalArrayAS271 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(271) @globalArrayAS271 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS60(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS60( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(272) @globalArrayAS272 to ptr), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS60( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: 
[[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(272) @globalArrayAS272 to ptr), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(272) @globalArrayAS272 to ptr), i64 %idxprom37.i21 + %l1 = load float, ptr %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS01(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS01( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(256) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(256)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(256) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS01( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(256) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(256)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(256) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(256) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(256)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(256) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS02(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS02( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(257) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(257)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(257) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS02( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(257) 
addrspacecast (ptr @globalArrayAS0 to ptr addrspace(257)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(257) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(257) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(257)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(257) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS03(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS03( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(258) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(258)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(258) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS03( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(258) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(258)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(258) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(258) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(258)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(258) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS04(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS04( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(270) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(270)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(270) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS04( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(270) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(270)), i64 [[IDXPROM37_I21]] +; 
X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(270) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(270) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(270)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(270) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS05(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS05( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(271) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(271)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(271) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS05( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(271) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(271)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(271) [[ARRAYIDX38_I22]], align 16 +; X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(271) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(271)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(271) %arrayidx38.i22, align 4 + ret void +} + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @infer_AS06(i32 %idx) unnamed_addr align 2 { +; AMDGPU-LABEL: define amdgpu_kernel void @infer_AS06( +; AMDGPU-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; AMDGPU-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; AMDGPU-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; AMDGPU-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(272) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(272)), i64 [[IDXPROM37_I21]] +; AMDGPU-NEXT: [[L1:%.*]] = load float, ptr addrspace(272) [[ARRAYIDX38_I22]], align 16 +; AMDGPU-NEXT: ret void +; +; X86-LABEL: define amdgpu_kernel void @infer_AS06( +; X86-SAME: i32 [[IDX:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +; X86-NEXT: entry: +; X86-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 8 +; X86-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; X86-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 [[ADD36_I]] to i64 +; X86-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspace(272) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(272)), i64 [[IDXPROM37_I21]] +; X86-NEXT: [[L1:%.*]] = load float, ptr addrspace(272) [[ARRAYIDX38_I22]], align 16 +; 
X86-NEXT: ret void +; +entry: + %mul32.i = shl nuw nsw i32 %idx, 8 + %add36.i = add nuw nsw i32 %mul32.i, 1024 + %idxprom37.i21 = zext i32 %add36.i to i64 + %arrayidx38.i22 = getelementptr inbounds float, ptr addrspace(272) addrspacecast (ptr @globalArrayAS0 to ptr addrspace(272)), i64 %idxprom37.i21 + %l1 = load float, ptr addrspace(272) %arrayidx38.i22, align 4 + ret void +} + + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll index ea9b16e1382ee..da73f591ee3f2 100644 --- a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll +++ b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll @@ -118,7 +118,7 @@ define void @test2_addrspacecast() { ; CHECK-LABEL: @test2_addrspacecast( ; CHECK-NEXT: [[B:%.*]] = alloca [[T:%.*]], align 8 ; CHECK-NEXT: [[B_CAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(1) -; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef align 4 dereferenceable(124) [[B_CAST]], ptr addrspace(1) noundef align 4 dereferenceable(124) addrspacecast (ptr @G to ptr addrspace(1)), i64 124, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef align 8 dereferenceable(124) [[B_CAST]], ptr addrspace(1) noundef align 16 dereferenceable(124) addrspacecast (ptr @G to ptr addrspace(1)), i64 124, i1 false) ; CHECK-NEXT: call void @bar_as1(ptr addrspace(1) [[B_CAST]]) ; CHECK-NEXT: ret void ; @@ -234,7 +234,7 @@ define void @test8() { define void @test8_addrspacecast() { ; CHECK-LABEL: @test8_addrspacecast( ; CHECK-NEXT: [[AL:%.*]] = alloca [[U:%.*]], align 16 -; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 16 dereferenceable(20) [[AL]], ptr addrspace(1) noundef align 4 dereferenceable(20) addrspacecast (ptr getelementptr inbounds ([2 x %U], ptr @H, i64 0, i64 1) to ptr addrspace(1)), i64 20, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 16 dereferenceable(20) [[AL]], ptr addrspace(1) noundef nonnull align 4 dereferenceable(20) addrspacecast (ptr getelementptr inbounds ([2 x %U], ptr @H, i64 0, i64 1) to ptr addrspace(1)), i64 20, i1 false) ; CHECK-NEXT: call void @bar(ptr nonnull [[AL]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/AMDGPU/align-before-vectorize.ll b/llvm/test/Transforms/PhaseOrdering/AMDGPU/align-before-vectorize.ll new file mode 100644 index 0000000000000..4a00537858733 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/AMDGPU/align-before-vectorize.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -passes='infer-alignment,load-store-vectorizer' -S < %s | FileCheck %s + +$_ZZ11test_kernelILi256ELi4096EEvPfS0_E4smem = comdat any +@_ZZ11test_kernelILi256ELi4096EEvPfS0_E4smem = linkonce_odr hidden addrspace(3) global [4096 x i8] undef, comdat, align 16 + +; Function Attrs: alwaysinline convergent mustprogress nounwind +define amdgpu_kernel void @vectorized_load() unnamed_addr align 2 { +; CHECK-LABEL: define amdgpu_kernel void @vectorized_load() unnamed_addr align 2 { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = call noundef i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[MUL32_I:%.*]] = shl nuw nsw i32 [[IDX]], 2 +; CHECK-NEXT: [[ADD36_I:%.*]] = add nuw nsw i32 [[MUL32_I]], 1024 +; CHECK-NEXT: [[IDXPROM37_I21:%.*]] = zext i32 
[[ADD36_I]] to i64
+; CHECK-NEXT: [[ARRAYIDX38_I22:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @_ZZ11test_kernelILi256ELi4096EEvPfS0_E4smem to ptr), i64 [[IDXPROM37_I21]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[ARRAYIDX38_I22]], align 16
+; CHECK-NEXT: [[L11:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[L22:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[L33:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[L44:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: ret void
+;
+entry:
+ %idx = call noundef i32 @llvm.amdgcn.workitem.id.x()
+ %mul32.i = shl nuw nsw i32 %idx, 2
+ %add36.i = add nuw nsw i32 %mul32.i, 1024
+ %idxprom37.i21 = zext i32 %add36.i to i64
+ %arrayidx38.i22 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @_ZZ11test_kernelILi256ELi4096EEvPfS0_E4smem to ptr), i64 %idxprom37.i21
+ %l1 = load float, ptr %arrayidx38.i22, align 4
+ %add45.i = add nuw nsw i32 %mul32.i, 1025
+ %idxprom46.i23 = zext i32 %add45.i to i64
+ %arrayidx47.i24 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @_ZZ11test_kernelILi256ELi4096EEvPfS0_E4smem to ptr), i64 %idxprom46.i23
+ %l2 = load float, ptr %arrayidx47.i24, align 4
+ %add54.i = add nuw nsw i32 %mul32.i, 1026
+ %idxprom55.i25 = zext i32 %add54.i to i64
+ %arrayidx56.i26 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @_ZZ11test_kernelILi256ELi4096EEvPfS0_E4smem to ptr), i64 %idxprom55.i25
+ %l3 = load float, ptr %arrayidx56.i26, align 4
+ %add63.i = add nuw nsw i32 %mul32.i, 1027
+ %idxprom64.i27 = zext i32 %add63.i to i64
+ %arrayidx65.i28 = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @_ZZ11test_kernelILi256ELi4096EEvPfS0_E4smem to ptr), i64 %idxprom64.i27
+ %l4 = load float, ptr %arrayidx65.i28, align 4
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x()