[PPCGCodeGeneration] Update PPCG Code Generation for OpenCL compatibility

Added a small change to the way pointer arguments are set in the kernel
code generation. The pointer is now retrieved in a way that specifically
requests the global address space annotation. This is necessary if the IR
is to be run through NVPTX to generate OpenCL-compatible PTX.

The changes do not affect the PTX strings generated for the CUDA target
(nvptx64-nvidia-cuda), but they are necessary for the OpenCL target (nvptx64-nvidia-nvcl).
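For illustration only (not the commit's own code; the helper name getKernelArgPtrTy is made up), a minimal sketch of how an i8 pointer in the global address space can be requested through LLVM's IRBuilder, mirroring the change to createKernelFunctionDecl shown in the diff below:

// Hypothetical helper, shown only to illustrate the address-space request.
// Address space 1 is what the NVPTX backend treats as global memory;
// Builder.getInt8PtrTy() with no argument defaults to address space 0 ("i8*"),
// while passing 1 yields "i8 addrspace(1)*".
#include "llvm/IR/IRBuilder.h"

static llvm::Type *getKernelArgPtrTy(llvm::IRBuilder<> &Builder) {
  unsigned GlobalAddrSpace = 1; // assumed global-memory address space
  return Builder.getInt8PtrTy(GlobalAddrSpace);
}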

Additionally, the data layout has been updated to what the NVPTX backend requests/recommends.
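As a rough illustration of the data-layout update (assuming the generated kernel module is named GPUModule; the helper name is made up), the 64-bit string below is the one this patch introduces in computeNVPTXDataLayout:

// Hypothetical helper: apply the data layout recommended by the NVPTX backend
// for 64-bit targets to the generated kernel module.
#include "llvm/IR/Module.h"

static void setNVPTX64DataLayout(llvm::Module &GPUModule) {
  GPUModule.setDataLayout(
      "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:"
      "64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:"
      "64-v128:128:128-n16:32:64");
}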

Contributed-by: Philipp Schaad

Reviewers: Meinersbur, grosser, bollu

Reviewed By: grosser, bollu

Subscribers: jlebar, pollydev, llvm-commits, nemanjai, yaxunl, Anastasia

Tags: #polly

Differential Revision: https://reviews.llvm.org/D32215

llvm-svn: 301299
bollu committed Apr 25, 2017
1 parent 561247a commit d277fed
Showing 12 changed files with 55 additions and 49 deletions.
18 changes: 12 additions & 6 deletions polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -1273,12 +1273,17 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
///
/// @param is64Bit Are we looking for a 64 bit architecture?
static std::string computeNVPTXDataLayout(bool is64Bit) {
std::string Ret = "e";
std::string Ret = "";

if (!is64Bit)
Ret += "-p:32:32";

Ret += "-i64:64-v16:16-v32:32-n16:32:64";
if (!is64Bit) {
Ret += "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:"
"64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:"
"64-v128:128:128-n16:32:64";
} else {
Ret += "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:"
"64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:"
"64-v128:128:128-n16:32:64";
}

return Ret;
}
@@ -1298,7 +1303,8 @@ GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id);
Args.push_back(SAI->getElementType());
} else {
Args.push_back(Builder.getInt8PtrTy());
static const int UseGlobalMemory = 1;
Args.push_back(Builder.getInt8PtrTy(UseGlobalMemory));
}
}

4 changes: 2 additions & 2 deletions polly/test/GPGPU/cuda-annotations.ll
@@ -4,11 +4,11 @@

; REQUIRES: pollyacc

; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) #0 {
; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 {

; KERNEL: !nvvm.annotations = !{!0}

; KERNEL: !0 = !{void (i8*, i64)* @kernel_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1}
; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @kernel_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1}

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

32 changes: 16 additions & 16 deletions polly/test/GPGPU/host-control-flow.ll
@@ -42,7 +42,7 @@
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0)
; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %c0)
; KERNEL-IR-LABEL: entry:
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
@@ -65,55 +65,55 @@
; KERNEL-IR-NEXT: br label %polly.stmt.for.body3

; KERNEL-IR-LABEL: polly.stmt.for.body3: ; preds = %polly.then
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A = bitcast i8* %MemRef_A to float*
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
; KERNEL-IR-NEXT: %pexp.pdiv_r = urem i64 %c0, 2
; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A = mul nsw i64 %pexp.pdiv_r, 100
; KERNEL-IR-NEXT: %7 = mul nsw i64 32, %b0
; KERNEL-IR-NEXT: %8 = add nsw i64 %7, %t0
; KERNEL-IR-NEXT: %polly.access.add.MemRef_A = add nsw i64 %polly.access.mul.MemRef_A, %8
; KERNEL-IR-NEXT: %polly.access.MemRef_A = getelementptr float, float* %polly.access.cast.MemRef_A, i64 %polly.access.add.MemRef_A
; KERNEL-IR-NEXT: %tmp_p_scalar_ = load float, float* %polly.access.MemRef_A, align 4
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A1 = bitcast i8* %MemRef_A to float*
; KERNEL-IR-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %polly.access.add.MemRef_A
; KERNEL-IR-NEXT: %tmp_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A, align 4
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A1 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
; KERNEL-IR-NEXT: %pexp.pdiv_r2 = urem i64 %c0, 2
; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A3 = mul nsw i64 %pexp.pdiv_r2, 100
; KERNEL-IR-NEXT: %9 = mul nsw i64 32, %b0
; KERNEL-IR-NEXT: %10 = add nsw i64 %9, %t0
; KERNEL-IR-NEXT: %11 = add nsw i64 %10, 1
; KERNEL-IR-NEXT: %polly.access.add.MemRef_A4 = add nsw i64 %polly.access.mul.MemRef_A3, %11
; KERNEL-IR-NEXT: %polly.access.MemRef_A5 = getelementptr float, float* %polly.access.cast.MemRef_A1, i64 %polly.access.add.MemRef_A4
; KERNEL-IR-NEXT: %tmp2_p_scalar_ = load float, float* %polly.access.MemRef_A5, align 4
; KERNEL-IR-NEXT: %polly.access.MemRef_A5 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A1, i64 %polly.access.add.MemRef_A4
; KERNEL-IR-NEXT: %tmp2_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A5, align 4
; KERNEL-IR-NEXT: %p_add = fadd float %tmp_p_scalar_, %tmp2_p_scalar_
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A6 = bitcast i8* %MemRef_A to float*
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A6 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
; KERNEL-IR-NEXT: %pexp.pdiv_r7 = urem i64 %c0, 2
; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A8 = mul nsw i64 %pexp.pdiv_r7, 100
; KERNEL-IR-NEXT: %12 = mul nsw i64 32, %b0
; KERNEL-IR-NEXT: %13 = add nsw i64 %12, %t0
; KERNEL-IR-NEXT: %14 = add nsw i64 %13, 2
; KERNEL-IR-NEXT: %polly.access.add.MemRef_A9 = add nsw i64 %polly.access.mul.MemRef_A8, %14
; KERNEL-IR-NEXT: %polly.access.MemRef_A10 = getelementptr float, float* %polly.access.cast.MemRef_A6, i64 %polly.access.add.MemRef_A9
; KERNEL-IR-NEXT: %tmp3_p_scalar_ = load float, float* %polly.access.MemRef_A10, align 4
; KERNEL-IR-NEXT: %polly.access.MemRef_A10 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A6, i64 %polly.access.add.MemRef_A9
; KERNEL-IR-NEXT: %tmp3_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A10, align 4
; KERNEL-IR-NEXT: %p_add12 = fadd float %p_add, %tmp3_p_scalar_
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A11 = bitcast i8* %MemRef_A to float*
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A11 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
; KERNEL-IR-NEXT: %15 = add nsw i64 %c0, 1
; KERNEL-IR-NEXT: %pexp.pdiv_r12 = urem i64 %15, 2
; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A13 = mul nsw i64 %pexp.pdiv_r12, 100
; KERNEL-IR-NEXT: %16 = mul nsw i64 32, %b0
; KERNEL-IR-NEXT: %17 = add nsw i64 %16, %t0
; KERNEL-IR-NEXT: %18 = add nsw i64 %17, 1
; KERNEL-IR-NEXT: %polly.access.add.MemRef_A14 = add nsw i64 %polly.access.mul.MemRef_A13, %18
; KERNEL-IR-NEXT: %polly.access.MemRef_A15 = getelementptr float, float* %polly.access.cast.MemRef_A11, i64 %polly.access.add.MemRef_A14
; KERNEL-IR-NEXT: %tmp4_p_scalar_ = load float, float* %polly.access.MemRef_A15, align 4
; KERNEL-IR-NEXT: %polly.access.MemRef_A15 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A11, i64 %polly.access.add.MemRef_A14
; KERNEL-IR-NEXT: %tmp4_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A15, align 4
; KERNEL-IR-NEXT: %p_add17 = fadd float %tmp4_p_scalar_, %p_add12
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A16 = bitcast i8* %MemRef_A to float*
; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A16 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
; KERNEL-IR-NEXT: %19 = add nsw i64 %c0, 1
; KERNEL-IR-NEXT: %pexp.pdiv_r17 = urem i64 %19, 2
; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A18 = mul nsw i64 %pexp.pdiv_r17, 100
; KERNEL-IR-NEXT: %20 = mul nsw i64 32, %b0
; KERNEL-IR-NEXT: %21 = add nsw i64 %20, %t0
; KERNEL-IR-NEXT: %22 = add nsw i64 %21, 1
; KERNEL-IR-NEXT: %polly.access.add.MemRef_A19 = add nsw i64 %polly.access.mul.MemRef_A18, %22
; KERNEL-IR-NEXT: %polly.access.MemRef_A20 = getelementptr float, float* %polly.access.cast.MemRef_A16, i64 %polly.access.add.MemRef_A19
; KERNEL-IR-NEXT: store float %p_add17, float* %polly.access.MemRef_A20, align 4
; KERNEL-IR-NEXT: %polly.access.MemRef_A20 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A16, i64 %polly.access.add.MemRef_A19
; KERNEL-IR-NEXT: store float %p_add17, float addrspace(1)* %polly.access.MemRef_A20, align 4
; KERNEL-IR-NEXT: br label %polly.merge

; KERNEL-IR-LABEL: polly.else: ; preds = %polly.cond
8 changes: 4 additions & 4 deletions polly/test/GPGPU/kernel-params-only-some-arrays.ll
@@ -18,10 +18,10 @@

; KERNEL: ; ModuleID = 'kernel_0'
; KERNEL-NEXT: source_filename = "kernel_0"
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"

; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A)
; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
@@ -33,10 +33,10 @@

; KERNEL: ; ModuleID = 'kernel_1'
; KERNEL-NEXT: source_filename = "kernel_1"
; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"

; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B)
; KERNEL: define ptx_kernel void @kernel_1(i8 addrspace(1)* %MemRef_B)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
2 changes: 1 addition & 1 deletion polly/test/GPGPU/kernel-params-scop-parameter.ll
@@ -9,7 +9,7 @@
; A[i] += 42;
; }

; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n)
; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n)

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

4 changes: 2 additions & 2 deletions polly/test/GPGPU/non-read-only-scalars.ll
@@ -87,10 +87,10 @@
; CODE-NEXT: Stmt_bb20(c0);

; KERNEL-IR: store float %p_tmp23, float* %sum.0.phiops
; KERNEL-IR-NEXT: [[REGA:%.+]] = bitcast i8* %MemRef_sum_0__phi to float*
; KERNEL-IR-NEXT: [[REGA:%.+]] = addrspacecast i8 addrspace(1)* %MemRef_sum_0__phi to float*
; KERNEL-IR-NEXT: [[REGB:%.+]] = load float, float* %sum.0.phiops
; KERNEL-IR-NEXT: store float [[REGB]], float* [[REGA]]
; KERNEL-IR-NEXT: [[REGC:%.+]] = bitcast i8* %MemRef_sum_0 to float*
; KERNEL-IR-NEXT: [[REGC:%.+]] = addrspacecast i8 addrspace(1)* %MemRef_sum_0 to float*
; KERNEL-IR-NEXT: [[REGD:%.+]] = load float, float* %sum.0.s2a
; KERNEL-IR-NEXT: store float [[REGD]], float* [[REGC]]
; KERNEL-IR-NEXT: ret void
4 changes: 2 additions & 2 deletions polly/test/GPGPU/phi-nodes-in-kernel.ll
@@ -49,10 +49,10 @@ target triple = "x86_64-unknown-linux-gnu"
; KERNEL-IR: entry:
; KERNEL-IR-NEXT: %out_l.055.s2a = alloca i32
; KERNEL-IR-NEXT: %out_l.055.phiops = alloca i32
; KERNEL-IR-NEXT: %1 = bitcast i8* %MemRef_out_l_055__phi to i32*
; KERNEL-IR-NEXT: %1 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055__phi to i32*
; KERNEL-IR-NEXT: %2 = load i32, i32* %1
; KERNEL-IR-NEXT: store i32 %2, i32* %out_l.055.phiops
; KERNEL-IR-NEXT: %3 = bitcast i8* %MemRef_out_l_055 to i32*
; KERNEL-IR-NEXT: %3 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055 to i32*
; KERNEL-IR-NEXT: %4 = load i32, i32* %3
; KERNEL-IR-NEXT: store i32 %4, i32* %out_l.055.s2a

12 changes: 6 additions & 6 deletions polly/test/GPGPU/private-memory.ll
@@ -28,17 +28,17 @@

; KERNEL: %polly.access.cast.private_array = bitcast [1 x float]* %private_array to float*
; KERNEL-NEXT: %polly.access.private_array = getelementptr float, float* %polly.access.cast.private_array, i64 0
; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8* %MemRef_A to float*
; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float* %polly.access.cast.MemRef_A, i64 %t0
; KERNEL-NEXT: %shared.read = load float, float* %polly.access.MemRef_A
; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %t0
; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_A
; KERNEL-NEXT: store float %shared.read, float* %polly.access.private_array

; KERNEL: %polly.access.cast.private_array5 = bitcast [1 x float]* %private_array to float*
; KERNEL-NEXT: %polly.access.private_array6 = getelementptr float, float* %polly.access.cast.private_array5, i64 0
; KERNEL-NEXT: %polly.access.cast.MemRef_A7 = bitcast i8* %MemRef_A to float*
; KERNEL-NEXT: %polly.access.MemRef_A8 = getelementptr float, float* %polly.access.cast.MemRef_A7, i64 %t0
; KERNEL-NEXT: %polly.access.cast.MemRef_A7 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
; KERNEL-NEXT: %polly.access.MemRef_A8 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A7, i64 %t0
; KERNEL-NEXT: %shared.write = load float, float* %polly.access.private_array6
; KERNEL-NEXT: store float %shared.write, float* %polly.access.MemRef_A8
; KERNEL-NEXT: store float %shared.write, float addrspace(1)* %polly.access.MemRef_A8

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

2 changes: 1 addition & 1 deletion polly/test/GPGPU/remove-dead-instructions-in-stmt-2.ll
@@ -7,7 +7,7 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; KERNEL-IR: store i32 0, i32* %polly.access.MemRef_sum_c, align 4
; KERNEL-IR: store i32 0, i32 addrspace(1)* %polly.access.MemRef_sum_c, align 4
; KERNEL-IR-NEXT: br label %polly.merge

define void @kernel_dynprog([50 x [50 x i32]]* %sum_c) {
2 changes: 1 addition & 1 deletion polly/test/GPGPU/remove-dead-instructions-in-stmt.ll
@@ -10,7 +10,7 @@
; condition. This code referred to CPU registers and consequently resulted
; in invalid bitcode.

; KERNEL-IR: store i32 0, i32* %polly.access.MemRef_sum_c, align 4
; KERNEL-IR: store i32 0, i32 addrspace(1)* %polly.access.MemRef_sum_c, align 4
; KERNEL-IR-NEXT: br label %polly.merge

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 changes: 2 additions & 2 deletions polly/test/GPGPU/shared-memory-two-dimensional.ll
@@ -36,8 +36,8 @@

; KERNEL: %polly.access.mul.MemRef_b = mul nsw i64 %polly.indvar, 8
; KERNEL-NEXT: %polly.access.add.MemRef_b = add nsw i64 %polly.access.mul.MemRef_b, %t0
; KERNEL-NEXT: %polly.access.MemRef_b = getelementptr float, float* %polly.access.cast.MemRef_b, i64 %polly.access.add.MemRef_b
; KERNEL-NEXT: %shared.read = load float, float* %polly.access.MemRef_b
; KERNEL-NEXT: %polly.access.MemRef_b = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_b, i64 %polly.access.add.MemRef_b
; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_b
; KERNEL-NEXT: store float %shared.read, float addrspace(3)* %polly.access.shared_MemRef_b


12 changes: 6 additions & 6 deletions polly/test/GPGPU/shared-memory.ll
@@ -29,16 +29,16 @@
; KERNEL: @shared_MemRef_A = internal addrspace(3) global [32 x float] zeroinitializer, align 4

; KERNEL: %polly.access.shared_MemRef_A = getelementptr float, float addrspace(3)* getelementptr inbounds ([32 x float], [32 x float] addrspace(3)* @shared_MemRef_A, i32 0, i32 0), i64 %t0
; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8* %MemRef_A to float*
; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float* %polly.access.cast.MemRef_A, i64 %t0
; KERNEL-NEXT: %shared.read = load float, float* %polly.access.MemRef_A
; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %t0
; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_A
; KERNEL-NEXT: store float %shared.read, float addrspace(3)* %polly.access.shared_MemRef_A

; KERNEL: %polly.access.shared_MemRef_A3 = getelementptr float, float addrspace(3)* getelementptr inbounds ([32 x float], [32 x float] addrspace(3)* @shared_MemRef_A, i32 0, i32 0), i64 %t0
; KERNEL-NEXT: %polly.access.cast.MemRef_A4 = bitcast i8* %MemRef_A to float*
; KERNEL-NEXT: %polly.access.MemRef_A5 = getelementptr float, float* %polly.access.cast.MemRef_A4, i64 %t0
; KERNEL-NEXT: %polly.access.cast.MemRef_A4 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
; KERNEL-NEXT: %polly.access.MemRef_A5 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A4, i64 %t0
; KERNEL-NEXT: %shared.write = load float, float addrspace(3)* %polly.access.shared_MemRef_A3
; KERNEL-NEXT: store float %shared.write, float* %polly.access.MemRef_A5
; KERNEL-NEXT: store float %shared.write, float addrspace(1)* %polly.access.MemRef_A5


target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"