Skip to content

Commit

Permalink
CodeGen: Fix invalid bitcasts for atomic builtins
Browse files Browse the repository at this point in the history
Currently clang assumes that the temporary variables emitted during
codegen of atomic builtins are in address space 0. That assumption
does not hold for the target triple amdgcn---amdgiz, and it causes
invalid bitcasts.

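This patch fixes that by building the bitcast's destination pointer type in
the address space of the temporary itself (Dest.getAddressSpace()) instead of
relying on the default address space of getPointerTo().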

Differential Revision: https://reviews.llvm.org/D38966

llvm-svn: 316000
  • Loading branch information
yxsamliu committed Oct 17, 2017
1 parent 63c7c04 commit 8ab5ab0
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 41 deletions.
6 changes: 4 additions & 2 deletions clang/lib/CodeGen/CGAtomic.cpp
Expand Up @@ -1226,7 +1226,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
return RValue::get(nullptr);

return convertTempToRValue(
Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo(
Dest.getAddressSpace())),
RValTy, E->getExprLoc());
}

Expand Down Expand Up @@ -1298,7 +1299,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {

assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
return convertTempToRValue(
Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo(
Dest.getAddressSpace())),
RValTy, E->getExprLoc());
}

Expand Down
78 changes: 39 additions & 39 deletions clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -1,8 +1,8 @@
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | opt -instnamer -S | FileCheck %s
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s

// Also test serialization of atomic operations here, to avoid duplicating the test.
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl
// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | opt -instnamer -S | FileCheck %s
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl
// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s

#ifndef ALREADY_INCLUDED
#define ALREADY_INCLUDED
Expand Down Expand Up @@ -32,22 +32,22 @@ atomic_int j;

void fi1(atomic_int *i) {
// CHECK-LABEL: @fi1
// CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);

// CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);

// CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);

// CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}

void fi2(atomic_int *i) {
// CHECK-LABEL: @fi2
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
__opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
}

Expand All @@ -56,7 +56,7 @@ void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
__opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
__opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
Expand All @@ -65,25 +65,25 @@ void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *

void fi3(atomic_int *i, atomic_uint *ui) {
// CHECK-LABEL: @fi3
// CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
// CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: atomicrmw min i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
// CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: atomicrmw max i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
// CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: atomicrmw umin i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
// CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);

// CHECK: atomicrmw umax i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
// CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}

bool fi4(atomic_int *i) {
// CHECK-LABEL: @fi4(
// CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32 addrspace(4)* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
// CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
// CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
// CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
// CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
Expand All @@ -100,16 +100,16 @@ void fi5(atomic_int *i, int scope) {
// CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
// CHECK-NEXT: ]
// CHECK: [[opencl_workgroup]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
// CHECK: br label %[[continue:.*]]
// CHECK: [[opencl_device]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
// CHECK: br label %[[continue]]
// CHECK: [[opencl_allsvmdevices]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst
// CHECK: load atomic i32, i32* %{{.*}} seq_cst
// CHECK: br label %[[continue]]
// CHECK: [[opencl_subgroup]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst
// CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
// CHECK: br label %[[continue]]
// CHECK: [[continue]]:
int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
Expand Down Expand Up @@ -141,29 +141,29 @@ void fi6(atomic_int *i, int order, int scope) {
// CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
// CHECK-NEXT: ]
// CHECK: [[MON_WG]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") monotonic
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic
// CHECK: [[MON_DEV]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") monotonic
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic
// CHECK: [[MON_ALL]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} monotonic
// CHECK: load atomic i32, i32* %{{.*}} monotonic
// CHECK: [[MON_SUB]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") monotonic
// CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic
// CHECK: [[ACQ_WG]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") acquire
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire
// CHECK: [[ACQ_DEV]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") acquire
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire
// CHECK: [[ACQ_ALL]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} acquire
// CHECK: load atomic i32, i32* %{{.*}} acquire
// CHECK: [[ACQ_SUB]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") acquire
// CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire
// CHECK: [[SEQ_WG]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
// CHECK: [[SEQ_DEV]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
// CHECK: [[SEQ_ALL]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst
// CHECK: load atomic i32, i32* %{{.*}} seq_cst
// CHECK: [[SEQ_SUB]]:
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst
// CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
int x = __opencl_atomic_load(i, order, scope);
}

Expand All @@ -181,7 +181,7 @@ void ff2(atomic_float *d) {

float ff3(atomic_float *d) {
// CHECK-LABEL: @ff3
// CHECK: atomicrmw xchg i32 addrspace(4)* {{.*}} syncscope("workgroup") seq_cst
// CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}

Expand All @@ -198,10 +198,10 @@ void atomic_init_foo()

// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
// CHECK: cmpxchg i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
// CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);

// CHECK: cmpxchg weak i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
// CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
__opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
}

Expand Down Expand Up @@ -279,11 +279,11 @@ int test_volatile(volatile atomic_int *i) {
// CHECK-LABEL: @test_volatile
// CHECK: %[[i_addr:.*]] = alloca i32
// CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
// CHECK-NEXT: store i32 addrspace(4)* %i, i32 addrspace(4)** %[[i_addr]]
// CHECK-NEXT: %[[addr:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[i_addr]]
// CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32 addrspace(4)* %[[addr]] syncscope("workgroup") seq_cst
// CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]]
// CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
// CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]]
// CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]]
// CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst
// CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]]
// CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]]
// CHECK-NEXT: ret i32 %[[retval]]
return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
}
Expand Down

0 comments on commit 8ab5ab0

Please sign in to comment.