Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix invalid addrspacecast due to combining alloca with global var
For function-scope variables with a large initialization list, the front end usually generates a global variable to hold the initializer, then generates a memcpy intrinsic to initialize the alloca. InstCombiner::visitAllocaInst identifies such allocas that are only ever read and replaces them with the global variable. This is done by casting the global variable to the type of the alloca and replacing all references. However, when the global variable is in a different address space that is disjoint from address space 0 (e.g. for IR generated from OpenCL, a global variable cannot be in the private address space, i.e. address space 0), casting the global variable to address space 0 results in invalid IR for certain targets (e.g. amdgpu). To fix this issue, when the global variable is not in address space 0, instead of casting it to address space 0, this patch chases down the uses of the alloca until it reaches the load instructions, then replaces each load from the alloca with a load from the global variable. If bitcasts or GEPs are encountered while chasing the uses, new bitcasts and GEPs based on the global variable are generated and used in the load instructions. A debug output is also added to the amdgpu backend to facilitate debugging such issues. Differential Revision: https://reviews.llvm.org/D27283
- Loading branch information
Showing
4 changed files
with
138 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
; RUN: opt < %s -instcombine -S | FileCheck %s | ||
|
||
@test.data = private unnamed_addr addrspace(2) constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 4 | ||
|
||
; CHECK-LABEL: test_load | ||
; CHECK: %[[GEP:.*]] = getelementptr [8 x i32], [8 x i32] addrspace(2)* @test.data, i64 0, i64 %x | ||
; CHECK: %{{.*}} = load i32, i32 addrspace(2)* %[[GEP]] | ||
; CHECK-NOT: alloca | ||
; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64 | ||
; CHECK-NOT: addrspacecast | ||
; CHECK-NOT: load i32, i32* | ||
; Read-only case: %data is filled by a memcpy from @test.data (addrspace(2)) and | ||
; is afterwards only read through a GEP + load. The FileCheck directives above | ||
; expect the alloca and memcpy to disappear and the load to go straight to | ||
; @test.data in addrspace(2), with no addrspacecast introduced. | ||
define void @test_load(i32 addrspace(1)* %out, i64 %x) { | ||
entry: | ||
%data = alloca [8 x i32], align 4 | ||
%0 = bitcast [8 x i32]* %data to i8* | ||
; Copies 32 bytes, i.e. the whole [8 x i32] initializer, into the alloca. | ||
call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false) | ||
; The only use of the copied data: read element %x. | ||
%arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x | ||
%1 = load i32, i32* %arrayidx, align 4 | ||
; Write the loaded element to %out[%x] in addrspace(1). | ||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x | ||
store i32 %1, i32 addrspace(1)* %arrayidx1, align 4 | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_call | ||
; CHECK: alloca | ||
; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64 | ||
; CHECK-NOT: addrspacecast | ||
; CHECK: call i32 @foo(i32* %{{.*}}) | ||
; Escaping case: a pointer into %data is passed to @foo instead of being loaded | ||
; from. The FileCheck directives above expect the alloca and memcpy to be kept, | ||
; @foo to still receive an i32* argument, and no addrspacecast to appear. | ||
; NOTE(review): presumably the alloca is kept because @foo is declared without | ||
; attributes and so may write through the pointer -- confirm against the pass. | ||
define void @test_call(i32 addrspace(1)* %out, i64 %x) { | ||
entry: | ||
%data = alloca [8 x i32], align 4 | ||
%0 = bitcast [8 x i32]* %data to i8* | ||
; Copies 32 bytes, i.e. the whole [8 x i32] initializer, into the alloca. | ||
call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false) | ||
; Address of element %x in the alloca is handed to an external function. | ||
%arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x | ||
%1 = call i32 @foo(i32* %arrayidx) | ||
; Write the call result to %out[%x] in addrspace(1). | ||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x | ||
store i32 %1, i32 addrspace(1)* %arrayidx1, align 4 | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_load_and_call | ||
; CHECK: alloca | ||
; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64 | ||
; CHECK: load i32, i32* %{{.*}} | ||
; CHECK: call i32 @foo(i32* %{{.*}}) | ||
; CHECK-NOT: addrspacecast | ||
; CHECK-NOT: load i32, i32 addrspace(2)* | ||
; Mixed case: the same GEP into %data feeds both a load and a call to @foo. | ||
; The FileCheck directives above expect the alloca and memcpy to be kept, the | ||
; load to stay on an i32* (not rewritten to addrspace(2)), and no addrspacecast | ||
; to appear. | ||
define void @test_load_and_call(i32 addrspace(1)* %out, i64 %x, i64 %y) { | ||
entry: | ||
%data = alloca [8 x i32], align 4 | ||
%0 = bitcast [8 x i32]* %data to i8* | ||
; Copies 32 bytes, i.e. the whole [8 x i32] initializer, into the alloca. | ||
call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false) | ||
; First use of the element pointer: a plain load, stored to %out[%x]. | ||
%arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x | ||
%1 = load i32, i32* %arrayidx, align 4 | ||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x | ||
store i32 %1, i32 addrspace(1)* %arrayidx1, align 4 | ||
; Second use of the same pointer: passed to @foo, result stored to %out[%y]. | ||
%2 = call i32 @foo(i32* %arrayidx) | ||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %y | ||
store i32 %2, i32 addrspace(1)* %arrayidx2, align 4 | ||
ret void | ||
} | ||
|
||
|
||
declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture writeonly, i8 addrspace(2)* nocapture readonly, i64, i32, i1) | ||
declare i32 @foo(i32* %x) |