-
Notifications
You must be signed in to change notification settings - Fork 14.8k
SeparateConstOffsetFromGEP: Add more tests with lower-gep #134684
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SeparateConstOffsetFromGEP: Add more tests with lower-gep #134684
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: I didn't see any failures while trying to break hasMoreThanOneUseInLoop. Patch is 21.41 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/134684.diff 1 file affected:
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll
new file mode 100644
index 0000000000000..687e921640492
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll
@@ -0,0 +1,482 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -passes='separate-const-offset-from-gep<lower-gep>' \
+; RUN: -reassociate-geps-verify-no-dead-code -S | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+
+%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
+
+@packed_struct_array = addrspace(3) global [1024 x %struct.Packed] poison, align 1
+
+; Verifies we can emit a correct uglygep if the address is not naturally
+; aligned (constant part: 76 + 12 + 3 * 8 = 112 bytes). This should not
+; produce a no-op bitcast with opaque pointers.
+define ptr addrspace(3) @packed_struct(i32 %i, i32 %j) {
+; CHECK-LABEL: @packed_struct(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[IDXPROM]], 77824
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(3) @packed_struct_array, i32 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[I:%.*]], 76
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[J:%.*]], 3
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP3]], i32 [[TMP2]]
+; CHECK-NEXT: [[UGLYGEP5:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP4]], i32 112
+; CHECK-NEXT: ret ptr addrspace(3) [[UGLYGEP5]]
+;
+entry:
+ %add = add nsw i32 %j, 3
+ %add1 = add nsw i32 %i, 1
+ %arrayidx3 = getelementptr inbounds [1024 x %struct.Packed], ptr addrspace(3) @packed_struct_array, i64 0, i32 %add1, i32 1, i32 %add
+ ret ptr addrspace(3) %arrayidx3
+}
+
+%struct = type { i32, i32, i32 }
+; Both GEPs share %ptr and %idx and differ only in the field (byte offset 4 vs. 8).
+define i32 @test1(ptr %ptr, i64 %idx) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[IDX:%.*]], 12
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 4
+; CHECK-NEXT: [[LV_1:%.*]] = load i32, ptr [[UGLYGEP1]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[LV_1]], 0
+; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[IDX]], 12
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP2]]
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP2]], i64 8
+; CHECK-NEXT: [[LV_2:%.*]] = load i32, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[LV_1]], [[LV_2]]
+; CHECK-NEXT: ret i32 [[RES]]
+; CHECK: else:
+; CHECK-NEXT: ret i32 0
+;
+ %gep.1 = getelementptr %struct, ptr %ptr, i64 %idx, i32 1
+ %lv.1 = load i32, ptr %gep.1
+ %c = icmp slt i32 %lv.1, 0
+ br i1 %c, label %then, label %else
+
+then:
+ %gep.2 = getelementptr %struct, ptr %ptr, i64 %idx, i32 2
+ %lv.2 = load i32, ptr %gep.2
+ %res = add i32 %lv.1, %lv.2
+ ret i32 %res
+
+else:
+ ret i32 0
+}
+; Like @test1, but with a ptr addrspace(7) base: the i64 index is truncated to i32.
+define i32 @test1_fatptr(ptr addrspace(7) %ptr, i64 %idx) {
+; CHECK-LABEL: @test1_fatptr(
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IDX:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[IDXPROM]], 12
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR:%.*]], i32 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr addrspace(7) [[UGLYGEP]], i32 4
+; CHECK-NEXT: [[LV_1:%.*]] = load i32, ptr addrspace(7) [[UGLYGEP1]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[LV_1]], 0
+; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IDX]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[IDXPROM2]], 12
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP2]]
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr addrspace(7) [[UGLYGEP3]], i32 8
+; CHECK-NEXT: [[LV_2:%.*]] = load i32, ptr addrspace(7) [[UGLYGEP4]], align 4
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[LV_1]], [[LV_2]]
+; CHECK-NEXT: ret i32 [[RES]]
+; CHECK: else:
+; CHECK-NEXT: ret i32 0
+;
+ %gep.1 = getelementptr %struct, ptr addrspace(7) %ptr, i64 %idx, i32 1
+ %lv.1 = load i32, ptr addrspace(7) %gep.1
+ %c = icmp slt i32 %lv.1, 0
+ br i1 %c, label %then, label %else
+
+then:
+ %gep.2 = getelementptr %struct, ptr addrspace(7) %ptr, i64 %idx, i32 2
+ %lv.2 = load i32, ptr addrspace(7) %gep.2
+ %res = add i32 %lv.1, %lv.2
+ ret i32 %res
+
+else:
+ ret i32 0
+}
+
+
+; Test lowerToSingleIndexGEPs: index is sext((%rem + 511) - %mul); 511 * 4 = 2044 is split off.
+define void @test_A_sub_B_add_ConstantInt(ptr %p) {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT: br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[UGLYGEP3:%.*]], i64 2044
+; CHECK-NEXT: [[UGLYGEP5:%.*]] = getelementptr i8, ptr [[UGLYGEP4]], i64 [[TMP3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP5]], align 4
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @foo()
+ %rem = srem i32 %0, 5
+ %add = add nsw i32 %rem , 511
+ br label %for.body
+
+for.body:
+ %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %mul = mul nuw nsw i32 %k, 5
+ %sub1 = sub nsw i32 %mul, %rem
+ %cmp26 = icmp ult i32 %sub1, 512
+ br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+ %sub2 = sub nsw i32 %add, %mul
+ %idxprom = sext i32 %sub2 to i64
+ %arryidx = getelementptr inbounds float, ptr %p, i64 %idxprom
+ store float 1.0, ptr %arryidx, align 4
+ br label %cond.end
+
+cond.end:
+ %inc = add nuw nsw i32 %k, 1
+ %exitcond = icmp ne i32 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+@extern_array = global [1024 x i32] poison, align 16
+; Test lowerToSingleIndexGEPs with a global variable pointer (@extern_array).
+; The %p argument is unused; the 2044-byte constant GEP is emitted inbounds on the global.
+define void @test_A_sub_B_add_ConstantInt_gv_baseptr(ptr %p) {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt_gv_baseptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT: br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr @extern_array, i64 2044
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @foo()
+ %rem = srem i32 %0, 5
+ %add = add nsw i32 %rem , 511
+ br label %for.body
+
+for.body:
+ %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %mul = mul nuw nsw i32 %k, 5
+ %sub1 = sub nsw i32 %mul, %rem
+ %cmp26 = icmp ult i32 %sub1, 512
+ br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+ %sub2 = sub nsw i32 %add, %mul
+ %idxprom = sext i32 %sub2 to i64
+ %arryidx = getelementptr inbounds float, ptr @extern_array, i64 %idxprom
+ store float 1.0, ptr %arryidx, align 4
+ br label %cond.end
+
+cond.end:
+ %inc = add nuw nsw i32 %k, 1
+ %exitcond = icmp ne i32 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; Test lowerToSingleIndexGEPs with a constant data (null) base pointer
+define void @test_A_sub_B_add_ConstantInt_null_basptr() {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt_null_basptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT: br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr null, i64 2044
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @foo()
+ %rem = srem i32 %0, 5
+ %add = add nsw i32 %rem , 511
+ br label %for.body
+
+for.body:
+ %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %mul = mul nuw nsw i32 %k, 5
+ %sub1 = sub nsw i32 %mul, %rem
+ %cmp26 = icmp ult i32 %sub1, 512
+ br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+ %sub2 = sub nsw i32 %add, %mul
+ %idxprom = sext i32 %sub2 to i64
+ %arryidx = getelementptr inbounds float, ptr null, i64 %idxprom
+ store float 1.0, ptr %arryidx, align 4
+ br label %cond.end
+
+cond.end:
+ %inc = add nuw nsw i32 %k, 1
+ %exitcond = icmp ne i32 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+declare i32 @foo()
+; Only the GEP indexed by sext(%tmp13 + 1) is split (variable i8 GEP + constant 4 bytes).
+define amdgpu_kernel void @multi_use_in_loop(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
+; CHECK-LABEL: @multi_use_in_loop(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[ARG2:%.*]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG:%.*]], i64 [[TMP]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB8:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: br label [[BB11:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP30:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG1:%.*]], i64 [[TMP]]
+; CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT: ret void
+; CHECK: bb11:
+; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP30]], [[BB22:%.*]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP25:%.*]], [[BB22]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = srem i32 [[TMP13]], [[ARG2]]
+; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) [[TMP16]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], 100
+; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: br i1 [[TMP18]], label [[BB20:%.*]], label [[BB22]]
+; CHECK: bb20:
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG1]], i64 [[TMP19]]
+; CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP21]], align 4
+; CHECK-NEXT: br label [[BB22]]
+; CHECK: bb22:
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) [[TMP23]], align 4
+; CHECK-NEXT: [[TMP25]] = add nuw nsw i32 [[TMP13]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[ARG1]], i64 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr addrspace(1) [[UGLYGEP]], i64 4
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) [[UGLYGEP2]], align 4
+; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP24]], [[TMP12]]
+; CHECK-NEXT: [[TMP30]] = add i32 [[TMP29]], [[TMP28]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP25]], [[TMP4]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[BB7]], label [[BB11]]
+;
+bb:
+ %tmp = sext i32 %arg2 to i64
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
+ %tmp5 = icmp sgt i32 %tmp4, 0
+ br i1 %tmp5, label %bb6, label %bb8
+
+bb6: ; preds = %bb
+ br label %bb11
+
+bb7: ; preds = %bb22
+ br label %bb8
+
+bb8: ; preds = %bb7, %bb
+ %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
+ %tmp10 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp
+ store i32 %tmp9, ptr addrspace(1) %tmp10, align 4
+ ret void
+
+bb11: ; preds = %bb22, %bb6
+ %tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ]
+ %tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ]
+ %tmp14 = srem i32 %tmp13, %arg2
+ %tmp15 = sext i32 %tmp14 to i64
+ %tmp16 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp15
+ %tmp17 = load i32, ptr addrspace(1) %tmp16, align 4
+ %tmp18 = icmp sgt i32 %tmp17, 100
+ %tmp19 = sext i32 %tmp13 to i64
+ br i1 %tmp18, label %bb20, label %bb22
+
+bb20: ; preds = %bb11
+ %tmp21 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp19
+ store i32 0, ptr addrspace(1) %tmp21, align 4
+ br label %bb22
+
+bb22: ; preds = %bb20, %bb11
+ %tmp23 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp19
+ %tmp24 = load i32, ptr addrspace(1) %tmp23, align 4
+ %tmp25 = add nuw nsw i32 %tmp13, 1
+ %tmp26 = sext i32 %tmp25 to i64
+ %tmp27 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp26
+ %tmp28 = load i32, ptr addrspace(1) %tmp27, align 4
+ %tmp29 = add i32 %tmp24, %tmp12
+ %tmp30 = add i32 %tmp29, %tmp28
+ %tmp31 = icmp eq i32 %tmp25, %tmp4
+ br i1 %tmp31, label %bb7, label %bb11
+}
+
+@extern_array_1 = external addrspace(1) global [4096 x i32], align 16
+
+@llvm.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @extern_array_1 to ptr) ]
+; Plain stores to @extern_array_1 (no GEPs); the checks only gain an explicit align 4.
+define void @use_in_other_func() {
+; CHECK-LABEL: @use_in_other_func(
+; CHECK-NEXT: store i32 0, ptr addrspace(1) @extern_array_1, align 4
+; CHECK-NEXT: store i32 0, ptr addrspace(1) @extern_array_1, align 4
+; CHECK-NEXT: ret void
+;
+ store i32 0, ptr addrspace(1) @extern_array_1
+ store i32 0, ptr addrspace(1) @extern_array_1
+ ret void
+}
+
+define amdgpu_kernel void @multi_use_in_loop_global_base_address(ptr addrspace(1) nocapture readonly %arg, i32 %arg2) {
+; CHECK-LABEL: @multi_use_in_loop_global_base_address(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[ARG2:%.*]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG:%.*]], i64 [[TMP]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB8:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: br label [[BB11:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP30:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP]]
+; CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT: ret void
+; CHECK: bb11:
+; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP30]], [[BB22:%.*]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP25:%.*]], [[BB22]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = srem i32 [[TMP13]], [[ARG2]]
+; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) [[TMP16]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], 100
+; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: br i1 [[TMP18]], label [[BB20:%.*]], label [[BB22]]
+; CHECK: bb20:
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP19]]
+; CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP21]], align 4
+; CHECK-NEXT: br label [[BB22]]
+; CHECK: bb22:
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP19]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) [[TMP23]], align 4
+; CHECK-NEXT: [[TMP25]] = add nuw nsw i32 [[TMP13]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(1) @extern_array_1, i64 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr addrspace(1) [[UGLYGEP]], i64 4
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) [[UGLYGEP2]], align 4
+; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP24]], [[TMP12]]
+; CHECK-NEXT: [[TMP30]] = add i32 [[TMP29]], [[TMP28]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP25]], [[TMP4]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[BB7]], label [[BB11]]
+;
+bb:
+ %tmp = sext i32 %arg2 to i64
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
+ %tmp5 = icmp sgt i32 %tmp4, 0
+ br i1 %tmp5, label %bb6, label %bb8
+
+bb6: ; preds = %bb
+ br label %bb11
+
+bb7: ; preds = %bb22
+ br label %bb8
+
+bb8: ; preds = %bb7, %bb
+ %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
+ %tmp1...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Matt Arsenault (arsenm) Changes: I didn't see any failures while trying to break hasMoreThanOneUseInLoop. Patch is 21.41 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/134684.diff 1 file affected:
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll
new file mode 100644
index 0000000000000..687e921640492
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll
@@ -0,0 +1,482 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -passes='separate-const-offset-from-gep<lower-gep>' \
+; RUN: -reassociate-geps-verify-no-dead-code -S | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+
+%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
+
+@packed_struct_array = addrspace(3) global [1024 x %struct.Packed] poison, align 1
+
+; Verifies we can emit a correct uglygep if the address is not naturally
+; aligned (constant part: 76 + 12 + 3 * 8 = 112 bytes). This should not
+; produce a no-op bitcast with opaque pointers.
+define ptr addrspace(3) @packed_struct(i32 %i, i32 %j) {
+; CHECK-LABEL: @packed_struct(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[IDXPROM]], 77824
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(3) @packed_struct_array, i32 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[I:%.*]], 76
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[J:%.*]], 3
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP3]], i32 [[TMP2]]
+; CHECK-NEXT: [[UGLYGEP5:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP4]], i32 112
+; CHECK-NEXT: ret ptr addrspace(3) [[UGLYGEP5]]
+;
+entry:
+ %add = add nsw i32 %j, 3
+ %add1 = add nsw i32 %i, 1
+ %arrayidx3 = getelementptr inbounds [1024 x %struct.Packed], ptr addrspace(3) @packed_struct_array, i64 0, i32 %add1, i32 1, i32 %add
+ ret ptr addrspace(3) %arrayidx3
+}
+
+%struct = type { i32, i32, i32 }
+; Both GEPs share %ptr and %idx and differ only in the field (byte offset 4 vs. 8).
+define i32 @test1(ptr %ptr, i64 %idx) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[IDX:%.*]], 12
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 4
+; CHECK-NEXT: [[LV_1:%.*]] = load i32, ptr [[UGLYGEP1]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[LV_1]], 0
+; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[IDX]], 12
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP2]]
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP2]], i64 8
+; CHECK-NEXT: [[LV_2:%.*]] = load i32, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[LV_1]], [[LV_2]]
+; CHECK-NEXT: ret i32 [[RES]]
+; CHECK: else:
+; CHECK-NEXT: ret i32 0
+;
+ %gep.1 = getelementptr %struct, ptr %ptr, i64 %idx, i32 1
+ %lv.1 = load i32, ptr %gep.1
+ %c = icmp slt i32 %lv.1, 0
+ br i1 %c, label %then, label %else
+
+then:
+ %gep.2 = getelementptr %struct, ptr %ptr, i64 %idx, i32 2
+ %lv.2 = load i32, ptr %gep.2
+ %res = add i32 %lv.1, %lv.2
+ ret i32 %res
+
+else:
+ ret i32 0
+}
+; Like @test1, but with a ptr addrspace(7) base: the i64 index is truncated to i32.
+define i32 @test1_fatptr(ptr addrspace(7) %ptr, i64 %idx) {
+; CHECK-LABEL: @test1_fatptr(
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IDX:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[IDXPROM]], 12
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR:%.*]], i32 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr addrspace(7) [[UGLYGEP]], i32 4
+; CHECK-NEXT: [[LV_1:%.*]] = load i32, ptr addrspace(7) [[UGLYGEP1]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[LV_1]], 0
+; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IDX]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[IDXPROM2]], 12
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP2]]
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr addrspace(7) [[UGLYGEP3]], i32 8
+; CHECK-NEXT: [[LV_2:%.*]] = load i32, ptr addrspace(7) [[UGLYGEP4]], align 4
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[LV_1]], [[LV_2]]
+; CHECK-NEXT: ret i32 [[RES]]
+; CHECK: else:
+; CHECK-NEXT: ret i32 0
+;
+ %gep.1 = getelementptr %struct, ptr addrspace(7) %ptr, i64 %idx, i32 1
+ %lv.1 = load i32, ptr addrspace(7) %gep.1
+ %c = icmp slt i32 %lv.1, 0
+ br i1 %c, label %then, label %else
+
+then:
+ %gep.2 = getelementptr %struct, ptr addrspace(7) %ptr, i64 %idx, i32 2
+ %lv.2 = load i32, ptr addrspace(7) %gep.2
+ %res = add i32 %lv.1, %lv.2
+ ret i32 %res
+
+else:
+ ret i32 0
+}
+
+
+; Test lowerToSingleIndexGEPs: index is sext((%rem + 511) - %mul); 511 * 4 = 2044 is split off.
+define void @test_A_sub_B_add_ConstantInt(ptr %p) {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT: br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[UGLYGEP3:%.*]], i64 2044
+; CHECK-NEXT: [[UGLYGEP5:%.*]] = getelementptr i8, ptr [[UGLYGEP4]], i64 [[TMP3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP5]], align 4
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @foo()
+ %rem = srem i32 %0, 5
+ %add = add nsw i32 %rem , 511
+ br label %for.body
+
+for.body:
+ %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %mul = mul nuw nsw i32 %k, 5
+ %sub1 = sub nsw i32 %mul, %rem
+ %cmp26 = icmp ult i32 %sub1, 512
+ br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+ %sub2 = sub nsw i32 %add, %mul
+ %idxprom = sext i32 %sub2 to i64
+ %arryidx = getelementptr inbounds float, ptr %p, i64 %idxprom
+ store float 1.0, ptr %arryidx, align 4
+ br label %cond.end
+
+cond.end:
+ %inc = add nuw nsw i32 %k, 1
+ %exitcond = icmp ne i32 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+@extern_array = global [1024 x i32] poison, align 16
+; Test lowerToSingleIndexGEPs with a global variable pointer (@extern_array).
+; The %p argument is unused; the 2044-byte constant GEP is emitted inbounds on the global.
+define void @test_A_sub_B_add_ConstantInt_gv_baseptr(ptr %p) {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt_gv_baseptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT: br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr @extern_array, i64 2044
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @foo()
+ %rem = srem i32 %0, 5
+ %add = add nsw i32 %rem , 511
+ br label %for.body
+
+for.body:
+ %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %mul = mul nuw nsw i32 %k, 5
+ %sub1 = sub nsw i32 %mul, %rem
+ %cmp26 = icmp ult i32 %sub1, 512
+ br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+ %sub2 = sub nsw i32 %add, %mul
+ %idxprom = sext i32 %sub2 to i64
+ %arryidx = getelementptr inbounds float, ptr @extern_array, i64 %idxprom
+ store float 1.0, ptr %arryidx, align 4
+ br label %cond.end
+
+cond.end:
+ %inc = add nuw nsw i32 %k, 1
+ %exitcond = icmp ne i32 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; Test lowerToSingleIndexGEPs with a constant data (null) base pointer
+define void @test_A_sub_B_add_ConstantInt_null_basptr() {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt_null_basptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT: br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr null, i64 2044
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @foo()
+ %rem = srem i32 %0, 5
+ %add = add nsw i32 %rem , 511
+ br label %for.body
+
+for.body:
+ %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %mul = mul nuw nsw i32 %k, 5
+ %sub1 = sub nsw i32 %mul, %rem
+ %cmp26 = icmp ult i32 %sub1, 512
+ br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+ %sub2 = sub nsw i32 %add, %mul
+ %idxprom = sext i32 %sub2 to i64
+ %arryidx = getelementptr inbounds float, ptr null, i64 %idxprom
+ store float 1.0, ptr %arryidx, align 4
+ br label %cond.end
+
+cond.end:
+ %inc = add nuw nsw i32 %k, 1
+ %exitcond = icmp ne i32 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+declare i32 @foo()
+
+; Loop test in which the sign-extended induction variable (%tmp19) indexes two
+; GEPs inside the loop (%tmp21 in bb20 and %tmp23 in bb22), while a third GEP
+; (%tmp27) is indexed by the sign-extended incremented value (%tmp26).
+; Per the autogenerated assertions below, only %tmp27 is rewritten: it becomes
+; an i8 GEP by a variable byte offset (sext of %tmp13 shifted left by 2)
+; followed by an i8 GEP by the constant 4. All other GEPs are expected to be
+; left as typed `getelementptr inbounds i32` instructions.
+define amdgpu_kernel void @multi_use_in_loop(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
+; CHECK-LABEL: @multi_use_in_loop(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[ARG2:%.*]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG:%.*]], i64 [[TMP]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB8:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: br label [[BB11:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP30:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG1:%.*]], i64 [[TMP]]
+; CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT: ret void
+; CHECK: bb11:
+; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP30]], [[BB22:%.*]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP25:%.*]], [[BB22]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = srem i32 [[TMP13]], [[ARG2]]
+; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) [[TMP16]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], 100
+; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: br i1 [[TMP18]], label [[BB20:%.*]], label [[BB22]]
+; CHECK: bb20:
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG1]], i64 [[TMP19]]
+; CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP21]], align 4
+; CHECK-NEXT: br label [[BB22]]
+; CHECK: bb22:
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) [[TMP23]], align 4
+; CHECK-NEXT: [[TMP25]] = add nuw nsw i32 [[TMP13]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[ARG1]], i64 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr addrspace(1) [[UGLYGEP]], i64 4
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) [[UGLYGEP2]], align 4
+; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP24]], [[TMP12]]
+; CHECK-NEXT: [[TMP30]] = add i32 [[TMP29]], [[TMP28]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP25]], [[TMP4]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[BB7]], label [[BB11]]
+;
+bb:
+ ; Load the trip count (%tmp4) from %arg[%arg2]; skip the loop if it is <= 0.
+ %tmp = sext i32 %arg2 to i64
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
+ %tmp5 = icmp sgt i32 %tmp4, 0
+ br i1 %tmp5, label %bb6, label %bb8
+
+bb6: ; preds = %bb
+ br label %bb11
+
+bb7: ; preds = %bb22
+ br label %bb8
+
+bb8: ; preds = %bb7, %bb
+ ; Exit block: store the accumulated sum (0 if the loop was skipped) to %arg1[%arg2].
+ %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
+ %tmp10 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp
+ store i32 %tmp9, ptr addrspace(1) %tmp10, align 4
+ ret void
+
+bb11: ; preds = %bb22, %bb6
+ ; Loop header: %tmp12 carries the running sum, %tmp13 is the induction variable.
+ %tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ]
+ %tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ]
+ %tmp14 = srem i32 %tmp13, %arg2
+ %tmp15 = sext i32 %tmp14 to i64
+ %tmp16 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp15
+ %tmp17 = load i32, ptr addrspace(1) %tmp16, align 4
+ %tmp18 = icmp sgt i32 %tmp17, 100
+ ; %tmp19 is defined here but used as a GEP index in both bb20 and bb22.
+ %tmp19 = sext i32 %tmp13 to i64
+ br i1 %tmp18, label %bb20, label %bb22
+
+bb20: ; preds = %bb11
+ %tmp21 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp19
+ store i32 0, ptr addrspace(1) %tmp21, align 4
+ br label %bb22
+
+bb22: ; preds = %bb20, %bb11
+ %tmp23 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp19
+ %tmp24 = load i32, ptr addrspace(1) %tmp23, align 4
+ %tmp25 = add nuw nsw i32 %tmp13, 1
+ ; Index is sext(%tmp13 + 1); the assertions above expect the +1 to be split
+ ; out of this GEP as a separate constant 4-byte offset.
+ %tmp26 = sext i32 %tmp25 to i64
+ %tmp27 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp26
+ %tmp28 = load i32, ptr addrspace(1) %tmp27, align 4
+ %tmp29 = add i32 %tmp24, %tmp12
+ %tmp30 = add i32 %tmp29, %tmp28
+ ; Leave the loop once the incremented induction variable equals %tmp4.
+ %tmp31 = icmp eq i32 %tmp25, %tmp4
+ br i1 %tmp31, label %bb7, label %bb11
+}
+
+; Externally defined array of 4096 i32 in address space 1, 16-byte aligned.
+; It is stored to directly by @use_in_other_func below.
+@extern_array_1 = external addrspace(1) global [4096 x i32], align 16
+
+; @llvm.used holds one more reference to @extern_array_1, as a constant
+; addrspacecast to a generic pointer.
+@llvm.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @extern_array_1 to ptr) ]
+
+; Stores zero to @extern_array_1 twice, using the global directly as the
+; address (no GEP). The assertions expect both stores to be left in place,
+; printed with an explicit `align 4`.
+; NOTE(review): presumably this exists so that @extern_array_1 has users in a
+; function other than the kernel that uses it as a GEP base; confirm intent.
+define void @use_in_other_func() {
+; CHECK-LABEL: @use_in_other_func(
+; CHECK-NEXT: store i32 0, ptr addrspace(1) @extern_array_1, align 4
+; CHECK-NEXT: store i32 0, ptr addrspace(1) @extern_array_1, align 4
+; CHECK-NEXT: ret void
+;
+ store i32 0, ptr addrspace(1) @extern_array_1
+ store i32 0, ptr addrspace(1) @extern_array_1
+ ret void
+}
+
+define amdgpu_kernel void @multi_use_in_loop_global_base_address(ptr addrspace(1) nocapture readonly %arg, i32 %arg2) {
+; CHECK-LABEL: @multi_use_in_loop_global_base_address(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[ARG2:%.*]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG:%.*]], i64 [[TMP]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB8:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: br label [[BB11:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP30:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP]]
+; CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT: ret void
+; CHECK: bb11:
+; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP30]], [[BB22:%.*]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP25:%.*]], [[BB22]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = srem i32 [[TMP13]], [[ARG2]]
+; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) [[TMP16]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], 100
+; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: br i1 [[TMP18]], label [[BB20:%.*]], label [[BB22]]
+; CHECK: bb20:
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP19]]
+; CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP21]], align 4
+; CHECK-NEXT: br label [[BB22]]
+; CHECK: bb22:
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP19]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) [[TMP23]], align 4
+; CHECK-NEXT: [[TMP25]] = add nuw nsw i32 [[TMP13]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(1) @extern_array_1, i64 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr addrspace(1) [[UGLYGEP]], i64 4
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) [[UGLYGEP2]], align 4
+; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP24]], [[TMP12]]
+; CHECK-NEXT: [[TMP30]] = add i32 [[TMP29]], [[TMP28]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP25]], [[TMP4]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[BB7]], label [[BB11]]
+;
+bb:
+ %tmp = sext i32 %arg2 to i64
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
+ %tmp5 = icmp sgt i32 %tmp4, 0
+ br i1 %tmp5, label %bb6, label %bb8
+
+bb6: ; preds = %bb
+ br label %bb11
+
+bb7: ; preds = %bb22
+ br label %bb8
+
+bb8: ; preds = %bb7, %bb
+ %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
+ %tmp1...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
Merge activity
|
4ca3fdc
to
c642702
Compare
I didn't see any failures while trying to break hasMoreThanOneUseInLoop or other paths here.
c642702
to
65d3626
Compare
I didn't see any failures while trying to break hasMoreThanOneUseInLoop
or other paths here.