130 changes: 63 additions & 67 deletions llvm/test/Transforms/SROA/select-gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ define i32 @test_sroa_select_gep(i1 %cond) {
bb:
%a = alloca %pair, align 4
%b = alloca %pair, align 4
%gep_a = getelementptr inbounds %pair, %pair* %a, i32 0, i32 1
%gep_b = getelementptr inbounds %pair, %pair* %b, i32 0, i32 1
store i32 1, i32* %gep_a, align 4
store i32 2, i32* %gep_b, align 4
%select = select i1 %cond, %pair* %a, %pair* %b
%gep = getelementptr inbounds %pair, %pair* %select, i32 0, i32 1
%load = load i32, i32* %gep, align 4
%gep_a = getelementptr inbounds %pair, ptr %a, i32 0, i32 1
%gep_b = getelementptr inbounds %pair, ptr %b, i32 0, i32 1
store i32 1, ptr %gep_a, align 4
store i32 2, ptr %gep_b, align 4
%select = select i1 %cond, ptr %a, ptr %b
%gep = getelementptr inbounds %pair, ptr %select, i32 0, i32 1
%load = load i32, ptr %gep, align 4
ret i32 %load
}

Expand All @@ -31,13 +31,13 @@ define i32 @test_sroa_select_gep_non_inbound(i1 %cond) {
bb:
%a = alloca %pair, align 4
%b = alloca %pair, align 4
%gep_a = getelementptr %pair, %pair* %a, i32 0, i32 1
%gep_b = getelementptr %pair, %pair* %b, i32 0, i32 1
store i32 1, i32* %gep_a, align 4
store i32 2, i32* %gep_b, align 4
%select = select i1 %cond, %pair* %a, %pair* %b
%gep = getelementptr %pair, %pair* %select, i32 0, i32 1
%load = load i32, i32* %gep, align 4
%gep_a = getelementptr %pair, ptr %a, i32 0, i32 1
%gep_b = getelementptr %pair, ptr %b, i32 0, i32 1
store i32 1, ptr %gep_a, align 4
store i32 2, ptr %gep_b, align 4
%select = select i1 %cond, ptr %a, ptr %b
%gep = getelementptr %pair, ptr %select, i32 0, i32 1
%load = load i32, ptr %gep, align 4
ret i32 %load
}

Expand All @@ -48,33 +48,30 @@ define i32 @test_sroa_select_gep_volatile_load(i1 %cond) {
; CHECK-NEXT: [[A_SROA_2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B_SROA_2:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 11, i32* [[A_SROA_0]], align 4
; CHECK-NEXT: store i32 12, i32* [[B_SROA_0]], align 4
; CHECK-NEXT: store i32 21, i32* [[A_SROA_2]], align 4
; CHECK-NEXT: store i32 22, i32* [[B_SROA_2]], align 4
; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* [[B_SROA_0]]
; CHECK-NEXT: [[LOAD1:%.*]] = load volatile i32, i32* [[SELECT_SROA_SEL]], align 4
; CHECK-NEXT: [[SELECT_SROA_SEL3:%.*]] = select i1 [[COND]], i32* [[A_SROA_2]], i32* [[B_SROA_2]]
; CHECK-NEXT: [[LOAD2:%.*]] = load volatile i32, i32* [[SELECT_SROA_SEL3]], align 4
; CHECK-NEXT: store i32 11, ptr [[A_SROA_0]], align 4
; CHECK-NEXT: store i32 12, ptr [[B_SROA_0]], align 4
; CHECK-NEXT: store i32 21, ptr [[A_SROA_2]], align 4
; CHECK-NEXT: store i32 22, ptr [[B_SROA_2]], align 4
; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], ptr [[A_SROA_0]], ptr [[B_SROA_0]]
; CHECK-NEXT: [[LOAD1:%.*]] = load volatile i32, ptr [[SELECT_SROA_SEL]], align 4
; CHECK-NEXT: [[SELECT_SROA_SEL3:%.*]] = select i1 [[COND]], ptr [[A_SROA_2]], ptr [[B_SROA_2]]
; CHECK-NEXT: [[LOAD2:%.*]] = load volatile i32, ptr [[SELECT_SROA_SEL3]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: ret i32 [[ADD]]
;
bb:
%a = alloca %pair, align 4
%b = alloca %pair, align 4
%gep_a0 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 0
%gep_b0 = getelementptr inbounds %pair, %pair* %b, i32 0, i32 0
store i32 11, i32* %gep_a0, align 4
store i32 12, i32* %gep_b0, align 4
%gep_a1 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 1
%gep_b1 = getelementptr inbounds %pair, %pair* %b, i32 0, i32 1
store i32 21, i32* %gep_a1, align 4
store i32 22, i32* %gep_b1, align 4
%select = select i1 %cond, %pair* %a, %pair* %b
%gep1 = getelementptr inbounds %pair, %pair* %select, i32 0, i32 0
%load1 = load volatile i32, i32* %gep1, align 4
%gep2 = getelementptr inbounds %pair, %pair* %select, i32 0, i32 1
%load2 = load volatile i32, i32* %gep2, align 4
store i32 11, ptr %a, align 4
store i32 12, ptr %b, align 4
%gep_a1 = getelementptr inbounds %pair, ptr %a, i32 0, i32 1
%gep_b1 = getelementptr inbounds %pair, ptr %b, i32 0, i32 1
store i32 21, ptr %gep_a1, align 4
store i32 22, ptr %gep_b1, align 4
%select = select i1 %cond, ptr %a, ptr %b
%load1 = load volatile i32, ptr %select, align 4
%gep2 = getelementptr inbounds %pair, ptr %select, i32 0, i32 1
%load2 = load volatile i32, ptr %gep2, align 4
%add = add i32 %load1, %load2
ret i32 %add
}
Expand All @@ -83,15 +80,15 @@ define i32 @test_sroa_select_gep_poison(i1 %cond) {
; CHECK-LABEL: @test_sroa_select_gep_poison(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* poison
; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SELECT_SROA_SEL]], align 4
; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], ptr [[A_SROA_0]], ptr poison
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[SELECT_SROA_SEL]], align 4
; CHECK-NEXT: ret i32 [[LOAD]]
;
bb:
%a = alloca %pair, align 4
%select = select i1 %cond, %pair* %a, %pair* poison
%gep = getelementptr inbounds %pair, %pair* %select, i32 0, i32 1
%load = load i32, i32* %gep, align 4
%select = select i1 %cond, ptr %a, ptr poison
%gep = getelementptr inbounds %pair, ptr %select, i32 0, i32 1
%load = load i32, ptr %gep, align 4
ret i32 %load
}

Expand All @@ -100,24 +97,23 @@ define i32 @test_sroa_gep_select_gep(i1 %cond) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 1, i32* [[A_SROA_0]], align 4
; CHECK-NEXT: store i32 2, i32* [[B_SROA_0]], align 4
; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* [[B_SROA_0]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[COND]], i32* [[SELECT_SROA_SEL]], i32* [[A_SROA_0]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SELECT2]], align 4
; CHECK-NEXT: store i32 1, ptr [[A_SROA_0]], align 4
; CHECK-NEXT: store i32 2, ptr [[B_SROA_0]], align 4
; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], ptr [[A_SROA_0]], ptr [[B_SROA_0]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[COND]], ptr [[SELECT_SROA_SEL]], ptr [[A_SROA_0]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[SELECT2]], align 4
; CHECK-NEXT: ret i32 [[LOAD]]
;
bb:
%a = alloca %pair, align 4
%b = alloca %pair, align 4
%gep_a = getelementptr inbounds %pair, %pair* %a, i32 0, i32 1
%gep_b = getelementptr inbounds %pair, %pair* %b, i32 0, i32 1
store i32 1, i32* %gep_a, align 4
store i32 2, i32* %gep_b, align 4
%select = select i1 %cond, i32* %gep_a, i32* %gep_b
%gep = getelementptr inbounds i32, i32* %select, i32 0
%select2 = select i1 %cond, i32* %gep, i32* %gep_a
%load = load i32, i32* %select2, align 4
%gep_a = getelementptr inbounds %pair, ptr %a, i32 0, i32 1
%gep_b = getelementptr inbounds %pair, ptr %b, i32 0, i32 1
store i32 1, ptr %gep_a, align 4
store i32 2, ptr %gep_b, align 4
%select = select i1 %cond, ptr %gep_a, ptr %gep_b
%select2 = select i1 %cond, ptr %select, ptr %gep_a
%load = load i32, ptr %select2, align 4
ret i32 %load
}

Expand All @@ -126,24 +122,24 @@ define i32 @test_sroa_gep_select_gep_nonconst_idx(i1 %cond, i32 %idx) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[A:%.*]] = alloca [[PAIR:%.*]], align 4
; CHECK-NEXT: [[B:%.*]] = alloca [[PAIR]], align 4
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[A]], i32 0, i32 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[B]], i32 0, i32 1
; CHECK-NEXT: store i32 1, i32* [[GEP_A]], align 4
; CHECK-NEXT: store i32 2, i32* [[GEP_B]], align 4
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], %pair* [[A]], %pair* [[B]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[SELECT]], i32 [[IDX:%.*]], i32 1
; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [[PAIR]], ptr [[A]], i32 0, i32 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [[PAIR]], ptr [[B]], i32 0, i32 1
; CHECK-NEXT: store i32 1, ptr [[GEP_A]], align 4
; CHECK-NEXT: store i32 2, ptr [[GEP_B]], align 4
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], ptr [[A]], ptr [[B]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[SELECT]], i32 [[IDX:%.*]], i32 1
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: ret i32 [[LOAD]]
;
bb:
%a = alloca %pair, align 4
%b = alloca %pair, align 4
%gep_a = getelementptr inbounds %pair, %pair* %a, i32 0, i32 1
%gep_b = getelementptr inbounds %pair, %pair* %b, i32 0, i32 1
store i32 1, i32* %gep_a, align 4
store i32 2, i32* %gep_b, align 4
%select = select i1 %cond, %pair* %a, %pair* %b
%gep = getelementptr inbounds %pair, %pair* %select, i32 %idx, i32 1
%load = load i32, i32* %gep, align 4
%gep_a = getelementptr inbounds %pair, ptr %a, i32 0, i32 1
%gep_b = getelementptr inbounds %pair, ptr %b, i32 0, i32 1
store i32 1, ptr %gep_a, align 4
store i32 2, ptr %gep_b, align 4
%select = select i1 %cond, ptr %a, ptr %b
%gep = getelementptr inbounds %pair, ptr %select, i32 %idx, i32 1
%load = load i32, ptr %gep, align 4
ret i32 %load
}
48 changes: 20 additions & 28 deletions llvm/test/Transforms/SROA/select-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,48 +21,40 @@ define <2 x i16> @test_load_bitcast_select(i1 %cond1, i1 %cond2) {
entry:
%true = alloca half, align 2
%false = alloca half, align 2
store half 0xHFFFF, half* %true, align 2
store half 0xH0000, half* %false, align 2
%false.cast = bitcast half* %false to %st.half*
%true.cast = bitcast half* %true to %st.half*
%sel1 = select i1 %cond1, %st.half* %true.cast, %st.half* %false.cast
%cast1 = bitcast %st.half* %sel1 to i16*
%ld1 = load i16, i16* %cast1, align 2
store half 0xHFFFF, ptr %true, align 2
store half 0xH0000, ptr %false, align 2
%sel1 = select i1 %cond1, ptr %true, ptr %false
%ld1 = load i16, ptr %sel1, align 2
%v1 = insertelement <2 x i16> poison, i16 %ld1, i32 0
%sel2 = select i1 %cond2, %st.half* %true.cast, %st.half* %false.cast
%cast2 = bitcast %st.half* %sel2 to i16*
%ld2 = load i16, i16* %cast2, align 2
%sel2 = select i1 %cond2, ptr %true, ptr %false
%ld2 = load i16, ptr %sel2, align 2
%v2 = insertelement <2 x i16> %v1, i16 %ld2, i32 1
ret <2 x i16> %v2
}

%st.args = type { i32, i32* }
%st.args = type { i32, ptr }

; A bitcasted load and a direct load of select.
; Both loads read the pointer field (index 1) of the [2 x %st.args] element
; picked by the select on %cmp. Nothing in this function stores to %args, and
; the expected output replaces each loaded pointer with a select between undef
; values, which is then passed to @foo_i8 / @foo_i32.
define void @test_multiple_loads_select(i1 %cmp){
; CHECK-LABEL: @test_multiple_loads_select(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* undef to i8*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* undef to i8*
; CHECK-NEXT: [[ADDR_I8_SROA_SPECULATED:%.*]] = select i1 [[CMP:%.*]], i8* [[TMP0]], i8* [[TMP1]]
; CHECK-NEXT: call void @foo_i8(i8* [[ADDR_I8_SROA_SPECULATED]])
; CHECK-NEXT: [[ADDR_I32_SROA_SPECULATED:%.*]] = select i1 [[CMP]], i32* undef, i32* undef
; CHECK-NEXT: call void @foo_i32(i32* [[ADDR_I32_SROA_SPECULATED]])
; CHECK-NEXT: [[ADDR_I8_SROA_SPECULATED:%.*]] = select i1 [[CMP:%.*]], ptr undef, ptr undef
; CHECK-NEXT: call void @foo_i8(ptr [[ADDR_I8_SROA_SPECULATED]])
; CHECK-NEXT: [[ADDR_I32_SROA_SPECULATED:%.*]] = select i1 [[CMP]], ptr undef, ptr undef
; CHECK-NEXT: call void @foo_i32(ptr [[ADDR_I32_SROA_SPECULATED]])
; CHECK-NEXT: ret void
;
entry:
%args = alloca [2 x %st.args], align 16
%arr0 = getelementptr inbounds [2 x %st.args], [2 x %st.args]* %args, i64 0, i64 0
%arr1 = getelementptr inbounds [2 x %st.args], [2 x %st.args]* %args, i64 0, i64 1
%sel = select i1 %cmp, %st.args* %arr1, %st.args* %arr0
%addr = getelementptr inbounds %st.args, %st.args* %sel, i64 0, i32 1
%bcast.i8 = bitcast i32** %addr to i8**
%addr.i8 = load i8*, i8** %bcast.i8, align 8
call void @foo_i8(i8* %addr.i8)
%addr.i32 = load i32*, i32** %addr, align 8
call void @foo_i32 (i32* %addr.i32)
%arr1 = getelementptr inbounds [2 x %st.args], ptr %args, i64 0, i64 1
%sel = select i1 %cmp, ptr %arr1, ptr %args
%addr = getelementptr inbounds %st.args, ptr %sel, i64 0, i32 1
%addr.i8 = load ptr, ptr %addr, align 8
call void @foo_i8(ptr %addr.i8)
%addr.i32 = load ptr, ptr %addr, align 8
call void @foo_i32 (ptr %addr.i32)
ret void
}

declare void @foo_i8(i8*)
declare void @foo_i32(i32*)
declare void @foo_i8(ptr)
declare void @foo_i32(ptr)
42 changes: 19 additions & 23 deletions llvm/test/Transforms/SROA/slice-order-independence.ll
Original file line number Diff line number Diff line change
@@ -1,37 +1,33 @@
; RUN: opt < %s -passes=sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind

; Check that the type chosen for a split is independent of the order of the
; slices, even when some types are skipped because their width is not a
; multiple of the byte width.
; Variant in which the i63 load (a width that is not a whole number of bytes)
; is issued before the pointer-typed load of the same slot;
; @skipped_inttype_last issues the same two loads in the opposite order.
; The unnamed argument struct is first copied into %arg with a 16-byte memcpy.
; The only assertion is that a pointer-typed alloca appears in the output.
define void @skipped_inttype_first({ i16*, i32 }*) {
define void @skipped_inttype_first(ptr) {
; CHECK-LABEL: @skipped_inttype_first
; CHECK: alloca i8*
%arg = alloca { i16*, i32 }, align 8
%2 = bitcast { i16*, i32 }* %0 to i8*
%3 = bitcast { i16*, i32 }* %arg to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %3, i8* align 8 %2, i32 16, i1 false)
%b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
%pb0 = bitcast i16** %b to i63*
%b0 = load i63, i63* %pb0
%pb1 = bitcast i16** %b to i8**
%b1 = load i8*, i8** %pb1
; CHECK: alloca ptr
%arg = alloca { ptr, i32 }, align 8
%2 = bitcast ptr %0 to ptr
%3 = bitcast ptr %arg to ptr
call void @llvm.memcpy.p0.p0.i32(ptr align 8 %3, ptr align 8 %2, i32 16, i1 false)
%b = getelementptr inbounds { ptr, i32 }, ptr %arg, i64 0, i32 0
%b0 = load i63, ptr %b
%b1 = load ptr, ptr %b
ret void
}

; Mirror of @skipped_inttype_first: the pointer-typed load of field 0 comes
; first and the i63 load (a width that is not a whole number of bytes) comes
; last. The assertion is the same as for the other ordering: a pointer-typed
; alloca must appear in the output.
define void @skipped_inttype_last({ i16*, i32 }*) {
define void @skipped_inttype_last(ptr) {
; CHECK-LABEL: @skipped_inttype_last
; CHECK: alloca i8*
%arg = alloca { i16*, i32 }, align 8
%2 = bitcast { i16*, i32 }* %0 to i8*
%3 = bitcast { i16*, i32 }* %arg to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %3, i8* align 8 %2, i32 16, i1 false)
%b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
%pb1 = bitcast i16** %b to i8**
%b1 = load i8*, i8** %pb1
%pb0 = bitcast i16** %b to i63*
%b0 = load i63, i63* %pb0
; CHECK: alloca ptr
%arg = alloca { ptr, i32 }, align 8
%2 = bitcast ptr %0 to ptr
%3 = bitcast ptr %arg to ptr
call void @llvm.memcpy.p0.p0.i32(ptr align 8 %3, ptr align 8 %2, i32 16, i1 false)
%b = getelementptr inbounds { ptr, i32 }, ptr %arg, i64 0, i32 0
%b1 = load ptr, ptr %b
%b0 = load i63, ptr %b
ret void
}
103 changes: 46 additions & 57 deletions llvm/test/Transforms/SROA/slice-width.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,36 @@
; RUN: opt < %s -passes=sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind

; This tests that allocas are not split into slices whose width is not a
; multiple of the byte width.
; The i32 argument is stored to %arg and then read back both as an i32 and as
; an i1 at offset 0. The expected output keeps a single i8 alloca (align 8)
; holding the low byte; the remaining bits are extracted with lshr 8 + trunc to
; i24, and the expected output shows no store of that i24 value.
define void @no_split_on_non_byte_width(i32) {
; CHECK-LABEL: @no_split_on_non_byte_width(
; CHECK-NEXT: [[ARG_SROA_0:%.*]] = alloca i8, align 8
; CHECK-NEXT: [[ARG_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0:%.*]] to i8
; CHECK-NEXT: store i8 [[ARG_SROA_0_0_EXTRACT_TRUNC]], i8* [[ARG_SROA_0]], align 8
; CHECK-NEXT: store i8 [[ARG_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARG_SROA_0]], align 8
; CHECK-NEXT: [[ARG_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[TMP0]], 8
; CHECK-NEXT: [[ARG_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[ARG_SROA_3_0_EXTRACT_SHIFT]] to i24
; CHECK-NEXT: br label [[LOAD_I32:%.*]]
; CHECK: load_i32:
; CHECK-NEXT: [[ARG_SROA_0_0_ARG_SROA_0_0_R01:%.*]] = load i8, i8* [[ARG_SROA_0]], align 8
; CHECK-NEXT: [[ARG_SROA_0_0_ARG_SROA_0_0_R01:%.*]] = load i8, ptr [[ARG_SROA_0]], align 8
; CHECK-NEXT: br label [[LOAD_I1:%.*]]
; CHECK: load_i1:
; CHECK-NEXT: [[ARG_SROA_0_0_P1_SROA_CAST4:%.*]] = bitcast i8* [[ARG_SROA_0]] to i1*
; CHECK-NEXT: [[ARG_SROA_0_0_ARG_SROA_0_0_T1:%.*]] = load i1, i1* [[ARG_SROA_0_0_P1_SROA_CAST4]], align 8
; CHECK-NEXT: [[ARG_SROA_0_0_ARG_SROA_0_0_T1:%.*]] = load i1, ptr [[ARG_SROA_0]], align 8
; CHECK-NEXT: ret void
;
%arg = alloca i32 , align 8
store i32 %0, i32* %arg
store i32 %0, ptr %arg
br label %load_i32

load_i32:
%r0 = load i32, i32* %arg
%r0 = load i32, ptr %arg
br label %load_i1

load_i1:
%p1 = bitcast i32* %arg to i1*
%t1 = load i1, i1* %p1
%t1 = load i1, ptr %arg
ret void
}

Expand All @@ -49,87 +47,81 @@ load_i1:
; Copies 32 bytes from @foo_copy_source into a %union.Foo alloca, then loads
; the i64 at field index 1 and stores it to @i64_sink.
; Expected output: an x86_fp80 alloca that receives a 16-byte memcpy, two i64
; loads taken directly from the global, and a store of the first of them to
; @i64_sink.
; NOTE(review): %union.Foo is defined outside the visible text; presumably its
; field 0 is an x86_fp80 followed by i64 fields. Confirm against the full file.
define void @memcpy_fp80_padding() {
; CHECK-LABEL: @memcpy_fp80_padding(
; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16
; CHECK-NEXT: [[X_SROA_0_0_X_I8_SROA_CAST:%.*]] = bitcast x86_fp80* [[X_SROA_0]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[X_SROA_0_0_X_I8_SROA_CAST]], i8* align 16 bitcast (%union.Foo* @foo_copy_source to i8*), i32 16, i1 false)
; CHECK-NEXT: [[X_SROA_1_0_COPYLOAD:%.*]] = load i64, i64* getelementptr inbounds ([[UNION_FOO:%.*]], %union.Foo* @foo_copy_source, i64 0, i32 1), align 16
; CHECK-NEXT: [[X_SROA_2_0_COPYLOAD:%.*]] = load i64, i64* getelementptr inbounds ([[UNION_FOO]], %union.Foo* @foo_copy_source, i64 0, i32 2), align 8
; CHECK-NEXT: store i64 [[X_SROA_1_0_COPYLOAD]], i64* @i64_sink, align 4
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[X_SROA_0]], ptr align 16 @foo_copy_source, i32 16, i1 false)
; CHECK-NEXT: [[X_SROA_1_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 16), align 16
; CHECK-NEXT: [[X_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 24), align 8
; CHECK-NEXT: store i64 [[X_SROA_1_0_COPYLOAD]], ptr @i64_sink, align 4
; CHECK-NEXT: ret void
;
%x = alloca %union.Foo

; Copy from a global.
%x_i8 = bitcast %union.Foo* %x to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %x_i8, i8* align 16 bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i1 false)
call void @llvm.memcpy.p0.p0.i32(ptr align 16 %x, ptr align 16 @foo_copy_source, i32 32, i1 false)

; Access a slice of the alloca to trigger SROA.
%mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
%elt = load i64, i64* %mid_p
store i64 %elt, i64* @i64_sink
%mid_p = getelementptr %union.Foo, ptr %x, i32 0, i32 1
%elt = load i64, ptr %mid_p
store i64 %elt, ptr @i64_sink
ret void
}

; memset counterpart of @memcpy_fp80_padding: fills all 32 bytes of a
; %union.Foo alloca with the byte value -1, then loads the i64 at field index 1
; and stores it to @i64_sink.
; Expected output: a 16-byte memset into an x86_fp80 alloca, and the loaded
; value folded to the constant i64 -1 that is stored to @i64_sink.
define void @memset_fp80_padding() {
; CHECK-LABEL: @memset_fp80_padding(
; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16
; CHECK-NEXT: [[X_SROA_0_0_X_I8_SROA_CAST1:%.*]] = bitcast x86_fp80* [[X_SROA_0]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 16 [[X_SROA_0_0_X_I8_SROA_CAST1]], i8 -1, i32 16, i1 false)
; CHECK-NEXT: store i64 -1, i64* @i64_sink, align 4
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[X_SROA_0]], i8 -1, i32 16, i1 false)
; CHECK-NEXT: store i64 -1, ptr @i64_sink, align 4
; CHECK-NEXT: ret void
;
%x = alloca %union.Foo

; Set to all ones.
%x_i8 = bitcast %union.Foo* %x to i8*
call void @llvm.memset.p0i8.i32(i8* align 16 %x_i8, i8 -1, i32 32, i1 false)
call void @llvm.memset.p0.i32(ptr align 16 %x, i8 -1, i32 32, i1 false)

; Access a slice of the alloca to trigger SROA.
%mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
%elt = load i64, i64* %mid_p
store i64 %elt, i64* @i64_sink
%mid_p = getelementptr %union.Foo, ptr %x, i32 0, i32 1
%elt = load i64, ptr %mid_p
store i64 %elt, ptr @i64_sink
ret void
}

%S.vec3float = type { float, float, float }
%U.vec3float = type { <4 x float> }

declare i32 @memcpy_vec3float_helper(%S.vec3float*)
declare i32 @memcpy_vec3float_helper(ptr)

; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
; vector store, hence accidentally putting gibberish onto the stack.
; Expected output: the first copy becomes a <3 x float> load from %x, which is
; widened to <4 x float> only in registers (shufflevector + select). The second
; copy becomes a <3 x float> store into the one remaining %S.vec3float alloca,
; which is then passed to @memcpy_vec3float_helper.
define i32 @memcpy_vec3float_widening(%S.vec3float* %x) {
define i32 @memcpy_vec3float_widening(ptr %x) {
; CHECK-LABEL: @memcpy_vec3float_widening(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1_SROA_0_0_TMP1_SROA_0_0__SROA_CAST_SROA_CAST:%.*]] = bitcast %S.vec3float* [[X:%.*]] to <3 x float>*
; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, <3 x float>* [[TMP1_SROA_0_0_TMP1_SROA_0_0__SROA_CAST_SROA_CAST]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[X:%.*]] to ptr
; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef
; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4
; CHECK-NEXT: [[TMP1_SROA_0_0_TMP1_SROA_0_0__SROA_CAST2_SROA_CAST:%.*]] = bitcast %S.vec3float* [[TMP2]] to <3 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[TMP2]] to ptr
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], <3 x float>* [[TMP1_SROA_0_0_TMP1_SROA_0_0__SROA_CAST2_SROA_CAST]], align 4
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @memcpy_vec3float_helper(%S.vec3float* [[TMP2]])
; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP1]], align 4
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @memcpy_vec3float_helper(ptr [[TMP2]])
; CHECK-NEXT: ret i32 [[RESULT]]
;
entry:
; Create a temporary variable %tmp1 and copy %x[0] into it
%tmp1 = alloca %S.vec3float, align 4
%0 = bitcast %S.vec3float* %tmp1 to i8*
%1 = bitcast %S.vec3float* %x to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 12, i1 false)
%0 = bitcast ptr %tmp1 to ptr
%1 = bitcast ptr %x to ptr
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %0, ptr align 4 %1, i32 12, i1 false)

; The following block does nothing, but appears to confuse SROA.
; It reads %tmp1 as a 16-byte <4 x float>, wider than the 12-byte struct.
%unused1 = bitcast %S.vec3float* %tmp1 to %U.vec3float*
%unused2 = getelementptr inbounds %U.vec3float, %U.vec3float* %unused1, i32 0, i32 0
%unused3 = load <4 x float>, <4 x float>* %unused2, align 1
%unused3 = load <4 x float>, ptr %tmp1, align 1

; Create a second temporary and copy %tmp1 into it
%tmp2 = alloca %S.vec3float, align 4
%2 = bitcast %S.vec3float* %tmp2 to i8*
%3 = bitcast %S.vec3float* %tmp1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %2, i8* align 4 %3, i32 12, i1 false)
%2 = bitcast ptr %tmp2 to ptr
%3 = bitcast ptr %tmp1 to ptr
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %2, ptr align 4 %3, i32 12, i1 false)

%result = call i32 @memcpy_vec3float_helper(%S.vec3float* %tmp2)
%result = call i32 @memcpy_vec3float_helper(ptr %tmp2)
ret i32 %result
}

Expand All @@ -138,11 +130,11 @@ entry:
; memset on a one-byte alloca whose length is the constant expression
; `ptrtoint @PR50888` rather than an integer literal.
; Expected output: the alloca and the memset with that same length expression
; are both still present; the only visible change is the memset alignment,
; which goes from 16 in the input to 1 (matching the alloca's align 1).
define void @PR50888() {
; CHECK-LABEL: @PR50888(
; CHECK-NEXT: [[ARRAY:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[ARRAY]], i8 0, i64 ptrtoint (void ()* @PR50888 to i64), i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[ARRAY]], i8 0, i64 ptrtoint (ptr @PR50888 to i64), i1 false)
; CHECK-NEXT: ret void
;
%array = alloca i8
call void @llvm.memset.p0i8.i64(i8* align 16 %array, i8 0, i64 ptrtoint (void ()* @PR50888 to i64), i1 false)
call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 ptrtoint (ptr @PR50888 to i64), i1 false)
ret void
}

Expand All @@ -151,26 +143,23 @@ define void @PR50888() {
; memset of 4294967296 (2^32) bytes onto a one-byte alloca (`alloca i8, i64 1`).
; Expected output: the alloca is kept as written and the memset length is
; reduced to 1, the size of the alloca.
define void @PR50910() {
; CHECK-LABEL: @PR50910(
; CHECK-NEXT: [[T1:%.*]] = alloca i8, i64 1, align 8
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[T1]], i8 0, i64 1, i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[T1]], i8 0, i64 1, i1 false)
; CHECK-NEXT: ret void
;
%t1 = alloca i8, i64 1, align 8
call void @llvm.memset.p0i8.i64(i8* align 8 %t1, i8 0, i64 4294967296, i1 false)
call void @llvm.memset.p0.i64(ptr align 8 %t1, i8 0, i64 4294967296, i1 false)
ret void
}

; %A is a two-byte (i16) alloca that is read as an i32, which is wider than
; the alloca itself, and as an i1. Only the i1 value is returned.
; Expected output: an i8 alloca (align 2) with one i8 load and one i1 load from
; it, returning the i1.
define i1 @presplit_overlarge_load() {
; CHECK-LABEL: @presplit_overlarge_load(
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i8, align 2
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L11:%.*]] = load i8, i8* [[A_SROA_0]], align 2
; CHECK-NEXT: [[A_SROA_0_0_A_1_SROA_CAST3:%.*]] = bitcast i8* [[A_SROA_0]] to i1*
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L2:%.*]] = load i1, i1* [[A_SROA_0_0_A_1_SROA_CAST3]], align 2
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L11:%.*]] = load i8, ptr [[A_SROA_0]], align 2
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L2:%.*]] = load i1, ptr [[A_SROA_0]], align 2
; CHECK-NEXT: ret i1 [[A_SROA_0_0_A_SROA_0_0_L2]]
;
%A = alloca i16
%A.32 = bitcast i16* %A to i32*
%A.1 = bitcast i16* %A to i1*
%L1 = load i32, i32* %A.32
%L2 = load i1, i1* %A.1
%L1 = load i32, ptr %A
%L2 = load i1, ptr %A
ret i1 %L2
}
16 changes: 8 additions & 8 deletions llvm/test/Transforms/SROA/std-clamp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ define float @_Z8stdclampfff(float %x, float %lo, float %hi) {
; CHECK-LABEL: @_Z8stdclampfff(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I4:%.*]] = alloca float, align 4
; CHECK-NEXT: store float [[HI:%.*]], float* [[I4]], align 4
; CHECK-NEXT: store float [[HI:%.*]], ptr [[I4]], align 4
; CHECK-NEXT: [[I5:%.*]] = fcmp fast olt float [[X:%.*]], [[LO:%.*]]
; CHECK-NEXT: [[I6:%.*]] = fcmp fast olt float [[HI]], [[X]]
; CHECK-NEXT: [[I9_SROA_SPECULATE_LOAD_FALSE_SROA_SPECULATE_LOAD_TRUE:%.*]] = load float, float* [[I4]], align 4
; CHECK-NEXT: [[I9_SROA_SPECULATE_LOAD_FALSE_SROA_SPECULATE_LOAD_TRUE:%.*]] = load float, ptr [[I4]], align 4
; CHECK-NEXT: [[I9_SROA_SPECULATE_LOAD_FALSE_SROA_SPECULATED:%.*]] = select i1 [[I6]], float [[I9_SROA_SPECULATE_LOAD_FALSE_SROA_SPECULATE_LOAD_TRUE]], float [[X]]
; CHECK-NEXT: [[I9_SROA_SPECULATED:%.*]] = select i1 [[I5]], float [[LO]], float [[I9_SROA_SPECULATE_LOAD_FALSE_SROA_SPECULATED]]
; CHECK-NEXT: ret float [[I9_SROA_SPECULATED]]
Expand All @@ -19,13 +19,13 @@ bb:
%i = alloca float, align 4
%i3 = alloca float, align 4
%i4 = alloca float, align 4
store float %x, float* %i, align 4
store float %lo, float* %i3, align 4
store float %hi, float* %i4, align 4
store float %x, ptr %i, align 4
store float %lo, ptr %i3, align 4
store float %hi, ptr %i4, align 4
%i5 = fcmp fast olt float %x, %lo
%i6 = fcmp fast olt float %hi, %x
%i7 = select i1 %i6, float* %i4, float* %i
%i8 = select i1 %i5, float* %i3, float* %i7
%i9 = load float, float* %i8, align 4
%i7 = select i1 %i6, ptr %i4, ptr %i
%i8 = select i1 %i5, ptr %i3, ptr %i7
%i9 = load float, ptr %i8, align 4
ret float %i9
}
20 changes: 8 additions & 12 deletions llvm/test/Transforms/SROA/tbaa-struct.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,19 @@
; SROA should keep `!tbaa.struct` metadata

%vector = type { float, float }
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* writeonly, i8* readonly, i64, i1 immarg)
declare <2 x float> @foo(%vector* %0)
declare void @llvm.memcpy.p0.p0.i64(ptr writeonly, ptr readonly, i64, i1 immarg)
declare <2 x float> @foo(ptr %0)

; Stores the <2 x float> returned by @foo into a local %vector alloca and then
; copies those 8 bytes to %y2 with a memcpy that carries !tbaa.struct metadata.
; Expected output: no alloca and no memcpy; the vector is stored directly to
; %y2 and that store still carries !tbaa.struct metadata.
define void @bar(%vector* %y2) {
define void @bar(ptr %y2) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: [[X14:%.*]] = call <2 x float> @foo(%vector* [[Y2:%.*]])
; CHECK-NEXT: [[X7_SROA_0_0_X18_SROA_CAST:%.*]] = bitcast %vector* [[Y2]] to <2 x float>*
; CHECK-NEXT: store <2 x float> [[X14]], <2 x float>* [[X7_SROA_0_0_X18_SROA_CAST]], align 4, !tbaa.struct !0
; CHECK-NEXT: [[X14:%.*]] = call <2 x float> @foo(ptr [[Y2:%.*]])
; CHECK-NEXT: store <2 x float> [[X14]], ptr [[Y2]], align 4, !tbaa.struct !0
; CHECK-NEXT: ret void
;
%x7 = alloca %vector
%x14 = call <2 x float> @foo(%vector* %y2)
%x15 = bitcast %vector* %x7 to <2 x float>*
store <2 x float> %x14, <2 x float>* %x15
%x19 = bitcast %vector* %x7 to i8*
%x18 = bitcast %vector* %y2 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %x18, i8* align 4 %x19, i64 8, i1 false), !tbaa.struct !10
%x14 = call <2 x float> @foo(ptr %y2)
store <2 x float> %x14, ptr %x7
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %y2, ptr align 4 %x7, i64 8, i1 false), !tbaa.struct !10
ret void
}

Expand Down
30 changes: 12 additions & 18 deletions llvm/test/Transforms/SROA/tbaa-struct2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,15 @@
; SROA should correctly offset `!tbaa.struct` metadata

%struct.Wishart = type { double, i32 }
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* writeonly, i8* readonly, i64, i1 immarg)
declare void @llvm.memcpy.p0.p0.i64(ptr writeonly, ptr readonly, i64, i1 immarg)
declare double @subcall(double %g, i32 %m)

; Copies 16 bytes from %wishart into a local %struct.Wishart (the memcpy
; carries !tbaa.struct metadata), then loads the double at offset 0 and the i32
; at field index 1 from the copy and passes both to @subcall.
; The expected output for this function is listed after the metadata
; definitions further down in the file.
define double @bar(%struct.Wishart* %wishart) {
define double @bar(ptr %wishart) {
%tmp = alloca %struct.Wishart, align 8
%tmpaddr = bitcast %struct.Wishart* %tmp to i8*
%waddr = bitcast %struct.Wishart* %wishart to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmpaddr, i8* align 8 %waddr, i64 16, i1 false), !tbaa.struct !2
%gamma = getelementptr inbounds %struct.Wishart, %struct.Wishart* %tmp, i32 0, i32 0
%lg = load double, double* %gamma, align 8, !tbaa !4
%m = getelementptr inbounds %struct.Wishart, %struct.Wishart* %tmp, i32 0, i32 1
%lm = load i32, i32* %m, align 8, !tbaa !8
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %wishart, i64 16, i1 false), !tbaa.struct !2
%lg = load double, ptr %tmp, align 8, !tbaa !4
%m = getelementptr inbounds %struct.Wishart, ptr %tmp, i32 0, i32 1
%lm = load i32, ptr %m, align 8, !tbaa !8
%call = call double @subcall(double %lg, i32 %lm)
ret double %call
}
Expand All @@ -27,16 +24,13 @@ define double @bar(%struct.Wishart* %wishart) {
!7 = !{!8, !8, i64 0}
!8 = !{!"int", !5, i64 0}

; CHECK: define double @bar(%struct.Wishart* %wishart) {
; CHECK: define double @bar(ptr %wishart) {
; CHECK-NEXT: %tmp.sroa.3 = alloca [4 x i8], align 4
; CHECK-NEXT: %tmp.sroa.0.0.waddr.sroa_idx = getelementptr inbounds %struct.Wishart, %struct.Wishart* %wishart, i64 0, i32 0
; CHECK-NEXT: %tmp.sroa.0.0.copyload = load double, double* %tmp.sroa.0.0.waddr.sroa_idx, align 8, !tbaa.struct !0
; CHECK-NEXT: %tmp.sroa.2.0.waddr.sroa_idx1 = getelementptr inbounds %struct.Wishart, %struct.Wishart* %wishart, i64 0, i32 1
; CHECK-NEXT: %tmp.sroa.2.0.copyload = load i32, i32* %tmp.sroa.2.0.waddr.sroa_idx1, align 8, !tbaa.struct !7
; CHECK-NEXT: %tmp.sroa.3.0.waddr.sroa_raw_cast = bitcast %struct.Wishart* %wishart to i8*
; CHECK-NEXT: %tmp.sroa.3.0.waddr.sroa_raw_idx = getelementptr inbounds i8, i8* %tmp.sroa.3.0.waddr.sroa_raw_cast, i64 12
; CHECK-NEXT: %[[sroa_idx:.+]] = getelementptr inbounds [4 x i8], [4 x i8]* %tmp.sroa.3, i64 0, i64 0
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[sroa_idx]], i8* align 4 %tmp.sroa.3.0.waddr.sroa_raw_idx, i64 4, i1 false), !tbaa.struct !8
; CHECK-NEXT: %tmp.sroa.0.0.copyload = load double, ptr %wishart, align 8, !tbaa.struct !0
; CHECK-NEXT: %tmp.sroa.2.0.wishart.sroa_idx = getelementptr inbounds i8, ptr %wishart, i64 8
; CHECK-NEXT: %tmp.sroa.2.0.copyload = load i32, ptr %tmp.sroa.2.0.wishart.sroa_idx, align 8, !tbaa.struct !7
; CHECK-NEXT: %tmp.sroa.3.0.wishart.sroa_idx = getelementptr inbounds i8, ptr %wishart, i64 12
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %tmp.sroa.3, ptr align 4 %tmp.sroa.3.0.wishart.sroa_idx, i64 4, i1 false), !tbaa.struct !8
; CHECK-NEXT: %call = call double @subcall(double %tmp.sroa.0.0.copyload, i32 %tmp.sroa.2.0.copyload)
; CHECK-NEXT: ret double %call
; CHECK-NEXT: }
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/SROA/tbaa-subload.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,23 @@ define void @caller() {
; CHECK-LABEL: @caller(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AGG:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
; CHECK-NEXT: [[OFF:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[AGG]], i32 0, i32 2
; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[OFF]], i32 0, i32 0
; CHECK-NEXT: store i64 1, i64* [[DOTFCA_0_GEP]], align 8, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[OFF]], i32 0, i32 1
; CHECK-NEXT: store i64 2, i64* [[DOTFCA_1_GEP]], align 8, !tbaa [[TBAA0]]
; CHECK-NEXT: call void @use(%class.anon* [[AGG]])
; CHECK-NEXT: [[OFF:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[AGG]], i32 0, i32 2
; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds { i64, i64 }, ptr [[OFF]], i32 0, i32 0
; CHECK-NEXT: store i64 1, ptr [[DOTFCA_0_GEP]], align 8, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds { i64, i64 }, ptr [[OFF]], i32 0, i32 1
; CHECK-NEXT: store i64 2, ptr [[DOTFCA_1_GEP]], align 8, !tbaa [[TBAA0]]
; CHECK-NEXT: call void @use(ptr [[AGG]])
; CHECK-NEXT: ret void
;
entry:
%agg = alloca %class.anon, align 8
%off = getelementptr inbounds %class.anon, %class.anon* %agg, i32 0, i32 2
store { i64, i64 } { i64 1, i64 2 }, { i64, i64 }* %off, align 8, !tbaa !7
call void @use(%class.anon* %agg)
%off = getelementptr inbounds %class.anon, ptr %agg, i32 0, i32 2
store { i64, i64 } { i64 1, i64 2 }, ptr %off, align 8, !tbaa !7
call void @use(ptr %agg)
ret void
}

declare void @use(%class.anon* %this)
declare void @use(ptr %this)

!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C++ TBAA"}
Expand Down
45 changes: 20 additions & 25 deletions llvm/test/Transforms/SROA/vector-conversion.ll
Original file line number Diff line number Diff line change
@@ -1,48 +1,45 @@
; RUN: opt < %s -passes=sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"

; Spills a two-field aggregate of pointer vectors to an alloca and reloads the
; same memory as a single <4 x i64>. The FileCheck directives in the body
; expect the alloca, the store and the load to be gone from the output and a
; ptrtoint conversion to appear instead.
; The label pattern ends in "(" so that it cannot also match the longer
; @vector_ptrtointbitcast* function names later in this file.
define <4 x i64> @vector_ptrtoint({<2 x i32*>, <2 x i32*>} %x) {
define <4 x i64> @vector_ptrtoint({<2 x ptr>, <2 x ptr>} %x) {
; CHECK-LABEL: @vector_ptrtoint(
%a = alloca {<2 x i32*>, <2 x i32*>}
%a = alloca {<2 x ptr>, <2 x ptr>}
; CHECK-NOT: alloca

store {<2 x i32*>, <2 x i32*>} %x, {<2 x i32*>, <2 x i32*>}* %a
store {<2 x ptr>, <2 x ptr>} %x, ptr %a
; CHECK-NOT: store

; Reload the bytes just written, reinterpreted as one integer vector.
%cast = bitcast {<2 x i32*>, <2 x i32*>}* %a to <4 x i64>*
%vec = load <4 x i64>, <4 x i64>* %cast
%vec = load <4 x i64>, ptr %a
; CHECK-NOT: load
; CHECK: ptrtoint

ret <4 x i64> %vec
}

; Spills a two-field aggregate of <2 x i64> vectors to an alloca and reloads
; the same memory as a single vector of four pointers. The FileCheck directives
; in the body expect the alloca, the store and the load to be gone from the
; output and an inttoptr conversion to appear instead.
; The label pattern ends in "(" so that it cannot also match the longer
; @vector_inttoptrbitcast_vector function name later in this file.
define <4 x i32*> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) {
define <4 x ptr> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) {
; CHECK-LABEL: @vector_inttoptr(
%a = alloca {<2 x i64>, <2 x i64>}
; CHECK-NOT: alloca

store {<2 x i64>, <2 x i64>} %x, {<2 x i64>, <2 x i64>}* %a
store {<2 x i64>, <2 x i64>} %x, ptr %a
; CHECK-NOT: store

; Reload the bytes just written, reinterpreted as one pointer vector.
%cast = bitcast {<2 x i64>, <2 x i64>}* %a to <4 x i32*>*
%vec = load <4 x i32*>, <4 x i32*>* %cast
%vec = load <4 x ptr>, ptr %a
; CHECK-NOT: load
; CHECK: inttoptr

ret <4 x i32*> %vec
ret <4 x ptr> %vec
}

define <2 x i64> @vector_ptrtointbitcast({<1 x i32*>, <1 x i32*>} %x) {
define <2 x i64> @vector_ptrtointbitcast({<1 x ptr>, <1 x ptr>} %x) {
; CHECK-LABEL: @vector_ptrtointbitcast(
%a = alloca {<1 x i32*>, <1 x i32*>}
%a = alloca {<1 x ptr>, <1 x ptr>}
; CHECK-NOT: alloca

store {<1 x i32*>, <1 x i32*>} %x, {<1 x i32*>, <1 x i32*>}* %a
store {<1 x ptr>, <1 x ptr>} %x, ptr %a
; CHECK-NOT: store

%cast = bitcast {<1 x i32*>, <1 x i32*>}* %a to <2 x i64>*
%vec = load <2 x i64>, <2 x i64>* %cast
%vec = load <2 x i64>, ptr %a
; CHECK-NOT: load
; CHECK: ptrtoint
; CHECK: bitcast
Expand All @@ -52,35 +49,33 @@ define <2 x i64> @vector_ptrtointbitcast({<1 x i32*>, <1 x i32*>} %x) {
ret <2 x i64> %vec
}

; Spills a two-field aggregate of <16 x i8> vectors to an alloca and reloads
; the same memory as a vector of two pointers. The FileCheck directives in the
; body expect no alloca, store or load in the output, and then, in this order,
; two extractvalue instructions, a bitcast and an inttoptr.
define <2 x i8*> @vector_inttoptrbitcast_vector({<16 x i8>, <16 x i8>} %x) {
define <2 x ptr> @vector_inttoptrbitcast_vector({<16 x i8>, <16 x i8>} %x) {
; CHECK-LABEL: @vector_inttoptrbitcast_vector(
%a = alloca {<16 x i8>, <16 x i8>}
; CHECK-NOT: alloca

store {<16 x i8>, <16 x i8>} %x, {<16 x i8>, <16 x i8>}* %a
store {<16 x i8>, <16 x i8>} %x, ptr %a
; CHECK-NOT: store

; Reload the bytes just written, reinterpreted as a pointer vector.
%cast = bitcast {<16 x i8>, <16 x i8>}* %a to <2 x i8*>*
%vec = load <2 x i8*>, <2 x i8*>* %cast
%vec = load <2 x ptr>, ptr %a
; CHECK-NOT: load
; CHECK: extractvalue
; CHECK: extractvalue
; CHECK: bitcast
; CHECK: inttoptr

ret <2 x i8*> %vec
ret <2 x ptr> %vec
}

define <16 x i8> @vector_ptrtointbitcast_vector({<2 x i8*>, <2 x i8*>} %x) {
define <16 x i8> @vector_ptrtointbitcast_vector({<2 x ptr>, <2 x ptr>} %x) {
; CHECK-LABEL: @vector_ptrtointbitcast_vector(
%a = alloca {<2 x i8*>, <2 x i8*>}
%a = alloca {<2 x ptr>, <2 x ptr>}
; CHECK-NOT: alloca

store {<2 x i8*>, <2 x i8*>} %x, {<2 x i8*>, <2 x i8*>}* %a
store {<2 x ptr>, <2 x ptr>} %x, ptr %a
; CHECK-NOT: store

%cast = bitcast {<2 x i8*>, <2 x i8*>}* %a to <16 x i8>*
%vec = load <16 x i8>, <16 x i8>* %cast
%vec = load <16 x i8>, ptr %a
; CHECK-NOT: load
; CHECK: extractvalue
; CHECK: ptrtoint
Expand Down
15 changes: 6 additions & 9 deletions llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,21 @@
target datalayout = "e-p:64:32-i64:32-v32:32-n32-S64"

; Function Attrs: nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #0

; Function Attrs: nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #0

; CHECK: @wombat
; CHECK-NOT: alloca
; CHECK: ret void
; Stores a <4 x float> argument to a 16-byte-aligned alloca, reloads the same
; slot as <3 x float>, and passes the result to @wombat3. The accesses are
; bracketed by lifetime.start / lifetime.end calls of size 16 on the alloca.
; The FileCheck directives preceding this definition require that no alloca
; is left in the output before "ret void".
define void @wombat(<4 x float> %arg1) {
bb:
%tmp = alloca <4 x float>, align 16
%tmp8 = bitcast <4 x float>* %tmp to i8*
call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp8)
store <4 x float> %arg1, <4 x float>* %tmp, align 16
%tmp17 = bitcast <4 x float>* %tmp to <3 x float>*
%tmp18 = load <3 x float>, <3 x float>* %tmp17
%tmp20 = bitcast <4 x float>* %tmp to i8*
call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp20)
call void @llvm.lifetime.start.p0(i64 16, ptr %tmp)
store <4 x float> %arg1, ptr %tmp, align 16
; The load uses a narrower vector type (3 elements) than the store (4).
%tmp18 = load <3 x float>, ptr %tmp
call void @llvm.lifetime.end.p0(i64 16, ptr %tmp)
call void @wombat3(<3 x float> %tmp18)
ret void
}
Expand Down
20 changes: 8 additions & 12 deletions llvm/test/Transforms/SROA/vector-promotion-different-size.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,27 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
; Stores a constant <3 x i1> to an alloca and reloads the same slot as
; <4 x i1>, i.e. with a different element count than the stored vector.
; The assertions in the body expect the alloca to stay in the output, with the
; <3 x i1> store and the <4 x i1> load both still going through it.
define <4 x i1> @vector_bitcast() {
; CHECK-LABEL: @vector_bitcast(
; CHECK-NEXT: [[A:%.*]] = alloca <3 x i1>, align 1
; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, <3 x i1>* [[A]], align 1
; CHECK-NEXT: [[A_0_CAST_SROA_CAST:%.*]] = bitcast <3 x i1>* [[A]] to <4 x i1>*
; CHECK-NEXT: [[A_0_VEC:%.*]] = load <4 x i1>, <4 x i1>* [[A_0_CAST_SROA_CAST]], align 1
; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr [[A]], align 1
; CHECK-NEXT: [[A_0_VEC:%.*]] = load <4 x i1>, ptr [[A]], align 1
; CHECK-NEXT: ret <4 x i1> [[A_0_VEC]]
;

%a = alloca <3 x i1>
store <3 x i1> <i1 1,i1 0,i1 1>, <3 x i1>* %a
%cast = bitcast <3 x i1>* %a to <4 x i1>*
%vec = load <4 x i1>, <4 x i1>* %cast
store <3 x i1> <i1 1,i1 0,i1 1>, ptr %a
%vec = load <4 x i1>, ptr %a
ret <4 x i1> %vec
}

; Stores a <32 x i16> argument to an alloca and reloads the same slot as
; <64 x i16>, i.e. a vector with twice as many elements as the one stored.
; The assertions in the body expect the alloca to stay in the output, with the
; <32 x i16> store and the <64 x i16> load both still going through it.
define <64 x i16> @vector_bitcast_2(<32 x i16> %v) {
; CHECK-LABEL: @vector_bitcast_2(
; CHECK-NEXT: [[P:%.*]] = alloca <32 x i16>, align 64
; CHECK-NEXT: store <32 x i16> [[V:%.*]], <32 x i16>* [[P]], align 64
; CHECK-NEXT: [[P_0_Q_SROA_CAST:%.*]] = bitcast <32 x i16>* [[P]] to <64 x i16>*
; CHECK-NEXT: [[P_0_LOAD:%.*]] = load <64 x i16>, <64 x i16>* [[P_0_Q_SROA_CAST]], align 64
; CHECK-NEXT: store <32 x i16> [[V:%.*]], ptr [[P]], align 64
; CHECK-NEXT: [[P_0_LOAD:%.*]] = load <64 x i16>, ptr [[P]], align 64
; CHECK-NEXT: ret <64 x i16> [[P_0_LOAD]]
;

%p = alloca <32 x i16>
store <32 x i16> %v, <32 x i16>* %p
%q = bitcast <32 x i16>* %p to <64 x i16>*
%load = load <64 x i16>, <64 x i16>* %q
store <32 x i16> %v, ptr %p
%load = load <64 x i16>, ptr %p
ret <64 x i16> %load
}
403 changes: 170 additions & 233 deletions llvm/test/Transforms/SROA/vector-promotion.ll

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions llvm/test/Transforms/SROA/vectors-of-pointers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"

define void @foo(i1 %c1, i1 %c2) {
entry:
%Args.i = alloca <2 x i32*>, align 16
%Args.i = alloca <2 x ptr>, align 16
br i1 %c1, label %bb0.exit158, label %if.then.i.i.i.i.i138

if.then.i.i.i.i.i138:
Expand All @@ -20,6 +20,6 @@ if.then.i.i.i.i.i237:
unreachable

bb0.exit257:
%0 = load <2 x i32*>, <2 x i32*>* %Args.i, align 16
%0 = load <2 x ptr>, ptr %Args.i, align 16
ret void
}