208 changes: 208 additions & 0 deletions llvm/test/Transforms/InstCombine/insertelt-trunc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL
; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL


; Splits i32 %x into two 16-bit halves (low: trunc; high: lshr by 16, then
; trunc) and inserts them into a poison <4 x i16>: low half at element 0,
; high half at element 1. The check lines expect instcombine to leave the
; sequence unchanged.
define <4 x i16> @insert_01_poison_v4i16(i32 %x) {
; ALL-LABEL: @insert_01_poison_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
ret <4 x i16> %ins1
}

; Same split of i32 %x into 16-bit halves, but with the element order swapped
; and a wider vector: low half goes to element 1 and high half to element 0 of
; a poison <8 x i16>. The check lines expect the sequence to be unchanged.
define <8 x i16> @insert_10_poison_v8i16(i32 %x) {
; ALL-LABEL: @insert_10_poison_v8i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <8 x i16> poison, i16 [[LO16]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[INS0]], i16 [[HI16]], i64 0
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <8 x i16> poison, i16 %lo16, i64 1
%ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 0
ret <8 x i16> %ins1
}

; Splits i64 %x into two 32-bit halves (low: trunc; high: lshr by 32, then
; trunc) and inserts them into a poison <4 x i32>: low half at element 1,
; high half at element 2. The check lines expect the sequence to be unchanged.
; NOTE(review): elements 1 and 2 are adjacent but do not start at an even
; index; presumably this is meant as a case that cannot become a single wide
; insert -- confirm against the transform under test.
define <4 x i32> @insert_12_poison_v4i32(i64 %x) {
; ALL-LABEL: @insert_12_poison_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
%hi64 = lshr i64 %x, 32
%hi32 = trunc i64 %hi64 to i32
%lo32 = trunc i64 %x to i32
%ins0 = insertelement <4 x i32> poison, i32 %lo32, i64 1
%ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 2
ret <4 x i32> %ins1
}

; Splits i32 %x into 16-bit halves and inserts them into a poison <4 x i16>
; with the low half at element 2 and the high half at element 1 (the swapped
; counterpart of the 1/2 placement). The check lines expect the sequence to
; be unchanged.
define <4 x i16> @insert_21_poison_v4i16(i32 %x) {
; ALL-LABEL: @insert_21_poison_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 2
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 2
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
ret <4 x i16> %ins1
}

; Splits i64 %x into 32-bit halves and inserts them into the upper two
; elements of a poison <4 x i32>: low half at element 2, high half at
; element 3. The check lines expect the sequence to be unchanged.
define <4 x i32> @insert_23_poison_v4i32(i64 %x) {
; ALL-LABEL: @insert_23_poison_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 2
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
%hi64 = lshr i64 %x, 32
%hi32 = trunc i64 %hi64 to i32
%lo32 = trunc i64 %x to i32
%ins0 = insertelement <4 x i32> poison, i32 %lo32, i64 2
%ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 3
ret <4 x i32> %ins1
}

; Splits i32 %x into 16-bit halves and inserts them into the upper two
; elements of a poison <4 x i16> in swapped order: low half at element 3,
; high half at element 2. The check lines expect the sequence to be unchanged.
define <4 x i16> @insert_32_poison_v4i16(i32 %x) {
; ALL-LABEL: @insert_32_poison_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 3
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 2
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 3
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 2
ret <4 x i16> %ins1
}

; Variant with a non-poison base vector: the 16-bit halves of i32 %x are
; inserted into argument %v (low half at element 0, high half at element 1).
; Both elements of the 2-element vector are overwritten, and the expected
; output uses poison as the base of the first insertelement instead of %v;
; the rest of the sequence is expected to stay as written.
define <2 x i16> @insert_01_v2i16(i32 %x, <2 x i16> %v) {
; ALL-LABEL: @insert_01_v2i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <2 x i16> poison, i16 [[LO16]], i64 0
; ALL-NEXT: [[INS1:%.*]] = insertelement <2 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT: ret <2 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <2 x i16> %v, i16 %lo16, i64 0
%ins1 = insertelement <2 x i16> %ins0, i16 %hi16, i64 1
ret <2 x i16> %ins1
}

; Non-poison base variant: the 16-bit halves of i32 %x are inserted into
; argument %v (<8 x i16>) with the low half at element 1 and the high half at
; element 0. The remaining elements of %v are kept, and the check lines expect
; the sequence (including the %v base) to be unchanged.
define <8 x i16> @insert_10_v8i16(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_10_v8i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[LO16]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[INS0]], i16 [[HI16]], i64 0
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 1
%ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 0
ret <8 x i16> %ins1
}

; Non-poison base variant: the 32-bit halves of i64 %x are inserted into
; argument %v (<4 x i32>) with the low half at element 1 and the high half at
; element 2. The check lines expect the sequence to be unchanged.
define <4 x i32> @insert_12_v4i32(i64 %x, <4 x i32> %v) {
; ALL-LABEL: @insert_12_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
%hi64 = lshr i64 %x, 32
%hi32 = trunc i64 %hi64 to i32
%lo32 = trunc i64 %x to i32
%ins0 = insertelement <4 x i32> %v, i32 %lo32, i64 1
%ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 2
ret <4 x i32> %ins1
}

; Non-poison base variant: the 16-bit halves of i32 %x are inserted into
; argument %v (<4 x i16>) with the low half at element 2 and the high half at
; element 1. The check lines expect the sequence to be unchanged.
define <4 x i16> @insert_21_v4i16(i32 %x, <4 x i16> %v) {
; ALL-LABEL: @insert_21_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[LO16]], i64 2
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> %v, i16 %lo16, i64 2
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
ret <4 x i16> %ins1
}

; Non-poison base variant: the 32-bit halves of i64 %x are inserted into the
; upper two elements of argument %v (<4 x i32>): low half at element 2, high
; half at element 3. The check lines expect the sequence to be unchanged.
define <4 x i32> @insert_23_v4i32(i64 %x, <4 x i32> %v) {
; ALL-LABEL: @insert_23_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 2
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
%hi64 = lshr i64 %x, 32
%hi32 = trunc i64 %hi64 to i32
%lo32 = trunc i64 %x to i32
%ins0 = insertelement <4 x i32> %v, i32 %lo32, i64 2
%ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 3
ret <4 x i32> %ins1
}

; Non-poison base variant: the 16-bit halves of i32 %x are inserted into the
; upper two elements of argument %v (<4 x i16>) in swapped order: low half at
; element 3, high half at element 2. The check lines expect the sequence to
; be unchanged.
define <4 x i16> @insert_32_v4i16(i32 %x, <4 x i16> %v) {
; ALL-LABEL: @insert_32_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[LO16]], i64 3
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 2
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> %v, i16 %lo16, i64 3
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 2
ret <4 x i16> %ins1
}
23 changes: 12 additions & 11 deletions llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,20 +83,21 @@ ret:
define void @nocopy(i64 %val, i32 %limit, ptr %ptr) {
; CHECK-LABEL: @nocopy(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VEC_VAL:%.*]] = bitcast i64 [[VAL:%.*]] to <2 x i32>
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VEC_VAL]], <2 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[TMP0]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[VAL:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> undef, i32 [[TMP0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[TMP2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[TMP2:%.*]] = phi <16 x i32> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP4]], i64 0
; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP4]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp ult i32 [[ELT]], [[LIMIT:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[ELTCOPY]], 10
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[ELT]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]]
; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4
; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[ELTCOPY]], 10
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]]
; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4
; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]]
; CHECK: ret:
; CHECK-NEXT: ret void
Expand Down
23 changes: 12 additions & 11 deletions llvm/test/Transforms/InstCombine/vec_phi_extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,20 +83,21 @@ ret:
define void @nocopy(i64 %val, i32 %limit, ptr %ptr) {
; CHECK-LABEL: @nocopy(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VEC_VAL:%.*]] = bitcast i64 [[VAL:%.*]] to <2 x i32>
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VEC_VAL]], <2 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[TMP0]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[VAL:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> undef, i32 [[TMP0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[TMP2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[TMP2:%.*]] = phi <16 x i32> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP4]], i64 0
; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP4]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp ult i32 [[ELT]], [[LIMIT:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[ELTCOPY]], 10
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[ELT]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]]
; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4
; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[ELTCOPY]], 10
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]]
; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4
; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]]
; CHECK: ret:
; CHECK-NEXT: ret void
Expand Down
19 changes: 10 additions & 9 deletions llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,22 @@ end:
define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v8i16_add1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VEC_Y:%.*]] = bitcast i32 [[Y:%.*]] to <2 x i16>
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[VEC_Y]], <2 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP1]], <8 x i16>* [[TMP2]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP4]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: end:
; CHECK-NEXT: ret void
;
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -50,31 +50,31 @@ define noundef <4 x float> @ConvertVectors_ByVal(ptr noundef nonnull align 16 de
; SSE-NEXT: [[V_VAL20:%.*]] = load i64, ptr [[V:%.*]], align 16
; SSE-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8
; SSE-NEXT: [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8
; SSE-NEXT: [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32>
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; SSE-NEXT: [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32
; SSE-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1
; SSE-NEXT: [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32
; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2
; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3
; SSE-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float>
; SSE-NEXT: [[TMP1:%.*]] = lshr i64 [[V_VAL20]], 32
; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[V_VAL20]], i64 0
; SSE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1
; SSE-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; SSE-NEXT: [[TMP6:%.*]] = trunc i64 [[V_VAL421]] to i32
; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 2
; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP6]], i64 3
; SSE-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
; SSE-NEXT: ret <4 x float> [[VECINIT16]]
;
; AVX-LABEL: @ConvertVectors_ByVal(
; AVX-NEXT: entry:
; AVX-NEXT: [[V_VAL20:%.*]] = load i64, ptr [[V:%.*]], align 16
; AVX-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8
; AVX-NEXT: [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8
; AVX-NEXT: [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32>
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32
; AVX-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1
; AVX-NEXT: [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32
; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2
; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3
; AVX-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float>
; AVX-NEXT: [[TMP1:%.*]] = trunc i64 [[V_VAL20]] to i32
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i64 0
; AVX-NEXT: [[TMP3:%.*]] = lshr i64 [[V_VAL20]], 32
; AVX-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i64 1
; AVX-NEXT: [[TMP6:%.*]] = trunc i64 [[V_VAL421]] to i32
; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 2
; AVX-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP6]], i64 3
; AVX-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
; AVX-NEXT: ret <4 x float> [[VECINIT16]]
;
entry:
Expand Down