3,783 changes: 3,783 additions & 0 deletions llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll

Large diffs are not rendered by default.

305 changes: 305 additions & 0 deletions llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,305 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

; The input overwrites element 1 of %a with 1.0 before calling vfrcz.sd.
; The expected output calls the intrinsic directly on %a, i.e. the insert
; into the upper element is dropped.
define <2 x double> @test_vfrcz_sd(<2 x double> %a) {
; CHECK-LABEL: @test_vfrcz_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[A:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1)
ret <2 x double> %2
}

; Builds <%a, 1.0> starting from poison, calls vfrcz.sd and extracts element 0.
; The expected output keeps only the element-0 insert (the constant insert
; into element 1 is gone) followed by the call and the extract.
define double @test_vfrcz_sd_0(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
%4 = extractelement <2 x double> %3, i32 0
ret double %4
}

; Same input vector as @test_vfrcz_sd_0, but element 1 of the vfrcz.sd
; result is extracted. The expected output is the constant 0.0 with no
; call left in the function.
define double @test_vfrcz_sd_1(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_1(
; CHECK-NEXT: ret double 0.000000e+00
;
%1 = insertelement <2 x double> poison, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
%3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
%4 = extractelement <2 x double> %3, i32 1
ret double %4
}

; The input overwrites elements 1, 2 and 3 of %a with constants before
; calling vfrcz.ss. The expected output calls the intrinsic directly on %a,
; i.e. all three upper-element inserts are dropped.
define <4 x float> @test_vfrcz_ss(<4 x float> %a) {
; CHECK-LABEL: @test_vfrcz_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[A:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
%4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3)
ret <4 x float> %4
}

; Builds <%a, 1.0, 2.0, 3.0> starting from poison, calls vfrcz.ss and
; extracts element 0. The expected output keeps only the element-0 insert
; followed by the call and the extract.
define float @test_vfrcz_ss_0(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 0
ret float %6
}

; Same input vector as @test_vfrcz_ss_0, but element 3 of the vfrcz.ss
; result is extracted. The expected output is the constant 0.0 with no
; call left in the function.
define float @test_vfrcz_ss_3(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_3(
; CHECK-NEXT: ret float 0.000000e+00
;
%1 = insertelement <4 x float> poison, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
%3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
%4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
%5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
%6 = extractelement <4 x float> %5, i32 3
ret float %6
}

; vpcomltq on <2 x i64> is expected to become a generic `icmp slt` whose
; <2 x i1> result is sign-extended back to <2 x i64>.
define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_slt_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}

; vpcomltuq on <2 x i64> is expected to become a generic `icmp ult` whose
; <2 x i1> result is sign-extended back to <2 x i64>.
define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ult_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}

; vpcomleq on <2 x i64> is expected to become a generic `icmp sle` whose
; <2 x i1> result is sign-extended back to <2 x i64>.
define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_sle_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sle <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}

; vpcomleuq on <2 x i64> is expected to become a generic `icmp ule` whose
; <2 x i1> result is sign-extended back to <2 x i64>.
define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ule_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}

; vpcomgtd on <4 x i32> is expected to become a generic `icmp sgt` whose
; <4 x i1> result is sign-extended back to <4 x i32>.
define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sgt_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}

; vpcomgtud on <4 x i32> is expected to become a generic `icmp ugt` whose
; <4 x i1> result is sign-extended back to <4 x i32>.
define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_ugt_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}

; vpcomged on <4 x i32> is expected to become a generic `icmp sge` whose
; <4 x i1> result is sign-extended back to <4 x i32>.
define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sge_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}

; vpcomgeud on <4 x i32> is expected to become a generic `icmp uge` whose
; <4 x i1> result is sign-extended back to <4 x i32>.
define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_uge_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}

; vpcomeqw (signed variant) on <8 x i16> is expected to become `icmp eq`
; followed by a sign-extension of the <8 x i1> result.
define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_seq_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}

; vpcomequw (unsigned variant) on <8 x i16> is expected to produce the same
; `icmp eq` + sext sequence as the signed variant in @cmp_seq_v8i16.
define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_ueq_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}

; vpcomnew (signed variant) on <8 x i16> is expected to become `icmp ne`
; followed by a sign-extension of the <8 x i1> result.
define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_sne_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}

; vpcomneuw (unsigned variant) on <8 x i16> is expected to produce the same
; `icmp ne` + sext sequence as the signed variant in @cmp_sne_v8i16.
define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_une_v8i16(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}

; vpcomtrueb ignores its operands in the expected output: the call is
; replaced by a constant vector with every i8 element set to -1.
define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_strue_v16i8(
; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}

; vpcomtrueub (unsigned variant) is expected to fold to the same all-ones
; constant vector as the signed variant in @cmp_strue_v16i8.
define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_utrue_v16i8(
; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}

; vpcomfalseb ignores its operands in the expected output: the call is
; replaced by an all-zero <16 x i8> constant.
define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_sfalse_v16i8(
; CHECK-NEXT: ret <16 x i8> zeroinitializer
;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}

; vpcomfalseub (unsigned variant) is expected to fold to the same all-zero
; constant as the signed variant in @cmp_sfalse_v16i8.
define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_ufalse_v16i8(
; CHECK-NEXT: ret <16 x i8> zeroinitializer
;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}

; Declarations of the XOP intrinsics referenced by this test. The vpcom*
; groups declare every element width (b/w/d/q) in both the signed and the
; unsigned ("u") form, although the functions in this file call only some
; of them.

; Scalar fraction-extract intrinsics.
declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone

; vpcomlt*: less-than compares.
declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone

; vpcomle*: less-than-or-equal compares.
declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone

; vpcomgt*: greater-than compares.
declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone

; vpcomge*: greater-than-or-equal compares.
declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone

; vpcomeq*: equality compares.
declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone

; vpcomne*: inequality compares.
declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone

; vpcomfalse*: always-false compares.
declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone

; vpcomtrue*: always-true compares.
; NOTE(review): the signed declarations in this group are ordered b, d, q, w
; rather than b, w, d, q like every other group; declaration order does not
; matter to the tests.
declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
573 changes: 573 additions & 0 deletions llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll

Large diffs are not rendered by default.

167 changes: 167 additions & 0 deletions llvm/test/Transforms/InstCombine/bitcast-vec-canon-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Bitcast of a one-element integer vector to an FP scalar. The expected
; output first bitcasts vector-to-vector (<1 x i64> to <1 x double>) and
; then extracts element 0.
define double @a(<1 x i64> %y) {
; CHECK-LABEL: @a(
; CHECK-NEXT: [[BC:%.*]] = bitcast <1 x i64> [[Y:%.*]] to <1 x double>
; CHECK-NEXT: [[C:%.*]] = extractelement <1 x double> [[BC]], i32 0
; CHECK-NEXT: ret double [[C]]
;
%c = bitcast <1 x i64> %y to double
ret double %c
}

; Bitcast of <1 x i64> to its own element type. The expected output is a
; plain extractelement of element 0.
define i64 @b(<1 x i64> %y) {
; CHECK-LABEL: @b(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[Y:%.*]], i32 0
; CHECK-NEXT: ret i64 [[TMP1]]
;
%c = bitcast <1 x i64> %y to i64
ret i64 %c
}

; Bitcast of an FP scalar to a one-element integer vector. The expected
; output is the same bitcast, i.e. this form is left as is.
define <1 x i64> @c(double %y) {
; CHECK-LABEL: @c(
; CHECK-NEXT: [[C:%.*]] = bitcast double [[Y:%.*]] to <1 x i64>
; CHECK-NEXT: ret <1 x i64> [[C]]
;
%c = bitcast double %y to <1 x i64>
ret <1 x i64> %c
}

; Bitcast of i64 to <1 x i64>. The expected output is an insertelement of
; the scalar into element 0 of an undef vector.
define <1 x i64> @d(i64 %y) {
; CHECK-LABEL: @d(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> undef, i64 [[Y:%.*]], i32 0
; CHECK-NEXT: ret <1 x i64> [[TMP1]]
;
%c = bitcast i64 %y to <1 x i64>
ret <1 x i64> %c
}

; Bitcast of <1 x i64> to x86_mmx. The expected output extracts the i64
; element first and then bitcasts that scalar to x86_mmx.
define x86_mmx @e(<1 x i64> %y) {
; CHECK-LABEL: @e(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[Y:%.*]], i32 0
; CHECK-NEXT: [[C:%.*]] = bitcast i64 [[TMP1]] to x86_mmx
; CHECK-NEXT: ret x86_mmx [[C]]
;
%c = bitcast <1 x i64> %y to x86_mmx
ret x86_mmx %c
}

; Bitcast of x86_mmx to <1 x i64>. The expected output bitcasts to scalar
; i64 first and then inserts it into element 0 of an undef vector.
define <1 x i64> @f(x86_mmx %y) {
; CHECK-LABEL: @f(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[Y:%.*]] to i64
; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
; CHECK-NEXT: ret <1 x i64> [[C]]
;
%c = bitcast x86_mmx %y to <1 x i64>
ret <1 x i64> %c
}

; Chain of two bitcasts (x86_mmx -> <1 x i64> -> double). The expected
; output is a single bitcast straight from x86_mmx to double.
define double @g(x86_mmx %x) {
; CHECK-LABEL: @g(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast x86_mmx [[X:%.*]] to double
; CHECK-NEXT: ret double [[TMP0]]
;
entry:
%0 = bitcast x86_mmx %x to <1 x i64>
%1 = bitcast <1 x i64> %0 to double
ret double %1
}

; FP source is ok.

; A scalar double->i64 bitcast feeds an insertelement into a poison vector
; at a variable index. The expected output inserts the original double into
; a <3 x double> vector and bitcasts the whole vector to <3 x i64> instead.
define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT: [[I:%.*]] = bitcast <3 x double> [[TMP1]] to <3 x i64>
; CHECK-NEXT: ret <3 x i64> [[I]]
;
%xb = bitcast double %x to i64
%i = insertelement <3 x i64> poison, i64 %xb, i32 %idx
ret <3 x i64> %i
}

; Integer source is ok; index is anything.

; Mirror of @bitcast_inselt_undef with an i32 source bitcast to float and
; an unusual i567 index type. The expected output inserts the i32 into a
; <3 x i32> vector (keeping the i567 index) and bitcasts the vector.
define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_fp(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]]
; CHECK-NEXT: [[I:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
; CHECK-NEXT: ret <3 x float> [[I]]
;
%xb = bitcast i32 %x to float
%i = insertelement <3 x float> poison, float %xb, i567 %idx
ret <3 x float> %i
}

; Scalable-vector version of @bitcast_inselt_undef_fp: the same rewrite is
; expected for <vscale x 3 x float>.
define <vscale x 3 x float> @bitcast_inselt_undef_vscale(i32 %x, i567 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_vscale(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <vscale x 3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]]
; CHECK-NEXT: [[I:%.*]] = bitcast <vscale x 3 x i32> [[TMP1]] to <vscale x 3 x float>
; CHECK-NEXT: ret <vscale x 3 x float> [[I]]
;
%xb = bitcast i32 %x to float
%i = insertelement <vscale x 3 x float> poison, float %xb, i567 %idx
ret <vscale x 3 x float> %i
}

; External function called by @bitcast_inselt_undef_extra_use to give the
; scalar bitcast a second user.
declare void @use(i64)

; Negative test - extra use prevents canonicalization

; The scalar bitcast result is also passed to @use, so it has two users.
; The expected output is identical to the input.
define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_extra_use(
; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64
; CHECK-NEXT: call void @use(i64 [[XB]])
; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> poison, i64 [[XB]], i32 [[IDX:%.*]]
; CHECK-NEXT: ret <3 x i64> [[I]]
;
%xb = bitcast double %x to i64
call void @use(i64 %xb)
%i = insertelement <3 x i64> poison, i64 %xb, i32 %idx
ret <3 x i64> %i
}

; Negative test - source type must be scalar

; The inserted i64 comes from a bitcast of a <2 x i32> vector rather than
; of a scalar. The expected output is identical to the input.
define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_vec_src(
; CHECK-NEXT: [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64
; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> poison, i64 [[XB]], i32 [[IDX:%.*]]
; CHECK-NEXT: ret <3 x i64> [[I]]
;
%xb = bitcast <2 x i32> %x to i64
%i = insertelement <3 x i64> poison, i64 %xb, i32 %idx
ret <3 x i64> %i
}

; Negative test - an x86_mmx source is not canonicalized

; The inserted i64 comes from a bitcast of an x86_mmx value. The expected
; output is identical to the input.
define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) {
; CHECK-LABEL: @bitcast_inselt_undef_from_mmx(
; CHECK-NEXT: [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64
; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> poison, i64 [[XB]], i32 [[IDX:%.*]]
; CHECK-NEXT: ret <3 x i64> [[I]]
;
%xb = bitcast x86_mmx %x to i64
%i = insertelement <3 x i64> poison, i64 %xb, i32 %idx
ret <3 x i64> %i
}

; Reduce number of casts

; Two scalar double->i64 bitcasts are inserted into elements 0 and 1. The
; expected output inserts both doubles into a <2 x double> vector and
; performs one vector bitcast, so two casts become one.
define <2 x i64> @PR45748(double %x, double %y) {
; CHECK-LABEL: @PR45748(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[Y:%.*]], i32 1
; CHECK-NEXT: [[I1:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[I1]]
;
%xb = bitcast double %x to i64
%i0 = insertelement <2 x i64> poison, i64 %xb, i32 0
%yb = bitcast double %y to i64
%i1 = insertelement <2 x i64> %i0, i64 %yb, i32 1
ret <2 x i64> %i1
}
179 changes: 179 additions & 0 deletions llvm/test/Transforms/InstCombine/broadcast-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

; The same scalar is inserted into elements 0, 1, 2 and 3 in order, starting
; from poison. The expected output keeps the element-0 insert and replaces
; the rest with a shufflevector using an all-zero mask (a splat).
define <4 x float> @good1(float %arg) {
; CHECK-LABEL: @good1(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}

; Like @good1 but the elements are written in the order 1, 2, 0, 3. The
; expected output creates a fresh element-0 insert into undef and splats it
; with a zero-mask shufflevector.
define <4 x float> @good2(float %arg) {
; CHECK-LABEL: @good2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 1
%t4 = insertelement <4 x float> %t, float %arg, i32 2
%t5 = insertelement <4 x float> %t4, float %arg, i32 0
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}

; Like @good1 but the insert chain starts from zeroinitializer instead of
; poison. Every element is overwritten, and the expected output is an
; element-0 insert into undef followed by a zero-mask splat shuffle.
define <4 x float> @good3(float %arg) {
; CHECK-LABEL: @good3(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> zeroinitializer, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}

; A full splat built by four inserts is added to itself. In the expected
; output the fadd is performed on the single-element insert and the splat
; shuffle is applied to the fadd result.
define <4 x float> @good4(float %arg) {
; CHECK-LABEL: @good4(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[T]], [[T]]
; CHECK-NEXT: [[T7:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x float> [[T7]]
;
%t = insertelement <4 x float> zeroinitializer, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
%t7 = fadd <4 x float> %t6, %t6
ret <4 x float> %t7
}

; The first insert (%ins1, element 0) has an additional user (%a1) besides
; the insert chain. The expected output keeps %ins1 and %a1 and still turns
; the rest of the chain into a zero-mask splat shuffle of %ins1.
define <4 x float> @good5(float %v) {
; CHECK-LABEL: @good5(
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> poison, float [[V:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = fadd <4 x float> [[INS1]], [[INS1]]
; CHECK-NEXT: [[INS4:%.*]] = shufflevector <4 x float> [[INS1]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A1]], [[INS4]]
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%ins1 = insertelement <4 x float> poison, float %v, i32 0
%a1 = fadd <4 x float> %ins1, %ins1
%ins2 = insertelement<4 x float> %ins1, float %v, i32 1
%ins3 = insertelement<4 x float> %ins2, float %v, i32 2
%ins4 = insertelement<4 x float> %ins3, float %v, i32 3
%res = fadd <4 x float> %a1, %ins4
ret <4 x float> %res
}

; The insert is changed to allow the canonical shuffle-splat pattern from element 0.

; Element 0 is never written (element 1 is written twice). The expected
; output inserts the scalar at element 0 of an undef vector and uses the
; shuffle mask <undef, 0, 0, 0>, leaving result element 0 undefined.
define <4 x float> @splat_undef1(float %arg) {
; CHECK-LABEL: @splat_undef1(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 1
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}

; Re-uses the existing first insertelement.

; Elements 0, 2 and 3 are written; element 1 is not. The expected output
; keeps the original element-0 insert and uses the shuffle mask
; <0, undef, 0, 0>.
define <4 x float> @splat_undef2(float %arg) {
; CHECK-LABEL: @splat_undef2(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t5 = insertelement <4 x float> %t, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}

; Negative test: element 1 receives a different scalar (%arg2), so the
; vector is not a splat. The expected output is identical to the input.
define <4 x float> @bad3(float %arg, float %arg2) {
; CHECK-LABEL: @bad3(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG2:%.*]], i32 1
; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg2, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}

; Negative test: a single insert into a one-element vector. The expected
; output is identical to the input (no shuffle is created).
define <1 x float> @bad4(float %arg) {
; CHECK-LABEL: @bad4(
; CHECK-NEXT: [[T:%.*]] = insertelement <1 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: ret <1 x float> [[T]]
;
%t = insertelement <1 x float> poison, float %arg, i32 0
ret <1 x float> %t
}

; Multiple undef elements are ok.
; TODO: Multiple uses triggers the transform at %t4, but we should sink/scalarize/CSE the splats?

; The intermediate value %t4 (elements 0 and 1 written) is used by the final
; fadd in addition to the insert chain. The expected output contains two
; shuffles of %t: one with mask <0, 0, undef, undef> standing in for %t4
; and one full zero-mask splat standing in for %t6.
define <4 x float> @splat_undef3(float %arg) {
; CHECK-LABEL: @splat_undef3(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]]
; CHECK-NEXT: ret <4 x float> [[T7]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 2
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
%t7 = fadd <4 x float> %t6, %t4
ret <4 x float> %t7
}

; Negative test: one of the inserts uses a variable index (%k). The
; expected output is identical to the input.
define <4 x float> @bad6(float %arg, i32 %k) {
; CHECK-LABEL: @bad6(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i32 1
; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 [[K:%.*]]
; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> poison, float %arg, i32 0
%t4 = insertelement <4 x float> %t, float %arg, i32 1
%t5 = insertelement <4 x float> %t4, float %arg, i32 %k
%t6 = insertelement <4 x float> %t5, float %arg, i32 3
ret <4 x float> %t6
}

; Negative test: like @good5 the first insert has an extra user, but here it
; writes element 1 and element 0 is written last. The expected output is
; identical to the input.
define <4 x float> @bad7(float %v) {
; CHECK-LABEL: @bad7(
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> poison, float [[V:%.*]], i32 1
; CHECK-NEXT: [[A1:%.*]] = fadd <4 x float> [[INS1]], [[INS1]]
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[V]], i32 2
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[INS2]], float [[V]], i32 3
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x float> [[INS3]], float [[V]], i32 0
; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A1]], [[INS4]]
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%ins1 = insertelement <4 x float> poison, float %v, i32 1
%a1 = fadd <4 x float> %ins1, %ins1
%ins2 = insertelement<4 x float> %ins1, float %v, i32 2
%ins3 = insertelement<4 x float> %ins2, float %v, i32 3
%ins4 = insertelement<4 x float> %ins3, float %v, i32 0
%res = fadd <4 x float> %a1, %ins4
ret <4 x float> %res
}
332 changes: 332 additions & 0 deletions llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,332 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ANY,LE
; RUN: opt < %s -instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ANY,BE

; The constant index 16 is past the end of the 2-element vector; the extract
; is expected to fold to undef.
define i32 @extractelement_out_of_range(<2 x i32> %x) {
; ANY-LABEL: @extractelement_out_of_range(
; ANY-NEXT: ret i32 undef
;
%E1 = extractelement <2 x i32> %x, i8 16
ret i32 %E1
}

; The index value 0 is in range, but it is written with the very wide type
; i128. The extract is expected to be left exactly as written.
define i32 @extractelement_type_out_of_range(<2 x i32> %x) {
; ANY-LABEL: @extractelement_type_out_of_range(
; ANY-NEXT: [[E1:%.*]] = extractelement <2 x i32> [[X:%.*]], i128 0
; ANY-NEXT: ret i32 [[E1]]
;
%E1 = extractelement <2 x i32> %x, i128 0
ret i32 %E1
}

; Insert at lane 0, bitcast to a vector with the same element count, then
; extract lane 0. Expected result: a scalar bitcast of %f to i32.
define i32 @bitcasted_inselt_equal_num_elts(float %f) {
; ANY-LABEL: @bitcasted_inselt_equal_num_elts(
; ANY-NEXT: [[R:%.*]] = bitcast float [[F:%.*]] to i32
; ANY-NEXT: ret i32 [[R]]
;
%vf = insertelement <4 x float> poison, float %f, i32 0
%vi = bitcast <4 x float> %vf to <4 x i32>
%r = extractelement <4 x i32> %vi, i32 0
ret i32 %r
}

; Lane 0 of (splat(%in) + <0, 1, ..., 7>) is %in + 0, so the whole function is
; expected to fold to returning %in.
define i64 @test2(i64 %in) {
; ANY-LABEL: @test2(
; ANY-NEXT: ret i64 [[IN:%.*]]
;
%vec = insertelement <8 x i64> poison, i64 %in, i32 0
%splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
%add = add <8 x i64> %splat, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
%r = extractelement <8 x i64> %add, i32 0
ret i64 %r
}

; %x is inserted into i64 lane 0 (the base vector here is an all-zero constant,
; not poison), the vector is reinterpreted as <4 x i32>, and i32 lane 0 is read.
; Little-endian run expects the low half of %x (plain trunc); big-endian run
; expects the high half (lshr 32, then trunc).
define i32 @bitcasted_inselt_wide_source_zero_elt(i64 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32
; LE-NEXT: ret i32 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
; BE-NEXT: ret i32 [[R]]
;
%i = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
%b = bitcast <2 x i64> %i to <4 x i32>
%r = extractelement <4 x i32> %b, i32 0
ret i32 %r
}

; %x is inserted into i64 lane 1; i16 lane 4 of the bitcast is the first i16
; lane that overlaps that i64 lane (4 i16 lanes per i64).
; Little-endian run expects a plain trunc; big-endian run expects lshr 48 then
; trunc.
define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i16
; LE-NEXT: ret i16 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 48
; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i16
; BE-NEXT: ret i16 [[R]]
;
%i = insertelement <2 x i64> poison, i64 %x, i32 1
%b = bitcast <2 x i64> %i to <8 x i16>
%r = extractelement <8 x i16> %b, i32 4
ret i16 %r
}

; %x is inserted into i64 lane 0 and i32 lane 1 (the second half of that i64)
; is read. Little-endian run expects lshr 32 then trunc; big-endian run expects
; a plain trunc.
define i32 @bitcasted_inselt_wide_source_not_modulo_elt(i64 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
; LE-NEXT: ret i32 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
; BE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32
; BE-NEXT: ret i32 [[R]]
;
%i = insertelement <2 x i64> poison, i64 %x, i32 0
%b = bitcast <2 x i64> %i to <4 x i32>
%r = extractelement <4 x i32> %b, i32 1
ret i32 %r
}

; i8 lane 2 of an i32 placed in lane 0 (a quarter-width piece rather than a
; half). Little-endian run expects lshr 16; big-endian run expects lshr 8; both
; are followed by a trunc to i8.
define i8 @bitcasted_inselt_wide_source_not_modulo_elt_not_half(i32 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16
; LE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8
; LE-NEXT: ret i8 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
; BE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
; BE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8
; BE-NEXT: ret i8 [[R]]
;
%i = insertelement <2 x i32> poison, i32 %x, i32 0
%b = bitcast <2 x i32> %i to <8 x i8>
%r = extractelement <8 x i8> %b, i32 2
ret i8 %r
}

; Same pattern with non-power-of-2 widths: i3 lane 1 of an i15 placed in lane 0
; (5 i3 lanes per i15). Little-endian run expects a shift by 3; big-endian run
; expects a shift by 9.
define i3 @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(i15 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
; LE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 3
; LE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3
; LE-NEXT: ret i3 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
; BE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 9
; BE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3
; BE-NEXT: ret i3 [[R]]
;
%i = insertelement <3 x i15> poison, i15 %x, i32 0
%b = bitcast <3 x i15> %i to <15 x i3>
%r = extractelement <15 x i3> %b, i32 1
ret i3 %r
}

; Negative test for the above fold, but we can remove the insert here.
; The insert writes i32 lane 1, while the extracted i8 lane 2 lies inside i32
; lane 0 (4 i8 lanes per i32), so the expected output bitcasts %v directly and
; never references %x.

define i8 @bitcasted_inselt_wide_source_wrong_insert(<2 x i32> %v, i32 %x) {
; ANY-LABEL: @bitcasted_inselt_wide_source_wrong_insert(
; ANY-NEXT: [[B:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
; ANY-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 2
; ANY-NEXT: ret i8 [[R]]
;
%i = insertelement <2 x i32> %v, i32 %x, i32 1
%b = bitcast <2 x i32> %i to <8 x i8>
%r = extractelement <8 x i8> %b, i32 2
ret i8 %r
}

; Partial negative test for the above fold, extra uses are not allowed if shift is needed.

declare void @use(<8 x i8>)

; The bitcast %b has a second use (the call to @use).
; Little-endian run: the expected output keeps the extractelement.
; Big-endian run: the extract is expected to become a plain trunc of %x (no
; shift), while the insert and bitcast stay for the call.
define i8 @bitcasted_inselt_wide_source_uses(i32 %x) {
; LE-LABEL: @bitcasted_inselt_wide_source_uses(
; LE-NEXT: [[I:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
; LE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
; LE-NEXT: call void @use(<8 x i8> [[B]])
; LE-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 3
; LE-NEXT: ret i8 [[R]]
;
; BE-LABEL: @bitcasted_inselt_wide_source_uses(
; BE-NEXT: [[I:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
; BE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
; BE-NEXT: call void @use(<8 x i8> [[B]])
; BE-NEXT: [[R:%.*]] = trunc i32 [[X]] to i8
; BE-NEXT: ret i8 [[R]]
;
%i = insertelement <2 x i32> poison, i32 %x, i32 0
%b = bitcast <2 x i32> %i to <8 x i8>
call void @use(<8 x i8> %b)
%r = extractelement <8 x i8> %b, i32 3
ret i8 %r
}

; An i64 in lane 0 is reinterpreted as <4 x float> and float lane 1 is read.
; Expected output: integer shift/trunc down to i32, then a scalar bitcast to
; float. Little-endian run shifts by 32 first; big-endian run truncates directly.
define float @bitcasted_inselt_to_FP(i64 %x) {
; LE-LABEL: @bitcasted_inselt_to_FP(
; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
; LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; LE-NEXT: [[R:%.*]] = bitcast i32 [[TMP2]] to float
; LE-NEXT: ret float [[R]]
;
; BE-LABEL: @bitcasted_inselt_to_FP(
; BE-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
; BE-NEXT: [[R:%.*]] = bitcast i32 [[TMP1]] to float
; BE-NEXT: ret float [[R]]
;
%i = insertelement <2 x i64> poison, i64 %x, i32 0
%b = bitcast <2 x i64> %i to <4 x float>
%r = extractelement <4 x float> %b, i32 1
ret float %r
}

declare void @use_v2i128(<2 x i128>)
declare void @use_v8f32(<8 x float>)

; Integer-to-FP variant where the insertelement %i has an extra use (call to
; @use_v2i128). The expected output is the input unchanged for both data
; layouts.
define float @bitcasted_inselt_to_FP_uses(i128 %x) {
; ANY-LABEL: @bitcasted_inselt_to_FP_uses(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x i128> poison, i128 [[X:%.*]], i32 0
; ANY-NEXT: call void @use_v2i128(<2 x i128> [[I]])
; ANY-NEXT: [[B:%.*]] = bitcast <2 x i128> [[I]] to <8 x float>
; ANY-NEXT: [[R:%.*]] = extractelement <8 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x i128> poison, i128 %x, i32 0
call void @use_v2i128(<2 x i128> %i)
%b = bitcast <2 x i128> %i to <8 x float>
%r = extractelement <8 x float> %b, i32 1
ret float %r
}

; Integer-to-FP variant where the bitcast %b has an extra use (call to
; @use_v8f32). The expected output is the input unchanged for both data
; layouts.
define float @bitcasted_inselt_to_FP_uses2(i128 %x) {
; ANY-LABEL: @bitcasted_inselt_to_FP_uses2(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x i128> poison, i128 [[X:%.*]], i32 0
; ANY-NEXT: [[B:%.*]] = bitcast <2 x i128> [[I]] to <8 x float>
; ANY-NEXT: call void @use_v8f32(<8 x float> [[B]])
; ANY-NEXT: [[R:%.*]] = extractelement <8 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x i128> poison, i128 %x, i32 0
%b = bitcast <2 x i128> %i to <8 x float>
call void @use_v8f32(<8 x float> %b)
%r = extractelement <8 x float> %b, i32 1
ret float %r
}

; A double in lane 0 is reinterpreted as <4 x i32> and i32 lane 1 is read.
; Expected output: scalar bitcast of %x to i64 first, then lshr 32 + trunc
; (little-endian run) or trunc only (big-endian run).
define i32 @bitcasted_inselt_from_FP(double %x) {
; LE-LABEL: @bitcasted_inselt_from_FP(
; LE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
; LE-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 32
; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP2]] to i32
; LE-NEXT: ret i32 [[R]]
;
; BE-LABEL: @bitcasted_inselt_from_FP(
; BE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
; BE-NEXT: ret i32 [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
%b = bitcast <2 x double> %i to <4 x i32>
%r = extractelement <4 x i32> %b, i32 1
ret i32 %r
}

declare void @use_v2f64(<2 x double>)
declare void @use_v8i16(<8 x i16>)

; FP-to-integer variant where the insertelement %i has an extra use (call to
; @use_v2f64). The expected output is the input unchanged for both data
; layouts.
define i16 @bitcasted_inselt_from_FP_uses(double %x) {
; ANY-LABEL: @bitcasted_inselt_from_FP_uses(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: call void @use_v2f64(<2 x double> [[I]])
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <8 x i16>
; ANY-NEXT: [[R:%.*]] = extractelement <8 x i16> [[B]], i32 1
; ANY-NEXT: ret i16 [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
call void @use_v2f64(<2 x double> %i)
%b = bitcast <2 x double> %i to <8 x i16>
%r = extractelement <8 x i16> %b, i32 1
ret i16 %r
}

; FP-to-integer variant where the bitcast %b has an extra use (call to
; @use_v8i16). The expected output is the input unchanged for both data
; layouts.
define i16 @bitcasted_inselt_from_FP_uses2(double %x) {
; ANY-LABEL: @bitcasted_inselt_from_FP_uses2(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <8 x i16>
; ANY-NEXT: call void @use_v8i16(<8 x i16> [[B]])
; ANY-NEXT: [[R:%.*]] = extractelement <8 x i16> [[B]], i32 1
; ANY-NEXT: ret i16 [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
%b = bitcast <2 x double> %i to <8 x i16>
call void @use_v8i16(<8 x i16> %b)
%r = extractelement <8 x i16> %b, i32 1
ret i16 %r
}

; Both the inserted scalar (double) and the extracted lane (float) are
; floating-point. The expected output is the input unchanged for both data
; layouts, even though nothing here has an extra use.
define float @bitcasted_inselt_to_and_from_FP(double %x) {
; ANY-LABEL: @bitcasted_inselt_to_and_from_FP(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float>
; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
%b = bitcast <2 x double> %i to <4 x float>
%r = extractelement <4 x float> %b, i32 1
ret float %r
}

; FP-to-FP variant where the insertelement %i has an extra use (call to
; @use_v2f64). The expected output is the input unchanged.
define float @bitcasted_inselt_to_and_from_FP_uses(double %x) {
; ANY-LABEL: @bitcasted_inselt_to_and_from_FP_uses(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: call void @use_v2f64(<2 x double> [[I]])
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float>
; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
call void @use_v2f64(<2 x double> %i)
%b = bitcast <2 x double> %i to <4 x float>
%r = extractelement <4 x float> %b, i32 1
ret float %r
}

declare void @use_v4f32(<4 x float>)

; FP-to-FP variant where the bitcast %b has an extra use (call to @use_v4f32).
; The expected output is the input unchanged.
define float @bitcasted_inselt_to_and_from_FP_uses2(double %x) {
; ANY-LABEL: @bitcasted_inselt_to_and_from_FP_uses2(
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float>
; ANY-NEXT: call void @use_v4f32(<4 x float> [[B]])
; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
; ANY-NEXT: ret float [[R]]
;
%i = insertelement <2 x double> poison, double %x, i32 0
%b = bitcast <2 x double> %i to <4 x float>
call void @use_v4f32(<4 x float> %b)
%r = extractelement <4 x float> %b, i32 1
ret float %r
}

; This would crash/assert because the logic for collectShuffleElements()
; does not consider the possibility of invalid insert/extract operands.
; %e1 reads lane 4 of a 2-element vector. In the expected output that value
; appears as `double undef` in the final insertelement, and the valid
; extract/insert pair (%t3/%t4) is expressed as two shufflevectors.

define <4 x double> @invalid_extractelement(<2 x double> %a, <4 x double> %b, double* %p) {
; ANY-LABEL: @invalid_extractelement(
; ANY-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; ANY-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 undef, i32 1, i32 4, i32 3>
; ANY-NEXT: [[E:%.*]] = extractelement <4 x double> [[B]], i32 1
; ANY-NEXT: store double [[E]], double* [[P:%.*]], align 8
; ANY-NEXT: [[R:%.*]] = insertelement <4 x double> [[T4]], double undef, i64 0
; ANY-NEXT: ret <4 x double> [[R]]
;
%t3 = extractelement <2 x double> %a, i32 0
%t4 = insertelement <4 x double> %b, double %t3, i32 2
%e = extractelement <4 x double> %t4, i32 1
store double %e, double* %p
%e1 = extractelement <2 x double> %a, i32 4 ; invalid index
%r = insertelement <4 x double> %t4, double %e1, i64 0
ret <4 x double> %r
}
35 changes: 35 additions & 0 deletions llvm/test/Transforms/InstCombine/fold-vector-zero-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
; RUN: opt < %s -instcombine -S | not grep zeroinitializer

; Loop body computes (load & mask_a) | (zero_vec & mask_b) on <2 x i64> bit
; patterns of <2 x double> values, where
;   mask_a (%r10) = both lanes 9223372036854775807  (every bit except the sign bit)
;   mask_b (%r11) = both lanes -9223372036854775808 (only the sign bit)
; and zero_vec (%r7) is the bit pattern of an all-zero <2 x double> constant.
; The RUN line passes only if the optimized output no longer mentions that
; all-zero vector constant. Both masks are built with insertelement chains
; starting from poison.
define void @foo(i64 %A, i64 %B) {
bb8:
br label %bb30

bb30:
%s0 = phi i64 [ 0, %bb8 ], [ %r21, %bb30 ]
%l0 = phi i64 [ -2222, %bb8 ], [ %r23, %bb30 ]
%r2 = add i64 %s0, %B
%r3 = inttoptr i64 %r2 to <2 x double>*
%r4 = load <2 x double>, <2 x double>* %r3, align 8
%r6 = bitcast <2 x double> %r4 to <2 x i64>
%r7 = bitcast <2 x double> zeroinitializer to <2 x i64>
%r8 = insertelement <2 x i64> poison, i64 9223372036854775807, i32 0
%r9 = insertelement <2 x i64> poison, i64 -9223372036854775808, i32 0
%r10 = insertelement <2 x i64> %r8, i64 9223372036854775807, i32 1
%r11 = insertelement <2 x i64> %r9, i64 -9223372036854775808, i32 1
%r12 = and <2 x i64> %r6, %r10
%r13 = and <2 x i64> %r7, %r11
%r14 = or <2 x i64> %r12, %r13
%r15 = bitcast <2 x i64> %r14 to <2 x double>
%r18 = add i64 %s0, %A
%r19 = inttoptr i64 %r18 to <2 x double>*
store <2 x double> %r15, <2 x double>* %r19, align 8
%r21 = add i64 16, %s0
%r23 = add i64 1, %l0
%r25 = icmp slt i64 %r23, 0
%r26 = zext i1 %r25 to i64
%r27 = icmp ne i64 %r26, 0
br i1 %r27, label %bb30, label %bb5

bb5:
ret void
}
127 changes: 127 additions & 0 deletions llvm/test/Transforms/InstCombine/icmp-bc-vec-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Tests to verify proper functioning of the icmp folding implemented in
; InstCombiner::foldICmpBitCastConstant
; Specifically, folding:
; icmp <pred> iN X, C
; where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC) to iN
; and C is a splat of a K-bit pattern
; and SC is a constant vector = <C', C', C', ..., C'>
; Into:
; %E = extractelement <M x iK> %vec, i32 C'
; icmp <pred> iK %E, trunc(C)

; %val is splatted from lane 0 across <4 x i1>, the vector is bitcast to i4,
; and the result is compared equal to 0. Expected output: `xor %val, true`.
define i1 @test_i1_0(i1 %val) {
; CHECK-LABEL: @test_i1_0(
; CHECK-NEXT: [[COND:%.*]] = xor i1 [[VAL:%.*]], true
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i1> poison, i1 %val, i32 0
%vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i1> %vec to i4
%cond = icmp eq i4 %cast, 0
ret i1 %cond
}

; Same as @test_i1_0, except that %val is inserted at lane 2 and the splat mask
; selects lane 2. The expected output is identical: `xor %val, true`.
define i1 @test_i1_0_2(i1 %val) {
; CHECK-LABEL: @test_i1_0_2(
; CHECK-NEXT: [[COND:%.*]] = xor i1 [[VAL:%.*]], true
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i1> poison, i1 %val, i32 2
%vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
%cast = bitcast <4 x i1> %vec to i4
%cond = icmp eq i4 %cast, 0
ret i1 %cond
}

; The i4 bitcast of the splat is compared equal to -1 (all ones). Expected
; output: the function returns %val directly.
define i1 @test_i1_m1(i1 %val) {
; CHECK-LABEL: @test_i1_m1(
; CHECK-NEXT: ret i1 [[VAL:%.*]]
;
%insvec = insertelement <4 x i1> poison, i1 %val, i32 0
%vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i1> %vec to i4
%cond = icmp eq i4 %cast, -1
ret i1 %cond
}

; 1212696648 is 0x48484848, i.e. the byte 72 (0x48) repeated four times, so the
; i32 compare against a splat of %val is expected to become an i8 compare of
; %val against 72.
define i1 @test_i8_pattern(i8 %val) {
; CHECK-LABEL: @test_i8_pattern(
; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i8> poison, i8 %val, i32 0
%vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp eq i32 %cast, 1212696648
ret i1 %cond
}

; Same as @test_i8_pattern, except that %val is inserted at lane 2 and the
; splat mask selects lane 2. The expected output is identical.
define i1 @test_i8_pattern_2(i8 %val) {
; CHECK-LABEL: @test_i8_pattern_2(
; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i8> poison, i8 %val, i32 2
%vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp eq i32 %cast, 1212696648
ret i1 %cond
}

; Make sure we don't try to fold if the shufflemask has differing element values
; (the mask <1, 0, 3, 2> is a permutation, not a splat). The expected output is
; the input unchanged.
define i1 @test_i8_pattern_3(<4 x i8> %invec) {
; CHECK-LABEL: @test_i8_pattern_3(
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INVEC:%.*]], <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696648
; CHECK-NEXT: ret i1 [[COND]]
;
%vec = shufflevector <4 x i8> %invec, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp eq i32 %cast, 1212696648
ret i1 %cond
}

; Make sure we don't try to fold if the compared-to constant isn't a splatted value
; (1212696647 is 0x48484847: the bytes are not all equal). The expected output
; is the input unchanged.
define i1 @test_i8_nopattern(i8 %val) {
; CHECK-LABEL: @test_i8_nopattern(
; CHECK-NEXT: [[INSVEC:%.*]] = insertelement <4 x i8> poison, i8 [[VAL:%.*]], i32 0
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INSVEC]], <4 x i8> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696647
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i8> poison, i8 %val, i32 0
%vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp eq i32 %cast, 1212696647
ret i1 %cond
}

; Verify that we fold more than just the eq predicate
; (here `ult` against 0x48484848 is expected to become `ult i8 %val, 72`).
define i1 @test_i8_ult_pattern(i8 %val) {
; CHECK-LABEL: @test_i8_ult_pattern(
; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[VAL:%.*]], 72
; CHECK-NEXT: ret i1 [[COND]]
;
%insvec = insertelement <4 x i8> poison, i8 %val, i32 0
%vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
%cast = bitcast <4 x i8> %vec to i32
%cond = icmp ult i32 %cast, 1212696648
ret i1 %cond
}

; The shuffle widens a 2-element vector into a 3-element splat of lane 0, and
; 262657 (0x040201) is the i9 value 1 repeated three times. Expected output:
; extract lane 0 of %v and compare it `slt 1` at i9 width.
define i1 @extending_shuffle_with_weird_types(<2 x i9> %v) {
; CHECK-LABEL: @extending_shuffle_with_weird_types(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i9> [[V:%.*]], i32 0
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i9 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%splat = shufflevector <2 x i9> %v, <2 x i9> undef, <3 x i32> zeroinitializer
%cast = bitcast <3 x i9> %splat to i27
%cmp = icmp slt i27 %cast, 262657 ; 0x040201
ret i1 %cmp
}
635 changes: 635 additions & 0 deletions llvm/test/Transforms/InstCombine/inselt-binop-inseltpoison.ll

Large diffs are not rendered by default.

Large diffs are not rendered by default.

271 changes: 271 additions & 0 deletions llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32, <4 x i1> %mask, <4 x double> %passthru)
declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)

; Masked load with an all-false mask: the expected output returns the passthru
; operand and contains no load.
define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_zeromask(
; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
;
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
ret <2 x double> %res
}

; Masked load with an all-true mask: expected to become an ordinary vector load
; whose alignment (2) matches the intrinsic's alignment argument.
define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_onemask(
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]]
;
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
ret <2 x double> %res
}

; Mask is <true, undef>: the expected output is the same plain `load ... align 2`
; as for an all-true mask.
define <2 x double> @load_undefmask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_undefmask(
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]]
;
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 undef>, <2 x double> %passthru)
ret <2 x double> %res
}

@G = external global i8

; The second mask lane is a constant expression (ptrtoint of @G), so it is
; neither a literal true nor false. The expected output keeps the masked load
; call; the only textual difference is `i1 1` being printed as `i1 true`.
define <2 x double> @load_cemask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_cemask(
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 ptrtoint (i8* @G to i1)>, <2 x double> [[PASSTHRU:%.*]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 ptrtoint (i8* @G to i1)>, <2 x double> %passthru)
ret <2 x double> %res
}

; Mask <true, false> loads only lane 0, so only lane 1 of the passthru can reach
; the result. The expected output drops the lane-0 insert: the passthru becomes
; a single insertelement into undef at lane 1.
define <2 x double> @load_lane0(<2 x double>* %ptr, double %pt) {
; CHECK-LABEL: @load_lane0(
; CHECK-NEXT: [[PTV2:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 1
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> [[PTV2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> %ptv2)
ret <2 x double> %res
}

; Masked gather whose mask lane 1 is false. In the expected output the GEP index
; for lane 1 is replaced by undef; the gather call and the extract of lane 2
; are otherwise unchanged. %pt is unused.
define double @load_all(double* %base, double %pt) {
; CHECK-LABEL: @load_all(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 0, i64 undef, i64 2, i64 3>
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x double> [[RES]], i64 2
; CHECK-NEXT: ret double [[ELT]]
;
%ptrs = getelementptr double, double* %base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
%res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
%elt = extractelement <4 x double> %res, i64 2
ret double %elt
}

; Masked load with a variable mask and no pointer attributes. The expected
; output keeps the masked load call; the only change is that the two-insert
; passthru splat is rewritten as insertelement + shufflevector.
define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_generic(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
ret <2 x double> %res
}

; %ptr is marked dereferenceable(16) (the full size of <2 x double>) and
; align 4. With a variable mask, the expected output replaces the masked load
; by an unconditional load followed by a select between the loaded value and
; the passthru.
define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_speculative(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
ret <2 x double> %res
}

; Like @load_speculative, but %ptr carries no align attribute (only
; dereferenceable(16)). The expected output is still load + select, with the
; load's `align 4` matching the intrinsic's alignment argument.
define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_speculative_less_aligned(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
ret <2 x double> %res
}

; Can't speculate since only half of required size is known deref
; (%ptr is dereferenceable(8), while <2 x double> is 16 bytes). The expected
; output keeps the masked load call, with %ptr additionally marked nonnull.

define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_spec_neg_size(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
ret <2 x double> %res
}

; Can only speculate one lane (but it's the only one active)
; Lane 1 of the mask is forced to false via %mask2 and %ptr is
; dereferenceable(8). The expected output recorded here still contains the
; masked load call (with %ptr marked nonnull).
define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_spec_lan0(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PTV2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptv1 = insertelement <2 x double> poison, double %pt, i64 0
%ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
%mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask2, <2 x double> %ptv2)
ret <2 x double> %res
}

; Masked store with an all-false mask: the expected output contains no store at
; all.
define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) {
; CHECK-LABEL: @store_zeromask(
; CHECK-NEXT: ret void
;
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> zeroinitializer)
ret void
}

; Masked store with an all-true mask: expected to become an ordinary vector
; store with the intrinsic's alignment (4).
define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) {
; CHECK-LABEL: @store_onemask(
; CHECK-NEXT: store <2 x double> [[VAL:%.*]], <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT: ret void
;
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>)
ret void
}

; Mask <true, false> stores only lane 0 of the value. The expected output drops
; the lane-1 insert (%valvec2) and passes the single-insert vector to the
; masked store.
define void @store_demandedelts(<2 x double>* %ptr, double %val) {
; CHECK-LABEL: @store_demandedelts(
; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> poison, double [[VAL:%.*]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[VALVEC1]], <2 x double>* [[PTR:%.*]], i32 4, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT: ret void
;
%valvec1 = insertelement <2 x double> poison, double %val, i32 0
%valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %valvec2, <2 x double>* %ptr, i32 4, <2 x i1> <i1 true, i1 false>)
ret void
}

; Masked gather with fully variable pointers, mask and passthru: the expected
; output is the call unchanged.
define <2 x double> @gather_generic(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %passthru) {
; CHECK-LABEL: @gather_generic(
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PASSTHRU:%.*]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %passthru)
ret <2 x double> %res
}


; Masked gather with an all-false mask: the expected output returns the
; passthru operand and contains no gather.
define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru) {
; CHECK-LABEL: @gather_zeromask(
; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
;
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> zeroinitializer, <2 x double> %passthru)
ret <2 x double> %res
}


; Masked gather with an all-true mask: the gather call is kept, but the
; passthru operand is expected to be replaced by undef.
define <2 x double> @gather_onemask(<2 x double*> %ptrs, <2 x double> %passthru) {
; CHECK-LABEL: @gather_onemask(
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru)
ret <2 x double> %res
}

; Only mask lane 2 is true. In the expected output:
;   - the GEP indices for the inactive lanes 0, 1 and 3 become undef;
;   - the passthru splat's shuffle mask gets undef in lane 2 (the one lane that
;     is loaded instead of taken from the passthru).
define <4 x double> @gather_lane2(double* %base, double %pt) {
; CHECK-LABEL: @gather_lane2(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 undef, i64 undef, i64 2, i64 undef>
; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <4 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 0>
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> [[PT_V2]])
; CHECK-NEXT: ret <4 x double> [[RES]]
;
%ptrs = getelementptr double, double *%base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
%pt_v1 = insertelement <4 x double> poison, double %pt, i64 0
%pt_v2 = shufflevector <4 x double> %pt_v1, <4 x double> undef, <4 x i32> zeroinitializer
%res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %pt_v2)
ret <4 x double> %res
}

; Mask lane 1 is forced to false while lane 0 stays variable. The expected
; output keeps the full <0, 1> GEP index vector and the gather call; the only
; change is that the passthru splat is rewritten as insertelement +
; shufflevector.
define <2 x double> @gather_lane0_maybe(double* %base, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @gather_lane0_maybe(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
%pt_v1 = insertelement <2 x double> poison, double %pt, i64 0
%pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
%mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
ret <2 x double> %res
}

; Same input and same expected output as @gather_lane0_maybe.
; NOTE(review): the body and signature are identical to @gather_lane0_maybe;
; the `_spec` suffix suggests a variant with extra pointer attributes may have
; been intended -- confirm against the test's history.
define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @gather_lane0_maybe_spec(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
%pt_v1 = insertelement <2 x double> poison, double %pt, i64 0
%pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
%mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
ret <2 x double> %res
}


; Masked scatter with an all-false mask: the expected output contains no
; scatter call.
define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) {
; CHECK-LABEL: @scatter_zeromask(
; CHECK-NEXT: ret void
;
call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> zeroinitializer)
ret void
}

; Mask <true, false> scatters only lane 0. In the expected output the GEP index
; for lane 1 becomes undef and the lane-1 insert on the value vector
; (%valvec2) is dropped.
define void @scatter_demandedelts(double* %ptr, double %val) {
; CHECK-LABEL: @scatter_demandedelts(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[PTR:%.*]], <2 x i64> <i64 0, i64 undef>
; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> poison, double [[VAL:%.*]], i32 0
; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[VALVEC1]], <2 x double*> [[PTRS]], i32 8, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT: ret void
;
%ptrs = getelementptr double, double* %ptr, <2 x i64> <i64 0, i64 1>
%valvec1 = insertelement <2 x double> poison, double %val, i32 0
%valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %valvec2, <2 x double*> %ptrs, i32 8, <2 x i1> <i1 true, i1 false>)
ret void
}
41 changes: 41 additions & 0 deletions llvm/test/Transforms/InstCombine/pr38984-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "p:16:16"

@a = external global [21 x i16], align 1
@offsets = external global [4 x i16], align 1

; The "same gep" optimization should work with vector icmp.
; Both GEPs use the same base (null) and the same index vector, so the
; comparison is expected to fold to an all-true constant and every other
; instruction disappears.
define <4 x i1> @PR38984_1() {
; CHECK-LABEL: @PR38984_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 1
%1 = insertelement <4 x i16> poison, i16 %0, i32 3
%2 = getelementptr i32, i32* null, <4 x i16> %1
%3 = getelementptr i32, i32* null, <4 x i16> %1
%4 = icmp eq <4 x i32*> %2, %3
ret <4 x i1> %4
}

; The "compare base pointers" optimization should not kick in for vector icmp.
; Here the two GEPs share the index vector but have different bases (an
; address derived from @a versus null). The expected output keeps the load,
; both GEPs and the vector icmp; only the constant GEP into @a is printed in
; canonical form and the load gets an explicit alignment.
define <4 x i1> @PR38984_2() {
; CHECK-LABEL: @PR38984_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 2
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, i16* getelementptr inbounds ([21 x i16], [21 x i16]* @a, i16 1, i16 0), <4 x i16> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, i16* null, <4 x i16> [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16*> [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret <4 x i1> [[TMP4]]
;
entry:
%0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef)
%1 = insertelement <4 x i16> poison, i16 %0, i32 3
%2 = getelementptr i16, i16* getelementptr ([21 x i16], [21 x i16]* @a, i64 1, i32 0), <4 x i16> %1
%3 = getelementptr i16, i16* null, <4 x i16> %1
%4 = icmp eq <4 x i16*> %2, %3
ret <4 x i1> %4
}
335 changes: 335 additions & 0 deletions llvm/test/Transforms/InstCombine/scalarization-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

; Constant-index extract (lane 1) from a loaded <4 x i32>. The expected output
; is the same vector load followed by the same extractelement.
define i32 @extract_load(<4 x i32>* %p) {
; CHECK-LABEL: @extract_load(
; CHECK-NEXT: [[X:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[X]], i32 1
; CHECK-NEXT: ret i32 [[EXT]]
;
%x = load <4 x i32>, <4 x i32>* %p, align 4
%ext = extractelement <4 x i32> %x, i32 1
ret i32 %ext
}

; Floating-point variant: constant-index extract (lane 3) from a loaded
; <4 x double>. The expected output keeps the vector load and the extract.
define double @extract_load_fp(<4 x double>* %p) {
; CHECK-LABEL: @extract_load_fp(
; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 3
; CHECK-NEXT: ret double [[EXT]]
;
%x = load <4 x double>, <4 x double>* %p, align 32
%ext = extractelement <4 x double> %x, i32 3
ret double %ext
}

; The vector load is volatile. The expected output keeps the volatile load
; (now printed with an explicit align 32) and the lane-2 extract.
define double @extract_load_volatile(<4 x double>* %p) {
; CHECK-LABEL: @extract_load_volatile(
; CHECK-NEXT: [[X:%.*]] = load volatile <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 2
; CHECK-NEXT: ret double [[EXT]]
;
%x = load volatile <4 x double>, <4 x double>* %p
%ext = extractelement <4 x double> %x, i32 2
ret double %ext
}

; The loaded vector has a second use: it is also stored to %p2. The expected
; output keeps the vector load (align 8), the lane-0 extract and the store
; (printed with an explicit align 32).
define double @extract_load_extra_use(<4 x double>* %p, <4 x double>* %p2) {
; CHECK-LABEL: @extract_load_extra_use(
; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 8
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 0
; CHECK-NEXT: store <4 x double> [[X]], <4 x double>* [[P2:%.*]], align 32
; CHECK-NEXT: ret double [[EXT]]
;
%x = load <4 x double>, <4 x double>* %p, align 8
%ext = extractelement <4 x double> %x, i32 0
store <4 x double> %x, <4 x double>* %p2
ret double %ext
}

; The extract index %y is not a constant. The expected output keeps the vector
; load and the variable-index extractelement.
define double @extract_load_variable_index(<4 x double>* %p, i32 %y) {
; CHECK-LABEL: @extract_load_variable_index(
; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
; CHECK-NEXT: ret double [[EXT]]
;
%x = load <4 x double>, <4 x double>* %p
%ext = extractelement <4 x double> %x, i32 %y
ret double %ext
}

; A loop carries a <4 x float> phi that starts as a splat of %t0 and is
; multiplied by a splat constant each iteration; the only element ever read
; is lane 1. The expected output carries a scalar float phi and a scalar fmul
; instead, and the loop-exit compare is inverted (icmp eq with swapped branch
; targets).
; NOTE(review): %insert1 is defined in the entry block but never used.
define void @scalarize_phi(i32 * %n, float * %inout) {
; CHECK-LABEL: @scalarize_phi(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T0:%.*]] = load volatile float, float* [[INOUT:%.*]], align 4
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[N:%.*]], align 4
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.body:
; CHECK-NEXT: store volatile float [[TMP0]], float* [[INOUT]], align 4
; CHECK-NEXT: [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
%t0 = load volatile float, float * %inout, align 4
%insert = insertelement <4 x float> poison, float %t0, i32 0
%splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
%insert1 = insertelement <4 x float> poison, float 3.0, i32 0
br label %for.cond

for.cond:
%x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%t1 = load i32, i32 * %n, align 4
%cmp = icmp ne i32 %i.0, %t1
br i1 %cmp, label %for.body, label %for.end

for.body:
%t2 = extractelement <4 x float> %x.0, i32 1
store volatile float %t2, float * %inout, align 4
%mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
%inc = add nsw i32 %i.0, 1
br label %for.cond

for.end:
ret void
}

; Vector fadd with a splat constant, then a constant-index extract (lane 2).
; The expected output extracts lane 2 of %x first and does a scalar fadd with
; the splat value.
define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
; CHECK-LABEL: @extract_element_binop_splat_constant_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
; CHECK-NEXT: ret float [[R]]
;
%b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
%r = extractelement <4 x float> %b, i32 2
ret float %r
}

; The constant operand has an undef lane (lane 1), but the extract reads lane
; 0. The expected output is a scalar fdiv of 42.0 by lane 0 of %x.
define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
; CHECK-NEXT: [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
; CHECK-NEXT: ret double [[R]]
;
%b = fdiv <2 x double> <double 42.0, double undef>, %x
%r = extractelement <2 x double> %b, i32 0
ret double %r
}

; The constant operand is not a splat (<42.0, 43.0>) and the extract index is
; the constant 1. The expected output is a scalar fmul of lane 1 of %x by the
; matching constant lane, 43.0.
define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
; CHECK-NEXT: ret float [[R]]
;
%b = fmul <2 x float> %x, <float 42.0, float 43.0>
%r = extractelement <2 x float> %b, i32 1
ret float %r
}

; Splat constant divisor with a variable extract index. The expected output
; extracts lane %y of %x and does a scalar sdiv by 42.
define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
; CHECK-LABEL: @extract_element_binop_splat_variable_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[TMP1]], 42
; CHECK-NEXT: ret i8 [[R]]
;
%b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
%r = extractelement <4 x i8> %b, i32 %y
ret i8 %r
}

; The constant operand is a splat of 42 except for an undef lane, and the
; extract index is variable. The expected output keeps the vector mul and the
; variable-index extract unchanged.
define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
; CHECK-NEXT: [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
%r = extractelement <4 x i8> %b, i32 %y
ret i8 %r
}

; Non-splat constant shift amounts (with one undef lane) and a variable
; extract index. The expected output keeps the vector lshr and the
; variable-index extract unchanged.
define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
; CHECK-NEXT: [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
%r = extractelement <4 x i8> %b, i32 %y
ret i8 %r
}

; Vector fadd of an argument and a loaded vector, with only lane 2 read. The
; expected output keeps the vector load, extracts lane 2 from both operands
; and does a scalar fadd.
define float @extract_element_load(<4 x float> %x, <4 x float>* %ptr) {
; CHECK-LABEL: @extract_element_load(
; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[LOAD]], i32 2
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[R]]
;
%load = load <4 x float>, <4 x float>* %ptr
%add = fadd <4 x float> %x, %load
%r = extractelement <4 x float> %add, i32 2
ret float %r
}

; Same shape as a load + fadd + lane-2 extract, but the loaded vector is also
; stored to %ptr1, so it has a second use. The expected output keeps the
; vector fadd (with its operands commuted) followed by the extract.
define float @extract_element_multi_Use_load(<4 x float> %x, <4 x float>* %ptr0, <4 x float>* %ptr1) {
; CHECK-LABEL: @extract_element_multi_Use_load(
; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR0:%.*]], align 16
; CHECK-NEXT: store <4 x float> [[LOAD]], <4 x float>* [[PTR1:%.*]], align 16
; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[LOAD]], [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[ADD]], i32 2
; CHECK-NEXT: ret float [[R]]
;
%load = load <4 x float>, <4 x float>* %ptr0
store <4 x float> %load, <4 x float>* %ptr1
%add = fadd <4 x float> %x, %load
%r = extractelement <4 x float> %add, i32 2
ret float %r
}

; Vector fadd with a splat of 1.0 and a variable extract index. The expected
; output extracts lane %y of %x and does a scalar fadd with 1.0.
define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
; CHECK-LABEL: @extract_element_variable_index(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
; CHECK-NEXT: ret float [[R]]
;
%add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
%r = extractelement <4 x float> %add, i32 %y
ret float %r
}

; %f is inserted into lane 0 of %A, the result is multiplied by %B, and lane 0
; of the product is extracted. The expected output is a scalar fmul of lane 0
; of %B with %f; %A is no longer referenced and the nnan flag is kept.
define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
; CHECK-LABEL: @extelt_binop_insertelt(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[E:%.*]] = fmul nnan float [[TMP1]], [[F:%.*]]
; CHECK-NEXT: ret float [[E]]
;
%C = insertelement <4 x float> %A, float %f, i32 0
%D = fmul nnan <4 x float> %C, %B
%E = extractelement <4 x float> %D, i32 0
ret float %E
}

; We recurse to find a scalarizable operand.
; FIXME: We should propagate the IR flags including wrapping flags.
; In this test the input mul carries nsw, but the scalar mul in the expected
; output has no wrapping flag. The insertelement feeds an add, which feeds the
; mul, and both are expected to become scalar operations on lane 0.

define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
; CHECK-LABEL: @extelt_binop_binop_insertelt(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], [[F:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i32 0
; CHECK-NEXT: [[E:%.*]] = mul i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i32 [[E]]
;
%v = insertelement <4 x i32> %A, i32 %f, i32 0
%C = add <4 x i32> %v, %B
%D = mul nsw <4 x i32> %C, %B
%E = extractelement <4 x i32> %D, i32 0
ret i32 %E
}

; Variable-index extract from a constant vector. The expected output is the
; same extractelement, unchanged.
define float @extract_element_constant_vector_variable_index(i32 %y) {
; CHECK-LABEL: @extract_element_constant_vector_variable_index(
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
; CHECK-NEXT: ret float [[R]]
;
%r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
ret float %r
}

; A vector icmp against zero is and-ed with %y and lane 2 is extracted. The
; expected output extracts lane 2 of %x and of %y and performs a scalar icmp
; and a scalar and.
define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
; CHECK-LABEL: @cheap_to_extract_icmp(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i1 [[R]]
;
%cmp = icmp eq <4 x i32> %x, zeroinitializer
%and = and <4 x i1> %cmp, %y
%r = extractelement <4 x i1> %and, i32 2
ret i1 %r
}

; Floating-point variant: a vector fcmp oeq against zero is and-ed with %y and
; lane 2 is extracted. The expected output uses a scalar fcmp and a scalar and
; on lane 2 of each input.
define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
; CHECK-LABEL: @cheap_to_extract_fcmp(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i1 [[R]]
;
%cmp = fcmp oeq <4 x float> %x, zeroinitializer
%and = and <4 x i1> %cmp, %y
%r = extractelement <4 x i1> %and, i32 2
ret i1 %r
}

; Vector icmp with a constant right-hand side, lane 0 extracted. The expected
; output extracts lane 0 of %arg and compares it as a scalar.
define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 0
; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[EXT]]
;
%cmp = icmp eq <2 x i32> %arg, zeroinitializer
%ext = extractelement <2 x i1> %cmp, i32 0
ret i1 %ext
}

; Vector fcmp oeq with a constant right-hand side, lane 0 extracted. The
; expected output extracts lane 0 of %arg and compares it as a scalar.
define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 0
; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT: ret i1 [[EXT]]
;
%cmp = fcmp oeq <2 x float> %arg, zeroinitializer
%ext = extractelement <2 x i1> %cmp, i32 0
ret i1 %ext
}

; Vector icmp with a constant right-hand side and a variable extract index.
; The expected output extracts lane %idx of %arg and compares it as a scalar.
define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[EXT]]
;
%cmp = icmp eq <2 x i32> %arg, zeroinitializer
%ext = extractelement <2 x i1> %cmp, i32 %idx
ret i1 %ext
}

; Vector fcmp oeq with a constant right-hand side and a variable extract
; index. The expected output extracts lane %idx of %arg and compares it as a
; scalar.
define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT: ret i1 [[EXT]]
;
%cmp = fcmp oeq <2 x float> %arg, zeroinitializer
%ext = extractelement <2 x i1> %cmp, i32 %idx
ret i1 %ext
}

; The fadd result feeds both a volatile store and the fcmp, so it has more
; than one use. The expected output keeps the vector fadd and the vector fcmp
; (with its operands commuted) followed by the lane-0 extract.
; NOTE(review): the %idx parameter is unused; the extract uses the constant
; index 0.
define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
; CHECK-NEXT: store volatile <2 x float> [[ADD]], <2 x float>* undef, align 8
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[ADD]], [[ARG0:%.*]]
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i32 0
; CHECK-NEXT: ret i1 [[EXT]]
;
%add = fadd <2 x float> %arg1, %arg2
store volatile <2 x float> %add, <2 x float>* undef
%cmp = fcmp oeq <2 x float> %arg0, %add
%ext = extractelement <2 x i1> %cmp, i32 0
ret i1 %ext
}
213 changes: 213 additions & 0 deletions llvm/test/Transforms/InstCombine/select-extractelement-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s

declare void @v4float_user(<4 x float>) #0

; One element (lane 2) is extracted from a select between two vectors with a
; scalar condition. The expected output keeps the vector select; only the
; compare is inverted (icmp eq with the select arms swapped).
define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne i32 %c, 0
%sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %sel, i32 2
ret float %extract
}

; Multiple extractelements
; Lanes 1 and 2 of the select result are extracted and rebuilt into a
; <2 x float>. The expected output keeps the vector select and replaces the
; extract/insert chain with a single shufflevector using mask <1, 2>.
define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_two_select(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> [[BUILD2]]
;
%cmp = icmp ne i32 %c, 0
%sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
%extract1 = extractelement <4 x float> %sel, i32 1
%extract2 = extractelement <4 x float> %sel, i32 2
%build1 = insertelement <2 x float> poison, float %extract1, i32 0
%build2 = insertelement <2 x float> %build1, float %extract2, i32 1
ret <2 x float> %build2
}

; Select has an extra non-extractelement user, don't change it
; The select result is also passed whole to @v4float_user. The expected output
; keeps the vector select, the extract and the call; only the compare is
; inverted with the select arms swapped.
define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select_user(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne i32 %c, 0
%sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %sel, i32 2
call void @v4float_user(<4 x float> %sel)
ret float %extract
}

; Vector-condition variant of a select with an extra whole-vector user
; (@v4float_user). The expected output keeps the vector select, the lane-2
; extract and the call; the vector compare is inverted with the select arms
; swapped.
define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect_user(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne <4 x i32> %c, zeroinitializer
%sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %sel, i32 2
call void @v4float_user(<4 x float> %sel)
ret float %extract
}

; Do not convert the vector select into a scalar select. That would increase
; the instruction count and potentially obfuscate a vector min/max idiom.
; The expected output keeps the vector compare (inverted, with the select arms
; swapped), the vector select and the lane-0 extract.

define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SELECT]], i32 0
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne <4 x i32> %c, zeroinitializer
%select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %select, i32 0
ret float %extract
}

; Multiple extractelements from a vector select
; Lanes 1 and 2 are extracted and rebuilt into a <2 x float>. The expected
; output keeps the vector select and replaces the extract/insert chain with a
; single shufflevector using mask <1, 2>.
define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_two_vselect(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> [[BUILD2]]
;
%cmp = icmp ne <4 x i32> %c, zeroinitializer
%sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%extract1 = extractelement <4 x float> %sel, i32 1
%extract2 = extractelement <4 x float> %sel, i32 2
%build1 = insertelement <2 x float> poison, float %extract1, i32 0
%build2 = insertelement <2 x float> %build1, float %extract2, i32 1
ret <2 x float> %build2
}

; The vector selects are not decomposed into scalar selects because that would increase
; the instruction count. Extract+insert is converted to non-lane-crossing shuffles.
; Test multiple extractelements
; For each lane i in 0..3 the input selects a whole vector (%a or %b) on lane
; i of %c, extracts lane i from the selected vector and inserts it into lane
; i of the result. In the expected output the four whole-vector selects
; remain and the results are combined with three shufflevectors whose masks
; keep every element in its original lane.
define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_vector_select(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: [[A_SINK:%.*]] = select i1 [[TOBOOL_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[TOBOOL1_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: [[A_SINK1:%.*]] = select i1 [[TOBOOL1_NOT]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[TOBOOL6_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT: [[A_SINK2:%.*]] = select i1 [[TOBOOL6_NOT]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT: [[TOBOOL11_NOT:%.*]] = icmp eq i32 [[TMP5]], 0
; CHECK-NEXT: [[A_SINK3:%.*]] = select i1 [[TOBOOL11_NOT]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[A_SINK3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP6]]
;
entry:
%0 = extractelement <4 x i32> %c, i32 0
%tobool = icmp ne i32 %0, 0
%a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b
%1 = extractelement <4 x float> %a.sink, i32 0
%2 = insertelement <4 x float> poison, float %1, i32 0
%3 = extractelement <4 x i32> %c, i32 1
%tobool1 = icmp ne i32 %3, 0
%a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b
%4 = extractelement <4 x float> %a.sink1, i32 1
%5 = insertelement <4 x float> %2, float %4, i32 1
%6 = extractelement <4 x i32> %c, i32 2
%tobool6 = icmp ne i32 %6, 0
%a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b
%7 = extractelement <4 x float> %a.sink2, i32 2
%8 = insertelement <4 x float> %5, float %7, i32 2
%9 = extractelement <4 x i32> %c, i32 3
%tobool11 = icmp ne i32 %9, 0
%a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b
%10 = extractelement <4 x float> %a.sink3, i32 3
%11 = insertelement <4 x float> %8, float %10, i32 3
ret <4 x float> %11
}

; A scalar select condition extracted from lane 3 of %condv drives a select
; between two vectors. The expected output splats lane 3 of %condv with a
; shufflevector and uses it as a vector select condition.
define <4 x i32> @extract_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @extract_cond(
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <4 x i1> %condv, i32 3
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}

; The select condition is already a splat of lane 3 of %condv. The expected
; output is the same shufflevector + vector select, unchanged.
define <4 x i32> @splat_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @splat_cond(
; CHECK-NEXT: [[SPLATCOND:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[SPLATCOND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%splatcond = shufflevector <4 x i1> %condv, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%r = select <4 x i1> %splatcond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}

declare void @extra_use(i1)

; Negative test
; The extracted condition is also passed to @extra_use, so it has a second
; use. The expected output keeps the scalar extract and the scalar-condition
; select unchanged.

define <4 x i32> @extract_cond_extra_use(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @extract_cond_extra_use(
; CHECK-NEXT: [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i32 3
; CHECK-NEXT: call void @extra_use(i1 [[COND]])
; CHECK-NEXT: [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <4 x i1> %condv, i32 3
call void @extra_use(i1 %cond)
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}

; Negative test
; The condition is extracted with a variable index (%index). The expected
; output keeps the extract and the scalar-condition select unchanged.

define <4 x i32> @extract_cond_variable_index(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv, i32 %index) {
; CHECK-LABEL: @extract_cond_variable_index(
; CHECK-NEXT: [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i32 [[INDEX:%.*]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <4 x i1> %condv, i32 %index
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}

; IR shuffle can alter the number of elements in the vector, so this is ok.
; The condition vector has 5 elements while the selected vectors have 4. The
; expected output splats lane 1 of the <5 x i1> input into a <4 x i1> with a
; shufflevector and uses that as the vector select condition.

define <4 x i32> @extract_cond_type_mismatch(<4 x i32> %x, <4 x i32> %y, <5 x i1> %condv) {
; CHECK-LABEL: @extract_cond_type_mismatch(
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <5 x i1> [[CONDV:%.*]], <5 x i1> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <5 x i1> %condv, i32 1
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}


attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
122 changes: 122 additions & 0 deletions llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; This test makes sure that these instructions are properly eliminated.
;
; RUN: opt < %s -instcombine -S | FileCheck %s

; shl C1, (add A, C2) with C1 = 6, C2 = 5 and A zero-extended from i16. The
; expected output folds the add into the shifted constant: 6 << 5 == 192, so
; the result is shl 192, A.
define i32 @shl_C1_add_A_C2_i32(i16 %A) {
; CHECK-LABEL: @shl_C1_add_A_C2_i32(
; CHECK-NEXT: [[B:%.*]] = zext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]]
; CHECK-NEXT: ret i32 [[D]]
;
%B = zext i16 %A to i32
%C = add i32 %B, 5
%D = shl i32 6, %C
ret i32 %D
}

; ashr C1, (add A, C2) with C1 = 6, C2 = 5 and A masked to its low 16 bits.
; The expected output is the constant 0 (6 shifted right by 5 is already 0).
define i32 @ashr_C1_add_A_C2_i32(i32 %A) {
; CHECK-LABEL: @ashr_C1_add_A_C2_i32(
; CHECK-NEXT: ret i32 0
;
%B = and i32 %A, 65535
%C = add i32 %B, 5
%D = ashr i32 6, %C
ret i32 %D
}

; lshr C1, (add A, C2) with C1 = 6, C2 = 5 and A masked to its low 16 bits.
; The shift amount is at least 5 and 6 >> 5 == 0, so the whole expression is
; expected to fold to the constant 0, matching the scalar ashr variant.
; This test previously used shl in its body although it is named lshr, which
; duplicated the scalar shl coverage and left the scalar lshr fold untested.
; NOTE(review): the assertions below were written by hand; regenerate them
; with utils/update_test_checks.py to confirm the exact output.
define i32 @lshr_C1_add_A_C2_i32(i32 %A) {
; CHECK-LABEL: @lshr_C1_add_A_C2_i32(
; CHECK-NEXT: ret i32 0
;
%B = and i32 %A, 65535
%C = add i32 %B, 5
%D = lshr i32 6, %C
ret i32 %D
}

; Vector shl C1, (add A, C2) with per-lane constants. The expected output
; folds the add constants into the shifted constant lane by lane:
; 6 << 0 == 6, 2 << 1 == 4, -7 << 16 == -458752, and the lane whose add
; constant is 50 (>= the 32-bit width) becomes poison.
define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) {
; CHECK-LABEL: @shl_C1_add_A_C2_v4i32(
; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[B]]
; CHECK-NEXT: ret <4 x i32> [[D]]
;
%B = zext <4 x i16> %A to <4 x i32>
%C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
%D = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
ret <4 x i32> %D
}

; Vector ashr C1, (add A, C2) with per-lane constants and A masked per lane.
; The expected output folds the add constants into the shifted constant:
; 6 >> 0 == 6, 2 >> 1 == 1, -7 >>s 16 == -1, and the lane whose add constant
; is 50 becomes poison.
define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) {
; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32(
; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[B]]
; CHECK-NEXT: ret <4 x i32> [[D]]
;
%B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
%C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
%D = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
ret <4 x i32> %D
}

; Vector lshr C1, (add A, C2) with per-lane constants and A masked per lane.
; The expected output folds the add constants into the shifted constant:
; 6 >> 0 == 6, 2 >> 1 == 1, -7 >>u 16 == 65535, and the lane whose add
; constant is 50 becomes poison.
define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) {
; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32(
; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[B]]
; CHECK-NEXT: ret <4 x i32> [[D]]
;
%B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
%C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
%D = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
ret <4 x i32> %D
}

; Same shl fold, but the variable shift amount is a splat of a zero-extended
; i16 built with insertelement + shufflevector. The expected output keeps the
; splat construction and folds the add constants into the shifted constant
; (<6, 4, poison, -458752>).
define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) {
; CHECK-LABEL: @shl_C1_add_A_C2_v4i32_splat(
; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32
; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[C]]
; CHECK-NEXT: ret <4 x i32> [[E]]
;
%A = zext i16 %I to i32
%B = insertelement <4 x i32> poison, i32 %A, i32 0
%C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
%D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16>
%E = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D
ret <4 x i32> %E
}

; Same ashr fold, but the variable shift amount is a splat of a zero-extended
; i16. The expected output keeps the splat construction and folds the add
; constants into the shifted constant (<6, 1, poison, -1>).
define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) {
; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32_splat(
; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32
; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[C]]
; CHECK-NEXT: ret <4 x i32> [[E]]
;
%A = zext i16 %I to i32
%B = insertelement <4 x i32> poison, i32 %A, i32 0
%C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
%D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16>
%E = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D
ret <4 x i32> %E
}

; Same lshr fold, but the variable shift amount is a splat of a zero-extended
; i16. The expected output keeps the splat construction and folds the add
; constants into the shifted constant (<6, 1, poison, 65535>).
define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) {
; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32_splat(
; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32
; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[C]]
; CHECK-NEXT: ret <4 x i32> [[E]]
;
%A = zext i16 %I to i32
%B = insertelement <4 x i32> poison, i32 %A, i32 0
%C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
%D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16>
%E = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D
ret <4 x i32> %E
}
122 changes: 122 additions & 0 deletions llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S -o - | FileCheck %s

; This test case was added as a reproducer for a miscompile, where instcombine
; introduced an
; srem <2 x i16> %1, <i16 undef, i16 2>
; instruction, which makes the whole srem undefined (even if we only end up
; extracting the second element in the vector).
;
; The assertions below expect the srem to be applied to the un-splatted insert
; with a fully defined divisor (<i16 2, i16 1>). Undef is only expected in the
; shuffle mask and in lane 0 of the folded select constant, never as a divisor.
define i16 @test_srem_orig(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_srem_orig(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i16> [[SPLATINSERT]], <i16 2, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 undef, i16 1>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: [[T3:%.*]] = extractelement <2 x i16> [[T2]], i32 1
; CHECK-NEXT: ret i16 [[T3]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
; An all-zero mask broadcasts %a to both lanes.
%splat = shufflevector <2 x i16> %splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
%t1 = select i1 %cmp, <2 x i16> <i16 1, i16 1>, <2 x i16> %splat
%t2 = srem <2 x i16> %t1, <i16 2, i16 2>
; Only lane 1 of the remainder is observed by the caller.
%t3 = extractelement <2 x i16> %t2, i32 1
ret i16 %t3
}

; This is basically a reduced version of test_srem_orig (based on what the
; code would look like after a few iterations of instcombine, just before we
; try to transform the shufflevector by doing
; "evaluateInDifferentElementOrder").
;
; The expected output matches the input instruction for instruction: the
; shufflevector stays after the srem, and the divisor remains the fully
; defined <i16 2, i16 1>.
define <2 x i16> @test_srem(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_srem(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[T1:%.*]] = srem <2 x i16> [[SPLATINSERT]], <i16 2, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: ret <2 x i16> [[T2]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
%t1 = srem <2 x i16> %splatinsert, <i16 2, i16 1>
; Mask <undef, 0>: result lane 1 takes lane 0 of %t1; result lane 0 is
; unspecified.
%splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op
ret <2 x i16> %t2
}

; Unsigned-remainder counterpart of test_srem. The expected output matches the
; input instruction for instruction: the shufflevector stays after the urem,
; and the divisor remains the fully defined <i16 3, i16 1>.
define <2 x i16> @test_urem(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_urem(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[T1:%.*]] = urem <2 x i16> [[SPLATINSERT]], <i16 3, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: ret <2 x i16> [[T2]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
%t1 = urem <2 x i16> %splatinsert, <i16 3, i16 1>
; Mask <undef, 0>: result lane 1 takes lane 0 of %t1; result lane 0 is
; unspecified.
%splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op
ret <2 x i16> %t2
}

; Signed-division counterpart of test_srem. The expected output matches the
; input instruction for instruction: the shufflevector stays after the sdiv,
; and the divisor remains the fully defined <i16 2, i16 1>.
define <2 x i16> @test_sdiv(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_sdiv(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[T1:%.*]] = sdiv <2 x i16> [[SPLATINSERT]], <i16 2, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: ret <2 x i16> [[T2]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
%t1 = sdiv <2 x i16> %splatinsert, <i16 2, i16 1>
; Mask <undef, 0>: result lane 1 takes lane 0 of %t1; result lane 0 is
; unspecified.
%splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op
ret <2 x i16> %t2
}

; Unsigned-division counterpart of test_srem. The expected output matches the
; input instruction for instruction: the shufflevector stays after the udiv,
; and the divisor remains the fully defined <i16 3, i16 1>.
define <2 x i16> @test_udiv(i16 %a, i1 %cmp) {
; CHECK-LABEL: @test_udiv(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0
; CHECK-NEXT: [[T1:%.*]] = udiv <2 x i16> [[SPLATINSERT]], <i16 3, i16 1>
; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]]
; CHECK-NEXT: ret <2 x i16> [[T2]]
;
%splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0
%t1 = udiv <2 x i16> %splatinsert, <i16 3, i16 1>
; Mask <undef, 0>: result lane 1 takes lane 0 of %t1; result lane 0 is
; unspecified.
%splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op
ret <2 x i16> %t2
}

; For fdiv we do not need to worry about div by undef. Verify that the
; shufflevector is eliminated here.
;
; In the expected output, %a is inserted directly into lane 1 and the divisor
; is permuted to match (<float undef, float 3.0>), so no shufflevector
; remains. Note that %b is not referenced by the body.
define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_fdiv(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> poison, float %a, i32 0
; Overwrites the undef lane, giving the divisor <3.0, 1.0>.
%denom = insertelement <2 x float> <float 3.0, float undef>, float 1.0, i32 1
%t1 = fdiv <2 x float> %splatinsert, %denom
; Mask <undef, 0>: result lane 1 takes lane 0 of %t1 (%a / 3.0); result lane 0
; is unspecified.
%splat.op = shufflevector <2 x float> %t1, <2 x float> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x float> <float 77.0, float 99.0>, <2 x float> %splat.op
ret <2 x float> %t2
}

; For frem we do not need to worry about div by undef. Verify that the
; shufflevector is eliminated here.
;
; In the expected output, %a is inserted directly into lane 1 and the divisor
; is permuted to match (<float undef, float 3.0>), so no shufflevector
; remains. Note that %b is not referenced by the body.
define <2 x float> @test_frem(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_frem(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = frem <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> poison, float %a, i32 0
; Overwrites the undef lane, giving the divisor <3.0, 1.0>.
%denom = insertelement <2 x float> <float 3.0, float undef>, float 1.0, i32 1
%t1 = frem <2 x float> %splatinsert, %denom
; Mask <undef, 0>: result lane 1 takes lane 0 of %t1 (%a rem 3.0); result
; lane 0 is unspecified.
%splat.op = shufflevector <2 x float> %t1, <2 x float> undef, <2 x i32> <i32 undef, i32 0>
%t2 = select i1 %cmp, <2 x float> <float 77.0, float 99.0>, <2 x float> %splat.op
ret <2 x float> %t2
}
Loading