| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,305 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s | ||
|
|
||
| ; A constant is inserted into lane 1 before the scalar vfrcz.sd call; the expected output calls the intrinsic on %a directly, with the insert removed. | ||
| define <2 x double> @test_vfrcz_sd(<2 x double> %a) { | ||
| ; CHECK-LABEL: @test_vfrcz_sd( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[A:%.*]]) | ||
| ; CHECK-NEXT: ret <2 x double> [[TMP1]] | ||
| ; | ||
| %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 | ||
| %2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1) | ||
| ret <2 x double> %2 | ||
| } | ||
|
|
||
| ; Lane 0 of the vfrcz.sd result is extracted; the expected output keeps the lane-0 insert, the call and the extract, and drops the lane-1 insert. | ||
| define double @test_vfrcz_sd_0(double %a) { | ||
| ; CHECK-LABEL: @test_vfrcz_sd_0( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]]) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 | ||
| ; CHECK-NEXT: ret double [[TMP3]] | ||
| ; | ||
| %1 = insertelement <2 x double> poison, double %a, i32 0 | ||
| %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 | ||
| %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2) | ||
| %4 = extractelement <2 x double> %3, i32 0 | ||
| ret double %4 | ||
| } | ||
|
|
||
| ; Lane 1 of the vfrcz.sd result is extracted; the expected output is the constant 0.0 with no intrinsic call left. | ||
| define double @test_vfrcz_sd_1(double %a) { | ||
| ; CHECK-LABEL: @test_vfrcz_sd_1( | ||
| ; CHECK-NEXT: ret double 0.000000e+00 | ||
| ; | ||
| %1 = insertelement <2 x double> poison, double %a, i32 0 | ||
| %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 | ||
| %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2) | ||
| %4 = extractelement <2 x double> %3, i32 1 | ||
| ret double %4 | ||
| } | ||
|
|
||
| ; Constants are inserted into lanes 1-3 before the scalar vfrcz.ss call; the expected output calls the intrinsic on %a directly, with all three inserts removed. | ||
| define <4 x float> @test_vfrcz_ss(<4 x float> %a) { | ||
| ; CHECK-LABEL: @test_vfrcz_ss( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[A:%.*]]) | ||
| ; CHECK-NEXT: ret <4 x float> [[TMP1]] | ||
| ; | ||
| %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 | ||
| %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 | ||
| %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 | ||
| %4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3) | ||
| ret <4 x float> %4 | ||
| } | ||
|
|
||
| ; Lane 0 of the vfrcz.ss result is extracted; the expected output keeps the lane-0 insert, the call and the extract, and drops the inserts into lanes 1-3. | ||
| define float @test_vfrcz_ss_0(float %a) { | ||
| ; CHECK-LABEL: @test_vfrcz_ss_0( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]]) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 | ||
| ; CHECK-NEXT: ret float [[TMP3]] | ||
| ; | ||
| %1 = insertelement <4 x float> poison, float %a, i32 0 | ||
| %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 | ||
| %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 | ||
| %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 | ||
| %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4) | ||
| %6 = extractelement <4 x float> %5, i32 0 | ||
| ret float %6 | ||
| } | ||
|
|
||
| ; Lane 3 of the vfrcz.ss result is extracted; the expected output is the constant 0.0 with no intrinsic call left. | ||
| define float @test_vfrcz_ss_3(float %a) { | ||
| ; CHECK-LABEL: @test_vfrcz_ss_3( | ||
| ; CHECK-NEXT: ret float 0.000000e+00 | ||
| ; | ||
| %1 = insertelement <4 x float> poison, float %a, i32 0 | ||
| %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 | ||
| %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 | ||
| %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 | ||
| %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4) | ||
| %6 = extractelement <4 x float> %5, i32 3 | ||
| ret float %6 | ||
| } | ||
|
|
||
| ; The vpcomltq intrinsic call is expected to be replaced by icmp slt followed by sext back to <2 x i64>. | ||
| define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) { | ||
| ; CHECK-LABEL: @cmp_slt_v2i64( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i64> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> | ||
| ; CHECK-NEXT: ret <2 x i64> [[TMP2]] | ||
| ; | ||
| %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b) | ||
| ret <2 x i64> %1 | ||
| } | ||
|
|
||
| ; The vpcomltuq intrinsic call is expected to be replaced by icmp ult followed by sext back to <2 x i64>. | ||
| define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) { | ||
| ; CHECK-LABEL: @cmp_ult_v2i64( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> | ||
| ; CHECK-NEXT: ret <2 x i64> [[TMP2]] | ||
| ; | ||
| %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b) | ||
| ret <2 x i64> %1 | ||
| } | ||
|
|
||
| ; The vpcomleq intrinsic call is expected to be replaced by icmp sle followed by sext back to <2 x i64>. | ||
| define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) { | ||
| ; CHECK-LABEL: @cmp_sle_v2i64( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp sle <2 x i64> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> | ||
| ; CHECK-NEXT: ret <2 x i64> [[TMP2]] | ||
| ; | ||
| %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b) | ||
| ret <2 x i64> %1 | ||
| } | ||
|
|
||
| ; The vpcomleuq intrinsic call is expected to be replaced by icmp ule followed by sext back to <2 x i64>. | ||
| define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) { | ||
| ; CHECK-LABEL: @cmp_ule_v2i64( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> | ||
| ; CHECK-NEXT: ret <2 x i64> [[TMP2]] | ||
| ; | ||
| %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b) | ||
| ret <2 x i64> %1 | ||
| } | ||
|
|
||
| ; The vpcomgtd intrinsic call is expected to be replaced by icmp sgt followed by sext back to <4 x i32>. | ||
| define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) { | ||
| ; CHECK-LABEL: @cmp_sgt_v4i32( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> | ||
| ; CHECK-NEXT: ret <4 x i32> [[TMP2]] | ||
| ; | ||
| %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b) | ||
| ret <4 x i32> %1 | ||
| } | ||
|
|
||
| ; The vpcomgtud intrinsic call is expected to be replaced by icmp ugt followed by sext back to <4 x i32>. | ||
| define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) { | ||
| ; CHECK-LABEL: @cmp_ugt_v4i32( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> | ||
| ; CHECK-NEXT: ret <4 x i32> [[TMP2]] | ||
| ; | ||
| %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b) | ||
| ret <4 x i32> %1 | ||
| } | ||
|
|
||
| ; The vpcomged intrinsic call is expected to be replaced by icmp sge followed by sext back to <4 x i32>. | ||
| define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) { | ||
| ; CHECK-LABEL: @cmp_sge_v4i32( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> | ||
| ; CHECK-NEXT: ret <4 x i32> [[TMP2]] | ||
| ; | ||
| %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b) | ||
| ret <4 x i32> %1 | ||
| } | ||
|
|
||
| ; The vpcomgeud intrinsic call is expected to be replaced by icmp uge followed by sext back to <4 x i32>. | ||
| define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) { | ||
| ; CHECK-LABEL: @cmp_uge_v4i32( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> | ||
| ; CHECK-NEXT: ret <4 x i32> [[TMP2]] | ||
| ; | ||
| %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b) | ||
| ret <4 x i32> %1 | ||
| } | ||
|
|
||
| ; The vpcomeqw intrinsic call is expected to be replaced by icmp eq followed by sext back to <8 x i16>. | ||
| define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) { | ||
| ; CHECK-LABEL: @cmp_seq_v8i16( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> | ||
| ; CHECK-NEXT: ret <8 x i16> [[TMP2]] | ||
| ; | ||
| %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b) | ||
| ret <8 x i16> %1 | ||
| } | ||
|
|
||
| ; The vpcomequw intrinsic call is expected to be replaced by the same icmp eq + sext sequence as its signed counterpart. | ||
| define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) { | ||
| ; CHECK-LABEL: @cmp_ueq_v8i16( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> | ||
| ; CHECK-NEXT: ret <8 x i16> [[TMP2]] | ||
| ; | ||
| %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b) | ||
| ret <8 x i16> %1 | ||
| } | ||
|
|
||
| ; The vpcomnew intrinsic call is expected to be replaced by icmp ne followed by sext back to <8 x i16>. | ||
| define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) { | ||
| ; CHECK-LABEL: @cmp_sne_v8i16( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> | ||
| ; CHECK-NEXT: ret <8 x i16> [[TMP2]] | ||
| ; | ||
| %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b) | ||
| ret <8 x i16> %1 | ||
| } | ||
|
|
||
| ; The vpcomneuw intrinsic call is expected to be replaced by the same icmp ne + sext sequence as its signed counterpart. | ||
| define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) { | ||
| ; CHECK-LABEL: @cmp_une_v8i16( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> | ||
| ; CHECK-NEXT: ret <8 x i16> [[TMP2]] | ||
| ; | ||
| %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b) | ||
| ret <8 x i16> %1 | ||
| } | ||
|
|
||
| ; The vpcomtrueb intrinsic call is expected to fold to an all-ones (-1 per lane) constant vector, independent of %a and %b. | ||
| define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { | ||
| ; CHECK-LABEL: @cmp_strue_v16i8( | ||
| ; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> | ||
| ; | ||
| %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b) | ||
| ret <16 x i8> %1 | ||
| } | ||
|
|
||
| ; The vpcomtrueub intrinsic call is expected to fold to an all-ones (-1 per lane) constant vector, independent of %a and %b. | ||
| define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) { | ||
| ; CHECK-LABEL: @cmp_utrue_v16i8( | ||
| ; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> | ||
| ; | ||
| %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b) | ||
| ret <16 x i8> %1 | ||
| } | ||
|
|
||
| ; The vpcomfalseb intrinsic call is expected to fold to zeroinitializer, independent of %a and %b. | ||
| define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) { | ||
| ; CHECK-LABEL: @cmp_sfalse_v16i8( | ||
| ; CHECK-NEXT: ret <16 x i8> zeroinitializer | ||
| ; | ||
| %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b) | ||
| ret <16 x i8> %1 | ||
| } | ||
|
|
||
| ; The vpcomfalseub intrinsic call is expected to fold to zeroinitializer, independent of %a and %b. | ||
| define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) { | ||
| ; CHECK-LABEL: @cmp_ufalse_v16i8( | ||
| ; CHECK-NEXT: ret <16 x i8> zeroinitializer | ||
| ; | ||
| %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b) | ||
| ret <16 x i8> %1 | ||
| } | ||
|
|
||
| declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone | ||
| declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone | ||
|
|
||
| declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone | ||
| declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone | ||
|
|
||
| declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone | ||
| declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone | ||
|
|
||
| declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone | ||
| declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone | ||
|
|
||
| declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone | ||
| declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone | ||
|
|
||
| declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone | ||
| declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone | ||
|
|
||
| declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone | ||
| declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone | ||
|
|
||
| declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone | ||
| declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone | ||
|
|
||
| declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone | ||
| declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone | ||
| declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone | ||
| declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,167 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S | FileCheck %s | ||
|
|
||
| ; A <1 x i64> to double bitcast is expected to become a vector bitcast to <1 x double> followed by an extract of lane 0. | ||
| define double @a(<1 x i64> %y) { | ||
| ; CHECK-LABEL: @a( | ||
| ; CHECK-NEXT: [[BC:%.*]] = bitcast <1 x i64> [[Y:%.*]] to <1 x double> | ||
| ; CHECK-NEXT: [[C:%.*]] = extractelement <1 x double> [[BC]], i32 0 | ||
| ; CHECK-NEXT: ret double [[C]] | ||
| ; | ||
| %c = bitcast <1 x i64> %y to double | ||
| ret double %c | ||
| } | ||
|
|
||
| ; A <1 x i64> to i64 bitcast is expected to become an extractelement of lane 0. | ||
| define i64 @b(<1 x i64> %y) { | ||
| ; CHECK-LABEL: @b( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[Y:%.*]], i32 0 | ||
| ; CHECK-NEXT: ret i64 [[TMP1]] | ||
| ; | ||
| %c = bitcast <1 x i64> %y to i64 | ||
| ret i64 %c | ||
| } | ||
|
|
||
| ; A double to <1 x i64> bitcast is expected to be left as a single bitcast. | ||
| define <1 x i64> @c(double %y) { | ||
| ; CHECK-LABEL: @c( | ||
| ; CHECK-NEXT: [[C:%.*]] = bitcast double [[Y:%.*]] to <1 x i64> | ||
| ; CHECK-NEXT: ret <1 x i64> [[C]] | ||
| ; | ||
| %c = bitcast double %y to <1 x i64> | ||
| ret <1 x i64> %c | ||
| } | ||
|
|
||
| ; An i64 to <1 x i64> bitcast is expected to become an insertelement into an undef vector at lane 0. | ||
| define <1 x i64> @d(i64 %y) { | ||
| ; CHECK-LABEL: @d( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> undef, i64 [[Y:%.*]], i32 0 | ||
| ; CHECK-NEXT: ret <1 x i64> [[TMP1]] | ||
| ; | ||
| %c = bitcast i64 %y to <1 x i64> | ||
| ret <1 x i64> %c | ||
| } | ||
|
|
||
| ; A <1 x i64> to x86_mmx bitcast is expected to become an extract of lane 0 followed by a scalar i64 to x86_mmx bitcast. | ||
| define x86_mmx @e(<1 x i64> %y) { | ||
| ; CHECK-LABEL: @e( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[Y:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[C:%.*]] = bitcast i64 [[TMP1]] to x86_mmx | ||
| ; CHECK-NEXT: ret x86_mmx [[C]] | ||
| ; | ||
| %c = bitcast <1 x i64> %y to x86_mmx | ||
| ret x86_mmx %c | ||
| } | ||
|
|
||
| ; An x86_mmx to <1 x i64> bitcast is expected to become a scalar bitcast to i64 followed by an insert into undef at lane 0. | ||
| define <1 x i64> @f(x86_mmx %y) { | ||
| ; CHECK-LABEL: @f( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[Y:%.*]] to i64 | ||
| ; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0 | ||
| ; CHECK-NEXT: ret <1 x i64> [[C]] | ||
| ; | ||
| %c = bitcast x86_mmx %y to <1 x i64> | ||
| ret <1 x i64> %c | ||
| } | ||
|
|
||
| ; The x86_mmx -> <1 x i64> -> double bitcast chain is expected to collapse into one x86_mmx to double bitcast. | ||
| define double @g(x86_mmx %x) { | ||
| ; CHECK-LABEL: @g( | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast x86_mmx [[X:%.*]] to double | ||
| ; CHECK-NEXT: ret double [[TMP0]] | ||
| ; | ||
| entry: | ||
| %0 = bitcast x86_mmx %x to <1 x i64> | ||
| %1 = bitcast <1 x i64> %0 to double | ||
| ret double %1 | ||
| } | ||
|
|
||
| ; FP source is ok. | ||
|
|
||
| ; The double is bitcast to i64 and inserted at a variable index; the expected output inserts the double first and bitcasts the whole vector afterwards. | ||
| define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) { | ||
| ; CHECK-LABEL: @bitcast_inselt_undef( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 [[IDX:%.*]] | ||
| ; CHECK-NEXT: [[I:%.*]] = bitcast <3 x double> [[TMP1]] to <3 x i64> | ||
| ; CHECK-NEXT: ret <3 x i64> [[I]] | ||
| ; | ||
| %xb = bitcast double %x to i64 | ||
| %i = insertelement <3 x i64> poison, i64 %xb, i32 %idx | ||
| ret <3 x i64> %i | ||
| } | ||
|
|
||
| ; Integer source is ok; index is anything. | ||
|
|
||
| ; The i32 is bitcast to float and inserted at an i567 index; the expected output inserts the i32 first (index type kept) and bitcasts the whole vector afterwards. | ||
| define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) { | ||
| ; CHECK-LABEL: @bitcast_inselt_undef_fp( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]] | ||
| ; CHECK-NEXT: [[I:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float> | ||
| ; CHECK-NEXT: ret <3 x float> [[I]] | ||
| ; | ||
| %xb = bitcast i32 %x to float | ||
| %i = insertelement <3 x float> poison, float %xb, i567 %idx | ||
| ret <3 x float> %i | ||
| } | ||
|
|
||
| ; Scalable-vector variant: the expected output inserts the i32 into a <vscale x 3 x i32> first and bitcasts the whole vector afterwards. | ||
| define <vscale x 3 x float> @bitcast_inselt_undef_vscale(i32 %x, i567 %idx) { | ||
| ; CHECK-LABEL: @bitcast_inselt_undef_vscale( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <vscale x 3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]] | ||
| ; CHECK-NEXT: [[I:%.*]] = bitcast <vscale x 3 x i32> [[TMP1]] to <vscale x 3 x float> | ||
| ; CHECK-NEXT: ret <vscale x 3 x float> [[I]] | ||
| ; | ||
| %xb = bitcast i32 %x to float | ||
| %i = insertelement <vscale x 3 x float> poison, float %xb, i567 %idx | ||
| ret <vscale x 3 x float> %i | ||
| } | ||
|
|
||
| declare void @use(i64) | ||
|
|
||
| ; Negative test - extra use prevents canonicalization | ||
|
|
||
| ; %xb is also passed to @use, so the expected output keeps the scalar bitcast and the insert exactly as written. | ||
| define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) { | ||
| ; CHECK-LABEL: @bitcast_inselt_undef_extra_use( | ||
| ; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 | ||
| ; CHECK-NEXT: call void @use(i64 [[XB]]) | ||
| ; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> poison, i64 [[XB]], i32 [[IDX:%.*]] | ||
| ; CHECK-NEXT: ret <3 x i64> [[I]] | ||
| ; | ||
| %xb = bitcast double %x to i64 | ||
| call void @use(i64 %xb) | ||
| %i = insertelement <3 x i64> poison, i64 %xb, i32 %idx | ||
| ret <3 x i64> %i | ||
| } | ||
|
|
||
| ; Negative test - source type must be scalar | ||
|
|
||
| ; The inserted i64 comes from a <2 x i32> bitcast; the expected output keeps the bitcast and the insert exactly as written. | ||
| define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) { | ||
| ; CHECK-LABEL: @bitcast_inselt_undef_vec_src( | ||
| ; CHECK-NEXT: [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64 | ||
| ; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> poison, i64 [[XB]], i32 [[IDX:%.*]] | ||
| ; CHECK-NEXT: ret <3 x i64> [[I]] | ||
| ; | ||
| %xb = bitcast <2 x i32> %x to i64 | ||
| %i = insertelement <3 x i64> poison, i64 %xb, i32 %idx | ||
| ret <3 x i64> %i | ||
| } | ||
|
|
||
| ; Negative test - an x86_mmx source is not canonicalized | ||
|
|
||
| ; The inserted i64 comes from an x86_mmx bitcast; the expected output keeps the bitcast and the insert exactly as written. | ||
| define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) { | ||
| ; CHECK-LABEL: @bitcast_inselt_undef_from_mmx( | ||
| ; CHECK-NEXT: [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64 | ||
| ; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> poison, i64 [[XB]], i32 [[IDX:%.*]] | ||
| ; CHECK-NEXT: ret <3 x i64> [[I]] | ||
| ; | ||
| %xb = bitcast x86_mmx %x to i64 | ||
| %i = insertelement <3 x i64> poison, i64 %xb, i32 %idx | ||
| ret <3 x i64> %i | ||
| } | ||
|
|
||
| ; Reduce number of casts | ||
|
|
||
| ; Two per-lane double to i64 bitcasts feed an insert chain; the expected output inserts both doubles and applies a single <2 x double> to <2 x i64> bitcast. | ||
| define <2 x i64> @PR45748(double %x, double %y) { | ||
| ; CHECK-LABEL: @PR45748( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[Y:%.*]], i32 1 | ||
| ; CHECK-NEXT: [[I1:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64> | ||
| ; CHECK-NEXT: ret <2 x i64> [[I1]] | ||
| ; | ||
| %xb = bitcast double %x to i64 | ||
| %i0 = insertelement <2 x i64> poison, i64 %xb, i32 0 | ||
| %yb = bitcast double %y to i64 | ||
| %i1 = insertelement <2 x i64> %i0, i64 %yb, i32 1 | ||
| ret <2 x i64> %i1 | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,179 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt -instcombine -S < %s | FileCheck %s | ||
|
|
||
| ; %arg is inserted into lanes 0-3 in order; the expected output keeps the lane-0 insert and splats it with a zero-mask shufflevector. | ||
| define <4 x float> @good1(float %arg) { | ||
| ; CHECK-LABEL: @good1( | ||
| ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: ret <4 x float> [[T6]] | ||
| ; | ||
| %t = insertelement <4 x float> poison, float %arg, i32 0 | ||
| %t4 = insertelement <4 x float> %t, float %arg, i32 1 | ||
| %t5 = insertelement <4 x float> %t4, float %arg, i32 2 | ||
| %t6 = insertelement <4 x float> %t5, float %arg, i32 3 | ||
| ret <4 x float> %t6 | ||
| } | ||
|
|
||
| ; %arg is inserted into all four lanes in the order 1, 2, 0, 3; the expected output creates a lane-0 insert into undef and splats it with a zero-mask shufflevector. | ||
| define <4 x float> @good2(float %arg) { | ||
| ; CHECK-LABEL: @good2( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: ret <4 x float> [[T6]] | ||
| ; | ||
| %t = insertelement <4 x float> poison, float %arg, i32 1 | ||
| %t4 = insertelement <4 x float> %t, float %arg, i32 2 | ||
| %t5 = insertelement <4 x float> %t4, float %arg, i32 0 | ||
| %t6 = insertelement <4 x float> %t5, float %arg, i32 3 | ||
| ret <4 x float> %t6 | ||
| } | ||
|
|
||
| ; The insert chain starts from zeroinitializer but overwrites every lane with %arg; the expected output inserts into undef at lane 0 and splats with a zero-mask shufflevector. | ||
| define <4 x float> @good3(float %arg) { | ||
| ; CHECK-LABEL: @good3( | ||
| ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: ret <4 x float> [[T6]] | ||
| ; | ||
| %t = insertelement <4 x float> zeroinitializer, float %arg, i32 0 | ||
| %t4 = insertelement <4 x float> %t, float %arg, i32 1 | ||
| %t5 = insertelement <4 x float> %t4, float %arg, i32 2 | ||
| %t6 = insertelement <4 x float> %t5, float %arg, i32 3 | ||
| ret <4 x float> %t6 | ||
| } | ||
|
|
||
| ; The full splat of %arg is added to itself; the expected output performs the fadd on the single lane-0 insert and splats the sum afterwards. | ||
| define <4 x float> @good4(float %arg) { | ||
| ; CHECK-LABEL: @good4( | ||
| ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[T]], [[T]] | ||
| ; CHECK-NEXT: [[T7:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: ret <4 x float> [[T7]] | ||
| ; | ||
| %t = insertelement <4 x float> zeroinitializer, float %arg, i32 0 | ||
| %t4 = insertelement <4 x float> %t, float %arg, i32 1 | ||
| %t5 = insertelement <4 x float> %t4, float %arg, i32 2 | ||
| %t6 = insertelement <4 x float> %t5, float %arg, i32 3 | ||
| %t7 = fadd <4 x float> %t6, %t6 | ||
| ret <4 x float> %t7 | ||
| } | ||
|
|
||
| ; The lane-0 insert %ins1 has an extra use in the first fadd; the expected output keeps %ins1 and reuses it as the source of the zero-mask splat shuffle. | ||
| define <4 x float> @good5(float %v) { | ||
| ; CHECK-LABEL: @good5( | ||
| ; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> poison, float [[V:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[A1:%.*]] = fadd <4 x float> [[INS1]], [[INS1]] | ||
| ; CHECK-NEXT: [[INS4:%.*]] = shufflevector <4 x float> [[INS1]], <4 x float> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A1]], [[INS4]] | ||
| ; CHECK-NEXT: ret <4 x float> [[RES]] | ||
| ; | ||
| %ins1 = insertelement <4 x float> poison, float %v, i32 0 | ||
| %a1 = fadd <4 x float> %ins1, %ins1 | ||
| %ins2 = insertelement<4 x float> %ins1, float %v, i32 1 | ||
| %ins3 = insertelement<4 x float> %ins2, float %v, i32 2 | ||
| %ins4 = insertelement<4 x float> %ins3, float %v, i32 3 | ||
| %res = fadd <4 x float> %a1, %ins4 | ||
| ret <4 x float> %res | ||
| } | ||
|
|
||
| ; The insert is changed to allow the canonical shuffle-splat pattern from element 0. | ||
|
|
||
| ; Lane 0 is never written (lane 1 is inserted twice); the expected output inserts at lane 0 of undef and shuffles with an undef mask element in lane 0. | ||
| define <4 x float> @splat_undef1(float %arg) { | ||
| ; CHECK-LABEL: @splat_undef1( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0> | ||
| ; CHECK-NEXT: ret <4 x float> [[T6]] | ||
| ; | ||
| %t = insertelement <4 x float> poison, float %arg, i32 1 | ||
| %t4 = insertelement <4 x float> %t, float %arg, i32 1 | ||
| %t5 = insertelement <4 x float> %t4, float %arg, i32 2 | ||
| %t6 = insertelement <4 x float> %t5, float %arg, i32 3 | ||
| ret <4 x float> %t6 | ||
| } | ||
|
|
||
| ; Re-uses the existing first insertelement. | ||
|
|
||
| ; Lane 1 is never written; the expected output keeps the lane-0 insert and shuffles with an undef mask element in lane 1. | ||
| define <4 x float> @splat_undef2(float %arg) { | ||
| ; CHECK-LABEL: @splat_undef2( | ||
| ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0> | ||
| ; CHECK-NEXT: ret <4 x float> [[T6]] | ||
| ; | ||
| %t = insertelement <4 x float> poison, float %arg, i32 0 | ||
| %t5 = insertelement <4 x float> %t, float %arg, i32 2 | ||
| %t6 = insertelement <4 x float> %t5, float %arg, i32 3 | ||
| ret <4 x float> %t6 | ||
| } | ||
|
|
||
| ; Lane 1 receives a different scalar (%arg2), so this is not a splat; the expected output keeps all four inserts exactly as written. | ||
| define <4 x float> @bad3(float %arg, float %arg2) { | ||
| ; CHECK-LABEL: @bad3( | ||
| ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG2:%.*]], i32 1 | ||
| ; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2 | ||
| ; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3 | ||
| ; CHECK-NEXT: ret <4 x float> [[T6]] | ||
| ; | ||
| %t = insertelement <4 x float> poison, float %arg, i32 0 | ||
| %t4 = insertelement <4 x float> %t, float %arg2, i32 1 | ||
| %t5 = insertelement <4 x float> %t4, float %arg, i32 2 | ||
| %t6 = insertelement <4 x float> %t5, float %arg, i32 3 | ||
| ret <4 x float> %t6 | ||
| } | ||
|
|
||
| ; Single-element vector: the expected output keeps the lone insertelement and introduces no shuffle. | ||
| define <1 x float> @bad4(float %arg) { | ||
| ; CHECK-LABEL: @bad4( | ||
| ; CHECK-NEXT: [[T:%.*]] = insertelement <1 x float> poison, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: ret <1 x float> [[T]] | ||
| ; | ||
| %t = insertelement <1 x float> poison, float %arg, i32 0 | ||
| ret <1 x float> %t | ||
| } | ||
|
|
||
| ; Multiple undef elements are ok. | ||
| ; TODO: Multiple uses trigger the transform at %t4, but we should sink/scalarize/CSE the splats? | ||
|
|
||
| ; %t4 (lanes 0-1 written) is used by the fadd as well as by the rest of the insert chain; the expected output has two shuffles of %t, one with undef mask elements in lanes 2-3 and one full splat. | ||
| define <4 x float> @splat_undef3(float %arg) { | ||
| ; CHECK-LABEL: @splat_undef3( | ||
| ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef> | ||
| ; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]] | ||
| ; CHECK-NEXT: ret <4 x float> [[T7]] | ||
| ; | ||
| %t = insertelement <4 x float> poison, float %arg, i32 0 | ||
| %t4 = insertelement <4 x float> %t, float %arg, i32 1 | ||
| %t5 = insertelement <4 x float> %t4, float %arg, i32 2 | ||
| %t6 = insertelement <4 x float> %t5, float %arg, i32 3 | ||
| %t7 = fadd <4 x float> %t6, %t4 | ||
| ret <4 x float> %t7 | ||
| } | ||
|
|
||
| ; One insert in the chain uses the variable index %k; the expected output keeps all four inserts exactly as written. | ||
| define <4 x float> @bad6(float %arg, i32 %k) { | ||
| ; CHECK-LABEL: @bad6( | ||
| ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i32 1 | ||
| ; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 [[K:%.*]] | ||
| ; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3 | ||
| ; CHECK-NEXT: ret <4 x float> [[T6]] | ||
| ; | ||
| %t = insertelement <4 x float> poison, float %arg, i32 0 | ||
| %t4 = insertelement <4 x float> %t, float %arg, i32 1 | ||
| %t5 = insertelement <4 x float> %t4, float %arg, i32 %k | ||
| %t6 = insertelement <4 x float> %t5, float %arg, i32 3 | ||
| ret <4 x float> %t6 | ||
| } | ||
|
|
||
| ; The first insert is at lane 1 (not lane 0) and has an extra use in the fadd; the expected output keeps the whole insert chain exactly as written. | ||
| define <4 x float> @bad7(float %v) { | ||
| ; CHECK-LABEL: @bad7( | ||
| ; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> poison, float [[V:%.*]], i32 1 | ||
| ; CHECK-NEXT: [[A1:%.*]] = fadd <4 x float> [[INS1]], [[INS1]] | ||
| ; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[V]], i32 2 | ||
| ; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[INS2]], float [[V]], i32 3 | ||
| ; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x float> [[INS3]], float [[V]], i32 0 | ||
| ; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A1]], [[INS4]] | ||
| ; CHECK-NEXT: ret <4 x float> [[RES]] | ||
| ; | ||
| %ins1 = insertelement <4 x float> poison, float %v, i32 1 | ||
| %a1 = fadd <4 x float> %ins1, %ins1 | ||
| %ins2 = insertelement<4 x float> %ins1, float %v, i32 2 | ||
| %ins3 = insertelement<4 x float> %ins2, float %v, i32 3 | ||
| %ins4 = insertelement<4 x float> %ins3, float %v, i32 0 | ||
| %res = fadd <4 x float> %a1, %ins4 | ||
| ret <4 x float> %res | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,332 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ANY,LE | ||
| ; RUN: opt < %s -instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ANY,BE | ||
|
|
||
| ; The constant index 16 is past the end of the 2-element vector; the expected | ||
| ; output is a bare 'ret i32 undef'. | ||
| define i32 @extractelement_out_of_range(<2 x i32> %x) { | ||
| ; ANY-LABEL: @extractelement_out_of_range( | ||
| ; ANY-NEXT: ret i32 undef | ||
| ; | ||
| %E1 = extractelement <2 x i32> %x, i8 16 | ||
| ret i32 %E1 | ||
| } | ||
|
|
||
| ; The index is an i128 constant (value 0, in range); the CHECK lines expect | ||
| ; the extractelement to be kept with its i128 index type. | ||
| define i32 @extractelement_type_out_of_range(<2 x i32> %x) { | ||
| ; ANY-LABEL: @extractelement_type_out_of_range( | ||
| ; ANY-NEXT: [[E1:%.*]] = extractelement <2 x i32> [[X:%.*]], i128 0 | ||
| ; ANY-NEXT: ret i32 [[E1]] | ||
| ; | ||
| %E1 = extractelement <2 x i32> %x, i128 0 | ||
| ret i32 %E1 | ||
| } | ||
|
|
||
| ; Insert at lane 0, bitcast to a vector with the same lane count, extract | ||
| ; lane 0: expected to become a scalar bitcast of %f to i32. | ||
| define i32 @bitcasted_inselt_equal_num_elts(float %f) { | ||
| ; ANY-LABEL: @bitcasted_inselt_equal_num_elts( | ||
| ; ANY-NEXT: [[R:%.*]] = bitcast float [[F:%.*]] to i32 | ||
| ; ANY-NEXT: ret i32 [[R]] | ||
| ; | ||
| %vf = insertelement <4 x float> poison, float %f, i32 0 | ||
| %vi = bitcast <4 x float> %vf to <4 x i32> | ||
| %r = extractelement <4 x i32> %vi, i32 0 | ||
| ret i32 %r | ||
| } | ||
|
|
||
| ; Lane 0 of (splat(%in) + <0, 1, ..., 7>) adds 0 to %in; the expected output | ||
| ; returns %in directly. | ||
| define i64 @test2(i64 %in) { | ||
| ; ANY-LABEL: @test2( | ||
| ; ANY-NEXT: ret i64 [[IN:%.*]] | ||
| ; | ||
| %vec = insertelement <8 x i64> poison, i64 %in, i32 0 | ||
| %splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer | ||
| %add = add <8 x i64> %splat, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7> | ||
| %r = extractelement <8 x i64> %add, i32 0 | ||
| ret i64 %r | ||
| } | ||
|
|
||
| ; i64 %x inserted at lane 0, viewed as <4 x i32>, lane 0 extracted. | ||
| ; Little-endian expects a plain trunc; big-endian expects lshr by 32 then trunc. | ||
| define i32 @bitcasted_inselt_wide_source_zero_elt(i64 %x) { | ||
| ; LE-LABEL: @bitcasted_inselt_wide_source_zero_elt( | ||
| ; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32 | ||
| ; LE-NEXT: ret i32 [[R]] | ||
| ; | ||
| ; BE-LABEL: @bitcasted_inselt_wide_source_zero_elt( | ||
| ; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32 | ||
| ; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32 | ||
| ; BE-NEXT: ret i32 [[R]] | ||
| ; | ||
| %i = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 | ||
| %b = bitcast <2 x i64> %i to <4 x i32> | ||
| %r = extractelement <4 x i32> %b, i32 0 | ||
| ret i32 %r | ||
| } | ||
|
|
||
| ; i64 %x inserted at lane 1, viewed as <8 x i16>, lane 4 extracted. | ||
| ; Little-endian expects a plain trunc; big-endian expects lshr by 48 then trunc. | ||
| define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) { | ||
| ; LE-LABEL: @bitcasted_inselt_wide_source_modulo_elt( | ||
| ; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i16 | ||
| ; LE-NEXT: ret i16 [[R]] | ||
| ; | ||
| ; BE-LABEL: @bitcasted_inselt_wide_source_modulo_elt( | ||
| ; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 48 | ||
| ; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i16 | ||
| ; BE-NEXT: ret i16 [[R]] | ||
| ; | ||
| %i = insertelement <2 x i64> poison, i64 %x, i32 1 | ||
| %b = bitcast <2 x i64> %i to <8 x i16> | ||
| %r = extractelement <8 x i16> %b, i32 4 | ||
| ret i16 %r | ||
| } | ||
|
|
||
| ; i64 %x inserted at lane 0, viewed as <4 x i32>, lane 1 extracted. | ||
| ; Little-endian expects lshr by 32 then trunc; big-endian expects a plain trunc. | ||
| define i32 @bitcasted_inselt_wide_source_not_modulo_elt(i64 %x) { | ||
| ; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt( | ||
| ; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32 | ||
| ; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32 | ||
| ; LE-NEXT: ret i32 [[R]] | ||
| ; | ||
| ; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt( | ||
| ; BE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32 | ||
| ; BE-NEXT: ret i32 [[R]] | ||
| ; | ||
| %i = insertelement <2 x i64> poison, i64 %x, i32 0 | ||
| %b = bitcast <2 x i64> %i to <4 x i32> | ||
| %r = extractelement <4 x i32> %b, i32 1 | ||
| ret i32 %r | ||
| } | ||
|
|
||
| ; i32 %x inserted at lane 0, viewed as <8 x i8>, lane 2 extracted. | ||
| ; Expected shift before the trunc: 16 on little-endian, 8 on big-endian. | ||
| define i8 @bitcasted_inselt_wide_source_not_modulo_elt_not_half(i32 %x) { | ||
| ; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half( | ||
| ; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16 | ||
| ; LE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8 | ||
| ; LE-NEXT: ret i8 [[R]] | ||
| ; | ||
| ; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half( | ||
| ; BE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8 | ||
| ; BE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8 | ||
| ; BE-NEXT: ret i8 [[R]] | ||
| ; | ||
| %i = insertelement <2 x i32> poison, i32 %x, i32 0 | ||
| %b = bitcast <2 x i32> %i to <8 x i8> | ||
| %r = extractelement <8 x i8> %b, i32 2 | ||
| ret i8 %r | ||
| } | ||
|
|
||
| ; Non-power-of-two widths: i15 %x inserted at lane 0 of <3 x i15>, viewed as | ||
| ; <15 x i3>, lane 1 extracted. Expected shift: 3 on little-endian, 9 on big-endian. | ||
| define i3 @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(i15 %x) { | ||
| ; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types( | ||
| ; LE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 3 | ||
| ; LE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3 | ||
| ; LE-NEXT: ret i3 [[R]] | ||
| ; | ||
| ; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types( | ||
| ; BE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 9 | ||
| ; BE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3 | ||
| ; BE-NEXT: ret i3 [[R]] | ||
| ; | ||
| %i = insertelement <3 x i15> poison, i15 %x, i32 0 | ||
| %b = bitcast <3 x i15> %i to <15 x i3> | ||
| %r = extractelement <15 x i3> %b, i32 1 | ||
| ret i3 %r | ||
| } | ||
|
|
||
| ; Negative test for the above fold, but we can remove the insert here. | ||
|
|
||
| ; %x is inserted at lane 1 of %v while i8 lane 2 of the bitcast is extracted. | ||
| ; The CHECK lines expect the insert to be dropped and %v to be bitcast directly, | ||
| ; with the extractelement kept. | ||
| define i8 @bitcasted_inselt_wide_source_wrong_insert(<2 x i32> %v, i32 %x) { | ||
| ; ANY-LABEL: @bitcasted_inselt_wide_source_wrong_insert( | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> | ||
| ; ANY-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 2 | ||
| ; ANY-NEXT: ret i8 [[R]] | ||
| ; | ||
| %i = insertelement <2 x i32> %v, i32 %x, i32 1 | ||
| %b = bitcast <2 x i32> %i to <8 x i8> | ||
| %r = extractelement <8 x i8> %b, i32 2 | ||
| ret i8 %r | ||
| } | ||
|
|
||
| ; Partial negative test for the above fold: extra uses are not allowed if a shift is needed. | ||
|
|
||
| declare void @use(<8 x i8>) | ||
|
|
||
| ; The bitcast %b has a second use (the call to @use). Little-endian expects | ||
| ; the extractelement to stay; big-endian still expects a plain trunc of %x. | ||
| define i8 @bitcasted_inselt_wide_source_uses(i32 %x) { | ||
| ; LE-LABEL: @bitcasted_inselt_wide_source_uses( | ||
| ; LE-NEXT: [[I:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0 | ||
| ; LE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8> | ||
| ; LE-NEXT: call void @use(<8 x i8> [[B]]) | ||
| ; LE-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 3 | ||
| ; LE-NEXT: ret i8 [[R]] | ||
| ; | ||
| ; BE-LABEL: @bitcasted_inselt_wide_source_uses( | ||
| ; BE-NEXT: [[I:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0 | ||
| ; BE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8> | ||
| ; BE-NEXT: call void @use(<8 x i8> [[B]]) | ||
| ; BE-NEXT: [[R:%.*]] = trunc i32 [[X]] to i8 | ||
| ; BE-NEXT: ret i8 [[R]] | ||
| ; | ||
| %i = insertelement <2 x i32> poison, i32 %x, i32 0 | ||
| %b = bitcast <2 x i32> %i to <8 x i8> | ||
| call void @use(<8 x i8> %b) | ||
| %r = extractelement <8 x i8> %b, i32 3 | ||
| ret i8 %r | ||
| } | ||
|
|
||
| ; Integer source, FP result: i64 %x inserted at lane 0, float lane 1 extracted. | ||
| ; Expected: (lshr 32 on LE only) + trunc to i32, then a scalar bitcast to float. | ||
| define float @bitcasted_inselt_to_FP(i64 %x) { | ||
| ; LE-LABEL: @bitcasted_inselt_to_FP( | ||
| ; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32 | ||
| ; LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 | ||
| ; LE-NEXT: [[R:%.*]] = bitcast i32 [[TMP2]] to float | ||
| ; LE-NEXT: ret float [[R]] | ||
| ; | ||
| ; BE-LABEL: @bitcasted_inselt_to_FP( | ||
| ; BE-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32 | ||
| ; BE-NEXT: [[R:%.*]] = bitcast i32 [[TMP1]] to float | ||
| ; BE-NEXT: ret float [[R]] | ||
| ; | ||
| %i = insertelement <2 x i64> poison, i64 %x, i32 0 | ||
| %b = bitcast <2 x i64> %i to <4 x float> | ||
| %r = extractelement <4 x float> %b, i32 1 | ||
| ret float %r | ||
| } | ||
|
|
||
| declare void @use_v2i128(<2 x i128>) | ||
| declare void @use_v8f32(<8 x float>) | ||
|
|
||
| ; The insertelement has a second use (@use_v2i128); the CHECK lines expect | ||
| ; the insert/bitcast/extract sequence to be left unchanged. | ||
| define float @bitcasted_inselt_to_FP_uses(i128 %x) { | ||
| ; ANY-LABEL: @bitcasted_inselt_to_FP_uses( | ||
| ; ANY-NEXT: [[I:%.*]] = insertelement <2 x i128> poison, i128 [[X:%.*]], i32 0 | ||
| ; ANY-NEXT: call void @use_v2i128(<2 x i128> [[I]]) | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <2 x i128> [[I]] to <8 x float> | ||
| ; ANY-NEXT: [[R:%.*]] = extractelement <8 x float> [[B]], i32 1 | ||
| ; ANY-NEXT: ret float [[R]] | ||
| ; | ||
| %i = insertelement <2 x i128> poison, i128 %x, i32 0 | ||
| call void @use_v2i128(<2 x i128> %i) | ||
| %b = bitcast <2 x i128> %i to <8 x float> | ||
| %r = extractelement <8 x float> %b, i32 1 | ||
| ret float %r | ||
| } | ||
|
|
||
| ; The bitcast has a second use (@use_v8f32); the CHECK lines expect the | ||
| ; insert/bitcast/extract sequence to be left unchanged. | ||
| define float @bitcasted_inselt_to_FP_uses2(i128 %x) { | ||
| ; ANY-LABEL: @bitcasted_inselt_to_FP_uses2( | ||
| ; ANY-NEXT: [[I:%.*]] = insertelement <2 x i128> poison, i128 [[X:%.*]], i32 0 | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <2 x i128> [[I]] to <8 x float> | ||
| ; ANY-NEXT: call void @use_v8f32(<8 x float> [[B]]) | ||
| ; ANY-NEXT: [[R:%.*]] = extractelement <8 x float> [[B]], i32 1 | ||
| ; ANY-NEXT: ret float [[R]] | ||
| ; | ||
| %i = insertelement <2 x i128> poison, i128 %x, i32 0 | ||
| %b = bitcast <2 x i128> %i to <8 x float> | ||
| call void @use_v8f32(<8 x float> %b) | ||
| %r = extractelement <8 x float> %b, i32 1 | ||
| ret float %r | ||
| } | ||
|
|
||
| ; FP source, integer result: double %x inserted at lane 0, i32 lane 1 extracted. | ||
| ; Expected: scalar bitcast to i64, then (lshr 32 on LE only) + trunc to i32. | ||
| define i32 @bitcasted_inselt_from_FP(double %x) { | ||
| ; LE-LABEL: @bitcasted_inselt_from_FP( | ||
| ; LE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64 | ||
| ; LE-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 32 | ||
| ; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP2]] to i32 | ||
| ; LE-NEXT: ret i32 [[R]] | ||
| ; | ||
| ; BE-LABEL: @bitcasted_inselt_from_FP( | ||
| ; BE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64 | ||
| ; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32 | ||
| ; BE-NEXT: ret i32 [[R]] | ||
| ; | ||
| %i = insertelement <2 x double> poison, double %x, i32 0 | ||
| %b = bitcast <2 x double> %i to <4 x i32> | ||
| %r = extractelement <4 x i32> %b, i32 1 | ||
| ret i32 %r | ||
| } | ||
|
|
||
| declare void @use_v2f64(<2 x double>) | ||
| declare void @use_v8i16(<8 x i16>) | ||
|
|
||
| ; The insertelement has a second use (@use_v2f64); the CHECK lines expect | ||
| ; the insert/bitcast/extract sequence to be left unchanged. | ||
| define i16 @bitcasted_inselt_from_FP_uses(double %x) { | ||
| ; ANY-LABEL: @bitcasted_inselt_from_FP_uses( | ||
| ; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0 | ||
| ; ANY-NEXT: call void @use_v2f64(<2 x double> [[I]]) | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <8 x i16> | ||
| ; ANY-NEXT: [[R:%.*]] = extractelement <8 x i16> [[B]], i32 1 | ||
| ; ANY-NEXT: ret i16 [[R]] | ||
| ; | ||
| %i = insertelement <2 x double> poison, double %x, i32 0 | ||
| call void @use_v2f64(<2 x double> %i) | ||
| %b = bitcast <2 x double> %i to <8 x i16> | ||
| %r = extractelement <8 x i16> %b, i32 1 | ||
| ret i16 %r | ||
| } | ||
|
|
||
| ; The bitcast has a second use (@use_v8i16); the CHECK lines expect the | ||
| ; insert/bitcast/extract sequence to be left unchanged. | ||
| define i16 @bitcasted_inselt_from_FP_uses2(double %x) { | ||
| ; ANY-LABEL: @bitcasted_inselt_from_FP_uses2( | ||
| ; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0 | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <8 x i16> | ||
| ; ANY-NEXT: call void @use_v8i16(<8 x i16> [[B]]) | ||
| ; ANY-NEXT: [[R:%.*]] = extractelement <8 x i16> [[B]], i32 1 | ||
| ; ANY-NEXT: ret i16 [[R]] | ||
| ; | ||
| %i = insertelement <2 x double> poison, double %x, i32 0 | ||
| %b = bitcast <2 x double> %i to <8 x i16> | ||
| call void @use_v8i16(<8 x i16> %b) | ||
| %r = extractelement <8 x i16> %b, i32 1 | ||
| ret i16 %r | ||
| } | ||
|
|
||
| ; Both the inserted scalar (double) and the extracted element (float) are FP; | ||
| ; the CHECK lines expect the sequence to be left unchanged even with no extra uses. | ||
| define float @bitcasted_inselt_to_and_from_FP(double %x) { | ||
| ; ANY-LABEL: @bitcasted_inselt_to_and_from_FP( | ||
| ; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0 | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float> | ||
| ; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1 | ||
| ; ANY-NEXT: ret float [[R]] | ||
| ; | ||
| %i = insertelement <2 x double> poison, double %x, i32 0 | ||
| %b = bitcast <2 x double> %i to <4 x float> | ||
| %r = extractelement <4 x float> %b, i32 1 | ||
| ret float %r | ||
| } | ||
|
|
||
| ; FP-to-FP variant where the insertelement has a second use (@use_v2f64); | ||
| ; the CHECK lines expect the sequence to be left unchanged. | ||
| define float @bitcasted_inselt_to_and_from_FP_uses(double %x) { | ||
| ; ANY-LABEL: @bitcasted_inselt_to_and_from_FP_uses( | ||
| ; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0 | ||
| ; ANY-NEXT: call void @use_v2f64(<2 x double> [[I]]) | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float> | ||
| ; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1 | ||
| ; ANY-NEXT: ret float [[R]] | ||
| ; | ||
| %i = insertelement <2 x double> poison, double %x, i32 0 | ||
| call void @use_v2f64(<2 x double> %i) | ||
| %b = bitcast <2 x double> %i to <4 x float> | ||
| %r = extractelement <4 x float> %b, i32 1 | ||
| ret float %r | ||
| } | ||
|
|
||
| declare void @use_v4f32(<4 x float>) | ||
|
|
||
| ; FP-to-FP variant where the bitcast has a second use (@use_v4f32); | ||
| ; the CHECK lines expect the sequence to be left unchanged. | ||
| define float @bitcasted_inselt_to_and_from_FP_uses2(double %x) { | ||
| ; ANY-LABEL: @bitcasted_inselt_to_and_from_FP_uses2( | ||
| ; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> poison, double [[X:%.*]], i32 0 | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float> | ||
| ; ANY-NEXT: call void @use_v4f32(<4 x float> [[B]]) | ||
| ; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1 | ||
| ; ANY-NEXT: ret float [[R]] | ||
| ; | ||
| %i = insertelement <2 x double> poison, double %x, i32 0 | ||
| %b = bitcast <2 x double> %i to <4 x float> | ||
| call void @use_v4f32(<4 x float> %b) | ||
| %r = extractelement <4 x float> %b, i32 1 | ||
| ret float %r | ||
| } | ||
|
|
||
| ; This would crash/assert because the logic for collectShuffleElements() | ||
| ; does not consider the possibility of invalid insert/extract operands. | ||
|
|
||
| ; %e1 extracts index 4 from the 2-element vector %a (out of range). In the | ||
| ; expected output that value shows up as a 'double undef' operand of the final | ||
| ; insertelement, while the valid insert of %t3 becomes a pair of shufflevectors. | ||
| define <4 x double> @invalid_extractelement(<2 x double> %a, <4 x double> %b, double* %p) { | ||
| ; ANY-LABEL: @invalid_extractelement( | ||
| ; ANY-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> | ||
| ; ANY-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 undef, i32 1, i32 4, i32 3> | ||
| ; ANY-NEXT: [[E:%.*]] = extractelement <4 x double> [[B]], i32 1 | ||
| ; ANY-NEXT: store double [[E]], double* [[P:%.*]], align 8 | ||
| ; ANY-NEXT: [[R:%.*]] = insertelement <4 x double> [[T4]], double undef, i64 0 | ||
| ; ANY-NEXT: ret <4 x double> [[R]] | ||
| ; | ||
| %t3 = extractelement <2 x double> %a, i32 0 | ||
| %t4 = insertelement <4 x double> %b, double %t3, i32 2 | ||
| %e = extractelement <4 x double> %t4, i32 1 | ||
| store double %e, double* %p | ||
| %e1 = extractelement <2 x double> %a, i32 4 ; invalid index | ||
| %r = insertelement <4 x double> %t4, double %e1, i64 0 | ||
| ret <4 x double> %r | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| ; RUN: opt < %s -instcombine -S | not grep zeroinitializer | ||
|
|
||
| ; Loop that masks a loaded <2 x double> with constant vectors built from | ||
| ; insertelement chains. The RUN line requires that no 'zeroinitializer' remains | ||
| ; in the output; in the input it appears only as the operand of %r7, which | ||
| ; feeds %r13 and then %r14. | ||
| define void @foo(i64 %A, i64 %B) { | ||
| bb8: | ||
| br label %bb30 | ||
|
|
||
||
| bb30: | ||
| %s0 = phi i64 [ 0, %bb8 ], [ %r21, %bb30 ] | ||
| %l0 = phi i64 [ -2222, %bb8 ], [ %r23, %bb30 ] | ||
| %r2 = add i64 %s0, %B | ||
| %r3 = inttoptr i64 %r2 to <2 x double>* | ||
| %r4 = load <2 x double>, <2 x double>* %r3, align 8 | ||
| %r6 = bitcast <2 x double> %r4 to <2 x i64> | ||
| %r7 = bitcast <2 x double> zeroinitializer to <2 x i64> | ||
| %r8 = insertelement <2 x i64> poison, i64 9223372036854775807, i32 0 | ||
| %r9 = insertelement <2 x i64> poison, i64 -9223372036854775808, i32 0 | ||
| %r10 = insertelement <2 x i64> %r8, i64 9223372036854775807, i32 1 | ||
| %r11 = insertelement <2 x i64> %r9, i64 -9223372036854775808, i32 1 | ||
| %r12 = and <2 x i64> %r6, %r10 | ||
| %r13 = and <2 x i64> %r7, %r11 | ||
| %r14 = or <2 x i64> %r12, %r13 | ||
| %r15 = bitcast <2 x i64> %r14 to <2 x double> | ||
| %r18 = add i64 %s0, %A | ||
| %r19 = inttoptr i64 %r18 to <2 x double>* | ||
| store <2 x double> %r15, <2 x double>* %r19, align 8 | ||
| %r21 = add i64 16, %s0 | ||
| %r23 = add i64 1, %l0 | ||
| %r25 = icmp slt i64 %r23, 0 | ||
| %r26 = zext i1 %r25 to i64 | ||
| %r27 = icmp ne i64 %r26, 0 | ||
| br i1 %r27, label %bb30, label %bb5 | ||
|
|
||
||
| bb5: | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,127 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S | FileCheck %s | ||
|
|
||
| ; Tests to verify proper functioning of the icmp folding implemented in | ||
| ; InstCombiner::foldICmpBitCastConstant | ||
| ; Specifically, folding: | ||
| ; icmp <pred> iN X, C | ||
| ; where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC) to iN | ||
| ; and C is a splat of a K-bit pattern | ||
| ; and SC is a constant vector = <C', C', C', ..., C'> | ||
| ; Into: | ||
| ; %E = extractelement <M x iK> %vec, i32 C' | ||
| ; icmp <pred> iK %E, trunc(C) | ||
|
|
||
| ; Splat of i1 %val (from lane 0) bitcast to i4 and compared eq with 0: | ||
| ; expected to become 'xor %val, true'. | ||
| define i1 @test_i1_0(i1 %val) { | ||
| ; CHECK-LABEL: @test_i1_0( | ||
| ; CHECK-NEXT: [[COND:%.*]] = xor i1 [[VAL:%.*]], true | ||
| ; CHECK-NEXT: ret i1 [[COND]] | ||
| ; | ||
| %insvec = insertelement <4 x i1> poison, i1 %val, i32 0 | ||
| %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer | ||
| %cast = bitcast <4 x i1> %vec to i4 | ||
| %cond = icmp eq i4 %cast, 0 | ||
| ret i1 %cond | ||
| } | ||
|
|
||
| ; Same comparison as @test_i1_0, but %val is inserted at lane 2 and the splat | ||
| ; mask is <2, 2, 2, 2>; the expected output is the same xor. | ||
| define i1 @test_i1_0_2(i1 %val) { | ||
| ; CHECK-LABEL: @test_i1_0_2( | ||
| ; CHECK-NEXT: [[COND:%.*]] = xor i1 [[VAL:%.*]], true | ||
| ; CHECK-NEXT: ret i1 [[COND]] | ||
| ; | ||
| %insvec = insertelement <4 x i1> poison, i1 %val, i32 2 | ||
| %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> | ||
| %cast = bitcast <4 x i1> %vec to i4 | ||
| %cond = icmp eq i4 %cast, 0 | ||
| ret i1 %cond | ||
| } | ||
|
|
||
| ; Splat of i1 %val bitcast to i4 and compared eq with -1 (all bits set): | ||
| ; expected to fold to %val itself. | ||
| define i1 @test_i1_m1(i1 %val) { | ||
| ; CHECK-LABEL: @test_i1_m1( | ||
| ; CHECK-NEXT: ret i1 [[VAL:%.*]] | ||
| ; | ||
| %insvec = insertelement <4 x i1> poison, i1 %val, i32 0 | ||
| %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer | ||
| %cast = bitcast <4 x i1> %vec to i4 | ||
| %cond = icmp eq i4 %cast, -1 | ||
| ret i1 %cond | ||
| } | ||
|
|
||
| ; 1212696648 is 0x48484848, i.e. the byte 72 repeated four times, so the i32 | ||
| ; compare of the splat is expected to become 'icmp eq i8 %val, 72'. | ||
| define i1 @test_i8_pattern(i8 %val) { | ||
| ; CHECK-LABEL: @test_i8_pattern( | ||
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72 | ||
| ; CHECK-NEXT: ret i1 [[COND]] | ||
| ; | ||
| %insvec = insertelement <4 x i8> poison, i8 %val, i32 0 | ||
| %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer | ||
| %cast = bitcast <4 x i8> %vec to i32 | ||
| %cond = icmp eq i32 %cast, 1212696648 | ||
| ret i1 %cond | ||
| } | ||
|
|
||
| ; Same as @test_i8_pattern, but %val is inserted at lane 2 and splatted with | ||
| ; mask <2, 2, 2, 2>; the expected output is the same i8 compare against 72. | ||
| define i1 @test_i8_pattern_2(i8 %val) { | ||
| ; CHECK-LABEL: @test_i8_pattern_2( | ||
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72 | ||
| ; CHECK-NEXT: ret i1 [[COND]] | ||
| ; | ||
| %insvec = insertelement <4 x i8> poison, i8 %val, i32 2 | ||
| %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> | ||
| %cast = bitcast <4 x i8> %vec to i32 | ||
| %cond = icmp eq i32 %cast, 1212696648 | ||
| ret i1 %cond | ||
| } | ||
|
|
||
| ; Make sure we don't try to fold if the shuffle mask has differing element values. | ||
| ; Here the mask is <1, 0, 3, 2> (not a splat), and the CHECK lines expect the | ||
| ; shuffle, bitcast and i32 compare to be left unchanged. | ||
| define i1 @test_i8_pattern_3(<4 x i8> %invec) { | ||
| ; CHECK-LABEL: @test_i8_pattern_3( | ||
| ; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INVEC:%.*]], <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> | ||
| ; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32 | ||
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696648 | ||
| ; CHECK-NEXT: ret i1 [[COND]] | ||
| ; | ||
| %vec = shufflevector <4 x i8> %invec, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> | ||
| %cast = bitcast <4 x i8> %vec to i32 | ||
| %cond = icmp eq i32 %cast, 1212696648 | ||
| ret i1 %cond | ||
| } | ||
|
|
||
| ; Make sure we don't try to fold if the compared-to constant isn't a splatted value. | ||
| ; 1212696647 is 0x48484847, so its four bytes are not all equal; the CHECK lines | ||
| ; expect the insert, splat shuffle, bitcast and compare to be left unchanged. | ||
| define i1 @test_i8_nopattern(i8 %val) { | ||
| ; CHECK-LABEL: @test_i8_nopattern( | ||
| ; CHECK-NEXT: [[INSVEC:%.*]] = insertelement <4 x i8> poison, i8 [[VAL:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INSVEC]], <4 x i8> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32 | ||
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696647 | ||
| ; CHECK-NEXT: ret i1 [[COND]] | ||
| ; | ||
| %insvec = insertelement <4 x i8> poison, i8 %val, i32 0 | ||
| %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer | ||
| %cast = bitcast <4 x i8> %vec to i32 | ||
| %cond = icmp eq i32 %cast, 1212696647 | ||
| ret i1 %cond | ||
| } | ||
|
|
||
| ; Verify that we fold more than just the eq predicate. | ||
| ; Uses 'ult' against the splat constant 0x48484848; expected 'icmp ult i8 %val, 72'. | ||
| define i1 @test_i8_ult_pattern(i8 %val) { | ||
| ; CHECK-LABEL: @test_i8_ult_pattern( | ||
| ; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[VAL:%.*]], 72 | ||
| ; CHECK-NEXT: ret i1 [[COND]] | ||
| ; | ||
| %insvec = insertelement <4 x i8> poison, i8 %val, i32 0 | ||
| %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer | ||
| %cast = bitcast <4 x i8> %vec to i32 | ||
| %cond = icmp ult i32 %cast, 1212696648 | ||
| ret i1 %cond | ||
| } | ||
|
|
||
| ; The shuffle widens <2 x i9> to a 3-lane splat of lane 0, which is bitcast to | ||
| ; i27. 262657 (0x040201) is the i9 value 1 repeated three times, so the expected | ||
| ; output is an extractelement of lane 0 followed by 'icmp slt i9 ..., 1'. | ||
| define i1 @extending_shuffle_with_weird_types(<2 x i9> %v) { | ||
| ; CHECK-LABEL: @extending_shuffle_with_weird_types( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i9> [[V:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i9 [[TMP1]], 1 | ||
| ; CHECK-NEXT: ret i1 [[CMP]] | ||
| ; | ||
| %splat = shufflevector <2 x i9> %v, <2 x i9> undef, <3 x i32> zeroinitializer | ||
| %cast = bitcast <3 x i9> %splat to i27 | ||
| %cmp = icmp slt i27 %cast, 262657 ; 0x040201 | ||
| ret i1 %cmp | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,271 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt -instcombine -S < %s | FileCheck %s | ||
|
|
||
| declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) | ||
| declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask) | ||
| declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru) | ||
| declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32, <4 x i1> %mask, <4 x double> %passthru) | ||
| declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask) | ||
|
|
||
| ; Masked load with an all-false mask: expected to fold to the passthru operand. | ||
| define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) { | ||
| ; CHECK-LABEL: @load_zeromask( | ||
| ; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]] | ||
| ; | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; Masked load with an all-true mask: expected to become an ordinary load that | ||
| ; uses the intrinsic's alignment argument (2). | ||
| define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) { | ||
| ; CHECK-LABEL: @load_onemask( | ||
| ; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2 | ||
| ; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]] | ||
| ; | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; Mask is <true, undef>: the CHECK lines expect the same unmasked load as in | ||
| ; the all-true case. | ||
| define <2 x double> @load_undefmask(<2 x double>* %ptr, <2 x double> %passthru) { | ||
| ; CHECK-LABEL: @load_undefmask( | ||
| ; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2 | ||
| ; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]] | ||
| ; | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 undef>, <2 x double> %passthru) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| @G = external global i8 | ||
|
|
||
| ; Mask lane 1 is a constant expression (ptrtoint of @G to i1) rather than a | ||
| ; literal; the CHECK lines expect the masked load call to be kept. | ||
| define <2 x double> @load_cemask(<2 x double>* %ptr, <2 x double> %passthru) { | ||
| ; CHECK-LABEL: @load_cemask( | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 ptrtoint (i8* @G to i1)>, <2 x double> [[PASSTHRU:%.*]]) | ||
| ; CHECK-NEXT: ret <2 x double> [[RES]] | ||
| ; | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 ptrtoint (i8* @G to i1)>, <2 x double> %passthru) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; Constant mask <true, false>: lane 0 comes from memory, so the expected | ||
| ; passthru keeps only the lane-1 insert (lane 0 of the passthru becomes undef). | ||
| define <2 x double> @load_lane0(<2 x double>* %ptr, double %pt) { | ||
| ; CHECK-LABEL: @load_lane0( | ||
| ; CHECK-NEXT: [[PTV2:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 1 | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> [[PTV2]]) | ||
| ; CHECK-NEXT: ret <2 x double> [[RES]] | ||
| ; | ||
| %ptv1 = insertelement <2 x double> poison, double %pt, i64 0 | ||
| %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> %ptv2) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; Masked gather with mask <true, false, true, true> whose result is only read | ||
| ; at lane 2. The CHECK lines expect the GEP index for the masked-off lane 1 to | ||
| ; become undef while the gather call and extractelement remain. | ||
| define double @load_all(double* %base, double %pt) { | ||
| ; CHECK-LABEL: @load_all( | ||
| ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 0, i64 undef, i64 2, i64 3> | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef) | ||
| ; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x double> [[RES]], i64 2 | ||
| ; CHECK-NEXT: ret double [[ELT]] | ||
| ; | ||
| %ptrs = getelementptr double, double* %base, <4 x i64> <i64 0, i64 1, i64 2, i64 3> | ||
| %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef) | ||
| %elt = extractelement <4 x double> %res, i64 2 | ||
| ret double %elt | ||
| } | ||
|
|
||
| ; Variable mask and a pointer with no dereferenceable attribute: the masked | ||
| ; load call is expected to stay; only the passthru's two inserts are rewritten | ||
| ; as an insert plus splat shuffle. | ||
| define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, <2 x i1> %mask) { | ||
| ; CHECK-LABEL: @load_generic( | ||
| ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]]) | ||
| ; CHECK-NEXT: ret <2 x double> [[RES]] | ||
| ; | ||
| %ptv1 = insertelement <2 x double> poison, double %pt, i64 0 | ||
| %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; The pointer is dereferenceable(16) and align 4, so with a variable mask the | ||
| ; expected output is an unconditional load followed by a select on the mask. | ||
| define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) { | ||
| ; CHECK-LABEL: @load_speculative( | ||
| ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]] | ||
| ; CHECK-NEXT: ret <2 x double> [[TMP1]] | ||
| ; | ||
| %ptv1 = insertelement <2 x double> poison, double %pt, i64 0 | ||
| %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; Like @load_speculative, but the pointer parameter has no align attribute. | ||
| ; The CHECK lines still expect a load with align 4 (the intrinsic's alignment | ||
| ; argument) followed by a select on the mask. | ||
| define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) { | ||
| ; CHECK-LABEL: @load_speculative_less_aligned( | ||
| ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]] | ||
| ; CHECK-NEXT: ret <2 x double> [[TMP1]] | ||
| ; | ||
| %ptv1 = insertelement <2 x double> poison, double %pt, i64 0 | ||
| %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; Can't speculate since only half of required size is known deref | ||
|
|
||
| ; The pointer is only dereferenceable(8) while the loaded vector is two | ||
| ; doubles; the CHECK lines expect the masked load call to stay, with 'nonnull' | ||
| ; added on the pointer argument. | ||
| define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) { | ||
| ; CHECK-LABEL: @load_spec_neg_size( | ||
| ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]]) | ||
| ; CHECK-NEXT: ret <2 x double> [[RES]] | ||
| ; | ||
| %ptv1 = insertelement <2 x double> poison, double %pt, i64 0 | ||
| %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; Can only speculate one lane (but it's the only one active) | ||
| ; %mask2 forces mask lane 1 to false, and the pointer is dereferenceable(8). | ||
| ; The CHECK lines as recorded still expect the masked load call to be kept. | ||
| define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) { | ||
| ; CHECK-LABEL: @load_spec_lan0( | ||
| ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1 | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PTV2]]) | ||
| ; CHECK-NEXT: ret <2 x double> [[RES]] | ||
| ; | ||
| %ptv1 = insertelement <2 x double> poison, double %pt, i64 0 | ||
| %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 | ||
| %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1 | ||
| %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask2, <2 x double> %ptv2) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; Masked store with an all-false mask: the call is expected to be removed. | ||
| define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) { | ||
| ; CHECK-LABEL: @store_zeromask( | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> zeroinitializer) | ||
| ret void | ||
| } | ||
|
|
||
| ; Masked store with an all-true mask: expected to become an ordinary store | ||
| ; with the intrinsic's alignment argument (4). | ||
| define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) { | ||
| ; CHECK-LABEL: @store_onemask( | ||
| ; CHECK-NEXT: store <2 x double> [[VAL:%.*]], <2 x double>* [[PTR:%.*]], align 4 | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>) | ||
| ret void | ||
| } | ||
|
|
||
| ; Constant mask <true, false>: only lane 0 is stored, so the expected output | ||
| ; drops the lane-1 insert and stores %valvec1 directly. | ||
| define void @store_demandedelts(<2 x double>* %ptr, double %val) { | ||
| ; CHECK-LABEL: @store_demandedelts( | ||
| ; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> poison, double [[VAL:%.*]], i32 0 | ||
| ; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[VALVEC1]], <2 x double>* [[PTR:%.*]], i32 4, <2 x i1> <i1 true, i1 false>) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %valvec1 = insertelement <2 x double> poison, double %val, i32 0 | ||
| %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1 | ||
| call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %valvec2, <2 x double>* %ptr, i32 4, <2 x i1> <i1 true, i1 false>) | ||
| ret void | ||
| } | ||
|
|
||
| ; Baseline case: with a variable mask and a variable passthru, the masked gather call is expected to be left unchanged. | ||
| define <2 x double> @gather_generic(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %passthru) { | ||
| ; CHECK-LABEL: @gather_generic( | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PASSTHRU:%.*]]) | ||
| ; CHECK-NEXT: ret <2 x double> [[RES]] | ||
| ; | ||
| %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %passthru) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
|
|
||
| ; A masked gather with an all-false mask: the call is expected to fold away so that the passthru operand is returned directly. | ||
| define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru) { | ||
| ; CHECK-LABEL: @gather_zeromask( | ||
| ; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]] | ||
| ; | ||
| %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> zeroinitializer, <2 x double> %passthru) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
|
|
||
| ; A masked gather with an all-true mask: the call is kept, but its passthru operand is expected to be replaced with undef. | ||
| define <2 x double> @gather_onemask(<2 x double*> %ptrs, <2 x double> %passthru) { | ||
| ; CHECK-LABEL: @gather_onemask( | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> undef) | ||
| ; CHECK-NEXT: ret <2 x double> [[RES]] | ||
| ; | ||
| %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; The constant mask enables only lane 2. Expected output: the GEP indices of lanes 0, 1 and 3 become undef, and lane 2 of the passthru splat mask becomes undef. | ||
| define <4 x double> @gather_lane2(double* %base, double %pt) { | ||
| ; CHECK-LABEL: @gather_lane2( | ||
| ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 undef, i64 undef, i64 2, i64 undef> | ||
| ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <4 x double> poison, double [[PT:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 0> | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> [[PT_V2]]) | ||
| ; CHECK-NEXT: ret <4 x double> [[RES]] | ||
| ; | ||
| %ptrs = getelementptr double, double *%base, <4 x i64> <i64 0, i64 1, i64 2, i64 3> | ||
| %pt_v1 = insertelement <4 x double> poison, double %pt, i64 0 | ||
| %pt_v2 = shufflevector <4 x double> %pt_v1, <4 x double> undef, <4 x i32> zeroinitializer | ||
| %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %pt_v2) | ||
| ret <4 x double> %res | ||
| } | ||
|
|
||
| ; Lane 1 of the mask is forced to false while lane 0 stays variable. Expected output keeps the GEP indices and the gather call; only the two-insertelement passthru splat is rewritten as insertelement + shufflevector. | ||
| define <2 x double> @gather_lane0_maybe(double* %base, double %pt, <2 x i1> %mask) { | ||
| ; CHECK-LABEL: @gather_lane0_maybe( | ||
| ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1> | ||
| ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1 | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]]) | ||
| ; CHECK-NEXT: ret <2 x double> [[RES]] | ||
| ; | ||
| %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1> | ||
| %pt_v1 = insertelement <2 x double> poison, double %pt, i64 0 | ||
| %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1 | ||
| %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1 | ||
| %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
| ; Same input IR and expected output as @gather_lane0_maybe, apart from the function name. | ||
| ; NOTE(review): nothing in this function distinguishes the `_spec` variant (e.g. no extra parameter attributes) - confirm whether that is intended. | ||
| define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt, <2 x i1> %mask) { | ||
| ; CHECK-LABEL: @gather_lane0_maybe_spec( | ||
| ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1> | ||
| ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1 | ||
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]]) | ||
| ; CHECK-NEXT: ret <2 x double> [[RES]] | ||
| ; | ||
| %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1> | ||
| %pt_v1 = insertelement <2 x double> poison, double %pt, i64 0 | ||
| %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1 | ||
| %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1 | ||
| %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2) | ||
| ret <2 x double> %res | ||
| } | ||
|
|
||
|
|
||
| ; A masked scatter with an all-false (zeroinitializer) mask: the call is expected to be deleted, leaving only `ret void`. | ||
| define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) { | ||
| ; CHECK-LABEL: @scatter_zeromask( | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> zeroinitializer) | ||
| ret void | ||
| } | ||
|
|
||
| ; The constant mask enables only lane 0. Expected output: the lane-1 GEP index becomes undef and the insertelement into lane 1 of the stored value is dropped. | ||
| define void @scatter_demandedelts(double* %ptr, double %val) { | ||
| ; CHECK-LABEL: @scatter_demandedelts( | ||
| ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[PTR:%.*]], <2 x i64> <i64 0, i64 undef> | ||
| ; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> poison, double [[VAL:%.*]], i32 0 | ||
| ; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[VALVEC1]], <2 x double*> [[PTRS]], i32 8, <2 x i1> <i1 true, i1 false>) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %ptrs = getelementptr double, double* %ptr, <2 x i64> <i64 0, i64 1> | ||
| %valvec1 = insertelement <2 x double> poison, double %val, i32 0 | ||
| %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1 | ||
| call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %valvec2, <2 x double*> %ptrs, i32 8, <2 x i1> <i1 true, i1 false>) | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S | FileCheck %s | ||
| target datalayout = "p:16:16" | ||
|
|
||
| @a = external global [21 x i16], align 1 | ||
| @offsets = external global [4 x i16], align 1 | ||
|
|
||
| ; The "same gep" optimization should work with vector icmp. | ||
| ; Both GEPs use the same base (null) and the same index vector, so the compare is expected to fold to an all-true constant and the load to disappear. | ||
| define <4 x i1> @PR38984_1() { | ||
| ; CHECK-LABEL: @PR38984_1( | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true> | ||
| ; | ||
| entry: | ||
| %0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 1 | ||
| %1 = insertelement <4 x i16> poison, i16 %0, i32 3 | ||
| %2 = getelementptr i32, i32* null, <4 x i16> %1 | ||
| %3 = getelementptr i32, i32* null, <4 x i16> %1 | ||
| %4 = icmp eq <4 x i32*> %2, %3 | ||
| ret <4 x i1> %4 | ||
| } | ||
|
|
||
| ; The "compare base pointers" optimization should not kick in for vector icmp. | ||
| ; The two GEPs share the index vector but have different bases (a constant GEP into @a vs. null); the expected output keeps both GEPs and the vector icmp. | ||
| define <4 x i1> @PR38984_2() { | ||
| ; CHECK-LABEL: @PR38984_2( | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 2 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, i16* getelementptr inbounds ([21 x i16], [21 x i16]* @a, i16 1, i16 0), <4 x i16> [[TMP1]] | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, i16* null, <4 x i16> [[TMP1]] | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16*> [[TMP2]], [[TMP3]] | ||
| ; CHECK-NEXT: ret <4 x i1> [[TMP4]] | ||
| ; | ||
| entry: | ||
| %0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef) | ||
| %1 = insertelement <4 x i16> poison, i16 %0, i32 3 | ||
| %2 = getelementptr i16, i16* getelementptr ([21 x i16], [21 x i16]* @a, i64 1, i32 0), <4 x i16> %1 | ||
| %3 = getelementptr i16, i16* null, <4 x i16> %1 | ||
| %4 = icmp eq <4 x i16*> %2, %3 | ||
| ret <4 x i1> %4 | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,335 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt -instcombine -S < %s | FileCheck %s | ||
|
|
||
| ; Constant-lane extract from a loaded integer vector: the expected output is the same as the input (the load stays a vector load). | ||
| define i32 @extract_load(<4 x i32>* %p) { | ||
| ; CHECK-LABEL: @extract_load( | ||
| ; CHECK-NEXT: [[X:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4 | ||
| ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[X]], i32 1 | ||
| ; CHECK-NEXT: ret i32 [[EXT]] | ||
| ; | ||
| %x = load <4 x i32>, <4 x i32>* %p, align 4 | ||
| %ext = extractelement <4 x i32> %x, i32 1 | ||
| ret i32 %ext | ||
| } | ||
|
|
||
| ; Floating-point variant of the constant-lane extract from a loaded vector: the expected output is the same as the input. | ||
| define double @extract_load_fp(<4 x double>* %p) { | ||
| ; CHECK-LABEL: @extract_load_fp( | ||
| ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32 | ||
| ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 3 | ||
| ; CHECK-NEXT: ret double [[EXT]] | ||
| ; | ||
| %x = load <4 x double>, <4 x double>* %p, align 32 | ||
| %ext = extractelement <4 x double> %x, i32 3 | ||
| ret double %ext | ||
| } | ||
|
|
||
| ; Extract from a volatile vector load: the expected output keeps the volatile vector load and only spells out its alignment (32). | ||
| define double @extract_load_volatile(<4 x double>* %p) { | ||
| ; CHECK-LABEL: @extract_load_volatile( | ||
| ; CHECK-NEXT: [[X:%.*]] = load volatile <4 x double>, <4 x double>* [[P:%.*]], align 32 | ||
| ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 2 | ||
| ; CHECK-NEXT: ret double [[EXT]] | ||
| ; | ||
| %x = load volatile <4 x double>, <4 x double>* %p | ||
| %ext = extractelement <4 x double> %x, i32 2 | ||
| ret double %ext | ||
| } | ||
|
|
||
| ; The loaded vector is both extracted from and stored to %p2: the expected output keeps the vector load, the extract and the store (with the store alignment spelled out). | ||
| define double @extract_load_extra_use(<4 x double>* %p, <4 x double>* %p2) { | ||
| ; CHECK-LABEL: @extract_load_extra_use( | ||
| ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 8 | ||
| ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 0 | ||
| ; CHECK-NEXT: store <4 x double> [[X]], <4 x double>* [[P2:%.*]], align 32 | ||
| ; CHECK-NEXT: ret double [[EXT]] | ||
| ; | ||
| %x = load <4 x double>, <4 x double>* %p, align 8 | ||
| %ext = extractelement <4 x double> %x, i32 0 | ||
| store <4 x double> %x, <4 x double>* %p2 | ||
| ret double %ext | ||
| } | ||
|
|
||
| ; Extract from a loaded vector with a variable lane index %y: the expected output keeps the vector load and the variable-index extract. | ||
| define double @extract_load_variable_index(<4 x double>* %p, i32 %y) { | ||
| ; CHECK-LABEL: @extract_load_variable_index( | ||
| ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32 | ||
| ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]] | ||
| ; CHECK-NEXT: ret double [[EXT]] | ||
| ; | ||
| %x = load <4 x double>, <4 x double>* %p | ||
| %ext = extractelement <4 x double> %x, i32 %y | ||
| ret double %ext | ||
| } | ||
|
|
||
| ; The vector phi %x.0 starts as a splat of %t0 and is only used by a lane extract and by an fmul with a splat constant that feeds back into the phi. | ||
| ; Expected output: a scalar float phi and a scalar fmul replace the vector ones; the unused %insert1 disappears. | ||
| define void @scalarize_phi(i32 * %n, float * %inout) { | ||
| ; CHECK-LABEL: @scalarize_phi( | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[T0:%.*]] = load volatile float, float* [[INOUT:%.*]], align 4 | ||
| ; CHECK-NEXT: br label [[FOR_COND:%.*]] | ||
| ; CHECK: for.cond: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ] | ||
| ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] | ||
| ; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[N:%.*]], align 4 | ||
| ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]] | ||
| ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] | ||
| ; CHECK: for.body: | ||
| ; CHECK-NEXT: store volatile float [[TMP0]], float* [[INOUT]], align 4 | ||
| ; CHECK-NEXT: [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000 | ||
| ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 | ||
| ; CHECK-NEXT: br label [[FOR_COND]] | ||
| ; CHECK: for.end: | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| entry: | ||
| %t0 = load volatile float, float * %inout, align 4 | ||
| %insert = insertelement <4 x float> poison, float %t0, i32 0 | ||
| %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer | ||
| %insert1 = insertelement <4 x float> poison, float 3.0, i32 0 | ||
| br label %for.cond | ||
|
|
||
| for.cond: | ||
| %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ] | ||
| %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] | ||
| %t1 = load i32, i32 * %n, align 4 | ||
| %cmp = icmp ne i32 %i.0, %t1 | ||
| br i1 %cmp, label %for.body, label %for.end | ||
|
|
||
| for.body: | ||
| %t2 = extractelement <4 x float> %x.0, i32 1 | ||
| store volatile float %t2, float * %inout, align 4 | ||
| %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000> | ||
| %inc = add nsw i32 %i.0, 1 | ||
| br label %for.cond | ||
|
|
||
| for.end: | ||
| ret void | ||
| } | ||
|
|
||
| ; Vector fadd with a splat constant followed by a constant-lane extract: expected to extract lane 2 of %x first and then do a scalar fadd. | ||
| define float @extract_element_binop_splat_constant_index(<4 x float> %x) { | ||
| ; CHECK-LABEL: @extract_element_binop_splat_constant_index( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2 | ||
| ; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000 | ||
| ; CHECK-NEXT: ret float [[R]] | ||
| ; | ||
| %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000> | ||
| %r = extractelement <4 x float> %b, i32 2 | ||
| ret float %r | ||
| } | ||
|
|
||
| ; The constant operand is <42.0, undef> and lane 0 is extracted: expected to become a scalar fdiv of 42.0 by lane 0 of %x. | ||
| define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) { | ||
| ; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]] | ||
| ; CHECK-NEXT: ret double [[R]] | ||
| ; | ||
| %b = fdiv <2 x double> <double 42.0, double undef>, %x | ||
| %r = extractelement <2 x double> %b, i32 0 | ||
| ret double %r | ||
| } | ||
|
|
||
| ; Non-splat constant <42.0, 43.0> with a constant lane index of 1: expected to become a scalar fmul of lane 1 of %x by 43.0. | ||
| define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) { | ||
| ; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 | ||
| ; CHECK-NEXT: [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01 | ||
| ; CHECK-NEXT: ret float [[R]] | ||
| ; | ||
| %b = fmul <2 x float> %x, <float 42.0, float 43.0> | ||
| %r = extractelement <2 x float> %b, i32 1 | ||
| ret float %r | ||
| } | ||
|
|
||
| ; Variable lane index with a full splat constant: expected to scalarize into a variable-index extract of %x followed by a scalar sdiv by 42. | ||
| define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) { | ||
| ; CHECK-LABEL: @extract_element_binop_splat_variable_index( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]] | ||
| ; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[TMP1]], 42 | ||
| ; CHECK-NEXT: ret i8 [[R]] | ||
| ; | ||
| %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42> | ||
| %r = extractelement <4 x i8> %b, i32 %y | ||
| ret i8 %r | ||
| } | ||
|
|
||
| ; Variable lane index where the constant operand has an undef lane: the expected output keeps the vector mul and the extract unchanged. | ||
| define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) { | ||
| ; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index( | ||
| ; CHECK-NEXT: [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42> | ||
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]] | ||
| ; CHECK-NEXT: ret i8 [[R]] | ||
| ; | ||
| %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42> | ||
| %r = extractelement <4 x i8> %b, i32 %y | ||
| ret i8 %r | ||
| } | ||
|
|
||
| ; Variable lane index with a non-splat constant operand: the expected output keeps the vector lshr and the extract unchanged. | ||
| define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) { | ||
| ; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index( | ||
| ; CHECK-NEXT: [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2> | ||
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]] | ||
| ; CHECK-NEXT: ret i8 [[R]] | ||
| ; | ||
| %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2> | ||
| %r = extractelement <4 x i8> %b, i32 %y | ||
| ret i8 %r | ||
| } | ||
|
|
||
| ; fadd of %x with a loaded vector whose only use is the fadd: expected to extract lane 2 from each operand and do a scalar fadd, keeping the vector load. | ||
| define float @extract_element_load(<4 x float> %x, <4 x float>* %ptr) { | ||
| ; CHECK-LABEL: @extract_element_load( | ||
| ; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR:%.*]], align 16 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[LOAD]], i32 2 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2 | ||
| ; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], [[TMP2]] | ||
| ; CHECK-NEXT: ret float [[R]] | ||
| ; | ||
| %load = load <4 x float>, <4 x float>* %ptr | ||
| %add = fadd <4 x float> %x, %load | ||
| %r = extractelement <4 x float> %add, i32 2 | ||
| ret float %r | ||
| } | ||
|
|
||
| ; Here the loaded vector is also stored to %ptr1 (a second use): the expected output keeps the vector fadd (operands commuted) and the extract. | ||
| define float @extract_element_multi_Use_load(<4 x float> %x, <4 x float>* %ptr0, <4 x float>* %ptr1) { | ||
| ; CHECK-LABEL: @extract_element_multi_Use_load( | ||
| ; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR0:%.*]], align 16 | ||
| ; CHECK-NEXT: store <4 x float> [[LOAD]], <4 x float>* [[PTR1:%.*]], align 16 | ||
| ; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[LOAD]], [[X:%.*]] | ||
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[ADD]], i32 2 | ||
| ; CHECK-NEXT: ret float [[R]] | ||
| ; | ||
| %load = load <4 x float>, <4 x float>* %ptr0 | ||
| store <4 x float> %load, <4 x float>* %ptr1 | ||
| %add = fadd <4 x float> %x, %load | ||
| %r = extractelement <4 x float> %add, i32 2 | ||
| ret float %r | ||
| } | ||
|
|
||
| ; fadd with a splat of 1.0 and a variable lane index: expected to become a variable-index extract of %x followed by a scalar fadd of 1.0. | ||
| define float @extract_element_variable_index(<4 x float> %x, i32 %y) { | ||
| ; CHECK-LABEL: @extract_element_variable_index( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]] | ||
| ; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00 | ||
| ; CHECK-NEXT: ret float [[R]] | ||
| ; | ||
| %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> | ||
| %r = extractelement <4 x float> %add, i32 %y | ||
| ret float %r | ||
| } | ||
|
|
||
| ; Lane 0 of %C is the inserted scalar %f, so the expected output multiplies lane 0 of %B by %f directly and keeps the nnan flag on the scalar fmul. | ||
| define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) { | ||
| ; CHECK-LABEL: @extelt_binop_insertelt( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[E:%.*]] = fmul nnan float [[TMP1]], [[F:%.*]] | ||
| ; CHECK-NEXT: ret float [[E]] | ||
| ; | ||
| %C = insertelement <4 x float> %A, float %f, i32 0 | ||
| %D = fmul nnan <4 x float> %C, %B | ||
| %E = extractelement <4 x float> %D, i32 0 | ||
| ret float %E | ||
| } | ||
|
|
||
| ; We recurse to find a scalarizable operand. | ||
| ; FIXME: We should propagate the IR flags including wrapping flags. | ||
|
|
||
| ; Two chained vector binops on top of an insertelement into lane 0: expected to become a scalar add and a scalar mul on lane 0. | ||
| ; The nsw flag on the input vector mul is absent from the expected scalar mul. | ||
| define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) { | ||
| ; CHECK-LABEL: @extelt_binop_binop_insertelt( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], [[F:%.*]] | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i32 0 | ||
| ; CHECK-NEXT: [[E:%.*]] = mul i32 [[TMP2]], [[TMP3]] | ||
| ; CHECK-NEXT: ret i32 [[E]] | ||
| ; | ||
| %v = insertelement <4 x i32> %A, i32 %f, i32 0 | ||
| %C = add <4 x i32> %v, %B | ||
| %D = mul nsw <4 x i32> %C, %B | ||
| %E = extractelement <4 x i32> %D, i32 0 | ||
| ret i32 %E | ||
| } | ||
|
|
||
| ; Variable-index extract from a constant vector <1.0, 2.0, 3.0, 4.0>: the expected output keeps the extractelement as is. | ||
| define float @extract_element_constant_vector_variable_index(i32 %y) { | ||
| ; CHECK-LABEL: @extract_element_constant_vector_variable_index( | ||
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]] | ||
| ; CHECK-NEXT: ret float [[R]] | ||
| ; | ||
| %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y | ||
| ret float %r | ||
| } | ||
|
|
||
| ; Extract of lane 2 from the `and` of a vector icmp-with-zero and %y: expected to become a scalar icmp and a scalar `and` on lane 2 of each input. | ||
| define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) { | ||
| ; CHECK-LABEL: @cheap_to_extract_icmp( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2 | ||
| ; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]] | ||
| ; CHECK-NEXT: ret i1 [[R]] | ||
| ; | ||
| %cmp = icmp eq <4 x i32> %x, zeroinitializer | ||
| %and = and <4 x i1> %cmp, %y | ||
| %r = extractelement <4 x i1> %and, i32 2 | ||
| ret i1 %r | ||
| } | ||
|
|
||
| ; Extract of lane 2 from the `and` of a vector fcmp-with-zero and %y: expected to become a scalar fcmp and a scalar `and` on lane 2 of each input. | ||
| define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) { | ||
| ; CHECK-LABEL: @cheap_to_extract_fcmp( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00 | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2 | ||
| ; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]] | ||
| ; CHECK-NEXT: ret i1 [[R]] | ||
| ; | ||
| %cmp = fcmp oeq <4 x float> %x, zeroinitializer | ||
| %and = and <4 x i1> %cmp, %y | ||
| %r = extractelement <4 x i1> %and, i32 2 | ||
| ret i1 %r | ||
| } | ||
|
|
||
| ; Vector icmp against zeroinitializer with a constant-lane extract: expected to become an extract of lane 0 followed by a scalar icmp with 0. | ||
| define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) { | ||
| ; CHECK-LABEL: @extractelt_vector_icmp_constrhs( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0 | ||
| ; CHECK-NEXT: ret i1 [[EXT]] | ||
| ; | ||
| %cmp = icmp eq <2 x i32> %arg, zeroinitializer | ||
| %ext = extractelement <2 x i1> %cmp, i32 0 | ||
| ret i1 %ext | ||
| } | ||
|
|
||
| ; Vector fcmp against zeroinitializer with a constant-lane extract: expected to become an extract of lane 0 followed by a scalar fcmp with 0.0. | ||
| define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) { | ||
| ; CHECK-LABEL: @extractelt_vector_fcmp_constrhs( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00 | ||
| ; CHECK-NEXT: ret i1 [[EXT]] | ||
| ; | ||
| %cmp = fcmp oeq <2 x float> %arg, zeroinitializer | ||
| %ext = extractelement <2 x i1> %cmp, i32 0 | ||
| ret i1 %ext | ||
| } | ||
|
|
||
| ; Same icmp-against-zero pattern with a variable lane index %idx: expected to become a variable-index extract followed by a scalar icmp with 0. | ||
| define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) { | ||
| ; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]] | ||
| ; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0 | ||
| ; CHECK-NEXT: ret i1 [[EXT]] | ||
| ; | ||
| %cmp = icmp eq <2 x i32> %arg, zeroinitializer | ||
| %ext = extractelement <2 x i1> %cmp, i32 %idx | ||
| ret i1 %ext | ||
| } | ||
|
|
||
| ; Same fcmp-against-zero pattern with a variable lane index %idx: expected to become a variable-index extract followed by a scalar fcmp with 0.0. | ||
| define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) { | ||
| ; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]] | ||
| ; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00 | ||
| ; CHECK-NEXT: ret i1 [[EXT]] | ||
| ; | ||
| %cmp = fcmp oeq <2 x float> %arg, zeroinitializer | ||
| %ext = extractelement <2 x i1> %cmp, i32 %idx | ||
| ret i1 %ext | ||
| } | ||
|
|
||
| ; %add has a second use (the volatile store), and the expected output keeps the vector fcmp (operands commuted) and the extract. | ||
| ; Note: the %idx parameter is not used in the body; the extract uses constant lane 0. | ||
| define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) { | ||
| ; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use( | ||
| ; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]] | ||
| ; CHECK-NEXT: store volatile <2 x float> [[ADD]], <2 x float>* undef, align 8 | ||
| ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[ADD]], [[ARG0:%.*]] | ||
| ; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i32 0 | ||
| ; CHECK-NEXT: ret i1 [[EXT]] | ||
| ; | ||
| %add = fadd <2 x float> %arg1, %arg2 | ||
| store volatile <2 x float> %add, <2 x float>* undef | ||
| %cmp = fcmp oeq <2 x float> %arg0, %add | ||
| %ext = extractelement <2 x i1> %cmp, i32 0 | ||
| ret i1 %ext | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,213 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt -S -instcombine < %s | FileCheck %s | ||
|
|
||
| declare void @v4float_user(<4 x float>) #0 | ||
|
|
||
| ; Single extract from a scalar-condition select of two vectors: the expected output keeps the vector select; only the compare is inverted (ne -> eq) with the select arms swapped. | ||
| define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 { | ||
| ; CHECK-LABEL: @extract_one_select( | ||
| ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0 | ||
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] | ||
| ; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2 | ||
| ; CHECK-NEXT: ret float [[EXTRACT]] | ||
| ; | ||
| %cmp = icmp ne i32 %c, 0 | ||
| %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b | ||
| %extract = extractelement <4 x float> %sel, i32 2 | ||
| ret float %extract | ||
| } | ||
|
|
||
| ; Multiple extractelements | ||
| ; Lanes 1 and 2 of the select result are re-packed into a <2 x float>; the expected output does this with a single shufflevector of the vector select. | ||
| define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 { | ||
| ; CHECK-LABEL: @extract_two_select( | ||
| ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0 | ||
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] | ||
| ; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2> | ||
| ; CHECK-NEXT: ret <2 x float> [[BUILD2]] | ||
| ; | ||
| %cmp = icmp ne i32 %c, 0 | ||
| %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b | ||
| %extract1 = extractelement <4 x float> %sel, i32 1 | ||
| %extract2 = extractelement <4 x float> %sel, i32 2 | ||
| %build1 = insertelement <2 x float> poison, float %extract1, i32 0 | ||
| %build2 = insertelement <2 x float> %build1, float %extract2, i32 1 | ||
| ret <2 x float> %build2 | ||
| } | ||
|
|
||
| ; Select has an extra non-extractelement user, don't change it | ||
| ; The extra user is the call to @v4float_user; the expected output keeps the vector select, the extract and the call. | ||
| define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 { | ||
| ; CHECK-LABEL: @extract_one_select_user( | ||
| ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0 | ||
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] | ||
| ; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2 | ||
| ; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]]) | ||
| ; CHECK-NEXT: ret float [[EXTRACT]] | ||
| ; | ||
| %cmp = icmp ne i32 %c, 0 | ||
| %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b | ||
| %extract = extractelement <4 x float> %sel, i32 2 | ||
| call void @v4float_user(<4 x float> %sel) | ||
| ret float %extract | ||
| } | ||
|
|
||
| ; Vector-condition variant where the select also feeds @v4float_user: the expected output keeps the vector select, the extract and the call. | ||
| define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { | ||
| ; CHECK-LABEL: @extract_one_vselect_user( | ||
| ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer | ||
| ; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] | ||
| ; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2 | ||
| ; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]]) | ||
| ; CHECK-NEXT: ret float [[EXTRACT]] | ||
| ; | ||
| %cmp = icmp ne <4 x i32> %c, zeroinitializer | ||
| %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b | ||
| %extract = extractelement <4 x float> %sel, i32 2 | ||
| call void @v4float_user(<4 x float> %sel) | ||
| ret float %extract | ||
| } | ||
|
|
||
| ; Do not convert the vector select into a scalar select. That would increase | ||
| ; the instruction count and potentially obfuscate a vector min/max idiom. | ||
|
|
||
| ; Single extract of lane 0 from a vector-condition select: the expected output keeps the vector icmp and vector select (compare inverted, arms swapped) and the extract. | ||
| define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { | ||
| ; CHECK-LABEL: @extract_one_vselect( | ||
| ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer | ||
| ; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] | ||
| ; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SELECT]], i32 0 | ||
| ; CHECK-NEXT: ret float [[EXTRACT]] | ||
| ; | ||
| %cmp = icmp ne <4 x i32> %c, zeroinitializer | ||
| %select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b | ||
| %extract = extractelement <4 x float> %select, i32 0 | ||
| ret float %extract | ||
| } | ||
|
|
||
| ; Multiple extractelements from a vector select | ||
| ; Lanes 1 and 2 of the select result are re-packed into a <2 x float>; the expected output does this with a single shufflevector of the vector select. | ||
| define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { | ||
| ; CHECK-LABEL: @extract_two_vselect( | ||
| ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer | ||
| ; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] | ||
| ; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2> | ||
| ; CHECK-NEXT: ret <2 x float> [[BUILD2]] | ||
| ; | ||
| %cmp = icmp ne <4 x i32> %c, zeroinitializer | ||
| %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b | ||
| %extract1 = extractelement <4 x float> %sel, i32 1 | ||
| %extract2 = extractelement <4 x float> %sel, i32 2 | ||
| %build1 = insertelement <2 x float> poison, float %extract1, i32 0 | ||
| %build2 = insertelement <2 x float> %build1, float %extract2, i32 1 | ||
| ret <2 x float> %build2 | ||
| } | ||
|
|
||
| ; The vector selects are not decomposed into scalar selects because that would increase | ||
| ; the instruction count. Extract+insert is converted to non-lane-crossing shuffles. | ||
| ; Test multiple extractelements | ||
| ; Input: for each lane i, a whole-vector select on (lane i of %c != 0) is extracted at lane i and inserted into lane i of the result. | ||
| ; Expected output: the four whole-vector selects remain and the result is assembled with three shufflevectors. | ||
| define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { | ||
| ; CHECK-LABEL: @simple_vector_select( | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 | ||
| ; CHECK-NEXT: [[A_SINK:%.*]] = select i1 [[TOBOOL_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C]], i32 1 | ||
| ; CHECK-NEXT: [[TOBOOL1_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 | ||
| ; CHECK-NEXT: [[A_SINK1:%.*]] = select i1 [[TOBOOL1_NOT]], <4 x float> [[B]], <4 x float> [[A]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 2 | ||
| ; CHECK-NEXT: [[TOBOOL6_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 | ||
| ; CHECK-NEXT: [[A_SINK2:%.*]] = select i1 [[TOBOOL6_NOT]], <4 x float> [[B]], <4 x float> [[A]] | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef> | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[C]], i32 3 | ||
| ; CHECK-NEXT: [[TOBOOL11_NOT:%.*]] = icmp eq i32 [[TMP5]], 0 | ||
| ; CHECK-NEXT: [[A_SINK3:%.*]] = select i1 [[TOBOOL11_NOT]], <4 x float> [[B]], <4 x float> [[A]] | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[A_SINK3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> | ||
| ; CHECK-NEXT: ret <4 x float> [[TMP6]] | ||
| ; | ||
| entry: | ||
| %0 = extractelement <4 x i32> %c, i32 0 | ||
| %tobool = icmp ne i32 %0, 0 | ||
| %a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b | ||
| %1 = extractelement <4 x float> %a.sink, i32 0 | ||
| %2 = insertelement <4 x float> poison, float %1, i32 0 | ||
| %3 = extractelement <4 x i32> %c, i32 1 | ||
| %tobool1 = icmp ne i32 %3, 0 | ||
| %a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b | ||
| %4 = extractelement <4 x float> %a.sink1, i32 1 | ||
| %5 = insertelement <4 x float> %2, float %4, i32 1 | ||
| %6 = extractelement <4 x i32> %c, i32 2 | ||
| %tobool6 = icmp ne i32 %6, 0 | ||
| %a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b | ||
| %7 = extractelement <4 x float> %a.sink2, i32 2 | ||
| %8 = insertelement <4 x float> %5, float %7, i32 2 | ||
| %9 = extractelement <4 x i32> %c, i32 3 | ||
| %tobool11 = icmp ne i32 %9, 0 | ||
| %a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b | ||
| %10 = extractelement <4 x float> %a.sink3, i32 3 | ||
| %11 = insertelement <4 x float> %8, float %10, i32 3 | ||
| ret <4 x float> %11 | ||
| } | ||
|
|
||
| ; A select whose scalar i1 condition is extracted from constant lane 3 of | ||
| ; %condv is expected to become a vector select on a splat of that lane. | ||
| define <4 x i32> @extract_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) { | ||
| ; CHECK-LABEL: @extract_cond( | ||
| ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | ||
| ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[R]] | ||
| ; | ||
| %cond = extractelement <4 x i1> %condv, i32 3 | ||
| %r = select i1 %cond, <4 x i32> %x, <4 x i32> %y | ||
| ret <4 x i32> %r | ||
| } | ||
|
|
||
| ; The condition is already a splat of lane 3 of %condv; the assertions expect | ||
| ; the shufflevector and the vector select to be left as written. | ||
| define <4 x i32> @splat_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) { | ||
| ; CHECK-LABEL: @splat_cond( | ||
| ; CHECK-NEXT: [[SPLATCOND:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | ||
| ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[SPLATCOND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[R]] | ||
| ; | ||
| %splatcond = shufflevector <4 x i1> %condv, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | ||
| %r = select <4 x i1> %splatcond, <4 x i32> %x, <4 x i32> %y | ||
| ret <4 x i32> %r | ||
| } | ||
|
|
||
| ; External function taking an i1; called below to give a value an additional use. | ||
| declare void @extra_use(i1) | ||
|
|
||
| ; Negative test - the extracted condition has an extra use. | ||
|
|
||
| ; %cond is also passed to @extra_use; the assertions expect the extractelement | ||
| ; and the scalar-condition select to be kept as written. | ||
| define <4 x i32> @extract_cond_extra_use(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) { | ||
| ; CHECK-LABEL: @extract_cond_extra_use( | ||
| ; CHECK-NEXT: [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i32 3 | ||
| ; CHECK-NEXT: call void @extra_use(i1 [[COND]]) | ||
| ; CHECK-NEXT: [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[R]] | ||
| ; | ||
| %cond = extractelement <4 x i1> %condv, i32 3 | ||
| call void @extra_use(i1 %cond) | ||
| %r = select i1 %cond, <4 x i32> %x, <4 x i32> %y | ||
| ret <4 x i32> %r | ||
| } | ||
|
|
||
| ; Negative test - the extract index is a variable, not a constant. | ||
|
|
||
| ; The extract index is the runtime value %index rather than a constant; the | ||
| ; assertions expect the IR to be kept as written. | ||
| define <4 x i32> @extract_cond_variable_index(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv, i32 %index) { | ||
| ; CHECK-LABEL: @extract_cond_variable_index( | ||
| ; CHECK-NEXT: [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i32 [[INDEX:%.*]] | ||
| ; CHECK-NEXT: [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[R]] | ||
| ; | ||
| %cond = extractelement <4 x i1> %condv, i32 %index | ||
| %r = select i1 %cond, <4 x i32> %x, <4 x i32> %y | ||
| ret <4 x i32> %r | ||
| } | ||
|
|
||
| ; IR shuffle can alter the number of elements in the vector, so this is ok. | ||
|
|
||
| ; The condition comes from a <5 x i1> vector while the select operands are | ||
| ; <4 x i32>. The expected splat shuffle reads lane 1 of the 5-element source | ||
| ; and produces a 4-element condition. | ||
| define <4 x i32> @extract_cond_type_mismatch(<4 x i32> %x, <4 x i32> %y, <5 x i1> %condv) { | ||
| ; CHECK-LABEL: @extract_cond_type_mismatch( | ||
| ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <5 x i1> [[CONDV:%.*]], <5 x i1> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
| ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[R]] | ||
| ; | ||
| %cond = extractelement <5 x i1> %condv, i32 1 | ||
| %r = select i1 %cond, <4 x i32> %x, <4 x i32> %y | ||
| ret <4 x i32> %r | ||
| } | ||
|
|
||
|
|
||
| ; Function attribute group #0 (frame-pointer, stack-protector and FP-math | ||
| ; string attributes), applied to any function in this file tagged with #0. | ||
| attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,122 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; This test makes sure that the add feeding the shift amount is properly eliminated. | ||
| ; | ||
| ; RUN: opt < %s -instcombine -S | FileCheck %s | ||
|
|
||
| ; shl 6, (zext(%A) + 5) is expected to become shl 192, zext(%A), where | ||
| ; 192 == 6 << 5, so the add disappears. | ||
| define i32 @shl_C1_add_A_C2_i32(i16 %A) { | ||
| ; CHECK-LABEL: @shl_C1_add_A_C2_i32( | ||
| ; CHECK-NEXT: [[B:%.*]] = zext i16 [[A:%.*]] to i32 | ||
| ; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]] | ||
| ; CHECK-NEXT: ret i32 [[D]] | ||
| ; | ||
| %B = zext i16 %A to i32 | ||
| %C = add i32 %B, 5 | ||
| %D = shl i32 6, %C | ||
| ret i32 %D | ||
| } | ||
|
|
||
| ; ashr 6, ((%A & 65535) + 5) is expected to fold to the constant 0 | ||
| ; (6 shifted right by 5 is already 0). | ||
| define i32 @ashr_C1_add_A_C2_i32(i32 %A) { | ||
| ; CHECK-LABEL: @ashr_C1_add_A_C2_i32( | ||
| ; CHECK-NEXT: ret i32 0 | ||
| ; | ||
| %B = and i32 %A, 65535 | ||
| %C = add i32 %B, 5 | ||
| %D = ashr i32 6, %C | ||
| ret i32 %D | ||
| } | ||
|
|
||
| ; lshr 6, ((%A & 65535) + 5) is expected to fold to the constant 0 | ||
| ; (6 shifted right by 5 is already 0), mirroring the scalar ashr test. | ||
| ; This function previously used shl in its body, so the scalar lshr case was | ||
| ; never exercised. | ||
| ; NOTE(review) - the assertions below were updated by hand; regenerate them | ||
| ; with utils/update_test_checks.py to confirm. | ||
| define i32 @lshr_C1_add_A_C2_i32(i32 %A) { | ||
| ; CHECK-LABEL: @lshr_C1_add_A_C2_i32( | ||
| ; CHECK-NEXT: ret i32 0 | ||
| ; | ||
| %B = and i32 %A, 65535 | ||
| %C = add i32 %B, 5 | ||
| %D = lshr i32 6, %C | ||
| ret i32 %D | ||
| } | ||
|
|
||
| ; Vector form with per-lane constants. Expected per-lane base constants are | ||
| ; 6 << 0 = 6, 2 << 1 = 4, poison for the lane whose added amount is 50 | ||
| ; (not less than the 32-bit element width), and -7 << 16 = -458752. | ||
| define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { | ||
| ; CHECK-LABEL: @shl_C1_add_A_C2_v4i32( | ||
| ; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> | ||
| ; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[B]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[D]] | ||
| ; | ||
| %B = zext <4 x i16> %A to <4 x i32> | ||
| %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16> | ||
| %D = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C | ||
| ret <4 x i32> %D | ||
| } | ||
|
|
||
| ; Vector ashr form; %A is masked per lane before the add. Expected per-lane | ||
| ; base constants are 6 >> 0 = 6, 2 >> 1 = 1, poison for the lane whose added | ||
| ; amount is 50, and -7 ashr 16 = -1. | ||
| define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { | ||
| ; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32( | ||
| ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535> | ||
| ; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[B]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[D]] | ||
| ; | ||
| %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535> | ||
| %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16> | ||
| %D = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C | ||
| ret <4 x i32> %D | ||
| } | ||
|
|
||
| ; Vector lshr form; %A is masked per lane before the add. Expected per-lane | ||
| ; base constants are 6 >> 0 = 6, 2 >> 1 = 1, poison for the lane whose added | ||
| ; amount is 50, and -7 lshr 16 = 65535. | ||
| define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) { | ||
| ; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32( | ||
| ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535> | ||
| ; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[B]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[D]] | ||
| ; | ||
| %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535> | ||
| %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16> | ||
| %D = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C | ||
| ret <4 x i32> %D | ||
| } | ||
|
|
||
| ; Same constants as @shl_C1_add_A_C2_v4i32, but the variable part of the shift | ||
| ; amount is a splat of zext(%I). The add is expected to be folded into the | ||
| ; shifted constant while the insertelement/shufflevector splat is kept. | ||
| define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) { | ||
| ; CHECK-LABEL: @shl_C1_add_A_C2_v4i32_splat( | ||
| ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 | ||
| ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 | ||
| ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[C]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[E]] | ||
| ; | ||
| %A = zext i16 %I to i32 | ||
| %B = insertelement <4 x i32> poison, i32 %A, i32 0 | ||
| %C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer | ||
| %D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16> | ||
| %E = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D | ||
| ret <4 x i32> %E | ||
| } | ||
|
|
||
| ; Same constants as @ashr_C1_add_A_C2_v4i32, but the variable part of the | ||
| ; shift amount is a splat of zext(%I). The add is expected to be folded into | ||
| ; the shifted constant while the insertelement/shufflevector splat is kept. | ||
| define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) { | ||
| ; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32_splat( | ||
| ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 | ||
| ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 | ||
| ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[C]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[E]] | ||
| ; | ||
| %A = zext i16 %I to i32 | ||
| %B = insertelement <4 x i32> poison, i32 %A, i32 0 | ||
| %C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer | ||
| %D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16> | ||
| %E = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D | ||
| ret <4 x i32> %E | ||
| } | ||
|
|
||
| ; Same constants as @lshr_C1_add_A_C2_v4i32, but the variable part of the | ||
| ; shift amount is a splat of zext(%I). The add is expected to be folded into | ||
| ; the shifted constant while the insertelement/shufflevector splat is kept. | ||
| define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) { | ||
| ; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32_splat( | ||
| ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 | ||
| ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 | ||
| ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[C]] | ||
| ; CHECK-NEXT: ret <4 x i32> [[E]] | ||
| ; | ||
| %A = zext i16 %I to i32 | ||
| %B = insertelement <4 x i32> poison, i32 %A, i32 0 | ||
| %C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer | ||
| %D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16> | ||
| %E = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D | ||
| ret <4 x i32> %E | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,122 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S -o - | FileCheck %s | ||
|
|
||
| ; This test case was added as a reproducer for a miscompile, where instcombine | ||
| ; introduced an | ||
| ; srem <2 x i16> %1, <i16 undef, i16 2> | ||
| ; instruction, which makes the whole srem undefined (even if we only end up | ||
| ; extracting the second element in the vector). | ||
| ; Input is an srem by <2, 2> of a select between <1, 1> and a splat of %a, | ||
| ; with only lane 1 extracted. The assertions expect the srem to be applied to | ||
| ; the un-splatted insertelement with divisor <2, 1>, so no divisor lane is | ||
| ; undef, followed by the shuffle, the select and the extract. | ||
| define i16 @test_srem_orig(i16 %a, i1 %cmp) { | ||
| ; CHECK-LABEL: @test_srem_orig( | ||
| ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i16> [[SPLATINSERT]], <i16 2, i16 1> | ||
| ; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0> | ||
| ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 undef, i16 1>, <2 x i16> [[SPLAT_OP]] | ||
| ; CHECK-NEXT: [[T3:%.*]] = extractelement <2 x i16> [[T2]], i32 1 | ||
| ; CHECK-NEXT: ret i16 [[T3]] | ||
| ; | ||
| %splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0 | ||
| %splat = shufflevector <2 x i16> %splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer | ||
| %t1 = select i1 %cmp, <2 x i16> <i16 1, i16 1>, <2 x i16> %splat | ||
| %t2 = srem <2 x i16> %t1, <i16 2, i16 2> | ||
| %t3 = extractelement <2 x i16> %t2, i32 1 | ||
| ret i16 %t3 | ||
| } | ||
|
|
||
| ; This is basically a reduced version of test_srem_orig (based on what the | ||
| ; code would look like after a few iterations of instcombine, just before we | ||
| ; try to transform the shufflevector by doing | ||
| ; "evaluateInDifferentElementOrder"). | ||
| ; The assertions expect the input to be kept as written. The shufflevector | ||
| ; stays after the srem and the divisor stays <2, 1>, with no undef lane. | ||
| define <2 x i16> @test_srem(i16 %a, i1 %cmp) { | ||
| ; CHECK-LABEL: @test_srem( | ||
| ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T1:%.*]] = srem <2 x i16> [[SPLATINSERT]], <i16 2, i16 1> | ||
| ; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0> | ||
| ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]] | ||
| ; CHECK-NEXT: ret <2 x i16> [[T2]] | ||
| ; | ||
| %splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0 | ||
| %t1 = srem <2 x i16> %splatinsert, <i16 2, i16 1> | ||
| %splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0> | ||
| %t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op | ||
| ret <2 x i16> %t2 | ||
| } | ||
|
|
||
| ; urem variant of @test_srem. The assertions expect the input to be kept as | ||
| ; written, with the shufflevector after the urem and the divisor <3, 1>. | ||
| define <2 x i16> @test_urem(i16 %a, i1 %cmp) { | ||
| ; CHECK-LABEL: @test_urem( | ||
| ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T1:%.*]] = urem <2 x i16> [[SPLATINSERT]], <i16 3, i16 1> | ||
| ; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0> | ||
| ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]] | ||
| ; CHECK-NEXT: ret <2 x i16> [[T2]] | ||
| ; | ||
| %splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0 | ||
| %t1 = urem <2 x i16> %splatinsert, <i16 3, i16 1> | ||
| %splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0> | ||
| %t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op | ||
| ret <2 x i16> %t2 | ||
| } | ||
|
|
||
| ; sdiv variant of @test_srem. The assertions expect the input to be kept as | ||
| ; written, with the shufflevector after the sdiv and the divisor <2, 1>. | ||
| define <2 x i16> @test_sdiv(i16 %a, i1 %cmp) { | ||
| ; CHECK-LABEL: @test_sdiv( | ||
| ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T1:%.*]] = sdiv <2 x i16> [[SPLATINSERT]], <i16 2, i16 1> | ||
| ; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0> | ||
| ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]] | ||
| ; CHECK-NEXT: ret <2 x i16> [[T2]] | ||
| ; | ||
| %splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0 | ||
| %t1 = sdiv <2 x i16> %splatinsert, <i16 2, i16 1> | ||
| %splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0> | ||
| %t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op | ||
| ret <2 x i16> %t2 | ||
| } | ||
|
|
||
| ; udiv variant of @test_srem. The assertions expect the input to be kept as | ||
| ; written, with the shufflevector after the udiv and the divisor <3, 1>. | ||
| define <2 x i16> @test_udiv(i16 %a, i1 %cmp) { | ||
| ; CHECK-LABEL: @test_udiv( | ||
| ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[T1:%.*]] = udiv <2 x i16> [[SPLATINSERT]], <i16 3, i16 1> | ||
| ; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> <i32 undef, i32 0> | ||
| ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> <i16 77, i16 99>, <2 x i16> [[SPLAT_OP]] | ||
| ; CHECK-NEXT: ret <2 x i16> [[T2]] | ||
| ; | ||
| %splatinsert = insertelement <2 x i16> poison, i16 %a, i32 0 | ||
| %t1 = udiv <2 x i16> %splatinsert, <i16 3, i16 1> | ||
| %splat.op = shufflevector <2 x i16> %t1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0> | ||
| %t2 = select i1 %cmp, <2 x i16> <i16 77, i16 99>, <2 x i16> %splat.op | ||
| ret <2 x i16> %t2 | ||
| } | ||
|
|
||
| ; For fdiv we do not need to worry about div by undef. Verify that the | ||
| ; shufflevector is eliminated here. | ||
| ; %denom is <3.0, 1.0> once the insertelement is applied. The assertions | ||
| ; expect %a to be inserted directly into lane 1 and divided by <undef, 3.0>, | ||
| ; with no shufflevector left. The %b parameter is unused. | ||
| define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) { | ||
| ; CHECK-LABEL: @test_fdiv( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 1 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = fdiv <2 x float> [[TMP1]], <float undef, float 3.000000e+00> | ||
| ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]] | ||
| ; CHECK-NEXT: ret <2 x float> [[T2]] | ||
| ; | ||
| %splatinsert = insertelement <2 x float> poison, float %a, i32 0 | ||
| %denom = insertelement <2 x float> <float 3.0, float undef>, float 1.0, i32 1 | ||
| %t1 = fdiv <2 x float> %splatinsert, %denom | ||
| %splat.op = shufflevector <2 x float> %t1, <2 x float> undef, <2 x i32> <i32 undef, i32 0> | ||
| %t2 = select i1 %cmp, <2 x float> <float 77.0, float 99.0>, <2 x float> %splat.op | ||
| ret <2 x float> %t2 | ||
| } | ||
|
|
||
| ; For frem we do not need to worry about div by undef. Verify that the | ||
| ; shufflevector is eliminated here. | ||
| ; %denom is <3.0, 1.0> once the insertelement is applied. The assertions | ||
| ; expect %a to be inserted directly into lane 1 and frem'd by <undef, 3.0>, | ||
| ; with no shufflevector left. The %b parameter is unused. | ||
| define <2 x float> @test_frem(float %a, float %b, i1 %cmp) { | ||
| ; CHECK-LABEL: @test_frem( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 1 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = frem <2 x float> [[TMP1]], <float undef, float 3.000000e+00> | ||
| ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]] | ||
| ; CHECK-NEXT: ret <2 x float> [[T2]] | ||
| ; | ||
| %splatinsert = insertelement <2 x float> poison, float %a, i32 0 | ||
| %denom = insertelement <2 x float> <float 3.0, float undef>, float 1.0, i32 1 | ||
| %t1 = frem <2 x float> %splatinsert, %denom | ||
| %splat.op = shufflevector <2 x float> %t1, <2 x float> undef, <2 x i32> <i32 undef, i32 0> | ||
| %t2 = select i1 %cmp, <2 x float> <float 77.0, float 99.0>, <2 x float> %splat.op | ||
| ret <2 x float> %t2 | ||
| } |