| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,301 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s | ||
| target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
|
|
||
| ; Verify that instcombine is able to fold identity shuffles. | ||
|
|
||
| ; The mask <0, 1, 2, 3> keeps every element in place, so the call is expected to fold away and %v is returned directly. | ||
| define <4 x float> @identity_test_vpermilvar_ps(<4 x float> %v) { | ||
| ; CHECK-LABEL: @identity_test_vpermilvar_ps( | ||
| ; CHECK-NEXT: ret <4 x float> [[V:%.*]] | ||
| ; | ||
| %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 3>) | ||
| ret <4 x float> %a | ||
| } | ||
|
|
||
| ; 256-bit variant: the mask <0, ..., 7> is expected to be recognised as an identity shuffle, so %v is returned directly. | ||
| define <8 x float> @identity_test_vpermilvar_ps_256(<8 x float> %v) { | ||
| ; CHECK-LABEL: @identity_test_vpermilvar_ps_256( | ||
| ; CHECK-NEXT: ret <8 x float> [[V:%.*]] | ||
| ; | ||
| %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>) | ||
| ret <8 x float> %a | ||
| } | ||
|
|
||
| ; 512-bit variant: the mask repeats <0, 1, 2, 3> for every group of four elements and is still expected to be an | ||
| ; identity shuffle (the expected output returns %v), i.e. the indices are relative to each group of four. | ||
| define <16 x float> @identity_test_vpermilvar_ps_512(<16 x float> %v) { | ||
| ; CHECK-LABEL: @identity_test_vpermilvar_ps_512( | ||
| ; CHECK-NEXT: ret <16 x float> [[V:%.*]] | ||
| ; | ||
| %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>) | ||
| ret <16 x float> %a | ||
| } | ||
|
|
||
| ; Double-precision variant: the mask <0, 2> is expected to act as an identity shuffle (%v is returned directly), | ||
| ; which is consistent with the element selector being bit 1 of each i64 mask element. | ||
| define <2 x double> @identity_test_vpermilvar_pd(<2 x double> %v) { | ||
| ; CHECK-LABEL: @identity_test_vpermilvar_pd( | ||
| ; CHECK-NEXT: ret <2 x double> [[V:%.*]] | ||
| ; | ||
| %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 0, i64 2>) | ||
| ret <2 x double> %a | ||
| } | ||
|
|
||
| ; 256-bit double variant: the mask <0, 2> repeated for each pair of elements is expected to be an identity shuffle. | ||
| define <4 x double> @identity_test_vpermilvar_pd_256(<4 x double> %v) { | ||
| ; CHECK-LABEL: @identity_test_vpermilvar_pd_256( | ||
| ; CHECK-NEXT: ret <4 x double> [[V:%.*]] | ||
| ; | ||
| %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 0, i64 2, i64 0, i64 2>) | ||
| ret <4 x double> %a | ||
| } | ||
|
|
||
| ; 512-bit double variant: the mask <0, 2> repeated for each pair of elements is expected to be an identity shuffle. | ||
| define <8 x double> @identity_test_vpermilvar_pd_512(<8 x double> %v) { | ||
| ; CHECK-LABEL: @identity_test_vpermilvar_pd_512( | ||
| ; CHECK-NEXT: ret <8 x double> [[V:%.*]] | ||
| ; | ||
| %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 0, i64 2, i64 0, i64 2, i64 0, i64 2, i64 0, i64 2>) | ||
| ret <8 x double> %a | ||
| } | ||
|
|
||
| ; Instcombine should be able to fold the following shuffles with an all-zero mask to a builtin shufflevector | ||
| ; that repeats the first element of each 128-bit lane. | ||
|
|
||
| ; All-zero mask on a 128-bit vector: expected to become a shufflevector that splats element 0. | ||
| define <4 x float> @zero_test_vpermilvar_ps_zero(<4 x float> %v) { | ||
| ; CHECK-LABEL: @zero_test_vpermilvar_ps_zero( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> undef, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: ret <4 x float> [[TMP1]] | ||
| ; | ||
| %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer) | ||
| ret <4 x float> %a | ||
| } | ||
|
|
||
| ; All-zero mask on a 256-bit vector: each group of four results is expected to repeat the first element of its own | ||
| ; group, giving the shufflevector mask <0, 0, 0, 0, 4, 4, 4, 4>. | ||
| define <8 x float> @zero_test_vpermilvar_ps_256_zero(<8 x float> %v) { | ||
| ; CHECK-LABEL: @zero_test_vpermilvar_ps_256_zero( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> | ||
| ; CHECK-NEXT: ret <8 x float> [[TMP1]] | ||
| ; | ||
| %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer) | ||
| ret <8 x float> %a | ||
| } | ||
|
|
||
| ; All-zero mask on a 512-bit vector: each group of four results is expected to repeat the first element of its own | ||
| ; group (elements 0, 4, 8 and 12). | ||
| define <16 x float> @zero_test_vpermilvar_ps_512_zero(<16 x float> %v) { | ||
| ; CHECK-LABEL: @zero_test_vpermilvar_ps_512_zero( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> | ||
| ; CHECK-NEXT: ret <16 x float> [[TMP1]] | ||
| ; | ||
| %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> zeroinitializer) | ||
| ret <16 x float> %a | ||
| } | ||
|
|
||
| ; All-zero mask on a 128-bit double vector: expected to become a shufflevector that splats element 0. | ||
| define <2 x double> @zero_test_vpermilvar_pd_zero(<2 x double> %v) { | ||
| ; CHECK-LABEL: @zero_test_vpermilvar_pd_zero( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> undef, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: ret <2 x double> [[TMP1]] | ||
| ; | ||
| %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer) | ||
| ret <2 x double> %a | ||
| } | ||
|
|
||
| ; All-zero mask on a 256-bit double vector: each pair of results is expected to repeat the first element of its own | ||
| ; pair, giving the shufflevector mask <0, 0, 2, 2>. | ||
| define <4 x double> @zero_test_vpermilvar_pd_256_zero(<4 x double> %v) { | ||
| ; CHECK-LABEL: @zero_test_vpermilvar_pd_256_zero( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | ||
| ; CHECK-NEXT: ret <4 x double> [[TMP1]] | ||
| ; | ||
| %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer) | ||
| ret <4 x double> %a | ||
| } | ||
|
|
||
| ; All-zero mask on a 512-bit double vector: each pair of results is expected to repeat the first element of its own | ||
| ; pair (elements 0, 2, 4 and 6). | ||
| define <8 x double> @zero_test_vpermilvar_pd_512_zero(<8 x double> %v) { | ||
| ; CHECK-LABEL: @zero_test_vpermilvar_pd_512_zero( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> | ||
| ; CHECK-NEXT: ret <8 x double> [[TMP1]] | ||
| ; | ||
| %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> zeroinitializer) | ||
| ret <8 x double> %a | ||
| } | ||
|
|
||
| ; Verify that instcombine is able to fold constant shuffles. | ||
|
|
||
| ; Constant reversing mask <3, 2, 1, 0>: expected to become a shufflevector with the same mask. | ||
| define <4 x float> @test_vpermilvar_ps(<4 x float> %v) { | ||
| ; CHECK-LABEL: @test_vpermilvar_ps( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | ||
| ; CHECK-NEXT: ret <4 x float> [[TMP1]] | ||
| ; | ||
| %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>) | ||
| ret <4 x float> %a | ||
| } | ||
|
|
||
| ; The mask <7, ..., 0> is expected to reverse the elements within each group of four only (result mask | ||
| ; <3, 2, 1, 0, 7, 6, 5, 4>), not across the whole vector. | ||
| define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) { | ||
| ; CHECK-LABEL: @test_vpermilvar_ps_256( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> | ||
| ; CHECK-NEXT: ret <8 x float> [[TMP1]] | ||
| ; | ||
| %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) | ||
| ret <8 x float> %a | ||
| } | ||
|
|
||
| ; The mask <15, ..., 0> is expected to reverse the elements within each group of four only, not across the whole | ||
| ; 512-bit vector. | ||
| define <16 x float> @test_vpermilvar_ps_512(<16 x float> %v) { | ||
| ; CHECK-LABEL: @test_vpermilvar_ps_512( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> | ||
| ; CHECK-NEXT: ret <16 x float> [[TMP1]] | ||
| ; | ||
| %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) | ||
| ret <16 x float> %a | ||
| } | ||
|
|
||
| ; The mask <2, 0> is expected to swap the two doubles (result mask <1, 0>); a mask value of 2 selects element 1. | ||
| define <2 x double> @test_vpermilvar_pd(<2 x double> %v) { | ||
| ; CHECK-LABEL: @test_vpermilvar_pd( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 0> | ||
| ; CHECK-NEXT: ret <2 x double> [[TMP1]] | ||
| ; | ||
| %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>) | ||
| ret <2 x double> %a | ||
| } | ||
|
|
||
| ; The mask <3, 1, 2, 0> is expected to swap the doubles within each pair (result mask <1, 0, 3, 2>); mask values | ||
| ; 3 and 2 select the second element of a pair, 1 and 0 the first. | ||
| define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) { | ||
| ; CHECK-LABEL: @test_vpermilvar_pd_256( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> | ||
| ; CHECK-NEXT: ret <4 x double> [[TMP1]] | ||
| ; | ||
| %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>) | ||
| ret <4 x double> %a | ||
| } | ||
|
|
||
| ; 512-bit double variant: the constant mask is expected to swap the doubles within each pair (result mask | ||
| ; <1, 0, 3, 2, 5, 4, 7, 6>). | ||
| define <8 x double> @test_vpermilvar_pd_512(<8 x double> %v) { | ||
| ; CHECK-LABEL: @test_vpermilvar_pd_512( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> | ||
| ; CHECK-NEXT: ret <8 x double> [[TMP1]] | ||
| ; | ||
| %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 3, i64 1, i64 2, i64 0, i64 7, i64 5, i64 6, i64 4>) | ||
| ret <8 x double> %a | ||
| } | ||
|
|
||
| ; Verify that instcombine is able to fold constant shuffles with undef mask elements. | ||
|
|
||
| ; Undef mask elements (lanes 0 and 3) are expected to carry over as undef lanes in the resulting shufflevector mask. | ||
| define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) { | ||
| ; CHECK-LABEL: @undef_test_vpermilvar_ps( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef> | ||
| ; CHECK-NEXT: ret <4 x float> [[TMP1]] | ||
| ; | ||
| %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>) | ||
| ret <4 x float> %a | ||
| } | ||
|
|
||
| ; 256-bit variant: undef mask lanes stay undef, while the defined lanes are expected to be remapped within their own | ||
| ; group of four (for example mask value 6 in lane 1 becomes index 2). | ||
| define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) { | ||
| ; CHECK-LABEL: @undef_test_vpermilvar_ps_256( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4> | ||
| ; CHECK-NEXT: ret <8 x float> [[TMP1]] | ||
| ; | ||
| %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>) | ||
| ret <8 x float> %a | ||
| } | ||
|
|
||
| ; 512-bit variant: undef mask lanes stay undef, while the defined lanes are expected to be remapped within their own | ||
| ; group of four. | ||
| define <16 x float> @undef_test_vpermilvar_ps_512(<16 x float> %v) { | ||
| ; CHECK-LABEL: @undef_test_vpermilvar_ps_512( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4, i32 undef, i32 10, i32 9, i32 undef, i32 15, i32 14, i32 13, i32 12> | ||
| ; CHECK-NEXT: ret <16 x float> [[TMP1]] | ||
| ; | ||
| %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0, i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>) | ||
| ret <16 x float> %a | ||
| } | ||
|
|
||
| ; Double variant: the undef mask element in lane 0 is expected to stay undef; lane 1 selects element 0. | ||
| define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) { | ||
| ; CHECK-LABEL: @undef_test_vpermilvar_pd( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> undef, <2 x i32> <i32 undef, i32 0> | ||
| ; CHECK-NEXT: ret <2 x double> [[TMP1]] | ||
| ; | ||
| %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>) | ||
| ret <2 x double> %a | ||
| } | ||
|
|
||
| ; 256-bit double variant: undef mask lanes stay undef; mask value 1 selects the first element of its pair (index 0) | ||
| ; and mask value 2 selects the second element of its pair (index 3). | ||
| define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) { | ||
| ; CHECK-LABEL: @undef_test_vpermilvar_pd_256( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef> | ||
| ; CHECK-NEXT: ret <4 x double> [[TMP1]] | ||
| ; | ||
| %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>) | ||
| ret <4 x double> %a | ||
| } | ||
|
|
||
| ; 512-bit double variant: undef mask lanes stay undef and the defined lanes are expected to be remapped within their | ||
| ; own pair of elements. | ||
| define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) { | ||
| ; CHECK-LABEL: @undef_test_vpermilvar_pd_512( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 0, i32 3, i32 undef, i32 undef, i32 4, i32 7, i32 undef> | ||
| ; CHECK-NEXT: ret <8 x double> [[TMP1]] | ||
| ; | ||
| %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 undef, i64 1, i64 2, i64 undef, i64 undef, i64 1, i64 2, i64 undef>) | ||
| ret <8 x double> %a | ||
| } | ||
|
|
||
| ; Verify that instcombine simplifies the intrinsic call and its mask operand when only some result elements are demanded. | ||
|
|
||
| ; The only variable mask element (%a1, lane 3) feeds a result lane that the final shufflevector leaves undef, so the | ||
| ; call is expected to fold to a single shufflevector of %a0. | ||
| define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) { | ||
| ; CHECK-LABEL: @elts_test_vpermilvar_ps( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> | ||
| ; CHECK-NEXT: ret <4 x float> [[TMP1]] | ||
| ; | ||
| %1 = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %a1, i32 3 | ||
| %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1) | ||
| %3 = shufflevector <4 x float> %2, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> | ||
| ret <4 x float> %3 | ||
| } | ||
|
|
||
| ; The mask interleaves variable elements of %a1 (even lanes) with constants (odd lanes). Only the odd result lanes | ||
| ; are demanded, so the call is expected to fold to a shufflevector of %a0 with a fully constant mask. | ||
| define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { | ||
| ; CHECK-LABEL: @elts_test_vpermilvar_ps_256( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 6, i32 undef, i32 7> | ||
| ; CHECK-NEXT: ret <8 x float> [[TMP1]] | ||
| ; | ||
| %1 = shufflevector <8 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 2, i32 1, i32 0>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> | ||
| %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1) | ||
| %3 = shufflevector <8 x float> %2, <8 x float> poison, <8 x i32> <i32 undef, i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7> | ||
| ret <8 x float> %3 | ||
| } | ||
|
|
||
| ; %a2 is inserted into mask lane 0, but result lane 0 is not demanded by the final shufflevector. The insertelement | ||
| ; is expected to be dropped so the call uses %a1 directly; the mask stays variable, so the call itself remains. | ||
| define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) { | ||
| ; CHECK-LABEL: @elts_test_vpermilvar_ps_512( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[A0:%.*]], <16 x i32> [[A1:%.*]]) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> | ||
| ; CHECK-NEXT: ret <16 x float> [[TMP2]] | ||
| ; | ||
| %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0 | ||
| %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1) | ||
| %3 = shufflevector <16 x float> %2, <16 x float> poison, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> | ||
| ret <16 x float> %3 | ||
| } | ||
|
|
||
| ; The only variable mask element (%a1, lane 1) feeds a result lane that the final shufflevector leaves undef, so the | ||
| ; call is expected to fold to a single shufflevector of %a0. | ||
| define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) { | ||
| ; CHECK-LABEL: @elts_test_vpermilvar_pd( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> poison, <2 x i32> <i32 0, i32 undef> | ||
| ; CHECK-NEXT: ret <2 x double> [[TMP1]] | ||
| ; | ||
| %1 = insertelement <2 x i64> <i64 0, i64 2>, i64 %a1, i32 1 | ||
| %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1) | ||
| %3 = shufflevector <2 x double> %2, <2 x double> poison, <2 x i32> <i32 0, i32 undef> | ||
| ret <2 x double> %3 | ||
| } | ||
|
|
||
| ; The mask takes three constants (2, 0, 2) followed by one variable element of %a1 in lane 3. Result lane 3 is not | ||
| ; demanded, so the call is expected to fold to a shufflevector of %a0 with mask <1, 0, 3, undef>. | ||
| define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { | ||
| ; CHECK-LABEL: @elts_test_vpermilvar_pd_256( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 undef> | ||
| ; CHECK-NEXT: ret <4 x double> [[TMP1]] | ||
| ; | ||
| %1 = shufflevector <4 x i64> <i64 0, i64 2, i64 0, i64 2>, <4 x i64> %a1, <4 x i32> <i32 1, i32 2, i32 3, i32 4> | ||
| %2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %1) | ||
| %3 = shufflevector <4 x double> %2, <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> | ||
| ret <4 x double> %3 | ||
| } | ||
|
|
||
| ; The final shufflevector splats result lane 0, so only mask lane 0 (%a2) is demanded. The remaining mask lanes from | ||
| ; %a1 are expected to be replaced by poison; the call remains because the demanded mask element is variable. | ||
| define <8 x double> @elts_test_vpermilvar_pd_512(<8 x double> %a0, <8 x i64> %a1, i64 %a2) { | ||
| ; CHECK-LABEL: @elts_test_vpermilvar_pd_512( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i64> poison, i64 [[A2:%.*]], i32 0 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[A0:%.*]], <8 x i64> [[TMP1]]) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <8 x i32> zeroinitializer | ||
| ; CHECK-NEXT: ret <8 x double> [[TMP3]] | ||
| ; | ||
| %1 = insertelement <8 x i64> %a1, i64 %a2, i32 0 | ||
| %2 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %1) | ||
| %3 = shufflevector <8 x double> %2, <8 x double> poison, <8 x i32> zeroinitializer | ||
| ret <8 x double> %3 | ||
| } | ||
|
|
||
| declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) | ||
| declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) | ||
| declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>) | ||
|
|
||
| declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) | ||
| declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) | ||
| declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,375 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S | FileCheck %s | ||
|
|
||
| ; Canonicalize vector ge/le comparisons with constants to gt/lt. | ||
|
|
||
| ; Normal types are ConstantDataVectors. Test the constant values adjacent to the | ||
| ; min/max values that we're not allowed to transform. | ||
|
|
||
| ; sge is expected to become sgt with each constant decremented. The operand -129 does not fit in i8 and wraps to | ||
| ; 127, which is why the second expected constant is 126. | ||
| define <2 x i1> @sge(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @sge( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -128, i8 126> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sge <2 x i8> %x, <i8 -127, i8 -129> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; uge is expected to become ugt with each constant decremented (<-1, 1> becomes <-2, 0>). | ||
| define <2 x i1> @uge(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @uge( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 -2, i8 0> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp uge <2 x i8> %x, <i8 -1, i8 1> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; sle is expected to become slt with each constant incremented. The operand 128 does not fit in signed i8 and is | ||
| ; -128, which is why the second expected constant is -127. | ||
| define <2 x i1> @sle(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @sle( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 127, i8 -127> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sle <2 x i8> %x, <i8 126, i8 128> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; ule is expected to become ult with each constant incremented; 254 + 1 is 255, which is printed as i8 -1. | ||
| define <2 x i1> @ule(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @ule( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 -1, i8 1> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp ule <2 x i8> %x, <i8 254, i8 0> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; An unsigned less-than against 128 (the i8 sign bit) is expected to become a signed greater-than against -1. | ||
| define <2 x i1> @ult_min_signed_value(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @ult_min_signed_value( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp ult <2 x i8> %x, <i8 128, i8 128> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; Zeros are special: they're ConstantAggregateZero. | ||
|
|
||
| ; sge against an all-zero vector is expected to become sgt against a splat of -1. | ||
| define <2 x i1> @sge_zero(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @sge_zero( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sge <2 x i8> %x, <i8 0, i8 0> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; uge against an all-zero vector is always true, so the compare is expected to fold to a constant true vector. | ||
| define <2 x i1> @uge_zero(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @uge_zero( | ||
| ; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true> | ||
| ; | ||
| %cmp = icmp uge <2 x i8> %x, <i8 0, i8 0> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; sle against an all-zero vector is expected to become slt against a splat of 1. | ||
| define <2 x i1> @sle_zero(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @sle_zero( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 1, i8 1> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sle <2 x i8> %x, <i8 0, i8 0> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; ule against an all-zero vector can only hold for zero, so it is expected to become an equality compare with zero. | ||
| define <2 x i1> @ule_zero(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @ule_zero( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp ule <2 x i8> %x, <i8 0, i8 0> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; Weird types are ConstantVectors, not ConstantDataVectors. For an i3 type: | ||
| ; Signed min = -4 | ||
| ; Unsigned min = 0 | ||
| ; Signed max = 3 | ||
| ; Unsigned max = 7 | ||
|
|
||
| ; i3 version of the sge test: expected to become sgt with each constant decremented. The operand -5 does not fit | ||
| ; in i3 and wraps to 3, which is why the second expected constant is 2. | ||
| define <3 x i1> @sge_weird(<3 x i3> %x) { | ||
| ; CHECK-LABEL: @sge_weird( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i3> [[X:%.*]], <i3 -4, i3 2, i3 -1> | ||
| ; CHECK-NEXT: ret <3 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sge <3 x i3> %x, <i3 -3, i3 -5, i3 0> | ||
| ret <3 x i1> %cmp | ||
| } | ||
|
|
||
| ; i3 version of the uge test: expected to become ugt with each constant decremented. | ||
| define <3 x i1> @uge_weird(<3 x i3> %x) { | ||
| ; CHECK-LABEL: @uge_weird( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <3 x i3> [[X:%.*]], <i3 -2, i3 0, i3 1> | ||
| ; CHECK-NEXT: ret <3 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp uge <3 x i3> %x, <i3 -1, i3 1, i3 2> | ||
| ret <3 x i1> %cmp | ||
| } | ||
|
|
||
| ; i3 version of the sle test: expected to become slt with each constant incremented. The operand 4 does not fit in | ||
| ; signed i3 and is -4, which is why the second expected constant is -3. | ||
| define <3 x i1> @sle_weird(<3 x i3> %x) { | ||
| ; CHECK-LABEL: @sle_weird( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i3> [[X:%.*]], <i3 3, i3 -3, i3 1> | ||
| ; CHECK-NEXT: ret <3 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sle <3 x i3> %x, <i3 2, i3 4, i3 0> | ||
| ret <3 x i1> %cmp | ||
| } | ||
|
|
||
| ; i3 version of the ule test: expected to become ult with each constant incremented; 6 + 1 is 7, printed as i3 -1. | ||
| define <3 x i1> @ule_weird(<3 x i3> %x) { | ||
| ; CHECK-LABEL: @ule_weird( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult <3 x i3> [[X:%.*]], <i3 -1, i3 1, i3 2> | ||
| ; CHECK-NEXT: ret <3 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp ule <3 x i3> %x, <i3 6, i3 0, i3 1> | ||
| ret <3 x i1> %cmp | ||
| } | ||
|
|
||
| ; We can't do the transform if any constants are already at the limits. | ||
|
|
||
| ; Negative test: element 0 is -4, the signed minimum of i3, so it cannot be decremented and the sge is left unchanged. | ||
| define <2 x i1> @sge_min(<2 x i3> %x) { | ||
| ; CHECK-LABEL: @sge_min( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i3> [[X:%.*]], <i3 -4, i3 1> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sge <2 x i3> %x, <i3 -4, i3 1> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; Negative test: element 1 is 0, the unsigned minimum, so it cannot be decremented and the uge is left unchanged. | ||
| define <2 x i1> @uge_min(<2 x i3> %x) { | ||
| ; CHECK-LABEL: @uge_min( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp uge <2 x i3> [[X:%.*]], <i3 1, i3 0> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp uge <2 x i3> %x, <i3 1, i3 0> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; Negative test: element 1 is 3, the signed maximum of i3, so it cannot be incremented and the sle is left unchanged. | ||
| define <2 x i1> @sle_max(<2 x i3> %x) { | ||
| ; CHECK-LABEL: @sle_max( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i3> [[X:%.*]], <i3 1, i3 3> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sle <2 x i3> %x, <i3 1, i3 3> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; Negative test: element 0 is 7, the unsigned maximum of i3 (printed as -1), so it cannot be incremented and the | ||
| ; ule is left unchanged. | ||
| define <2 x i1> @ule_max(<2 x i3> %x) { | ||
| ; CHECK-LABEL: @ule_max( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ule <2 x i3> [[X:%.*]], <i3 -1, i3 1> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp ule <2 x i3> %x, <i3 7, i3 1> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; One constant element is a bitcast constant expression. It is expected to be folded to a plain integer so that | ||
| ; the sle can still become slt with incremented constants (<34, 1>). | ||
| define <2 x i1> @PR27756_1(<2 x i8> %a) { | ||
| ; CHECK-LABEL: @PR27756_1( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], <i8 34, i8 1> | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sle <2 x i8> %a, <i8 bitcast (<2 x i4> <i4 1, i4 2> to i8), i8 0> | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; Undef elements don't prevent the transform of the comparison. | ||
|
|
||
| ; The undef element in lane 1 does not block the sle-to-slt transform; in the expected output it is replaced by | ||
| ; the same value as lane 0 (43). | ||
| define <3 x i1> @PR27756_2(<3 x i8> %a) { | ||
| ; CHECK-LABEL: @PR27756_2( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i8> [[A:%.*]], <i8 43, i8 43, i8 1> | ||
| ; CHECK-NEXT: ret <3 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sle <3 x i8> %a, <i8 42, i8 undef, i8 0> | ||
| ret <3 x i1> %cmp | ||
| } | ||
|
|
||
| ; The undef element in lane 0 does not block the sge-to-sgt transform; in the expected output it is replaced by | ||
| ; the same value as lane 1 (0). | ||
| define <3 x i1> @PR27756_3(<3 x i8> %a) { | ||
| ; CHECK-LABEL: @PR27756_3( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i8> [[A:%.*]], <i8 0, i8 0, i8 41> | ||
| ; CHECK-NEXT: ret <3 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sge <3 x i8> %a, <i8 undef, i8 1, i8 42> | ||
| ret <3 x i1> %cmp | ||
| } | ||
|
|
||
| @someglobal = global i32 0 | ||
|
|
||
| ; Negative test: the constant operand is a bitcast of a ptrtoint of @someglobal, whose element values are not known, | ||
| ; so the sle is expected to be left unchanged. | ||
| define <2 x i1> @PR27786(<2 x i8> %a) { | ||
| ; CHECK-LABEL: @PR27786( | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i8> [[A:%.*]], bitcast (i16 ptrtoint (i32* @someglobal to i16) to <2 x i8>) | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %cmp = icmp sle <2 x i8> %a, bitcast (i16 ptrtoint (i32* @someglobal to i16) to <2 x i8>) | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; This is similar to a transform for shuffled binops: compare first, shuffle after. | ||
|
|
||
| ; Both compare operands are shuffled with the same mask, so the compare is expected to be done on the unshuffled | ||
| ; inputs and the i1 result shuffled once afterwards. | ||
| define <4 x i1> @same_shuffle_inputs_icmp(<4 x i8> %x, <4 x i8> %y) { | ||
| ; CHECK-LABEL: @same_shuffle_inputs_icmp( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], [[Y:%.*]] | ||
| ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 2, i32 0> | ||
| ; CHECK-NEXT: ret <4 x i1> [[CMP]] | ||
| ; | ||
| %shufx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> < i32 3, i32 3, i32 2, i32 0 > | ||
| %shufy = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> < i32 3, i32 3, i32 2, i32 0 > | ||
| %cmp = icmp sgt <4 x i8> %shufx, %shufy | ||
| ret <4 x i1> %cmp | ||
| } | ||
|
|
||
| ; fcmp and size-changing shuffles are ok too. | ||
|
|
||
| ; Same transform for fcmp with shuffles that widen 4 elements to 5: the compare is expected to run on the 4-element | ||
| ; inputs and the i1 result is then shuffled to 5 elements. | ||
| define <5 x i1> @same_shuffle_inputs_fcmp(<4 x float> %x, <4 x float> %y) { | ||
| ; CHECK-LABEL: @same_shuffle_inputs_fcmp( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <4 x float> [[X:%.*]], [[Y:%.*]] | ||
| ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <5 x i32> <i32 0, i32 1, i32 3, i32 2, i32 0> | ||
| ; CHECK-NEXT: ret <5 x i1> [[CMP]] | ||
| ; | ||
| %shufx = shufflevector <4 x float> %x, <4 x float> poison, <5 x i32> < i32 0, i32 1, i32 3, i32 2, i32 0 > | ||
| %shufy = shufflevector <4 x float> %y, <4 x float> poison, <5 x i32> < i32 0, i32 1, i32 3, i32 2, i32 0 > | ||
| %cmp = fcmp oeq <5 x float> %shufx, %shufy | ||
| ret <5 x i1> %cmp | ||
| } | ||
|
|
||
| declare void @use_v4i8(<4 x i8>) | ||
|
|
||
| ; %shufx has an extra use (the call), but %shufy does not, so the compare is still expected to be moved ahead of | ||
| ; the shuffle; %shufx is kept only for the call. | ||
| define <4 x i1> @same_shuffle_inputs_icmp_extra_use1(<4 x i8> %x, <4 x i8> %y) { | ||
| ; CHECK-LABEL: @same_shuffle_inputs_icmp_extra_use1( | ||
| ; CHECK-NEXT: [[SHUFX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i8> [[X]], [[Y:%.*]] | ||
| ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | ||
| ; CHECK-NEXT: call void @use_v4i8(<4 x i8> [[SHUFX]]) | ||
| ; CHECK-NEXT: ret <4 x i1> [[CMP]] | ||
| ; | ||
| %shufx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > | ||
| %shufy = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > | ||
| %cmp = icmp ugt <4 x i8> %shufx, %shufy | ||
| call void @use_v4i8(<4 x i8> %shufx) | ||
| ret <4 x i1> %cmp | ||
| } | ||
|
|
||
| declare void @use_v2i8(<2 x i8>) | ||
|
|
||
| ; Mirror of the previous test with narrowing shuffles: %shufy has the extra use and %shufx does not, so the compare | ||
| ; is still expected to be moved ahead of the shuffle. | ||
| define <2 x i1> @same_shuffle_inputs_icmp_extra_use2(<4 x i8> %x, <4 x i8> %y) { | ||
| ; CHECK-LABEL: @same_shuffle_inputs_icmp_extra_use2( | ||
| ; CHECK-NEXT: [[SHUFY:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> <i32 3, i32 2> | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[X:%.*]], [[Y]] | ||
| ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <2 x i32> <i32 3, i32 2> | ||
| ; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[SHUFY]]) | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %shufx = shufflevector <4 x i8> %x, <4 x i8> poison, <2 x i32> < i32 3, i32 2 > | ||
| %shufy = shufflevector <4 x i8> %y, <4 x i8> poison, <2 x i32> < i32 3, i32 2 > | ||
| %cmp = icmp eq <2 x i8> %shufx, %shufy | ||
| call void @use_v2i8(<2 x i8> %shufy) | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; Negative test: if both shuffles have extra uses, don't transform because that would increase instruction count. | ||
|
|
||
| ; Both shuffles are also passed to @use_v2i8, so both must stay; the compare is expected to keep using the shuffled | ||
| ; values. | ||
| define <2 x i1> @same_shuffle_inputs_icmp_extra_use3(<4 x i8> %x, <4 x i8> %y) { | ||
| ; CHECK-LABEL: @same_shuffle_inputs_icmp_extra_use3( | ||
| ; CHECK-NEXT: [[SHUFX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[SHUFY:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SHUFX]], [[SHUFY]] | ||
| ; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[SHUFX]]) | ||
| ; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[SHUFY]]) | ||
| ; CHECK-NEXT: ret <2 x i1> [[CMP]] | ||
| ; | ||
| %shufx = shufflevector <4 x i8> %x, <4 x i8> poison, <2 x i32> < i32 0, i32 0 > | ||
| %shufy = shufflevector <4 x i8> %y, <4 x i8> poison, <2 x i32> < i32 0, i32 0 > | ||
| %cmp = icmp eq <2 x i8> %shufx, %shufy | ||
| call void @use_v2i8(<2 x i8> %shufx) | ||
| call void @use_v2i8(<2 x i8> %shufy) | ||
| ret <2 x i1> %cmp | ||
| } | ||
|
|
||
| ; A splat of lane 3 compared against a splat constant: the compare is expected to run on %x directly, with the i1 | ||
| ; result splatted afterwards. | ||
| define <4 x i1> @splat_icmp(<4 x i8> %x) { | ||
| ; CHECK-LABEL: @splat_icmp( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 42, i8 42> | ||
| ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | ||
| ; CHECK-NEXT: ret <4 x i1> [[CMP]] | ||
| ; | ||
| %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | ||
| %cmp = icmp sgt <4 x i8> %splatx, <i8 42, i8 42, i8 42, i8 42> | ||
| ret <4 x i1> %cmp | ||
| } | ||
|
|
||
| ; Both the splat mask and the constant contain undef elements; the transform is still expected, and the output | ||
| ; uses a full splat mask <2, 2, 2, 2> and a full splat constant of 42. | ||
| define <4 x i1> @splat_icmp_undef(<4 x i8> %x) { | ||
| ; CHECK-LABEL: @splat_icmp_undef( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 42, i8 42> | ||
| ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> | ||
| ; CHECK-NEXT: ret <4 x i1> [[CMP]] | ||
| ; | ||
| %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 2> | ||
| %cmp = icmp ult <4 x i8> %splatx, <i8 undef, i8 42, i8 undef, i8 42> | ||
| ret <4 x i1> %cmp | ||
| } | ||
|
|
||
| ; The splat widens 2 elements to 4: the compare is expected to run on the 2-element source against a 2-element | ||
| ; splat constant, and the i1 result is then splatted to 4 elements. | ||
| define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @splat_icmp_larger_size( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 42, i8 42> | ||
| ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
| ; CHECK-NEXT: ret <4 x i1> [[CMP]] | ||
| ; | ||
| %splatx = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> <i32 1, i32 undef, i32 1, i32 undef> | ||
| %cmp = icmp eq <4 x i8> %splatx, <i8 42, i8 42, i8 undef, i8 42> | ||
| ret <4 x i1> %cmp | ||
| } | ||
|
|
||
| ; The splat narrows 5 elements to 4 and the compare is an fcmp: it is expected to run on the 5-element source | ||
| ; against a 5-element splat constant, and the i1 result is then splatted to 4 elements. | ||
| define <4 x i1> @splat_fcmp_smaller_size(<5 x float> %x) { | ||
| ; CHECK-LABEL: @splat_fcmp_smaller_size( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], <float 4.200000e+01, float 4.200000e+01, float 4.200000e+01, float 4.200000e+01, float 4.200000e+01> | ||
| ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <5 x i1> [[TMP1]], <5 x i1> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
| ; CHECK-NEXT: ret <4 x i1> [[CMP]] | ||
| ; | ||
| %splatx = shufflevector <5 x float> %x, <5 x float> poison, <4 x i32> <i32 1, i32 undef, i32 1, i32 undef> | ||
| %cmp = fcmp oeq <4 x float> %splatx, <float 42.0, float 42.0, float undef, float 42.0> | ||
| ret <4 x i1> %cmp | ||
| } | ||
|
|
||
| ; Negative test | ||
|
|
||
| ; The splat shuffle has a second use (the call), so it must stay; the compare is expected to keep using the | ||
| ; splatted value rather than being moved ahead of the shuffle. | ||
| define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { | ||
| ; CHECK-LABEL: @splat_icmp_extra_use( | ||
| ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | ||
| ; CHECK-NEXT: call void @use_v4i8(<4 x i8> [[SPLATX]]) | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], <i8 42, i8 42, i8 42, i8 42> | ||
| ; CHECK-NEXT: ret <4 x i1> [[CMP]] | ||
| ; | ||
| %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> | ||
| call void @use_v4i8(<4 x i8> %splatx) | ||
| %cmp = icmp sgt <4 x i8> %splatx, <i8 42, i8 42, i8 42, i8 42> | ||
| ret <4 x i1> %cmp | ||
| } | ||
|
|
||
| ; Negative test | ||
|
|
||
| ; The shuffle mask <3, 2, 3, 3> is not a splat; the checks expect the shuffle and icmp to stay as written. | ||
| define <4 x i1> @not_splat_icmp(<4 x i8> %x) { | ||
| ; CHECK-LABEL: @not_splat_icmp( | ||
| ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 3, i32 3> | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], <i8 42, i8 42, i8 42, i8 42> | ||
| ; CHECK-NEXT: ret <4 x i1> [[CMP]] | ||
| ; | ||
| %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 3, i32 3> | ||
| %cmp = icmp sgt <4 x i8> %splatx, <i8 42, i8 42, i8 42, i8 42> | ||
| ret <4 x i1> %cmp | ||
| } | ||
|
|
||
| ; Negative test | ||
|
|
||
| ; The shuffle is a splat, but the constant operand <43, 42, 42, 42> is not; the checks expect no change. | ||
| define <4 x i1> @not_splat_icmp2(<4 x i8> %x) { | ||
| ; CHECK-LABEL: @not_splat_icmp2( | ||
| ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], <i8 43, i8 42, i8 42, i8 42> | ||
| ; CHECK-NEXT: ret <4 x i1> [[CMP]] | ||
| ; | ||
| %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> | ||
| %cmp = icmp sgt <4 x i8> %splatx, <i8 43, i8 42, i8 42, i8 42> | ||
| ret <4 x i1> %cmp | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,142 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S | FileCheck %s | ||
|
|
||
| ; Negation without nsw followed by sdiv by 337; the checks expect both instructions to be kept. | ||
| define i32 @sub1(i32 %x) { | ||
| ; CHECK-LABEL: @sub1( | ||
| ; CHECK-NEXT: [[Y:%.*]] = sub i32 0, [[X:%.*]] | ||
| ; CHECK-NEXT: [[Z:%.*]] = sdiv i32 [[Y]], 337 | ||
| ; CHECK-NEXT: ret i32 [[Z]] | ||
| ; | ||
| %y = sub i32 0, %x | ||
| %z = sdiv i32 %y, 337 | ||
| ret i32 %z | ||
| } | ||
|
|
||
| ; Same as @sub1 but the negation carries nsw; the checks expect a single sdiv of %x by -337. | ||
| define i32 @sub2(i32 %x) { | ||
| ; CHECK-LABEL: @sub2( | ||
| ; CHECK-NEXT: [[Z:%.*]] = sdiv i32 [[X:%.*]], -337 | ||
| ; CHECK-NEXT: ret i32 [[Z]] | ||
| ; | ||
| %y = sub nsw i32 0, %x | ||
| %z = sdiv i32 %y, 337 | ||
| ret i32 %z | ||
| } | ||
|
|
||
| ; A shl nuw result is compared with zero; the checks expect the compare to be done on %X directly. | ||
| define i1 @shl_icmp(i64 %X) { | ||
| ; CHECK-LABEL: @shl_icmp( | ||
| ; CHECK-NEXT: [[B:%.*]] = icmp eq i64 [[X:%.*]], 0 | ||
| ; CHECK-NEXT: ret i1 [[B]] | ||
| ; | ||
| %A = shl nuw i64 %X, 2 ; X*4 | ||
| %B = icmp eq i64 %A, 0 | ||
| ret i1 %B | ||
| } | ||
|
|
||
| ; %X is masked with 312, stored, and shifted left by 8; the checks expect the shl to gain nuw and nsw. | ||
| define i64 @shl1(i64 %X, i64* %P) { | ||
| ; CHECK-LABEL: @shl1( | ||
| ; CHECK-NEXT: [[A:%.*]] = and i64 [[X:%.*]], 312 | ||
| ; CHECK-NEXT: store i64 [[A]], i64* [[P:%.*]], align 4 | ||
| ; CHECK-NEXT: [[B:%.*]] = shl nuw nsw i64 [[A]], 8 | ||
| ; CHECK-NEXT: ret i64 [[B]] | ||
| ; | ||
| %A = and i64 %X, 312 | ||
| store i64 %A, i64* %P ; multiple uses of A. | ||
| %B = shl i64 %A, 8 | ||
| ret i64 %B | ||
| } | ||
|
|
||
| ; Two nsw adds of 2 and 3; the checks expect one add of 5 that keeps nsw. | ||
| define i32 @preserve1(i32 %x) { | ||
| ; CHECK-LABEL: @preserve1( | ||
| ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[X:%.*]], 5 | ||
| ; CHECK-NEXT: ret i32 [[ADD3]] | ||
| ; | ||
| %add = add nsw i32 %x, 2 | ||
| %add3 = add nsw i32 %add, 3 | ||
| ret i32 %add3 | ||
| } | ||
|
|
||
| ; Two nsw i8 adds of 127 and 3; the checks expect one add of -126 with nsw dropped. | ||
| define i8 @nopreserve1(i8 %x) { | ||
| ; CHECK-LABEL: @nopreserve1( | ||
| ; CHECK-NEXT: [[ADD3:%.*]] = add i8 [[X:%.*]], -126 | ||
| ; CHECK-NEXT: ret i8 [[ADD3]] | ||
| ; | ||
| %add = add nsw i8 %x, 127 | ||
| %add3 = add nsw i8 %add, 3 | ||
| ret i8 %add3 | ||
| } | ||
|
|
||
| ; Only the second add carries nsw; the checks expect one add of 3 without nsw. | ||
| define i8 @nopreserve2(i8 %x) { | ||
| ; CHECK-LABEL: @nopreserve2( | ||
| ; CHECK-NEXT: [[ADD3:%.*]] = add i8 [[X:%.*]], 3 | ||
| ; CHECK-NEXT: ret i8 [[ADD3]] | ||
| ; | ||
| %add = add i8 %x, 1 | ||
| %add3 = add nsw i8 %add, 2 | ||
| ret i8 %add3 | ||
| } | ||
|
|
||
| ; (A + 10) + (B + 10) where only the outer add carries nsw; the checks expect (A + B) + 20 with no flags. | ||
| define i8 @nopreserve3(i8 %A, i8 %B) { | ||
| ; CHECK-LABEL: @nopreserve3( | ||
| ; CHECK-NEXT: [[Y:%.*]] = add i8 [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y]], 20 | ||
| ; CHECK-NEXT: ret i8 [[ADD]] | ||
| ; | ||
| %x = add i8 %A, 10 | ||
| %y = add i8 %B, 10 | ||
| %add = add nsw i8 %x, %y | ||
| ret i8 %add | ||
| } | ||
|
|
||
| ; Same as @nopreserve3 but all three adds carry nsw; the checks still expect (A + B) + 20 with no flags. | ||
| define i8 @nopreserve4(i8 %A, i8 %B) { | ||
| ; CHECK-LABEL: @nopreserve4( | ||
| ; CHECK-NEXT: [[Y:%.*]] = add i8 [[A:%.*]], [[B:%.*]] | ||
| ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y]], 20 | ||
| ; CHECK-NEXT: ret i8 [[ADD]] | ||
| ; | ||
| %x = add nsw i8 %A, 10 | ||
| %y = add nsw i8 %B, 10 | ||
| %add = add nsw i8 %x, %y | ||
| ret i8 %add | ||
| } | ||
|
|
||
| ; A zext from i8 to i32 is shuffled with a fully defined mask and shifted left by 17. | ||
| ; The checks expect the shl to gain nuw and nsw. | ||
| define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec( | ||
| ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> | ||
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 1> | ||
| ; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], <i32 17, i32 17, i32 17> | ||
| ; CHECK-NEXT: ret <3 x i32> [[T3]] | ||
| ; | ||
| %t2 = zext <2 x i8> %x to <2 x i32> | ||
| %shuf = shufflevector <2 x i32> %t2, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 1> | ||
| %t3 = shl <3 x i32> %shuf, <i32 17, i32 17, i32 17> | ||
| ret <3 x i32> %t3 | ||
| } | ||
|
|
||
| ; Negative test - if the shuffle mask contains an undef, we bail out to | ||
| ; avoid propagating information that may not be used consistently by callers. | ||
|
|
||
| ; Same zext/shuffle/shl pattern, but the shuffle mask has an undef element; the checks expect the shl to stay flagless. | ||
| define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) { | ||
| ; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec( | ||
| ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> | ||
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> <i32 1, i32 undef, i32 0> | ||
| ; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], <i32 17, i32 17, i32 17> | ||
| ; CHECK-NEXT: ret <3 x i32> [[T3]] | ||
| ; | ||
| %t2 = zext <2 x i8> %x to <2 x i32> | ||
| %shuf = shufflevector <2 x i32> %t2, <2 x i32> poison, <3 x i32> <i32 1, i32 undef, i32 0> | ||
| %t3 = shl <3 x i32> %shuf, <i32 17, i32 17, i32 17> | ||
| ret <3 x i32> %t3 | ||
| } | ||
|
|
||
| ; Make sure we don't crash on a ConstantExpr shufflevector. | ||
| ; %shuf is a constant-expression splat of 3 over a scalable vector; the checks expect the mul to | ||
| ; remain flagless, with the constant moved to the right-hand operand. | ||
| define <vscale x 2 x i64> @mul_nuw_nsw_shuffle_constant_expr(<vscale x 2 x i8> %z) { | ||
| ; CHECK-LABEL: @mul_nuw_nsw_shuffle_constant_expr( | ||
| ; CHECK-NEXT: [[XX:%.*]] = zext <vscale x 2 x i8> [[Z:%.*]] to <vscale x 2 x i64> | ||
| ; CHECK-NEXT: [[T3:%.*]] = mul <vscale x 2 x i64> [[XX]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer) | ||
| ; CHECK-NEXT: ret <vscale x 2 x i64> [[T3]] | ||
| ; | ||
| %xx = zext <vscale x 2 x i8> %z to <vscale x 2 x i64> | ||
| %shuf = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer | ||
| %t3 = mul <vscale x 2 x i64> %shuf, %xx | ||
| ret <vscale x 2 x i64> %t3 | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| ; RUN: opt -instcombine -S < %s | FileCheck %s | ||
|
|
||
| ; Three chained shuffles fill lanes 0-1, then lane 2, then lane 3 with element 0 of %A. | ||
| ; The CHECK line expects them to collapse into one splat of lane 0 (zeroinitializer mask). | ||
| define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) { | ||
| %A = load <4 x float>, <4 x float>* %in_ptr, align 16 | ||
| %B = shufflevector <4 x float> %A, <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef> | ||
| %C = shufflevector <4 x float> %B, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 4, i32 undef> | ||
| %D = shufflevector <4 x float> %C, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 4> | ||
| ; CHECK: %D = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> zeroinitializer | ||
| store <4 x float> %D, <4 x float> *%out_ptr | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S | grep "insertelement <4 x float> poison" | ||
|
|
||
| ; Instcombine should be able to prove that none of the | ||
| ; insertelement's first operand's elements are needed. | ||
|
|
||
| ; Loop that converts the counter %.0 to float, inserts it into lane 0 of the loop-carried vector %.01, | ||
| ; and splats lane 0 of the result; the splat mask reads only lane 0, so no other lane of %.01 is used. | ||
| define internal void @""(i8*) { | ||
| ; <label>:1 | ||
| bitcast i8* %0 to i32* ; <i32*>:2 [#uses=1] | ||
| load i32, i32* %2, align 1 ; <i32>:3 [#uses=1] | ||
| getelementptr i8, i8* %0, i32 4 ; <i8*>:4 [#uses=1] | ||
| bitcast i8* %4 to i32* ; <i32*>:5 [#uses=1] | ||
| load i32, i32* %5, align 1 ; <i32>:6 [#uses=1] | ||
| br label %7 | ||
|
|
||
| ; <label>:7 ; preds = %9, %1 | ||
| %.01 = phi <4 x float> [ undef, %1 ], [ %12, %9 ] ; <<4 x float>> [#uses=1] | ||
| %.0 = phi i32 [ %3, %1 ], [ %15, %9 ] ; <i32> [#uses=3] | ||
| icmp slt i32 %.0, %6 ; <i1>:8 [#uses=1] | ||
| br i1 %8, label %9, label %16 | ||
|
|
||
| ; <label>:9 ; preds = %7 | ||
| sitofp i32 %.0 to float ; <float>:10 [#uses=1] | ||
| insertelement <4 x float> %.01, float %10, i32 0 ; <<4 x float>>:11 [#uses=1] | ||
| shufflevector <4 x float> %11, <4 x float> poison, <4 x i32> zeroinitializer ; <<4 x float>>:12 [#uses=2] | ||
| getelementptr i8, i8* %0, i32 48 ; <i8*>:13 [#uses=1] | ||
| bitcast i8* %13 to <4 x float>* ; <<4 x float>*>:14 [#uses=1] | ||
| store <4 x float> %12, <4 x float>* %14, align 16 | ||
| add i32 %.0, 2 ; <i32>:15 [#uses=1] | ||
| br label %7 | ||
|
|
||
| ; <label>:16 ; preds = %7 | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,123 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ANY,LE | ||
| ; RUN: opt < %s -instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ANY,BE | ||
|
|
||
| ; Bitcast <4 x i32> to <8 x i16>, then select the even elements (0, 2, 4, 6). | ||
| ; The LE checks expect a trunc of %x; the BE checks expect the bitcast and shuffle to remain. | ||
| define <4 x i16> @trunc_little_endian(<4 x i32> %x) { | ||
| ; LE-LABEL: @trunc_little_endian( | ||
| ; LE-NEXT: [[R:%.*]] = trunc <4 x i32> [[X:%.*]] to <4 x i16> | ||
| ; LE-NEXT: ret <4 x i16> [[R]] | ||
| ; | ||
| ; BE-LABEL: @trunc_little_endian( | ||
| ; BE-NEXT: [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16> | ||
| ; BE-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||
| ; BE-NEXT: ret <4 x i16> [[R]] | ||
| ; | ||
| %b = bitcast <4 x i32> %x to <8 x i16> | ||
| %r = shufflevector <8 x i16> %b, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||
| ret <4 x i16> %r | ||
| } | ||
|
|
||
| ; Bitcast <4 x i32> to <8 x i16>, then select the odd elements (1, 3, 5, 7). | ||
| ; The BE checks expect a trunc of %x; the LE checks expect the bitcast and shuffle to remain. | ||
| define <4 x i16> @trunc_big_endian(<4 x i32> %x) { | ||
| ; LE-LABEL: @trunc_big_endian( | ||
| ; LE-NEXT: [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16> | ||
| ; LE-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | ||
| ; LE-NEXT: ret <4 x i16> [[R]] | ||
| ; | ||
| ; BE-LABEL: @trunc_big_endian( | ||
| ; BE-NEXT: [[R:%.*]] = trunc <4 x i32> [[X:%.*]] to <4 x i16> | ||
| ; BE-NEXT: ret <4 x i16> [[R]] | ||
| ; | ||
| %b = bitcast <4 x i32> %x to <8 x i16> | ||
| %r = shufflevector <8 x i16> %b, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | ||
| ret <4 x i16> %r | ||
| } | ||
|
|
||
| declare void @use_v8i16(<8 x i16>) | ||
|
|
||
| ; Extra use is ok. | ||
|
|
||
| ; The bitcast also feeds a call. The LE checks expect the shuffle to become a trunc of %x while the | ||
| ; bitcast stays for the call; the BE checks expect no change. | ||
| define <2 x i16> @trunc_little_endian_extra_use(<2 x i64> %x) { | ||
| ; LE-LABEL: @trunc_little_endian_extra_use( | ||
| ; LE-NEXT: [[B:%.*]] = bitcast <2 x i64> [[X:%.*]] to <8 x i16> | ||
| ; LE-NEXT: call void @use_v8i16(<8 x i16> [[B]]) | ||
| ; LE-NEXT: [[R:%.*]] = trunc <2 x i64> [[X]] to <2 x i16> | ||
| ; LE-NEXT: ret <2 x i16> [[R]] | ||
| ; | ||
| ; BE-LABEL: @trunc_little_endian_extra_use( | ||
| ; BE-NEXT: [[B:%.*]] = bitcast <2 x i64> [[X:%.*]] to <8 x i16> | ||
| ; BE-NEXT: call void @use_v8i16(<8 x i16> [[B]]) | ||
| ; BE-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <2 x i32> <i32 0, i32 4> | ||
| ; BE-NEXT: ret <2 x i16> [[R]] | ||
| ; | ||
| %b = bitcast <2 x i64> %x to <8 x i16> | ||
| call void @use_v8i16(<8 x i16> %b) | ||
| %r = shufflevector <8 x i16> %b, <8 x i16> poison, <2 x i32> <i32 0, i32 4> | ||
| ret <2 x i16> %r | ||
| } | ||
|
|
||
| declare void @use_v12i11(<12 x i11>) | ||
|
|
||
| ; Weird types are ok. | ||
|
|
||
| ; Non-power-of-2 element types (i33 -> i11) with an extra use of the bitcast; the mask selects every | ||
| ; third element starting at 2. The BE checks expect a trunc of %x; the LE checks expect no change. | ||
| define <4 x i11> @trunc_big_endian_extra_use(<4 x i33> %x) { | ||
| ; LE-LABEL: @trunc_big_endian_extra_use( | ||
| ; LE-NEXT: [[B:%.*]] = bitcast <4 x i33> [[X:%.*]] to <12 x i11> | ||
| ; LE-NEXT: call void @use_v12i11(<12 x i11> [[B]]) | ||
| ; LE-NEXT: [[R:%.*]] = shufflevector <12 x i11> [[B]], <12 x i11> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11> | ||
| ; LE-NEXT: ret <4 x i11> [[R]] | ||
| ; | ||
| ; BE-LABEL: @trunc_big_endian_extra_use( | ||
| ; BE-NEXT: [[B:%.*]] = bitcast <4 x i33> [[X:%.*]] to <12 x i11> | ||
| ; BE-NEXT: call void @use_v12i11(<12 x i11> [[B]]) | ||
| ; BE-NEXT: [[R:%.*]] = trunc <4 x i33> [[X]] to <4 x i11> | ||
| ; BE-NEXT: ret <4 x i11> [[R]] | ||
| ; | ||
| %b = bitcast <4 x i33> %x to <12 x i11> | ||
| call void @use_v12i11(<12 x i11> %b) | ||
| %r = shufflevector <12 x i11> %b, <12 x i11> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11> | ||
| ret <4 x i11> %r | ||
| } | ||
|
|
||
| ; The bitcast source is a scalar i128, not a vector; the checks expect no change for either endianness. | ||
| define <4 x i16> @wrong_cast1(i128 %x) { | ||
| ; ANY-LABEL: @wrong_cast1( | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast i128 [[X:%.*]] to <8 x i16> | ||
| ; ANY-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||
| ; ANY-NEXT: ret <4 x i16> [[R]] | ||
| ; | ||
| %b = bitcast i128 %x to <8 x i16> | ||
| %r = shufflevector <8 x i16> %b, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||
| ret <4 x i16> %r | ||
| } | ||
|
|
||
| ; The bitcast source has floating-point elements; the checks expect no change for either endianness. | ||
| define <4 x i16> @wrong_cast2(<4 x float> %x) { | ||
| ; ANY-LABEL: @wrong_cast2( | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <4 x float> [[X:%.*]] to <8 x i16> | ||
| ; ANY-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||
| ; ANY-NEXT: ret <4 x i16> [[R]] | ||
| ; | ||
| %b = bitcast <4 x float> %x to <8 x i16> | ||
| %r = shufflevector <8 x i16> %b, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||
| ret <4 x i16> %r | ||
| } | ||
|
|
||
| ; The bitcast destination has floating-point (half) elements; the checks expect no change for either endianness. | ||
| define <4 x half> @wrong_cast3(<4 x i32> %x) { | ||
| ; ANY-LABEL: @wrong_cast3( | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x half> | ||
| ; ANY-NEXT: [[R:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||
| ; ANY-NEXT: ret <4 x half> [[R]] | ||
| ; | ||
| %b = bitcast <4 x i32> %x to <8 x half> | ||
| %r = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||
| ret <4 x half> %r | ||
| } | ||
|
|
||
| ; The shuffle yields only 2 elements from a 4-element source vector; the checks expect no change for either endianness. | ||
| define <2 x i16> @wrong_shuffle(<4 x i32> %x) { | ||
| ; ANY-LABEL: @wrong_shuffle( | ||
| ; ANY-NEXT: [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16> | ||
| ; ANY-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <2 x i32> <i32 0, i32 2> | ||
| ; ANY-NEXT: ret <2 x i16> [[R]] | ||
| ; | ||
| %b = bitcast <4 x i32> %x to <8 x i16> | ||
| %r = shufflevector <8 x i16> %b, <8 x i16> poison, <2 x i32> <i32 0, i32 2> | ||
| ret <2 x i16> %r | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,144 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S | FileCheck %s | ||
|
|
||
| ; Narrow the select operands to eliminate the existing shuffles and replace a wide select with a narrow select. | ||
|
|
||
| ; The condition is widened to 4 lanes with undefs, used in a 4-lane select, and the low 2 lanes are extracted. | ||
| ; The checks expect %x and %y to be narrowed by shuffles, followed by a 2-lane select on %cmp. | ||
| define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { | ||
| ; CHECK-LABEL: @narrow_shuffle_of_select( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1> | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1> | ||
| ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]] | ||
| ; CHECK-NEXT: ret <2 x i8> [[R]] | ||
| ; | ||
| %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y | ||
| %r = shufflevector <4 x i8> %widesel, <4 x i8> poison, <2 x i32> <i32 0, i32 1> | ||
| ret <2 x i8> %r | ||
| } | ||
|
|
||
| ; The 1st shuffle does not extend with undefs, but demanded-elements simplification corrects that. | ||
|
|
||
| ; The widening mask repeats lanes <0, 1, 0, 1> and has no undefs; the checks expect the same | ||
| ; narrowed output as @narrow_shuffle_of_select. | ||
| define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { | ||
| ; CHECK-LABEL: @narrow_shuffle_of_select_overspecified_extend( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1> | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1> | ||
| ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]] | ||
| ; CHECK-NEXT: ret <2 x i8> [[R]] | ||
| ; | ||
| %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> | ||
| %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y | ||
| %r = shufflevector <4 x i8> %widesel, <4 x i8> poison, <2 x i32> <i32 0, i32 1> | ||
| ret <2 x i8> %r | ||
| } | ||
|
|
||
| ; Verify that undef elements are acceptable in an identity shuffle mask. Also check FP types. | ||
|
|
||
| ; Both the widening mask and the narrowing mask contain undef lanes, and the element type is float. | ||
| ; The checks expect narrowed operand shuffles feeding a 3-lane select on %cmp. | ||
| define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) { | ||
| ; CHECK-LABEL: @narrow_shuffle_of_select_undefs( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef> | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef> | ||
| ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]] | ||
| ; CHECK-NEXT: ret <3 x float> [[R]] | ||
| ; | ||
| %widecmp = shufflevector <3 x i1> %cmp, <3 x i1> poison, <4 x i32> <i32 undef, i32 1, i32 2, i32 undef> | ||
| %widesel = select <4 x i1> %widecmp, <4 x float> %x, <4 x float> %y | ||
| %r = shufflevector <4 x float> %widesel, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 undef> | ||
| ret <3 x float> %r | ||
| } | ||
|
|
||
| declare void @use(<4 x i8>) | ||
| declare void @use_cmp(<4 x i1>) | ||
|
|
||
| ; Negative test - extra use would require more instructions than we started with. | ||
|
|
||
| ; The wide select has a second user (the call to @use); the checks expect all three instructions to remain. | ||
| define <2 x i8> @narrow_shuffle_of_select_use1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { | ||
| ; CHECK-LABEL: @narrow_shuffle_of_select_use1( | ||
| ; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| ; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] | ||
| ; CHECK-NEXT: call void @use(<4 x i8> [[WIDESEL]]) | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> poison, <2 x i32> <i32 0, i32 1> | ||
| ; CHECK-NEXT: ret <2 x i8> [[R]] | ||
| ; | ||
| %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y | ||
| call void @use(<4 x i8> %widesel) | ||
| %r = shufflevector <4 x i8> %widesel, <4 x i8> poison, <2 x i32> <i32 0, i32 1> | ||
| ret <2 x i8> %r | ||
| } | ||
|
|
||
| ; Negative test - extra use would require more instructions than we started with. | ||
|
|
||
| ; The widened condition has a second user (the call to @use_cmp); the checks expect all three instructions to remain. | ||
| define <2 x i8> @narrow_shuffle_of_select_use2(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { | ||
| ; CHECK-LABEL: @narrow_shuffle_of_select_use2( | ||
| ; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| ; CHECK-NEXT: call void @use_cmp(<4 x i1> [[WIDECMP]]) | ||
| ; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> poison, <2 x i32> <i32 0, i32 1> | ||
| ; CHECK-NEXT: ret <2 x i8> [[R]] | ||
| ; | ||
| %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| call void @use_cmp(<4 x i1> %widecmp) | ||
| %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y | ||
| %r = shufflevector <4 x i8> %widesel, <4 x i8> poison, <2 x i32> <i32 0, i32 1> | ||
| ret <2 x i8> %r | ||
| } | ||
|
|
||
| ; Negative test - mismatched types would require extra shuffling. | ||
|
|
||
| ; The narrowed result has 3 lanes but the original condition has 2; the checks expect no change. | ||
| define <3 x i8> @narrow_shuffle_of_select_mismatch_types1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { | ||
| ; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types1( | ||
| ; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| ; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2> | ||
| ; CHECK-NEXT: ret <3 x i8> [[R]] | ||
| ; | ||
| %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y | ||
| %r = shufflevector <4 x i8> %widesel, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2> | ||
| ret <3 x i8> %r | ||
| } | ||
|
|
||
| ; Negative test - mismatched types would require extra shuffling. | ||
|
|
||
| ; The narrowed result has 3 lanes but the original condition has 4; the checks expect the select to stay wide. | ||
| ; The only expected change is that lane 3 of the widening mask becomes undef. | ||
| define <3 x i8> @narrow_shuffle_of_select_mismatch_types2(<4 x i1> %cmp, <6 x i8> %x, <6 x i8> %y) { | ||
| ; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types2( | ||
| ; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef> | ||
| ; CHECK-NEXT: [[WIDESEL:%.*]] = select <6 x i1> [[WIDECMP]], <6 x i8> [[X:%.*]], <6 x i8> [[Y:%.*]] | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <6 x i8> [[WIDESEL]], <6 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2> | ||
| ; CHECK-NEXT: ret <3 x i8> [[R]] | ||
| ; | ||
| %widecmp = shufflevector <4 x i1> %cmp, <4 x i1> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef> | ||
| %widesel = select <6 x i1> %widecmp, <6 x i8> %x, <6 x i8> %y | ||
| %r = shufflevector <6 x i8> %widesel, <6 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2> | ||
| ret <3 x i8> %r | ||
| } | ||
|
|
||
| ; Narrowing constants does not require creating new narrowing shuffle instructions. | ||
|
|
||
| ; Both select arms are constant vectors; the checks expect a 2-lane select on %cmp between the | ||
| ; low two elements of each constant, with no shuffles left. | ||
| define <2 x i8> @narrow_shuffle_of_select_consts(<2 x i1> %cmp) { | ||
| ; CHECK-LABEL: @narrow_shuffle_of_select_consts( | ||
| ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> <i8 -1, i8 -2>, <2 x i8> <i8 1, i8 2> | ||
| ; CHECK-NEXT: ret <2 x i8> [[R]] | ||
| ; | ||
| %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| %widesel = select <4 x i1> %widecmp, <4 x i8> <i8 -1, i8 -2, i8 -3, i8 -4>, <4 x i8> <i8 1, i8 2, i8 3, i8 4> | ||
| %r = shufflevector <4 x i8> %widesel, <4 x i8> poison, <2 x i32> <i32 0, i32 1> | ||
| ret <2 x i8> %r | ||
| } | ||
|
|
||
| ; PR38691 - https://bugs.llvm.org/show_bug.cgi?id=38691 | ||
| ; If the operands are widened only to be narrowed back, then all of the shuffles are unnecessary. | ||
|
|
||
| ; %x, %y and %cmp are each widened to 4 lanes, selected, then narrowed back to 2 lanes. | ||
| ; The checks expect a single 2-lane select on the original operands. | ||
| define <2 x i8> @narrow_shuffle_of_select_with_widened_ops(<2 x i1> %cmp, <2 x i8> %x, <2 x i8> %y) { | ||
| ; CHECK-LABEL: @narrow_shuffle_of_select_with_widened_ops( | ||
| ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]] | ||
| ; CHECK-NEXT: ret <2 x i8> [[R]] | ||
| ; | ||
| %widex = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| %widey = shufflevector <2 x i8> %y, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| %widesel = select <4 x i1> %widecmp, <4 x i8> %widex, <4 x i8> %widey | ||
| %r = shufflevector <4 x i8> %widesel, <4 x i8> poison, <2 x i32> <i32 0, i32 1> | ||
| ret <2 x i8> %r | ||
| } | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S | FileCheck %s | ||
|
|
||
| declare void @use(<4 x i16>) | ||
|
|
||
| ; Bytes 12..15 of %w are extracted by a shuffle and bitcast to both i32 and float. | ||
| ; The checks expect %w to be bitcast to <4 x i32> and <4 x float>, with element 3 extracted from each. | ||
| define void @test(<16 x i8> %w, i32* %o1, float* %o2) { | ||
| ; CHECK-LABEL: @test( | ||
| ; CHECK-NEXT: [[V_BC:%.*]] = bitcast <16 x i8> [[W:%.*]] to <4 x i32> | ||
| ; CHECK-NEXT: [[V_EXTRACT:%.*]] = extractelement <4 x i32> [[V_BC]], i32 3 | ||
| ; CHECK-NEXT: [[V_BC1:%.*]] = bitcast <16 x i8> [[W]] to <4 x float> | ||
| ; CHECK-NEXT: [[V_EXTRACT2:%.*]] = extractelement <4 x float> [[V_BC1]], i32 3 | ||
| ; CHECK-NEXT: store i32 [[V_EXTRACT]], i32* [[O1:%.*]], align 4 | ||
| ; CHECK-NEXT: store float [[V_EXTRACT2]], float* [[O2:%.*]], align 4 | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %v = shufflevector <16 x i8> %w, <16 x i8> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15> | ||
| %f = bitcast <4 x i8> %v to float | ||
| %i = bitcast <4 x i8> %v to i32 | ||
| store i32 %i, i32* %o1, align 4 | ||
| store float %f, float* %o2, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; Shuffle-of-bitcast-splat --> splat-bitcast | ||
|
|
||
| ; A splat of byte 1 is bitcast to <4 x i16> and shuffled again; the checks expect the second | ||
| ; shuffle to disappear, leaving the splat followed by the bitcast. | ||
| define <4 x i16> @splat_bitcast_operand(<8 x i8> %x) { | ||
| ; CHECK-LABEL: @splat_bitcast_operand( | ||
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> | ||
| ; CHECK-NEXT: [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> | ||
| ; CHECK-NEXT: ret <4 x i16> [[S2]] | ||
| ; | ||
| %s1 = shufflevector <8 x i8> %x, <8 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> | ||
| %bc = bitcast <8 x i8> %s1 to <4 x i16> | ||
| %s2 = shufflevector <4 x i16> %bc, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0> | ||
| ret <4 x i16> %s2 | ||
| } | ||
|
|
||
| ; Shuffle-of-bitcast-splat --> splat-bitcast | ||
|
|
||
| ; Same as @splat_bitcast_operand, but the bitcast has an extra use; the checks expect the second | ||
| ; shuffle to be replaced by a second bitcast of the splat while the original bitcast feeds the call. | ||
| define <4 x i16> @splat_bitcast_operand_uses(<8 x i8> %x) { | ||
| ; CHECK-LABEL: @splat_bitcast_operand_uses( | ||
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> | ||
| ; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> | ||
| ; CHECK-NEXT: call void @use(<4 x i16> [[BC]]) | ||
| ; CHECK-NEXT: [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> | ||
| ; CHECK-NEXT: ret <4 x i16> [[S2]] | ||
| ; | ||
| %s1 = shufflevector <8 x i8> %x, <8 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> | ||
| %bc = bitcast <8 x i8> %s1 to <4 x i16> | ||
| call void @use(<4 x i16> %bc) | ||
| %s2 = shufflevector <4 x i16> %bc, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0> | ||
| ret <4 x i16> %s2 | ||
| } | ||
|
|
||
| ; Shuffle-of-bitcast-splat --> splat-bitcast | ||
|
|
||
| ; Source and destination elements have the same width (float -> i32); the checks expect the second | ||
| ; shuffle to disappear, leaving the splat followed by the bitcast. | ||
| define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) { | ||
| ; CHECK-LABEL: @splat_bitcast_operand_same_size_src_elt( | ||
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> | ||
| ; CHECK-NEXT: [[S2:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32> | ||
| ; CHECK-NEXT: ret <4 x i32> [[S2]] | ||
| ; | ||
| %s1 = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> | ||
| %bc = bitcast <4 x float> %s1 to <4 x i32> | ||
| %s2 = shufflevector <4 x i32> %bc, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0> | ||
| ret <4 x i32> %s2 | ||
| } | ||
|
|
||
| ; Scaled mask is inverse of first mask. | ||
|
|
||
| ; The first shuffle reverses the order of the four 4-byte groups and the second reverses the four | ||
| ; i32 lanes; the checks expect both shuffles to cancel, leaving only the bitcast of %x. | ||
| define <4 x i32> @shuf_bitcast_operand(<16 x i8> %x) { | ||
| ; CHECK-LABEL: @shuf_bitcast_operand( | ||
| ; CHECK-NEXT: [[S2:%.*]] = bitcast <16 x i8> [[X:%.*]] to <4 x i32> | ||
| ; CHECK-NEXT: ret <4 x i32> [[S2]] | ||
| ; | ||
| %s1 = shufflevector <16 x i8> %x, <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> | ||
| %bc = bitcast <16 x i8> %s1 to <4 x i32> | ||
| %s2 = shufflevector <4 x i32> %bc, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | ||
| ret <4 x i32> %s2 | ||
| } | ||
|
|
||
| ; TODO: Could allow fold for length-changing shuffles. | ||
|
|
||
| ; The second shuffle changes the vector length (4 -> 5 lanes); the checks expect all three instructions to remain. | ||
| define <5 x i16> @splat_bitcast_operand_change_type(<8 x i8> %x) { | ||
| ; CHECK-LABEL: @splat_bitcast_operand_change_type( | ||
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> | ||
| ; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> | ||
| ; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i16> [[BC]], <4 x i16> poison, <5 x i32> <i32 0, i32 2, i32 1, i32 0, i32 3> | ||
| ; CHECK-NEXT: ret <5 x i16> [[S2]] | ||
| ; | ||
| %s1 = shufflevector <8 x i8> %x, <8 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> | ||
| %bc = bitcast <8 x i8> %s1 to <4 x i16> | ||
| %s2 = shufflevector <4 x i16> %bc, <4 x i16> poison, <5 x i32> <i32 0, i32 2, i32 1, i32 0, i32 3> | ||
| ret <5 x i16> %s2 | ||
| } | ||
|
|
||
| ; Shuffle-of-bitcast-splat --> splat-bitcast | ||
|
|
||
| ; The splat source elements (i32) are wider than the bitcast destination elements (i16); the checks | ||
| ; expect the second shuffle to disappear, leaving the splat followed by the bitcast. | ||
| define <4 x i16> @splat_bitcast_operand_wider_src_elt(<2 x i32> %x) { | ||
| ; CHECK-LABEL: @splat_bitcast_operand_wider_src_elt( | ||
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 1> | ||
| ; CHECK-NEXT: [[S2:%.*]] = bitcast <2 x i32> [[S1]] to <4 x i16> | ||
| ; CHECK-NEXT: ret <4 x i16> [[S2]] | ||
| ; | ||
| %s1 = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 1> | ||
| %bc = bitcast <2 x i32> %s1 to <4 x i16> | ||
| %s2 = shufflevector <4 x i16> %bc, <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> | ||
| ret <4 x i16> %s2 | ||
| } | ||
|
|
||
| ; Shuffle-of-bitcast-splat --> splat-bitcast | ||
|
|
||
| ; Same as @splat_bitcast_operand_wider_src_elt, but the bitcast has an extra use; the checks expect the | ||
| ; second shuffle to be replaced by a second bitcast of the splat while the original bitcast feeds the call. | ||
| define <4 x i16> @splat_bitcast_operand_wider_src_elt_uses(<2 x i32> %x) { | ||
| ; CHECK-LABEL: @splat_bitcast_operand_wider_src_elt_uses( | ||
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 1> | ||
| ; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i32> [[S1]] to <4 x i16> | ||
| ; CHECK-NEXT: call void @use(<4 x i16> [[BC]]) | ||
| ; CHECK-NEXT: [[S2:%.*]] = bitcast <2 x i32> [[S1]] to <4 x i16> | ||
| ; CHECK-NEXT: ret <4 x i16> [[S2]] | ||
| ; | ||
| %s1 = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 1> | ||
| %bc = bitcast <2 x i32> %s1 to <4 x i16> | ||
| call void @use(<4 x i16> %bc) | ||
| %s2 = shufflevector <4 x i16> %bc, <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> | ||
| ret <4 x i16> %s2 | ||
| } | ||
|
|
||
| ; Scaled mask is inverse of first mask. | ||
|
|
||
| ; The first shuffle reverses the four i32 lanes and the second reverses the order of the four 4-byte | ||
| ; groups; the checks expect both shuffles to cancel, leaving only the bitcast of %x. | ||
| define <16 x i8> @shuf_bitcast_operand_wider_src(<4 x i32> %x) { | ||
| ; CHECK-LABEL: @shuf_bitcast_operand_wider_src( | ||
| ; CHECK-NEXT: [[S2:%.*]] = bitcast <4 x i32> [[X:%.*]] to <16 x i8> | ||
| ; CHECK-NEXT: ret <16 x i8> [[S2]] | ||
| ; | ||
| %s1 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | ||
| %bc = bitcast <4 x i32> %s1 to <16 x i8> | ||
| %s2 = shufflevector <16 x i8> %bc, <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> | ||
| ret <16 x i8> %s2 | ||
| } | ||
|
|
||
| ; Negative test - the 2nd mask can't be widened | ||
|
|
||
| ; Same instruction sequence as @shuf_bitcast_operand_wider_src, except that the | ||
| ; first four entries of the byte mask are 12,13,12,13 instead of 12,13,14,15. | ||
| ; That group does not select one whole 4-byte lane, and the expected output | ||
| ; keeps all three instructions unchanged. | ||
| define <16 x i8> @shuf_bitcast_operand_cannot_widen(<4 x i32> %x) { | ||
| ; CHECK-LABEL: @shuf_bitcast_operand_cannot_widen( | ||
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | ||
| ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[S1]] to <16 x i8> | ||
| ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[BC]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 12, i32 13, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> | ||
| ; CHECK-NEXT: ret <16 x i8> [[S2]] | ||
| ; | ||
| ; Reverse the i32 lanes. | ||
| %s1 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | ||
| %bc = bitcast <4 x i32> %s1 to <16 x i8> | ||
| ; Bytes 12,13 are repeated in the first group, so it is not four consecutive bytes. | ||
| %s2 = shufflevector <16 x i8> %bc, <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 12, i32 13, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> | ||
| ret <16 x i8> %s2 | ||
| } | ||
|
|
||
| ; Negative test - the 2nd mask can't be widened | ||
|
|
||
| ; Same instruction sequence as @shuf_bitcast_operand_wider_src, except that the | ||
| ; second entry of the byte mask is undef (12, undef, 14, 15). The expected | ||
| ; output keeps all three instructions unchanged, including the undef mask entry. | ||
| ; NOTE(review): per the test name, the partially-undef group is what blocks | ||
| ; widening the mask - confirm against the InstCombine implementation. | ||
| define <16 x i8> @shuf_bitcast_operand_cannot_widen_undef(<4 x i32> %x) { | ||
| ; CHECK-LABEL: @shuf_bitcast_operand_cannot_widen_undef( | ||
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | ||
| ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[S1]] to <16 x i8> | ||
| ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[BC]], <16 x i8> poison, <16 x i32> <i32 12, i32 undef, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> | ||
| ; CHECK-NEXT: ret <16 x i8> [[S2]] | ||
| ; | ||
| ; Reverse the i32 lanes. | ||
| %s1 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | ||
| %bc = bitcast <4 x i32> %s1 to <16 x i8> | ||
| ; Only one byte of the first 4-byte group is undef; the other three are defined. | ||
| %s2 = shufflevector <16 x i8> %bc, <16 x i8> poison, <16 x i32> <i32 12, i32 undef, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> | ||
| ret <16 x i8> %s2 | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| ; NOTE: Assertions have been autogenerated by update_test_checks.py | ||
| ; RUN: opt < %s -instcombine -S | FileCheck %s | ||
|
|
||
| target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" | ||
| target triple = "i386-apple-darwin9" | ||
|
|
||
| ; Constant-folding test: every operand below is a constant, and the expected | ||
| ; output is a single ret of the constant vector <0.0, 0.0, inf, inf>. | ||
| ; (float 0x7FF0000000000000 is LLVM's hexadecimal spelling of +infinity.) | ||
| define <4 x float> @__inff4() nounwind readnone { | ||
| ; CHECK-LABEL: @__inff4( | ||
| ; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0x7FF0000000000000, float 0x7FF0000000000000> | ||
| ; | ||
| ; Reinterpret the constant <inf, inf> pair as one double and extract it. | ||
| %tmp14 = extractelement <1 x double> bitcast (<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000> to <1 x double>), i32 0 | ||
| ; Round-trip the 64 bits through i64 back to a <2 x float>. | ||
| %tmp4 = bitcast double %tmp14 to i64 | ||
| %tmp3 = bitcast i64 %tmp4 to <2 x float> | ||
| ; Widen to four lanes; lanes 2 and 3 are undef. | ||
| %tmp8 = shufflevector <2 x float> %tmp3, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | ||
| ; Lanes 0,1 come from the zero vector; mask values 4,5 pick lanes 0,1 of %tmp8. | ||
| %tmp9 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp8, <4 x i32> <i32 0, i32 1, i32 4, i32 5> | ||
| ret <4 x float> %tmp9 | ||
| } | ||