diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index f429b5ffa5cfc..18efa0ff6ce61 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -1,11 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -passes=instcombine < %s | FileCheck %s
 
-declare float @llvm.fma.f32(float, float, float) #1
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
-declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fma.f32(float, float, float)
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
+declare <3 x float> @llvm.fma.v3f32(<3 x float>, <3 x float>, <3 x float>) ; 3-element form, called by @fma_unary_shuffle_ops_widening
+declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+declare float @llvm.fmuladd.f32(float, float, float)
+declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare float @llvm.fabs.f32(float)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare void @use_vec(<2 x float>) ; body-less helper; tests pass a <2 x float> shuffle result to it to give that value a second user
+declare void @use_vec3(<3 x float>) ; same helper for <3 x float> values
 
 @external = external global i32
 
@@ -453,75 +460,59 @@ define float @fmuladd_x_1_z_fast(float %x, float %z) {
 
 define <2 x double> @fmuladd_a_0_b(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_a_0_b(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> zeroinitializer, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_0_a_b(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_0_a_b(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> %a, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_a_0_b_missing_flags(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_a_0_b_missing_flags(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call nnan <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[A:%.*]], <2 x double> zeroinitializer, <2 x double> [[B:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %res = call nnan <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> zeroinitializer, <2 x double> %b)
   ret <2 x double> %res
 }
 
-declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
-
 define <2 x double> @fma_a_0_b(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @fma_a_0_b(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> zeroinitializer, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_0_a_b(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @fma_0_a_b(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %a, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_0_a_b_missing_flags(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @fma_0_a_b_missing_flags(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call nsz <2 x double> @llvm.fma.v2f64(<2 x double> [[A:%.*]], <2 x double> zeroinitializer, <2 x double> [[B:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %res = call nsz <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %a, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_sqrt(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @fma_sqrt(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = fadd fast <2 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %sqrt = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
   %res = call fast <2 x double> @llvm.fma.v2f64(<2 x double> %sqrt, <2 x double> %sqrt, <2 x double> %b)
   ret <2 x double> %res
@@ -530,146 +521,118 @@ entry:
 ; We do not fold constant multiplies in FMAs, as they could require rounding, unless either constant is 0.0 or 1.0.
 define <2 x double> @fma_const_fmul(<2 x double> %b) {
 ; CHECK-LABEL: @fma_const_fmul(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0x4131233302898702, double 0x40C387800000D6C0>, <2 x double> <double 1.291820e-08, double 9.123000e-06>, <2 x double> [[B:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 1123123.0099110012314, double 9999.0000001>, <2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_const_fmul_zero(<2 x double> %b) {
 ; CHECK-LABEL: @fma_const_fmul_zero(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0.0, double 0.0>, <2 x double> <double 1123123.0099110012314, double 9999.0000001>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_const_fmul_zero2(<2 x double> %b) {
 ; CHECK-LABEL: @fma_const_fmul_zero2(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 1123123.0099110012314, double 9999.0000001>, <2 x double> <double 0.0, double 0.0>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_const_fmul_one(<2 x double> %b) {
 ; CHECK-LABEL: @fma_const_fmul_one(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = fadd nnan nsz <2 x double> [[B:%.*]], <double 0x4131233302898702, double 0x40C387800000D6C0>
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 1.0, double 1.0>, <2 x double> <double 1123123.0099110012314, double 9999.0000001>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_const_fmul_one2(<2 x double> %b) {
 ; CHECK-LABEL: @fma_const_fmul_one2(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = fadd nnan nsz <2 x double> [[B:%.*]], <double 0x4131233302898702, double 0x40C387800000D6C0>
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 1123123.0099110012314, double 9999.0000001>, <2 x double> <double 1.0, double 1.0>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_nan_and_const_0(<2 x double> %b) {
 ; CHECK-LABEL: @fma_nan_and_const_0(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_nan_and_const_1(<2 x double> %b) {
 ; CHECK-LABEL: @fma_nan_and_const_1(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_nan_and_const_2(<2 x double> %b) {
 ; CHECK-LABEL: @fma_nan_and_const_2(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_undef_0(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_undef_0(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double undef, double undef>, <2 x double> %b, <2 x double> %c)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_undef_1(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_undef_1(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> <double undef, double undef>, <2 x double> %c)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_undef_2(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_undef_2(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> <double undef, double undef>)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_partial_undef_0(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_partial_undef_0(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> <double undef, double 0x4068E00A137F38C5>, <2 x double> [[C:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double undef, double 199.00123>, <2 x double> %b, <2 x double> %c)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_partial_undef_1(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_partial_undef_1(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> <double 0x4068E00A137F38C5, double undef>, <2 x double> [[C:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> <double 199.00123, double undef>, <2 x double> %c)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_partial_undef_2(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_partial_undef_2(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> [[C:%.*]], <2 x double> <double 0x4068E00A137F38C5, double undef>)
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> <double 199.00123, double undef>)
   ret <2 x double> %res
 }
@@ -677,126 +640,194 @@ entry:
 
 define <2 x double> @fma_nan_0(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_nan_0(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %b, <2 x double> %c)
   ret <2 x double> %res
 }
 define <2 x double> @fma_nan_1(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_nan_1(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %c)
   ret <2 x double> %res
 }
 
 define <2 x double> @fma_nan_2(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_nan_2(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_const_fmul(<2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_const_fmul(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = fadd nnan nsz <2 x double> [[B:%.*]], <double 0x3F8DB6C076AD949B, double 0x3FB75A405B6E6D69>
 ; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double 1123123.0099110012314, double 9999.0000001>, <2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_nan_and_const_0(<2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_nan_and_const_0(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_nan_and_const_1(<2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_nan_and_const_1(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %b)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_nan_and_const_2(<2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_nan_and_const_2(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_nan_0(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_nan_0(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %b, <2 x double> %c)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_nan_1(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_nan_1(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %c)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_undef_0(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_undef_0(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double undef, double undef>, <2 x double> %b, <2 x double> %c)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_undef_1(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_undef_1(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> <double undef, double undef>, <2 x double> %c)
   ret <2 x double> %res
 }
 
 define <2 x double> @fmuladd_undef_2(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_undef_2(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
 ;
-entry:
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>)
   ret <2 x double> %res
 }
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
 
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+define <2 x float> @fma_unary_shuffle_ops(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; every fma operand is a one-source shuffle using the same mask
+; CHECK-LABEL: @fma_unary_shuffle_ops(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[C:%.*]] = shufflevector <2 x float> [[Z:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[A]], <2 x float> [[B]], <2 x float> [[C]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; swap the two lanes of %x
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; same lane swap applied to %y
+  %c = shufflevector <2 x float> %z, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; same lane swap applied to %z
+  %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) ; the assertions above expect the three shuffles and this call to come out as written
+  ret <2 x float> %r
+}
+
+define <3 x float> @fma_unary_shuffle_ops_widening(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; same-mask shuffles that grow 2-element sources into 3-element fma operands
+; CHECK-LABEL: @fma_unary_shuffle_ops_widening(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 1>
+; CHECK-NEXT:    call void @use_vec3(<3 x float> [[A]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[C:%.*]] = shufflevector <2 x float> [[Z:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[R:%.*]] = call <3 x float> @llvm.fma.v3f32(<3 x float> [[A]], <3 x float> [[B]], <3 x float> [[C]])
+; CHECK-NEXT:    ret <3 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 1> ; 3-element mask over a 2-element source: lanes 1, 0, 1
+  call void @use_vec3(<3 x float> %a) ; gives %a a user besides the fma
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 1> ; same mask as %a
+  %c = shufflevector <2 x float> %z, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 1> ; same mask as %a
+  %r = call <3 x float> @llvm.fma.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) ; the assertions above expect the input to come out as written
+  ret <3 x float> %r
+}
+
+define <2 x float> @fma_unary_shuffle_ops_narrowing(<3 x float> %x, <3 x float> %y, <3 x float> %z) { ; same-mask shuffles that shrink 3-element sources into 2-element fma operands
+; CHECK-LABEL: @fma_unary_shuffle_ops_narrowing(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <3 x float> [[Y:%.*]], <3 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    call void @use_vec(<2 x float> [[B]])
+; CHECK-NEXT:    [[C:%.*]] = shufflevector <3 x float> [[Z:%.*]], <3 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[A]], <2 x float> [[B]], <2 x float> [[C]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %a = shufflevector <3 x float> %x, <3 x float> poison, <2 x i32> <i32 1, i32 0> ; 2-element mask over a 3-element source: lanes 1, 0 (lane 2 is not selected)
+  %b = shufflevector <3 x float> %y, <3 x float> poison, <2 x i32> <i32 1, i32 0> ; same mask as %a
+  call void @use_vec(<2 x float> %b) ; gives %b a user besides the fma
+  %c = shufflevector <3 x float> %z, <3 x float> poison, <2 x i32> <i32 1, i32 0> ; same mask as %a
+  %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) ; the assertions above expect the input to come out as written
+  ret <2 x float> %r
+}
+
+define <2 x float> @fma_unary_shuffle_ops_unshuffled(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; only two of the three fma operands are shuffles
+; CHECK-LABEL: @fma_unary_shuffle_ops_unshuffled(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[A]], <2 x float> [[B]], <2 x float> [[Z:%.*]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; swap the two lanes of %x
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; same lane swap applied to %y
+  %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %z) ; %z is passed directly, without a shuffle; the assertions above expect the input to come out as written
+  ret <2 x float> %r
+}
+
+define <2 x float> @fma_unary_shuffle_ops_wrong_mask(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; one operand's shuffle mask differs from the other two
+; CHECK-LABEL: @fma_unary_shuffle_ops_wrong_mask(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[C:%.*]] = shufflevector <2 x float> [[Z:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[A]], <2 x float> [[B]], <2 x float> [[C]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; swap the two lanes of %x
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 0, i32 0> ; different mask: lane 0 of %y in both positions (the expected output spells this mask zeroinitializer)
+  %c = shufflevector <2 x float> %z, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; same lane swap as %a
+  %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) ; the assertions above expect the shuffles and this call to stay in place
+  ret <2 x float> %r
+}
+
+define <2 x float> @fma_unary_shuffle_ops_uses(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; same-mask shuffles where every shuffle result also has a second user
+; CHECK-LABEL: @fma_unary_shuffle_ops_uses(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    call void @use_vec(<2 x float> [[A]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    call void @use_vec(<2 x float> [[B]])
+; CHECK-NEXT:    [[C:%.*]] = shufflevector <2 x float> [[Z:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    call void @use_vec(<2 x float> [[C]])
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[A]], <2 x float> [[B]], <2 x float> [[C]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; swap the two lanes of %x
+  call void @use_vec(<2 x float> %a) ; second user of %a
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; same lane swap applied to %y
+  call void @use_vec(<2 x float> %b) ; second user of %b
+  %c = shufflevector <2 x float> %z, <2 x float> poison, <2 x i32> <i32 1, i32 0> ; same lane swap applied to %z
+  call void @use_vec(<2 x float> %c) ; second user of %c
+  %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) ; the assertions above expect the input to come out as written
+  ret <2 x float> %r
+}