diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0921a0e51668a..a3ad0b1c8dd6c 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -11365,6 +11365,36 @@ let Predicates = [HasVLX] in { (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 timm:$src4))), (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, @@ -11382,6 +11412,66 @@ let Predicates = [HasVLX] in { (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, 
addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v4i32 (X86vpternlog VR128X:$src1, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, (i8 
timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v2i64 (X86vpternlog VR128X:$src1, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 timm:$src4))), (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, @@ -11399,6 +11489,36 @@ let Predicates = [HasVLX] in { (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + 
timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 timm:$src4))), (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, @@ -11415,6 +11535,66 @@ let Predicates = [HasVLX] in { VR256X:$src2, (i8 timm:$src4))), (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog 
VR256X:$src1, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i32 (X86vpternlog VR256X:$src1, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v4i64 (X86vpternlog VR256X:$src1, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; } let Predicates = [HasAVX512] in { @@ -11435,6 +11615,36 @@ let Predicates = [HasAVX512] in { (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog 
(bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, (i8 timm:$src4))), (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, @@ -11448,9 +11658,69 @@ let Predicates = [HasAVX512] in { (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3), - VR512:$src2, (i8 timm:$src4))), + VR512:$src2, (i8 timm:$src4))), (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, 
addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1,
VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i32 (X86vpternlog VR512:$src1, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i64 (X86vpternlog VR512:$src1, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; } // Patterns to implement vnot using vpternlog instead of creating all ones diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index b2ad1b33384e4..d8442048f65ec 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -2905,8 +2905,7 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %zmm2, %zmm2 ; AVX512VL-NEXT: vpord %zmm1, %zmm2, %zmm1 ; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [18446744073709551360,18446744073709551360] -; AVX512VL-NEXT: vpternlogq $216, %xmm2, %xmm1, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 674b064100c46..12feea7658988 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ 
b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -2376,8 +2376,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512VL-NEXT: vpackuswb %ymm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1 -; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360] -; AVX512VL-NEXT: vpternlogq $216, %ymm2, %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll index 09a29fdbaad4b..6e0cb76398df9 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -1184,8 +1184,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512F-NEXT: vporq %zmm1, %zmm2, %zmm1 -; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360] -; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v64i8: @@ -1236,8 +1235,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; AVX512VL-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512VL-NEXT: vporq %zmm1, %zmm2, %zmm1 -; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360] -; AVX512VL-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 +; 
AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_funnnel_v64i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index 23fbc5e707079..b7cc39a32d718 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -2651,9 +2651,8 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vpmovdb %zmm0, %xmm2 -; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm0 = [18446744073709551360,18446744073709551360] -; AVX512VL-NEXT: vpternlogq $202, %xmm1, %xmm2, %xmm0 +; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index bd5698bc63bef..bbeaed5cc725c 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -2083,9 +2083,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512VL-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512VL-NEXT: vpackuswb %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm0 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360] -; AVX512VL-NEXT: 
vpternlogq $202, %ymm1, %ymm2, %ymm0 +; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll index 3337ebe22fed7..c89782bc359cc 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -1171,9 +1171,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm3, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm2 -; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm0 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360] -; AVX512F-NEXT: vpternlogq $202, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $228, {{.*}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v64i8: @@ -1223,9 +1222,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512VL-NEXT: vpackuswb %ymm4, %ymm3, %ymm3 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm2 -; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm0 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360] -; AVX512VL-NEXT: vpternlogq $202, %zmm1, %zmm2, %zmm0 +; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_funnnel_v64i8: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll index 1ab6f2cc45fc7..cb2dd3ef7e86d 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -337,11 +337,15 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){ ; SKX-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} ; SKX-NEXT: ret{{[l|q]}} ; -; KNL-LABEL: test_mm512_mask_blend_epi16: -; KNL: # %bb.0: # %entry -; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535] -; KNL-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 -; KNL-NEXT: ret{{[l|q]}} +; KNL64-LABEL: test_mm512_mask_blend_epi16: +; KNL64: # %bb.0: # %entry +; KNL64-NEXT: vpternlogd $216, {{.*}}(%rip){1to16}, %zmm1, %zmm0 +; KNL64-NEXT: retq +; +; KNL32-LABEL: test_mm512_mask_blend_epi16: +; KNL32: # %bb.0: # %entry +; KNL32-NEXT: vpternlogd $216, {{\.LCPI.*}}{1to16}, %zmm1, %zmm0 +; KNL32-NEXT: retl entry: %0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> ret <32 x i16> %0