diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt
index c8edc29bd887b5..0498fc269b634a 100644
--- a/llvm/include/llvm/IR/CMakeLists.txt
+++ b/llvm/include/llvm/IR/CMakeLists.txt
@@ -18,4 +18,5 @@ tablegen(LLVM IntrinsicsS390.h -gen-intrinsic-enums -intrinsic-prefix=s390)
 tablegen(LLVM IntrinsicsWebAssembly.h -gen-intrinsic-enums -intrinsic-prefix=wasm)
 tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86)
 tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore)
+tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve)
 add_public_tablegen_target(intrinsics_gen)
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e0f3d67a62ddb4..81e0340b042953 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1649,3 +1649,4 @@ include "llvm/IR/IntrinsicsBPF.td"
 include "llvm/IR/IntrinsicsSystemZ.td"
 include "llvm/IR/IntrinsicsWebAssembly.td"
 include "llvm/IR/IntrinsicsRISCV.td"
+include "llvm/IR/IntrinsicsVE.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td
new file mode 100644
index 00000000000000..1cb7a2e1eaf416
--- /dev/null
+++ b/llvm/include/llvm/IR/IntrinsicsVE.td
@@ -0,0 +1,4 @@
+// Define intrinsics written by hand
+
+// Define intrinsics automatically generated
+include "llvm/IR/IntrinsicsVEVL.gen.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
new file mode 100644
index 00000000000000..c4002a2a3b6228
--- /dev/null
+++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -0,0 +1,32 @@
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : GCCBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : GCCBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : GCCBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 8c360dd1841edb..a316c7c9c06825 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -43,6 +43,7 @@
 #include "llvm/IR/IntrinsicsR600.h"
 #include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/IR/IntrinsicsS390.h"
+#include "llvm/IR/IntrinsicsVE.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/IntrinsicsXCore.h"
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index acfa680223c255..1a15058cf6c4b0 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -2221,3 +2221,6 @@ def : Pat<(i64 (and i64:$val, 0xffffffff)),
 
 // Vector instructions.
 include "VEInstrVec.td"
+
+// The vevlintrin
+include "VEInstrIntrinsicVL.td"
diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
new file mode 100644
index 00000000000000..fbb35027156168
--- /dev/null
+++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
@@ -0,0 +1,64 @@
+def : Pat<(int_ve_vl_vld_vssl i64:$sy, i64:$sz, i32:$vl), (VLDrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld_vssl simm7:$I, i64:$sz, i32:$vl), (VLDirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDUrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldunc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldunc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDUNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldunc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldunc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2Drrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2Dirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2d_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2DNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2DNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2Drrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2Dirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2d_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2DNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2DNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td
new file mode 100644
index 00000000000000..c8d253ef65ff92
--- /dev/null
+++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td
@@ -0,0 +1,6 @@
+// Pattern Matchings for VEL Intrinsics
+
+// Define intrinsics written by hand
+
+// Define intrinsics automatically generated
+include "VEInstrIntrinsicVL.gen.td"
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/lit.local.cfg b/llvm/test/CodeGen/VE/VELIntrinsics/lit.local.cfg
new file mode 100644
index 00000000000000..b6366779272dff
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'VE' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vld.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vld.ll
new file mode 100644
index 00000000000000..c57ec433803846
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vld.ll
@@ -0,0 +1,1126 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector load intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VLD*rrl, VLD*irl, VLD*rrl_v, VLD*irl_v
+
+; Function Attrs: nounwind
+define void @vld_vssl(i8* %0, i64 %1) {
+; CHECK-LABEL: vld_vssl:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: vld %v0, %s1, %s0
+; CHECK-NEXT: #APP
+; CHECK-NEXT: vst %v0, %s1, %s0
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: or %s11, 0, %s9
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256)
+  tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare <256
x double> @llvm.ve.vl.vld.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vld_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vld_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vld %v0, %s1, %s2 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vld.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vld.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vld_vssl_imm(i8* %0) { +; CHECK-LABEL: vld_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vld_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vld_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, 8, %s1 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vld.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldnc.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldnc_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldnc_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vld.nc %v0, %s1, %s2 +; CHECK-NEXT: vld.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldnc.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vldnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> 
@llvm.ve.vl.vldnc.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldnc_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldnc_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld.nc %v0, 8, %s1 +; CHECK-NEXT: vld.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldu_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldu_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldu %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldu.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldu_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldu_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldu %v0, %s1, %s2 +; CHECK-NEXT: vldu %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldu.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldu_vssl_imm(i8* %0) { +; CHECK-LABEL: vldu_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldu %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldu_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldu_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldu %v0, 8, %s1 +; CHECK-NEXT: vldu %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldu.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldunc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldunc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldu.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> 
@llvm.ve.vl.vldunc.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldunc.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldunc_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldunc_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldu.nc %v0, %s1, %s2 +; CHECK-NEXT: vldu.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldunc.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldunc_vssl_imm(i8* %0) { +; CHECK-LABEL: vldunc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldu.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldunc_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldunc_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldu.nc %v0, 8, %s1 +; CHECK-NEXT: vldu.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldunc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldlsx_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldlsx_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl.sx %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldlsx.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldlsx_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldlsx_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldl.sx %v0, %s1, %s2 +; CHECK-NEXT: vldl.sx %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void 
@vldlsx_vssl_imm(i8* %0) { +; CHECK-LABEL: vldlsx_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldl.sx %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldlsx_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldlsx_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl.sx %v0, 8, %s1 +; CHECK-NEXT: vldl.sx %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldlsxnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldlsxnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl.sx.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldlsxnc_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldlsxnc_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldl.sx.nc %v0, %s1, %s2 +; CHECK-NEXT: vldl.sx.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldlsxnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vldlsxnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldl.sx.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldlsxnc_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldlsxnc_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl.sx.nc %v0, 8, %s1 +; CHECK-NEXT: vldl.sx.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", 
"v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldlzx_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldlzx_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl.zx %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldlzx.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldlzx_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldlzx_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldl.zx %v0, %s1, %s2 +; CHECK-NEXT: vldl.zx %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldlzx_vssl_imm(i8* %0) { +; CHECK-LABEL: vldlzx_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldl.zx %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldlzx_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldlzx_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl.zx %v0, 8, %s1 +; CHECK-NEXT: vldl.zx %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldlzxnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldlzxnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl.zx.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldlzxnc_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldlzxnc_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldl.zx.nc %v0, %s1, %s2 +; CHECK-NEXT: vldl.zx.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> 
@llvm.ve.vl.vldlzxnc.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldlzxnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vldlzxnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldl.zx.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldlzxnc_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldlzxnc_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl.zx.nc %v0, 8, %s1 +; CHECK-NEXT: vldl.zx.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vld2d_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vld2d_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld2d %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vld2d.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vld2d_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vld2d_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vld2d %v0, %s1, %s2 +; CHECK-NEXT: vld2d %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vld2d.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vld2d_vssl_imm(i8* %0) { +; CHECK-LABEL: vld2d_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld2d %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vld2d_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vld2d_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld2d %v0, 8, %s1 +; 
CHECK-NEXT: vld2d %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vld2d.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vld2dnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vld2dnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld2d.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vld2dnc_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vld2dnc_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vld2d.nc %v0, %s1, %s2 +; CHECK-NEXT: vld2d.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vld2dnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vld2dnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld2d.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vld2dnc_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vld2dnc_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld2d.nc %v0, 8, %s1 +; CHECK-NEXT: vld2d.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldu2d_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldu2d_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldu2d %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldu2d.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define 
void @vldu2d_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldu2d_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldu2d %v0, %s1, %s2 +; CHECK-NEXT: vldu2d %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldu2d_vssl_imm(i8* %0) { +; CHECK-LABEL: vldu2d_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldu2d %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldu2d_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldu2d_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldu2d %v0, 8, %s1 +; CHECK-NEXT: vldu2d %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldu2dnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldu2dnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldu2d.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldu2dnc_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldu2dnc_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldu2d.nc %v0, %s1, %s2 +; CHECK-NEXT: vldu2d.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldu2dnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vldu2dnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldu2d.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast 
<256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldu2dnc_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldu2dnc_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldu2d.nc %v0, 8, %s1 +; CHECK-NEXT: vldu2d.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldl2dsx_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldl2dsx_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl2d.sx %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldl2dsx_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldl2dsx_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldl2d.sx %v0, %s1, %s2 +; CHECK-NEXT: vldl2d.sx %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldl2dsx_vssl_imm(i8* %0) { +; CHECK-LABEL: vldl2dsx_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldl2d.sx %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldl2dsx_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldl2dsx_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl2d.sx %v0, 8, %s1 +; CHECK-NEXT: vldl2d.sx %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldl2dsxnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldl2dsxnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl2d.sx.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; 
CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldl2dsxnc_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldl2dsxnc_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldl2d.sx.nc %v0, %s1, %s2 +; CHECK-NEXT: vldl2d.sx.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldl2dsxnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vldl2dsxnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldl2d.sx.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldl2dsxnc_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldl2dsxnc_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl2d.sx.nc %v0, 8, %s1 +; CHECK-NEXT: vldl2d.sx.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldl2dzx_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldl2dzx_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl2d.zx %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldl2dzx_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldl2dzx_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldl2d.zx %v0, %s1, %s2 +; CHECK-NEXT: vldl2d.zx %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", 
"v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldl2dzx_vssl_imm(i8* %0) { +; CHECK-LABEL: vldl2dzx_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldl2d.zx %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldl2dzx_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldl2dzx_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl2d.zx %v0, 8, %s1 +; CHECK-NEXT: vldl2d.zx %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldl2dzxnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vldl2dzxnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl2d.zx.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 %1, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %3, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64, i8*, i32) + +; Function Attrs: nounwind +define void @vldl2dzxnc_vssvl(i8* %0, i64 %1, i8* %2) { +; CHECK-LABEL: vldl2dzxnc_vssvl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s3, 256 +; CHECK-NEXT: vldl2d.zx.nc %v0, %s1, %s2 +; CHECK-NEXT: vldl2d.zx.nc %v0, %s1, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 %1, i8* %2, i32 256) + %5 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64 %1, i8* %0, <256 x double> %4, i32 256) + tail call void asm sideeffect "vst $0, $1, $2", "v,r,r"(<256 x double> %5, i64 %1, i8* %0) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64, i8*, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vldl2dzxnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vldl2dzxnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vldl2d.zx.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 8, i8* %0, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %2, i8* %0) + ret void +} + +; Function Attrs: nounwind +define void @vldl2dzxnc_vssvl_imm(i8* %0, i8* %1) { +; CHECK-LABEL: vldl2dzxnc_vssvl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vldl2d.zx.nc %v0, 8, %s1 +; CHECK-NEXT: vldl2d.zx.nc %v0, 8, %s0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: #NO_APP 
+; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 8, i8* %1, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64 8, i8* %0, <256 x double> %3, i32 256) + tail call void asm sideeffect "vst $0, 8, $1", "v,r"(<256 x double> %4, i8* %0) + ret void +}