diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index f816d7de27ee4..6230c17e20804 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -212,6 +212,11 @@ def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16 { } +def VOP_F64_F64_NO_DPP : VOPProfile <[f64, f64, untyped, untyped]> { + let HasExtVOP3DPP = 0; + let HasExt64BitDPP = 0; +} + //===----------------------------------------------------------------------===// // VOP1 Instructions //===----------------------------------------------------------------------===// @@ -344,9 +349,9 @@ defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>; } // End TRANS = 1, SchedRW = [WriteTrans32] let TRANS = 1, SchedRW = [WriteTrans64] in { -defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>; -defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>; -defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, int_amdgcn_sqrt>; +defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64_NO_DPP, AMDGPUrcp>; +defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64_NO_DPP, AMDGPUrsq>; +defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64_NO_DPP, int_amdgcn_sqrt>; } // End TRANS = 1, SchedRW = [WriteTrans64] let TRANS = 1, SchedRW = [WriteTrans32] in { @@ -1025,6 +1030,11 @@ multiclass VOP1_Real_FULL_with_name op, string opName, multiclass VOP1_Real_NO_DPP op> : VOP1_Real_e32, VOP1_Real_e64; +multiclass VOP1_Real_with_DPP16 op> : + VOP1_Real_NO_DPP, + VOP1_Real_dpp, + VOP3_Real_dpp_Base; + multiclass VOP1_Real_FULL_t16_gfx11_gfx12 op, string asmName, string opName = NAME> : VOP1_Real_FULL_with_name, @@ -1057,6 +1067,11 @@ multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250< VOP1_Real_FULL_with_name; } +multiclass VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250 op, string opName, + string asmName> : + VOP1_Real_FULL_with_name, + VOP1_Real_FULL_with_name; + multiclass VOP1_Real_OpSelIsDPP_gfx1250 op> : VOP1_Real_e32 { defvar ps = !cast(NAME#"_e64"); def _e64_gfx1250 : @@ -1064,10 +1079,10 @@ multiclass VOP1_Real_OpSelIsDPP_gfx1250 op> : VOP1_Real_e32; } -defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name; -defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name; +defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250<0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">; +defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name; -defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name; +defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name; defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name; defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name; @@ -1252,17 +1267,17 @@ let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in { multiclass VOP1_Real_gfx7 op> : VOP1_Real_e32_gfx7, VOP1_Real_e64_gfx7; -multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 op> : +multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 op> : VOP1_Real_gfx7, VOP1_Real_gfx10, VOP1_Real_NO_DPP, - VOP1_Real_NO_DPP; + VOP1_Real_with_DPP16; defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>; defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>; -defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x017>; -defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x018>; -defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x019>; -defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x01a>; +defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x017>; +defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x018>; +defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x019>; +defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x01a>; //===----------------------------------------------------------------------===// // GFX6, GFX7, GFX10, GFX11, GFX12 @@ -1300,6 +1315,10 @@ multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 op> : VOP1_Real_gfx6_gfx7_gfx10, VOP1_Real_NO_DPP, VOP1_Real_NO_DPP; +multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 op> : + VOP1_Real_gfx6_gfx7_gfx10, VOP1_Real_NO_DPP, + VOP1_Real_with_DPP16; + multiclass VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12 op> : VOP1Only_Real_gfx6_gfx7, VOP1Only_Real_gfx10_gfx11_gfx12; @@ -1314,8 +1333,8 @@ defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>; defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>; defm V_READFIRSTLANE_B32 : VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>; -defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x003>; -defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x004>; +defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x003>; +defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x004>; defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>; defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>; defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>; @@ -1325,14 +1344,14 @@ defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>; defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>; -defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x00f>; -defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x010>; +defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x00f>; +defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x010>; defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>; defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>; defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>; defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>; -defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x015>; -defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x016>; +defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x015>; +defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x016>; defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>; defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>; defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>; @@ -1354,9 +1373,9 @@ defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>; defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>; defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>; defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>; -defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03c>; -defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03d>; -defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03e>; +defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x03c>; +defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x03d>; +defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x03e>; defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>; defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>; defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>; @@ -1410,7 +1429,9 @@ multiclass VOP1_Real_vi op> { if !cast(NAME#"_e32").Pfl.HasExtDPP then def _dpp_vi : VOP_DPP_Real(NAME#"_dpp"), SIEncodingFamily.VI>, - VOP1_DPPe(NAME#"_dpp")>; + VOP1_DPPe(NAME#"_dpp")> { + let AssemblerPredicate = isGFX8GFX9; + } } defm V_NOP : VOP1_Real_vi <0x0>; diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir index 84da231c95a62..8094dbaf418b8 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir @@ -1,5 +1,6 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN # RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN +# RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -run-pass=gcn-dpp-combine -o - %s | FileCheck %s --check-prefix=GCN --- # GCN-LABEL: name: dpp64_old_impdef diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.gfx1251.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.gfx1251.ll new file mode 100644 index 0000000000000..7a2f8faae9e89 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.gfx1251.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 < %s | FileCheck -check-prefixes=GCN,GFX1251 %s + +; GCN-LABEL: {{^}}mov_dpp64_test: +; GCN: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 +; GCN: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 +define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) { + %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 0) #0 + store i64 %tmp0, ptr addrspace(1) %out + ret void +} + +; GCN-LABEL: {{^}}mov_dpp64_row_share_test: +; GFX12-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0x1 bank_mask:0x1 +; GFX1251: v_mov_b64_dpp v[{{[0-9:]+}}], v[{{[0-9:]+}}] row_share:1 row_mask:0x1 bank_mask:0x1 +define amdgpu_kernel void @mov_dpp64_row_share_test(ptr addrspace(1) %out, i64 %in1) { + %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 337, i32 1, i32 1, i1 0) #0 + store i64 %tmp0, ptr addrspace(1) %out + ret void +} + +declare i64 @llvm.amdgcn.mov.dpp.i64(i64, i32, i32, i32, i1) #0 + +attributes #0 = { nounwind readnone convergent } diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_dpp16.s new file mode 100644 index 0000000000000..bb1ccaf53ce32 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_dpp16.s @@ -0,0 +1,98 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s | FileCheck --check-prefixes=GFX1251 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_mov_b64 v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mov_b64 v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x5f,0x01,0x01] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mov_b64 v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1251: v_mov_b64_dpp v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3a,0xfc,0x7f,0xfe,0x53,0x05,0x30] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mov_b64 v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64 v2, v[4:5] row_share:1 +// GFX1251: v_cvt_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x06,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_i32_f64 v2, v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32 v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_i32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x7e,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_f64_i32 v[4:5], v2 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64 v2, v[4:5] row_share:1 +// GFX1251: v_cvt_f32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x1e,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_f64 v2, v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32 v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_f32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x20,0x08,0x7e,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_f64_f32 v[4:5], v2 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64 v2, v[4:5] row_share:1 +// GFX1251: v_cvt_u32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2a,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_u32_f64 v2, v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32 v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_u32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2c,0x08,0x7e,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_f64_u32 v[4:5], v2 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_trunc_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_trunc_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2e,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_trunc_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_ceil_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_ceil_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x30,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_ceil_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_rndne_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_rndne_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x32,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_rndne_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_floor_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_floor_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x34,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_floor_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64 v2, v[4:5] row_share:1 +// GFX1251: v_frexp_exp_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x78,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_frexp_exp_i32_f64 v2, v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_frexp_mant_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7a,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_frexp_mant_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_fract_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_fract_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7c,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_fract_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_err.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_err.s new file mode 100644 index 0000000000000..1d88e9cb59c8e --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_err.s @@ -0,0 +1,156 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1251-ERR --implicit-check-not=error: --strict-whitespace %s + +v_mov_b64 v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_i32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_i32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_f32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_u32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_u32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_trunc_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_trunc_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_ceil_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_ceil_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rndne_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_rndne_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_floor_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_floor_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_frexp_exp_i32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_frexp_mant_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fract_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_fract_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mov_b64 v[4:5], v[2:3] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_i32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_i32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_f32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_u32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_u32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_trunc_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_trunc_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_ceil_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_ceil_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rndne_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_rndne_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_floor_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_floor_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_frexp_exp_i32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_frexp_mant_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fract_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_fract_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rcp_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_rcp_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rsq_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_rsq_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR-NEXT:{{^}} ^ + +v_sqrt_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_sqrt_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1251_err.s b/llvm/test/MC/AMDGPU/gfx1251_err.s new file mode 100644 index 0000000000000..d4db1bf9bb780 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_err.s @@ -0,0 +1,6 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX1251-ERR --implicit-check-not=error: -strict-whitespace %s + +v_mov_b64 v[4:5], v[2:3] quad_perm:[1,1,1,1] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR: v_mov_b64 v[4:5], v[2:3] quad_perm:[1,1,1,1] +// GFX1251-ERR: ^ diff --git a/llvm/test/MC/AMDGPU/gfx9-asm-err.s b/llvm/test/MC/AMDGPU/gfx9-asm-err.s index 31e0d953b5bd8..eb1d7b0b90772 100644 --- a/llvm/test/MC/AMDGPU/gfx9-asm-err.s +++ b/llvm/test/MC/AMDGPU/gfx9-asm-err.s @@ -31,7 +31,7 @@ v_subrev_u16_e64 v5, v1, -4.2 // GFX9ERR: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported v_cvt_u32_f64 v5, v[0:1] quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf -// GFX9ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX9ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_lds_dword v[2:3], off // GFX9ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop1_dpp16.txt new file mode 100644 index 0000000000000..3380b77a27a5d --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop1_dpp16.txt @@ -0,0 +1,49 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1251 %s + +# GFX1251: v_mov_b64_dpp v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3a,0xfc,0x7f,0xfe,0x53,0x05,0x30] +0xfa,0x3a,0xfc,0x7f,0xfe,0x53,0x05,0x30 + +# GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x50,0x01,0xff] +0xfa,0x3a,0x08,0x7e,0x02,0x50,0x01,0xff + +# GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x5f,0x01,0x01] +0xfa,0x3a,0x08,0x7e,0x02,0x5f,0x01,0x01 + +# GFX1251: v_ceil_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x30,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x30,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_f32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x1e,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x1e,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_f32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x20,0x08,0x7e,0x02,0x51,0x01,0xff] +0xfa,0x20,0x08,0x7e,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_i32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x7e,0x02,0x51,0x01,0xff] +0xfa,0x08,0x08,0x7e,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_u32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2c,0x08,0x7e,0x02,0x51,0x01,0xff] +0xfa,0x2c,0x08,0x7e,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x06,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x06,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_u32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2a,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x2a,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_floor_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x34,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x34,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_fract_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7c,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x7c,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_frexp_exp_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x78,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x78,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_frexp_mant_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7a,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x7a,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_rndne_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x32,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x32,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_trunc_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2e,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x2e,0x04,0x7e,0x04,0x51,0x01,0xff