From eccc49197174bfb20a26c9cad573df37614ed629 Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Thu, 13 Nov 2025 10:18:26 +0800 Subject: [PATCH 01/12] Adding instruction specific features --- llvm/lib/Target/AMDGPU/AMDGPU.td | 108 +++++++++++++++++-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 21 ++++ llvm/lib/Target/AMDGPU/VOP1Instructions.td | 14 +-- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 22 ++-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll | 1 - 6 files changed, 142 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index b008354cfd462..fe2a192f0f372 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -901,6 +901,48 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction" >; +def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts", + "HasVCUBEInsts", + "true", + "Has V_CUBE* instructions" +>; + +def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts", + "HasVLERPInsts", + "true", + "Has V_LERP* instructions" +>; + +def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts", + "HasVSADInsts", + "true", + "Has V_SAD* instructions" +>; + +def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts", + "HasVQSADInsts", + "true", + "Has V_QSAD* instructions" +>; + +def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts", + "HasVCVTNORMInsts", + "true", + "Has V_CVT_NORM* instructions" +>; + +def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts", + "HasVCVTPKNORMVOP2Insts", + "true", + "Has V_CVT_NORM* VOP2 instructions" +>; + +def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts", + "HasVCVTPKNORMVOP3Insts", + "true", + "Has V_CVT_NORM* VOP3 instructions" +>; + def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts", "HasAtomicDsPkAdd16Insts", "true", @@ -1494,7 +1536,8 @@ def FeatureSouthernIslands : 
GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureVmemWriteVgprInOrder + FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, + FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts ] >; @@ -1508,7 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder + FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, + FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts ] >; @@ -1524,7 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, - FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder + FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, + FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, + FeatureVCVTPKNORMVOP2Insts ] >; @@ -1543,7 +1589,10 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS, - FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad + FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, + FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, + FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, 
+ FeatureVCVTPKNORMVOP3Insts ] >; @@ -1567,7 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureDefaultComponentZero, FeatureMaxHardClauseLength63, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad + FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts, + FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, + FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, + FeatureVCVTPKNORMVOP3Insts ] >; @@ -1590,7 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11", FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureMaxHardClauseLength32, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, - FeatureVmemWriteVgprInOrder + FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, + FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts, + FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts ] >; @@ -2069,10 +2123,17 @@ def FeatureISAVersion12 : FeatureSet< FeatureMemoryAtomicFAddF32DenormalSupport, FeatureBVHDualAndBVH8Insts, FeatureWaitsBeforeSystemScopeStores, - FeatureD16Writes32BitVgpr + FeatureD16Writes32BitVgpr, + FeatureVCUBEInsts, + FeatureVLERPInsts, + FeatureVSADInsts, + FeatureVQSADInsts, + FeatureVCVTNORMInsts, + FeatureVCVTPKNORMVOP2Insts, + FeatureVCVTPKNORMVOP3Insts ]>; -def FeatureISAVersion12_50 : FeatureSet< +def FeatureISAVersion12_50_Common : FeatureSet< [FeatureGFX12, FeatureGFX1250Insts, FeatureRequiresAlignedVGPRs, @@ -2147,6 +2208,16 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureD16Writes32BitVgpr, ]>; +def FeatureISAVersion12_50 : FeatureSet< + !listconcat(FeatureISAVersion12_50_Common.Features, + [FeatureVCUBEInsts, + FeatureVLERPInsts, + FeatureVSADInsts, + FeatureVQSADInsts, + FeatureVCVTNORMInsts, + FeatureVCVTPKNORMVOP2Insts, + FeatureVCVTPKNORMVOP3Insts])>; + def 
FeatureISAVersion12_51 : FeatureSet< !listconcat(FeatureISAVersion12_50.Features, [FeatureDPALU_DPP])>; @@ -2816,6 +2887,27 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">, def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">, AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>; +def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">, + AssemblerPredicate<(all_of FeatureVCUBEInsts)>; + +def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">, + AssemblerPredicate<(all_of FeatureVLERPInsts)>; + +def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">, + AssemblerPredicate<(all_of FeatureVSADInsts)>; + +def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">, + AssemblerPredicate<(all_of FeatureVQSADInsts)>; + +def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">, + AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>; + +def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">, + AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>; + +def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">, + AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>; + def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">, AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f377b8aaf1333..862cee468b7d3 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -166,6 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasFP8ConversionInsts = false; + bool HasVCUBEInsts = false; + bool HasVLERPInsts = false; + bool HasVSADInsts = false; + bool HasVQSADInsts = false; + bool HasVCVTNORMInsts = false; + bool HasVCVTPKNORMVOP2Insts = false; + bool HasVCVTPKNORMVOP3Insts = false; bool HasFP8E5M3Insts = false; bool HasCvtFP8Vop1Bug = false; bool HasPkFmacF16Inst = false; 
@@ -892,6 +899,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; } + bool hasVCUBEInsts() const { return HasVCUBEInsts; } + + bool hasVLERPInsts() const { return HasVLERPInsts; } + + bool hasVSADInsts() const { return HasVSADInsts; } + + bool hasVQSADInsts() const { return HasVQSADInsts; } + + bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; } + + bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; } + + bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; } + bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; } bool hasPkFmacF16Inst() const { diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 85adcab55b742..23095ba17cae8 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -616,15 +616,15 @@ let SubtargetPredicate = isGFX9Plus in { let isReMaterializable = 1 in defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>; - - let mayRaiseFPException = 0 in { - defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16", - VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; - defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16", - VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; - } // End mayRaiseFPException = 0 } // End SubtargetPredicate = isGFX9Plus +let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in { +defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16", + VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; +defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16", + VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; +} // End mayRaiseFPException = 0, SubtargetPredicate = 
HasVCVTNORMInsts + let SubtargetPredicate = isGFX9Only in { defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; } // End SubtargetPredicate = isGFX9Only diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index d87d250a034f0..afd2d610b17de 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a } // End IsNeverUniform = 1 defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>; -let ReadsModeReg = 0, mayRaiseFPException = 0 in { +let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in { defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>; defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>; } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 05ba76ab489d8..3d82866c1e5a7 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -185,7 +185,8 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32", defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile>; defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile>; defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, any_fma>, VOPD_Component<0x13, "v_fma_f32">; -defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile, int_amdgcn_lerp>; +let SubtargetPredicate = HasVLERPInsts in + defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile, int_amdgcn_lerp>; let SchedRW = [WriteIntMul] in { let SubtargetPredicate = HasMadU32Inst in @@ -258,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>; } // End isCommutable = 1 let isReMaterializable = 1 in { -let mayRaiseFPException = 0 in { +let 
mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in { defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile, int_amdgcn_cubeid>; defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile, int_amdgcn_cubesc>; defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile, int_amdgcn_cubetc>; defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile, int_amdgcn_cubema>; -} // End mayRaiseFPException +} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGPUbfe_u32>; defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; @@ -306,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in { defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile, AMDGPUfmaximum3>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 -let isCommutable = 1 in { +let isCommutable = 1, SubtargetPredicate = HasVSADInsts in { defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile>; defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile>; defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile>; defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile>; -} // End isCommutable = 1 +} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile, int_amdgcn_cvt_pk_u8_f32>; defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>; @@ -424,7 +425,8 @@ def VOPProfileMQSAD : VOP3_Profile { let SubtargetPredicate = isGFX7Plus in { let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { -defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; +let SubtargetPredicate = HasVQSADInsts in + defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>; } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] } // End SubtargetPredicate = isGFX7Plus @@ -789,9 +791,6 
@@ let isCommutable = 1 in { defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>; } // End isCommutable = 1 -defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; -defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; - defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>; let isReMaterializable = 1 in { @@ -996,6 +995,11 @@ def : GCNPat<(DivergentBinFrag (or_oneuse i64:$src0, i64:$src1), i64:$src2), } // End SubtargetPredicate = isGFX9Plus +let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in { + defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; + defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; +} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts + // FIXME: Probably should hardcode clamp bit in pseudo and avoid this. class OpSelBinOpClampPat : GCNPat< diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll index 43c69baaf3e7f..49169eec072b6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0 From 33276544f6cd6a96f783bf66dfee81d26a3e8b96 Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Fri, 14 Nov 2025 09:45:23 +0800 Subject: [PATCH 02/12] fix comments --- llvm/lib/Target/AMDGPU/AMDGPU.td | 124 ++++++++++----------- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 28 ++--- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 16 +-- 5 files changed, 87 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index fe2a192f0f372..c5d63e5000767 
100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -901,46 +901,46 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction" >; -def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts", - "HasVCUBEInsts", +def FeatureCubeInsts : SubtargetFeature<"V_CUBE-Insts", + "HasCubeInsts", "true", "Has V_CUBE* instructions" >; -def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts", - "HasVLERPInsts", +def FeatureLerpInst : SubtargetFeature<"V_LERP-insts", + "HasLerpInst", "true", - "Has V_LERP* instructions" + "Has v_lerp_u8 instruction" >; -def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts", - "HasVSADInsts", +def FeatureSadInsts : SubtargetFeature<"V_SAD-insts", + "HasSadInsts", "true", "Has V_SAD* instructions" >; -def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts", - "HasVQSADInsts", +def FeatureQsadInsts : SubtargetFeature<"V_QSAD-insts", + "HasQsadInsts", "true", "Has V_QSAD* instructions" >; -def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts", - "HasVCVTNORMInsts", +def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts", + "HasCvtNormInsts", "true", "Has V_CVT_NORM* instructions" >; -def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts", - "HasVCVTPKNORMVOP2Insts", +def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts", + "HasCvtPkNormVOP2Insts", "true", - "Has V_CVT_NORM* VOP2 instructions" + "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions" >; -def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts", - "HasVCVTPKNORMVOP3Insts", +def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts", + "HasCvtPkNormVOP3Insts", "true", - "Has V_CVT_NORM* VOP3 instructions" + "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions" >; def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts", @@ -1536,8 +1536,8 @@ def 
FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, - FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts + FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst, + FeatureSadInsts, FeatureCvtPkNormVOP2Insts ] >; @@ -1551,8 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, - FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts + FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst, + FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts ] >; @@ -1568,9 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, - FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, - FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, - FeatureVCVTPKNORMVOP2Insts + FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureCubeInsts, + FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, + FeatureCvtPkNormVOP2Insts ] >; @@ -1590,9 +1590,9 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS, FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, 
FeatureMemToLDSLoad, - FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, - FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, - FeatureVCVTPKNORMVOP3Insts + FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, + FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, + FeatureCvtPkNormVOP3Insts ] >; @@ -1616,10 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureDefaultComponentZero, FeatureMaxHardClauseLength63, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts, - FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, - FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, - FeatureVCVTPKNORMVOP3Insts + FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureCubeInsts, + FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, + FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, + FeatureCvtPkNormVOP3Insts ] >; @@ -1642,9 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11", FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureMaxHardClauseLength32, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, - FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts, - FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts + FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst, + FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts, + FeatureCvtPkNormVOP2Insts, FeatureCvtPkNormVOP3Insts ] >; @@ -2124,13 +2124,13 @@ def FeatureISAVersion12 : FeatureSet< FeatureBVHDualAndBVH8Insts, FeatureWaitsBeforeSystemScopeStores, FeatureD16Writes32BitVgpr, - FeatureVCUBEInsts, - FeatureVLERPInsts, - FeatureVSADInsts, - FeatureVQSADInsts, - FeatureVCVTNORMInsts, - FeatureVCVTPKNORMVOP2Insts, - FeatureVCVTPKNORMVOP3Insts + 
FeatureCubeInsts, + FeatureLerpInst, + FeatureSadInsts, + FeatureQsadInsts, + FeatureCvtNormInsts, + FeatureCvtPkNormVOP2Insts, + FeatureCvtPkNormVOP3Insts ]>; def FeatureISAVersion12_50_Common : FeatureSet< @@ -2210,13 +2210,13 @@ def FeatureISAVersion12_50_Common : FeatureSet< def FeatureISAVersion12_50 : FeatureSet< !listconcat(FeatureISAVersion12_50_Common.Features, - [FeatureVCUBEInsts, - FeatureVLERPInsts, - FeatureVSADInsts, - FeatureVQSADInsts, - FeatureVCVTNORMInsts, - FeatureVCVTPKNORMVOP2Insts, - FeatureVCVTPKNORMVOP3Insts])>; + [FeatureCubeInsts, + FeatureLerpInst, + FeatureSadInsts, + FeatureQsadInsts, + FeatureCvtNormInsts, + FeatureCvtPkNormVOP2Insts, + FeatureCvtPkNormVOP3Insts])>; def FeatureISAVersion12_51 : FeatureSet< !listconcat(FeatureISAVersion12_50.Features, @@ -2887,26 +2887,26 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">, def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">, AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>; -def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">, - AssemblerPredicate<(all_of FeatureVCUBEInsts)>; +def HasCubeInsts : Predicate<"Subtarget->hasCubeInsts()">, + AssemblerPredicate<(all_of FeatureCubeInsts)>; -def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">, - AssemblerPredicate<(all_of FeatureVLERPInsts)>; +def HasLerpInst : Predicate<"Subtarget->hasLerpInst()">, + AssemblerPredicate<(all_of FeatureLerpInst)>; -def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">, - AssemblerPredicate<(all_of FeatureVSADInsts)>; +def HasSadInsts : Predicate<"Subtarget->hasSadInsts()">, + AssemblerPredicate<(all_of FeatureSadInsts)>; -def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">, - AssemblerPredicate<(all_of FeatureVQSADInsts)>; +def HasQsadInsts : Predicate<"Subtarget->hasQsadInsts()">, + AssemblerPredicate<(all_of FeatureQsadInsts)>; -def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">, - AssemblerPredicate<(all_of 
FeatureVCVTNORMInsts)>; +def HasCvtNormInsts : Predicate<"Subtarget->hasCvtNormInsts()">, + AssemblerPredicate<(all_of FeatureCvtNormInsts)>; -def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">, - AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>; +def HasCvtPkNormVOP2Insts : Predicate<"Subtarget->hasCvtPkNormVOP2Insts()">, + AssemblerPredicate<(all_of FeatureCvtPkNormVOP2Insts)>; -def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">, - AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>; +def HasCvtPkNormVOP3Insts : Predicate<"Subtarget->hasCvtPkNormVOP3Insts()">, + AssemblerPredicate<(all_of FeatureCvtPkNormVOP3Insts)>; def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">, AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 862cee468b7d3..85260c4f123c7 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -166,13 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasFP8ConversionInsts = false; - bool HasVCUBEInsts = false; - bool HasVLERPInsts = false; - bool HasVSADInsts = false; - bool HasVQSADInsts = false; - bool HasVCVTNORMInsts = false; - bool HasVCVTPKNORMVOP2Insts = false; - bool HasVCVTPKNORMVOP3Insts = false; + bool HasCubeInsts = false; + bool HasLerpInst = false; + bool HasSadInsts = false; + bool HasQsadInsts = false; + bool HasCvtNormInsts = false; + bool HasCvtPkNormVOP2Insts = false; + bool HasCvtPkNormVOP3Insts = false; bool HasFP8E5M3Insts = false; bool HasCvtFP8Vop1Bug = false; bool HasPkFmacF16Inst = false; @@ -899,19 +899,19 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; } - bool hasVCUBEInsts() const { return HasVCUBEInsts; } + bool hasCubeInsts() const { return HasCubeInsts; } - 
bool hasVLERPInsts() const { return HasVLERPInsts; } + bool hasLerpInst() const { return HasLerpInst; } - bool hasVSADInsts() const { return HasVSADInsts; } + bool hasSadInsts() const { return HasSadInsts; } - bool hasVQSADInsts() const { return HasVQSADInsts; } + bool hasQsadInsts() const { return HasQsadInsts; } - bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; } + bool hasCvtNormInsts() const { return HasCvtNormInsts; } - bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; } + bool hasCvtPkNormVOP2Insts() const { return HasCvtPkNormVOP2Insts; } - bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; } + bool hasCvtPkNormVOP3Insts() const { return HasCvtPkNormVOP3Insts; } bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; } diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 23095ba17cae8..1d1e95908fce6 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -618,12 +618,12 @@ let SubtargetPredicate = isGFX9Plus in { defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>; } // End SubtargetPredicate = isGFX9Plus -let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in { +let mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts in { defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; -} // End mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts +} // End mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts let SubtargetPredicate = isGFX9Only in { defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; diff --git 
a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index afd2d610b17de..dbb7862ab4ab5 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a } // End IsNeverUniform = 1 defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>; -let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in { +let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasCvtPkNormVOP2Insts in { defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>; defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>; } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 3d82866c1e5a7..872bde501cd2d 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -185,7 +185,7 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32", defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile>; defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile>; defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, any_fma>, VOPD_Component<0x13, "v_fma_f32">; -let SubtargetPredicate = HasVLERPInsts in +let SubtargetPredicate = HasLerpInst in defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile, int_amdgcn_lerp>; let SchedRW = [WriteIntMul] in { @@ -259,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>; } // End isCommutable = 1 let isReMaterializable = 1 in { -let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in { +let mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts in { defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile, int_amdgcn_cubeid>; defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile, 
int_amdgcn_cubesc>; defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile, int_amdgcn_cubetc>; defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile, int_amdgcn_cubema>; -} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts +} // mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGPUbfe_u32>; defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; @@ -307,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in { defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile, AMDGPUfmaximum3>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 -let isCommutable = 1, SubtargetPredicate = HasVSADInsts in { +let isCommutable = 1, SubtargetPredicate = HasSadInsts in { defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile>; defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile>; defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile>; defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile>; -} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts +} // End isCommutable = 1, SubtargetPredicate = HasSadInsts defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile, int_amdgcn_cvt_pk_u8_f32>; defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>; @@ -425,7 +425,7 @@ def VOPProfileMQSAD : VOP3_Profile { let SubtargetPredicate = isGFX7Plus in { let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { -let SubtargetPredicate = HasVQSADInsts in +let SubtargetPredicate = HasQsadInsts in defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>; } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] @@ -995,10 +995,10 @@ def : GCNPat<(DivergentBinFrag (or_oneuse i64:$src0, i64:$src1), i64:$src2), } // End SubtargetPredicate = isGFX9Plus -let SubtargetPredicate = HasVCVTPKNORMVOP3Insts 
in { +let SubtargetPredicate = HasCvtPkNormVOP3Insts in { defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; -} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts +} // end SubtargetPredicate = HasCvtPkNormVOP3Insts // FIXME: Probably should hardcode clamp bit in pseudo and avoid this. class OpSelBinOpClampPat Date: Tue, 18 Nov 2025 10:59:53 +0800 Subject: [PATCH 03/12] add feature to builtins --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 22 ++++++++++---------- llvm/lib/Target/AMDGPU/AMDGPU.td | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 2b6fcb1fd479b..0dfa9c13792cf 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc") BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc") BUILTIN(__builtin_amdgcn_fract, "dd", "nc") BUILTIN(__builtin_amdgcn_fractf, "ff", "nc") -BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc") +TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "HasLerpInst") BUILTIN(__builtin_amdgcn_class, "bdi", "nc") BUILTIN(__builtin_amdgcn_classf, "bfi", "nc") -BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc") -BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc") -BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc") -BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc") +TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "HasCubeInsts") +TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "HasCubeInsts") +TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "HasCubeInsts") +TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "HasCubeInsts") BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n") BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n") BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n") @@ -149,17 +149,17 
@@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc") -BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc") -BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "HasCvtPkNormVOP2Insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "HasCvtPkNormVOP2Insts") BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc") -BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc") +TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "HasSadInsts") BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc") -BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc") -BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc") -BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc") +TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "HasSadInsts") +TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "HasSadInsts") +TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "HasQsadInsts") BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc") BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc") diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index c5d63e5000767..cd8327563d9d6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -934,13 +934,13 @@ def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts", def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts", "HasCvtPkNormVOP2Insts", "true", - "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions" + "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions" >; def 
FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts", "HasCvtPkNormVOP3Insts", "true", - "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions" + "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions" >; def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts", From 15e6eece5e39bd0c8fb1d1eb6e36151b0f45fdb7 Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Tue, 18 Nov 2025 13:17:14 +0800 Subject: [PATCH 04/12] fix builtin features --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 22 ++--- .../CodeGenOpenCL/builtins-amdgcn-fiji.cl | 86 +++++++++++++++++++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 67 --------------- llvm/lib/Target/AMDGPU/AMDGPU.td | 14 +-- llvm/lib/TargetParser/TargetParser.cpp | 50 ++++++++++- 5 files changed, 153 insertions(+), 86 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 0dfa9c13792cf..c349cdc6aef5b 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc") BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc") BUILTIN(__builtin_amdgcn_fract, "dd", "nc") BUILTIN(__builtin_amdgcn_fractf, "ff", "nc") -TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "HasLerpInst") +TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "lerp-inst") BUILTIN(__builtin_amdgcn_class, "bdi", "nc") BUILTIN(__builtin_amdgcn_classf, "bfi", "nc") -TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "HasCubeInsts") -TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "HasCubeInsts") -TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "HasCubeInsts") -TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "HasCubeInsts") +TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "cube-insts") 
+TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "cube-insts") +TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "cube-insts") +TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "cube-insts") BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n") BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n") BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n") @@ -149,17 +149,17 @@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc") -TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "HasCvtPkNormVOP2Insts") -TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "HasCvtPkNormVOP2Insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "cvt-pknorm-vop2-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "cvt-pknorm-vop2-insts") BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc") -TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "HasSadInsts") +TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "sad-insts") BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc") -TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "HasSadInsts") -TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "HasSadInsts") -TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "HasQsadInsts") +TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "sad-insts") +TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "sad-insts") +TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "qsad-insts") BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc") BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc") diff --git 
a/clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl new file mode 100644 index 0000000000000..2178718f90d5a --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl @@ -0,0 +1,86 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu fiji -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK %s + + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +typedef unsigned long ulong; +typedef unsigned int uint; +typedef unsigned short ushort; +typedef half __attribute__((ext_vector_type(2))) half2; +typedef short __attribute__((ext_vector_type(2))) short2; +typedef ushort __attribute__((ext_vector_type(2))) ushort2; +typedef uint __attribute__((ext_vector_type(4))) uint4; + +// CHECK-LABEL: @test_lerp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.lerp +void test_lerp(global int* out, int a, int b, int c) +{ + *out = __builtin_amdgcn_lerp(a, b, c); +} + +// CHECK-LABEL: @test_cubeid( +// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubeid(float %a, float %b, float %c) +void test_cubeid(global float* out, float a, float b, float c) { + *out = __builtin_amdgcn_cubeid(a, b, c); +} + +// CHECK-LABEL: @test_cubesc( +// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubesc(float %a, float %b, float %c) +void test_cubesc(global float* out, float a, float b, float c) { + *out = __builtin_amdgcn_cubesc(a, b, c); +} + +// CHECK-LABEL: @test_cubetc( +// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubetc(float %a, float %b, float %c) +void test_cubetc(global float* out, float a, float b, float c) { + *out = __builtin_amdgcn_cubetc(a, b, c); +} + +// CHECK-LABEL: @test_cubema( +// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubema(float %a, float %b, float %c) +void test_cubema(global float* out, float a, float b, float c) { + *out = __builtin_amdgcn_cubema(a, b, c); +} + +// CHECK-LABEL: @test_cvt_pknorm_i16( +// CHECK: tail call{{.*}} <2 x i16> 
@llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1) +kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) { + *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1); +} + +// CHECK-LABEL: @test_cvt_pknorm_u16( +// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1) +kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) { + *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1); +} + +// CHECK-LABEL: @test_sad_u8( +// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2) +kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_sad_u8(src0, src1, src2); +} + +// CHECK-LABEL: test_msad_u8( +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2) +kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_msad_u8(src0, src1, src2); +} + +// CHECK-LABEL: test_sad_hi_u8( +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2) +kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2); +} + +// CHECK-LABEL: @test_sad_u16( +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2) +kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_sad_u16(src0, src1, src2); +} + +// CHECK-LABEL: @test_qsad_pk_u16_u8( +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2) +kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) { + *out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index ab0b0b936abdc..b92454de60c78 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ 
b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -251,13 +251,6 @@ void test_fract_f64(global int* out, double a) *out = __builtin_amdgcn_fract(a); } -// CHECK-LABEL: @test_lerp -// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.lerp -void test_lerp(global int* out, int a, int b, int c) -{ - *out = __builtin_amdgcn_lerp(a, b, c); -} - // CHECK-LABEL: @test_sicmp_i32 // CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32) void test_sicmp_i32(global ulong* out, int a, int b) @@ -865,30 +858,6 @@ void test_s_setprio() __builtin_amdgcn_s_setprio(3); } -// CHECK-LABEL: @test_cubeid( -// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubeid(float %a, float %b, float %c) -void test_cubeid(global float* out, float a, float b, float c) { - *out = __builtin_amdgcn_cubeid(a, b, c); -} - -// CHECK-LABEL: @test_cubesc( -// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubesc(float %a, float %b, float %c) -void test_cubesc(global float* out, float a, float b, float c) { - *out = __builtin_amdgcn_cubesc(a, b, c); -} - -// CHECK-LABEL: @test_cubetc( -// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubetc(float %a, float %b, float %c) -void test_cubetc(global float* out, float a, float b, float c) { - *out = __builtin_amdgcn_cubetc(a, b, c); -} - -// CHECK-LABEL: @test_cubema( -// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubema(float %a, float %b, float %c) -void test_cubema(global float* out, float a, float b, float c) { - *out = __builtin_amdgcn_cubema(a, b, c); -} - // CHECK-LABEL: @test_read_exec( // CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.ballot.i64(i1 true) void test_read_exec(global ulong* out) { @@ -1139,18 +1108,6 @@ kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) { *out = __builtin_amdgcn_cvt_pkrtz(src0, src1); } -// CHECK-LABEL: @test_cvt_pknorm_i16( -// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1) -kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) { - *out = 
__builtin_amdgcn_cvt_pknorm_i16(src0, src1); -} - -// CHECK-LABEL: @test_cvt_pknorm_u16( -// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1) -kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) { - *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1); -} - // CHECK-LABEL: @test_cvt_pk_i16( // CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1) kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) { @@ -1169,36 +1126,12 @@ kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src *out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2); } -// CHECK-LABEL: @test_sad_u8( -// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2) -kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) { - *out = __builtin_amdgcn_sad_u8(src0, src1, src2); -} - // CHECK-LABEL: test_msad_u8( // CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2) kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_msad_u8(src0, src1, src2); } -// CHECK-LABEL: test_sad_hi_u8( -// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2) -kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) { - *out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2); -} - -// CHECK-LABEL: @test_sad_u16( -// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2) -kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) { - *out = __builtin_amdgcn_sad_u16(src0, src1, src2); -} - -// CHECK-LABEL: @test_qsad_pk_u16_u8( -// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2) -kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) { - *out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2); -} - // CHECK-LABEL: 
@test_mqsad_pk_u16_u8( // CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2) kernel void test_mqsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index cd8327563d9d6..b2bbd3ba6c5ca 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -901,43 +901,43 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction" >; -def FeatureCubeInsts : SubtargetFeature<"V_CUBE-Insts", +def FeatureCubeInsts : SubtargetFeature<"cube-insts", "HasCubeInsts", "true", "Has V_CUBE* instructions" >; -def FeatureLerpInst : SubtargetFeature<"V_LERP-insts", +def FeatureLerpInst : SubtargetFeature<"lerp-inst", "HasLerpInst", "true", "Has v_lerp_u8 instruction" >; -def FeatureSadInsts : SubtargetFeature<"V_SAD-insts", +def FeatureSadInsts : SubtargetFeature<"sad-insts", "HasSadInsts", "true", "Has V_SAD* instructions" >; -def FeatureQsadInsts : SubtargetFeature<"V_QSAD-insts", +def FeatureQsadInsts : SubtargetFeature<"qsad-insts", "HasQsadInsts", "true", "Has V_QSAD* instructions" >; -def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts", +def FeatureCvtNormInsts : SubtargetFeature<"cvt-norm-insts", "HasCvtNormInsts", "true", "Has V_CVT_NORM* instructions" >; -def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts", +def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"cvt-pknorm-vop2-insts", "HasCvtPkNormVOP2Insts", "true", "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions" >; -def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts", +def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"cvt-pknorm-vop3-insts", "HasCvtPkNormVOP3Insts", "true", "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions" diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 
96bef0e574a45..9a9e76d581432 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -447,6 +447,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["atomic-fmin-fmax-global-f64"] = true; Features["wavefrontsize32"] = true; Features["clusters"] = true; + Features["cube-insts"] = true; + Features["lerp-inst"] = true; + ; + Features["sad-insts"] = true; + Features["qsad-insts"] = true; + Features["cvt-pknorm-vop2-insts"] = true; break; case GK_GFX1201: case GK_GFX1200: @@ -474,6 +480,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["gfx12-insts"] = true; Features["atomic-fadd-rtn-insts"] = true; Features["image-insts"] = true; + Features["cube-insts"] = true; + Features["lerp-inst"] = true; + ; + Features["sad-insts"] = true; + Features["qsad-insts"] = true; + Features["cvt-pknorm-vop2-insts"] = true; Features["fp8-conversion-insts"] = true; Features["atomic-fmin-fmax-global-f32"] = true; break; @@ -503,6 +515,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["gfx11-insts"] = true; Features["atomic-fadd-rtn-insts"] = true; Features["image-insts"] = true; + Features["cube-insts"] = true; + Features["lerp-inst"] = true; + ; + Features["sad-insts"] = true; + Features["qsad-insts"] = true; + Features["cvt-pknorm-vop2-insts"] = true; Features["gws"] = true; Features["atomic-fmin-fmax-global-f32"] = true; break; @@ -562,6 +580,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["vmem-to-lds-load-insts"] = true; Features["atomic-fmin-fmax-global-f32"] = true; Features["atomic-fmin-fmax-global-f64"] = true; + Features["cube-insts"] = true; + Features["lerp-inst"] = true; + ; + Features["sad-insts"] = true; + Features["qsad-insts"] = true; + Features["cvt-pknorm-vop2-insts"] = true; break; case GK_GFX950: Features["bitop3-insts"] = true; @@ -615,6 +639,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple 
&T, Features["vmem-to-lds-load-insts"] = true; Features["atomic-fmin-fmax-global-f64"] = true; Features["wavefrontsize64"] = true; + Features["cube-insts"] = true; + Features["lerp-inst"] = true; + ; + Features["sad-insts"] = true; + Features["qsad-insts"] = true; + Features["cvt-pknorm-vop2-insts"] = true; break; case GK_GFX90A: Features["gfx90a-insts"] = true; @@ -659,6 +689,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["s-memtime-inst"] = true; Features["gws"] = true; Features["wavefrontsize64"] = true; + Features["cube-insts"] = true; + Features["lerp-inst"] = true; + ; + Features["sad-insts"] = true; + Features["qsad-insts"] = true; + Features["cvt-pknorm-vop2-insts"] = true; break; case GK_GFX705: case GK_GFX704: @@ -667,16 +703,28 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, case GK_GFX701: case GK_GFX700: Features["ci-insts"] = true; - [[fallthrough]]; + Features["cube-insts"] = true; + Features["lerp-inst"] = true; + ; + Features["sad-insts"] = true; + Features["qsad-insts"] = true; + Features["cvt-pknorm-vop2-insts"] = true; + break; case GK_GFX602: case GK_GFX601: case GK_GFX600: + Features["ci-insts"] = true; Features["image-insts"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; Features["atomic-fmin-fmax-global-f32"] = true; Features["atomic-fmin-fmax-global-f64"] = true; Features["wavefrontsize64"] = true; + Features["cube-insts"] = true; + Features["lerp-inst"] = true; + ; + Features["sad-insts"] = true; + Features["cvt-pknorm-vop2-insts"] = true; break; case GK_NONE: break; From c6ac0f1a79e59389fbbb944d84607a015a694a22 Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Tue, 18 Nov 2025 13:43:18 +0800 Subject: [PATCH 05/12] fix format --- llvm/lib/TargetParser/TargetParser.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 
9a9e76d581432..3f28780e2cb04 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -449,7 +449,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["clusters"] = true; Features["cube-insts"] = true; Features["lerp-inst"] = true; - ; Features["sad-insts"] = true; Features["qsad-insts"] = true; Features["cvt-pknorm-vop2-insts"] = true; @@ -482,7 +481,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["image-insts"] = true; Features["cube-insts"] = true; Features["lerp-inst"] = true; - ; Features["sad-insts"] = true; Features["qsad-insts"] = true; Features["cvt-pknorm-vop2-insts"] = true; @@ -517,7 +515,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["image-insts"] = true; Features["cube-insts"] = true; Features["lerp-inst"] = true; - ; Features["sad-insts"] = true; Features["qsad-insts"] = true; Features["cvt-pknorm-vop2-insts"] = true; @@ -553,6 +550,11 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["vmem-to-lds-load-insts"] = true; Features["atomic-fmin-fmax-global-f32"] = true; Features["atomic-fmin-fmax-global-f64"] = true; + Features["cube-insts"] = true; + Features["lerp-inst"] = true; + Features["sad-insts"] = true; + Features["qsad-insts"] = true; + Features["cvt-pknorm-vop2-insts"] = true; break; case GK_GFX1012: case GK_GFX1011: @@ -582,7 +584,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["atomic-fmin-fmax-global-f64"] = true; Features["cube-insts"] = true; Features["lerp-inst"] = true; - ; Features["sad-insts"] = true; Features["qsad-insts"] = true; Features["cvt-pknorm-vop2-insts"] = true; @@ -641,7 +642,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["wavefrontsize64"] = true; Features["cube-insts"] = true; Features["lerp-inst"] = true; - ; Features["sad-insts"] = true; Features["qsad-insts"] = true; Features["cvt-pknorm-vop2-insts"] 
= true; @@ -691,7 +691,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["wavefrontsize64"] = true; Features["cube-insts"] = true; Features["lerp-inst"] = true; - ; Features["sad-insts"] = true; Features["qsad-insts"] = true; Features["cvt-pknorm-vop2-insts"] = true; @@ -705,7 +704,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["ci-insts"] = true; Features["cube-insts"] = true; Features["lerp-inst"] = true; - ; Features["sad-insts"] = true; Features["qsad-insts"] = true; Features["cvt-pknorm-vop2-insts"] = true; @@ -722,7 +720,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["wavefrontsize64"] = true; Features["cube-insts"] = true; Features["lerp-inst"] = true; - ; Features["sad-insts"] = true; Features["cvt-pknorm-vop2-insts"] = true; break; From a3987b136105dd98ede771b3f17429f4493e19b6 Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Tue, 18 Nov 2025 14:39:04 +0800 Subject: [PATCH 06/12] fix tests --- clang/test/CodeGenOpenCL/amdgpu-features.cl | 98 ++++++++++----------- llvm/lib/TargetParser/TargetParser.cpp | 6 ++ 2 files changed, 55 insertions(+), 49 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 9bd096f3fcbc7..aae05dd623a63 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -63,55 +63,55 @@ // NOCPU-WAVE32: "target-features"="+wavefrontsize32" // NOCPU-WAVE64: "target-features"="+wavefrontsize64" -// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64 -// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64 -// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64 -// GFX700: 
"target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX703: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX704: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX705: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64" -// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX805: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX810: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX908: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" -// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX950: 
"target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX1010: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1011: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1012: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1013: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1030: 
"target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1031: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1032: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1033: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1034: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1035: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1036: 
"target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1152: 
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1250: 
"target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" -// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" +// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64 +// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64 +// GFX602: 
"target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64 +// GFX700: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX703: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX704: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX705: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX801: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX802: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX803: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX805: 
"target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX810: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX900: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX902: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX904: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX906: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX908: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX909: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX90A: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts" +// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX950: 
"target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+lerp-inst,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX1010: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1011: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1012: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1013: 
"target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1030: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1031: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1032: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1033: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1034: 
"target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1035: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1036: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32" +// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1102: 
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1153: 
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1250: 
"target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" +// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" -// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" +// GFX1103-W64: 
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize64" kernel void test() {} diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 3f28780e2cb04..9439fa2b3a0ec 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -707,6 +707,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["sad-insts"] = true; Features["qsad-insts"] = true; Features["cvt-pknorm-vop2-insts"] = true; + Features["image-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize64"] = true; break; case GK_GFX602: case GK_GFX601: From 2b6be97afae2e4e225715cb7998b38da2bd0600a Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Tue, 18 Nov 2025 14:45:04 +0800 Subject: [PATCH 07/12] fix description --- llvm/lib/Target/AMDGPU/AMDGPU.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index b2bbd3ba6c5ca..5dea64844e64e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -904,7 +904,7 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", def FeatureCubeInsts : SubtargetFeature<"cube-insts", "HasCubeInsts", "true", - "Has V_CUBE* instructions" + "Has v_cube* instructions" >; def FeatureLerpInst : SubtargetFeature<"lerp-inst", @@ -916,31 +916,31 @@ def FeatureLerpInst : SubtargetFeature<"lerp-inst", def FeatureSadInsts : SubtargetFeature<"sad-insts", "HasSadInsts", "true", - "Has V_SAD* instructions" + "Has v_sad* 
instructions" >; def FeatureQsadInsts : SubtargetFeature<"qsad-insts", "HasQsadInsts", "true", - "Has V_QSAD* instructions" + "Has v_qsad* instructions" >; def FeatureCvtNormInsts : SubtargetFeature<"cvt-norm-insts", "HasCvtNormInsts", "true", - "Has V_CVT_NORM* instructions" + "Has v_cvt_norm* instructions" >; def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"cvt-pknorm-vop2-insts", "HasCvtPkNormVOP2Insts", "true", - "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions" + "Has v_cvt_pk_norm_*f32 instructions/Has v_cvt_pk_norm_*_f16 instructions" >; def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"cvt-pknorm-vop3-insts", "HasCvtPkNormVOP3Insts", "true", - "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions" + "Has v_cvt_pk_norm_*f32 instructions/Has v_cvt_pk_norm_*_f16 instructions" >; def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts", From 3691fc903f591cb40d53f3e6b558c035292964a9 Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Tue, 18 Nov 2025 15:36:09 +0800 Subject: [PATCH 08/12] fix failing tests --- clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl | 4 ++-- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 8 ++++---- .../amdgpu-readonly-features-written-with-no-target.cl | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl index 14fbeb24a96c2..c5656c49c4761 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl @@ -26,8 +26,8 @@ kernel void foo(global int *p) { *p = 1; } // CHECK-NEXT: ret void // //. 
-// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" } -// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" } +// CHECK: attributes #[[ATTR0]] = { convergent norecurse 
nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" } +// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" 
"target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" } // CHECK: attributes #[[ATTR2]] = { convergent nounwind } //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index e9adac23a6530..2cbc9787a04b0 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -816,12 +816,12 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: attributes #[[ATTR10]] = { convergent nounwind } //. 
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } -// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } -// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" } -// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } +// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" } +// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" } +// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" } // GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } // GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } +// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" } // GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } // GFX900: attributes #[[ATTR8]] = { convergent nounwind } // GFX900: attributes #[[ATTR9]] = { nounwind } diff --git a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl 
b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl index 1a0a30ca0b51e..2d50ce7cab2e0 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl @@ -11,6 +11,6 @@ __attribute__((target("gws,image-insts,vmem-to-lds-load-insts"))) void test() {} // NOCPU: "target-features"="+gws,+image-insts,+vmem-to-lds-load-insts" -// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" -// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32 +// GFX942: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts" +// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" From 6a8d41af37e2c617580d20fc536644197c6ad88c Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Tue, 18 Nov 2025 16:20:16 +0800 Subject: [PATCH 09/12] fix failing test --- clang/test/CodeGen/link-builtin-bitcode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c index 9a5b6de3c3b38..f6e45bf573705 100644 --- a/clang/test/CodeGen/link-builtin-bitcode.c +++ b/clang/test/CodeGen/link-builtin-bitcode.c @@ -43,7 +43,7 @@ int 
bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in // CHECK-LABEL: @attr_incompatible // CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] { -// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" } +// CHECK: 
attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" } From 91bd9087906d802e750368c3303598ce41e5c711 Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Tue, 18 Nov 2025 16:32:05 +0800 Subject: [PATCH 10/12] fix failing tests --- clang/test/OpenMP/amdgcn-attributes.cpp | 4 ++-- flang/test/Lower/OpenMP/target_cpu_features.f90 | 4 ++-- llvm/lib/TargetParser/TargetParser.cpp | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp index 2c9e16a4f5098..03f5c31e3157c 100644 --- a/clang/test/OpenMP/amdgcn-attributes.cpp +++ b/clang/test/OpenMP/amdgcn-attributes.cpp @@ -32,9 +32,9 @@ int callable(int x) { } // DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" } +// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" 
"stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" "uniform-work-group-size"="true" } // NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } // DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" } // NOIEEE: attributes #2 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90 index 4532593156eab..341cfc7991d43 100644 --- a/flang/test/Lower/OpenMP/target_cpu_features.f90 +++ b/flang/test/Lower/OpenMP/target_cpu_features.f90 @@ -11,8 +11,8 @@ !AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", !AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", 
"+dot3-insts", !AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", -!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts", -!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+vmem-to-lds-load-insts", "+wavefrontsize64"]> +!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+lerp-inst", "+mai-insts", +!AMDGCN-SAME: "+qsad-insts", "+s-memrealtime", "+s-memtime-inst", "+sad-insts", "+vmem-to-lds-load-insts", "+wavefrontsize64"]> !NVPTX: module attributes { !NVPTX-SAME: fir.target_cpu = "sm_80" diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 9439fa2b3a0ec..28f3649a840d6 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -717,7 +717,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, case GK_GFX602: case GK_GFX601: case GK_GFX600: - Features["ci-insts"] = true; Features["image-insts"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; From c8d801121a8bce0a4f36b1ea851c2a304c5132ff Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Tue, 18 Nov 2025 17:09:16 +0800 Subject: [PATCH 11/12] fix failing test --- clang/test/CodeGenOpenCL/amdgpu-features.cl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index aae05dd623a63..bd162b40b8e47 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -63,9 +63,9 @@ // NOCPU-WAVE32: "target-features"="+wavefrontsize32" // NOCPU-WAVE64: "target-features"="+wavefrontsize64" -// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64 -// GFX601: 
"target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64 -// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64 +// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64" +// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64" // GFX700: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64" // GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64" // GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64" From b6ad1ac384a504f2c798c095cf914fe2b469967a Mon Sep 17 00:00:00 2001 From: shore <372660931@qq.com> Date: Wed, 19 Nov 2025 09:30:01 +0800 Subject: [PATCH 12/12] format code --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index c349cdc6aef5b..81e684a04a03d 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -155,8 +155,8 @@ BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc") -TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "sad-insts") BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc") +TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "sad-insts") TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "sad-insts") TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "sad-insts") TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "qsad-insts")