diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index bef4868492d4e..7e7ee754c250d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -280,6 +280,10 @@ static unsigned getTcgen05LdOpcode(unsigned IID, bool enablePack) { } void NVPTXDAGToDAGISel::SelectTcgen05Ld(SDNode *N, bool hasOffset) { + if (!Subtarget->hasTcgen05InstSupport()) + report_fatal_error( + "tcgen05.ld is not supported on this architecture variant"); + SDLoc DL(N); unsigned IID = cast(N->getOperand(1))->getZExtValue(); @@ -2136,6 +2140,10 @@ static unsigned getTcgen05StOpcode(unsigned IID, bool enableUnpack) { } void NVPTXDAGToDAGISel::SelectTcgen05St(SDNode *N, bool hasOffset) { + if (!Subtarget->hasTcgen05InstSupport()) + report_fatal_error( + "tcgen05.st is not supported on this architecture variant"); + SDLoc DL(N); unsigned IID = cast(N->getOperand(1))->getZExtValue(); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 6c14cf0b324e1..dfde0cca0f00c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -101,6 +101,22 @@ def PrmtMode : Operand { // NVPTX Instruction Predicate Definitions //===----------------------------------------------------------------------===// +// Checks the PTX version and family-/architecture-specific SM versions. +// For example, sm_100{f/a} and any future variants in the same family will match +// for any PTX version greater than or equal to `PTXVersion`. +class PTXWithFamilySMs SMVersions> : + Predicate<"Subtarget->hasPTXWithFamilySMs(" # PTXVersion # ", {" # + !interleave(SMVersions, ", ") # "})">; + +// Checks the PTX version and architecture-specific SM versions. +// For example, sm_100{a} will match for any PTX version +// greater than or equal to `PTXVersion`. 
+class PTXWithAccelSMs SMVersions> : + Predicate<"Subtarget->hasPTXWithAccelSMs(" # PTXVersion # ", {" # + !interleave(SMVersions, ", ") # "})">; + +// Helper predicate that calls the named argument-less Subtarget method. +class callSubtarget : Predicate<"Subtarget->" # SubtargetMethod # "()">; def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">; def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index a8b854ff1f793..22cf3a7eef2c1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -5103,8 +5103,8 @@ let Predicates = [hasSM<90>, hasPTX<78>] in { def EXIT : NullaryInst<"exit", int_nvvm_exit>; // Tcgen05 intrinsics -let isConvergent = true, Predicates = [hasTcgen05Instructions] in { - +let isConvergent = true in { +let Predicates = [callSubtarget<"hasTcgen05InstSupport">] in { multiclass TCGEN05_ALLOC_INTR { def "" : BasicNVPTXInst<(outs), (ins ADDR:$dst, B32:$ncols), @@ -5156,15 +5156,6 @@ defm TCGEN05_COMMIT_CG2 : TCGEN05_COMMIT_INTR<"", "2">; defm TCGEN05_COMMIT_S64_CG1 : TCGEN05_COMMIT_INTR<"shared", "1">; defm TCGEN05_COMMIT_S64_CG2 : TCGEN05_COMMIT_INTR<"shared", "2">; -multiclass TCGEN05_SHIFT_INTR { - def "" : BasicNVPTXInst<(outs), - (ins ADDR:$tmem_addr), - "tcgen05.shift.cta_group::" # num # ".down", - [(Intr addr:$tmem_addr)]>; -} -defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>; -defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>; - multiclass TCGEN05_CP_INTR { defvar dst_fmt = !if(!eq(src_fmt, ""), "", ".b8x16"); defvar fmt_asm = StrJoin<".", [dst_fmt, src_fmt]>.ret; @@ -5195,9 +5186,22 @@ foreach src_fmt = ["", "b6x16_p32", "b4x16_p64"] in { defm TCGEN05_CP_64x128_2 # src_fmt : TCGEN05_CP_INTR<"64x128b", src_fmt, "warpx2::01_23">; defm TCGEN05_CP_32x128 # src_fmt : TCGEN05_CP_INTR<"32x128b", src_fmt, "warpx4">; } +} // Predicates = [callSubtarget<"hasTcgen05InstSupport">] + +let Predicates = 
[callSubtarget<"hasTcgen05ShiftSupport">] in { +multiclass TCGEN05_SHIFT_INTR { + def "" : BasicNVPTXInst<(outs), + (ins ADDR:$tmem_addr), + "tcgen05.shift.cta_group::" # num # ".down", + [(Intr addr:$tmem_addr)]>; +} +defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>; +defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>; +} // Predicates = [callSubtarget<"hasTcgen05ShiftSupport">] + } // isConvergent -let hasSideEffects = 1, Predicates = [hasTcgen05Instructions] in { +let hasSideEffects = 1, Predicates = [callSubtarget<"hasTcgen05InstSupport">] in { def tcgen05_fence_before_thread_sync: NullaryInst< "tcgen05.fence::before_thread_sync", int_nvvm_tcgen05_fence_before_thread_sync>; @@ -5231,8 +5235,7 @@ class TCGEN05_LDST_REGINFO { // class TCGEN05_LD_INST : - NVPTXInst<(outs), (ins), "?", []>, - Requires<[hasTcgen05Instructions]> { + NVPTXInst<(outs), (ins), "?", []> { TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO< NVVM_TCGEN05_LDST_ACCESS_SIZE.veclen>; @@ -5256,8 +5259,7 @@ class TCGEN05_LD_INST : // class TCGEN05_ST_INST : - NVPTXInst<(outs), (ins), "?", []>, - Requires<[hasTcgen05Instructions]> { + NVPTXInst<(outs), (ins), "?", []> { TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO< NVVM_TCGEN05_LDST_ACCESS_SIZE.veclen>; diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index c5489670bd249..989be50d45554 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -72,6 +72,40 @@ const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const { return TSInfo.get(); } +bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion, + ArrayRef SMVersions) const { + unsigned PTXVer = getPTXVersion(); + if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion) + return false; + + unsigned SMVer = getSmVersion(); + return llvm::any_of(SMVersions, [&](unsigned SM) { + // sm_101 belongs to a different family; never group it with sm_10x. 
+ if (SMVer == 101 || SM == 101) + return SMVer == SM && + // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not + // supported from PTX 9.0 onwards. + !(PTXVer >= 90 && SMVer == 101); + + return getSmFamilyVersion() == SM / 10 && SMVer >= SM; + }); +} + +bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion, + ArrayRef SMVersions) const { + unsigned PTXVer = getPTXVersion(); + if (!hasArchAccelFeatures() || PTXVer < PTXVersion) + return false; + + unsigned SMVer = getSmVersion(); + return llvm::any_of(SMVersions, [&](unsigned SM) { + return SMVer == SM && + // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not + // supported from PTX 9.0 onwards. + !(PTXVer >= 90 && SMVer == 101); + }); +} + bool NVPTXSubtarget::allowFP16Math() const { return hasFP16Math() && NoF16Math == false; } diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index e81c56bb4b562..194dbdc061a96 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -73,6 +73,18 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { const SelectionDAGTargetInfo *getSelectionDAGInfo() const override; + // Checks the PTX version and family-specific and architecture-specific SM + // versions. For example, sm_100{f/a} and any future variants in the same + // family will match for any PTX version greater than or equal to + // `PTXVersion`. Returns false unless hasFamilySpecificFeatures() is true. + bool hasPTXWithFamilySMs(unsigned PTXVersion, + ArrayRef SMVersions) const; + // Checks the PTX version and architecture-specific SM versions. + // For example, sm_100{a} will match for any PTX version greater than or equal + // to `PTXVersion`. Returns false unless hasArchAccelFeatures() is true. 
+ bool hasPTXWithAccelSMs(unsigned PTXVersion, + ArrayRef SMVersions) const; + bool has256BitVectorLoadStore(unsigned AS) const { return SmVersion >= 100 && PTXVersion >= 88 && AS == NVPTXAS::ADDRESS_SPACE_GLOBAL; @@ -127,6 +139,27 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { return HasTcgen05 && PTXVersion >= MinPTXVersion; } + // Checks support for the following instructions: + // - tcgen05.ld/st + // - tcgen05.alloc/dealloc/relinquish + // - tcgen05.cp + // - tcgen05.fence/wait + // - tcgen05.commit + bool hasTcgen05InstSupport() const { + // sm_101 was renamed to sm_110 in PTX 9.0 + return hasPTXWithFamilySMs(90, {100, 110}) || + hasPTXWithFamilySMs(88, {100, 101}) || + hasPTXWithAccelSMs(86, {100, 101}); + } + + // Checks tcgen05.shift support; only arch-accelerated targets qualify. + bool hasTcgen05ShiftSupport() const { + // sm_101 was renamed to sm_110 in PTX 9.0 + return hasPTXWithAccelSMs(90, {100, 110, 103}) || + hasPTXWithAccelSMs(88, {100, 101, 103}) || + hasPTXWithAccelSMs(86, {100, 101}); + } + bool hasTcgen05MMAScaleInputDImm() const { return FullSmVersion == 1003 && PTXVersion >= 86; } @@ -158,6 +191,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; } unsigned int getFullSmVersion() const { return FullSmVersion; } unsigned int getSmVersion() const { return getFullSmVersion() / 10; } + unsigned int getSmFamilyVersion() const { return getFullSmVersion() / 100; } // GPUs with "a" suffix have architecture-accelerated features that are // supported on the specified architecture only, hence such targets do not // follow the onion layer model. 
hasArchAccelFeatures() allows distinguishing diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll index 1edb3871d9912..f345e08ec685d 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll @@ -2,9 +2,13 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK_PTX64 %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | FileCheck --check-prefixes=CHECK_PTX64_SHARED32 %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK_PTX64 %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} declare void @llvm.nvvm.tcgen05.alloc.cg1(ptr %addr, i32 %ncols) diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll index 2e80c4c935814..29b130f8cf7c3 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll @@ -2,9 +2,13 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK_PTX64 %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 
--nvptx-short-ptr | FileCheck --check-prefixes=CHECK_PTX64_SHARED32 %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK_PTX64 %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} declare void @llvm.nvvm.tcgen05.commit.cg1(ptr %bar_addr) declare void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr) diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll b/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll index 817b1d5bff291..4e463a14e7537 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | 
%ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} define void @test_tcgen05_cp_64x128_v1_cg1(ptr addrspace(6) %addr, i64 %sdesc) { ; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg1( diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-fence.ll b/llvm/test/CodeGen/NVPTX/tcgen05-fence.ll index cbf647f857173..fc8cce4a143be 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-fence.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-fence.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} declare void @llvm.nvvm.tcgen05.fence.before.thread.sync() declare void @llvm.nvvm.tcgen05.fence.after.thread.sync() diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-ld.ll 
b/llvm/test/CodeGen/NVPTX/tcgen05-ld.ll index a37b1a95aa800..22eb7298133bb 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-ld.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-ld.ll @@ -2,9 +2,13 @@ ; RUN: llc < %s -o - -mcpu=sm_100a -march=nvptx64 -mattr=+ptx86 | FileCheck %s ; RUN: llc < %s -o - -mcpu=sm_101a -march=nvptx64 -mattr=+ptx86 | FileCheck %s ; RUN: llc < %s -o - -mcpu=sm_103a -march=nvptx64 -mattr=+ptx88 | FileCheck %s +; RUN: llc < %s -o - -mcpu=sm_100f -march=nvptx64 -mattr=+ptx88 | FileCheck %s +; RUN: llc < %s -o - -mcpu=sm_110f -march=nvptx64 -mattr=+ptx90 | FileCheck %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_100a | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_101a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_101a | %ptxas-verify -arch=sm_101a %} ; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mattr=+ptx88 -mcpu=sm_103a | %ptxas-verify -arch=sm_103a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mattr=+ptx88 -mcpu=sm_100f | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mattr=+ptx90 -mcpu=sm_110f | %ptxas-verify -arch=sm_110f %} ; CHECK-LABEL: nvvm_tcgen05_ld_16x64b define void @nvvm_tcgen05_ld_16x64b(ptr addrspace(6) %taddr) { diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll b/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll index bf2adac6ec6fc..33483b58a62f9 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_110a -mattr=+ptx90 | FileCheck --check-prefixes=CHECK %s ; RUN: %if 
ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %} +; RUN: %if ptxas-sm_110a && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110a -mattr=+ptx90 | %ptxas-verify -arch=sm_110a %} declare void @llvm.nvvm.tcgen05.shift.down.cg1(ptr addrspace(6) %tmem_addr) declare void @llvm.nvvm.tcgen05.shift.down.cg2(ptr addrspace(6) %tmem_addr) diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-st.ll b/llvm/test/CodeGen/NVPTX/tcgen05-st.ll index 0636a06bc9ea9..ccf6541d01973 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-st.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-st.ll @@ -2,9 +2,13 @@ ; RUN: llc < %s -o - -mcpu=sm_100a -march=nvptx64 -mattr=+ptx86 | FileCheck %s ; RUN: llc < %s -o - -mcpu=sm_101a -march=nvptx64 -mattr=+ptx86 | FileCheck %s ; RUN: llc < %s -o - -mcpu=sm_103a -march=nvptx64 -mattr=+ptx88 | FileCheck %s +; RUN: llc < %s -o - -mcpu=sm_100f -march=nvptx64 -mattr=+ptx88 | FileCheck %s +; RUN: llc < %s -o - -mcpu=sm_110f -march=nvptx64 -mattr=+ptx90 | FileCheck %s ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_101a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_101a -mattr=+ptx86 | %ptxas-verify -arch=sm_101a %} ; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %} +; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %} +; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %} ; CHECK-LABEL: nvvm_tcgen05_st_16x64b define void @nvvm_tcgen05_st_16x64b(ptr addrspace(6) %taddr, i32 %stv1, <2 x i32> %stv2, <4 x i32> %stv4, <8 x i32> %stv8, <16 x i32> %stv16, 
<32 x i32> %stv32, <64 x i32> %stv64, <128 x i32> %stv128) {