From 6eea017741b5e2a88954e6c3ee18c8144c3e74a4 Mon Sep 17 00:00:00 2001 From: Akshay Deodhar Date: Thu, 5 Dec 2024 06:36:04 +0000 Subject: [PATCH 1/4] [NVPTX] Support for fence.acquire and fence.release --- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 41 ++++ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 37 +--- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 4 + llvm/test/CodeGen/NVPTX/fence-sm-90.ll | 30 --- llvm/test/CodeGen/NVPTX/fence-sm30.ll | 165 +++++++++++++++ llvm/test/CodeGen/NVPTX/fence-sm70.ll | 165 +++++++++++++++ llvm/test/CodeGen/NVPTX/fence-sm90.ll | 213 ++++++++++++++++++++ llvm/test/CodeGen/NVPTX/fence.ll | 102 ---------- llvm/test/CodeGen/NVPTX/fence.py | 38 ++++ llvm/test/CodeGen/NVPTX/lit.local.cfg | 1 + 10 files changed, 637 insertions(+), 159 deletions(-) delete mode 100644 llvm/test/CodeGen/NVPTX/fence-sm-90.ll create mode 100644 llvm/test/CodeGen/NVPTX/fence-sm30.ll create mode 100644 llvm/test/CodeGen/NVPTX/fence-sm70.ll create mode 100644 llvm/test/CodeGen/NVPTX/fence-sm90.ll delete mode 100644 llvm/test/CodeGen/NVPTX/fence.ll create mode 100644 llvm/test/CodeGen/NVPTX/fence.py diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index ac8ce05724750..ec654e0f3f200 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -648,9 +648,50 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S, if (S == NVPTX::Scope::Cluster) T->failIfClustersUnsupported(".cluster scope fence"); + // Fall back to .acq_rel if .acquire, .release is not supported. + if (!T->hasSplitAcquireAndReleaseFences() && + (O == NVPTX::Ordering::Acquire || O == NVPTX::Ordering::Release)) + O = NVPTX::Ordering::AcquireRelease; + switch (O) { case NVPTX::Ordering::Acquire: + switch (S) { + case NVPTX::Scope::System: + return T->hasMemoryOrdering() ? 
NVPTX::atomic_thread_fence_acquire_sys + : NVPTX::INT_MEMBAR_SYS; + case NVPTX::Scope::Block: + return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_cta + : NVPTX::INT_MEMBAR_CTA; + case NVPTX::Scope::Cluster: + return NVPTX::atomic_thread_fence_acquire_cluster; + case NVPTX::Scope::Device: + return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_gpu + : NVPTX::INT_MEMBAR_GL; + case NVPTX::Scope::Thread: + report_fatal_error( + formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.", + ScopeToString(S))); + } + break; case NVPTX::Ordering::Release: + switch (S) { + case NVPTX::Scope::System: + return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_sys + : NVPTX::INT_MEMBAR_SYS; + case NVPTX::Scope::Block: + return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_cta + : NVPTX::INT_MEMBAR_CTA; + case NVPTX::Scope::Cluster: + return NVPTX::atomic_thread_fence_release_cluster; + case NVPTX::Scope::Device: + return T->hasMemoryOrdering() ? 
NVPTX::atomic_thread_fence_release_gpu + : NVPTX::INT_MEMBAR_GL; + case NVPTX::Scope::Thread: + report_fatal_error( + formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.", + ScopeToString(S))); + } + break; case NVPTX::Ordering::AcquireRelease: { switch (S) { case NVPTX::Scope::System: diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 633a99d0fc1be..74423d79e41e0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -3866,33 +3866,16 @@ def : Pat < // PTX Fence instructions //////////////////////////////////////////////////////////////////////////////// -def atomic_thread_fence_seq_cst_sys : - NVPTXInst<(outs), (ins), "fence.sc.sys;", []>, - Requires<[hasPTX<60>, hasSM<70>]>; -def atomic_thread_fence_acq_rel_sys : - NVPTXInst<(outs), (ins), "fence.acq_rel.sys;", []>, - Requires<[hasPTX<60>, hasSM<70>]>; - -def atomic_thread_fence_seq_cst_gpu : - NVPTXInst<(outs), (ins), "fence.sc.gpu;", []>, - Requires<[hasPTX<60>, hasSM<70>]>; -def atomic_thread_fence_acq_rel_gpu : - NVPTXInst<(outs), (ins), "fence.acq_rel.gpu;", []>, - Requires<[hasPTX<60>, hasSM<70>]>; - -def atomic_thread_fence_seq_cst_cluster : - NVPTXInst<(outs), (ins), "fence.sc.cluster;", []>, - Requires<[hasPTX<78>, hasSM<90>]>; -def atomic_thread_fence_acq_rel_cluster : - NVPTXInst<(outs), (ins), "fence.acq_rel.cluster;", []>, - Requires<[hasPTX<78>, hasSM<90>]>; - -def atomic_thread_fence_seq_cst_cta : - NVPTXInst<(outs), (ins), "fence.sc.cta;", []>, - Requires<[hasPTX<60>, hasSM<70>]>; -def atomic_thread_fence_acq_rel_cta : - NVPTXInst<(outs), (ins), "fence.acq_rel.cta;", []>, - Requires<[hasPTX<60>, hasSM<70>]>; +class NVPTXFenceInst: + NVPTXInst<(outs), (ins), "fence."#sem#"."#scope#";", []>, + Requires<[ptx, hasSM<70>]>; + +foreach scope = ["sys", "gpu", "cluster", "cta"] in { + def atomic_thread_fence_seq_cst_#scope: NVPTXFenceInst>; + def atomic_thread_fence_acq_rel_#scope: 
NVPTXFenceInst>; + def atomic_thread_fence_acquire_#scope: NVPTXFenceInst>; + def atomic_thread_fence_release_#scope: NVPTXFenceInst>; +} def fpimm_any_zero : FPImmLeaf= 70 && PTXVersion >= 60; } + // Does SM & PTX support .acquire and .release qualifiers for fence? + bool hasSplitAcquireAndReleaseFences() const { + return SmVersion >= 90 && PTXVersion >= 86; + } // Does SM & PTX support atomic relaxed MMIO operations ? bool hasRelaxedMMIO() const { return SmVersion >= 70 && PTXVersion >= 82; } bool hasDotInstructions() const { diff --git a/llvm/test/CodeGen/NVPTX/fence-sm-90.ll b/llvm/test/CodeGen/NVPTX/fence-sm-90.ll deleted file mode 100644 index dce39bf3e1e3e..0000000000000 --- a/llvm/test/CodeGen/NVPTX/fence-sm-90.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s -; RUN: %if ptxas-12.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %} - -; CHECK-LABEL: fence_sc_cluster -define void @fence_sc_cluster() local_unnamed_addr { - ; CHECK: fence.sc.cluster - fence syncscope("cluster") seq_cst - ret void -} - -; CHECK-LABEL: fence_acq_rel_cluster -define void @fence_acq_rel_cluster() local_unnamed_addr { - ; CHECK: fence.acq_rel.cluster - fence syncscope("cluster") acq_rel - ret void -} - -; CHECK-LABEL: fence_release_cluster -define void @fence_release_cluster() local_unnamed_addr { - ; CHECK: fence.acq_rel.cluster - fence syncscope("cluster") release - ret void -} - -; CHECK-LABEL: fence_acquire_cluster -define void @fence_acquire_cluster() local_unnamed_addr { - ; CHECK: fence.acq_rel.cluster - fence syncscope("cluster") acquire - ret void -} diff --git a/llvm/test/CodeGen/NVPTX/fence-sm30.ll b/llvm/test/CodeGen/NVPTX/fence-sm30.ll new file mode 100644 index 0000000000000..16365db21d5b9 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/fence-sm30.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: 
llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx50 | FileCheck %s --check-prefix=SM30 +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx50 | %ptxas-verfy %} + + +define void @fence_acquire_() { +; SM30-LABEL: fence_acquire_( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.sys; +; SM30-NEXT: ret; + fence syncscope("") acquire + ret void +} + + +define void @fence_acquire_block() { +; SM30-LABEL: fence_acquire_block( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.cta; +; SM30-NEXT: ret; + fence syncscope("block") acquire + ret void +} + +; .cluster scope unsupported on SM = 30 PTX = 50 + +define void @fence_acquire_device() { +; SM30-LABEL: fence_acquire_device( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.gl; +; SM30-NEXT: ret; + fence syncscope("device") acquire + ret void +} + + +define void @fence_release_() { +; SM30-LABEL: fence_release_( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.sys; +; SM30-NEXT: ret; + fence syncscope("") release + ret void +} + + +define void @fence_release_block() { +; SM30-LABEL: fence_release_block( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.cta; +; SM30-NEXT: ret; + fence syncscope("block") release + ret void +} + +; .cluster scope unsupported on SM = 30 PTX = 50 + +define void @fence_release_device() { +; SM30-LABEL: fence_release_device( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.gl; +; SM30-NEXT: ret; + fence syncscope("device") release + ret void +} + + +define void @fence_acq_rel_() { +; SM30-LABEL: fence_acq_rel_( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.sys; +; SM30-NEXT: ret; + fence syncscope("") acq_rel + ret void +} + + +define void @fence_acq_rel_block() { +; SM30-LABEL: fence_acq_rel_block( +; SM30: { +; SM30-EMPTY: +; 
SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.cta; +; SM30-NEXT: ret; + fence syncscope("block") acq_rel + ret void +} + +; .cluster scope unsupported on SM = 30 PTX = 50 + +define void @fence_acq_rel_device() { +; SM30-LABEL: fence_acq_rel_device( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.gl; +; SM30-NEXT: ret; + fence syncscope("device") acq_rel + ret void +} + + +define void @fence_seq_cst_() { +; SM30-LABEL: fence_seq_cst_( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.sys; +; SM30-NEXT: ret; + fence syncscope("") seq_cst + ret void +} + + +define void @fence_seq_cst_block() { +; SM30-LABEL: fence_seq_cst_block( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.cta; +; SM30-NEXT: ret; + fence syncscope("block") seq_cst + ret void +} + +; .cluster scope unsupported on SM = 30 PTX = 50 + +define void @fence_seq_cst_device() { +; SM30-LABEL: fence_seq_cst_device( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.gl; +; SM30-NEXT: ret; + fence syncscope("device") seq_cst + ret void +} + diff --git a/llvm/test/CodeGen/NVPTX/fence-sm70.ll b/llvm/test/CodeGen/NVPTX/fence-sm70.ll new file mode 100644 index 0000000000000..085529571e044 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/fence-sm70.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70 +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verfy %} + + +define void @fence_acquire_() { +; SM70-LABEL: fence_acquire_( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.sys; +; SM70-NEXT: ret; + fence syncscope("") acquire + ret void +} + + +define void @fence_acquire_block() { +; SM70-LABEL: fence_acquire_block( +; SM70: { +; 
SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.cta; +; SM70-NEXT: ret; + fence syncscope("block") acquire + ret void +} + +; .cluster scope unsupported on SM = 70 PTX = 60 + +define void @fence_acquire_device() { +; SM70-LABEL: fence_acquire_device( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.gpu; +; SM70-NEXT: ret; + fence syncscope("device") acquire + ret void +} + + +define void @fence_release_() { +; SM70-LABEL: fence_release_( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.sys; +; SM70-NEXT: ret; + fence syncscope("") release + ret void +} + + +define void @fence_release_block() { +; SM70-LABEL: fence_release_block( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.cta; +; SM70-NEXT: ret; + fence syncscope("block") release + ret void +} + +; .cluster scope unsupported on SM = 70 PTX = 60 + +define void @fence_release_device() { +; SM70-LABEL: fence_release_device( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.gpu; +; SM70-NEXT: ret; + fence syncscope("device") release + ret void +} + + +define void @fence_acq_rel_() { +; SM70-LABEL: fence_acq_rel_( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.sys; +; SM70-NEXT: ret; + fence syncscope("") acq_rel + ret void +} + + +define void @fence_acq_rel_block() { +; SM70-LABEL: fence_acq_rel_block( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.cta; +; SM70-NEXT: ret; + fence syncscope("block") acq_rel + ret void +} + +; .cluster scope unsupported on SM = 70 PTX = 60 + +define void @fence_acq_rel_device() { +; SM70-LABEL: fence_acq_rel_device( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.gpu; +; SM70-NEXT: ret; + fence syncscope("device") acq_rel + ret void +} + + 
+define void @fence_seq_cst_() { +; SM70-LABEL: fence_seq_cst_( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.sc.sys; +; SM70-NEXT: ret; + fence syncscope("") seq_cst + ret void +} + + +define void @fence_seq_cst_block() { +; SM70-LABEL: fence_seq_cst_block( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.sc.cta; +; SM70-NEXT: ret; + fence syncscope("block") seq_cst + ret void +} + +; .cluster scope unsupported on SM = 70 PTX = 60 + +define void @fence_seq_cst_device() { +; SM70-LABEL: fence_seq_cst_device( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.sc.gpu; +; SM70-NEXT: ret; + fence syncscope("device") seq_cst + ret void +} + diff --git a/llvm/test/CodeGen/NVPTX/fence-sm90.ll b/llvm/test/CodeGen/NVPTX/fence-sm90.ll new file mode 100644 index 0000000000000..6c1959d34df4e --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/fence-sm90.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90 +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verfy %} + + +define void @fence_acquire_() { +; SM90-LABEL: fence_acquire_( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acquire.sys; +; SM90-NEXT: ret; + fence syncscope("") acquire + ret void +} + + +define void @fence_acquire_block() { +; SM90-LABEL: fence_acquire_block( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acquire.cta; +; SM90-NEXT: ret; + fence syncscope("block") acquire + ret void +} + + +define void @fence_acquire_cluster() { +; SM90-LABEL: fence_acquire_cluster( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acquire.cluster; +; SM90-NEXT: ret; + fence syncscope("cluster") acquire + ret void +} + + +define 
void @fence_acquire_device() { +; SM90-LABEL: fence_acquire_device( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acquire.gpu; +; SM90-NEXT: ret; + fence syncscope("device") acquire + ret void +} + + +define void @fence_release_() { +; SM90-LABEL: fence_release_( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.release.sys; +; SM90-NEXT: ret; + fence syncscope("") release + ret void +} + + +define void @fence_release_block() { +; SM90-LABEL: fence_release_block( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.release.cta; +; SM90-NEXT: ret; + fence syncscope("block") release + ret void +} + + +define void @fence_release_cluster() { +; SM90-LABEL: fence_release_cluster( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.release.cluster; +; SM90-NEXT: ret; + fence syncscope("cluster") release + ret void +} + + +define void @fence_release_device() { +; SM90-LABEL: fence_release_device( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.release.gpu; +; SM90-NEXT: ret; + fence syncscope("device") release + ret void +} + + +define void @fence_acq_rel_() { +; SM90-LABEL: fence_acq_rel_( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acq_rel.sys; +; SM90-NEXT: ret; + fence syncscope("") acq_rel + ret void +} + + +define void @fence_acq_rel_block() { +; SM90-LABEL: fence_acq_rel_block( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acq_rel.cta; +; SM90-NEXT: ret; + fence syncscope("block") acq_rel + ret void +} + + +define void @fence_acq_rel_cluster() { +; SM90-LABEL: fence_acq_rel_cluster( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acq_rel.cluster; +; SM90-NEXT: ret; + fence syncscope("cluster") acq_rel + ret void +} + + +define void @fence_acq_rel_device() { +; SM90-LABEL: 
fence_acq_rel_device( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acq_rel.gpu; +; SM90-NEXT: ret; + fence syncscope("device") acq_rel + ret void +} + + +define void @fence_seq_cst_() { +; SM90-LABEL: fence_seq_cst_( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.sc.sys; +; SM90-NEXT: ret; + fence syncscope("") seq_cst + ret void +} + + +define void @fence_seq_cst_block() { +; SM90-LABEL: fence_seq_cst_block( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.sc.cta; +; SM90-NEXT: ret; + fence syncscope("block") seq_cst + ret void +} + + +define void @fence_seq_cst_cluster() { +; SM90-LABEL: fence_seq_cst_cluster( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.sc.cluster; +; SM90-NEXT: ret; + fence syncscope("cluster") seq_cst + ret void +} + + +define void @fence_seq_cst_device() { +; SM90-LABEL: fence_seq_cst_device( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.sc.gpu; +; SM90-NEXT: ret; + fence syncscope("device") seq_cst + ret void +} + diff --git a/llvm/test/CodeGen/NVPTX/fence.ll b/llvm/test/CodeGen/NVPTX/fence.ll deleted file mode 100644 index e094ddf5775a6..0000000000000 --- a/llvm/test/CodeGen/NVPTX/fence.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=SM60 -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70 -; RUN: %if ptxas-12.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %} - -; TODO: implement and test thread scope. 
- -; CHECK-LABEL: fence_sc_sys -define void @fence_sc_sys() local_unnamed_addr { - ; SM60: membar.sys - ; SM70: fence.sc.sys - fence seq_cst - ret void -} - -; CHECK-LABEL: fence_acq_rel_sys -define void @fence_acq_rel_sys() local_unnamed_addr { - ; SM60: membar.sys - ; SM70: fence.acq_rel.sys - fence acq_rel - ret void -} - -; CHECK-LABEL: fence_release_sys -define void @fence_release_sys() local_unnamed_addr { - ; SM60: membar.sys - ; SM70: fence.acq_rel.sys - fence release - ret void -} - -; CHECK-LABEL: fence_acquire_sys -define void @fence_acquire_sys() local_unnamed_addr { - ; SM60: membar.sys - ; SM70: fence.acq_rel.sys - fence acquire - ret void -} - -; CHECK-LABEL: fence_sc_gpu -define void @fence_sc_gpu() local_unnamed_addr { - ; SM60: membar.gl - ; SM70: fence.sc.gpu - fence syncscope("device") seq_cst - ret void -} - -; CHECK-LABEL: fence_acq_rel_gpu -define void @fence_acq_rel_gpu() local_unnamed_addr { - ; SM60: membar.gl - ; SM70: fence.acq_rel.gpu - fence syncscope("device") acq_rel - ret void -} - -; CHECK-LABEL: fence_release_gpu -define void @fence_release_gpu() local_unnamed_addr { - ; SM60: membar.gl - ; SM70: fence.acq_rel.gpu - fence syncscope("device") release - ret void -} - -; CHECK-LABEL: fence_acquire_gpu -define void @fence_acquire_gpu() local_unnamed_addr { - ; SM60: membar.gl - ; SM70: fence.acq_rel.gpu - fence syncscope("device") acquire - ret void -} - -; CHECK-LABEL: fence_sc_cta -define void @fence_sc_cta() local_unnamed_addr { - ; SM60: membar.cta - ; SM70: fence.sc.cta - fence syncscope("block") seq_cst - ret void -} - -; CHECK-LABEL: fence_acq_rel_cta -define void @fence_acq_rel_cta() local_unnamed_addr { - ; SM60: membar.cta - ; SM70: fence.acq_rel.cta - fence syncscope("block") acq_rel - ret void -} - -; CHECK-LABEL: fence_release_cta -define void @fence_release_cta() local_unnamed_addr { - ; SM60: membar.cta - ; SM70: fence.acq_rel.cta - fence syncscope("block") release - ret void -} - -; CHECK-LABEL: fence_acquire_cta 
-define void @fence_acquire_cta() local_unnamed_addr { - ; SM60: membar.cta - ; SM70: fence.acq_rel.cta - fence syncscope("block") acquire - ret void -} \ No newline at end of file diff --git a/llvm/test/CodeGen/NVPTX/fence.py b/llvm/test/CodeGen/NVPTX/fence.py new file mode 100644 index 0000000000000..529ef78db8089 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/fence.py @@ -0,0 +1,38 @@ +# For manual usage, not as a part of lit tests. Used for generating the following tests: +# fence-sm30.ll, fence-sm70.ll, fence-sm90.ll + +from string import Template +from itertools import product + +fence_func = Template( +""" +define void @fence_${ordering}_${scope}() { + fence syncscope(\"${scope}\") ${ordering} + ret void +} +""" +) + +run_statement = Template( +""" +; ${run}: llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | FileCheck %s --check-prefix=SM${sm} +; ${run}: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_${sm} -mattr=+ptx${ptx} | %ptxas-verfy %} +""" +) + +# (sm, ptx) +TESTS = [(30, 50), (70, 60), (90, 87)] + +SCOPES = ["", "block", "cluster", "device"] + +ORDERINGS = ["acquire", "release", "acq_rel", "seq_cst"] + +if __name__ == "__main__": + for sm, ptx in TESTS: + with open ("fence-sm{}.ll".format(sm), "w") as fp: + print(run_statement.substitute(run = "RUN", sm = sm, ptx = ptx), file = fp) + for ordering, scope in product(ORDERINGS, SCOPES): + if scope == "cluster" and (sm < 90 or ptx < 78): + print("; .cluster scope unsupported on SM = {} PTX = {}".format(sm, ptx), file = fp) + else: + print(fence_func.substitute(scope = scope, ordering = ordering), file = fp) diff --git a/llvm/test/CodeGen/NVPTX/lit.local.cfg b/llvm/test/CodeGen/NVPTX/lit.local.cfg index e3f06d1a720e3..54a6c338bdf85 100644 --- a/llvm/test/CodeGen/NVPTX/lit.local.cfg +++ b/llvm/test/CodeGen/NVPTX/lit.local.cfg @@ -1,3 +1,4 @@ if not "NVPTX" in config.root.targets: config.unsupported = True config.suffixes.add(".py") +config.excludes = ["fence.py"] From 
dea04ca0e85354e9d88f3eea5a0ad0367d3ac347 Mon Sep 17 00:00:00 2001 From: Akshay Deodhar Date: Wed, 29 Jan 2025 00:58:42 +0000 Subject: [PATCH 2/4] clang-format darker --- llvm/test/CodeGen/NVPTX/fence.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/fence.py b/llvm/test/CodeGen/NVPTX/fence.py index 529ef78db8089..9714d6f4afe44 100644 --- a/llvm/test/CodeGen/NVPTX/fence.py +++ b/llvm/test/CodeGen/NVPTX/fence.py @@ -5,7 +5,7 @@ from itertools import product fence_func = Template( -""" + """ define void @fence_${ordering}_${scope}() { fence syncscope(\"${scope}\") ${ordering} ret void @@ -14,7 +14,7 @@ ) run_statement = Template( -""" + """ ; ${run}: llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | FileCheck %s --check-prefix=SM${sm} ; ${run}: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_${sm} -mattr=+ptx${ptx} | %ptxas-verfy %} """ @@ -29,10 +29,17 @@ if __name__ == "__main__": for sm, ptx in TESTS: - with open ("fence-sm{}.ll".format(sm), "w") as fp: - print(run_statement.substitute(run = "RUN", sm = sm, ptx = ptx), file = fp) + with open("fence-sm{}.ll".format(sm), "w") as fp: + print(run_statement.substitute(run="RUN", sm=sm, ptx=ptx), file=fp) for ordering, scope in product(ORDERINGS, SCOPES): if scope == "cluster" and (sm < 90 or ptx < 78): - print("; .cluster scope unsupported on SM = {} PTX = {}".format(sm, ptx), file = fp) + print( + "; .cluster scope unsupported on SM = {} PTX = {}".format( + sm, ptx + ), + file=fp, + ) else: - print(fence_func.substitute(scope = scope, ordering = ordering), file = fp) + print( + fence_func.substitute(scope=scope, ordering=ordering), file=fp + ) From b09406e844d04e1ef7f903a67fca6cafa0f20798 Mon Sep 17 00:00:00 2001 From: Akshay Deodhar Date: Wed, 29 Jan 2025 20:46:31 +0000 Subject: [PATCH 3/4] test naming nits --- llvm/test/CodeGen/NVPTX/fence-sm30.ll | 50 +++++++++++++-------------- llvm/test/CodeGen/NVPTX/fence-sm70.ll | 50 +++++++++++++-------------- 
llvm/test/CodeGen/NVPTX/fence-sm90.ll | 50 +++++++++++++-------------- llvm/test/CodeGen/NVPTX/fence.py | 24 ++++++++----- 4 files changed, 91 insertions(+), 83 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/fence-sm30.ll b/llvm/test/CodeGen/NVPTX/fence-sm30.ll index 16365db21d5b9..79b43a5ae98e9 100644 --- a/llvm/test/CodeGen/NVPTX/fence-sm30.ll +++ b/llvm/test/CodeGen/NVPTX/fence-sm30.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx50 | FileCheck %s --check-prefix=SM30 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx50 | %ptxas-verfy %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx50 | %ptxas-verify %} -define void @fence_acquire_() { -; SM30-LABEL: fence_acquire_( +define void @fence_acquire_sys() { +; SM30-LABEL: fence_acquire_sys( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -17,8 +17,8 @@ define void @fence_acquire_() { } -define void @fence_acquire_block() { -; SM30-LABEL: fence_acquire_block( +define void @fence_acquire_cta() { +; SM30-LABEL: fence_acquire_cta( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -31,8 +31,8 @@ define void @fence_acquire_block() { ; .cluster scope unsupported on SM = 30 PTX = 50 -define void @fence_acquire_device() { -; SM30-LABEL: fence_acquire_device( +define void @fence_acquire_gpu() { +; SM30-LABEL: fence_acquire_gpu( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -44,8 +44,8 @@ define void @fence_acquire_device() { } -define void @fence_release_() { -; SM30-LABEL: fence_release_( +define void @fence_release_sys() { +; SM30-LABEL: fence_release_sys( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -57,8 +57,8 @@ define void @fence_release_() { } -define void @fence_release_block() { -; SM30-LABEL: fence_release_block( +define void @fence_release_cta() { +; SM30-LABEL: fence_release_cta( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -71,8 +71,8 @@ define void @fence_release_block() { ; 
.cluster scope unsupported on SM = 30 PTX = 50 -define void @fence_release_device() { -; SM30-LABEL: fence_release_device( +define void @fence_release_gpu() { +; SM30-LABEL: fence_release_gpu( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -84,8 +84,8 @@ define void @fence_release_device() { } -define void @fence_acq_rel_() { -; SM30-LABEL: fence_acq_rel_( +define void @fence_acq_rel_sys() { +; SM30-LABEL: fence_acq_rel_sys( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -97,8 +97,8 @@ define void @fence_acq_rel_() { } -define void @fence_acq_rel_block() { -; SM30-LABEL: fence_acq_rel_block( +define void @fence_acq_rel_cta() { +; SM30-LABEL: fence_acq_rel_cta( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -111,8 +111,8 @@ define void @fence_acq_rel_block() { ; .cluster scope unsupported on SM = 30 PTX = 50 -define void @fence_acq_rel_device() { -; SM30-LABEL: fence_acq_rel_device( +define void @fence_acq_rel_gpu() { +; SM30-LABEL: fence_acq_rel_gpu( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -124,8 +124,8 @@ define void @fence_acq_rel_device() { } -define void @fence_seq_cst_() { -; SM30-LABEL: fence_seq_cst_( +define void @fence_seq_cst_sys() { +; SM30-LABEL: fence_seq_cst_sys( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -137,8 +137,8 @@ define void @fence_seq_cst_() { } -define void @fence_seq_cst_block() { -; SM30-LABEL: fence_seq_cst_block( +define void @fence_seq_cst_cta() { +; SM30-LABEL: fence_seq_cst_cta( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: @@ -151,8 +151,8 @@ define void @fence_seq_cst_block() { ; .cluster scope unsupported on SM = 30 PTX = 50 -define void @fence_seq_cst_device() { -; SM30-LABEL: fence_seq_cst_device( +define void @fence_seq_cst_gpu() { +; SM30-LABEL: fence_seq_cst_gpu( ; SM30: { ; SM30-EMPTY: ; SM30-EMPTY: diff --git a/llvm/test/CodeGen/NVPTX/fence-sm70.ll b/llvm/test/CodeGen/NVPTX/fence-sm70.ll index 085529571e044..105fc08a1212f 100644 --- a/llvm/test/CodeGen/NVPTX/fence-sm70.ll +++ b/llvm/test/CodeGen/NVPTX/fence-sm70.ll @@ -1,11 +1,11 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verfy %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify %} -define void @fence_acquire_() { -; SM70-LABEL: fence_acquire_( +define void @fence_acquire_sys() { +; SM70-LABEL: fence_acquire_sys( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -17,8 +17,8 @@ define void @fence_acquire_() { } -define void @fence_acquire_block() { -; SM70-LABEL: fence_acquire_block( +define void @fence_acquire_cta() { +; SM70-LABEL: fence_acquire_cta( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -31,8 +31,8 @@ define void @fence_acquire_block() { ; .cluster scope unsupported on SM = 70 PTX = 60 -define void @fence_acquire_device() { -; SM70-LABEL: fence_acquire_device( +define void @fence_acquire_gpu() { +; SM70-LABEL: fence_acquire_gpu( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -44,8 +44,8 @@ define void @fence_acquire_device() { } -define void @fence_release_() { -; SM70-LABEL: fence_release_( +define void @fence_release_sys() { +; SM70-LABEL: fence_release_sys( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -57,8 +57,8 @@ define void @fence_release_() { } -define void @fence_release_block() { -; SM70-LABEL: fence_release_block( +define void @fence_release_cta() { +; SM70-LABEL: fence_release_cta( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -71,8 +71,8 @@ define void @fence_release_block() { ; .cluster scope unsupported on SM = 70 PTX = 60 -define void @fence_release_device() { -; SM70-LABEL: fence_release_device( +define void @fence_release_gpu() { +; SM70-LABEL: fence_release_gpu( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -84,8 +84,8 @@ define void @fence_release_device() { } -define void @fence_acq_rel_() { -; SM70-LABEL: fence_acq_rel_( +define void @fence_acq_rel_sys() { +; SM70-LABEL: fence_acq_rel_sys( ; 
SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -97,8 +97,8 @@ define void @fence_acq_rel_() { } -define void @fence_acq_rel_block() { -; SM70-LABEL: fence_acq_rel_block( +define void @fence_acq_rel_cta() { +; SM70-LABEL: fence_acq_rel_cta( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -111,8 +111,8 @@ define void @fence_acq_rel_block() { ; .cluster scope unsupported on SM = 70 PTX = 60 -define void @fence_acq_rel_device() { -; SM70-LABEL: fence_acq_rel_device( +define void @fence_acq_rel_gpu() { +; SM70-LABEL: fence_acq_rel_gpu( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -124,8 +124,8 @@ define void @fence_acq_rel_device() { } -define void @fence_seq_cst_() { -; SM70-LABEL: fence_seq_cst_( +define void @fence_seq_cst_sys() { +; SM70-LABEL: fence_seq_cst_sys( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -137,8 +137,8 @@ define void @fence_seq_cst_() { } -define void @fence_seq_cst_block() { -; SM70-LABEL: fence_seq_cst_block( +define void @fence_seq_cst_cta() { +; SM70-LABEL: fence_seq_cst_cta( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: @@ -151,8 +151,8 @@ define void @fence_seq_cst_block() { ; .cluster scope unsupported on SM = 70 PTX = 60 -define void @fence_seq_cst_device() { -; SM70-LABEL: fence_seq_cst_device( +define void @fence_seq_cst_gpu() { +; SM70-LABEL: fence_seq_cst_gpu( ; SM70: { ; SM70-EMPTY: ; SM70-EMPTY: diff --git a/llvm/test/CodeGen/NVPTX/fence-sm90.ll b/llvm/test/CodeGen/NVPTX/fence-sm90.ll index 6c1959d34df4e..75684b9d30b18 100644 --- a/llvm/test/CodeGen/NVPTX/fence-sm90.ll +++ b/llvm/test/CodeGen/NVPTX/fence-sm90.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verfy %} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify %} -define void @fence_acquire_() { -; SM90-LABEL: fence_acquire_( +define 
void @fence_acquire_sys() { +; SM90-LABEL: fence_acquire_sys( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -17,8 +17,8 @@ define void @fence_acquire_() { } -define void @fence_acquire_block() { -; SM90-LABEL: fence_acquire_block( +define void @fence_acquire_cta() { +; SM90-LABEL: fence_acquire_cta( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -43,8 +43,8 @@ define void @fence_acquire_cluster() { } -define void @fence_acquire_device() { -; SM90-LABEL: fence_acquire_device( +define void @fence_acquire_gpu() { +; SM90-LABEL: fence_acquire_gpu( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -56,8 +56,8 @@ define void @fence_acquire_device() { } -define void @fence_release_() { -; SM90-LABEL: fence_release_( +define void @fence_release_sys() { +; SM90-LABEL: fence_release_sys( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -69,8 +69,8 @@ define void @fence_release_() { } -define void @fence_release_block() { -; SM90-LABEL: fence_release_block( +define void @fence_release_cta() { +; SM90-LABEL: fence_release_cta( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -95,8 +95,8 @@ define void @fence_release_cluster() { } -define void @fence_release_device() { -; SM90-LABEL: fence_release_device( +define void @fence_release_gpu() { +; SM90-LABEL: fence_release_gpu( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -108,8 +108,8 @@ define void @fence_release_device() { } -define void @fence_acq_rel_() { -; SM90-LABEL: fence_acq_rel_( +define void @fence_acq_rel_sys() { +; SM90-LABEL: fence_acq_rel_sys( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -121,8 +121,8 @@ define void @fence_acq_rel_() { } -define void @fence_acq_rel_block() { -; SM90-LABEL: fence_acq_rel_block( +define void @fence_acq_rel_cta() { +; SM90-LABEL: fence_acq_rel_cta( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -147,8 +147,8 @@ define void @fence_acq_rel_cluster() { } -define void @fence_acq_rel_device() { -; SM90-LABEL: fence_acq_rel_device( +define void @fence_acq_rel_gpu() { +; SM90-LABEL: fence_acq_rel_gpu( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ 
-160,8 +160,8 @@ define void @fence_acq_rel_device() { } -define void @fence_seq_cst_() { -; SM90-LABEL: fence_seq_cst_( +define void @fence_seq_cst_sys() { +; SM90-LABEL: fence_seq_cst_sys( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -173,8 +173,8 @@ define void @fence_seq_cst_() { } -define void @fence_seq_cst_block() { -; SM90-LABEL: fence_seq_cst_block( +define void @fence_seq_cst_cta() { +; SM90-LABEL: fence_seq_cst_cta( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: @@ -199,8 +199,8 @@ define void @fence_seq_cst_cluster() { } -define void @fence_seq_cst_device() { -; SM90-LABEL: fence_seq_cst_device( +define void @fence_seq_cst_gpu() { +; SM90-LABEL: fence_seq_cst_gpu( ; SM90: { ; SM90-EMPTY: ; SM90-EMPTY: diff --git a/llvm/test/CodeGen/NVPTX/fence.py b/llvm/test/CodeGen/NVPTX/fence.py index 9714d6f4afe44..77f91f868ad76 100644 --- a/llvm/test/CodeGen/NVPTX/fence.py +++ b/llvm/test/CodeGen/NVPTX/fence.py @@ -6,8 +6,8 @@ fence_func = Template( """ -define void @fence_${ordering}_${scope}() { - fence syncscope(\"${scope}\") ${ordering} +define void @fence_${ordering}_${ptx_scope}() { + fence syncscope(\"${llvm_scope}\") ${ordering} ret void } """ @@ -15,15 +15,17 @@ run_statement = Template( """ -; ${run}: llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | FileCheck %s --check-prefix=SM${sm} -; ${run}: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_${sm} -mattr=+ptx${ptx} | %ptxas-verfy %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | FileCheck %s --check-prefix=SM${sm} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_${sm} -mattr=+ptx${ptx} | %ptxas-verify %} """ ) # (sm, ptx) TESTS = [(30, 50), (70, 60), (90, 87)] -SCOPES = ["", "block", "cluster", "device"] +LLVM_SCOPES = ["", "block", "cluster", "device"] + +SCOPE_LLVM_TO_PTX = {"": "sys", "block": "cta", "cluster": "cluster", "device": "gpu"} ORDERINGS = ["acquire", "release", "acq_rel", "seq_cst"] @@ -31,8 +33,9 @@ for sm, ptx in TESTS: with open("fence-sm{}.ll".format(sm), "w") as fp: 
print(run_statement.substitute(run="RUN", sm=sm, ptx=ptx), file=fp) - for ordering, scope in product(ORDERINGS, SCOPES): - if scope == "cluster" and (sm < 90 or ptx < 78): + for ordering, llvm_scope in product(ORDERINGS, LLVM_SCOPES): + ptx_scope = SCOPE_LLVM_TO_PTX[llvm_scope] + if llvm_scope == "cluster" and (sm < 90 or ptx < 78): print( "; .cluster scope unsupported on SM = {} PTX = {}".format( sm, ptx @@ -41,5 +44,10 @@ ) else: print( - fence_func.substitute(scope=scope, ordering=ordering), file=fp + fence_func.substitute( + llvm_scope=llvm_scope, + ptx_scope=ptx_scope, + ordering=ordering, + ), + file=fp, ) From 01f69b4e43ed5426a0cc3e633b9eec5a1e6ac647 Mon Sep 17 00:00:00 2001 From: Akshay Deodhar Date: Wed, 29 Jan 2025 21:14:21 +0000 Subject: [PATCH 4/4] have checks for different archs together --- llvm/test/CodeGen/NVPTX/fence-cluster.ll | 55 ++++ llvm/test/CodeGen/NVPTX/fence-nocluster.ll | 355 +++++++++++++++++++++ llvm/test/CodeGen/NVPTX/fence-sm30.ll | 165 ---------- llvm/test/CodeGen/NVPTX/fence-sm70.ll | 165 ---------- llvm/test/CodeGen/NVPTX/fence-sm90.ll | 213 ------------- llvm/test/CodeGen/NVPTX/fence.py | 55 ++-- 6 files changed, 439 insertions(+), 569 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/fence-cluster.ll create mode 100644 llvm/test/CodeGen/NVPTX/fence-nocluster.ll delete mode 100644 llvm/test/CodeGen/NVPTX/fence-sm30.ll delete mode 100644 llvm/test/CodeGen/NVPTX/fence-sm70.ll delete mode 100644 llvm/test/CodeGen/NVPTX/fence-sm90.ll diff --git a/llvm/test/CodeGen/NVPTX/fence-cluster.ll b/llvm/test/CodeGen/NVPTX/fence-cluster.ll new file mode 100644 index 0000000000000..697dce4f89515 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/fence-cluster.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90 +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 -mattr=+ptx87 | 
%ptxas-verify %} + +define void @fence_acquire_cluster() { +; SM90-LABEL: fence_acquire_cluster( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acquire.cluster; +; SM90-NEXT: ret; + fence syncscope("cluster") acquire + ret void +} + + +define void @fence_release_cluster() { +; SM90-LABEL: fence_release_cluster( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.release.cluster; +; SM90-NEXT: ret; + fence syncscope("cluster") release + ret void +} + + +define void @fence_acq_rel_cluster() { +; SM90-LABEL: fence_acq_rel_cluster( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acq_rel.cluster; +; SM90-NEXT: ret; + fence syncscope("cluster") acq_rel + ret void +} + + +define void @fence_seq_cst_cluster() { +; SM90-LABEL: fence_seq_cst_cluster( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.sc.cluster; +; SM90-NEXT: ret; + fence syncscope("cluster") seq_cst + ret void +} + diff --git a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll new file mode 100644 index 0000000000000..e2bec72517d55 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll @@ -0,0 +1,355 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx50 | FileCheck %s --check-prefix=SM30 +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx50 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70 +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify %} +; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90 +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify %} + +define void @fence_acquire_sys() { +; SM30-LABEL: fence_acquire_sys( +; SM30: { +; 
SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.sys; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_acquire_sys( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.sys; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_acquire_sys( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acquire.sys; +; SM90-NEXT: ret; + fence syncscope("") acquire + ret void +} + + +define void @fence_acquire_cta() { +; SM30-LABEL: fence_acquire_cta( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.cta; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_acquire_cta( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.cta; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_acquire_cta( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acquire.cta; +; SM90-NEXT: ret; + fence syncscope("block") acquire + ret void +} + + +define void @fence_acquire_gpu() { +; SM30-LABEL: fence_acquire_gpu( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.gl; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_acquire_gpu( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.gpu; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_acquire_gpu( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acquire.gpu; +; SM90-NEXT: ret; + fence syncscope("device") acquire + ret void +} + + +define void @fence_release_sys() { +; SM30-LABEL: fence_release_sys( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.sys; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_release_sys( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.sys; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_release_sys( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: 
fence.release.sys; +; SM90-NEXT: ret; + fence syncscope("") release + ret void +} + + +define void @fence_release_cta() { +; SM30-LABEL: fence_release_cta( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.cta; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_release_cta( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.cta; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_release_cta( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.release.cta; +; SM90-NEXT: ret; + fence syncscope("block") release + ret void +} + + +define void @fence_release_gpu() { +; SM30-LABEL: fence_release_gpu( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.gl; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_release_gpu( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.gpu; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_release_gpu( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.release.gpu; +; SM90-NEXT: ret; + fence syncscope("device") release + ret void +} + + +define void @fence_acq_rel_sys() { +; SM30-LABEL: fence_acq_rel_sys( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.sys; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_acq_rel_sys( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.sys; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_acq_rel_sys( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acq_rel.sys; +; SM90-NEXT: ret; + fence syncscope("") acq_rel + ret void +} + + +define void @fence_acq_rel_cta() { +; SM30-LABEL: fence_acq_rel_cta( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.cta; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_acq_rel_cta( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: 
fence.acq_rel.cta; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_acq_rel_cta( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acq_rel.cta; +; SM90-NEXT: ret; + fence syncscope("block") acq_rel + ret void +} + + +define void @fence_acq_rel_gpu() { +; SM30-LABEL: fence_acq_rel_gpu( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.gl; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_acq_rel_gpu( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.acq_rel.gpu; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_acq_rel_gpu( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.acq_rel.gpu; +; SM90-NEXT: ret; + fence syncscope("device") acq_rel + ret void +} + + +define void @fence_seq_cst_sys() { +; SM30-LABEL: fence_seq_cst_sys( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.sys; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_seq_cst_sys( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.sc.sys; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_seq_cst_sys( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.sc.sys; +; SM90-NEXT: ret; + fence syncscope("") seq_cst + ret void +} + + +define void @fence_seq_cst_cta() { +; SM30-LABEL: fence_seq_cst_cta( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: membar.cta; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_seq_cst_cta( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.sc.cta; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_seq_cst_cta( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.sc.cta; +; SM90-NEXT: ret; + fence syncscope("block") seq_cst + ret void +} + + +define void @fence_seq_cst_gpu() { +; SM30-LABEL: fence_seq_cst_gpu( +; SM30: { +; SM30-EMPTY: +; SM30-EMPTY: +; SM30-NEXT: // %bb.0: +; SM30-NEXT: 
membar.gl; +; SM30-NEXT: ret; +; +; SM70-LABEL: fence_seq_cst_gpu( +; SM70: { +; SM70-EMPTY: +; SM70-EMPTY: +; SM70-NEXT: // %bb.0: +; SM70-NEXT: fence.sc.gpu; +; SM70-NEXT: ret; +; +; SM90-LABEL: fence_seq_cst_gpu( +; SM90: { +; SM90-EMPTY: +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: fence.sc.gpu; +; SM90-NEXT: ret; + fence syncscope("device") seq_cst + ret void +} + diff --git a/llvm/test/CodeGen/NVPTX/fence-sm30.ll b/llvm/test/CodeGen/NVPTX/fence-sm30.ll deleted file mode 100644 index 79b43a5ae98e9..0000000000000 --- a/llvm/test/CodeGen/NVPTX/fence-sm30.ll +++ /dev/null @@ -1,165 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 - -; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx50 | FileCheck %s --check-prefix=SM30 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx50 | %ptxas-verify %} - - -define void @fence_acquire_sys() { -; SM30-LABEL: fence_acquire_sys( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.sys; -; SM30-NEXT: ret; - fence syncscope("") acquire - ret void -} - - -define void @fence_acquire_cta() { -; SM30-LABEL: fence_acquire_cta( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.cta; -; SM30-NEXT: ret; - fence syncscope("block") acquire - ret void -} - -; .cluster scope unsupported on SM = 30 PTX = 50 - -define void @fence_acquire_gpu() { -; SM30-LABEL: fence_acquire_gpu( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.gl; -; SM30-NEXT: ret; - fence syncscope("device") acquire - ret void -} - - -define void @fence_release_sys() { -; SM30-LABEL: fence_release_sys( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.sys; -; SM30-NEXT: ret; - fence syncscope("") release - ret void -} - - -define void @fence_release_cta() { -; SM30-LABEL: fence_release_cta( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // 
%bb.0: -; SM30-NEXT: membar.cta; -; SM30-NEXT: ret; - fence syncscope("block") release - ret void -} - -; .cluster scope unsupported on SM = 30 PTX = 50 - -define void @fence_release_gpu() { -; SM30-LABEL: fence_release_gpu( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.gl; -; SM30-NEXT: ret; - fence syncscope("device") release - ret void -} - - -define void @fence_acq_rel_sys() { -; SM30-LABEL: fence_acq_rel_sys( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.sys; -; SM30-NEXT: ret; - fence syncscope("") acq_rel - ret void -} - - -define void @fence_acq_rel_cta() { -; SM30-LABEL: fence_acq_rel_cta( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.cta; -; SM30-NEXT: ret; - fence syncscope("block") acq_rel - ret void -} - -; .cluster scope unsupported on SM = 30 PTX = 50 - -define void @fence_acq_rel_gpu() { -; SM30-LABEL: fence_acq_rel_gpu( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.gl; -; SM30-NEXT: ret; - fence syncscope("device") acq_rel - ret void -} - - -define void @fence_seq_cst_sys() { -; SM30-LABEL: fence_seq_cst_sys( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.sys; -; SM30-NEXT: ret; - fence syncscope("") seq_cst - ret void -} - - -define void @fence_seq_cst_cta() { -; SM30-LABEL: fence_seq_cst_cta( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.cta; -; SM30-NEXT: ret; - fence syncscope("block") seq_cst - ret void -} - -; .cluster scope unsupported on SM = 30 PTX = 50 - -define void @fence_seq_cst_gpu() { -; SM30-LABEL: fence_seq_cst_gpu( -; SM30: { -; SM30-EMPTY: -; SM30-EMPTY: -; SM30-NEXT: // %bb.0: -; SM30-NEXT: membar.gl; -; SM30-NEXT: ret; - fence syncscope("device") seq_cst - ret void -} - diff --git a/llvm/test/CodeGen/NVPTX/fence-sm70.ll b/llvm/test/CodeGen/NVPTX/fence-sm70.ll deleted file mode 100644 index 
105fc08a1212f..0000000000000 --- a/llvm/test/CodeGen/NVPTX/fence-sm70.ll +++ /dev/null @@ -1,165 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 - -; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify %} - - -define void @fence_acquire_sys() { -; SM70-LABEL: fence_acquire_sys( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.acq_rel.sys; -; SM70-NEXT: ret; - fence syncscope("") acquire - ret void -} - - -define void @fence_acquire_cta() { -; SM70-LABEL: fence_acquire_cta( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.acq_rel.cta; -; SM70-NEXT: ret; - fence syncscope("block") acquire - ret void -} - -; .cluster scope unsupported on SM = 70 PTX = 60 - -define void @fence_acquire_gpu() { -; SM70-LABEL: fence_acquire_gpu( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.acq_rel.gpu; -; SM70-NEXT: ret; - fence syncscope("device") acquire - ret void -} - - -define void @fence_release_sys() { -; SM70-LABEL: fence_release_sys( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.acq_rel.sys; -; SM70-NEXT: ret; - fence syncscope("") release - ret void -} - - -define void @fence_release_cta() { -; SM70-LABEL: fence_release_cta( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.acq_rel.cta; -; SM70-NEXT: ret; - fence syncscope("block") release - ret void -} - -; .cluster scope unsupported on SM = 70 PTX = 60 - -define void @fence_release_gpu() { -; SM70-LABEL: fence_release_gpu( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.acq_rel.gpu; -; SM70-NEXT: ret; - fence syncscope("device") release - ret void -} - - -define void @fence_acq_rel_sys() { -; SM70-LABEL: fence_acq_rel_sys( -; 
SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.acq_rel.sys; -; SM70-NEXT: ret; - fence syncscope("") acq_rel - ret void -} - - -define void @fence_acq_rel_cta() { -; SM70-LABEL: fence_acq_rel_cta( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.acq_rel.cta; -; SM70-NEXT: ret; - fence syncscope("block") acq_rel - ret void -} - -; .cluster scope unsupported on SM = 70 PTX = 60 - -define void @fence_acq_rel_gpu() { -; SM70-LABEL: fence_acq_rel_gpu( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.acq_rel.gpu; -; SM70-NEXT: ret; - fence syncscope("device") acq_rel - ret void -} - - -define void @fence_seq_cst_sys() { -; SM70-LABEL: fence_seq_cst_sys( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.sc.sys; -; SM70-NEXT: ret; - fence syncscope("") seq_cst - ret void -} - - -define void @fence_seq_cst_cta() { -; SM70-LABEL: fence_seq_cst_cta( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.sc.cta; -; SM70-NEXT: ret; - fence syncscope("block") seq_cst - ret void -} - -; .cluster scope unsupported on SM = 70 PTX = 60 - -define void @fence_seq_cst_gpu() { -; SM70-LABEL: fence_seq_cst_gpu( -; SM70: { -; SM70-EMPTY: -; SM70-EMPTY: -; SM70-NEXT: // %bb.0: -; SM70-NEXT: fence.sc.gpu; -; SM70-NEXT: ret; - fence syncscope("device") seq_cst - ret void -} - diff --git a/llvm/test/CodeGen/NVPTX/fence-sm90.ll b/llvm/test/CodeGen/NVPTX/fence-sm90.ll deleted file mode 100644 index 75684b9d30b18..0000000000000 --- a/llvm/test/CodeGen/NVPTX/fence-sm90.ll +++ /dev/null @@ -1,213 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 - -; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90 -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify %} - - -define void @fence_acquire_sys() { -; 
SM90-LABEL: fence_acquire_sys( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.acquire.sys; -; SM90-NEXT: ret; - fence syncscope("") acquire - ret void -} - - -define void @fence_acquire_cta() { -; SM90-LABEL: fence_acquire_cta( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.acquire.cta; -; SM90-NEXT: ret; - fence syncscope("block") acquire - ret void -} - - -define void @fence_acquire_cluster() { -; SM90-LABEL: fence_acquire_cluster( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.acquire.cluster; -; SM90-NEXT: ret; - fence syncscope("cluster") acquire - ret void -} - - -define void @fence_acquire_gpu() { -; SM90-LABEL: fence_acquire_gpu( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.acquire.gpu; -; SM90-NEXT: ret; - fence syncscope("device") acquire - ret void -} - - -define void @fence_release_sys() { -; SM90-LABEL: fence_release_sys( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.release.sys; -; SM90-NEXT: ret; - fence syncscope("") release - ret void -} - - -define void @fence_release_cta() { -; SM90-LABEL: fence_release_cta( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.release.cta; -; SM90-NEXT: ret; - fence syncscope("block") release - ret void -} - - -define void @fence_release_cluster() { -; SM90-LABEL: fence_release_cluster( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.release.cluster; -; SM90-NEXT: ret; - fence syncscope("cluster") release - ret void -} - - -define void @fence_release_gpu() { -; SM90-LABEL: fence_release_gpu( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.release.gpu; -; SM90-NEXT: ret; - fence syncscope("device") release - ret void -} - - -define void @fence_acq_rel_sys() { -; SM90-LABEL: fence_acq_rel_sys( -; SM90: { -; SM90-EMPTY: -; 
SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.acq_rel.sys; -; SM90-NEXT: ret; - fence syncscope("") acq_rel - ret void -} - - -define void @fence_acq_rel_cta() { -; SM90-LABEL: fence_acq_rel_cta( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.acq_rel.cta; -; SM90-NEXT: ret; - fence syncscope("block") acq_rel - ret void -} - - -define void @fence_acq_rel_cluster() { -; SM90-LABEL: fence_acq_rel_cluster( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.acq_rel.cluster; -; SM90-NEXT: ret; - fence syncscope("cluster") acq_rel - ret void -} - - -define void @fence_acq_rel_gpu() { -; SM90-LABEL: fence_acq_rel_gpu( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.acq_rel.gpu; -; SM90-NEXT: ret; - fence syncscope("device") acq_rel - ret void -} - - -define void @fence_seq_cst_sys() { -; SM90-LABEL: fence_seq_cst_sys( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.sc.sys; -; SM90-NEXT: ret; - fence syncscope("") seq_cst - ret void -} - - -define void @fence_seq_cst_cta() { -; SM90-LABEL: fence_seq_cst_cta( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.sc.cta; -; SM90-NEXT: ret; - fence syncscope("block") seq_cst - ret void -} - - -define void @fence_seq_cst_cluster() { -; SM90-LABEL: fence_seq_cst_cluster( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.sc.cluster; -; SM90-NEXT: ret; - fence syncscope("cluster") seq_cst - ret void -} - - -define void @fence_seq_cst_gpu() { -; SM90-LABEL: fence_seq_cst_gpu( -; SM90: { -; SM90-EMPTY: -; SM90-EMPTY: -; SM90-NEXT: // %bb.0: -; SM90-NEXT: fence.sc.gpu; -; SM90-NEXT: ret; - fence syncscope("device") seq_cst - ret void -} - diff --git a/llvm/test/CodeGen/NVPTX/fence.py b/llvm/test/CodeGen/NVPTX/fence.py index 77f91f868ad76..b9f9d294e6fe8 100644 --- a/llvm/test/CodeGen/NVPTX/fence.py +++ 
b/llvm/test/CodeGen/NVPTX/fence.py @@ -14,40 +14,43 @@ ) run_statement = Template( - """ -; RUN: llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | FileCheck %s --check-prefix=SM${sm} -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_${sm} -mattr=+ptx${ptx} | %ptxas-verify %} -""" + """; RUN: llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | FileCheck %s --check-prefix=SM${sm} +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_${sm} -mattr=+ptx${ptx} | %ptxas-verify %}""" ) # (sm, ptx) TESTS = [(30, 50), (70, 60), (90, 87)] -LLVM_SCOPES = ["", "block", "cluster", "device"] +LLVM_SCOPES_NO_CLUSTER = ["", "block", "device"] SCOPE_LLVM_TO_PTX = {"": "sys", "block": "cta", "cluster": "cluster", "device": "gpu"} ORDERINGS = ["acquire", "release", "acq_rel", "seq_cst"] if __name__ == "__main__": - for sm, ptx in TESTS: - with open("fence-sm{}.ll".format(sm), "w") as fp: - print(run_statement.substitute(run="RUN", sm=sm, ptx=ptx), file=fp) - for ordering, llvm_scope in product(ORDERINGS, LLVM_SCOPES): - ptx_scope = SCOPE_LLVM_TO_PTX[llvm_scope] - if llvm_scope == "cluster" and (sm < 90 or ptx < 78): - print( - "; .cluster scope unsupported on SM = {} PTX = {}".format( - sm, ptx - ), - file=fp, - ) - else: - print( - fence_func.substitute( - llvm_scope=llvm_scope, - ptx_scope=ptx_scope, - ordering=ordering, - ), - file=fp, - ) + # non-cluster scopes are supported on SM30, SM70 and SM90 + with open("fence-nocluster.ll", "w") as fp: + for sm, ptx in TESTS: + print(run_statement.substitute(sm=sm, ptx=ptx), file=fp) + for ordering, llvm_scope in product(ORDERINGS, LLVM_SCOPES_NO_CLUSTER): + print( + fence_func.substitute( + llvm_scope=llvm_scope, + ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope], + ordering=ordering, + ), + file=fp, + ) + + # cluster scope is only supported on SM90 + with open("fence-cluster.ll", "w") as fp: + print(run_statement.substitute(sm=90, ptx=87), file=fp) + for ordering in ORDERINGS: + print( + fence_func.substitute( +
llvm_scope="cluster", + ptx_scope=SCOPE_LLVM_TO_PTX["cluster"], + ordering=ordering, + ), + file=fp, + )