From 75345fb11638ff4c3f4c6c691dbb6c4f72e6666f Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Wed, 11 Jan 2023 20:48:06 +0300 Subject: [PATCH] [NVPTX] Drop memory references of LDG/LDU This patch fixes machine verifier errors: *** Bad machine code: Missing mayLoad flag *** - function: foo1 - basic block: %bb.0 (0x5560fc64ef08) - instruction: %4:float32regs = INT_PTX_LDG_GLOBAL_f32areg64 killed %3:int64regs :: (load (s32) from %ir.from1, addrspace 1) mayLoad flag is missing because LDG and LDU instructions operate on read-only memory, so we want to treat them as regular instructions and exclude them from memory analysis. Machine verifier checks for memoperands to determine whether an instruction is a load, so dropping them during lowering fixes the problem. Differential Revision: https://reviews.llvm.org/D112466 --- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 3 --- llvm/test/CodeGen/NVPTX/bug26185-2.ll | 2 +- llvm/test/CodeGen/NVPTX/bug26185.ll | 2 +- llvm/test/CodeGen/NVPTX/ldg-invariant.ll | 2 +- llvm/test/CodeGen/NVPTX/ldu-i8.ll | 2 +- llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll | 2 +- llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll | 4 ++-- llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll | 2 +- 8 files changed, 8 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index a0358cef2b3650..a18787196bb5b1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1671,9 +1671,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops); } - MachineMemOperand *MemRef = Mem->getMemOperand(); - CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef}); - // For automatic generation of LDG (through SelectLoad[Vector], not the // intrinsics), we may have an extending load like: // diff --git a/llvm/test/CodeGen/NVPTX/bug26185-2.ll b/llvm/test/CodeGen/NVPTX/bug26185-2.ll index 
80e5795134f21d..83d47e87c7be1b 100644 --- a/llvm/test/CodeGen/NVPTX/bug26185-2.ll +++ b/llvm/test/CodeGen/NVPTX/bug26185-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} ; Verify that we correctly emit code for extending ldg/ldu. We do not expose diff --git a/llvm/test/CodeGen/NVPTX/bug26185.ll b/llvm/test/CodeGen/NVPTX/bug26185.ll index 22cf2189aae390..d64ff72a2048ec 100644 --- a/llvm/test/CodeGen/NVPTX/bug26185.ll +++ b/llvm/test/CodeGen/NVPTX/bug26185.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} ; Verify that we correctly emit code for i8 ldg/ldu. We do not expose 8-bit diff --git a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll index b46232880fc525..2ec829c24091ee 100644 --- a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll +++ b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} ; Check that invariant loads from the global addrspace are lowered to diff --git a/llvm/test/CodeGen/NVPTX/ldu-i8.ll b/llvm/test/CodeGen/NVPTX/ldu-i8.ll index 23299e871ccc4e..26ce65e6eb0148 100644 --- a/llvm/test/CodeGen/NVPTX/ldu-i8.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-i8.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll index 44c82fcb1a14c5..6677169632e409 100644 --- a/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll index 17ea8bf9ca4a39..c01abba811a64e 100644 --- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll +++ b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefix=SM20 %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck -check-prefix=SM35 %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck -check-prefix=SM20 %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck -check-prefix=SM35 %s ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} diff --git a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll index b15fe0950e7738..fdcb9edbae3198 100644 --- a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll +++ b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s +; RUN: llc < %s -march=nvptx 
-mcpu=sm_35 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %} ; Check load from constant global variables. These loads should be