Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Use directive for kernarg preload header padding #86004

Merged
merged 1 commit into from
Mar 31, 2024

Conversation

kerbowa
Copy link
Member

@kerbowa kerbowa commented Mar 20, 2024

No description provided.

@llvmbot
Copy link
Collaborator

llvmbot commented Mar 20, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Austin Kerbow (kerbowa)

Changes

Patch is 406.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86004.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (+10-12)
  • (modified) llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll (+7-4)
  • (modified) llvm/test/CodeGen/AMDGPU/preload-kernargs.ll (+309-8741)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 4742b0b3e52ecf..697b986bca4995 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -283,6 +283,16 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
   return true;
 }
 
+bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
+    const MCSubtargetInfo &STI, bool TrapEnabled) {
+  const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm";
+  OS << TrapInstr
+     << " ; Kernarg preload header. Trap with incompatible firmware that "
+        "doesn't support preloading kernel arguments.\n";
+  OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
+  return true;
+}
+
 bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
   const uint32_t Encoded_s_code_end = 0xbf9f0000;
   const uint32_t Encoded_s_nop = 0xbf800000;
@@ -781,18 +791,6 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
   return true;
 }
 
-bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
-    const MCSubtargetInfo &STI, bool TrapEnabled) {
-  const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm";
-  OS << TrapInstr
-     << " ; Trap with incompatible firmware that doesn't "
-        "support preloading kernel arguments.\n";
-  for (int i = 0; i < 63; ++i) {
-    OS << "\ts_nop 0\n";
-  }
-  return true;
-}
-
 bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
     const MCSubtargetInfo &STI, bool TrapEnabled) {
   const uint32_t Encoded_s_nop = 0xbf800000;
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
index a70488a00db739..a030f86da1b67d 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
@@ -1,17 +1,20 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA,ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA,OBJ %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA,OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA,OBJ %s
 
 ; GCN: preload_kernarg_header
 ; HSA: s_trap 2
 ; NON-HSA: s_endpgm
-; GCN-COUNT-63: s_nop 0
+; ASM: .fill 63, 4, 0xbf800000 ; s_nop 0
+; OBJ-COUNT-63: s_nop 0
 define amdgpu_kernel void @preload_kernarg_header(ptr %arg) {
     store ptr %arg, ptr %arg
     ret void
 }
 
 ; GCN: non_kernel_function
+; GCN-NOT: s_trap 2
 ; GCN-NOT: s_nop 0
 ; GCN: flat_store
 define void @non_kernel_function(ptr %arg) {
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
index d20c3a4007ffdd..f0e709b5a17279 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
@@ -24,70 +24,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
 ; GFX940-NO-PRELOAD-NEXT:    s_endpgm
 ;
 ; GFX940-PRELOAD-1-LABEL: ptr1_i8:
-; GFX940-PRELOAD-1:         s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
-; GFX940-PRELOAD-1-NEXT:    s_nop 0
+; GFX940-PRELOAD-1:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
 ; GFX940-PRELOAD-1-NEXT:  ; %bb.0:
 ; GFX940-PRELOAD-1-NEXT:    s_load_dword s0, s[0:1], 0x8
 ; GFX940-PRELOAD-1-NEXT:    v_mov_b32_e32 v0, 0
@@ -98,70 +36,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
 ; GFX940-PRELOAD-1-NEXT:    s_endpgm
 ;
 ; GFX940-PRELOAD-2-LABEL: ptr1_i8:
-; GFX940-PRELOAD-2:         s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
-; GFX940-PRELOAD-2-NEXT:    s_nop 0
+; GFX940-PRELOAD-2:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
 ; GFX940-PRELOAD-2-NEXT:  ; %bb.0:
 ; GFX940-PRELOAD-2-NEXT:    s_and_b32 s0, s4, 0xff
 ; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
@@ -170,70 +46,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
 ; GFX940-PRELOAD-2-NEXT:    s_endpgm
 ;
 ; GFX940-PRELOAD-4-LABEL: ptr1_i8:
-; GFX940-PRELOAD-4:         s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
-; GFX940-PRELOAD-4-NEXT:    s_nop 0
+; GFX940-PRELOAD-4:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
 ; GFX940-PRELOAD-4-NEXT:  ; %bb.0:
 ; GFX940-PRELOAD-4-NEXT:    s_and_b32 s0, s4, 0xff
 ; GFX940-PRELOAD-4-NEXT:    v_mov_b32_e32 v0, 0
@@ -242,70 +56,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
 ; GFX940-PRELOAD-4-NEXT:    s_endpgm
 ;
 ; GFX940-PRELOAD-8-LABEL: ptr1_i8:
-; GFX940-PRELOAD-8:         s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
-; GFX940-PRELOAD-8-NEXT:    s_nop 0
+; GFX940-PRELOAD-8:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
 ; GFX940-PRELOAD-8-NEXT:  ; %bb.0:
 ; GFX940-PRELOAD-8-NEXT:    s_and_b32 s0, s4, 0xff
 ; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v0, 0
@@ -325,70 +77,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
 ; GFX90a-NO-PRELOAD-NEXT:    s_endpgm
 ;
 ; GFX90a-PRELOAD-1-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-1:         s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
-; GFX90a-PRELOAD-1-NEXT:    s_nop 0
+; GFX90a-PRELOAD-1:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
 ; GFX90a-PRELOAD-1-NEXT:  ; %bb.0:
 ; GFX90a-PRELOAD-1-NEXT:    s_load_dword s0, s[4:5], 0x8
 ; GFX90a-PRELOAD-1-NEXT:    v_mov_b32_e32 v0, 0
@@ -399,70 +89,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
 ; GFX90a-PRELOAD-1-NEXT:    s_endpgm
 ;
 ; GFX90a-PRELOAD-2-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-2:         s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOAD-2-NEXT:    s_nop 0
-; GFX90a-PRELOA...
[truncated]

Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lgtm with nit

@@ -283,6 +283,16 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
return true;
}

bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
const MCSubtargetInfo &STI, bool TrapEnabled) {
const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

StringLiteral?

@kerbowa kerbowa force-pushed the kernarg-preload-prolog-directive branch from e076ecc to 639405f Compare March 31, 2024 17:39
@kerbowa kerbowa force-pushed the kernarg-preload-prolog-directive branch from 639405f to 188b1b8 Compare March 31, 2024 17:40
@kerbowa kerbowa merged commit b5b34db into llvm:main Mar 31, 2024
3 of 4 checks passed
searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request Jun 5, 2024
(cherry picked from commit b5b34db)
Change-Id: Ib6d6a4089f5d6601536eeadd1917cf3121e6f4cc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants