diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 6b2417143ca06..bee237ad77691 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -5366,7 +5366,10 @@ additional 256 bytes to the kernel_code_entry_byte_offset. This addition facilitates the incorporation of a prologue to the kernel entry to handle cases where code designed for kernarg preloading is executed on hardware equipped with incompatible firmware. If hardware has compatible firmware the 256 bytes at the -start of the kernel entry will be skipped. +start of the kernel entry will be skipped. Additionally, the compiler backend +may insert a trap instruction at the start of the kernel prologue to manage +situations where kernarg preloading is attempted on hardware with incompatible +firmware. .. _amdgpu-amdhsa-kernel-prolog: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index db81e1ee9e389..886d855e227a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -197,7 +197,8 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { if (MFI.getNumKernargPreloadedSGPRs() > 0) { assert(AMDGPU::hasKernargPreload(STM)); - getTargetStreamer()->EmitKernargPreloadHeader(*getGlobalSTI()); + getTargetStreamer()->EmitKernargPreloadHeader(*getGlobalSTI(), + STM.isAmdHsaOS()); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index 015c71080d670..bc58407a73294 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -145,7 +145,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) { // Try to preload this argument into user SGPRs. if (Arg.hasInRegAttr() && InPreloadSequence && ST.hasKernargPreload() && - !ST.needsKernargPreloadBackwardsCompatibility() && !Arg.getType()->isAggregateType()) if (PreloadInfo.tryAllocPreloadSGPRs(AllocSize, EltOffset, LastExplicitArgOffset)) diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 4f8eeaaf500b4..ba633fa9e9cb4 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1254,12 +1254,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // \returns true if preloading kernel arguments is supported. bool hasKernargPreload() const { return KernargPreload; } - // \returns true if we need to generate backwards compatible code when - // preloading kernel arguments. - bool needsKernargPreloadBackwardsCompatibility() const { - return hasKernargPreload() && !hasGFX940Insts(); - } - // \returns true if the target has split barriers feature bool hasSplitBarriers() const { return getGeneration() >= GFX12; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 5e9b1674d87dc..61f4a94019efb 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -756,18 +756,26 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc, } bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader( - const MCSubtargetInfo &STI) { - for (int i = 0; i < 64; ++i) { + const MCSubtargetInfo &STI, bool TrapEnabled) { + const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm"; + OS << TrapInstr + << " ; Trap with incompatible firmware that doesn't " + "support preloading kernel arguments.\n"; + for (int i = 0; i < 63; ++i) { OS << "\ts_nop 0\n"; } return true; } bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader( - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, bool TrapEnabled) { const uint32_t Encoded_s_nop = 0xbf800000; + const uint32_t Encoded_s_trap = 0xbf920002; + const uint32_t Encoded_s_endpgm = 0xbf810000; + const uint32_t TrapInstr = TrapEnabled ? Encoded_s_trap : Encoded_s_endpgm; MCStreamer &OS = getStreamer(); - for (int i = 0; i < 64; ++i) { + OS.emitInt32(TrapInstr); + for (int i = 0; i < 63; ++i) { OS.emitInt32(Encoded_s_nop); } return true; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index ad5f27a33fcbd..5aa80ff578c6b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -89,7 +89,8 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { virtual bool EmitCodeEnd(const MCSubtargetInfo &STI) { return true; } /// \returns True on success, false on failure. - virtual bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) { + virtual bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, + bool TrapEnabled) { return true; } @@ -146,7 +147,8 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { bool EmitCodeEnd(const MCSubtargetInfo &STI) override; /// \returns True on success, false on failure. - bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override; + bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, + bool TrapEnabled) override; void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, @@ -200,7 +202,8 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { bool EmitCodeEnd(const MCSubtargetInfo &STI) override; /// \returns True on success, false on failure. - bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override; + bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, + bool TrapEnabled) override; void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index a64a9e608f217..83221f7ead37e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2826,8 +2826,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (IsEntryFunc) { allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info); allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info); - if (IsKernel && Subtarget->hasKernargPreload() && - !Subtarget->needsKernargPreloadBackwardsCompatibility()) + if (IsKernel && Subtarget->hasKernargPreload()) allocatePreloadKernArgSGPRs(CCInfo, ArgLocs, Ins, MF, *TRI, *Info); allocateLDSKernelId(CCInfo, MF, *TRI, *Info); diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll index 75feac35dacd8..a70488a00db73 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll @@ -1,8 +1,11 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA %s ; GCN: preload_kernarg_header -; GCN-COUNT-64: s_nop 0 +; HSA: s_trap 2 +; NON-HSA: s_endpgm +; GCN-COUNT-63: s_nop 0 define amdgpu_kernel void @preload_kernarg_header(ptr %arg) { store ptr %arg, ptr %arg ret void diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll index 57980214e58e2..d20c3a4007ffd 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll @@ -1,1856 +1,3681 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=NO-PRELOAD %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=PRELOAD-1 %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=PRELOAD-2 %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=PRELOAD-4 %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=PRELOAD-8 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NO-PRELOAD %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-1 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-2 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-4 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-8 %s + +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-NO-PRELOAD %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-1 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-4 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) { -; NO-PRELOAD-LABEL: ptr1_i8: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xff -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 -; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: ptr1_i8: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: ptr1_i8: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xff -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: ptr1_i8: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: ptr1_i8: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xff -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: ptr1_i8: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xff +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: ptr1_i8: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: ptr1_i8: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xff +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: ptr1_i8: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: ptr1_i8: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xff +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: ptr1_i8: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: s_and_b32 s2, s2, 0xff +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: ptr1_i8: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: ptr1_i8: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_and_b32 s0, s8, 0xff +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: ptr1_i8: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_and_b32 s0, s8, 0xff +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: ptr1_i8: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_and_b32 s0, s8, 0xff +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm %ext = zext i8 %arg0 to i32 store i32 %ext, ptr addrspace(1) %out ret void } define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %arg0) { -; NO-PRELOAD-LABEL: ptr1_i8_zext_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xff -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 -; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: ptr1_i8_zext_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: ptr1_i8_zext_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_mov_b32 s0, 0xffff -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-2-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: ptr1_i8_zext_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4 -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: ptr1_i8_zext_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_mov_b32 s0, 0xffff -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-8-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xff +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: ptr1_i8_zext_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_mov_b32 s0, 0xffff +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: ptr1_i8_zext_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_mov_b32 s0, 0xffff +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: s_and_b32 s2, s2, 0xff +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: ptr1_i8_zext_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_mov_b32 s0, 0xffff +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: ptr1_i8_zext_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_mov_b32 s0, 0xffff +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm %ext = zext i8 %arg0 to i32 store i32 %ext, ptr addrspace(1) %out, align 4 ret void } define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0) { -; NO-PRELOAD-LABEL: ptr1_i16_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xffff -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 -; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: ptr1_i16_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: ptr1_i16_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xffff -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: ptr1_i16_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xffff -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: ptr1_i16_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xffff -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: ptr1_i16_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xffff +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: ptr1_i16_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: ptr1_i16_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xffff +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: ptr1_i16_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xffff +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: ptr1_i16_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xffff +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: s_and_b32 s2, s2, 0xffff +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: ptr1_i16_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: ptr1_i16_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_and_b32 s0, s8, 0xffff +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: ptr1_i16_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_and_b32 s0, s8, 0xffff +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: ptr1_i16_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_and_b32 s0, s8, 0xffff +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm %ext = zext i16 %arg0 to i32 store i32 %ext, ptr addrspace(1) %out, align 4 ret void } define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) %out, i32 %arg0) { -; NO-PRELOAD-LABEL: ptr1_i32_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4 -; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: ptr1_i32_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: ptr1_i32_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 -; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: ptr1_i32_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4 -; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: ptr1_i32_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 -; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: ptr1_i32_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: ptr1_i32_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: ptr1_i32_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: ptr1_i32_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: ptr1_i32_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: ptr1_i32_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: ptr1_i32_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: ptr1_i32_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: ptr1_i32_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: ptr1_i32_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store i32 %arg0, ptr addrspace(1) %out ret void } -; Check alignment on the second preloaded arg. define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 %arg0, ptr addrspace(1) %out, i32 %arg1) { -; NO-PRELOAD-LABEL: i32_ptr1_i32_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x10 -; NO-PRELOAD-NEXT: s_load_dword s5, s[0:1], 0x0 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: s_add_i32 s0, s5, s4 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 -; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: i32_ptr1_i32_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dword s3, s[0:1], 0x10 -; PRELOAD-1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: s_add_i32 s0, s2, s3 -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-1-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: i32_ptr1_i32_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_load_dword s0, s[0:1], 0x10 -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-2-NEXT: s_add_i32 s0, s2, s0 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-2-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: i32_ptr1_i32_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_add_i32 s0, s2, s6 -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-4-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: i32_ptr1_i32_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_add_i32 s0, s2, s6 -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-8-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: i32_ptr1_i32_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: s_load_dword s5, s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: s_add_i32 s0, s5, s4 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: i32_ptr1_i32_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dword s3, s[0:1], 0x10 +; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: s_add_i32 s0, s2, s3 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: i32_ptr1_i32_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dword s0, s[0:1], 0x10 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: s_add_i32 s0, s2, s0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: i32_ptr1_i32_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_add_i32 s0, s2, s6 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: i32_ptr1_i32_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_add_i32 s0, s2, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: i32_ptr1_i32_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s3, s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: s_add_i32 s2, s3, s2 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: i32_ptr1_i32_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dword s2, s[4:5], 0x10 +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: s_add_i32 s2, s6, s2 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[0:1] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: i32_ptr1_i32_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dword s0, s[4:5], 0x10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: s_add_i32 s0, s6, s0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[8:9] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: i32_ptr1_i32_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_add_i32 s0, s6, s10 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[8:9] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: i32_ptr1_i32_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_add_i32 s0, s6, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[8:9] +; GFX90a-PRELOAD-8-NEXT: s_endpgm %add = add i32 %arg0, %arg1 store i32 %add, ptr addrspace(1) %out ret void } define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0, i16 %arg1) { -; NO-PRELOAD-LABEL: ptr1_i16_i16_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: s_lshr_b32 s0, s4, 16 -; NO-PRELOAD-NEXT: s_and_b32 s1, s4, 0xffff -; NO-PRELOAD-NEXT: s_add_i32 s0, s1, s0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 -; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: ptr1_i16_i16_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: s_lshr_b32 s1, s0, 16 -; PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff -; PRELOAD-1-NEXT: s_add_i32 s0, s0, s1 -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: ptr1_i16_i16_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_load_dword s0, s[0:1], 0x8 -; PRELOAD-2-NEXT: s_and_b32 s1, s4, 0xffff -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-2-NEXT: s_lshr_b32 s0, s0, 16 -; PRELOAD-2-NEXT: s_add_i32 s0, s1, s0 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: ptr1_i16_i16_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16 -; PRELOAD-4-NEXT: s_and_b32 s1, s4, 0xffff -; PRELOAD-4-NEXT: s_add_i32 s0, s1, s0 -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: ptr1_i16_i16_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16 -; PRELOAD-8-NEXT: s_and_b32 s1, s4, 0xffff -; PRELOAD-8-NEXT: s_add_i32 s0, s1, s0 -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: ptr1_i16_i16_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-NO-PRELOAD-NEXT: s_and_b32 s1, s4, 0xffff +; GFX940-NO-PRELOAD-NEXT: s_add_i32 s0, s1, s0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: ptr1_i16_i16_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: s_lshr_b32 s1, s0, 16 +; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff +; GFX940-PRELOAD-1-NEXT: s_add_i32 s0, s0, s1 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: ptr1_i16_i16_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dword s0, s[0:1], 0x8 +; GFX940-PRELOAD-2-NEXT: s_and_b32 s1, s4, 0xffff +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s0, 16 +; GFX940-PRELOAD-2-NEXT: s_add_i32 s0, s1, s0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: ptr1_i16_i16_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-4-NEXT: s_and_b32 s1, s4, 0xffff +; GFX940-PRELOAD-4-NEXT: s_add_i32 s0, s1, s0 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: ptr1_i16_i16_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-8-NEXT: s_and_b32 s1, s4, 0xffff +; GFX940-PRELOAD-8-NEXT: s_add_i32 s0, s1, s0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_i16_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: s_lshr_b32 s3, s2, 16 +; GFX90a-NO-PRELOAD-NEXT: s_and_b32 s2, s2, 0xffff +; GFX90a-NO-PRELOAD-NEXT: s_add_i32 s2, s2, s3 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: ptr1_i16_i16_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: s_lshr_b32 s1, s0, 16 +; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff +; GFX90a-PRELOAD-1-NEXT: s_add_i32 s0, s0, s1 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: ptr1_i16_i16_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX90a-PRELOAD-2-NEXT: s_and_b32 s1, s8, 0xffff +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s0, 16 +; GFX90a-PRELOAD-2-NEXT: s_add_i32 s0, s1, s0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: ptr1_i16_i16_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-4-NEXT: s_and_b32 s1, s8, 0xffff +; GFX90a-PRELOAD-4-NEXT: s_add_i32 s0, s1, s0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: ptr1_i16_i16_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-8-NEXT: s_and_b32 s1, s8, 0xffff +; GFX90a-PRELOAD-8-NEXT: s_add_i32 s0, s1, s0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm %ext = zext i16 %arg0 to i32 %ext1 = zext i16 %arg1 to i32 %add = add i32 %ext, %ext1 @@ -1859,3563 +3684,7068 @@ define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 % } define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) %out, <2 x i8> %in) { -; NO-PRELOAD-LABEL: ptr1_v2i8_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4 -; NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: ptr1_v2i8_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: ptr1_v2i8_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8 -; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0 -; PRELOAD-2-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: ptr1_v2i8_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8 -; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 -; PRELOAD-4-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: ptr1_v2i8_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8 -; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0 -; PRELOAD-8-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: ptr1_v2i8_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: ptr1_v2i8_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: ptr1_v2i8_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0 +; GFX940-PRELOAD-2-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: ptr1_v2i8_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 +; GFX940-PRELOAD-4-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: ptr1_v2i8_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0 +; GFX940-PRELOAD-8-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: ptr1_v2i8_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: ptr1_v2i8_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-1-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: ptr1_v2i8_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0 +; GFX90a-PRELOAD-2-NEXT: global_store_short v1, v0, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: ptr1_v2i8_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 +; GFX90a-PRELOAD-4-NEXT: global_store_short v1, v0, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: ptr1_v2i8_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0 +; GFX90a-PRELOAD-8-NEXT: global_store_short v1, v0, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store <2 x i8> %in, ptr addrspace(1) %out ret void } -; Don't try to preload byref args. define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) { -; NO-PRELOAD-LABEL: byref_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x100 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s3 -; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 -; NO-PRELOAD-NEXT: s_waitcnt vmcnt(0) -; NO-PRELOAD-NEXT: global_store_dword v0, v2, s[4:5] sc0 sc1 -; NO-PRELOAD-NEXT: s_waitcnt vmcnt(0) -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: byref_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s1 -; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_waitcnt vmcnt(0) -; PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_waitcnt vmcnt(0) -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: byref_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s1 -; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_waitcnt vmcnt(0) -; PRELOAD-2-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_waitcnt vmcnt(0) -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: byref_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s1 -; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_waitcnt vmcnt(0) -; PRELOAD-4-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_waitcnt vmcnt(0) -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: byref_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s1 -; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_waitcnt vmcnt(0) -; PRELOAD-8-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_waitcnt vmcnt(0) -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: byref_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x100 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s3 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0) +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[4:5] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0) +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: byref_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s1 +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_waitcnt vmcnt(0) +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_waitcnt vmcnt(0) +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: byref_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s1 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_waitcnt vmcnt(0) +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_waitcnt vmcnt(0) +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: byref_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s1 +; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_waitcnt vmcnt(0) +; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_waitcnt vmcnt(0) +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: byref_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s1 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_waitcnt vmcnt(0) +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_waitcnt vmcnt(0) +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: byref_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s1 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[2:3] +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: byref_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s1 +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_waitcnt vmcnt(0) +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v2, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_waitcnt vmcnt(0) +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: byref_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s1 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_waitcnt vmcnt(0) +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v2, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_waitcnt vmcnt(0) +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: byref_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s1 +; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_waitcnt vmcnt(0) +; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v2, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_waitcnt vmcnt(0) +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: byref_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s1 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_waitcnt vmcnt(0) +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v2, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_waitcnt vmcnt(0) +; GFX90a-PRELOAD-8-NEXT: s_endpgm %in = load i32, ptr addrspace(4) %in.byref store volatile i32 %in, ptr addrspace(1) %out, align 4 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -; TODO: Should do partial preload in cases like these where only part of the arg -; can be preloaded. define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind { -; NO-PRELOAD-LABEL: v8i32_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11 -; NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; NO-PRELOAD-NEXT: s_nop 1 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s7 -; NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: v8i32_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 -; PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 -; PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 -; PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; PRELOAD-1-NEXT: s_nop 1 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7 -; PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: v8i32_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 -; PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0 -; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10 -; PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11 -; PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; PRELOAD-2-NEXT: s_nop 1 -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-2-NEXT: v_mov_b32_e32 v3, s7 -; PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: v8i32_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 -; PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 -; PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 -; PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 -; PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; PRELOAD-4-NEXT: s_nop 1 -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7 -; PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: v8i32_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 -; PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0 -; PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9 -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10 -; PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11 -; PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; PRELOAD-8-NEXT: s_nop 1 -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-8-NEXT: v_mov_b32_e32 v3, s7 -; PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: v8i32_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_nop 1 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: v8i32_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_nop 1 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v8i32_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_nop 1 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: v8i32_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_nop 1 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v8i32_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_nop 1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v8i32_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX90a-NO-PRELOAD-NEXT: s_nop 0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: v8i32_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v8i32_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: v8i32_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v8i32_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store <8 x i32> %in, ptr addrspace(1) %out, align 4 ret void } define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind { -; NO-PRELOAD-LABEL: v3i16_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 -; NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: v3i16_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 -; PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 -; PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: v3i16_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 -; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: v3i16_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-4-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4 -; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: v3i16_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 -; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: v3i16_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: v3i16_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 +; GFX940-PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v3i16_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: v3i16_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-4-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v3i16_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v3i16_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: v3i16_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 +; GFX90a-PRELOAD-1-NEXT: global_store_short v0, v1, s[6:7] offset:4 +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v2, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v3i16_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-2-NEXT: global_store_short v0, v1, s[6:7] offset:4 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: v3i16_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-4-NEXT: global_store_short v0, v1, s[6:7] offset:4 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v3i16_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-8-NEXT: global_store_short v0, v1, s[6:7] offset:4 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store <3 x i16> %in, ptr addrspace(1) %out, align 4 ret void } define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind { -; NO-PRELOAD-LABEL: v3i32_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 -; NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: v3i32_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 -; PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: v3i32_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7 -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8 -; PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 -; PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: v3i32_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7 -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8 -; PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 -; PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: v3i32_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7 -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 -; PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 -; PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: v3i32_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: v3i32_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v3i32_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: v3i32_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v3i32_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v3i32_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: v3i32_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v3i32_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: v3i32_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v3i32_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store <3 x i32> %in, ptr addrspace(1) %out, align 4 ret void } define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind { -; NO-PRELOAD-LABEL: v3f32_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 -; NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: v3f32_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 -; PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: v3f32_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7 -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8 -; PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: v3f32_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7 -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8 -; PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: v3f32_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7 -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 -; PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: v3f32_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: v3f32_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v3f32_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: v3f32_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v3f32_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v3f32_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: v3f32_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v3f32_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: v3f32_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v3f32_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store <3 x float> %in, ptr addrspace(1) %out, align 4 ret void } define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture %out, <5 x i8> %in) nounwind { -; NO-PRELOAD-LABEL: v5i8_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 -; NO-PRELOAD-NEXT: global_store_byte v0, v1, s[0:1] offset:4 sc0 sc1 -; NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: v5i8_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 -; PRELOAD-1-NEXT: global_store_byte v0, v1, s[2:3] offset:4 sc0 sc1 -; PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: v5i8_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8 -; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 24 -; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 16 -; PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s5 -; PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0 -; PRELOAD-2-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1 -; PRELOAD-2-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: v5i8_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8 -; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24 -; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16 -; PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s5 -; PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 -; PRELOAD-4-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1 -; PRELOAD-4-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: v5i8_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8 -; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24 -; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16 -; PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s5 -; PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0 -; PRELOAD-8-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1 -; PRELOAD-8-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: v5i8_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX940-NO-PRELOAD-NEXT: global_store_byte v0, v1, s[0:1] offset:4 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: v5i8_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 +; GFX940-PRELOAD-1-NEXT: global_store_byte v0, v1, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v5i8_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 24 +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s5 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0 +; GFX940-PRELOAD-2-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: v5i8_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24 +; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s5 +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 +; GFX940-PRELOAD-4-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-4-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v5i8_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24 +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s5 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0 +; GFX940-PRELOAD-8-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v5i8_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_byte v0, v1, s[0:1] offset:4 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: v5i8_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 +; GFX90a-PRELOAD-1-NEXT: global_store_byte v0, v1, s[6:7] offset:4 +; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v2, s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v5i8_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 24 +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s9 +; GFX90a-PRELOAD-2-NEXT: global_store_byte v1, v2, s[6:7] offset:4 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v1, v0, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: v5i8_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 24 +; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s9 +; GFX90a-PRELOAD-4-NEXT: global_store_byte v1, v2, s[6:7] offset:4 +; GFX90a-PRELOAD-4-NEXT: global_store_dword v1, v0, s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v5i8_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 24 +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s9 +; GFX90a-PRELOAD-8-NEXT: global_store_byte v1, v2, s[6:7] offset:4 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v1, v0, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store <5 x i8> %in, ptr addrspace(1) %out, align 4 ret void } define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x double> %in) nounwind { -; NO-PRELOAD-LABEL: v5f64_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x60 -; NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 -; NO-PRELOAD-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8 -; NO-PRELOAD-NEXT: global_store_dwordx2 v4, v[2:3], s[12:13] offset:32 sc0 sc1 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11 -; NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[12:13] offset:16 sc0 sc1 -; NO-PRELOAD-NEXT: s_nop 1 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s7 -; NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[12:13] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: v5f64_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 -; PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 -; PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b64_e32 v[2:3], s[12:13] -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 -; PRELOAD-1-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 -; PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 -; PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; PRELOAD-1-NEXT: s_nop 1 -; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7 -; PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: v5f64_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 -; PRELOAD-2-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 -; PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0 -; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-2-NEXT: v_mov_b64_e32 v[2:3], s[12:13] -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8 -; PRELOAD-2-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10 -; PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11 -; PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; PRELOAD-2-NEXT: s_nop 1 -; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-2-NEXT: v_mov_b32_e32 v3, s7 -; PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: v5f64_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 -; PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 -; PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 -; PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-4-NEXT: v_mov_b64_e32 v[2:3], s[12:13] -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 -; PRELOAD-4-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 -; PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 -; PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; PRELOAD-4-NEXT: s_nop 1 -; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7 -; PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: v5f64_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 -; PRELOAD-8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 -; PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0 -; PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-8-NEXT: v_mov_b64_e32 v[2:3], s[12:13] -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8 -; PRELOAD-8-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9 -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10 -; PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11 -; PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; PRELOAD-8-NEXT: s_nop 1 -; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s4 -; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5 -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s6 -; PRELOAD-8-NEXT: v_mov_b32_e32 v3, s7 -; PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: v5f64_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x60 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v4, v[2:3], s[12:13] offset:32 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[12:13] offset:16 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_nop 1 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[12:13] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: v5f64_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 +; GFX940-PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_nop 1 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v5f64_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 +; GFX940-PRELOAD-2-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_nop 1 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: v5f64_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 +; GFX940-PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-4-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_nop 1 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v5f64_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 +; GFX940-PRELOAD-8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-8-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_nop 1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s7 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v5f64_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 +; GFX90a-NO-PRELOAD-NEXT: s_nop 0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: v5f64_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60 +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v5f64_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60 +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: v5f64_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60 +; GFX90a-PRELOAD-4-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-4-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v5f64_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60 +; GFX90a-PRELOAD-8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-8-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s12 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s13 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s14 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s15 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store <5 x double> %in, ptr addrspace(1) %out, align 8 ret void } define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) { -; NO-PRELOAD-LABEL: v8i8_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: v8i8_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: v8i8_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 8 -; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 24 -; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 16 -; PRELOAD-2-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8 -; PRELOAD-2-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 24 -; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v2, 8, s0 -; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 16 -; PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-2-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: v8i8_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 8 -; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 24 -; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 16 -; PRELOAD-4-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8 -; PRELOAD-4-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24 -; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v2, 8, s0 -; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16 -; PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-4-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: v8i8_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 8 -; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 24 -; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 16 -; PRELOAD-8-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8 -; PRELOAD-8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24 -; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v2, 8, s0 -; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16 -; PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-8-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: v8i8_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: v8i8_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v8i8_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 8 +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 24 +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 16 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 24 +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v2, 8, s0 +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: v8i8_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 8 +; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 24 +; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 16 +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24 +; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v2, 8, s0 +; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v8i8_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 8 +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 24 +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 16 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24 +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v2, 8, s0 +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v8i8_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: v8i8_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v8i8_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s9, 8 +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s9, 24 +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s9, 16 +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 24 +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v2, 8, s0 +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: v8i8_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s9, 8 +; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s9, 24 +; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s9, 16 +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 24 +; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v2, 8, s0 +; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v8i8_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s9, 8 +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s9, 24 +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s9, 16 +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 24 +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v2, 8, s0 +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store <8 x i8> %in, ptr addrspace(1) %out ret void } define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) { -; NO-PRELOAD-LABEL: i64_kernel_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 -; NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: i64_kernel_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: i64_kernel_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-2-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: i64_kernel_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: i64_kernel_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-8-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: i64_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: i64_kernel_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: i64_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: i64_kernel_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: i64_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX940-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: i64_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: i64_kernel_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: i64_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-2-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1] +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: i64_kernel_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-4-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1] +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: i64_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-8-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1] +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store i64 %a, ptr addrspace(1) %out, align 8 ret void } define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double %in) { -; NO-PRELOAD-LABEL: f64_kernel_preload_arg: -; NO-PRELOAD: ; %bb.0: -; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 -; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) -; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2 -; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 -; NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; NO-PRELOAD-NEXT: s_endpgm -; -; PRELOAD-1-LABEL: f64_kernel_preload_arg: -; PRELOAD-1: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: s_nop 0 -; PRELOAD-1-NEXT: ; %bb.0: -; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-1-NEXT: s_endpgm -; -; PRELOAD-2-LABEL: f64_kernel_preload_arg: -; PRELOAD-2: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: s_nop 0 -; PRELOAD-2-NEXT: ; %bb.0: -; PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-2-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-2-NEXT: s_endpgm -; -; PRELOAD-4-LABEL: f64_kernel_preload_arg: -; PRELOAD-4: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: s_nop 0 -; PRELOAD-4-NEXT: ; %bb.0: -; PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-4-NEXT: s_endpgm -; -; PRELOAD-8-LABEL: f64_kernel_preload_arg: -; PRELOAD-8: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: s_nop 0 -; PRELOAD-8-NEXT: ; %bb.0: -; PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 -; PRELOAD-8-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; PRELOAD-8-NEXT: s_endpgm +; GFX940-NO-PRELOAD-LABEL: f64_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-1-LABEL: f64_kernel_preload_arg: +; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: s_nop 0 +; GFX940-PRELOAD-1-NEXT: ; %bb.0: +; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 +; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-1-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: f64_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-4-LABEL: f64_kernel_preload_arg: +; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: s_nop 0 +; GFX940-PRELOAD-4-NEXT: ; %bb.0: +; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-4-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: f64_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX940-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: f64_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-1-LABEL: f64_kernel_preload_arg: +; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: s_nop 0 +; GFX90a-PRELOAD-1-NEXT: ; %bb.0: +; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-1-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: f64_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-2-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1] +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-4-LABEL: f64_kernel_preload_arg: +; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: s_nop 0 +; GFX90a-PRELOAD-4-NEXT: ; %bb.0: +; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-4-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1] +; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-4-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: f64_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-8-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1] +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm store double %in, ptr addrspace(1) %out ret void }