Skip to content
Permalink
Browse files

[AMDGPU] Increase kernel padding

To support prefetch mode 3 we need to pad the current
cacheline and fill 3 cachelines after it. The current padding
is only sufficient for mode 2.

Differential Revision: https://reviews.llvm.org/D65236

llvm-svn: 366938
  • Loading branch information...
rampitec committed Jul 24, 2019
1 parent 65217a4 commit c43784ff26c5ea4d16678560524ba15740d147f5
@@ -250,7 +250,7 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
// Writes the end-of-code padding directives to the assembly output stream OS:
// a .p2alignl directive followed by .fill directive(s), each using the encoded
// s_code_end word as the fill value. Always returns true.
bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
// 32-bit padding word (printed in decimal as 3214868480 in the directives).
const uint32_t Encoded_s_code_end = 0xbf9f0000;
// Align to a 2^6 = 64-byte boundary, padding with the word above.
OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
// NOTE(review): this listing is a diff with its +/- markers stripped. Per the
// hunk header (7 lines before, 7 after), the "32" line is presumably the
// removed line and the "48" line its replacement, so only one .fill is
// expected in the real file -- confirm against the repository.
// 48 repeats * 4 bytes = 192 bytes, which would be 3 cachelines if a cacheline
// is 64 bytes (assumption consistent with the commit message; TODO confirm).
OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n';
OS << "\t.fill 48, 4, " << Encoded_s_code_end << '\n';
return true;
}

@@ -602,7 +602,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
MCStreamer &OS = getStreamer();
OS.PushSection();
OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
for (unsigned I = 0; I < 32; ++I)
for (unsigned I = 0; I < 48; ++I)
OS.EmitIntValue(Encoded_s_code_end, 4);
OS.PopSection();
return true;
@@ -35,47 +35,14 @@ define amdgpu_kernel void @a_kernel2() {
; GCN-ASM-NEXT: [[END_LABEL3:\.Lfunc_end.*]]:
; GCN-ASM-NEXT: .size a_function, [[END_LABEL3]]-a_function
; GFX10END-ASM: .p2alignl 6, 3214868480
; GFX10END-ASM-NEXT: .fill 32, 4, 3214868480
; GFX10END-ASM-NEXT: .fill 48, 4, 3214868480
; GFX10NOEND-NOT: .fill

; GFX10NOEND-OBJ-NOT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end

; GFX10END-OBJ: s_code_end // 000000000140:
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end

; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end

; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end

; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-NEXT: s_code_end
; GFX10END-OBJ-COUNT-47: s_code_end

define void @a_function() {
ret void

0 comments on commit c43784f

Please sign in to comment.
You can’t perform that action at this time.