Skip to content

Commit 5f38ae4

Browse files
authored
[AMDGPU] update LDS block size for gfx1250 (#167614)
LDS block size should be 2048 bytes (512 dwords) based on current spec.
1 parent 0f0cf84 commit 5f38ae4

File tree

9 files changed

+248
-27
lines changed

9 files changed

+248
-27
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5896,7 +5896,7 @@ The fields used by CP for code objects before V3 also match those specified in
58965896
GFX950
58975897
roundup(lds-size / (320 * 4))
58985898
GFX125*
5899-
roundup(lds-size / (256 * 4))
5899+
roundup(lds-size / (512 * 4))
59005900

59015901
24 1 bit ENABLE_EXCEPTION_IEEE_754_FP Wavefront starts execution
59025902
_INVALID_OPERATION with specified exceptions

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,21 +1186,20 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
11861186
// Make the clamp modifier return 0 on NaN input.
11871187
ProgInfo.DX10Clamp = Mode.DX10Clamp;
11881188

1189-
unsigned LDSAlignShift;
1190-
if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize327680)) {
1191-
// LDS is allocated in 256 dword blocks.
1192-
LDSAlignShift = 10;
1193-
} else if (STM.getFeatureBits().test(
1194-
FeatureAddressableLocalMemorySize163840)) {
1195-
// LDS is allocated in 320 dword blocks.
1189+
unsigned LDSAlignShift = 8;
1190+
switch (getLdsDwGranularity(STM)) {
1191+
case 512:
1192+
case 320:
11961193
LDSAlignShift = 11;
1197-
} else if (STM.getFeatureBits().test(
1198-
FeatureAddressableLocalMemorySize65536)) {
1199-
// LDS is allocated in 128 dword blocks.
1194+
break;
1195+
case 128:
12001196
LDSAlignShift = 9;
1201-
} else {
1202-
// LDS is allocated in 64 dword blocks.
1197+
break;
1198+
case 64:
12031199
LDSAlignShift = 8;
1200+
break;
1201+
default:
1202+
llvm_unreachable("invald LDS block size");
12041203
}
12051204

12061205
ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3565,8 +3565,15 @@ bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
35653565
}
35663566

35673567
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3568-
return ST.hasFeature(AMDGPU::FeatureAddressableLocalMemorySize327680) ? 256
3569-
: 128;
3568+
if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
3569+
return 64;
3570+
if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
3571+
return 128;
3572+
if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
3573+
return 320;
3574+
if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
3575+
return 512;
3576+
return 64; // In sync with getAddressableLocalMemorySize
35703577
}
35713578

35723579
bool isPackedFP32Inst(unsigned Opc) {

llvm/test/CodeGen/AMDGPU/extra-lds-size.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@
3131
; GFX1200-MESA: .long 45100
3232
; GFX1200-MESA-NEXT: .long 1024
3333

34-
; GFX1250-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x200
34+
; GFX1250-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x100
3535

3636
; GFX1250-MESA: .long 45100
37-
; GFX1250-MESA-NEXT: .long 512
37+
; GFX1250-MESA-NEXT: .long 256
3838

3939
@lds = internal addrspace(3) global [4096 x i8] poison
4040

llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx1250.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ define amdgpu_kernel void @test_lds_i32(i32 %val) {
4141
; GCN-LABEL: test_lds_array_i8:
4242
; GCN: .amdhsa_group_segment_fixed_size 327680
4343
; GCN: ; LDSByteSize: 327680 bytes/workgroup
44-
; MESA: granulated_lds_size = 320
44+
; MESA: granulated_lds_size = 160
4545
define amdgpu_kernel void @test_lds_array_i8() {
4646
%gep = getelementptr inbounds [327679 x i8], ptr addrspace(3) @lds.array.i8, i32 0, i32 5
4747
%val = load i8, ptr addrspace(3) %gep
@@ -52,7 +52,7 @@ define amdgpu_kernel void @test_lds_array_i8() {
5252
; GCN-LABEL: test_lds_array_i16:
5353
; GCN: .amdhsa_group_segment_fixed_size 327680
5454
; GCN: ; LDSByteSize: 327680 bytes/workgroup
55-
; MESA: granulated_lds_size = 320
55+
; MESA: granulated_lds_size = 160
5656
define amdgpu_kernel void @test_lds_array_i16() {
5757
%gep = getelementptr inbounds [163839 x i16], ptr addrspace(3) @lds.array.i16, i32 0, i32 10
5858
%val = load i16, ptr addrspace(3) %gep
@@ -63,7 +63,7 @@ define amdgpu_kernel void @test_lds_array_i16() {
6363
; GCN-LABEL: test_lds_array_i32:
6464
; GCN: .amdhsa_group_segment_fixed_size 327680
6565
; GCN: ; LDSByteSize: 327680 bytes/workgroup
66-
; MESA: granulated_lds_size = 320
66+
; MESA: granulated_lds_size = 160
6767
define amdgpu_kernel void @test_lds_array_i32() {
6868
%gep = getelementptr inbounds [81919 x i32], ptr addrspace(3) @lds.array.i32, i32 0, i32 20
6969
%val = load i32, ptr addrspace(3) %gep

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.gfx1250.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@
114114
; CHECK-NEXT: .entry_point: _amdgpu_gs
115115
; CHECK-NEXT: .entry_point_symbol: gs_shader
116116
; CHECK-NEXT: .forward_progress: true
117-
; CHECK-NEXT: .lds_size: 0x400
117+
; CHECK-NEXT: .lds_size: 0x800
118118
; CHECK-NEXT: .mem_ordered: true
119119
; CHECK-NEXT: .scratch_en: false
120120
; CHECK-NEXT: .scratch_memory_size: 0
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx950 <%s | FileCheck %s --check-prefixes=CHECK
2+
3+
; CHECK-LABEL: {{^}}_amdgpu_cs_main:
4+
; CHECK: ; TotalNumSgprs: 6
5+
; CHECK: ; NumVgprs: 1
6+
; CHECK: .amdgpu_pal_metadata
7+
; CHECK-NEXT: ---
8+
; CHECK-NEXT: amdpal.pipelines:
9+
; CHECK-NEXT: - .api: Vulkan
10+
; CHECK-NEXT: .compute_registers:
11+
; CHECK-NEXT: .tg_size_en: true
12+
; CHECK-NEXT: .tgid_x_en: false
13+
; CHECK-NEXT: .tgid_y_en: false
14+
; CHECK-NEXT: .tgid_z_en: false
15+
; CHECK-NEXT: .tidig_comp_cnt: 0x1
16+
; CHECK-NEXT: .graphics_registers:
17+
; CHECK-NEXT: .ps_extra_lds_size: 0
18+
; CHECK-NEXT: .spi_ps_input_addr:
19+
; CHECK-NEXT: .ancillary_ena: false
20+
; CHECK-NEXT: .front_face_ena: true
21+
; CHECK-NEXT: .line_stipple_tex_ena: false
22+
; CHECK-NEXT: .linear_center_ena: true
23+
; CHECK-NEXT: .linear_centroid_ena: true
24+
; CHECK-NEXT: .linear_sample_ena: true
25+
; CHECK-NEXT: .persp_center_ena: true
26+
; CHECK-NEXT: .persp_centroid_ena: true
27+
; CHECK-NEXT: .persp_pull_model_ena: false
28+
; CHECK-NEXT: .persp_sample_ena: true
29+
; CHECK-NEXT: .pos_fixed_pt_ena: true
30+
; CHECK-NEXT: .pos_w_float_ena: false
31+
; CHECK-NEXT: .pos_x_float_ena: false
32+
; CHECK-NEXT: .pos_y_float_ena: false
33+
; CHECK-NEXT: .pos_z_float_ena: false
34+
; CHECK-NEXT: .sample_coverage_ena: false
35+
; CHECK-NEXT: .spi_ps_input_ena:
36+
; CHECK-NEXT: .ancillary_ena: false
37+
; CHECK-NEXT: .front_face_ena: false
38+
; CHECK-NEXT: .line_stipple_tex_ena: false
39+
; CHECK-NEXT: .linear_center_ena: false
40+
; CHECK-NEXT: .linear_centroid_ena: false
41+
; CHECK-NEXT: .linear_sample_ena: false
42+
; CHECK-NEXT: .persp_center_ena: false
43+
; CHECK-NEXT: .persp_centroid_ena: false
44+
; CHECK-NEXT: .persp_pull_model_ena: false
45+
; CHECK-NEXT: .persp_sample_ena: true
46+
; CHECK-NEXT: .pos_fixed_pt_ena: false
47+
; CHECK-NEXT: .pos_w_float_ena: false
48+
; CHECK-NEXT: .pos_x_float_ena: false
49+
; CHECK-NEXT: .pos_y_float_ena: false
50+
; CHECK-NEXT: .pos_z_float_ena: false
51+
; CHECK-NEXT: .sample_coverage_ena: false
52+
; CHECK-NEXT: .hardware_stages:
53+
; CHECK-NEXT: .cs:
54+
; CHECK-NEXT: .agpr_count: 0
55+
; CHECK-NEXT: .checksum_value: 0x9444d7d0
56+
; CHECK-NEXT: .debug_mode: false
57+
; CHECK-NEXT: .entry_point: _amdgpu_cs
58+
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
59+
; CHECK-NEXT: .excp_en: 0
60+
; CHECK-NEXT: .float_mode: 0xc0
61+
; CHECK-NEXT: .forward_progress: false
62+
; CHECK-NEXT: .ieee_mode: false
63+
; CHECK-NEXT: .image_op: false
64+
; CHECK-NEXT: .lds_size: 0
65+
; CHECK-NEXT: .mem_ordered: false
66+
; CHECK-NEXT: .scratch_en: false
67+
; CHECK-NEXT: .scratch_memory_size: 0
68+
; CHECK-NEXT: .sgpr_count: 0xa
69+
; CHECK-NEXT: .sgpr_limit: 0x6a
70+
; CHECK-NEXT: .threadgroup_dimensions:
71+
; CHECK-NEXT: - 0x1
72+
; CHECK-NEXT: - 0x400
73+
; CHECK-NEXT: - 0x1
74+
; CHECK-NEXT: .trap_present: false
75+
; CHECK-NEXT: .user_data_reg_map:
76+
; CHECK-NEXT: - 0x10000000
77+
; CHECK-NEXT: - 0xffffffff
78+
; CHECK-NEXT: - 0
79+
; CHECK-NEXT: - 0xffffffff
80+
; CHECK-NEXT: - 0xffffffff
81+
; CHECK-NEXT: - 0xffffffff
82+
; CHECK-NEXT: - 0xffffffff
83+
; CHECK-NEXT: - 0xffffffff
84+
; CHECK-NEXT: - 0xffffffff
85+
; CHECK-NEXT: - 0xffffffff
86+
; CHECK-NEXT: - 0xffffffff
87+
; CHECK-NEXT: - 0xffffffff
88+
; CHECK-NEXT: - 0xffffffff
89+
; CHECK-NEXT: - 0xffffffff
90+
; CHECK-NEXT: - 0xffffffff
91+
; CHECK-NEXT: - 0xffffffff
92+
; CHECK-NEXT: - 0xffffffff
93+
; CHECK-NEXT: - 0xffffffff
94+
; CHECK-NEXT: - 0xffffffff
95+
; CHECK-NEXT: - 0xffffffff
96+
; CHECK-NEXT: - 0xffffffff
97+
; CHECK-NEXT: - 0xffffffff
98+
; CHECK-NEXT: - 0xffffffff
99+
; CHECK-NEXT: - 0xffffffff
100+
; CHECK-NEXT: - 0xffffffff
101+
; CHECK-NEXT: - 0xffffffff
102+
; CHECK-NEXT: - 0xffffffff
103+
; CHECK-NEXT: - 0xffffffff
104+
; CHECK-NEXT: - 0xffffffff
105+
; CHECK-NEXT: - 0xffffffff
106+
; CHECK-NEXT: - 0xffffffff
107+
; CHECK-NEXT: - 0xffffffff
108+
; CHECK-NEXT: .user_sgprs: 0x3
109+
; CHECK-NEXT: .vgpr_count: 0x2
110+
; CHECK-NEXT: .vgpr_limit: 0x100
111+
; CHECK-NEXT: .wavefront_size: 0x20
112+
; CHECK-NEXT: .wgp_mode: false
113+
; CHECK-NEXT: .gs:
114+
; CHECK-NEXT: .agpr_count: 0
115+
; CHECK-NEXT: .debug_mode: false
116+
; CHECK-NEXT: .entry_point: _amdgpu_gs
117+
; CHECK-NEXT: .entry_point_symbol: gs_shader
118+
; CHECK-NEXT: .forward_progress: false
119+
; CHECK-NEXT: .ieee_mode: false
120+
; CHECK-NEXT: .lds_size: 0x500
121+
; CHECK-NEXT: .mem_ordered: false
122+
; CHECK-NEXT: .scratch_en: false
123+
; CHECK-NEXT: .scratch_memory_size: 0
124+
; CHECK-NEXT: .sgpr_count: 0x6
125+
; CHECK-NEXT: .vgpr_count: 0x1
126+
; CHECK-NEXT: .wgp_mode: false
127+
; CHECK-NEXT: .hs:
128+
; CHECK-NEXT: .agpr_count: 0
129+
; CHECK-NEXT: .debug_mode: false
130+
; CHECK-NEXT: .entry_point: _amdgpu_hs
131+
; CHECK-NEXT: .entry_point_symbol: hs_shader
132+
; CHECK-NEXT: .forward_progress: false
133+
; CHECK-NEXT: .ieee_mode: false
134+
; CHECK-NEXT: .lds_size: 0xa00
135+
; CHECK-NEXT: .mem_ordered: false
136+
; CHECK-NEXT: .scratch_en: false
137+
; CHECK-NEXT: .scratch_memory_size: 0
138+
; CHECK-NEXT: .sgpr_count: 0x6
139+
; CHECK-NEXT: .vgpr_count: 0x1
140+
; CHECK-NEXT: .wgp_mode: false
141+
; CHECK-NEXT: .ps:
142+
; CHECK-NEXT: .agpr_count: 0
143+
; CHECK-NEXT: .debug_mode: false
144+
; CHECK-NEXT: .entry_point: _amdgpu_ps
145+
; CHECK-NEXT: .entry_point_symbol: ps_shader
146+
; CHECK-NEXT: .forward_progress: false
147+
; CHECK-NEXT: .ieee_mode: false
148+
; CHECK-NEXT: .lds_size: 0
149+
; CHECK-NEXT: .mem_ordered: false
150+
; CHECK-NEXT: .scratch_en: false
151+
; CHECK-NEXT: .scratch_memory_size: 0
152+
; CHECK-NEXT: .sgpr_count: 0x6
153+
; CHECK-NEXT: .vgpr_count: 0x1
154+
; CHECK-NEXT: .wgp_mode: false
155+
; CHECK: .registers: {}
156+
; CHECK:amdpal.version:
157+
; CHECK-NEXT: - 0x3
158+
; CHECK-NEXT: - 0
159+
; CHECK-NEXT:...
160+
; CHECK-NEXT: .end_amdgpu_pal_metadata
161+
162+
define amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg1, i32 %arg2) #0 !lgc.shaderstage !1 {
163+
.entry:
164+
%i = call i64 @llvm.amdgcn.s.getpc()
165+
%i1 = and i64 %i, -4294967296
166+
%i2 = zext i32 %arg1 to i64
167+
%i3 = or i64 %i1, %i2
168+
%i4 = inttoptr i64 %i3 to ptr addrspace(4)
169+
%i5 = and i32 %arg2, 1023
170+
%i6 = lshr i32 %arg2, 10
171+
%i7 = and i32 %i6, 1023
172+
%i8 = add nuw nsw i32 %i7, %i5
173+
%i9 = load <4 x i32>, ptr addrspace(4) %i4, align 16
174+
%.idx = shl nuw nsw i32 %i8, 2
175+
call void @llvm.amdgcn.raw.buffer.store.i32(i32 1, <4 x i32> %i9, i32 %.idx, i32 0, i32 0)
176+
ret void
177+
}
178+
179+
define amdgpu_ps void @ps_shader() #1 {
180+
ret void
181+
}
182+
183+
@LDS.GS = external addrspace(3) global [1 x i32], align 4
184+
185+
define amdgpu_gs void @gs_shader() {
186+
%ptr = getelementptr i32, ptr addrspace(3) @LDS.GS, i32 0
187+
store i32 0, ptr addrspace(3) %ptr, align 4
188+
ret void
189+
}
190+
191+
@LDS.HS = external addrspace(3) global [1024 x i32], align 4
192+
193+
define amdgpu_hs void @hs_shader() {
194+
%ptr = getelementptr i32, ptr addrspace(3) @LDS.HS, i32 0
195+
store i32 0, ptr addrspace(3) %ptr, align 4
196+
ret void
197+
}
198+
199+
!amdgpu.pal.metadata.msgpack = !{!0}
200+
201+
; Function Attrs: nounwind willreturn memory(none)
202+
declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #1
203+
204+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
205+
declare i64 @llvm.amdgcn.s.getpc() #2
206+
207+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
208+
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg) #3
209+
210+
attributes #0 = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="4" "denormal-fp-math-f32"="preserve-sign" }
211+
212+
attributes #1 = { nounwind memory(readwrite) "InitialPSInputAddr"="36983" }
213+
214+
!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size \B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"}
215+
!1 = !{i32 7}

llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
5353
// OBJDUMP-NEXT: 00f0 00000cc0 80000000 00040000 00000000
5454
// max_lds_size
55-
// OBJDUMP-NEXT: 0100 00000600 00000000 00000000 00000000
55+
// OBJDUMP-NEXT: 0100 00000500 00000000 00000000 00000000
5656
// OBJDUMP-NEXT: 0110 00000000 00000000 00000000 00000000
5757
// OBJDUMP-NEXT: 0120 00000000 00000000 00000000 00000000
5858
// OBJDUMP-NEXT: 0130 00000cc0 80000000 00040000 00000000
@@ -231,13 +231,13 @@ max_vgprs:
231231

232232
.p2align 6
233233
.amdhsa_kernel max_lds_size
234-
.amdhsa_group_segment_fixed_size 393216
234+
.amdhsa_group_segment_fixed_size 327680
235235
.amdhsa_next_free_vgpr 1
236236
.amdhsa_next_free_sgpr 1
237237
.end_amdhsa_kernel
238238

239239
// ASM: .amdhsa_kernel max_lds_size
240-
// ASM: .amdhsa_group_segment_fixed_size 393216
240+
// ASM: .amdhsa_group_segment_fixed_size 327680
241241
// ASM: .end_amdhsa_kernel
242242

243243
// Test maximum VGPR allocation

llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
5353
// OBJDUMP-NEXT: 00f0 00000cc0 80000000 00040000 00000000
5454
// max_lds_size
55-
// OBJDUMP-NEXT: 0100 00000600 00000000 00000000 00000000
55+
// OBJDUMP-NEXT: 0100 00000500 00000000 00000000 00000000
5656
// OBJDUMP-NEXT: 0110 00000000 00000000 00000000 00000000
5757
// OBJDUMP-NEXT: 0120 00000000 00000000 00000000 00000000
5858
// OBJDUMP-NEXT: 0130 00000cc0 80000000 00040000 00000000
@@ -231,13 +231,13 @@ max_vgprs:
231231

232232
.p2align 6
233233
.amdhsa_kernel max_lds_size
234-
.amdhsa_group_segment_fixed_size 393216
234+
.amdhsa_group_segment_fixed_size 327680
235235
.amdhsa_next_free_vgpr 1
236236
.amdhsa_next_free_sgpr 1
237237
.end_amdhsa_kernel
238238

239239
// ASM: .amdhsa_kernel max_lds_size
240-
// ASM: .amdhsa_group_segment_fixed_size 393216
240+
// ASM: .amdhsa_group_segment_fixed_size 327680
241241
// ASM: .end_amdhsa_kernel
242242

243243
// Test maximum VGPR allocation

0 commit comments

Comments
 (0)