diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 51f070fa5e1723..64012a67c4655d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -272,8 +272,8 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
   const bool IsLoad = Query.Opcode != AMDGPU::G_STORE;
 
   unsigned RegSize = Ty.getSizeInBits();
-  unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
-  unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
+  uint64_t MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+  uint64_t AlignBits = Query.MMODescrs[0].AlignInBits;
   unsigned AS = Query.Types[1].getAddressSpace();
 
   // All of these need to be custom lowered to cast the pointer operand.
@@ -380,7 +380,7 @@ static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
 /// access up to the alignment. Note this case when the memory access itself
 /// changes, not the size of the result register.
 static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
-                            unsigned AlignInBits, unsigned AddrSpace,
+                            uint64_t AlignInBits, unsigned AddrSpace,
                             unsigned Opcode) {
   unsigned SizeInBits = MemoryTy.getSizeInBits();
   // We don't want to widen cases that are naturally legal.
@@ -1172,7 +1172,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
             if (MemSize > MaxSize)
               return std::make_pair(0, LLT::scalar(MaxSize));
 
-            unsigned Align = Query.MMODescrs[0].AlignInBits;
+            uint64_t Align = Query.MMODescrs[0].AlignInBits;
             return std::make_pair(0, LLT::scalar(Align));
           })
       .fewerElementsIf(
@@ -2547,7 +2547,7 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
   const LLT MemTy = MMO->getMemoryType();
   const Align MemAlign = MMO->getAlign();
   const unsigned MemSize = MemTy.getSizeInBits();
-  const unsigned AlignInBits = 8 * MemAlign.value();
+  const uint64_t AlignInBits = 8 * MemAlign.value();
 
   // Widen non-power-of-2 loads to the alignment if needed
   if (shouldWidenLoad(ST, MemTy, AlignInBits, AddrSpace, MI.getOpcode())) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 7b047ddd27e33f..331a5d35218907 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -6575,3 +6575,27 @@ body: |
     $vgpr0_vgpr1 = COPY %1
 
 ...
+# Make sure there's no crash on very high alignments
+---
+name: test_load_flat_s32_align536870912
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CI-LABEL: name: test_load_flat_s32_align536870912
+    ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-LABEL: name: test_load_flat_s32_align536870912
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-LABEL: name: test_load_flat_s32_align536870912
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_LOAD %0 :: (load (s16), align 536870912)
+    $vgpr0 = COPY %1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index acaf1dab33f8f7..100180176064b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -15593,3 +15593,40 @@ body: |
     %1:_(<8 x s4>) = G_LOAD %0 :: (load (<8 x s4>), align 4, addrspace 1)
     $vgpr0 = COPY %1
 ...
+
+# Make sure there's no crash on very high alignments
+---
+name: test_load_global_s32_align536870912
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; SI-LABEL: name: test_load_global_s32_align536870912
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-LABEL: name: test_load_global_s32_align536870912
+    ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-LABEL: name: test_load_global_s32_align536870912
+    ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-LABEL: name: test_load_global_s32_align536870912
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-LABEL: name: test_load_global_s32_align536870912
+    ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-LABEL: name: test_load_global_s32_align536870912
+    ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_LOAD %0 :: (load (s16), align 536870912, addrspace 1)
+    $vgpr0 = COPY %1
+
+...
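Note on why widening unsigned to uint64_t fixes the crash: the new tests use align 536870912, a 2^29-byte alignment, and 2^29 bytes is exactly 2^32 bits. Align::value() returns uint64_t, so the product 8 * MemAlign.value() is computed correctly in 64 bits, but storing it in a 32-bit unsigned truncates 2^32 to 0, and a zero (or wrapped) AlignInBits then misleads the legality checks above. The sketch below is a minimal standalone C++ reproduction of that arithmetic, not LLVM code; the variable names are illustrative.

    #include <cstdint>
    #include <cstdio>

    int main() {
      // align 536870912 == 1 << 29 bytes, the case exercised by the new tests.
      // It is the smallest power-of-two alignment whose size in bits (1 << 32)
      // no longer fits in a 32-bit unsigned.
      uint64_t AlignBytes = 536870912;

      // Old pattern: the multiply happens in 64 bits (Align::value() returns
      // uint64_t), but the assignment truncates 0x100000000 to 0.
      unsigned NarrowBits = 8 * AlignBytes; // wraps to 0

      // New pattern: keeping the bit count in uint64_t preserves the value.
      uint64_t WideBits = 8 * AlignBytes;   // 4294967296

      printf("unsigned: %u\nuint64_t: %llu\n", NarrowBits,
             (unsigned long long)WideBits);
    }

Printing "unsigned: 0" versus "uint64_t: 4294967296" shows the truncation directly, which is why every place the patch touches stores an alignment-in-bits or memory-size-in-bits value in uint64_t rather than unsigned.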