[MemCpyOpt] Fix a variety of scalable-type crashes
This patch fixes a variety of crashes resulting from the `MemCpyOptPass` casting `TypeSize` to a constant integer, whether implicitly or explicitly.

Since `MemsetRanges` requires a constant size to work, all but one of the fixes in this patch simply involve skipping the various optimizations for scalable types as cleanly as possible.

The optimization of `byval` parameters, however, has been updated to work on scalable types in theory. In practice, this optimization is only valid when the length of the `memcpy` is known to be larger than the scalable type size, which is currently never the case. This could perhaps be done in the future using the `vscale_range` attribute.

Some implicit casts have been left as they were, in the knowledge that they are only called on aggregate types, which should never be scalably sized.

Reviewed By: nikic, tra

Differential Revision: https://reviews.llvm.org/D109329
commit 7fb66d4 (1 parent: caabf2a)
Showing 3 changed files with 128 additions and 11 deletions.
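As a rough illustration of the skip-based fixes, the sketch below shows the guard pattern in the style of MemCpyOptimizer.cpp. This is a minimal sketch, not an excerpt from the patch: the helper name `isMergeableStore` and its surrounding structure are invented for illustration; only the `TypeSize::isScalable()` bail-out reflects the approach the commit message describes.

```cpp
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Hypothetical helper: decide whether a store may participate in a
// MemsetRanges merge. MemsetRanges does byte-offset arithmetic over a run
// of stores, so it needs each store's size as a plain integer. A scalable
// vector's store size is only known as a multiple of vscale, so we skip
// it cleanly rather than let TypeSize be cast to uint64_t (the source of
// the crashes this patch fixes).
static bool isMergeableStore(const DataLayout &DL, const StoreInst &SI) {
  TypeSize StoreSize = DL.getTypeStoreSize(SI.getValueOperand()->getType());
  if (StoreSize.isScalable())
    return false; // bail out on scalable types
  // Safe: the size is now a fixed byte count. (getFixedValue() is the
  // current spelling; older trees use getFixedSize().)
  return StoreSize.getFixedValue() != 0;
}
```

The `merge_stores_*` tests in the diff below exercise this path, with the scalable store placed on either side of the merge candidate.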
@@ -0,0 +1,101 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s

; Check that a call featuring a scalable-vector byval argument fed by a memcpy
; doesn't crash the compiler. It previously assumed the byval type's size could
; be represented as a known constant amount.
define void @byval_caller(i8 *%P) {
; CHECK-LABEL: @byval_caller(
; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A]], i8* align 4 [[P:%.*]], i64 8, i1 false)
; CHECK-NEXT:    [[VA:%.*]] = bitcast i8* [[A]] to <vscale x 1 x i8>*
; CHECK-NEXT:    call void @byval_callee(<vscale x 1 x i8>* byval(<vscale x 1 x i8>) align 1 [[VA]])
; CHECK-NEXT:    ret void
;
  %a = alloca i8
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 %P, i64 8, i1 false)
  %va = bitcast i8* %a to <vscale x 1 x i8>*
  call void @byval_callee(<vscale x 1 x i8>* align 1 byval(<vscale x 1 x i8>) %va)
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4, i8* align 4, i64, i1)
declare void @byval_callee(<vscale x 1 x i8>* align 1 byval(<vscale x 1 x i8>))

; Check that two scalable-vector stores (overlapping, with a constant offset)
; do not crash the compiler when checking whether they can be merged into a
; single memset. There was previously an assumption that the stored values'
; sizes could be represented as a known constant amount.
define void @merge_stores_both_scalable(<vscale x 1 x i8>* %ptr) {
; CHECK-LABEL: @merge_stores_both_scalable(
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTRI8:%.*]] = bitcast <vscale x 1 x i8>* [[PTR]] to i8*
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, i8* [[PTRI8]], i64 1
; CHECK-NEXT:    [[PTR_NEXT_2:%.*]] = bitcast i8* [[PTR_NEXT]] to <vscale x 1 x i8>*
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* [[PTR_NEXT_2]], align 1
; CHECK-NEXT:    ret void
;
  store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* %ptr
  %ptri8 = bitcast <vscale x 1 x i8>* %ptr to i8*
  %ptr.next = getelementptr i8, i8* %ptri8, i64 1
  %ptr.next.2 = bitcast i8* %ptr.next to <vscale x 1 x i8>*
  store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* %ptr.next.2
  ret void
}

; As above, but where the base is scalable but the subsequent store(s) are not.
define void @merge_stores_first_scalable(<vscale x 1 x i8>* %ptr) {
; CHECK-LABEL: @merge_stores_first_scalable(
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTRI8:%.*]] = bitcast <vscale x 1 x i8>* [[PTR]] to i8*
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, i8* [[PTRI8]], i64 1
; CHECK-NEXT:    store i8 0, i8* [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* %ptr
  %ptri8 = bitcast <vscale x 1 x i8>* %ptr to i8*
  %ptr.next = getelementptr i8, i8* %ptri8, i64 1
  store i8 zeroinitializer, i8* %ptr.next
  ret void
}

; As above, but where the base is not scalable but the subsequent store(s) are.
define void @merge_stores_second_scalable(i8* %ptr) {
; CHECK-LABEL: @merge_stores_second_scalable(
; CHECK-NEXT:    store i8 0, i8* [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, i8* [[PTR]], i64 1
; CHECK-NEXT:    [[PTR_NEXT_2:%.*]] = bitcast i8* [[PTR_NEXT]] to <vscale x 1 x i8>*
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* [[PTR_NEXT_2]], align 1
; CHECK-NEXT:    ret void
;
  store i8 zeroinitializer, i8* %ptr
  %ptr.next = getelementptr i8, i8* %ptr, i64 1
  %ptr.next.2 = bitcast i8* %ptr.next to <vscale x 1 x i8>*
  store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* %ptr.next.2
  ret void
}

; Check that the call-slot optimization doesn't crash when encountering scalable types.
define void @callslotoptzn(<vscale x 4 x float> %val, <vscale x 4 x float>* %out) {
; CHECK-LABEL: @callslotoptzn(
; CHECK-NEXT:    [[ALLOC:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT:    [[IDX:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
; CHECK-NEXT:    [[BALLOC:%.*]] = getelementptr inbounds <vscale x 4 x float>, <vscale x 4 x float>* [[ALLOC]], i64 0, i64 0
; CHECK-NEXT:    [[STRIDE:%.*]] = getelementptr inbounds float, float* [[BALLOC]], <vscale x 4 x i32> [[IDX]]
; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> [[VAL:%.*]], <vscale x 4 x float*> [[STRIDE]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT:    [[LI:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* [[ALLOC]], align 4
; CHECK-NEXT:    store <vscale x 4 x float> [[LI]], <vscale x 4 x float>* [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %alloc = alloca <vscale x 4 x float>, align 16
  %idx = tail call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
  %balloc = getelementptr inbounds <vscale x 4 x float>, <vscale x 4 x float>* %alloc, i64 0, i64 0
  %stride = getelementptr inbounds float, float* %balloc, <vscale x 4 x i32> %idx
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %stride, i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
  %li = load <vscale x 4 x float>, <vscale x 4 x float>* %alloc, align 4
  store <vscale x 4 x float> %li, <vscale x 4 x float>* %out, align 4
  ret void
}

declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)
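For the `byval` path exercised by `@byval_caller` above, the commit message says the optimization now works on scalable types "in theory": the copy must be known to cover the whole byval type, which a constant `memcpy` length can never guarantee against a scalable size. Below is a minimal sketch of that comparison using `TypeSize`'s relational helpers; the function name `copyCoversByVal` is hypothetical, and this paraphrases the patch rather than quoting it.

```cpp
#include "llvm/Support/TypeSize.h"
#include <cstdint>
using namespace llvm;

// A constant memcpy of CopyLen bytes covers the whole byval argument only
// if CopyLen >= sizeof(ByValTy) holds for every possible vscale. For a
// scalable ByValSize, a fixed CopyLen can never establish this, so
// isKnownGE returns false and the byval optimization stays disabled for
// scalable types.
static bool copyCoversByVal(uint64_t CopyLen, TypeSize ByValSize) {
  return TypeSize::isKnownGE(TypeSize::getFixed(CopyLen), ByValSize);
}
```

Because `isKnownGE` between a fixed and a scalable quantity is only true when the relation holds for every vscale, this check currently fails for all scalable byval types; a future upper bound on vscale, such as one derived from the `vscale_range` attribute, could change that.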