84 changes: 84 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,87 @@ body: |
$w1 = COPY %o_wide
RET_ReallyLR implicit $w0
...
---
name: add_multiuse
body: |
bb.0:
liveins: $w0, $w1
; CHECK-LABEL: name: add_multiuse
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: $w1 = COPY [[COPY]](s32)
; CHECK-NEXT: $w2 = COPY %const(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
%const:_(s32) = G_CONSTANT i32 0
%add:_(s32), %o:_(s1) = G_SADDO %0, %const
%o_wide:_(s32) = G_ZEXT %o(s1)
$w0 = COPY %add(s32)
$w1 = COPY %add(s32)
$w2 = COPY %o_wide
RET_ReallyLR implicit $w0
...
---
name: add_vector
body: |
bb.0:
liveins: $w0, $w1
; CHECK-LABEL: name: add_vector
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: %bv1:_(<4 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: %add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
; CHECK-NEXT: $q0 = COPY %add(<4 x s32>)
; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s32) = COPY $w2
%3:_(s32) = COPY $w3
%bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
%bv1:_(<4 x s32>) = G_BUILD_VECTOR %2:_(s32), %3:_(s32), %2:_(s32), %3:_(s32)
%add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
%o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
$q0 = COPY %add(<4 x s32>)
$q1 = COPY %o_wide
RET_ReallyLR implicit $w0
...
---
name: add_splat_vector
body: |
bb.0:
liveins: $w0, $w1
; CHECK-LABEL: name: add_splat_vector
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; CHECK-NEXT: %o:_(<4 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1), [[C]](s1), [[C]](s1)
; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s32) = COPY $w2
%3:_(s32) = COPY $w3
%const:_(s32) = G_CONSTANT i32 0
%bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
%bv1:_(<4 x s32>) = G_BUILD_VECTOR %const:_(s32), %const:_(s32), %const:_(s32), %const:_(s32)
%add:_(<4 x s32>), %o:_(<4 x s1>) = G_SADDO %bv0, %bv1
%o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
$q0 = COPY %add(<4 x s32>)
$q1 = COPY %o_wide
RET_ReallyLR implicit $w0
...
28 changes: 18 additions & 10 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ body: |
bb.0:
; CHECK-LABEL: name: abs_s32
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
;
; CHECK-CSSC-LABEL: name: abs_s32
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]]
Expand All @@ -28,11 +29,12 @@ body: |
bb.0:
; CHECK-LABEL: name: abs_s64
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[ASHR]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ADD]], [[ASHR]]
; CHECK-NEXT: $x0 = COPY [[XOR]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[C]], [[COPY]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
;
; CHECK-CSSC-LABEL: name: abs_s64
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s64) = G_ABS [[COPY]]
Expand All @@ -55,6 +57,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s16>) = G_ABS [[COPY]]
; CHECK-NEXT: $d0 = COPY [[ABS]](<4 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
;
; CHECK-CSSC-LABEL: name: abs_v4s16
; CHECK-CSSC: liveins: $d0
; CHECK-CSSC-NEXT: {{ $}}
Expand Down Expand Up @@ -82,6 +85,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s16>) = G_ABS [[COPY]]
; CHECK-NEXT: $q0 = COPY [[ABS]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
;
; CHECK-CSSC-LABEL: name: abs_v8s16
; CHECK-CSSC: liveins: $q0
; CHECK-CSSC-NEXT: {{ $}}
Expand Down Expand Up @@ -109,6 +113,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<2 x s32>) = G_ABS [[COPY]]
; CHECK-NEXT: $d0 = COPY [[ABS]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
;
; CHECK-CSSC-LABEL: name: abs_v2s32
; CHECK-CSSC: liveins: $d0
; CHECK-CSSC-NEXT: {{ $}}
Expand Down Expand Up @@ -136,6 +141,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s32>) = G_ABS [[COPY]]
; CHECK-NEXT: $q0 = COPY [[ABS]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
;
; CHECK-CSSC-LABEL: name: abs_v4s32
; CHECK-CSSC: liveins: $q0
; CHECK-CSSC-NEXT: {{ $}}
Expand Down Expand Up @@ -163,6 +169,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s8>) = G_ABS [[COPY]]
; CHECK-NEXT: $d0 = COPY [[ABS]](<8 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
;
; CHECK-CSSC-LABEL: name: abs_v4s8
; CHECK-CSSC: liveins: $d0
; CHECK-CSSC-NEXT: {{ $}}
Expand Down Expand Up @@ -190,6 +197,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<16 x s8>) = G_ABS [[COPY]]
; CHECK-NEXT: $q0 = COPY [[ABS]](<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
;
; CHECK-CSSC-LABEL: name: abs_v16s8
; CHECK-CSSC: liveins: $q0
; CHECK-CSSC-NEXT: {{ $}}
Expand Down
26 changes: 11 additions & 15 deletions llvm/test/CodeGen/AArch64/abs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ define i8 @abs_i8(i8 %a){
; CHECK-GI-LABEL: abs_i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: asr w8, w8, #7
; CHECK-GI-NEXT: add w9, w0, w8
; CHECK-GI-NEXT: eor w0, w9, w8
; CHECK-GI-NEXT: cmp w8, #0
; CHECK-GI-NEXT: cneg w0, w0, le
; CHECK-GI-NEXT: ret
entry:
%res = call i8 @llvm.abs.i8(i8 %a, i1 0)
Expand All @@ -36,9 +35,8 @@ define i16 @abs_i16(i16 %a){
; CHECK-GI-LABEL: abs_i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxth w8, w0
; CHECK-GI-NEXT: asr w8, w8, #15
; CHECK-GI-NEXT: add w9, w0, w8
; CHECK-GI-NEXT: eor w0, w9, w8
; CHECK-GI-NEXT: cmp w8, #0
; CHECK-GI-NEXT: cneg w0, w0, le
; CHECK-GI-NEXT: ret
entry:
%res = call i16 @llvm.abs.i16(i16 %a, i1 0)
Expand All @@ -55,9 +53,8 @@ define i32 @abs_i32(i32 %a){
;
; CHECK-GI-LABEL: abs_i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: asr w8, w0, #31
; CHECK-GI-NEXT: add w9, w0, w8
; CHECK-GI-NEXT: eor w0, w9, w8
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cneg w0, w0, le
; CHECK-GI-NEXT: ret
entry:
%res = call i32 @llvm.abs.i32(i32 %a, i1 0)
Expand All @@ -74,9 +71,8 @@ define i64 @abs_i64(i64 %a){
;
; CHECK-GI-LABEL: abs_i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: asr x8, x0, #63
; CHECK-GI-NEXT: add x9, x0, x8
; CHECK-GI-NEXT: eor x0, x9, x8
; CHECK-GI-NEXT: cmp x0, #0
; CHECK-GI-NEXT: cneg x0, x0, le
; CHECK-GI-NEXT: ret
entry:
%res = call i64 @llvm.abs.i64(i64 %a, i1 0)
Expand Down Expand Up @@ -248,9 +244,9 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
; CHECK-GI-LABEL: abs_v1i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: asr w9, w8, #31
; CHECK-GI-NEXT: add w8, w8, w9
; CHECK-GI-NEXT: eor w8, w8, w9
; CHECK-GI-NEXT: fmov w9, s0
; CHECK-GI-NEXT: cmp w8, #0
; CHECK-GI-NEXT: cneg w8, w9, le
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/arm64-xaluo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2643,8 +2643,7 @@ define i8 @pr60530() {
;
; GISEL-LABEL: pr60530:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #1 // =0x1
; GISEL-NEXT: sbfx w0, w8, #0, #1
; GISEL-NEXT: mov w0, #255 // =0xff
; GISEL-NEXT: ret
%1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 1)
%2 = extractvalue { i8, i1 } %1, 1
Expand Down
39 changes: 8 additions & 31 deletions llvm/test/CodeGen/AArch64/overflow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -64,21 +64,10 @@ entry:
}

define i32 @saddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
; SDAG-LABEL: saddo.select.i64:
; SDAG: // %bb.0: // %entry
; SDAG-NEXT: mov w0, w1
; SDAG-NEXT: ret
;
; GISEL-LABEL: saddo.select.i64:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: mov w8, #13 // =0xd
; GISEL-NEXT: and x9, x3, #0xc
; GISEL-NEXT: and x8, x4, x8
; GISEL-NEXT: cmn x9, x8
; GISEL-NEXT: cset w8, vs
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: ret
; CHECK-LABEL: saddo.select.i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
entry:
%lhs = and i64 %v4, 12
%rhs = and i64 %v5, 13
Expand All @@ -89,22 +78,10 @@ entry:
}

define i32 @uaddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
; SDAG-LABEL: uaddo.select.i64:
; SDAG: // %bb.0: // %entry
; SDAG-NEXT: mov w0, w1
; SDAG-NEXT: ret
;
; GISEL-LABEL: uaddo.select.i64:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: mov w8, #9 // =0x9
; GISEL-NEXT: mov w9, #10 // =0xa
; GISEL-NEXT: and x8, x3, x8
; GISEL-NEXT: and x9, x4, x9
; GISEL-NEXT: cmn x8, x9
; GISEL-NEXT: cset w8, hs
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: ret
; CHECK-LABEL: uaddo.select.i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
entry:
%lhs = and i64 %v4, 9
%rhs = and i64 %v5, 10
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
Expand Down Expand Up @@ -612,7 +612,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
Expand Down Expand Up @@ -978,7 +978,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
Expand Down Expand Up @@ -1338,7 +1338,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
Expand Down
929 changes: 929 additions & 0 deletions llvm/test/CodeGen/X86/apx/domain-reassignment.mir

Large diffs are not rendered by default.

163 changes: 138 additions & 25 deletions llvm/test/Transforms/Attributor/align.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; TEST 1
;;
;.
; CHECK: @[[A1:[a-zA-Z0-9_$"\\.-]+]] = common global i8 0, align 8
; CHECK: @[[A2:[a-zA-Z0-9_$"\\.-]+]] = common global i8 0, align 16
; CHECK: @[[CND:[a-zA-Z0-9_$"\\.-]+]] = external global i1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i8 0, align 32
; CHECK: @a1 = common global i8 0, align 8
; CHECK: @a2 = common global i8 0, align 16
; CHECK: @cnd = external global i1
; CHECK: @G = global i8 0, align 32
;.
define ptr @test1(ptr align 8 %0) #0 {
; CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable
Expand Down Expand Up @@ -158,18 +158,31 @@ define internal ptr @f1(ptr readnone %0) local_unnamed_addr #0 {

; Function Attrs: nounwind readnone ssp uwtable
define ptr @f2(ptr readnone %0) local_unnamed_addr #0 {
; CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable
; CHECK-LABEL: define {{[^@]+}}@f2
; CHECK-SAME: (ptr nofree readnone [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null
; CHECK-NEXT: br i1 [[TMP2]], label [[TMP4:%.*]], label [[TMP3:%.*]]
; CHECK: 3:
; CHECK-NEXT: br label [[TMP5:%.*]]
; CHECK: 4:
; CHECK-NEXT: br label [[TMP5]]
; CHECK: 5:
; CHECK-NEXT: [[TMP6:%.*]] = phi ptr [ [[TMP0]], [[TMP3]] ], [ @a1, [[TMP4]] ]
; CHECK-NEXT: ret ptr [[TMP6]]
; TUNIT: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable
; TUNIT-LABEL: define {{[^@]+}}@f2
; TUNIT-SAME: (ptr nofree readnone [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
; TUNIT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null
; TUNIT-NEXT: br i1 [[TMP2]], label [[TMP4:%.*]], label [[TMP3:%.*]]
; TUNIT: 3:
; TUNIT-NEXT: br label [[TMP5:%.*]]
; TUNIT: 4:
; TUNIT-NEXT: br label [[TMP5]]
; TUNIT: 5:
; TUNIT-NEXT: [[TMP6:%.*]] = phi ptr [ [[TMP0]], [[TMP3]] ], [ @a1, [[TMP4]] ]
; TUNIT-NEXT: ret ptr [[TMP6]]
;
; CGSCC: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable
; CGSCC-LABEL: define {{[^@]+}}@f2
; CGSCC-SAME: (ptr nofree readnone [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CGSCC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null
; CGSCC-NEXT: br i1 [[TMP2]], label [[TMP4:%.*]], label [[TMP3:%.*]]
; CGSCC: 3:
; CGSCC-NEXT: br label [[TMP5:%.*]]
; CGSCC: 4:
; CGSCC-NEXT: br label [[TMP5]]
; CGSCC: 5:
; CGSCC-NEXT: [[TMP6:%.*]] = phi ptr [ [[TMP0]], [[TMP3]] ], [ @a1, [[TMP4]] ]
; CGSCC-NEXT: ret ptr [[TMP6]]
;
%2 = icmp eq ptr %0, null
br i1 %2, label %5, label %3
Expand Down Expand Up @@ -222,7 +235,7 @@ define align 4 ptr @test7() #0 {
; CGSCC: Function Attrs: mustprogress nofree noinline nosync nounwind willreturn memory(none) uwtable
; CGSCC-LABEL: define {{[^@]+}}@test7
; CGSCC-SAME: () #[[ATTR1:[0-9]+]] {
; CGSCC-NEXT: [[C:%.*]] = tail call noundef nonnull align 8 dereferenceable(1) ptr @f1() #[[ATTR14:[0-9]+]]
; CGSCC-NEXT: [[C:%.*]] = tail call noundef nonnull align 8 dereferenceable(1) ptr @f1() #[[ATTR15:[0-9]+]]
; CGSCC-NEXT: ret ptr [[C]]
;
%c = tail call ptr @f1(ptr align 8 dereferenceable(1) @a1)
Expand Down Expand Up @@ -933,7 +946,7 @@ define i32 @musttail_caller_1(ptr %p) {
; TUNIT-NEXT: [[C:%.*]] = load i1, ptr @cnd, align 1
; TUNIT-NEXT: br i1 [[C]], label [[MT:%.*]], label [[EXIT:%.*]]
; TUNIT: mt:
; TUNIT-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(ptr nocapture nofree noundef readonly [[P]]) #[[ATTR12:[0-9]+]]
; TUNIT-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(ptr nocapture nofree noundef readonly [[P]]) #[[ATTR13:[0-9]+]]
; TUNIT-NEXT: ret i32 [[V]]
; TUNIT: exit:
; TUNIT-NEXT: ret i32 0
Expand All @@ -944,7 +957,7 @@ define i32 @musttail_caller_1(ptr %p) {
; CGSCC-NEXT: [[C:%.*]] = load i1, ptr @cnd, align 1
; CGSCC-NEXT: br i1 [[C]], label [[MT:%.*]], label [[EXIT:%.*]]
; CGSCC: mt:
; CGSCC-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(ptr nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR15:[0-9]+]]
; CGSCC-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(ptr nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR16:[0-9]+]]
; CGSCC-NEXT: ret i32 [[V]]
; CGSCC: exit:
; CGSCC-NEXT: ret i32 0
Expand Down Expand Up @@ -1076,13 +1089,13 @@ define ptr @aligned_8_return_caller(ptr align(16) %a, i1 %c1, i1 %c2) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; TUNIT-LABEL: define {{[^@]+}}@aligned_8_return_caller
; TUNIT-SAME: (ptr nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR10]] {
; TUNIT-NEXT: [[R:%.*]] = call align 8 ptr @aligned_8_return(ptr noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 noundef [[C1]], i1 [[C2]]) #[[ATTR13:[0-9]+]]
; TUNIT-NEXT: [[R:%.*]] = call align 8 ptr @aligned_8_return(ptr noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 noundef [[C1]], i1 [[C2]]) #[[ATTR14:[0-9]+]]
; TUNIT-NEXT: ret ptr [[R]]
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
; CGSCC-LABEL: define {{[^@]+}}@aligned_8_return_caller
; CGSCC-SAME: (ptr nofree readnone align 16 [[A:%.*]], i1 noundef [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR13:[0-9]+]] {
; CGSCC-NEXT: [[R:%.*]] = call align 8 ptr @aligned_8_return(ptr noalias nofree readnone align 16 [[A]], i1 noundef [[C1]], i1 [[C2]]) #[[ATTR14]]
; CGSCC-NEXT: [[R:%.*]] = call align 8 ptr @aligned_8_return(ptr noalias nofree readnone align 16 [[A]], i1 noundef [[C1]], i1 [[C2]]) #[[ATTR15]]
; CGSCC-NEXT: ret ptr [[R]]
;
%r = call ptr @aligned_8_return(ptr %a, i1 %c1, i1 %c2)
Expand All @@ -1101,6 +1114,104 @@ entry:
ret i32 0
}

define i64 @infer_align_atomicrmw(ptr align 4 %p) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; TUNIT-LABEL: define {{[^@]+}}@infer_align_atomicrmw
; TUNIT-SAME: (ptr nocapture nofree align 16 [[P:%.*]]) #[[ATTR12:[0-9]+]] {
; TUNIT-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i64, ptr [[P]], i64 1
; TUNIT-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i64, ptr [[ARRAYIDX0]], i64 3
; TUNIT-NEXT: [[RET:%.*]] = atomicrmw add ptr [[ARRAYIDX1]], i64 4 seq_cst, align 16
; TUNIT-NEXT: ret i64 [[RET]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; CGSCC-LABEL: define {{[^@]+}}@infer_align_atomicrmw
; CGSCC-SAME: (ptr nocapture nofree align 16 [[P:%.*]]) #[[ATTR14:[0-9]+]] {
; CGSCC-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i64, ptr [[P]], i64 1
; CGSCC-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i64, ptr [[ARRAYIDX0]], i64 3
; CGSCC-NEXT: [[RET:%.*]] = atomicrmw add ptr [[ARRAYIDX1]], i64 4 seq_cst, align 16
; CGSCC-NEXT: ret i64 [[RET]]
;
%arrayidx0 = getelementptr i64, ptr %p, i64 1
%arrayidx1 = getelementptr i64, ptr %arrayidx0, i64 3
%ret = atomicrmw add ptr %arrayidx1, i64 4 seq_cst, align 16
ret i64 %ret
}

define ptr @infer_align_atomicrmw_ptr(ptr align 4 %p, ptr %val) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; TUNIT-LABEL: define {{[^@]+}}@infer_align_atomicrmw_ptr
; TUNIT-SAME: (ptr nocapture nofree align 16 [[P:%.*]], ptr nofree [[VAL:%.*]]) #[[ATTR12]] {
; TUNIT-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i64, ptr [[P]], i64 1
; TUNIT-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i64, ptr [[ARRAYIDX0]], i64 3
; TUNIT-NEXT: [[RET:%.*]] = atomicrmw xchg ptr [[ARRAYIDX1]], ptr [[VAL]] seq_cst, align 16
; TUNIT-NEXT: ret ptr [[RET]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; CGSCC-LABEL: define {{[^@]+}}@infer_align_atomicrmw_ptr
; CGSCC-SAME: (ptr nocapture nofree align 16 [[P:%.*]], ptr nofree [[VAL:%.*]]) #[[ATTR14]] {
; CGSCC-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i64, ptr [[P]], i64 1
; CGSCC-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i64, ptr [[ARRAYIDX0]], i64 3
; CGSCC-NEXT: [[RET:%.*]] = atomicrmw xchg ptr [[ARRAYIDX1]], ptr [[VAL]] seq_cst, align 16
; CGSCC-NEXT: ret ptr [[RET]]
;
%arrayidx0 = getelementptr i64, ptr %p, i64 1
%arrayidx1 = getelementptr i64, ptr %arrayidx0, i64 3
%ret = atomicrmw xchg ptr %arrayidx1, ptr %val seq_cst, align 16
ret ptr %ret
}

define i64 @infer_align_cmpxchg(ptr align 4 %p) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; TUNIT-LABEL: define {{[^@]+}}@infer_align_cmpxchg
; TUNIT-SAME: (ptr nocapture nofree align 16 [[P:%.*]]) #[[ATTR12]] {
; TUNIT-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i64, ptr [[P]], i64 1
; TUNIT-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i64, ptr [[ARRAYIDX0]], i64 3
; TUNIT-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[ARRAYIDX1]], i64 4, i64 1 seq_cst seq_cst, align 16
; TUNIT-NEXT: [[RET:%.*]] = extractvalue { i64, i1 } [[CMPX]], 0
; TUNIT-NEXT: ret i64 [[RET]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; CGSCC-LABEL: define {{[^@]+}}@infer_align_cmpxchg
; CGSCC-SAME: (ptr nocapture nofree align 16 [[P:%.*]]) #[[ATTR14]] {
; CGSCC-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i64, ptr [[P]], i64 1
; CGSCC-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i64, ptr [[ARRAYIDX0]], i64 3
; CGSCC-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[ARRAYIDX1]], i64 4, i64 1 seq_cst seq_cst, align 16
; CGSCC-NEXT: [[RET:%.*]] = extractvalue { i64, i1 } [[CMPX]], 0
; CGSCC-NEXT: ret i64 [[RET]]
;
%arrayidx0 = getelementptr i64, ptr %p, i64 1
%arrayidx1 = getelementptr i64, ptr %arrayidx0, i64 3
%cmpx = cmpxchg ptr %arrayidx1, i64 4, i64 1 seq_cst seq_cst, align 16
%ret = extractvalue { i64, i1 } %cmpx, 0
ret i64 %ret
}

define ptr @infer_align_cmpxchg_ptr(ptr align 4 %p, ptr %cmp0, ptr %cmp1) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; TUNIT-LABEL: define {{[^@]+}}@infer_align_cmpxchg_ptr
; TUNIT-SAME: (ptr nocapture nofree align 16 [[P:%.*]], ptr nofree [[CMP0:%.*]], ptr nofree [[CMP1:%.*]]) #[[ATTR12]] {
; TUNIT-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i64, ptr [[P]], i64 1
; TUNIT-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i64, ptr [[ARRAYIDX0]], i64 3
; TUNIT-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[ARRAYIDX1]], ptr [[CMP0]], ptr [[CMP1]] seq_cst seq_cst, align 16
; TUNIT-NEXT: [[RET:%.*]] = extractvalue { ptr, i1 } [[CMPX]], 0
; TUNIT-NEXT: ret ptr [[RET]]
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; CGSCC-LABEL: define {{[^@]+}}@infer_align_cmpxchg_ptr
; CGSCC-SAME: (ptr nocapture nofree align 16 [[P:%.*]], ptr nofree [[CMP0:%.*]], ptr nofree [[CMP1:%.*]]) #[[ATTR14]] {
; CGSCC-NEXT: [[ARRAYIDX0:%.*]] = getelementptr i64, ptr [[P]], i64 1
; CGSCC-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i64, ptr [[ARRAYIDX0]], i64 3
; CGSCC-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[ARRAYIDX1]], ptr [[CMP0]], ptr [[CMP1]] seq_cst seq_cst, align 16
; CGSCC-NEXT: [[RET:%.*]] = extractvalue { ptr, i1 } [[CMPX]], 0
; CGSCC-NEXT: ret ptr [[RET]]
;
%arrayidx0 = getelementptr i64, ptr %p, i64 1
%arrayidx1 = getelementptr i64, ptr %arrayidx0, i64 3
%cmpx = cmpxchg ptr %arrayidx1, ptr %cmp0, ptr %cmp1 seq_cst seq_cst, align 16
%ret = extractvalue { ptr, i1 } %cmpx, 0
ret ptr %ret
}

declare void @implicit_cast_callee(i64)

attributes #0 = { nounwind uwtable noinline }
Expand All @@ -1119,8 +1230,9 @@ attributes #2 = { null_pointer_is_valid }
; TUNIT: attributes #[[ATTR9]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
; TUNIT: attributes #[[ATTR10]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
; TUNIT: attributes #[[ATTR11]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(read) }
; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn memory(read) }
; TUNIT: attributes #[[ATTR13]] = { nofree nosync nounwind willreturn }
; TUNIT: attributes #[[ATTR12]] = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) }
; TUNIT: attributes #[[ATTR13]] = { nofree nosync nounwind willreturn memory(read) }
; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind willreturn }
;.
; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable }
; CGSCC: attributes #[[ATTR1]] = { mustprogress nofree noinline nosync nounwind willreturn memory(none) uwtable }
Expand All @@ -1136,6 +1248,7 @@ attributes #2 = { null_pointer_is_valid }
; CGSCC: attributes #[[ATTR11]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
; CGSCC: attributes #[[ATTR12]] = { mustprogress nofree nosync nounwind willreturn memory(read) }
; CGSCC: attributes #[[ATTR13]] = { mustprogress nofree nosync nounwind willreturn memory(none) }
; CGSCC: attributes #[[ATTR14]] = { nofree nosync willreturn }
; CGSCC: attributes #[[ATTR15]] = { nofree willreturn memory(read) }
; CGSCC: attributes #[[ATTR14]] = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) }
; CGSCC: attributes #[[ATTR15]] = { nofree nosync willreturn }
; CGSCC: attributes #[[ATTR16]] = { nofree willreturn memory(read) }
;.
12 changes: 6 additions & 6 deletions llvm/test/Transforms/Attributor/nocapture-1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -524,13 +524,13 @@ define void @test6_2(ptr %x6_2, ptr %y6_2, ptr %z6_2) {
define void @test_cmpxchg(ptr %p) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; TUNIT-LABEL: define {{[^@]+}}@test_cmpxchg
; TUNIT-SAME: (ptr nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR8:[0-9]+]] {
; TUNIT-SAME: (ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR8:[0-9]+]] {
; TUNIT-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i32 0, i32 1 acquire monotonic, align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; CGSCC-LABEL: define {{[^@]+}}@test_cmpxchg
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR11:[0-9]+]] {
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR11:[0-9]+]] {
; CGSCC-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i32 0, i32 1 acquire monotonic, align 4
; CGSCC-NEXT: ret void
;
Expand All @@ -541,13 +541,13 @@ define void @test_cmpxchg(ptr %p) {
define void @test_cmpxchg_ptr(ptr %p, ptr %q) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; TUNIT-LABEL: define {{[^@]+}}@test_cmpxchg_ptr
; TUNIT-SAME: (ptr nocapture nofree noundef nonnull dereferenceable(8) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR8]] {
; TUNIT-SAME: (ptr nocapture nofree noundef nonnull align 8 dereferenceable(8) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR8]] {
; TUNIT-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], ptr null, ptr [[Q]] acquire monotonic, align 8
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; CGSCC-LABEL: define {{[^@]+}}@test_cmpxchg_ptr
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull dereferenceable(8) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR11]] {
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull align 8 dereferenceable(8) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR11]] {
; CGSCC-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], ptr null, ptr [[Q]] acquire monotonic, align 8
; CGSCC-NEXT: ret void
;
Expand All @@ -558,13 +558,13 @@ define void @test_cmpxchg_ptr(ptr %p, ptr %q) {
define void @test_atomicrmw(ptr %p) {
; TUNIT: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; TUNIT-LABEL: define {{[^@]+}}@test_atomicrmw
; TUNIT-SAME: (ptr nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR8]] {
; TUNIT-SAME: (ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR8]] {
; TUNIT-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i32 1 seq_cst, align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; CGSCC-LABEL: define {{[^@]+}}@test_atomicrmw
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR11]] {
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR11]] {
; CGSCC-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i32 1 seq_cst, align 4
; CGSCC-NEXT: ret void
;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/Attributor/nofpclass.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1813,7 +1813,7 @@ define double @fpext(float nofpclass(inf nan) %arg) {
define float @atomicrmw_fadd(ptr %ptr, float nofpclass(inf nan) %val) {
; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
; CHECK-LABEL: define float @atomicrmw_fadd
; CHECK-SAME: (ptr nocapture nofree noundef nonnull dereferenceable(4) [[PTR:%.*]], float nofpclass(nan inf) [[VAL:%.*]]) #[[ATTR6:[0-9]+]] {
; CHECK-SAME: (ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], float nofpclass(nan inf) [[VAL:%.*]]) #[[ATTR6:[0-9]+]] {
; CHECK-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr [[PTR]], float [[VAL]] seq_cst, align 4
; CHECK-NEXT: ret float [[RESULT]]
;
Expand Down
31 changes: 22 additions & 9 deletions llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
; RUN: opt -aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
; RUN: opt -aarch64-lit -simplifycfg -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
; RUN: opt -aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
; RUN: opt -p aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
; RUN: opt -passes='function(loop(aarch64-lit)),simplifycfg' -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
; RUN: opt -p aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM

define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
; CHECK-LABEL: define i32 @compare_bytes_simple(
Expand Down Expand Up @@ -780,19 +783,22 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
; CHECK-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
; CHECK: while.body:
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
; CHECK: byte.compare:
; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT]]
; CHECK: while.end.loopexit:
; CHECK-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[X]], [[ENTRY:%.*]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[INC_LCSSA]]
;
; LOOP-DEL-LABEL: define i32 @compare_bytes_extra_cmp(
Expand Down Expand Up @@ -884,17 +890,20 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[PH]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
; NO-TRANSFORM: while.body:
; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
; NO-TRANSFORM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
; NO-TRANSFORM: while.end.loopexit:
; NO-TRANSFORM-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[INC]], [[WHILE_COND]] ], [ [[INC]], [[WHILE_BODY]] ]
; NO-TRANSFORM-NEXT: br label [[WHILE_END]]
; NO-TRANSFORM: while.end:
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ], [ [[X]], [[ENTRY:%.*]] ]
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
;
entry:
Expand All @@ -908,7 +917,7 @@ while.cond:
%len.addr = phi i32 [ %len, %ph ], [ %inc, %while.body ]
%inc = add i32 %len.addr, 1
%cmp.not = icmp eq i32 %inc, %n
br i1 %cmp.not, label %while.end, label %while.body
br i1 %cmp.not, label %while.end.loopexit, label %while.body

while.body:
%idxprom = zext i32 %inc to i64
Expand All @@ -917,10 +926,14 @@ while.body:
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
%1 = load i8, ptr %arrayidx2
%cmp.not2 = icmp eq i8 %0, %1
br i1 %cmp.not2, label %while.cond, label %while.end
br i1 %cmp.not2, label %while.cond, label %while.end.loopexit

while.end.loopexit:
%inc.lcssa1 = phi i32 [ %inc, %while.cond ], [ %inc, %while.body ]
br label %while.end

while.end:
%inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ], [ %x, %entry ]
%inc.lcssa = phi i32 [ %x, %entry ], [ %inc.lcssa1, %while.end.loopexit ]
ret i32 %inc.lcssa
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,10 +366,6 @@ struct BufferizationOptions {
DefaultMemorySpaceFn defaultMemorySpaceFn =
[](TensorType t) -> std::optional<Attribute> { return Attribute(); };

/// Seed for the analysis fuzzer. If set to `0`, the fuzzer is deactivated.
/// Should be used only with `testAnalysisOnly = true`.
unsigned analysisFuzzerSeed = 0;

/// If set to `true`, the analysis is skipped. A buffer is copied before every
/// write. This flag cannot be used together with `testAnalysisOnly = true`.
bool copyBeforeWrite = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ class OneShotAnalysisState;

/// Options for analysis-enabled bufferization.
struct OneShotBufferizationOptions : public BufferizationOptions {
enum class AnalysisHeuristic { BottomUp, TopDown };
enum class AnalysisHeuristic {
BottomUp,
TopDown,
BottomUpFromTerminators,
Fuzzer
};

OneShotBufferizationOptions() = default;

Expand All @@ -42,6 +47,11 @@ struct OneShotBufferizationOptions : public BufferizationOptions {
/// Specify the functions that should not be analyzed. copyBeforeWrite will be
/// set to true when bufferizing them.
llvm::ArrayRef<std::string> noAnalysisFuncFilter;

/// Seed for the analysis fuzzer. Used only if the heuristic is set to
/// `AnalysisHeuristic::Fuzzer`. The fuzzer should be used only with
/// `testAnalysisOnly = true`.
unsigned analysisFuzzerSeed = 0;
};

/// State for analysis-enabled bufferization. This class keeps track of alias
Expand Down
18 changes: 18 additions & 0 deletions mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,24 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
argument is read/written and which returned values are aliasing/equivalent.
For debugging purposes, such information can be printed with
`test-analysis-only`.

The order in which ops are analyzed is important. The analysis is greedy and
ops that are analyzed earlier are more likely to bufferize in-place. The
heuristic can be set with `analysis-heuristic`. At the moment, the following
heuristics are available:

* `bottom-up` (default): Analyze ops from bottom to top.
* `top-down`: Analyze ops from top to bottom.
* `fuzzer`: Randomize the ordering of ops with `analysis-fuzzer-seed`.
* `bottom-up-from-terminators`: Traverse the reverse use-def chains of
tensor IR, starting from region branch terminators (bottom-up). Nested
regions are traversed before enclosing regions. Analyze the traversed ops
first, then analyze the remaining ops bottom-up. This heuristic is useful
for bufferizing loop constructs. One-Shot Bufferize currently supports
only such IR where yielded tensor values bufferize to equivalent region
iter_args, and first analyzing all ops on the path from the "yielding" op
to the beginning of the loop body makes it more likely for the region
iter_args and yielded values to bufferize to equivalent buffers.
}];
let options = [
Option<"allowReturnAllocsFromLoops", "allow-return-allocs-from-loops",
Expand Down
5 changes: 5 additions & 0 deletions mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ parseHeuristicOption(const std::string &s) {
return OneShotBufferizationOptions::AnalysisHeuristic::BottomUp;
if (s == "top-down")
return OneShotBufferizationOptions::AnalysisHeuristic::TopDown;
if (s == "bottom-up-from-terminators")
return OneShotBufferizationOptions::AnalysisHeuristic::
BottomUpFromTerminators;
if (s == "fuzzer")
return OneShotBufferizationOptions::AnalysisHeuristic::Fuzzer;
llvm_unreachable("invalid analysisheuristic option");
}

Expand Down
116 changes: 90 additions & 26 deletions mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Iterators.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
Expand Down Expand Up @@ -1094,41 +1095,104 @@ static void equivalenceAnalysis(Operation *op, OneShotAnalysisState &state) {
equivalenceAnalysis(ops, state);
}

LogicalResult OneShotAnalysisState::analyzeOp(Operation *op,
const DominanceInfo &domInfo) {
// Collect ops so we can build our own reverse traversal.
SmallVector<Operation *> ops;
op->walk([&](Operation *op) {
// No tensors => no buffers.
if (!hasTensorSemantics(op))
/// "Bottom-up from terminators" heuristic.
static SmallVector<Operation *>
bottomUpFromTerminatorsHeuristic(Operation *op,
const OneShotAnalysisState &state) {
SetVector<Operation *> traversedOps;

// Find region terminators.
op->walk<WalkOrder::PostOrder>([&](RegionBranchTerminatorOpInterface term) {
if (!traversedOps.insert(term))
return;
ops.push_back(op);
// Follow the reverse SSA use-def chain from each yielded value as long as
// we stay within the same region.
SmallVector<OpResult> worklist;
for (Value v : term->getOperands()) {
if (!isa<TensorType>(v.getType()))
continue;
auto opResult = dyn_cast<OpResult>(v);
if (!opResult)
continue;
worklist.push_back(opResult);
}
while (!worklist.empty()) {
OpResult opResult = worklist.pop_back_val();
Operation *defOp = opResult.getDefiningOp();
if (!traversedOps.insert(defOp))
continue;
if (!term->getParentRegion()->findAncestorOpInRegion(*defOp))
continue;
AliasingOpOperandList aliases = state.getAliasingOpOperands(opResult);
for (auto alias : aliases) {
Value v = alias.opOperand->get();
if (!isa<TensorType>(v.getType()))
continue;
auto opResult = dyn_cast<OpResult>(v);
if (!opResult)
continue;
worklist.push_back(opResult);
}
}
});

if (getOptions().analysisFuzzerSeed) {
// This is a fuzzer. For testing purposes only. Randomize the order in which
// operations are analyzed. The bufferization quality is likely worse, but
// we want to make sure that no assertions are triggered anywhere.
std::mt19937 g(getOptions().analysisFuzzerSeed);
llvm::shuffle(ops.begin(), ops.end(), g);
}
// Analyze traversed ops, then all remaining ops.
SmallVector<Operation *> result(traversedOps.begin(), traversedOps.end());
op->walk<WalkOrder::PostOrder, ReverseIterator>([&](Operation *op) {
if (!traversedOps.contains(op) && hasTensorSemantics(op))
result.push_back(op);
});
return result;
}

LogicalResult OneShotAnalysisState::analyzeOp(Operation *op,
const DominanceInfo &domInfo) {
OneShotBufferizationOptions::AnalysisHeuristic heuristic =
getOptions().analysisHeuristic;
if (heuristic == OneShotBufferizationOptions::AnalysisHeuristic::BottomUp) {
// Default: Walk ops in reverse for better interference analysis.
for (Operation *op : reverse(ops))
if (failed(analyzeSingleOp(op, domInfo)))
return failure();
} else if (heuristic ==
OneShotBufferizationOptions::AnalysisHeuristic::TopDown) {
for (Operation *op : ops)
if (failed(analyzeSingleOp(op, domInfo)))
return failure();

SmallVector<Operation *> orderedOps;
if (heuristic ==
OneShotBufferizationOptions::AnalysisHeuristic::BottomUpFromTerminators) {
orderedOps = bottomUpFromTerminatorsHeuristic(op, *this);
} else {
llvm_unreachable("unsupported heuristic");
op->walk([&](Operation *op) {
// No tensors => no buffers.
if (!hasTensorSemantics(op))
return;
orderedOps.push_back(op);
});
switch (heuristic) {
case OneShotBufferizationOptions::AnalysisHeuristic::BottomUp: {
// Default: Walk ops in reverse for better interference analysis.
std::reverse(orderedOps.begin(), orderedOps.end());
break;
}
case OneShotBufferizationOptions::AnalysisHeuristic::TopDown: {
// Ops are already sorted top-down in `orderedOps`.
break;
}
case OneShotBufferizationOptions::AnalysisHeuristic::Fuzzer: {
assert(getOptions().analysisFuzzerSeed &&
"expected that fuzzer seed it set");
// This is a fuzzer. For testing purposes only. Randomize the order in
// which operations are analyzed. The bufferization quality is likely
// worse, but we want to make sure that no assertions are triggered
// anywhere.
std::mt19937 g(getOptions().analysisFuzzerSeed);
llvm::shuffle(orderedOps.begin(), orderedOps.end(), g);
break;
}
default: {
llvm_unreachable("unsupported heuristic");
}
}
}

// Analyze ops in the computed order.
for (Operation *op : orderedOps)
if (failed(analyzeSingleOp(op, domInfo)))
return failure();

equivalenceAnalysis(op, *this);
return success();
}
Expand Down
15 changes: 10 additions & 5 deletions mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3269,15 +3269,20 @@ DiagnosedSilenceableFailure transform::FlattenElementwiseLinalgOp::applyToOne(
transform::ApplyToEachResultList &results,
transform::TransformState &state) {
rewriter.setInsertionPoint(target);
if (target.getNumLoops() <= 1)
if (!isElementwise(target)) {
failed(rewriter.notifyMatchFailure(
target, "only elementwise flattening is supported"));
return emitDefaultSilenceableFailure(target);
}
// If rank <= 1, do nothing
if (target.getNumLoops() <= 1) {
results.push_back(target);
return DiagnosedSilenceableFailure::success();
}
ReassociationIndices reassociation(target.getNumLoops());
std::iota(reassociation.begin(), reassociation.end(), 0);
auto maybeFlattened =
(isElementwise(target))
? collapseOpIterationDims(target, reassociation, rewriter)
: FailureOr<CollapseResult>(rewriter.notifyMatchFailure(
target, "only elementwise flattening is supported"));
collapseOpIterationDims(target, reassociation, rewriter);
if (failed(maybeFlattened))
return emitDefaultSilenceableFailure(target);
results.push_back(maybeFlattened->collapsedOp);
Expand Down
2 changes: 2 additions & 0 deletions mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect
MLIRXeGPUEnumsIncGen

LINK_LIBS PUBLIC
MLIRDialectUtils
MLIRIR
MLIRViewLikeInterface
)
6 changes: 3 additions & 3 deletions mlir/test/Dialect/Arith/one-shot-bufferize.mlir
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries" -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops" -canonicalize -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// CHECK-LABEL: func @buffer_not_deallocated(
// CHECK-SAME: %[[t:.*]]: tensor<?xf32>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=bottom-up-from-terminators" -split-input-file | FileCheck %s

// CHECK-LABEL: func @simple_test(
func.func @simple_test(%lb: index, %ub: index, %step: index, %f1: f32, %f2: f32) -> (tensor<5xf32>, tensor<5xf32>) {
%c0 = arith.constant 0 : index
%p = arith.constant 0.0 : f32

// Make sure that ops that feed into region terminators bufferize in-place
// (if possible).
// Note: This test case fails to bufferize with a "top-down" or "bottom-up"
// heuristic.

%0 = tensor.empty() : tensor<5xf32>
%1 = scf.for %iv = %lb to %ub step %step iter_args(%t = %0) -> (tensor<5xf32>) {
// CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill ins(%f1 : f32) outs(%t : tensor<5xf32>) -> tensor<5xf32>
// CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
%3 = linalg.fill ins(%f2 : f32) outs(%t : tensor<5xf32>) -> tensor<5xf32>
%4 = vector.transfer_read %2[%c0], %p : tensor<5xf32>, vector<5xf32>
vector.print %4 : vector<5xf32>
scf.yield %3 : tensor<5xf32>
}

%5 = tensor.empty() : tensor<5xf32>
%6 = scf.for %iv = %lb to %ub step %step iter_args(%t = %0) -> (tensor<5xf32>) {
// CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
%7 = linalg.fill ins(%f1 : f32) outs(%t : tensor<5xf32>) -> tensor<5xf32>
// CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
%8 = linalg.fill ins(%f2 : f32) outs(%t : tensor<5xf32>) -> tensor<5xf32>
%9 = vector.transfer_read %8[%c0], %p : tensor<5xf32>, vector<5xf32>
vector.print %9 : vector<5xf32>
scf.yield %7 : tensor<5xf32>
}

return %1, %6 : tensor<5xf32>, tensor<5xf32>
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=tensor,bufferization allow-unknown-ops" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-TENSOR
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=scf,bufferization allow-unknown-ops" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-SCF
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops" -verify-diagnostics -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -verify-diagnostics -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -verify-diagnostics -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -verify-diagnostics -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -verify-diagnostics -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -verify-diagnostics -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -verify-diagnostics -split-input-file -o /dev/null

// Run with top-down analysis.
// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops analysis-heuristic=top-down" -verify-diagnostics -split-input-file | FileCheck %s --check-prefix=CHECK-TOP-DOWN-ANALYSIS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 " -split-input-file | FileCheck %s --check-prefix=NO-DROP

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file -o /dev/null
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// Try different heuristics. Not checking the result, just make sure that we do
// not crash.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=bottom-up-from-terminators" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=top-down" -split-input-file -o /dev/null

// TODO: Extract op-specific test cases and move them to their respective
// dialects.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1" -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
Expand Down
21 changes: 21 additions & 0 deletions mlir/test/Dialect/Linalg/flatten-elementwise.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,27 @@ module attributes {transform.with_named_sequence} {

// -----

// CHECK-LABEL: func.func @map_already_flat(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<32xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<32xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<32xf32>
// CHECK-NEXT: linalg.map { arith.addf } ins(%[[ARG0]], %[[ARG1]] : memref<32xf32>, memref<32xf32>) outs(%[[ARG2]] : memref<32xf32>)
func.func @map_already_flat(%arg0: memref<32xf32>, %arg1: memref<32xf32>, %arg2: memref<32xf32>) {
linalg.map {arith.addf} ins(%arg0, %arg1: memref<32xf32>, memref<32xf32>) outs(%arg2: memref<32xf32>)
return
}

module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match interface{LinalgOp} in %arg1 : (!transform.any_op) -> !transform.any_op
%flattened = transform.structured.flatten_elementwise %0
: (!transform.any_op) -> !transform.any_op
transform.yield
}
}

// -----

// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func.func @generic
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<32x7xf32>
Expand Down
6 changes: 3 additions & 3 deletions mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries" -canonicalize -buffer-loop-hoisting -drop-equivalent-buffer-results -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
Expand Down
6 changes: 3 additions & 3 deletions mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// CHECK-LABEL: func @scf_for_yield_only
func.func @scf_for_yield_only(
Expand Down
6 changes: 3 additions & 3 deletions mlir/test/Dialect/SCF/one-shot-bufferize.mlir
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries" -cse -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops analysis-heuristic=fuzzer test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops analysis-heuristic=fuzzer test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops analysis-heuristic=fuzzer test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
Expand Down
6 changes: 3 additions & 3 deletions mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries" -drop-equivalent-buffer-results -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,7 @@ module {
%has_runtime = sparse_tensor.has_runtime_library
scf.if %has_runtime {
// sparse_tensor.assemble copies buffers when running with the runtime
// library. Deallocations are needed not needed when running in codgen
// mode.
// library. Deallocations are not needed when running in codegen mode.
bufferization.dealloc_tensor %s4 : tensor<10x10xf64, #SortedCOO>
bufferization.dealloc_tensor %s5 : tensor<10x10xf64, #SortedCOOI32>
bufferization.dealloc_tensor %csr : tensor<2x2xf64, #CSR>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,7 @@ module {
%has_runtime = sparse_tensor.has_runtime_library
scf.if %has_runtime {
// sparse_tensor.assemble copies buffers when running with the runtime
// library. Deallocations are needed not needed when running in codgen
// mode.
// library. Deallocations are not needed when running in codegen mode.
bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC>
bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR>
bufferization.dealloc_tensor %s2 : tensor<4x3x2xf32, #CSRDense>
Expand Down