210 changes: 105 additions & 105 deletions llvm/test/CodeGen/RISCV/pr69586.ll
@@ -927,258 +927,258 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 11
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: li a2, 23
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v16
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: lui a2, 3
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: li a2, 25
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: lui a2, 3
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: li a2, 13
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: li a2, 25
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: li a2, 27
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: li a2, 13
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: li a2, 7
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: li a2, 27
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: li a2, 29
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: li a2, 7
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v26
; REMAT-NEXT: li a2, 15
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: li a2, 29
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v28
; REMAT-NEXT: li a2, 31
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: li a2, 15
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v14, v30
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: li a2, 31
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v6
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: addiw a2, a2, 512
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v4
; REMAT-NEXT: li a2, 17
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: addiw a2, a2, 512
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v2
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: addiw a2, a2, 1536
; REMAT-NEXT: li a2, 17
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24
; REMAT-NEXT: li a2, 9
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: addiw a2, a2, 1536
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v26
; REMAT-NEXT: lui a2, 5
; REMAT-NEXT: addiw a2, a2, -1536
; REMAT-NEXT: li a2, 9
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v28
; REMAT-NEXT: li a2, 19
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: lui a2, 5
; REMAT-NEXT: addiw a2, a2, -1536
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v30
; REMAT-NEXT: lui ra, 5
; REMAT-NEXT: addiw ra, ra, -512
; REMAT-NEXT: add a2, a0, ra
; REMAT-NEXT: li a2, 19
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v14, v6
; REMAT-NEXT: lui s11, 5
; REMAT-NEXT: add a2, a0, s11
; REMAT-NEXT: lui ra, 5
; REMAT-NEXT: addiw ra, ra, -512
; REMAT-NEXT: add a2, a0, ra
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v4
; REMAT-NEXT: lui s10, 5
; REMAT-NEXT: addiw s10, s10, 512
; REMAT-NEXT: add a2, a0, s10
; REMAT-NEXT: lui s11, 5
; REMAT-NEXT: add a2, a0, s11
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v2
; REMAT-NEXT: li s9, 21
; REMAT-NEXT: slli s9, s9, 10
; REMAT-NEXT: add a2, a0, s9
; REMAT-NEXT: lui s10, 5
; REMAT-NEXT: addiw s10, s10, 512
; REMAT-NEXT: add a2, a0, s10
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v24
; REMAT-NEXT: lui s8, 5
; REMAT-NEXT: addiw s8, s8, 1536
; REMAT-NEXT: add a2, a0, s8
; REMAT-NEXT: li s9, 21
; REMAT-NEXT: slli s9, s9, 10
; REMAT-NEXT: add a2, a0, s9
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v26
; REMAT-NEXT: li s7, 11
; REMAT-NEXT: slli s7, s7, 11
; REMAT-NEXT: add a2, a0, s7
; REMAT-NEXT: lui s8, 5
; REMAT-NEXT: addiw s8, s8, 1536
; REMAT-NEXT: add a2, a0, s8
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v28
; REMAT-NEXT: lui s6, 6
; REMAT-NEXT: addiw s6, s6, -1536
; REMAT-NEXT: add a2, a0, s6
; REMAT-NEXT: li s7, 11
; REMAT-NEXT: slli s7, s7, 11
; REMAT-NEXT: add a2, a0, s7
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v30
; REMAT-NEXT: li s5, 23
; REMAT-NEXT: slli s5, s5, 10
; REMAT-NEXT: add a2, a0, s5
; REMAT-NEXT: lui s6, 6
; REMAT-NEXT: addiw s6, s6, -1536
; REMAT-NEXT: add a2, a0, s6
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v6
; REMAT-NEXT: lui s4, 6
; REMAT-NEXT: addiw s4, s4, -512
; REMAT-NEXT: add a2, a0, s4
; REMAT-NEXT: li s5, 23
; REMAT-NEXT: slli s5, s5, 10
; REMAT-NEXT: add a2, a0, s5
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v14, v4
; REMAT-NEXT: lui s3, 6
; REMAT-NEXT: add a2, a0, s3
; REMAT-NEXT: lui s4, 6
; REMAT-NEXT: addiw s4, s4, -512
; REMAT-NEXT: add a2, a0, s4
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v2
; REMAT-NEXT: lui s2, 6
; REMAT-NEXT: addiw s2, s2, 512
; REMAT-NEXT: add a2, a0, s2
; REMAT-NEXT: lui s3, 6
; REMAT-NEXT: add a2, a0, s3
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v24
; REMAT-NEXT: li s1, 25
; REMAT-NEXT: slli s1, s1, 10
; REMAT-NEXT: add a2, a0, s1
; REMAT-NEXT: lui s2, 6
; REMAT-NEXT: addiw s2, s2, 512
; REMAT-NEXT: add a2, a0, s2
; REMAT-NEXT: vle32.v v0, (a2)
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v26
; REMAT-NEXT: lui s0, 6
; REMAT-NEXT: addiw s0, s0, 1536
; REMAT-NEXT: add a2, a0, s0
; REMAT-NEXT: li s1, 25
; REMAT-NEXT: slli s1, s1, 10
; REMAT-NEXT: add a2, a0, s1
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v28
; REMAT-NEXT: li t6, 13
; REMAT-NEXT: slli t6, t6, 11
; REMAT-NEXT: add a2, a0, t6
; REMAT-NEXT: lui s0, 6
; REMAT-NEXT: addiw s0, s0, 1536
; REMAT-NEXT: add a2, a0, s0
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v30
; REMAT-NEXT: lui t5, 7
; REMAT-NEXT: addiw t5, t5, -1536
; REMAT-NEXT: add a2, a0, t5
; REMAT-NEXT: li t6, 13
; REMAT-NEXT: slli t6, t6, 11
; REMAT-NEXT: add a2, a0, t6
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v6
; REMAT-NEXT: li t4, 27
; REMAT-NEXT: slli t4, t4, 10
; REMAT-NEXT: add a2, a0, t4
; REMAT-NEXT: lui t5, 7
; REMAT-NEXT: addiw t5, t5, -1536
; REMAT-NEXT: add a2, a0, t5
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v4
; REMAT-NEXT: lui t3, 7
; REMAT-NEXT: addiw t3, t3, -512
; REMAT-NEXT: add a2, a0, t3
; REMAT-NEXT: li t4, 27
; REMAT-NEXT: slli t4, t4, 10
; REMAT-NEXT: add a2, a0, t4
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v14, v2
; REMAT-NEXT: lui t3, 7
; REMAT-NEXT: addiw t3, t3, -512
; REMAT-NEXT: add a2, a0, t3
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v0
; REMAT-NEXT: lui t2, 7
; REMAT-NEXT: add a2, a0, t2
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v0, (a2)
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v0
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v26
; REMAT-NEXT: lui t1, 7
; REMAT-NEXT: addiw t1, t1, 512
; REMAT-NEXT: add a2, a0, t1
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v26
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v28
; REMAT-NEXT: li t0, 29
; REMAT-NEXT: slli t0, t0, 10
; REMAT-NEXT: add a2, a0, t0
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v28
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v30
; REMAT-NEXT: lui a7, 7
; REMAT-NEXT: addiw a7, a7, 1536
; REMAT-NEXT: add a2, a0, a7
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v30
; REMAT-NEXT: sf.vc.vv 3, 0, v24, v6
; REMAT-NEXT: li a6, 15
; REMAT-NEXT: slli a6, a6, 11
; REMAT-NEXT: add a2, a0, a6
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v24, v6
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v4
; REMAT-NEXT: lui a5, 8
; REMAT-NEXT: addiw a5, a5, -1536
; REMAT-NEXT: add a2, a0, a5
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v4
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2
; REMAT-NEXT: li a4, 31
; REMAT-NEXT: slli a4, a4, 10
; REMAT-NEXT: add a2, a0, a4
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2
; REMAT-NEXT: sf.vc.vv 3, 0, v14, v0
; REMAT-NEXT: lui a3, 8
; REMAT-NEXT: addiw a3, a3, -512
; REMAT-NEXT: add a2, a0, a3
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: lui a2, 8
; REMAT-NEXT: add a0, a0, a2
; REMAT-NEXT: vle32.v v0, (a0)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v18
; REMAT-NEXT: sf.vc.vv 3, 0, v26, v20
; REMAT-NEXT: sf.vc.vv 3, 0, v28, v22
; REMAT-NEXT: sf.vc.vv 3, 0, v30, v24
; REMAT-NEXT: sf.vc.vv 3, 0, v6, v10
; REMAT-NEXT: sf.vc.vv 3, 0, v4, v12
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v16
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v20
; REMAT-NEXT: sf.vc.vv 3, 0, v26, v22
; REMAT-NEXT: sf.vc.vv 3, 0, v28, v24
; REMAT-NEXT: sf.vc.vv 3, 0, v30, v10
; REMAT-NEXT: sf.vc.vv 3, 0, v6, v12
; REMAT-NEXT: sf.vc.vv 3, 0, v4, v14
; REMAT-NEXT: sf.vc.vv 3, 0, v2, v0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: addi a0, a1, 1024
@@ -43,7 +43,6 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call func
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
@@ -55,6 +54,7 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: vl4r.v v20, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; CHECK-NEXT: vfwsub.wv v8, v24, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, mu
; CHECK-NEXT: vfdiv.vv v8, v24, v8, v0.t
@@ -99,7 +99,6 @@ define void @last_chance_recoloring_failure() {
; SUBREGLIVENESS-NEXT: addi a0, sp, 16
; SUBREGLIVENESS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; SUBREGLIVENESS-NEXT: call func
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; SUBREGLIVENESS-NEXT: csrr a0, vlenb
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
; SUBREGLIVENESS-NEXT: add a0, sp, a0
@@ -111,6 +110,7 @@ define void @last_chance_recoloring_failure() {
; SUBREGLIVENESS-NEXT: vl4r.v v20, (a0) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: addi a0, sp, 16
; SUBREGLIVENESS-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; SUBREGLIVENESS-NEXT: vfwsub.wv v8, v24, v16
; SUBREGLIVENESS-NEXT: vsetvli zero, zero, e32, m8, tu, mu
; SUBREGLIVENESS-NEXT: vfdiv.vv v8, v24, v8, v0.t
11 changes: 5 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/abs-vp.ll
@@ -590,13 +590,12 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 1
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB46_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t
; CHECK-NEXT: vmax.vv v8, v8, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
@@ -39,9 +39,9 @@ define <vscale x 1 x i64> @access_fixed_and_vector_objects(ptr %val) {
; RV64IV-NEXT: addi a0, sp, 8
; RV64IV-NEXT: vl1re64.v v8, (a0)
; RV64IV-NEXT: addi a0, sp, 528
; RV64IV-NEXT: ld a1, 520(sp)
; RV64IV-NEXT: vl1re64.v v9, (a0)
; RV64IV-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; RV64IV-NEXT: ld a0, 520(sp)
; RV64IV-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64IV-NEXT: vadd.vv v8, v8, v9
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 1
84 changes: 40 additions & 44 deletions llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
@@ -103,9 +103,9 @@ define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) {
define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vsaddu.vx v16, v16, a1
@@ -124,31 +124,30 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsaddu.vx v8, v8, a1
; CHECK-NEXT: vmsltu.vx v0, v8, a2
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v0, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 2
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 4
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: lui a0, %hi(.LCPI9_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vmsltu.vx v10, v16, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v8, 2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v10, 4
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
@@ -174,51 +173,48 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: lui a0, %hi(.LCPI10_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v9, v16, a2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 4
; CHECK-NEXT: lui a0, %hi(.LCPI10_3)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v11, (a0)
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v9, v16, a2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 6
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: lui a0, %hi(.LCPI10_4)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vsext.vf8 v16, v11
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v11, v16, a2
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v0, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v9, v16, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v9, 2
; CHECK-NEXT: lui a0, %hi(.LCPI10_4)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4)
; CHECK-NEXT: vle8.v v12, (a0)
; CHECK-NEXT: lui a0, %hi(.LCPI10_5)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vle8.v v13, (a0)
; CHECK-NEXT: vsext.vf8 v16, v12
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v9, v16, a2
; CHECK-NEXT: vmsltu.vx v12, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v13
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v13, v16, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v9, 4
; CHECK-NEXT: vslideup.vi v8, v9, 4
; CHECK-NEXT: lui a0, %hi(.LCPI10_6)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v11, 6
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v12, 2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v13, 4
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v9, v16, a2
234 changes: 120 additions & 114 deletions llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll

Large diffs are not rendered by default.

258 changes: 132 additions & 126 deletions llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll

Large diffs are not rendered by default.

39 changes: 13 additions & 26 deletions llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -236,53 +236,40 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a3, a2, a1
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vl8re32.v v8, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v0, (a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vl8re32.v v8, (a3)
; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: vl8re32.v v0, (a1)
; CHECK-NEXT: vl8re32.v v16, (a3)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v16, (a2)
; CHECK-NEXT: vadd.vv v0, v24, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vadd.vv v24, v8, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vadd.vv v24, v24, v8
; CHECK-NEXT: vadd.vv v0, v8, v0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vadd.vv v8, v24, v8
; CHECK-NEXT: vadd.vv v24, v0, v16
; CHECK-NEXT: vadd.vv v8, v0, v8
; CHECK-NEXT: vadd.vv v24, v24, v16
; CHECK-NEXT: vadd.vx v16, v8, a4
; CHECK-NEXT: vadd.vx v8, v24, a4
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
92 changes: 41 additions & 51 deletions llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -135,16 +135,16 @@ declare <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half>, <vscale x
define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -179,16 +179,16 @@ declare <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half>, <vscal
define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
@@ -223,16 +223,16 @@ declare <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half>, <vscal
define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
@@ -359,8 +359,8 @@ define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscal
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -403,8 +403,8 @@ define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscal
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
@@ -447,8 +447,8 @@ define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vs
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
@@ -525,16 +525,16 @@ declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vsca
define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -569,16 +569,16 @@ declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vsca
define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
@@ -613,16 +613,16 @@ declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vsca
define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv7f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
@@ -657,16 +657,16 @@ declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vsca
define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
@@ -705,66 +705,56 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v25, v0, a2
; CHECK-NEXT: vslidedown.vx v6, v0, a2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v8, v16, v0.t
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a2, 3
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll
@@ -10,19 +10,19 @@ define void @test(ptr %ref_array, ptr %sad_array) {
; RV32-NEXT: th.lwd a2, a3, (a0), 0, 3
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vle8.v v8, (a2)
; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vzext.vf4 v12, v8
; RV32-NEXT: vmv.s.x v8, zero
; RV32-NEXT: vredsum.vs v10, v12, v8
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vredsum.vs v9, v12, v8
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: th.swia a0, (a1), 4, 0
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vle8.v v10, (a3)
; RV32-NEXT: vle8.v v9, (a3)
; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vi v10, v9, 4
; RV32-NEXT: vslideup.vi v9, v10, 4
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vzext.vf4 v12, v10
; RV32-NEXT: vzext.vf4 v12, v9
; RV32-NEXT: vredsum.vs v8, v12, v8
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vse32.v v8, (a1)
@@ -33,19 +33,19 @@ define void @test(ptr %ref_array, ptr %sad_array) {
; RV64-NEXT: th.ldd a2, a3, (a0), 0, 4
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT: vle8.v v8, (a2)
; RV64-NEXT: vmv.v.i v9, 0
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vzext.vf4 v12, v8
; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vredsum.vs v10, v12, v8
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: vredsum.vs v9, v12, v8
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: th.swia a0, (a1), 4, 0
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT: vle8.v v10, (a3)
; RV64-NEXT: vle8.v v9, (a3)
; RV64-NEXT: vmv.v.i v10, 0
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vi v10, v9, 4
; RV64-NEXT: vslideup.vi v9, v10, 4
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vzext.vf4 v12, v10
; RV64-NEXT: vzext.vf4 v12, v9
; RV64-NEXT: vredsum.vs v8, v12, v8
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a1)
53 changes: 38 additions & 15 deletions llvm/test/CodeGen/RISCV/rvv/compressstore.ll
@@ -197,28 +197,51 @@ entry:
define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data) {
; RV64-LABEL: test_compresstore_v256i8:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vmv1r.v v7, v8
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a2, a2, 4
; RV64-NEXT: sub sp, sp, a2
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 128
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vle8.v v24, (a1)
; RV64-NEXT: vle8.v v16, (a1)
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v0, 1
; RV64-NEXT: vmv.x.s a1, v9
; RV64-NEXT: vmv.x.s a3, v0
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: slli a4, a4, 3
; RV64-NEXT: add a4, sp, a4
; RV64-NEXT: addi a4, a4, 16
; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vcompress.vm v8, v16, v0
; RV64-NEXT: vcompress.vm v16, v24, v0
; RV64-NEXT: vcpop.m a4, v0
; RV64-NEXT: vsetvli zero, a4, e8, m8, ta, ma
; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: vse8.v v16, (a0)
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vcompress.vm v8, v24, v7
; RV64-NEXT: vcpop.m a2, v7
; RV64-NEXT: vcompress.vm v16, v24, v8
; RV64-NEXT: vcpop.m a2, v8
; RV64-NEXT: cpop a3, a3
; RV64-NEXT: cpop a1, a1
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: vse8.v v16, (a0)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
;
; RV32-LABEL: test_compresstore_v256i8:
@@ -796,18 +819,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data)
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vse64.v v24, (a0)
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v24, v0, 2
; RV64-NEXT: vslidedown.vi v8, v0, 2
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vcompress.vm v8, v16, v24
; RV64-NEXT: vcompress.vm v24, v16, v8
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vmv.x.s a1, v0
; RV64-NEXT: zext.h a1, a1
; RV64-NEXT: cpopw a1, a1
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vcpop.m a1, v24
; RV64-NEXT: vcpop.m a1, v8
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: vse64.v v24, (a0)
; RV64-NEXT: ret
;
; RV32-LABEL: test_compresstore_v32i64:
@@ -818,18 +841,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data)
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vse64.v v24, (a0)
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v24, v0, 2
; RV32-NEXT: vslidedown.vi v8, v0, 2
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vcompress.vm v8, v16, v24
; RV32-NEXT: vcompress.vm v24, v16, v8
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vmv.x.s a1, v0
; RV32-NEXT: zext.h a1, a1
; RV32-NEXT: cpop a1, a1
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: vcpop.m a1, v24
; RV32-NEXT: vcpop.m a1, v8
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: vse64.v v24, (a0)
; RV32-NEXT: ret
entry:
tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr align 8 %p, <32 x i1> %mask)
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll
@@ -189,16 +189,16 @@ define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx3:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vle64.v v10, (a1)
; RV32-NEXT: vle64.v v8, (a1)
; RV32-NEXT: vle64.v v10, (a0)
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v9, v8, a2
; RV32-NEXT: vslide1down.vx v9, v9, a3
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vslideup.vi v10, v9, 1
; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: vslideup.vi v10, v8, 2
; RV32-NEXT: vse64.v v10, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx3:
18 changes: 10 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
@@ -19,18 +19,19 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan
; RV32-LABEL: constant_folding_crash:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lw a0, 8(a0)
; RV32-NEXT: vmv1r.v v10, v0
; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vmv.v.x v11, a0
; RV32-NEXT: vmsne.vi v0, v11, 0
; RV32-NEXT: vmv.v.x v10, a0
; RV32-NEXT: vmsne.vi v10, v10, 0
; RV32-NEXT: vmv1r.v v11, v0
; RV32-NEXT: vmv1r.v v0, v10
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vmv1r.v v0, v10
; RV32-NEXT: vmv1r.v v0, v11
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vrgather.vi v9, v8, 0
; RV32-NEXT: vmsne.vi v0, v9, 0
@@ -42,18 +43,19 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan
; RV64-LABEL: constant_folding_crash:
; RV64: # %bb.0: # %entry
; RV64-NEXT: ld a0, 8(a0)
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: andi a0, a0, 1
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT: vmv.v.x v13, a0
; RV64-NEXT: vmsne.vi v0, v13, 0
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v12, v12, 0
; RV64-NEXT: vmv1r.v v13, v0
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vmv1r.v v0, v13
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-NEXT: vrgather.vi v9, v8, 0
; RV64-NEXT: vmsne.vi v0, v9, 0
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -3341,16 +3341,16 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
;
; RV32F-LABEL: ctlz_zero_undef_nxv8i64:
; RV32F: # %bb.0:
; RV32F-NEXT: vmv8r.v v16, v8
; RV32F-NEXT: li a0, 190
; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32F-NEXT: vmv.v.x v8, a0
; RV32F-NEXT: vmv.v.x v16, a0
; RV32F-NEXT: fsrmi a0, 1
; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32F-NEXT: vfncvt.f.xu.w v24, v16
; RV32F-NEXT: vsrl.vi v16, v24, 23
; RV32F-NEXT: vwsubu.wv v8, v8, v16
; RV32F-NEXT: vfncvt.f.xu.w v24, v8
; RV32F-NEXT: vsrl.vi v8, v24, 23
; RV32F-NEXT: vwsubu.wv v16, v16, v8
; RV32F-NEXT: fsrm a0
; RV32F-NEXT: vmv8r.v v8, v16
; RV32F-NEXT: ret
;
; RV64F-LABEL: ctlz_zero_undef_nxv8i64:
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -1259,8 +1259,8 @@ define <vscale x 16 x i64> @vp_ctlz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB46_2:
; CHECK-NEXT: fsrmi a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t
@@ -1285,8 +1285,8 @@ define <vscale x 16 x i64> @vp_ctlz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; CHECK-ZVBB-NEXT: # %bb.1:
; CHECK-ZVBB-NEXT: mv a0, a1
; CHECK-ZVBB-NEXT: .LBB46_2:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -2487,8 +2487,8 @@ define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64(<vscale x 16 x i64> %va,
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB94_2:
; CHECK-NEXT: fsrmi a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t
@@ -2512,8 +2512,8 @@ define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64(<vscale x 16 x i64> %va,
; CHECK-ZVBB-NEXT: # %bb.1:
; CHECK-ZVBB-NEXT: mv a0, a1
; CHECK-ZVBB-NEXT: .LBB94_2:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
129 changes: 66 additions & 63 deletions llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -2024,8 +2024,7 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
; RV32-NEXT: vmv1r.v v24, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
Expand All @@ -2044,30 +2043,35 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: lui a3, 349525
; RV32-NEXT: addi a3, a3, 1365
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a3
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
@@ -2078,64 +2082,64 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: lui a3, 209715
; RV32-NEXT: addi a3, a3, 819
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a3
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: lui a3, 61681
; RV32-NEXT: addi a3, a3, -241
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a3
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: lui a3, 4112
; RV32-NEXT: addi a3, a3, 257
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vmul.vv v8, v16, v8, v0.t
; RV32-NEXT: vmul.vv v16, v16, v8, v0.t
; RV32-NEXT: li a2, 56
; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v8, v16, a2, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
@@ -2145,14 +2149,13 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
; RV32-NEXT: .LBB46_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -2161,17 +2164,17 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
@@ -2183,41 +2186,41 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
@@ -2303,13 +2306,13 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a2, v0.t
; RV64-NEXT: vsub.vv v16, v8, v16, v0.t
@@ -2347,8 +2350,8 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; CHECK-ZVBB-NEXT: # %bb.1:
; CHECK-ZVBB-NEXT: mv a0, a1
; CHECK-ZVBB-NEXT: .LBB46_2:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -2375,13 +2378,13 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
; RV32-NEXT: addi a3, a3, 1365
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v0, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v24, v0
; RV32-NEXT: vsub.vv v24, v16, v24
; RV32-NEXT: lui a3, 209715
@@ -2404,20 +2407,20 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
; RV32-NEXT: addi a3, a3, -241
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: lui a3, 4112
; RV32-NEXT: addi a3, a3, 257
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a3
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vmul.vv v16, v16, v24
; RV32-NEXT: li a2, 56
; RV32-NEXT: vsrl.vx v16, v16, a2
182 changes: 89 additions & 93 deletions llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -2282,7 +2282,6 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: addi a4, a4, 1365
; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: li a5, 24
; RV32-NEXT: mul a4, a4, a5
@@ -2295,6 +2294,7 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: li a5, 40
@@ -2312,82 +2312,81 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: lui a4, 209715
; RV32-NEXT: addi a4, a4, 819
; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a4
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: li a5, 40
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: li a5, 40
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: li a5, 48
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: lui a4, 61681
; RV32-NEXT: addi a4, a4, -241
; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a4
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: lui a4, 4112
; RV32-NEXT: addi a4, a4, 257
; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vmul.vv v8, v16, v8, v0.t
; RV32-NEXT: vmul.vv v16, v16, v8, v0.t
; RV32-NEXT: li a3, 56
; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t
; RV32-NEXT: vsrl.vx v8, v16, a3, v0.t
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: bltu a0, a1, .LBB46_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
; RV32-NEXT: .LBB46_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vx v8, v16, a2, v0.t
; RV32-NEXT: vnot.v v16, v16, v0.t
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v16, v8, a2, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
@@ -2549,13 +2548,12 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB46_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsub.vx v16, v8, a2, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vand.vv v8, v8, v16, v0.t
@@ -2596,8 +2594,8 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; CHECK-ZVBB-NEXT: # %bb.1:
; CHECK-ZVBB-NEXT: mv a0, a1
; CHECK-ZVBB-NEXT: .LBB46_2:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -2628,98 +2626,97 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v8, v16, a2
; RV32-NEXT: vnot.v v16, v16
; RV32-NEXT: vand.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v24, v8, 1
; RV32-NEXT: vand.vv v16, v16, v8
; RV32-NEXT: vsrl.vi v24, v16, 1
; RV32-NEXT: lui a4, 349525
; RV32-NEXT: addi a4, a4, 1365
; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a4
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: li a5, 24
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v24, v24, v16
; RV32-NEXT: vsub.vv v8, v8, v24
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v24, v8
; RV32-NEXT: vsub.vv v16, v16, v24
; RV32-NEXT: lui a4, 209715
; RV32-NEXT: addi a4, a4, 819
; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v0, a4
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v8, v0
; RV32-NEXT: vsrl.vi v8, v8, 2
; RV32-NEXT: vand.vv v8, v8, v0
; RV32-NEXT: vadd.vv v8, v24, v8
; RV32-NEXT: vsrl.vi v24, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v24
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: vsrl.vi v16, v16, 2
; RV32-NEXT: vand.vv v16, v16, v0
; RV32-NEXT: vadd.vv v16, v24, v16
; RV32-NEXT: vsrl.vi v24, v16, 4
; RV32-NEXT: vadd.vv v16, v16, v24
; RV32-NEXT: lui a4, 61681
; RV32-NEXT: addi a4, a4, -241
; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v16, a4
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v16, v8, v16
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v16, v8
; RV32-NEXT: lui a4, 4112
; RV32-NEXT: addi a4, a4, 257
; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vmv.v.x v16, a4
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vmul.vv v16, v16, v8
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: li a3, 56
; RV32-NEXT: vsrl.vx v8, v16, a3
; RV32-NEXT: vsrl.vx v8, v8, a3
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: bltu a0, a1, .LBB47_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
; RV32-NEXT: .LBB47_2:
; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vx v16, v24, a2
; RV32-NEXT: vsub.vx v8, v24, a2
; RV32-NEXT: vnot.v v24, v24
; RV32-NEXT: vand.vv v16, v24, v16
; RV32-NEXT: vsrl.vi v24, v16, 1
; RV32-NEXT: vand.vv v8, v24, v8
; RV32-NEXT: vsrl.vi v24, v8, 1
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v24, v8
; RV32-NEXT: vsub.vv v16, v16, v24
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: vsrl.vi v16, v16, 2
; RV32-NEXT: vand.vv v16, v16, v0
; RV32-NEXT: vadd.vv v16, v24, v16
; RV32-NEXT: vsrl.vi v24, v16, 4
; RV32-NEXT: vadd.vv v16, v16, v24
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v24, v16
; RV32-NEXT: vsub.vv v8, v8, v24
; RV32-NEXT: vand.vv v24, v8, v0
; RV32-NEXT: vsrl.vi v8, v8, 2
; RV32-NEXT: vand.vv v8, v8, v0
; RV32-NEXT: vadd.vv v8, v24, v8
; RV32-NEXT: vsrl.vi v24, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v16, v8
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vsrl.vx v8, v8, a3
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -4038,13 +4035,12 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va,
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB94_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t
; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
; CHECK-NEXT: fsrmi a0, 1
@@ -4077,8 +4073,8 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va,
; CHECK-ZVBB-NEXT: # %bb.1:
; CHECK-ZVBB-NEXT: mv a0, a1
; CHECK-ZVBB-NEXT: .LBB94_2:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -139,22 +139,22 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: andi sp, sp, -64
; RV32-NEXT: addi a3, sp, 64
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a4, a0, a2
; RV32-NEXT: vl8r.v v16, (a4)
; RV32-NEXT: add a3, a0, a2
; RV32-NEXT: vl8r.v v16, (a3)
; RV32-NEXT: vl8r.v v24, (a0)
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV32-NEXT: addi a0, sp, 64
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: vsetvli a3, zero, e8, m8, ta, ma
; RV32-NEXT: vmseq.vi v8, v16, 0
; RV32-NEXT: vmseq.vi v0, v24, 0
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: vmerge.vim v24, v16, 1, v0
; RV32-NEXT: vs8r.v v24, (a3)
; RV32-NEXT: add a2, a3, a2
; RV32-NEXT: vs8r.v v24, (a0)
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vmerge.vim v8, v16, 1, v0
; RV32-NEXT: vs8r.v v8, (a2)
; RV32-NEXT: vs8r.v v8, (a0)
; RV32-NEXT: lbu a0, 0(a1)
; RV32-NEXT: addi sp, s0, -80
; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
@@ -179,22 +179,22 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: andi sp, sp, -64
; RV64-NEXT: addi a3, sp, 64
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a4, a0, a2
; RV64-NEXT: vl8r.v v16, (a4)
; RV64-NEXT: add a3, a0, a2
; RV64-NEXT: vl8r.v v16, (a3)
; RV64-NEXT: vl8r.v v24, (a0)
; RV64-NEXT: add a1, a3, a1
; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV64-NEXT: addi a0, sp, 64
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: vsetvli a3, zero, e8, m8, ta, ma
; RV64-NEXT: vmseq.vi v8, v16, 0
; RV64-NEXT: vmseq.vi v0, v24, 0
; RV64-NEXT: vmv.v.i v16, 0
; RV64-NEXT: vmerge.vim v24, v16, 1, v0
; RV64-NEXT: vs8r.v v24, (a3)
; RV64-NEXT: add a2, a3, a2
; RV64-NEXT: vs8r.v v24, (a0)
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: vmerge.vim v8, v16, 1, v0
; RV64-NEXT: vs8r.v v8, (a2)
; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: lbu a0, 0(a1)
; RV64-NEXT: addi sp, s0, -80
; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
@@ -35,10 +35,10 @@ define <512 x i8> @single_source(<512 x i8> %a) {
; CHECK-NEXT: vslidedown.vi v16, v16, 4
; CHECK-NEXT: li a0, 466
; CHECK-NEXT: li a1, 465
; CHECK-NEXT: lbu a2, 1012(sp)
; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
; CHECK-NEXT: lbu a0, 1012(sp)
; CHECK-NEXT: vslideup.vx v8, v16, a1
; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: vmv.s.x v16, a2
; CHECK-NEXT: li a0, 501
; CHECK-NEXT: li a1, 500
; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
@@ -118,16 +118,16 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) {
; CHECK-NEXT: vslidedown.vi v24, v24, 4
; CHECK-NEXT: li a1, 466
; CHECK-NEXT: li a2, 465
; CHECK-NEXT: lbu a3, 985(sp)
; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-NEXT: lbu a1, 985(sp)
; CHECK-NEXT: vslideup.vx v8, v24, a2
; CHECK-NEXT: vmv.s.x v24, a1
; CHECK-NEXT: vmv.s.x v24, a3
; CHECK-NEXT: li a1, 478
; CHECK-NEXT: li a2, 477
; CHECK-NEXT: lbu a3, 1012(sp)
; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-NEXT: lbu a1, 1012(sp)
; CHECK-NEXT: vslideup.vx v8, v24, a2
; CHECK-NEXT: vmv.s.x v24, a1
; CHECK-NEXT: vmv.s.x v24, a3
; CHECK-NEXT: li a1, 501
; CHECK-NEXT: li a2, 500
; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
@@ -137,21 +137,21 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) {
; CHECK-NEXT: addi a1, a1, 501
; CHECK-NEXT: slli a1, a1, 13
; CHECK-NEXT: addi a1, a1, 512
; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
; CHECK-NEXT: lui a2, 1047552
; CHECK-NEXT: addiw a2, a2, 1
; CHECK-NEXT: slli a2, a2, 23
; CHECK-NEXT: addi a2, a2, 1
; CHECK-NEXT: slli a2, a2, 18
; CHECK-NEXT: vslide1down.vx v0, v24, a2
; CHECK-NEXT: lui a2, 4
; CHECK-NEXT: vmv.s.x v24, a2
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetivli zero, 7, e64, m1, tu, ma
; CHECK-NEXT: vslideup.vi v0, v24, 6
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv.v.x v24, a1
; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v7, 0
; CHECK-NEXT: lui a1, 1047552
; CHECK-NEXT: addiw a1, a1, 1
; CHECK-NEXT: slli a1, a1, 23
; CHECK-NEXT: addi a1, a1, 1
; CHECK-NEXT: slli a1, a1, 18
; CHECK-NEXT: vslide1down.vx v0, v7, a1
; CHECK-NEXT: lui a1, 4
; CHECK-NEXT: vmv.s.x v7, a1
; CHECK-NEXT: vsetivli zero, 7, e64, m1, tu, ma
; CHECK-NEXT: vslideup.vi v0, v7, 6
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t
; CHECK-NEXT: addi sp, s0, -1536
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll
@@ -417,8 +417,8 @@ declare <32 x i64> @llvm.vp.abs.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)
define <32 x i64> @vp_abs_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_abs_v32i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB34_2
@@ -432,8 +432,8 @@ define <32 x i64> @vp_abs_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vrsub.vi v24, v16, 0, v0.t
; CHECK-NEXT: vmax.vv v16, v16, v24, v0.t
; CHECK-NEXT: ret
410 changes: 208 additions & 202 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll

Large diffs are not rendered by default.

234 changes: 120 additions & 114 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -567,13 +567,14 @@ define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b,
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 5
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0)
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: vmv.s.x v10, a3
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0)
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vslideup.vi v8, v10, 7
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: ret
94 changes: 42 additions & 52 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -204,8 +204,8 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t
; ZVFHMIN-NEXT: fsrmi a0, 3
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT: fsrmi a0, 3
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: fsrm a0
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -290,8 +290,8 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t
; ZVFHMIN-NEXT: fsrmi a0, 3
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
@@ -439,8 +439,8 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -483,8 +483,8 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
@@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
@@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
@@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
@@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v6, v0
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a1, 3
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfabs.v v8, v16, v0.t
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t
; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret