147 changes: 81 additions & 66 deletions llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll
@@ -181,8 +181,8 @@ entry:
; CHECK: v{{[0-9]+}} = vmux(q{{[0-3]+}},v{{[0-9]+}},v{{[0-9]+}})
define void @test20(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %0, <16 x i32> %b, <16 x i32> %c)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %0, <16 x i32> %b, <16 x i32> %c)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
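
The same mechanical rewrite recurs through every hunk below: the old tests obtained an HVX predicate by bitcasting a vector register to <512 x i1> (<1024 x i1> in 128-byte mode), which the narrower <64 x i1>/<128 x i1> predicate types no longer allow, so the new tests route the conversion through the vand intrinsics instead. A minimal sketch of the round-trip idiom, assuming 64-byte mode and the usual intrinsic declarations (the function name is illustrative, not part of the patch):

; vector -> predicate -> vector, the idiom used throughout this patch
define <16 x i32> @pred_roundtrip(<16 x i32> %v) #0 {
entry:
  ; old form, now ill-typed: %q = bitcast <16 x i32> %v to <512 x i1>
  ; vandvrt ANDs each byte of %v with a splatted -1 and sets one
  ; predicate bit per nonzero byte
  %q = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)
  ; vandqrt goes the other way, expanding the predicate into a byte mask
  %w = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)
  ret <16 x i32> %w
}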
@@ -191,127 +191,139 @@ entry:
; CHECK: q{{[0-3]+}} = and(q{{[0-3]+}},q{{[0-3]+}})
define void @test21(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}

; CHECK-LABEL: test22:
; CHECK: q{{[0-3]+}} = or(q{{[0-3]+}},q{{[0-3]+}})
define void @test22(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}

; CHECK-LABEL: test23:
; CHECK: q{{[0-3]+}} = not(q{{[0-3]+}})
define void @test23(<16 x i32> %a) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %0)
store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %0)
%2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %1, i32 -1)
store <16 x i32> %2, <16 x i32>* @h, align 64
ret void
}

; CHECK-LABEL: test24:
; CHECK: q{{[0-3]+}} = xor(q{{[0-3]+}},q{{[0-3]+}})
define void @test24(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}

; CHECK-LABEL: test25:
; CHECK: q{{[0-3]+}} = or(q{{[0-3]+}},!q{{[0-3]+}})
define void @test25(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}

; CHECK-LABEL: test26:
; CHECK: q{{[0-3]+}} = and(q{{[0-3]+}},!q{{[0-3]+}})
define void @test26(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}

; CHECK-LABEL: test27:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
define void @test27(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}

; CHECK-LABEL: test28:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test28(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}

; CHECK-LABEL: test29:
; CHECK: q{{[0-3]+}} = vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test29(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}

; CHECK-LABEL: test30:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test30(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}

; CHECK-LABEL: test31:
; CHECK: q{{[0-3]+}} = vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test31(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}

; CHECK-LABEL: test32:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
define void @test32(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}

; CHECK-LABEL: test33:
; CHECK: v{{[0-9]+}} |= vand(q{{[0-3]+}},r{{[0-9]+}})
define void @test33(<16 x i32> %a, <16 x i32> %b, i32 %c) #0 {
entry:
%0 = bitcast <16 x i32> %b to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %a, <512 x i1> %0, i32 %c)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %a, <64 x i1> %0, i32 %c)
store <16 x i32> %1, <16 x i32>* @h, align 64
ret void
}
@@ -320,18 +332,19 @@ entry:
; CHECK: q{{[0-3]+}} |= vand(v{{[0-9]+}},r{{[0-9]+}})
define void @test34(<16 x i32> %a, <16 x i32> %b, i32 %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %0, <16 x i32> %b, i32 %c)
store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %0, <16 x i32> %b, i32 %c)
%2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %1, i32 -1)
store <16 x i32> %2, <16 x i32>* @k, align 64
ret void
}

; CHECK-LABEL: test35:
; CHECK: v{{[0-9]+}} = vand(q{{[0-3]+}},r{{[0-9]+}})
define void @test35(<16 x i32> %a, i32 %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %0, i32 %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 %b)
store <16 x i32> %1, <16 x i32>* @h, align 64
ret void
}
@@ -340,8 +353,9 @@ entry:
; CHECK: q{{[0-3]+}} = vand(v{{[0-9]+}},r{{[0-9]+}})
define void @test36(<16 x i32> %a, i32 %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
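
The compare tests above (test27 through test32) and test36 share a second consequence of the type change: a <64 x i1> result can no longer be stored through a pointer bitcast into the <16 x i32> global, so each test first widens the predicate with vandqrt and stores the resulting vector. Condensed, the shared pattern looks like the sketch below (vgtub stands in for any of the compares; this is an illustration, not a copy of any one test):

define void @store_pred(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
  ; byte-wise compare: one predicate bit per byte lane
  %q = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)
  ; widen to a storable vector; with -1, each set bit becomes an 0xff byte
  %v = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)
  store <16 x i32> %v, <16 x i32>* @k, align 64
  ret void
}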

@@ -476,8 +490,9 @@ entry:
; CHECK: q{{[0-3]}} = vsetq(r{{[0-9]+}})
define void @test51(i32 %a) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}

Expand Down Expand Up @@ -546,23 +561,23 @@ declare <32 x i32> @llvm.hexagon.V6.vunpackob(<32 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vunpackoh(<32 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #0
declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #0
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #0
declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #0
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
declare i64 @llvm.hexagon.S6.rol.i.p(i64, i32) #0
declare i64 @llvm.hexagon.S6.rol.i.p.acc(i64, i64, i32) #0
declare i64 @llvm.hexagon.S6.rol.i.p.and(i64, i64, i32) #0
@@ -577,7 +592,7 @@ declare i32 @llvm.hexagon.S6.rol.i.r.or(i32, i32, i32) #0
declare i32 @llvm.hexagon.S6.rol.i.r.xacc(i32, i32, i32) #0
declare i32 @llvm.hexagon.V6.extractw(<16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #0
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #0
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlutvvb(<16 x i32>, <16 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlutvvb.oracc(<16 x i32>, <16 x i32>, <16 x i32>, i32) #0
301 changes: 165 additions & 136 deletions llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll

Large diffs are not rendered by default.

26 changes: 14 additions & 12 deletions llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll
@@ -12,30 +12,32 @@
; CHECK-LABEL: V6_vmaskedstorentnq_128B
; CHECK: if (!q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}

declare void @llvm.hexagon.V6.vmaskedstoreq.128B(<1024 x i1>, i8*, <32 x i32>)
declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)

declare void @llvm.hexagon.V6.vmaskedstoreq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstoreq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vmaskedstoreq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstoreq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}

declare void @llvm.hexagon.V6.vmaskedstorenq.128B(<1024 x i1>, i8*, <32 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorenq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstorenq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vmaskedstorenq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorenq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}

declare void @llvm.hexagon.V6.vmaskedstorentq.128B(<1024 x i1>, i8*, <32 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorentq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstorentq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vmaskedstorentq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorentq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}

declare void @llvm.hexagon.V6.vmaskedstorentnq.128B(<1024 x i1>, i8*, <32 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorentnq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstorentnq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vmaskedstorentnq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorentnq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}
26 changes: 14 additions & 12 deletions llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll
@@ -12,30 +12,32 @@
; CHECK-LABEL: V6_vmaskedstorentnq
; CHECK: if (!q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}

declare void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1>, i8*, <16 x i32>)
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)

declare void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstoreq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1> %1, i8* %b, <16 x i32> %c)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}

declare void @llvm.hexagon.V6.vmaskedstorenq(<512 x i1>, i8*, <16 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorenq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstorenq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vmaskedstorenq(<512 x i1> %1, i8* %b, <16 x i32> %c)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorenq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}

declare void @llvm.hexagon.V6.vmaskedstorentq(<512 x i1>, i8*, <16 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorentq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstorentq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vmaskedstorentq(<512 x i1> %1, i8* %b, <16 x i32> %c)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorentq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}

declare void @llvm.hexagon.V6.vmaskedstorentnq(<512 x i1>, i8*, <16 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorentnq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstorentnq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vmaskedstorentnq(<512 x i1> %1, i8* %b, <16 x i32> %c)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorentnq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}
20 changes: 11 additions & 9 deletions llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll
@@ -19,6 +19,8 @@
; CHECK: if (q{{[0-3]+}}) vtmp.h = vgather(r1,m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h
; CHECK: vmem(r{{[0-9]+}}+#0) = vtmp.new

declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)

declare void @llvm.hexagon.V6.vgathermw.128B(i8*, i32, i32, <32 x i32>)
define void @V6_vgathermw_128B(i8* %a, i32 %b, i32 %c, <32 x i32> %d) {
call void @llvm.hexagon.V6.vgathermw.128B(i8* %a, i32 %b, i32 %c, <32 x i32> %d)
@@ -37,24 +39,24 @@ define void @V6_vgathermhw_128B(i8* %a, i32 %b, i32 %c, <64 x i32> %d) {
ret void
}

declare void @llvm.hexagon.V6.vgathermwq.128B(i8*, <1024 x i1>, i32, i32, <32 x i32>)
declare void @llvm.hexagon.V6.vgathermwq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>)
define void @V6_vgathermwq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %b to <1024 x i1>
call void @llvm.hexagon.V6.vgathermwq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermwq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
ret void
}

declare void @llvm.hexagon.V6.vgathermhq.128B(i8*, <1024 x i1>, i32, i32, <32 x i32>)
declare void @llvm.hexagon.V6.vgathermhq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>)
define void @V6_vgathermhq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %b to <1024 x i1>
call void @llvm.hexagon.V6.vgathermhq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermhq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
ret void
}

declare void @llvm.hexagon.V6.vgathermhwq.128B(i8*, <1024 x i1>, i32, i32, <64 x i32>)
declare void @llvm.hexagon.V6.vgathermhwq.128B(i8*, <128 x i1>, i32, i32, <64 x i32>)
define void @V6_vgathermhwq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <64 x i32> %e) {
%1 = bitcast <32 x i32> %b to <1024 x i1>
call void @llvm.hexagon.V6.vgathermhwq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <64 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermhwq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <64 x i32> %e)
ret void
}

20 changes: 11 additions & 9 deletions llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll
@@ -19,6 +19,8 @@
; CHECK: if (q{{[0-3]+}}) vtmp.h = vgather(r1,m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h
; CHECK: vmem(r{{[0-9]+}}+#0) = vtmp.new

declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)

declare void @llvm.hexagon.V6.vgathermw(i8*, i32, i32, <16 x i32>)
define void @V6_vgathermw(i8* %a, i32 %b, i32 %c, <16 x i32> %d) {
call void @llvm.hexagon.V6.vgathermw(i8* %a, i32 %b, i32 %c, <16 x i32> %d)
@@ -37,23 +39,23 @@ define void @V6_vgathermhw(i8* %a, i32 %b, i32 %c, <32 x i32> %d) {
ret void
}

declare void @llvm.hexagon.V6.vgathermwq(i8*, <512 x i1>, i32, i32, <16 x i32>)
declare void @llvm.hexagon.V6.vgathermwq(i8*, <64 x i1>, i32, i32, <16 x i32>)
define void @V6_vgathermwq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %b to <512 x i1>
call void @llvm.hexagon.V6.vgathermwq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermwq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
ret void
}

declare void @llvm.hexagon.V6.vgathermhq(i8*, <512 x i1>, i32, i32, <16 x i32>)
declare void @llvm.hexagon.V6.vgathermhq(i8*, <64 x i1>, i32, i32, <16 x i32>)
define void @V6_vgathermhq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %b to <512 x i1>
call void @llvm.hexagon.V6.vgathermhq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermhq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
ret void
}

declare void @llvm.hexagon.V6.vgathermhwq(i8*, <512 x i1>, i32, i32, <32 x i32>)
declare void @llvm.hexagon.V6.vgathermhwq(i8*, <64 x i1>, i32, i32, <32 x i32>)
define void @V6_vgathermhwq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
%1 = bitcast <16 x i32> %b to <512 x i1>
call void @llvm.hexagon.V6.vgathermhwq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermhwq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
ret void
}
19 changes: 10 additions & 9 deletions llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll
@@ -19,6 +19,7 @@
; CHECK-LABEL: V6_vscattermhwq_128B
; CHECK: if (q{{[0-3]}}) vscatter(r{{[0-9]+}},m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h = v{{[0-9]+}}

declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)

declare void @llvm.hexagon.V6.vscattermw.128B(i32, i32, <32 x i32>, <32 x i32>)
define void @V6_vscattermw_128B(i32 %a, i32 %b, <32 x i32> %c, <32 x i32> %d) {
@@ -44,17 +45,17 @@ define void @V6_vscattermh_add_128B(i32 %a, i32 %b, <32 x i32> %c, <32 x i32> %d
ret void
}

declare void @llvm.hexagon.V6.vscattermwq.128B(<1024 x i1>, i32, i32, <32 x i32>, <32 x i32>)
declare void @llvm.hexagon.V6.vscattermwq.128B(<128 x i1>, i32, i32, <32 x i32>, <32 x i32>)
define void @V6_vscattermwq_128B(<32 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vscattermwq.128B(<1024 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermwq.128B(<128 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
ret void
}

declare void @llvm.hexagon.V6.vscattermhq.128B(<1024 x i1>, i32, i32, <32 x i32>, <32 x i32>)
declare void @llvm.hexagon.V6.vscattermhq.128B(<128 x i1>, i32, i32, <32 x i32>, <32 x i32>)
define void @V6_vscattermhq_128B(<32 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vscattermhq.128B(<1024 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermhq.128B(<128 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
ret void
}

@@ -70,9 +71,9 @@ define void @V6_vscattermhw_add_128B(i32 %a, i32 %b, <64 x i32> %c, <32 x i32> %
ret void
}

declare void @llvm.hexagon.V6.vscattermhwq.128B(<1024 x i1>, i32, i32, <64 x i32>, <32 x i32>)
declare void @llvm.hexagon.V6.vscattermhwq.128B(<128 x i1>, i32, i32, <64 x i32>, <32 x i32>)
define void @V6_vscattermhwq_128B(<32 x i32> %a, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vscattermhwq.128B(<1024 x i1> %1, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermhwq.128B(<128 x i1> %1, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e)
ret void
}
19 changes: 10 additions & 9 deletions llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll
@@ -19,6 +19,7 @@
; CHECK-LABEL: V6_vscattermhwq
; CHECK: if (q{{[0-3]}}) vscatter(r{{[0-9]+}},m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h = v{{[0-9]+}}

declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)

declare void @llvm.hexagon.V6.vscattermw(i32, i32, <16 x i32>, <16 x i32>)
define void @V6_vscattermw(i32 %a, i32 %b, <16 x i32> %c, <16 x i32> %d) {
@@ -44,17 +45,17 @@ define void @V6_vscattermh_add(i32 %a, i32 %b, <16 x i32> %c, <16 x i32> %d) {
ret void
}

declare void @llvm.hexagon.V6.vscattermwq(<512 x i1>, i32, i32, <16 x i32>, <16 x i32>)
declare void @llvm.hexagon.V6.vscattermwq(<64 x i1>, i32, i32, <16 x i32>, <16 x i32>)
define void @V6_vscattermwq(<16 x i32> %a, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vscattermwq(<512 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermwq(<64 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
ret void
}

declare void @llvm.hexagon.V6.vscattermhq(<512 x i1>, i32, i32, <16 x i32>, <16 x i32>)
declare void @llvm.hexagon.V6.vscattermhq(<64 x i1>, i32, i32, <16 x i32>, <16 x i32>)
define void @V6_vscattermhq(<16 x i32> %a, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vscattermhq(<512 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermhq(<64 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
ret void
}

@@ -70,9 +71,9 @@ define void @V6_vscattermhw_add(i32 %a, i32 %b, <32 x i32> %c, <16 x i32> %d) {
ret void
}

declare void @llvm.hexagon.V6.vscattermhwq(<512 x i1>, i32, i32, <32 x i32>, <16 x i32>)
declare void @llvm.hexagon.V6.vscattermhwq(<64 x i1>, i32, i32, <32 x i32>, <16 x i32>)
define void @V6_vscattermhwq(<16 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vscattermhwq(<512 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermhwq(<64 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e)
ret void
}
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/Hexagon/intrinsics/v65.ll
@@ -136,21 +136,21 @@ define <16 x i32> @V6_vmpyuhe(<16 x i32> %a, i32 %b) {
}
; CHECK: = vmpye(v0.uh,r0.uh)

;declare <16 x i32> @llvm.hexagon.V6.vprefixqb(<512 x i1>)
;define <16 x i32> @V6_vprefixqb(<512 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqb(<512 x i1> %a)
;declare <16 x i32> @llvm.hexagon.V6.vprefixqb(<64 x i1>)
;define <16 x i32> @V6_vprefixqb(<64 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqb(<64 x i1> %a)
; ret <16 x i32> %b
;}

;declare <16 x i32> @llvm.hexagon.V6.vprefixqh(<512 x i1>)
;define <16 x i32> @V6_vprefixqh(<512 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqh(<512 x i1> %a)
;declare <16 x i32> @llvm.hexagon.V6.vprefixqh(<64 x i1>)
;define <16 x i32> @V6_vprefixqh(<64 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqh(<64 x i1> %a)
; ret <16 x i32> %b
;}

;declare <16 x i32> @llvm.hexagon.V6.vprefixqw(<512 x i1>)
;define <16 x i32> @V6_vprefixqw(<512 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqw(<512 x i1> %a)
;declare <16 x i32> @llvm.hexagon.V6.vprefixqw(<64 x i1>)
;define <16 x i32> @V6_vprefixqw(<64 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqw(<64 x i1> %a)
; ret <16 x i32> %b
;}

26 changes: 13 additions & 13 deletions llvm/test/CodeGen/Hexagon/late_instr.ll
@@ -28,10 +28,10 @@ b0:
%v13 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v12)
%v14 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v13)
%v15 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v14)
%v16 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v5)
%v16 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v5)
%v17 = shl i32 1, %v8
%v18 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v17)
%v19 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v15, <512 x i1> %v16, i32 %v18)
%v19 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v15, <64 x i1> %v16, i32 %v18)
%v20 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %a3)
%v21 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v20)
%v22 = icmp sgt i32 %v5, 0
@@ -48,8 +48,8 @@ b1: ; preds = %b0
%v30 = getelementptr inbounds i8, i8* %a0, i32 %v29
%v31 = bitcast i8* %v30 to <16 x i32>*
%v32 = load <16 x i32>, <16 x i32>* %v31, align 64, !tbaa !0
%v33 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v33, i32 16843009)
%v33 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v33, i32 16843009)
%v35 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v34)
%v36 = add i32 %v0, %a5
%v37 = getelementptr inbounds i8, i8* %a0, i32 %v36
@@ -127,11 +127,11 @@ b4: ; preds = %b4, %b3
%v100 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v94, <16 x i32> %v91)
%v101 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v97, <16 x i32> %v99)
%v102 = tail call <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32> %v98, <16 x i32> %v100)
%v103 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v101, <16 x i32> %v96)
%v104 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v95, <16 x i32> %v102)
%v105 = tail call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %v103, <512 x i1> %v104)
%v103 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v101, <16 x i32> %v96)
%v104 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v95, <16 x i32> %v102)
%v105 = tail call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %v103, <64 x i1> %v104)
%v106 = tail call i32 @llvm.hexagon.S6.rol.i.r(i32 %v83, i32 1)
%v107 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v86, <512 x i1> %v105, i32 %v106)
%v107 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v86, <64 x i1> %v105, i32 %v106)
%v108 = add nsw i32 %v79, -64
%v109 = icmp sgt i32 %v79, 64
br i1 %v109, label %b4, label %b5
@@ -179,16 +179,16 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.hexagon.S2.vsplatrb(i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vd0() #1
@@ -212,10 +212,10 @@ declare <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.hexagon.S6.rol.i.r(i32, i32) #1
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/Hexagon/peephole-move-phi.ll
@@ -15,35 +15,35 @@ b0:
br i1 %v0, label %b1, label %b2

b1: ; preds = %b0
%v1 = tail call <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1> undef) #2
%v1 = tail call <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1> undef) #2
br label %b2

b2: ; preds = %b1, %b0
%v2 = phi <1024 x i1> [ %v1, %b1 ], [ undef, %b0 ]
%v2 = phi <128 x i1> [ %v1, %b1 ], [ undef, %b0 ]
br label %b3

b3: ; preds = %b3, %b2
%v3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v2, <32 x i32> undef, <32 x i32> undef) #2
%v3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v2, <32 x i32> undef, <32 x i32> undef) #2
%v4 = tail call <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32> undef, <32 x i32> %v3) #2
%v5 = tail call <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32> %v4, <32 x i32> undef) #2
%v6 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v5, <32 x i32> undef) #2
%v7 = tail call <1024 x i1> @llvm.hexagon.V6.pred.or.128B(<1024 x i1> %v6, <1024 x i1> undef) #2
%v8 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v7, <32 x i32> undef, <32 x i32> undef) #2
tail call void asm sideeffect "if($0) vmem($1)=$2;", "q,r,v,~{memory}"(<32 x i32> undef, <32 x i32>* undef, <32 x i32> %v8) #2
%v6 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v5, <32 x i32> undef) #2
%v7 = tail call <128 x i1> @llvm.hexagon.V6.pred.or.128B(<128 x i1> %v6, <128 x i1> undef) #2
%v8 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v7, <32 x i32> undef, <32 x i32> undef) #2
tail call void asm sideeffect "if($0) vmem($1)=$2;", "q,r,v,~{memory}"(<128 x i1> undef, <32 x i32>* undef, <32 x i32> %v8) #2
br label %b3
}

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.pred.or.128B(<1024 x i1>, <1024 x i1>) #1
declare <128 x i1> @llvm.hexagon.V6.pred.or.128B(<128 x i1>, <128 x i1>) #1

; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1>) #1
declare <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32>, <32 x i32>) #1
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll
@@ -25,36 +25,36 @@ b3: ; preds = %b3, %b2
%v7 = load <16 x i32>, <16 x i32>* %v6, align 64, !tbaa !0
%v8 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> undef, <16 x i32> %v7, i32 4)
%v9 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v8, <16 x i32> zeroinitializer)
%v10 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v9, <16 x i32> undef)
%v11 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v10, <16 x i32> undef, <16 x i32> undef)
%v12 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> zeroinitializer, <16 x i32> %v11, <16 x i32> undef)
%v13 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> undef, <16 x i32> %v12, <16 x i32> undef)
%v10 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v9, <16 x i32> undef)
%v11 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v10, <16 x i32> undef, <16 x i32> undef)
%v12 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> zeroinitializer, <16 x i32> %v11, <16 x i32> undef)
%v13 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> undef, <16 x i32> %v12, <16 x i32> undef)
%v14 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> undef, i32 1)
%v15 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v14, <16 x i32> zeroinitializer)
%v16 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> zeroinitializer, <16 x i32> zeroinitializer)
%v17 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
%v18 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v15, <16 x i32> undef)
%v19 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
%v20 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v16, <16 x i32> undef)
%v21 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v19, <16 x i32> undef, <16 x i32> zeroinitializer)
%v22 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v20, <16 x i32> undef, <16 x i32> zeroinitializer)
%v17 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
%v18 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v15, <16 x i32> undef)
%v19 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
%v20 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v16, <16 x i32> undef)
%v21 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v19, <16 x i32> undef, <16 x i32> zeroinitializer)
%v22 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v20, <16 x i32> undef, <16 x i32> zeroinitializer)
%v23 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v22, <16 x i32> %v21)
%v24 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> zeroinitializer, <32 x i32> %v23, i32 16843009)
%v25 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v17, <16 x i32> %v13, <16 x i32> undef)
%v26 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v18, <16 x i32> %v25, <16 x i32> undef)
%v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v19, <16 x i32> %v26, <16 x i32> undef)
%v28 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v20, <16 x i32> %v27, <16 x i32> undef)
%v25 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v17, <16 x i32> %v13, <16 x i32> undef)
%v26 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v18, <16 x i32> %v25, <16 x i32> undef)
%v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v19, <16 x i32> %v26, <16 x i32> undef)
%v28 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v20, <16 x i32> %v27, <16 x i32> undef)
%v29 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> undef, <16 x i32> zeroinitializer)
%v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> zeroinitializer, <16 x i32> zeroinitializer)
%v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> undef, <16 x i32> undef)
%v32 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v29, <16 x i32> undef)
%v33 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> undef)
%v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> undef, <16 x i32> undef)
%v32 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v29, <16 x i32> undef)
%v33 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> undef)
%v34 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v24, <32 x i32> zeroinitializer, i32 16843009)
%v35 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v34, <32 x i32> undef, i32 16843009)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> zeroinitializer, <16 x i32> %v28, <16 x i32> undef)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v36, <16 x i32> undef)
%v38 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v32, <16 x i32> %v37, <16 x i32> undef)
%v39 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v33, <16 x i32> %v38, <16 x i32> undef)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> zeroinitializer, <16 x i32> %v28, <16 x i32> undef)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v36, <16 x i32> undef)
%v38 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v32, <16 x i32> %v37, <16 x i32> undef)
%v39 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v33, <16 x i32> %v38, <16 x i32> undef)
%v40 = add nsw i32 %v3, 3
%v41 = icmp eq i32 %v40, 5
br i1 %v41, label %b4, label %b3
@@ -85,13 +85,13 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
@@ -28,13 +28,13 @@ declare i32 @printf(i8*, ...) #0
declare void @print_vecpred(i32, i8*) #0

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1

; Function Attrs: nounwind
declare void @init_vectors() #0

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
@@ -43,7 +43,7 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare void @init_addresses() #0

; Function Attrs: nounwind
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind
define i32 @main() #0 {
@@ -63,13 +63,13 @@ entry:
%7 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%call1381 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str251, i32 0, i32 0)) #3
%8 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%9 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
%9 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
call void @print_vector(i32 64, i8* bitcast (<16 x i32>* @VectorResult to i8*))
%10 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%11 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
%12 = bitcast <512 x i1> %11 to <16 x i32>
%13 = bitcast <16 x i32> %12 to <512 x i1>
%14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %13, <16 x i32> undef, <16 x i32> undef)
%11 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
%12 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %11, i32 -1)
%13 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %12, i32 -1)
%14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %13, <16 x i32> undef, <16 x i32> undef)
store <16 x i32> %14, <16 x i32>* @VectorResult, align 64
ret i32 0
}
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll
@@ -114,12 +114,12 @@ b4: ; preds = %b3
%v91 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v90, i32 1)
%v92 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v91, i32 1)
%v93 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v92, i32 1)
%v94 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> undef, <16 x i32> undef, <16 x i32> %v93)
%v95 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> undef, <16 x i32> %v94, <16 x i32> undef)
%v94 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> undef, <16 x i32> undef, <16 x i32> %v93)
%v95 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> undef, <16 x i32> %v94, <16 x i32> undef)
%v96 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> undef, i32 1)
%v97 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %v96, <16 x i32> %v95)
%v98 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %v97, <16 x i32> undef, <16 x i32> undef)
%v99 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> undef, <16 x i32> undef, <16 x i32> undef)
%v97 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %v96, <16 x i32> %v95)
%v98 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %v97, <16 x i32> undef, <16 x i32> undef)
%v99 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> undef, <16 x i32> undef, <16 x i32> undef)
%v100 = tail call <16 x i32> @llvm.hexagon.V6.vshufeh(<16 x i32> %v99, <16 x i32> %v98)
%v101 = tail call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %v100, <16 x i32> undef)
%v102 = getelementptr inbounds <16 x i32>, <16 x i32>* %v2, i32 1
@@ -183,13 +183,13 @@ declare <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vadduhw(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32>, i32) #1
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
@@ -82,14 +82,14 @@ entry:
%asmresult58 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 29
%asmresult59 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 30
%asmresult60 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 31
%2 = tail call { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } asm "nop", "=q,=q,=q,=q"() #1
%asmresult61 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 0
%asmresult62 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 1
%asmresult63 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 2
%asmresult64 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 3
%3 = tail call <16 x i32> asm "nop", "=q,q,q,q,q"(<16 x i32> %asmresult61, <16 x i32> %asmresult62, <16 x i32> %asmresult63, <16 x i32> %asmresult64) #1
tail call void asm sideeffect "nop", "q,q,q"(<16 x i32> %asmresult61, <16 x i32> %asmresult62, <16 x i32> %asmresult63) #2
tail call void asm sideeffect "nop", "q,q"(<16 x i32> %asmresult64, <16 x i32> %3) #2
%2 = tail call { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } asm "nop", "=q,=q,=q,=q"() #1
%asmresult61 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 0
%asmresult62 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 1
%asmresult63 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 2
%asmresult64 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 3
%3 = tail call <64 x i1> asm "nop", "=q,q,q,q,q"(<64 x i1> %asmresult61, <64 x i1> %asmresult62, <64 x i1> %asmresult63, <64 x i1> %asmresult64) #1
tail call void asm sideeffect "nop", "q,q,q"(<64 x i1> %asmresult61, <64 x i1> %asmresult62, <64 x i1> %asmresult63) #2
tail call void asm sideeffect "nop", "q,q"(<64 x i1> %asmresult64, <64 x i1> %3) #2
tail call void asm sideeffect "nop", "v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v"(<16 x i32> %asmresult29, <16 x i32> %asmresult30, <16 x i32> %asmresult31, <16 x i32> %asmresult32, <16 x i32> %asmresult33, <16 x i32> %asmresult34, <16 x i32> %asmresult35, <16 x i32> %asmresult36, <16 x i32> %asmresult37, <16 x i32> %asmresult38, <16 x i32> %asmresult39, <16 x i32> %asmresult40, <16 x i32> %asmresult41, <16 x i32> %asmresult42, <16 x i32> %asmresult43, <16 x i32> %asmresult44, <16 x i32> %asmresult45, <16 x i32> %asmresult46, <16 x i32> %asmresult47, <16 x i32> %asmresult48, <16 x i32> %asmresult49, <16 x i32> %asmresult50, <16 x i32> %asmresult51, <16 x i32> %asmresult52, <16 x i32> %asmresult53, <16 x i32> %asmresult54, <16 x i32> %asmresult55, <16 x i32> %asmresult56, <16 x i32> %asmresult57, <16 x i32> %asmresult58, <16 x i32> %asmresult59, <16 x i32> %asmresult60) #2
tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14, i32 %asmresult15, i32 %asmresult16, i32 %asmresult17, i32 %asmresult18, i32 %asmresult19, i32 %asmresult20, i32 %asmresult21, i32 %asmresult22, i32 %asmresult23, i32 %asmresult24, i32 %asmresult25, i32 %asmresult26, i32 %asmresult27, i32 %asmresult28) #2
ret void
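
This file is the one place in the patch where the predicate type change reaches inline assembly: the "q" constraint, which binds an operand to one of the four HVX predicate registers q0-q3, now carries <64 x i1> values instead of <16 x i32>. A stripped-down sketch of the constraint under the new types (hypothetical function, assuming 64-byte mode and the usual vandvrt declaration):

define <64 x i1> @asm_q(<16 x i32> %v) {
entry:
  %q = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)
  ; "=q" and "q" tie the asm result and operand to predicate registers
  %r = tail call <64 x i1> asm "nop", "=q,q"(<64 x i1> %q)
  ret <64 x i1> %r
}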
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/Hexagon/split-vecpred.ll
@@ -32,7 +32,7 @@ b7: ; preds = %b6
br label %b8

b8: ; preds = %b7
%v0 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> undef, i32 -1)
%v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> undef, i32 -1)
br i1 undef, label %b9, label %b11

b9: ; preds = %b8
@@ -42,9 +42,9 @@ b10: ; preds = %b12
br label %b11

b11: ; preds = %b10, %b8
%v1 = phi <512 x i1> [ %v0, %b8 ], [ undef, %b10 ]
%v2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v1, <512 x i1> undef)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %v2, <16 x i32> undef, <16 x i32> undef)
%v1 = phi <64 x i1> [ %v0, %b8 ], [ undef, %b10 ]
%v2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v1, <64 x i1> undef)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %v2, <16 x i32> undef, <16 x i32> undef)
%v4 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %v3, i32 undef)
%v5 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v4, <16 x i32> undef, i32 undef)
%v6 = tail call <16 x i32> @llvm.hexagon.V6.vand(<16 x i32> %v5, <16 x i32> undef)
@@ -53,9 +53,9 @@ b11: ; preds = %b10, %b8
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vshufoeb(<16 x i32> undef, <16 x i32> %v8)
%v10 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v9)
%v11 = tail call <16 x i32> @llvm.hexagon.V6.vor(<16 x i32> %v10, <16 x i32> undef)
%v12 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v11, i32 -1)
%v13 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v12, i32 undef)
tail call void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1> undef, i8* undef, <16 x i32> %v13)
%v12 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v11, i32 -1)
%v13 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v12, i32 undef)
tail call void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1> undef, i8* undef, <16 x i32> %v13)
unreachable

b12: ; preds = %b12, %b9
@@ -69,22 +69,22 @@ b13: ; preds = %b5
}

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1

; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1>, i8*, <16 x i32>) #2
declare void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1>, i8*, <16 x i32>) #2

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vor(<16 x i32>, <16 x i32>) #1
Expand Down
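Every hunk in split-vecpred.ll above applies the same rewrite: predicate values are now typed <64 x i1>, and they move between the vector and predicate domains through explicit intrinsics rather than bitcasts. A minimal standalone sketch of the round trip follows; the function name and scalar operands are illustrative, not from the patch:

define <16 x i32> @predicate_roundtrip(<16 x i32> %a, <16 x i32> %b) {
  ; vector -> predicate: and each byte with splat(-1); a lane's predicate
  ; bit is set if the result is nonzero
  %qa = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
  %qb = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
  ; predicate logic stays in the q-register domain
  %q = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %qa, <64 x i1> %qb)
  ; predicate -> vector: lanes under the predicate take the splatted scalar
  %v = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)
  ret <16 x i32> %v
}

declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>)
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)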
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
Expand Up @@ -25,9 +25,9 @@ b3: ; preds = %b3, %b2
%v2 = phi i32 [ 0, %b2 ], [ %v8, %b3 ]
%v3 = phi <32 x i32> [ zeroinitializer, %b2 ], [ %v0, %b3 ]
%v4 = phi <32 x i32> [ %v1, %b2 ], [ %v7, %b3 ]
%v5 = tail call <1024 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32> %v3, <32 x i32> undef)
%v6 = tail call <1024 x i1> @llvm.hexagon.V6.veqh.and.128B(<1024 x i1> %v5, <32 x i32> undef, <32 x i32> undef)
%v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<1024 x i1> %v6, <32 x i32> %v4, <32 x i32> undef)
%v5 = tail call <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32> %v3, <32 x i32> undef)
%v6 = tail call <128 x i1> @llvm.hexagon.V6.veqh.and.128B(<128 x i1> %v5, <32 x i32> undef, <32 x i32> undef)
%v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1> %v6, <32 x i32> %v4, <32 x i32> undef)
%v8 = add nsw i32 %v2, 1
%v9 = icmp slt i32 %v8, %a2
br i1 %v9, label %b3, label %b4
Expand All @@ -40,13 +40,13 @@ b5: ; preds = %b4, %b0
}

; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32>, <32 x i32>) #1
declare <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.veqh.and.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <128 x i1> @llvm.hexagon.V6.veqh.and.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1
Expand Down
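swp-prolog-phi.ll is the 128-byte HVX variant of the same change: vectors are <32 x i32> and predicates shrink from <1024 x i1> to <128 x i1>. A hedged sketch of the compare-then-conditional-add idiom used by the loop kernel, with illustrative names:

define <32 x i32> @cmp_add_128b(<32 x i32> %x, <32 x i32> %y, <32 x i32> %acc) {
  ; unsigned halfword compare yields a <128 x i1> predicate in 128B mode
  %q = tail call <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32> %x, <32 x i32> %y)
  ; halfword lanes selected by the predicate accumulate %x into %acc
  %r = tail call <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1> %q, <32 x i32> %acc, <32 x i32> %x)
  ret <32 x i32> %r
}

declare <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1>, <32 x i32>, <32 x i32>)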
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/Hexagon/swp-sigma.ll
Expand Up @@ -17,9 +17,9 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #0
declare <16 x i32> @llvm.hexagon.V6.vd0() #0
declare <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #0
Expand Down Expand Up @@ -106,48 +106,48 @@ b6: ; preds = %b6, %b5
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32> %v8, <16 x i32> %v47) #2
%v54 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v45, <16 x i32> %v47) #2
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v49, <16 x i32> %v47) #2
%v56 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v54, <16 x i32> %v7) #2
%v57 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v55, <16 x i32> %v7) #2
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v56, <16 x i32> %v9, <16 x i32> %v10) #2
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v57, <16 x i32> %v58, <16 x i32> %v9) #2
%v60 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v56, <16 x i32> %v8, <16 x i32> %v45) #2
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v57, <16 x i32> %v8, <16 x i32> %v49) #2
%v56 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v54, <16 x i32> %v7) #2
%v57 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v55, <16 x i32> %v7) #2
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v56, <16 x i32> %v9, <16 x i32> %v10) #2
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v57, <16 x i32> %v58, <16 x i32> %v9) #2
%v60 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v56, <16 x i32> %v8, <16 x i32> %v45) #2
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v57, <16 x i32> %v8, <16 x i32> %v49) #2
%v62 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v61, <16 x i32> %v60) #2
%v63 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v53, <32 x i32> %v62, i32 -1) #2
%v64 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v45, <16 x i32> %v44, i32 1) #2
%v65 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v49, <16 x i32> %v48, i32 1) #2
%v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v64, <16 x i32> %v47) #2
%v67 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v65, <16 x i32> %v47) #2
%v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v7) #2
%v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v67, <16 x i32> %v7) #2
%v70 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v59, <16 x i32> %v9) #2
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v70, <16 x i32> %v9) #2
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v8, <16 x i32> %v64) #2
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v8, <16 x i32> %v65) #2
%v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v7) #2
%v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v67, <16 x i32> %v7) #2
%v70 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v59, <16 x i32> %v9) #2
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v70, <16 x i32> %v9) #2
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v8, <16 x i32> %v64) #2
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v8, <16 x i32> %v65) #2
%v74 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v73, <16 x i32> %v72) #2
%v75 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v63, <32 x i32> %v74, i32 -1) #2
%v76 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v50, <16 x i32> %v45, i32 1) #2
%v77 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v52, <16 x i32> %v49, i32 1) #2
%v78 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v76, <16 x i32> %v47) #2
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v77, <16 x i32> %v47) #2
%v80 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v78, <16 x i32> %v7) #2
%v81 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v79, <16 x i32> %v7) #2
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v80, <16 x i32> %v71, <16 x i32> %v9) #2
%v83 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v81, <16 x i32> %v82, <16 x i32> %v9) #2
%v84 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v80, <16 x i32> %v8, <16 x i32> %v76) #2
%v85 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v81, <16 x i32> %v8, <16 x i32> %v77) #2
%v80 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v78, <16 x i32> %v7) #2
%v81 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v79, <16 x i32> %v7) #2
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v80, <16 x i32> %v71, <16 x i32> %v9) #2
%v83 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v81, <16 x i32> %v82, <16 x i32> %v9) #2
%v84 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v80, <16 x i32> %v8, <16 x i32> %v76) #2
%v85 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v81, <16 x i32> %v8, <16 x i32> %v77) #2
%v86 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v85, <16 x i32> %v84) #2
%v87 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v75, <32 x i32> %v86, i32 -1) #2
%v88 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v47, <16 x i32> %v46, i32 1) #2
%v89 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v51, <16 x i32> %v47, i32 1) #2
%v90 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v88, <16 x i32> %v47) #2
%v91 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v89, <16 x i32> %v47) #2
%v92 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v90, <16 x i32> %v7) #2
%v93 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v91, <16 x i32> %v7) #2
%v94 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v92, <16 x i32> %v83, <16 x i32> %v9) #2
%v95 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v93, <16 x i32> %v94, <16 x i32> %v9) #2
%v96 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v92, <16 x i32> %v8, <16 x i32> %v88) #2
%v97 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v93, <16 x i32> %v8, <16 x i32> %v89) #2
%v92 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v90, <16 x i32> %v7) #2
%v93 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v91, <16 x i32> %v7) #2
%v94 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v92, <16 x i32> %v83, <16 x i32> %v9) #2
%v95 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v93, <16 x i32> %v94, <16 x i32> %v9) #2
%v96 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v92, <16 x i32> %v8, <16 x i32> %v88) #2
%v97 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v93, <16 x i32> %v8, <16 x i32> %v89) #2
%v98 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v97, <16 x i32> %v96) #2
%v99 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v87, <32 x i32> %v98, i32 -1) #2
%v100 = tail call <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32> %v95, <16 x i32> %v4, i32 0) #2
Expand Down
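The sigma-filter kernel above repeats one idiom many times over: vabsdiffub computes a per-byte distance, vgtub compares it against a threshold to produce a <64 x i1> predicate, and vmux or vaddbnq consumes that predicate. A condensed sketch of a single such step, with a hypothetical function and no software pipelining:

define <16 x i32> @threshold_step(<16 x i32> %pix, <16 x i32> %center, <16 x i32> %thresh, <16 x i32> %onval, <16 x i32> %offval) {
  ; per-byte absolute difference
  %d = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %pix, <16 x i32> %center)
  ; bytes whose distance exceeds the threshold set the predicate
  %q = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %d, <16 x i32> %thresh)
  ; per-byte select between the two candidate vectors
  %sel = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %q, <16 x i32> %onval, <16 x i32> %offval)
  ret <16 x i32> %sel
}

declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>)
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>)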
9 changes: 6 additions & 3 deletions llvm/test/CodeGen/Hexagon/v6-inlasm4.ll
Expand Up @@ -12,8 +12,9 @@ b0:
store i32 %a0, i32* %v0, align 4
store <16 x i32> %a1, <16 x i32>* %v1, align 64
%v3 = load i32, i32* %v0, align 4
%v4 = load <16 x i32>, <16 x i32>* %v2, align 64
call void asm sideeffect " $1 = vsetq($0);\0A", "r,q"(i32 %v3, <16 x i32> %v4) #1, !srcloc !0
%v4 = tail call <64 x i1> asm sideeffect " $0 = vsetq($1);\0A", "=q,r"(i32 %v3) #1, !srcloc !0
%v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
store <16 x i32> %v5, <16 x i32>* %v2, align 64
ret void
}

Expand All @@ -23,7 +24,9 @@ b0:
ret i32 0
}

declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind }
attributes #1 = { nounwind readnone }

!0 = !{i32 222}
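The v6-inlasm4.ll rewrite shows the inline-asm side of the change: the "q" constraint now binds the <64 x i1> predicate type directly, so the test takes the predicate as an asm output and expands it with vandqrt before storing, instead of passing a <16 x i32> into the asm. A minimal sketch of the new shape; the caller and pointer argument are hypothetical:

define void @store_vsetq(i32 %n, <16 x i32>* %p) {
  ; "=q" output: the asm result is the predicate itself
  %q = tail call <64 x i1> asm sideeffect " $0 = vsetq($1);\0A", "=q,r"(i32 %n)
  ; a predicate cannot be stored as <16 x i32> directly; expand it first
  %v = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)
  store <16 x i32> %v, <16 x i32>* %p, align 64
  ret void
}

declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)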
60 changes: 30 additions & 30 deletions llvm/test/CodeGen/Hexagon/v6-spill1.ll
Expand Up @@ -44,10 +44,10 @@ b3: ; preds = %b3, %b2
%v28 = bitcast i8* %v27 to <16 x i32>*
%v29 = load <16 x i32>, <16 x i32>* %v28, align 64, !tbaa !0
%v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v25, <16 x i32> %v14)
%v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
%v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
%v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v33 = tail call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %v16, <16 x i32> %v32, i32 16843009)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v35 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 1)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 1)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 2)
Expand All @@ -56,22 +56,22 @@ b3: ; preds = %b3, %b2
%v40 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v36, <16 x i32> %v14)
%v41 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v37, <16 x i32> %v14)
%v42 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v38, <16 x i32> %v14)
%v43 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
%v44 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
%v45 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
%v46 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
%v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
%v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
%v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
%v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v43 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
%v44 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
%v45 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
%v46 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
%v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
%v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
%v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
%v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v51 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v48, <16 x i32> %v47)
%v52 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v33, <32 x i32> %v51, i32 16843009)
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v50, <16 x i32> %v49)
%v54 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v52, <32 x i32> %v53, i32 16843009)
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
%v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
%v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
%v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
%v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 3)
%v60 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 3)
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 4)
Expand All @@ -80,22 +80,22 @@ b3: ; preds = %b3, %b2
%v64 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v60, <16 x i32> %v14)
%v65 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v61, <16 x i32> %v14)
%v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v62, <16 x i32> %v14)
%v67 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
%v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
%v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
%v70 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
%v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v67 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
%v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
%v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
%v70 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
%v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v75 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v72, <16 x i32> %v71)
%v76 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v54, <32 x i32> %v75, i32 16843009)
%v77 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v74, <16 x i32> %v73)
%v78 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v76, <32 x i32> %v77, i32 16843009)
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v83 = add nsw i32 %v15, 1
%v84 = icmp eq i32 %v83, 5
br i1 %v84, label %b4, label %b3
Expand Down Expand Up @@ -147,16 +147,16 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll
Expand Up @@ -28,7 +28,7 @@ b3: ; preds = %b2

b4: ; preds = %b4, %b3
%v3 = phi <32 x i32> [ %v5, %b4 ], [ undef, %b3 ]
%v4 = tail call <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> %v3) #2
%v4 = tail call <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> %v3) #2
%v5 = tail call <32 x i32> @llvm.hexagon.V6.vavguh.128B(<32 x i32> %v3, <32 x i32> %v2) #2
br label %b4

Expand All @@ -43,7 +43,7 @@ declare void @f1(i8* nocapture readonly, i8* nocapture readonly, i8* nocapture,
declare <32 x i32> @llvm.hexagon.V6.vd0.128B() #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vavguh.128B(<32 x i32>, <32 x i32>) #1
Expand Down
75 changes: 39 additions & 36 deletions llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll
Expand Up @@ -32,76 +32,76 @@ b0:
%v2 = call <16 x i32> @llvm.hexagon.V6.vd0()
store <16 x i32> %v2, <16 x i32>* @g2, align 64
%v3 = load <16 x i32>, <16 x i32>* @g3, align 64
%v4 = bitcast <16 x i32> %v3 to <512 x i1>
%v4 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v3, i32 -1)
%v5 = load <16 x i32>, <16 x i32>* @g2, align 64
%v6 = load <16 x i32>, <16 x i32>* @g1, align 64
%v7 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %v4, <16 x i32> %v5, <16 x i32> %v6)
%v7 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %v4, <16 x i32> %v5, <16 x i32> %v6)
store <16 x i32> %v7, <16 x i32>* @g2, align 64
%v8 = load <16 x i32>, <16 x i32>* @g3, align 64
%v9 = bitcast <16 x i32> %v8 to <512 x i1>
%v9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v8, i32 -1)
%v10 = load <16 x i32>, <16 x i32>* @g2, align 64
%v11 = load <16 x i32>, <16 x i32>* @g1, align 64
%v12 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %v9, <16 x i32> %v10, <16 x i32> %v11)
%v12 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %v9, <16 x i32> %v10, <16 x i32> %v11)
store <16 x i32> %v12, <16 x i32>* @g2, align 64
%v13 = load <16 x i32>, <16 x i32>* @g3, align 64
%v14 = bitcast <16 x i32> %v13 to <512 x i1>
%v14 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v13, i32 -1)
%v15 = load <16 x i32>, <16 x i32>* @g2, align 64
%v16 = load <16 x i32>, <16 x i32>* @g1, align 64
%v17 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %v14, <16 x i32> %v15, <16 x i32> %v16)
%v17 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %v14, <16 x i32> %v15, <16 x i32> %v16)
store <16 x i32> %v17, <16 x i32>* @g2, align 64
%v18 = load <16 x i32>, <16 x i32>* @g3, align 64
%v19 = bitcast <16 x i32> %v18 to <512 x i1>
%v19 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v18, i32 -1)
%v20 = load <16 x i32>, <16 x i32>* @g2, align 64
%v21 = load <16 x i32>, <16 x i32>* @g1, align 64
%v22 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %v19, <16 x i32> %v20, <16 x i32> %v21)
%v22 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %v19, <16 x i32> %v20, <16 x i32> %v21)
store <16 x i32> %v22, <16 x i32>* @g2, align 64
%v23 = load <16 x i32>, <16 x i32>* @g3, align 64
%v24 = bitcast <16 x i32> %v23 to <512 x i1>
%v24 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v23, i32 -1)
%v25 = load <16 x i32>, <16 x i32>* @g2, align 64
%v26 = load <16 x i32>, <16 x i32>* @g1, align 64
%v27 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %v24, <16 x i32> %v25, <16 x i32> %v26)
%v27 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %v24, <16 x i32> %v25, <16 x i32> %v26)
store <16 x i32> %v27, <16 x i32>* @g2, align 64
%v28 = load <16 x i32>, <16 x i32>* @g3, align 64
%v29 = bitcast <16 x i32> %v28 to <512 x i1>
%v29 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v28, i32 -1)
%v30 = load <16 x i32>, <16 x i32>* @g2, align 64
%v31 = load <16 x i32>, <16 x i32>* @g1, align 64
%v32 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %v29, <16 x i32> %v30, <16 x i32> %v31)
%v32 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %v29, <16 x i32> %v30, <16 x i32> %v31)
store <16 x i32> %v32, <16 x i32>* @g2, align 64
%v33 = load <16 x i32>, <16 x i32>* @g3, align 64
%v34 = bitcast <16 x i32> %v33 to <512 x i1>
%v34 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v33, i32 -1)
%v35 = load <16 x i32>, <16 x i32>* @g2, align 64
%v36 = load <16 x i32>, <16 x i32>* @g1, align 64
%v37 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v34, <16 x i32> %v35, <16 x i32> %v36)
%v37 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v34, <16 x i32> %v35, <16 x i32> %v36)
store <16 x i32> %v37, <16 x i32>* @g2, align 64
%v38 = load <16 x i32>, <16 x i32>* @g3, align 64
%v39 = bitcast <16 x i32> %v38 to <512 x i1>
%v39 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v38, i32 -1)
%v40 = load <16 x i32>, <16 x i32>* @g2, align 64
%v41 = load <16 x i32>, <16 x i32>* @g1, align 64
%v42 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %v39, <16 x i32> %v40, <16 x i32> %v41)
%v42 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %v39, <16 x i32> %v40, <16 x i32> %v41)
store <16 x i32> %v42, <16 x i32>* @g2, align 64
%v43 = load <16 x i32>, <16 x i32>* @g3, align 64
%v44 = bitcast <16 x i32> %v43 to <512 x i1>
%v44 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v43, i32 -1)
%v45 = load <16 x i32>, <16 x i32>* @g2, align 64
%v46 = load <16 x i32>, <16 x i32>* @g1, align 64
%v47 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %v44, <16 x i32> %v45, <16 x i32> %v46)
%v47 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %v44, <16 x i32> %v45, <16 x i32> %v46)
store <16 x i32> %v47, <16 x i32>* @g2, align 64
%v48 = load <16 x i32>, <16 x i32>* @g3, align 64
%v49 = bitcast <16 x i32> %v48 to <512 x i1>
%v49 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v48, i32 -1)
%v50 = load <16 x i32>, <16 x i32>* @g2, align 64
%v51 = load <16 x i32>, <16 x i32>* @g1, align 64
%v52 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %v49, <16 x i32> %v50, <16 x i32> %v51)
%v52 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %v49, <16 x i32> %v50, <16 x i32> %v51)
store <16 x i32> %v52, <16 x i32>* @g2, align 64
%v53 = load <16 x i32>, <16 x i32>* @g3, align 64
%v54 = bitcast <16 x i32> %v53 to <512 x i1>
%v54 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v53, i32 -1)
%v55 = load <16 x i32>, <16 x i32>* @g2, align 64
%v56 = load <16 x i32>, <16 x i32>* @g1, align 64
%v57 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %v54, <16 x i32> %v55, <16 x i32> %v56)
%v57 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %v54, <16 x i32> %v55, <16 x i32> %v56)
store <16 x i32> %v57, <16 x i32>* @g2, align 64
%v58 = load <16 x i32>, <16 x i32>* @g3, align 64
%v59 = bitcast <16 x i32> %v58 to <512 x i1>
%v59 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v58, i32 -1)
%v60 = load <16 x i32>, <16 x i32>* @g2, align 64
%v61 = load <16 x i32>, <16 x i32>* @g1, align 64
%v62 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %v59, <16 x i32> %v60, <16 x i32> %v61)
%v62 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %v59, <16 x i32> %v60, <16 x i32> %v61)
store <16 x i32> %v62, <16 x i32>* @g2, align 64
ret i32 0
}
Expand All @@ -110,40 +110,43 @@ b0:
declare <16 x i32> @llvm.hexagon.V6.vd0() #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
60 changes: 30 additions & 30 deletions llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll
Expand Up @@ -48,10 +48,10 @@ b3: ; preds = %b3, %b2
%v28 = bitcast i8* %v27 to <16 x i32>*
%v29 = load <16 x i32>, <16 x i32>* %v28, align 64, !tbaa !0
%v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v25, <16 x i32> %v14)
%v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
%v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
%v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v33 = tail call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %v16, <16 x i32> %v32, i32 16843009)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v35 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 1)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 1)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 2)
Expand All @@ -60,22 +60,22 @@ b3: ; preds = %b3, %b2
%v40 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v36, <16 x i32> %v14)
%v41 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v37, <16 x i32> %v14)
%v42 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v38, <16 x i32> %v14)
%v43 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
%v44 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
%v45 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
%v46 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
%v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
%v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
%v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
%v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v43 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
%v44 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
%v45 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
%v46 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
%v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
%v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
%v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
%v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v51 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v48, <16 x i32> %v47)
%v52 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v33, <32 x i32> %v51, i32 16843009)
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v50, <16 x i32> %v49)
%v54 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v52, <32 x i32> %v53, i32 16843009)
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
%v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
%v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
%v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
%v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 3)
%v60 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 3)
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 4)
Expand All @@ -84,22 +84,22 @@ b3: ; preds = %b3, %b2
%v64 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v60, <16 x i32> %v14)
%v65 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v61, <16 x i32> %v14)
%v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v62, <16 x i32> %v14)
%v67 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
%v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
%v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
%v70 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
%v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v67 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
%v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
%v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
%v70 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
%v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v75 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v72, <16 x i32> %v71)
%v76 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v54, <32 x i32> %v75, i32 16843009)
%v77 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v74, <16 x i32> %v73)
%v78 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v76, <32 x i32> %v77, i32 16843009)
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v83 = add nsw i32 %v15, 1
%v84 = icmp eq i32 %v83, 5
br i1 %v84, label %b4, label %b3
Expand Down Expand Up @@ -151,16 +151,16 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/Hexagon/v60-vsel1.ll
Expand Up @@ -14,17 +14,17 @@ entry:
%add = add i32 %sub, %rem
%2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%4 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %add)
%5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %3, <512 x i1> %4, i32 12)
%4 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %add)
%5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %3, <64 x i1> %4, i32 12)
%and4 = and i32 %add, 511
%cmp = icmp eq i32 %and4, 0
%sMaskR.0 = select i1 %cmp, <16 x i32> %2, <16 x i32> %5
%cmp547 = icmp sgt i32 %add, 0
br i1 %cmp547, label %for.body.lr.ph, label %for.end

for.body.lr.ph: ; preds = %entry
%6 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %boundary)
%7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %6, i32 16843009)
%6 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %boundary)
%7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %6, i32 16843009)
%8 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %7)
%9 = add i32 %rem, %xsize
%10 = add i32 %9, -1
Expand Down Expand Up @@ -59,9 +59,9 @@ for.end: ; preds = %for.cond.for.end_cr
}

declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1

Expand Down
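v60-vsel1.ll (and v60-vsel2.ll below) build byte masks from scalar lengths. Assuming vsetq-style semantics for pred.scalar2 (the first %n byte lanes of the predicate are set), a hedged sketch of the mask construction, with an illustrative function name and the same constant operands as the test:

define <16 x i32> @build_mask(i32 %n) {
  %ones = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
  ; scalar -> predicate transfer
  %q = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %n)
  ; accumulate vand(%q, splatted scalar) into the splatted base vector
  %m = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %ones, <64 x i1> %q, i32 12)
  ret <16 x i32> %m
}

declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32)
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32)
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32)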
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/Hexagon/v60-vsel2.ll
Expand Up @@ -14,8 +14,8 @@ b0:
%v4 = add i32 %v2, %v3
%v5 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
%v6 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%v7 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v4)
%v8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v6, <512 x i1> %v7, i32 12)
%v7 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v4)
%v8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v6, <64 x i1> %v7, i32 12)
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v8, <16 x i32> %v8)
%v10 = and i32 %v4, 511
%v11 = icmp eq i32 %v10, 0
Expand All @@ -31,8 +31,8 @@ b2: ; preds = %b1, %b0
br i1 %v14, label %b3, label %b6

b3: ; preds = %b2
%v15 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
%v16 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v15, i32 16843009)
%v15 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
%v16 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v15, i32 16843009)
%v17 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v16)
%v18 = add i32 %v3, %a1
%v19 = add i32 %v18, -1
Expand Down Expand Up @@ -71,16 +71,16 @@ b6: ; preds = %b5, %b2
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
Expand Down