64 changes: 43 additions & 21 deletions llvm/test/CodeGen/ARM/vcombine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,46 @@
; RUN: llc -mtriple=armeb-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE

define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK: vcombine8
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
; CHECK-LABEL: vcombine8
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE-DAG: vmov r0, r1, [[LD0]]
; CHECK-LE-DAG: vmov r2, r3, [[LD1]]

; CHECK-BE-DAG: vmov r1, r0, d16
; CHECK-BE-DAG: vmov r3, r2, d17
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp3
}

define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK: vcombine16
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
; CHECK-LABEL: vcombine16
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE-DAG: vmov r0, r1, [[LD0]]
; CHECK-LE-DAG: vmov r2, r3, [[LD1]]

; CHECK-BE-DAG: vmov r1, r0, d16
; CHECK-BE-DAG: vmov r3, r2, d17
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp3
}

define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK: vcombine32
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-LABEL: vcombine32

; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]

; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
%tmp1 = load <2 x i32>, <2 x i32>* %A
Expand All @@ -38,9 +51,14 @@ define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}

define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK: vcombinefloat
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-LABEL: vcombinefloat

; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]

; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
%tmp1 = load <2 x float>, <2 x float>* %A
Expand All @@ -50,11 +68,15 @@ define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
}

define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK: vcombine64
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
; CHECK-LABEL: vcombine64
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]

; CHECK-BE: vmov r1, r0, [[LD0]]
; CHECK-BE: vmov r3, r2, [[LD1]]
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
Expand Down
50 changes: 25 additions & 25 deletions llvm/test/CodeGen/ARM/vtrn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vtrn.8 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
Expand Down Expand Up @@ -52,11 +52,11 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vtrni16_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vtrn.16 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
Expand Down Expand Up @@ -84,11 +84,11 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vtrni32_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vtrn.32 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
Expand Down Expand Up @@ -116,11 +116,11 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vtrnf_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vtrn.32 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = load <2 x float>, <2 x float>* %B
Expand Down Expand Up @@ -281,11 +281,11 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_undef_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vtrn.8 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/ARM/vuzp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vuzp.8 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vuzp.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
Expand Down Expand Up @@ -52,11 +52,11 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpi16_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vuzp.16 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vuzp.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
Expand Down Expand Up @@ -220,11 +220,11 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_undef_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vuzp.8 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vuzp.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/ARM/vzip.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vzip.8 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vzip.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
Expand Down Expand Up @@ -52,11 +52,11 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vzip.16 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vzip.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
Expand Down Expand Up @@ -220,11 +220,11 @@ define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vzip.8 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vzip.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
Expand Down