Skip to content

Commit

Permalink
[AArch64] Additional testing for uqshl and regenerate arm64-vshift.ll…
Browse files Browse the repository at this point in the history
…. NFC

This tries to fill in some missing testing for neon shift intrinsics, and
regenerates the existing tests. See D148309 and D148311.
  • Loading branch information
davemgreen committed May 16, 2023
1 parent a423b7f commit d20afbd
Show file tree
Hide file tree
Showing 3 changed files with 2,611 additions and 1,487 deletions.
124 changes: 124 additions & 0 deletions llvm/test/CodeGen/AArch64/add-extract.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s

; Adds element 0 of the <1 x i64> argument %A to an i64 loaded from %B and
; returns the sum. The expected output moves the vector element into a general
; register with fmov and then performs a scalar add.
define i64 @add_i64_ext_load(<1 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: add_i64_ext_load:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
%a = extractelement <1 x i64> %A, i32 0
%b = load i64, ptr %B
%c = add i64 %a, %b
ret i64 %c
}

; Subtracts an i64 loaded from %B from element 0 of the <1 x i64> argument %A
; and returns the difference. The expected output moves the vector element
; into a general register with fmov and then performs a scalar sub.
define i64 @sub_i64_ext_load(<1 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: sub_i64_ext_load:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: sub x0, x9, x8
; CHECK-NEXT: ret
%a = extractelement <1 x i64> %A, i32 0
%b = load i64, ptr %B
%c = sub i64 %a, %b
ret i64 %c
}

; Adds element 0 of the <1 x i64> argument %A to an i64 loaded from %B and
; stores the sum back through %B; nothing is returned. The expected output is
; a load, an fmov of the vector element, a scalar add, and a store to the same
; address.
define void @add_i64_ext_load_store(<1 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: add_i64_ext_load_store:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: add x8, x9, x8
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: ret
%a = extractelement <1 x i64> %A, i32 0
%b = load i64, ptr %B
%c = add i64 %a, %b
store i64 %c, ptr %B
ret void
}

; Adds element 0 of the <2 x i64> argument %A to an i64 loaded from %B and
; returns the sum. The expected output reads the low element via fmov from d0
; and performs a scalar add.
define i64 @add_v2i64_ext_load(<2 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: add_v2i64_ext_load:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
%a = extractelement <2 x i64> %A, i32 0
%b = load i64, ptr %B
%c = add i64 %a, %b
ret i64 %c
}

; Adds element 0 of %A to element 0 of %B, both <1 x i64> arguments, and
; returns the sum. The expected output moves each element into a general
; register with fmov and performs a scalar add.
define i64 @add_i64_ext_ext(<1 x i64> %A, <1 x i64> %B) nounwind {
; CHECK-LABEL: add_i64_ext_ext:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: fmov x9, d1
; CHECK-NEXT: add x0, x8, x9
; CHECK-NEXT: ret
%a = extractelement <1 x i64> %A, i32 0
%b = extractelement <1 x i64> %B, i32 0
%c = add i64 %a, %b
ret i64 %c
}

; 32-bit variant: adds element 0 of the <1 x i32> argument %A to an i32 loaded
; from %B and returns the sum. The expected output uses the w/s register views
; (fmov w9, s0 followed by a 32-bit add).
define i32 @add_i32_ext_load(<1 x i32> %A, ptr %B) nounwind {
; CHECK-LABEL: add_i32_ext_load:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: add w0, w9, w8
; CHECK-NEXT: ret
%a = extractelement <1 x i32> %A, i32 0
%b = load i32, ptr %B
%c = add i32 %a, %b
ret i32 %c
}

; Returns (A[0] + B[0]) + B[1], where %A is <1 x i64> and %B is <2 x i64>.
; The expected output extracts B[1] with a lane mov from v1.d[1], moves the
; two low elements with fmov, and combines them with two scalar adds.
define i64 @add_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: add_i64_ext_ext_test1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov x8, v1.d[1]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: fmov x10, d1
; CHECK-NEXT: add x9, x9, x10
; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
%a = extractelement <1 x i64> %A, i32 0
%b = extractelement <2 x i64> %B, i32 0
%c = extractelement <2 x i64> %B, i32 1
%d = add i64 %a, %b
%e = add i64 %d, %c
ret i64 %e
}

; Returns (A[0] - B[0]) - B[1], where %A is <1 x i64> and %B is <2 x i64>.
; The expected output extracts B[1] with a lane mov from v1.d[1], moves the
; two low elements with fmov, and combines them with two scalar subs.
define i64 @sub_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: sub_i64_ext_ext_test1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov x8, v1.d[1]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: fmov x10, d1
; CHECK-NEXT: sub x9, x9, x10
; CHECK-NEXT: sub x0, x9, x8
; CHECK-NEXT: ret
%a = extractelement <1 x i64> %A, i32 0
%b = extractelement <2 x i64> %B, i32 0
%c = extractelement <2 x i64> %B, i32 1
%d = sub i64 %a, %b
%e = sub i64 %d, %c
ret i64 %e
}
40 changes: 37 additions & 3 deletions llvm/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll
Original file line number Diff line number Diff line change
@@ -1,19 +1,53 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi | FileCheck %s

; Check if sqshl/uqshl with constant shift amount can be selected.
; Calls the i64 sqshl intrinsic on %a with a constant shift amount of 36.
; The expected output is the immediate form `sqshl d0, d0, #36`, bracketed by
; fmov moves from x0 into d0 and back.
define i64 @test_vqshld_s64_i(i64 %a) {
; CHECK-LABEL: test_vqshld_s64_i:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqshl d0, d0, #36
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%1 = tail call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 36)
ret i64 %1
}

; Calls the i64 uqshl intrinsic on %a with a constant shift amount of 36.
; The expected output is the immediate form `uqshl d0, d0, #36`, bracketed by
; fmov moves from x0 into d0 and back.
define i64 @test_vqshld_u64_i(i64 %a) {
; CHECK-LABEL: test_vqshld_u64_i:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: uqshl d0, d0, #36
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%1 = tail call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 36)
ret i64 %1
}

; Calls the i32 sqshl intrinsic on %a with a constant shift amount of 16.
; The expected output is the immediate form `sqshl s0, s0, #16`, bracketed by
; fmov moves from w0 into s0 and back.
define i32 @test_vqshld_s32_i(i32 %a) {
; CHECK-LABEL: test_vqshld_s32_i:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: sqshl s0, s0, #16
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%1 = tail call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 16)
ret i32 %1
}

; Calls the i32 uqshl intrinsic on %a with a constant shift amount of 16.
; The expected output is the immediate form `uqshl s0, s0, #16`, bracketed by
; fmov moves from w0 into s0 and back.
define i32 @test_vqshld_u32_i(i32 %a) {
; CHECK-LABEL: test_vqshld_u32_i:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: uqshl s0, s0, #16
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%1 = tail call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 16)
ret i32 %1
}

; Declarations of the scalar sqshl/uqshl NEON intrinsics called by the tests
; in this file: 64-bit variants first, then 32-bit variants.
declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64)
declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64)

declare i32 @llvm.aarch64.neon.uqshl.i32(i32, i32)
declare i32 @llvm.aarch64.neon.sqshl.i32(i32, i32)

0 comments on commit d20afbd

Please sign in to comment.