Skip to content

Commit

Permalink
[AArch64] Add nvcast patterns for v4f16 and v8f16
Browse files Browse the repository at this point in the history
Summary:
Constant stores of f16 vectors can create NvCast nodes from various
operand types to v4f16 or v8f16 depending on patterns in the stored
constants.  This patch adds nvcast rules with v4f16 and v8f16 values.

AArchISelLowering::LowerBUILD_VECTOR has the details on which constant
patterns generate the nvcast nodes.

Reviewers: jmolloy, srhines, ab

Subscribers: rengolin, aemerson, llvm-commits

Differential Revision: http://reviews.llvm.org/D9201

llvm-svn: 235610
  • Loading branch information
pirama-arumuga-nainar committed Apr 23, 2015
1 parent b188153 commit 745615c
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 0 deletions.
8 changes: 8 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Expand Up @@ -5128,22 +5128,26 @@ def : Pat<(trap), (BRK 1)>;
// Natural vector casts (64 bit)
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
Expand All @@ -5158,22 +5162,26 @@ def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
// Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;

def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;

def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;

def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
Expand Down
89 changes: 89 additions & 0 deletions llvm/test/CodeGen/AArch64/fp16-vector-nvcast.ll
@@ -0,0 +1,89 @@
; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s

; Test pattern (v4f16 (AArch64NvCast (v2i32 FPR64:$src)))
define void @nvcast_v2i32(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v2i32:
; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #0xab, lsl #16
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <4 x half>* %a
ret void
}


; Test pattern (v4f16 (AArch64NvCast (v4i16 FPR64:$src)))
define void @nvcast_v4i16(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v4i16:
; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #0xab
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <4 x half>* %a
ret void
}


; Test pattern (v4f16 (AArch64NvCast (v8i8 FPR64:$src)))
define void @nvcast_v8i8(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v8i8:
; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #0xab
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <4 x half>* %a
ret void
}


; Test pattern (v4f16 (AArch64NvCast (f64 FPR64:$src)))
define void @nvcast_f64(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_f64:
; CHECK-NEXT: movi d[[REG:[0-9]+]], #0000000000000000
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> zeroinitializer, <4 x half>* %a
ret void
}

; Test pattern (v8f16 (AArch64NvCast (v4i32 FPR128:$src)))
define void @nvcast_v4i32(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v4i32:
; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #0xab, lsl #16
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <8 x half>* %a
ret void
}


; Test pattern (v8f16 (AArch64NvCast (v8i16 FPR128:$src)))
define void @nvcast_v8i16(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v8i16:
; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #0xab
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <8 x half>* %a
ret void
}


; Test pattern (v8f16 (AArch64NvCast (v16i8 FPR128:$src)))
define void @nvcast_v16i8(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v16i8:
; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #0xab
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <8 x half>* %a
ret void
}


; Test pattern (v8f16 (AArch64NvCast (v2i64 FPR128:$src)))
define void @nvcast_v2i64(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v2i64:
; CHECK-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> zeroinitializer, <8 x half>* %a
ret void
}

attributes #0 = { nounwind }

0 comments on commit 745615c

Please sign in to comment.