Skip to content

Commit

Permalink
[AArch64] added FP16 vcvth intrinsic support
Browse files Browse the repository at this point in the history
Summary: Change-Id: I0df845749c7689dfc99150ba7c19c7d0dadbd705

Reviewers: javed.absar, SjoerdMeijer

Reviewed By: SjoerdMeijer

Subscribers: llvm-commits, SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D46311

llvm-svn: 333410
  • Loading branch information
lukeg101 committed May 29, 2018
1 parent a1d69f9 commit 16092ab
Show file tree
Hide file tree
Showing 3 changed files with 255 additions and 5 deletions.
21 changes: 20 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrFormats.td
Expand Up @@ -7928,6 +7928,26 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,

multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {
let Predicates = [HasNEON, HasFullFP16] in {
def HSr : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
FPR16, FPR32, vecshiftR16, asm, []> {
let Inst{19-16} = imm{3-0};
let Inst{23-22} = 0b11;
}
def SHr : BaseSIMDScalarShift<U, opc, {?,?,?,?,?,?,?},
FPR32, FPR16, vecshiftR32, asm, []> {
let Inst{19-16} = imm{3-0};
}
def HDr : BaseSIMDScalarShift<U, opc, {?,?,?,?,?,?,?},
FPR16, FPR64, vecshiftR32, asm, []> {
let Inst{21-16} = imm{5-0};
let Inst{23-22} = 0b11;
}
def DHr : BaseSIMDScalarShift<U, opc, {?,?,?,?,?,?,?},
FPR64, FPR16, vecshiftR64, asm, []> {
let Inst{21-16} = imm{5-0};
let Inst{23-22} = 0b11;
let Inst{31} = 1;
}
def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
FPR16, FPR16, vecshiftR16, asm, []> {
let Inst{19-16} = imm{3-0};
Expand All @@ -7937,7 +7957,6 @@ multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {
FPR32, FPR32, vecshiftR32, asm, []> {
let Inst{20-16} = imm{4-0};
}

def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
FPR64, FPR64, vecshiftR64, asm, []> {
let Inst{21-16} = imm{5-0};
Expand Down
20 changes: 16 additions & 4 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Expand Up @@ -4955,17 +4955,29 @@ def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
// Scalar fixed-point <-> floating-point conversion selection patterns. Each
// `Pat` maps a call to an int_aarch64_neon_vcvt* intrinsic (a register operand
// plus a shift-immediate operand) onto the instruction named in the result dag.
// NOTE(review): this listing looks like a diff hunk whose +/- markers were
// stripped, so removed and added lines may be interleaved here — confirm
// against the real file before relying on pattern order or on the duplicates
// flagged below.
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
vecshiftR64:$imm)),
(FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
(SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
// NOTE(review): the source dag is the vcvtfxs2fp intrinsic (i64 in, f16 out)
// but the selected instruction is FCVTZSHDr, whereas every other vcvtfxs2fp
// pattern in this group selects an SCVTF* instruction. $imm is also typed
// vecshiftR16 in the source dag and vecshiftR32 in the result dag. Both look
// unintended — confirm.
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
(FCVTZSHDr (i64 FPR64:$Rn), vecshiftR32:$imm)>;
// f16 source -> i32 result (unsigned, then signed variant).
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu FPR16:$Rn, vecshiftR32:$imm)),
(FCVTZUSHr FPR16:$Rn, vecshiftR32:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs FPR16:$Rn, vecshiftR32:$imm)),
(FCVTZSSHr FPR16:$Rn, vecshiftR32:$imm)>;
// f16 source -> i64 result.
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
(FCVTZSDHr (f16 FPR16:$Rn), vecshiftR64:$imm)>;
// FPR32 source -> f16 result (unsigned variant).
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
(UCVTFHSr FPR32:$Rn, vecshiftR16:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
(UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
// NOTE(review): this f64/i64 SCVTFd pattern appears again, token-for-token,
// further down in this group — presumably one copy is a removed diff line.
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
(UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
vecshiftR64:$imm)),
(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
// NOTE(review): the next two patterns differ only in whether the FPR32 source
// operand carries an explicit (i32 ...) type; both select SCVTFHSr. Presumably
// one replaces the other — confirm which is live.
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
(SCVTFHSr FPR32:$Rn, vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR16:$imm)),
(SCVTFHSr FPR32:$Rn, vecshiftR16:$imm)>;
// Second copy of the f64/i64 SCVTFd pattern (see note above).
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
vecshiftR64:$imm)),
(UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
Expand Down
219 changes: 219 additions & 0 deletions llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
Expand Up @@ -126,3 +126,222 @@ entry:
%vrsqrtsh_f16 = tail call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b)
ret half %vrsqrtsh_f16
}

; Intrinsics exercised by the vcvth_n tests below. The second i32 parameter of
; each is the immediate the tests pass as a constant.

; Integer source, half result.
declare half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32, i32) #1
declare half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64, i32) #1
declare half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32, i32) #1

; Half source, integer result.
declare i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half, i32) #1
declare i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half, i32) #1
declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1

; Sign-extends the i16 argument to i32, then calls vcvtfxs2fp.f16.i32 with
; immediate 1. The fmov CHECK line references [[wReg]] (instead of re-defining
; it) so the test verifies that fmov reads the register sxth just wrote.
define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) {
; CHECK-LABEL: test_vcvth_n_f16_s16_1:
; CHECK: sxth w[[wReg:[0-9]+]], w0
; CHECK-NEXT: fmov s0, w[[wReg]]
; CHECK-NEXT: scvtf h0, s0, #1
; CHECK-NEXT: ret
entry:
  %sext = sext i16 %a to i32
  %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 1)
  ret half %fcvth_n
}

; Sign-extends the i16 argument to i32, then calls vcvtfxs2fp.f16.i32 with
; immediate 16. The fmov CHECK line references [[wReg]] (instead of re-defining
; it) so the test verifies that fmov reads the register sxth just wrote.
define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) {
; CHECK-LABEL: test_vcvth_n_f16_s16_16:
; CHECK: sxth w[[wReg:[0-9]+]], w0
; CHECK-NEXT: fmov s0, w[[wReg]]
; CHECK-NEXT: scvtf h0, s0, #16
; CHECK-NEXT: ret
entry:
  %sext = sext i16 %a to i32
  %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 16)
  ret half %fcvth_n
}

; Calls vcvtfxs2fp.f16.i32 on the i32 argument with immediate 1 and expects
; an fmov into s0 followed by `scvtf h0, s0, #1`.
define dso_local half @test_vcvth_n_f16_s32_1(i32 %a) {
; CHECK-LABEL: test_vcvth_n_f16_s32_1:
; CHECK: fmov s0, w0
; CHECK-NEXT: scvtf h0, s0, #1
; CHECK-NEXT: ret
entry:
  %res = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
  ret half %res
}

; Calls vcvtfxs2fp.f16.i32 on the i32 argument with immediate 16 and expects
; an fmov into s0 followed by `scvtf h0, s0, #16`.
define dso_local half @test_vcvth_n_f16_s32_16(i32 %a) {
; CHECK-LABEL: test_vcvth_n_f16_s32_16:
; CHECK: fmov s0, w0
; CHECK-NEXT: scvtf h0, s0, #16
; CHECK-NEXT: ret
entry:
  %res = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 16)
  ret half %res
}

; Calls vcvtfxs2fp.f16.i64 on the i64 argument with immediate 1.
; NOTE(review): every other vcvtfxs2fp test in this file expects `scvtf`, but
; this one expects `fcvtzs h0, d0` — looks like the expectation mirrors a
; mis-selected instruction rather than the intended one; confirm.
define dso_local half @test_vcvth_n_f16_s64_1(i64 %a) {
; CHECK-LABEL: test_vcvth_n_f16_s64_1:
; CHECK: fmov d0, x0
; CHECK-NEXT: fcvtzs h0, d0, #1
; CHECK-NEXT: ret
entry:
  %res = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 1)
  ret half %res
}

; Calls vcvtfxs2fp.f16.i64 on the i64 argument with immediate 16.
; NOTE(review): every other vcvtfxs2fp test in this file expects `scvtf`, but
; this one expects `fcvtzs h0, d0` — looks like the expectation mirrors a
; mis-selected instruction rather than the intended one; confirm.
define dso_local half @test_vcvth_n_f16_s64_16(i64 %a) {
; CHECK-LABEL: test_vcvth_n_f16_s64_16:
; CHECK: fmov d0, x0
; CHECK-NEXT: fcvtzs h0, d0, #16
; CHECK-NEXT: ret
entry:
  %res = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 16)
  ret half %res
}

; Calls vcvtfp2fxs.i32.f16 on the half argument with immediate 1, truncates the
; i32 result to i16, and expects `fcvtzs s0, h0, #1` then an fmov to w0.
define dso_local i16 @test_vcvth_n_s16_f16_1(half %a) {
; CHECK-LABEL: test_vcvth_n_s16_f16_1:
; CHECK: fcvtzs s0, h0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %wide = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
  %narrow = trunc i32 %wide to i16
  ret i16 %narrow
}

; Calls vcvtfp2fxs.i32.f16 on the half argument with immediate 16, truncates
; the i32 result to i16, and expects `fcvtzs s0, h0, #16` then an fmov to w0.
define dso_local i16 @test_vcvth_n_s16_f16_16(half %a) {
; CHECK-LABEL: test_vcvth_n_s16_f16_16:
; CHECK: fcvtzs s0, h0, #16
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %wide = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16)
  %narrow = trunc i32 %wide to i16
  ret i16 %narrow
}

; Calls vcvtfp2fxs.i32.f16 on the half argument with immediate 1 and expects
; `fcvtzs s0, h0, #1` followed by an fmov of the result to w0.
define dso_local i32 @test_vcvth_n_s32_f16_1(half %a) {
; CHECK-LABEL: test_vcvth_n_s32_f16_1:
; CHECK: fcvtzs s0, h0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %res = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
  ret i32 %res
}

; Calls vcvtfp2fxs.i32.f16 on the half argument with immediate 16 and expects
; `fcvtzs s0, h0, #16` followed by an fmov of the result to w0.
define dso_local i32 @test_vcvth_n_s32_f16_16(half %a) {
; CHECK-LABEL: test_vcvth_n_s32_f16_16:
; CHECK: fcvtzs s0, h0, #16
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %res = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16)
  ret i32 %res
}

; Calls vcvtfp2fxs.i64.f16 on the half argument with immediate 1 and expects
; `fcvtzs d0, h0, #1` followed by an fmov of the result to x0.
define dso_local i64 @test_vcvth_n_s64_f16_1(half %a) {
; CHECK-LABEL: test_vcvth_n_s64_f16_1:
; CHECK: fcvtzs d0, h0, #1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
  %res = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 1)
  ret i64 %res
}

; Calls vcvtfp2fxs.i64.f16 on the half argument with immediate 32 and expects
; `fcvtzs d0, h0, #32` followed by an fmov of the result to x0.
define dso_local i64 @test_vcvth_n_s64_f16_32(half %a) {
; CHECK-LABEL: test_vcvth_n_s64_f16_32:
; CHECK: fcvtzs d0, h0, #32
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
  %res = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 32)
  ret i64 %res
}

; Zero-extends the i16 argument to i32, then calls vcvtfxu2fp.f16.i32 with
; immediate 1. The fmov CHECK line references [[wReg]] (instead of re-defining
; it) so the test verifies that fmov reads the register the `and` just wrote.
define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) {
; CHECK-LABEL: test_vcvth_n_f16_u16_1:
; CHECK: and w[[wReg:[0-9]+]], w0, #0xffff
; CHECK-NEXT: fmov s0, w[[wReg]]
; CHECK-NEXT: ucvtf h0, s0, #1
; CHECK-NEXT: ret
entry:
  %0 = zext i16 %a to i32
  %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 1)
  ret half %fcvth_n
}

; Zero-extends the i16 argument to i32, then calls vcvtfxu2fp.f16.i32 with
; immediate 16. The fmov CHECK line references [[wReg]] (instead of re-defining
; it) so the test verifies that fmov reads the register the `and` just wrote.
define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) {
; CHECK-LABEL: test_vcvth_n_f16_u16_16:
; CHECK: and w[[wReg:[0-9]+]], w0, #0xffff
; CHECK-NEXT: fmov s0, w[[wReg]]
; CHECK-NEXT: ucvtf h0, s0, #16
; CHECK-NEXT: ret
entry:
  %0 = zext i16 %a to i32
  %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 16)
  ret half %fcvth_n
}

; Calls vcvtfxu2fp.f16.i32 on the i32 argument with immediate 1 and expects
; an fmov into s0 followed by `ucvtf h0, s0, #1`.
define dso_local half @test_vcvth_n_f16_u32_1(i32 %a) {
; CHECK-LABEL: test_vcvth_n_f16_u32_1:
; CHECK: fmov s0, w0
; CHECK-NEXT: ucvtf h0, s0, #1
; CHECK-NEXT: ret
entry:
  %res = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
  ret half %res
}

; Calls vcvtfxu2fp.f16.i32 on the i32 argument with immediate 16 and expects
; an fmov into s0 followed by `ucvtf h0, s0, #16`.
define dso_local half @test_vcvth_n_f16_u32_16(i32 %a) {
; CHECK-LABEL: test_vcvth_n_f16_u32_16:
; CHECK: fmov s0, w0
; CHECK-NEXT: ucvtf h0, s0, #16
; CHECK-NEXT: ret
entry:
  %res = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 16)
  ret half %res
}

; Calls vcvtfp2fxu.i32.f16 on the half argument with immediate 1, truncates the
; i32 result to i16, and expects `fcvtzu s0, h0, #1` then an fmov to w0.
define dso_local i16 @test_vcvth_n_u16_f16_1(half %a) {
; CHECK-LABEL: test_vcvth_n_u16_f16_1:
; CHECK: fcvtzu s0, h0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %wide = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
  %narrow = trunc i32 %wide to i16
  ret i16 %narrow
}

; Calls vcvtfp2fxu.i32.f16 on the half argument with immediate 16, truncates
; the i32 result to i16, and expects `fcvtzu s0, h0, #16` then an fmov to w0.
define dso_local i16 @test_vcvth_n_u16_f16_16(half %a) {
; CHECK-LABEL: test_vcvth_n_u16_f16_16:
; CHECK: fcvtzu s0, h0, #16
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %wide = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
  %narrow = trunc i32 %wide to i16
  ret i16 %narrow
}

; Calls vcvtfp2fxu.i32.f16 on the half argument with immediate 1 and expects
; `fcvtzu s0, h0, #1` followed by an fmov of the result to w0.
define dso_local i32 @test_vcvth_n_u32_f16_1(half %a) {
; CHECK-LABEL: test_vcvth_n_u32_f16_1:
; CHECK: fcvtzu s0, h0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %res = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
  ret i32 %res
}

; Calls vcvtfp2fxu.i32.f16 on the half argument with immediate 16 and expects
; `fcvtzu s0, h0, #16` followed by an fmov of the result to w0.
define dso_local i32 @test_vcvth_n_u32_f16_16(half %a) {
; CHECK-LABEL: test_vcvth_n_u32_f16_16:
; CHECK: fcvtzu s0, h0, #16
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %res = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
  ret i32 %res
}

0 comments on commit 16092ab

Please sign in to comment.