diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 48d5305ae0a94..8496ed1cfae82 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -3,42 +3,38 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
 
-define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
-; CHECK-LABEL: test_fptrunc_round_upward:
+define amdgpu_gs half @v_fptrunc_round_f32_to_f16_upward(float %a) {
+; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
 ; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT:    global_store_short v[6:7], v0, off
-; CHECK-NEXT:    s_endpgm
+; CHECK-NEXT:    ; return to shader part epilog
   %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
-  store half %res, ptr addrspace(1) %out, align 4
-  ret void
+  ret half %res
 }
 
-define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
-; CHECK-LABEL: test_fptrunc_round_downward:
+define amdgpu_gs half @v_fptrunc_round_f32_to_f16_downward(float %a) {
+; CHECK-LABEL: v_fptrunc_round_f32_to_f16_downward:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
 ; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT:    global_store_short v[6:7], v0, off
-; CHECK-NEXT:    s_endpgm
+; CHECK-NEXT:    ; return to shader part epilog
   %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
-  store half %res, ptr addrspace(1) %out, align 4
-  ret void
+  ret half %res
 }
 
-define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
-; CHECK-LABEL: test_fptrunc_round_upward_multiple_calls:
+define amdgpu_gs void @v_fptrunc_round_f32_to_f16_upward_multiple_calls(float %a, float %b, ptr addrspace(1) %out) {
+; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
 ; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v2, v1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v4, v1
 ; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
 ; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
-; CHECK-NEXT:    v_add_f16_e32 v0, v0, v2
+; CHECK-NEXT:    v_add_f16_e32 v0, v0, v4
 ; CHECK-NEXT:    v_add_f16_e32 v0, v1, v0
-; CHECK-NEXT:    global_store_short v[7:8], v0, off
+; CHECK-NEXT:    global_store_short v[2:3], v0, off
 ; CHECK-NEXT:    s_endpgm
   %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
   %res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
@@ -49,4 +45,56 @@ define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float
   ret void
 }
 
-declare half @llvm.fptrunc.round.f16.f32(float, metadata)
+define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addrspace(1) %out) {
+; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    ; return to shader part epilog
+  %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
+  %bitcast = bitcast half %res to i16
+  %ret = zext i16 %bitcast to i32
+  ret i32 %ret
+}
+
+define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward(float inreg %a, ptr addrspace(1) %out) {
+; CHECK-LABEL: s_fptrunc_round_f32_to_f16_downward:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    ; return to shader part epilog
+  %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
+  %bitcast = bitcast half %res to i16
+  %ret = zext i16 %bitcast to i32
+  ret i32 %ret
+}
+
+define amdgpu_gs void @s_fptrunc_round_f32_to_f16_upward_multiple_calls(float inreg %a, float inreg %b, ptr addrspace(1) %out) {
+; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_mov_b32_e32 v2, s0
+; CHECK-NEXT:    v_mov_b32_e32 v3, s1
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v4, v3
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; CHECK-NEXT:    v_add_f16_e32 v2, v2, v4
+; CHECK-NEXT:    v_add_f16_e32 v2, v3, v2
+; CHECK-NEXT:    global_store_short v[0:1], v2, off
+; CHECK-NEXT:    s_endpgm
+  %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
+  %res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
+  %res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward")
+  %res4 = fadd half %res1, %res2
+  %res5 = fadd half %res3, %res4
+  store half %res5, ptr addrspace(1) %out, align 4
+  ret void
+}