| @@ -0,0 +1,150 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
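| ; The SI prefix covers the default subtarget, which has no 16-bit instructions and | ||
| ; legalizes f16 arithmetic by promoting to f32; the VI prefix covers Fiji (gfx8), | ||
| ; which selects the native f16 instructions directly. | ||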
|
|
||
| ; GCN-LABEL: {{^}}fadd_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] | ||
| ; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fadd_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a, | ||
| half addrspace(1)* %b) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %r.val = fadd half %a.val, %b.val | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
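| ; 0x3c00 and 0x4000 below are the IEEE half-precision encodings of the immediates | ||
| ; 1.0 and 2.0. | ||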
| ; GCN-LABEL: {{^}}fadd_f16_imm_a | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], 0x3c00{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] | ||
| ; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 0x3c00, v[[B_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fadd_f16_imm_a( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %b) { | ||
| entry: | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %r.val = fadd half 1.0, %b.val | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fadd_f16_imm_b | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], 0x4000{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 0x4000, v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fadd_f16_imm_b( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fadd half %a.val, 2.0 | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fadd_v2f16 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] | ||
| ; SI: v_add_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] | ||
| ; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], v[[B_F16_1]], v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fadd_v2f16( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a, | ||
| <2 x half> addrspace(1)* %b) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %b.val = load <2 x half>, <2 x half> addrspace(1)* %b | ||
| %r.val = fadd <2 x half> %a.val, %b.val | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fadd_v2f16_imm_a | ||
| ; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], 0x3c00{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], 0x4000{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] | ||
| ; SI: v_add_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 0x3c00, v[[B_V2_F16]] | ||
| ; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 0x4000, v[[B_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fadd_v2f16_imm_a( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %b) { | ||
| entry: | ||
| %b.val = load <2 x half>, <2 x half> addrspace(1)* %b | ||
| %r.val = fadd <2 x half> <half 1.0, half 2.0>, %b.val | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fadd_v2f16_imm_b | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], 0x4000{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], 0x3c00{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_add_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 0x4000, v[[A_V2_F16]] | ||
| ; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 0x3c00, v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fadd_v2f16_imm_b( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fadd <2 x half> %a.val, <half 2.0, half 1.0> | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,31 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| ; Make sure fdiv is promoted to f32. | ||
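| ; The f16 operands are extended to f32, the usual f32 division expansion | ||
| ; (div_scale, rcp, fma, div_fmas, div_fixup) is emitted, and the result is | ||
| ; converted back to f16. | ||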
|
|
||
| ; GCN-LABEL: {{^}}fdiv_f16 | ||
| ; GCN: v_cvt_f32_f16 | ||
| ; GCN: v_cvt_f32_f16 | ||
| ; GCN: v_div_scale_f32 | ||
| ; GCN-DAG: v_div_scale_f32 | ||
| ; GCN-DAG: v_rcp_f32 | ||
| ; GCN: v_fma_f32 | ||
| ; GCN: v_fma_f32 | ||
| ; GCN: v_mul_f32 | ||
| ; GCN: v_fma_f32 | ||
| ; GCN: v_fma_f32 | ||
| ; GCN: v_fma_f32 | ||
| ; GCN: v_div_fmas_f32 | ||
| ; GCN: v_div_fixup_f32 | ||
| ; GCN: v_cvt_f16_f32 | ||
| define void @fdiv_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a, | ||
| half addrspace(1)* %b) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %r.val = fdiv half %a.val, %b.val | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,150 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| ; GCN-LABEL: {{^}}fmul_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] | ||
| ; SI: v_mul_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_mul_f16_e32 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fmul_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a, | ||
| half addrspace(1)* %b) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %r.val = fmul half %a.val, %b.val | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
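| ; 0x4200 and 0x4400 below are the half-precision encodings of the immediates 3.0 | ||
| ; and 4.0. | ||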
| ; GCN-LABEL: {{^}}fmul_f16_imm_a | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], 0x4200{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] | ||
| ; SI: v_mul_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_mul_f16_e32 v[[R_F16:[0-9]+]], 0x4200, v[[B_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fmul_f16_imm_a( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %b) { | ||
| entry: | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %r.val = fmul half 3.0, %b.val | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fmul_f16_imm_b | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], 0x4400{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_mul_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_mul_f16_e32 v[[R_F16:[0-9]+]], 0x4400, v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fmul_f16_imm_b( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fmul half %a.val, 4.0 | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fmul_v2f16 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] | ||
| ; SI: v_mul_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] | ||
| ; VI: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], v[[B_F16_1]], v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fmul_v2f16( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a, | ||
| <2 x half> addrspace(1)* %b) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %b.val = load <2 x half>, <2 x half> addrspace(1)* %b | ||
| %r.val = fmul <2 x half> %a.val, %b.val | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fmul_v2f16_imm_a | ||
| ; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], 0x4200{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], 0x4400{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] | ||
| ; SI: v_mul_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] | ||
| ; VI: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 0x4400, v[[B_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fmul_v2f16_imm_a( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %b) { | ||
| entry: | ||
| %b.val = load <2 x half>, <2 x half> addrspace(1)* %b | ||
| %r.val = fmul <2 x half> <half 3.0, half 4.0>, %b.val | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fmul_v2f16_imm_b | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], 0x4400{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], 0x4200{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_mul_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4400, v[[A_V2_F16]] | ||
| ; VI: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fmul_v2f16_imm_b( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fmul <2 x half> %a.val, <half 4.0, half 3.0> | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,70 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| ; GCN-LABEL: {{^}}fpext_f16_to_f32 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[R_F32:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_dword v[[R_F32]] | ||
| ; GCN: s_endpgm | ||
| define void @fpext_f16_to_f32( | ||
| float addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fpext half %a.val to float | ||
| store float %r.val, float addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fpext_f16_to_f64 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; GCN: v_cvt_f64_f32_e32 v{{\[}}[[R_F64_0:[0-9]+]]:[[R_F64_1:[0-9]+]]{{\]}}, v[[A_F32]] | ||
| ; GCN: buffer_store_dwordx2 v{{\[}}[[R_F64_0]]:[[R_F64_1]]{{\]}} | ||
| ; GCN: s_endpgm | ||
| define void @fpext_f16_to_f64( | ||
| double addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fpext half %a.val to double | ||
| store double %r.val, double addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fpext_v2f16_to_v2f32 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; VI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[R_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[R_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; GCN: buffer_store_dwordx2 v{{\[}}[[R_F32_0]]:[[R_F32_1]]{{\]}} | ||
| ; GCN: s_endpgm | ||
| define void @fpext_v2f16_to_v2f32( | ||
| <2 x float> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fpext <2 x half> %a.val to <2 x float> | ||
| store <2 x float> %r.val, <2 x float> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fpext_v2f16_to_v2f64 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; GCN: v_cvt_f64_f32_e32 v{{\[}}{{[0-9]+}}:[[R_F64_3:[0-9]+]]{{\]}}, v[[A_F32_1]] | ||
| ; GCN: v_cvt_f64_f32_e32 v{{\[}}[[R_F64_0:[0-9]+]]:{{[0-9]+}}{{\]}}, v[[A_F32_0]] | ||
| ; GCN: buffer_store_dwordx4 v{{\[}}[[R_F64_0]]:[[R_F64_3]]{{\]}} | ||
| ; GCN: s_endpgm | ||
| define void @fpext_v2f16_to_v2f64( | ||
| <2 x double> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fpext <2 x half> %a.val to <2 x double> | ||
| store <2 x double> %r.val, <2 x double> addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,112 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| ; GCN-LABEL: {{^}}fptosi_f16_to_i16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_cvt_i32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]] | ||
| ; VI: v_cvt_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_I16]] | ||
| ; GCN: s_endpgm | ||
| define void @fptosi_f16_to_i16( | ||
| i16 addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fptosi half %a.val to i16 | ||
| store i16 %r.val, i16 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fptosi_f16_to_i32 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; GCN: v_cvt_i32_f32_e32 v[[R_I32:[0-9]+]], v[[A_F32]] | ||
| ; GCN: buffer_store_dword v[[R_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @fptosi_f16_to_i32( | ||
| i32 addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fptosi half %a.val to i32 | ||
| store i32 %r.val, i32 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; Need to make sure we promote f16 to f32 when converting f16 to i64. An existing | ||
| ; test already checks the code generated for 'i64 = fp_to_sint f32'. | ||
|
|
||
| ; GCN-LABEL: {{^}}fptosi_f16_to_i64 | ||
| ; GCN: buffer_load_ushort | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: s_endpgm | ||
| define void @fptosi_f16_to_i64( | ||
| i64 addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fptosi half %a.val to i64 | ||
| store i64 %r.val, i64 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fptosi_v2f16_to_v2i16 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] | ||
| ; SI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] | ||
| ; VI: v_cvt_i16_f16_e32 v[[R_I16_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; VI: v_cvt_i16_f16_e32 v[[R_I16_1:[0-9]+]], v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_HI]], v[[R_I16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_I16]] | ||
| ; GCN: s_endpgm | ||
| define void @fptosi_v2f16_to_v2i16( | ||
| <2 x i16> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fptosi <2 x half> %a.val to <2 x i16> | ||
| store <2 x i16> %r.val, <2 x i16> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fptosi_v2f16_to_v2i32 | ||
| ; GCN: buffer_load_dword | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: v_cvt_i32_f32_e32 | ||
| ; GCN: v_cvt_i32_f32_e32 | ||
| ; GCN: buffer_store_dwordx2 | ||
| ; GCN: s_endpgm | ||
| define void @fptosi_v2f16_to_v2i32( | ||
| <2 x i32> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fptosi <2 x half> %a.val to <2 x i32> | ||
| store <2 x i32> %r.val, <2 x i32> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; Need to make sure we promote f16 to f32 when converting f16 to i64. An existing | ||
| ; test already checks the code generated for 'i64 = fp_to_sint f32'. | ||
|
|
||
| ; GCN-LABEL: {{^}}fptosi_v2f16_to_v2i64 | ||
| ; GCN: buffer_load_dword | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: s_endpgm | ||
| define void @fptosi_v2f16_to_v2i64( | ||
| <2 x i64> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fptosi <2 x half> %a.val to <2 x i64> | ||
| store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,113 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| ; GCN-LABEL: {{^}}fptoui_f16_to_i16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_cvt_u32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]] | ||
| ; VI: v_cvt_u16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_I16]] | ||
| ; GCN: s_endpgm | ||
| define void @fptoui_f16_to_i16( | ||
| i16 addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fptoui half %a.val to i16 | ||
| store i16 %r.val, i16 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fptoui_f16_to_i32 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; GCN: v_cvt_u32_f32_e32 v[[R_I32:[0-9]+]], v[[A_F32]] | ||
| ; GCN: buffer_store_dword v[[R_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @fptoui_f16_to_i32( | ||
| i32 addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fptoui half %a.val to i32 | ||
| store i32 %r.val, i32 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; Need to make sure we promote f16 to f32 when converting f16 to i64. An existing | ||
| ; test already checks the code generated for 'i64 = fp_to_uint f32'. | ||
|
|
||
| ; GCN-LABEL: {{^}}fptoui_f16_to_i64 | ||
| ; GCN: buffer_load_ushort | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: s_endpgm | ||
| define void @fptoui_f16_to_i64( | ||
| i64 addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fptoui half %a.val to i64 | ||
| store i64 %r.val, i64 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fptoui_v2f16_to_v2i16 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; SI: v_cvt_u32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] | ||
| ; SI: v_cvt_u32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] | ||
| ; VI: v_cvt_u16_f16_e32 v[[R_I16_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; VI: v_cvt_u16_f16_e32 v[[R_I16_1:[0-9]+]], v[[A_F16_1]] | ||
| ; VI: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]] | ||
| ; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_HI]], v[[R_I16_0]] | ||
| ; VI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_HI]], v[[R_I16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_I16]] | ||
| ; GCN: s_endpgm | ||
| define void @fptoui_v2f16_to_v2i16( | ||
| <2 x i16> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fptoui <2 x half> %a.val to <2 x i16> | ||
| store <2 x i16> %r.val, <2 x i16> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fptoui_v2f16_to_v2i32 | ||
| ; GCN: buffer_load_dword | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: v_cvt_u32_f32_e32 | ||
| ; GCN: v_cvt_u32_f32_e32 | ||
| ; GCN: buffer_store_dwordx2 | ||
| ; GCN: s_endpgm | ||
| define void @fptoui_v2f16_to_v2i32( | ||
| <2 x i32> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fptoui <2 x half> %a.val to <2 x i32> | ||
| store <2 x i32> %r.val, <2 x i32> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; Need to make sure we promote f16 to f32 when converting f16 to i64. An existing | ||
| ; test already checks the code generated for 'i64 = fp_to_uint f32'. | ||
|
|
||
| ; GCN-LABEL: {{^}}fptoui_v2f16_to_v2i64 | ||
| ; GCN: buffer_load_dword | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: v_cvt_f32_f16_e32 | ||
| ; GCN: s_endpgm | ||
| define void @fptoui_v2f16_to_v2i64( | ||
| <2 x i64> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fptoui <2 x half> %a.val to <2 x i64> | ||
| store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,72 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| ; GCN-LABEL: {{^}}fptrunc_f32_to_f16 | ||
| ; GCN: buffer_load_dword v[[A_F32:[0-9]+]] | ||
| ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fptrunc_f32_to_f16( | ||
| half addrspace(1)* %r, | ||
| float addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load float, float addrspace(1)* %a | ||
| %r.val = fptrunc float %a.val to half | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
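| ; There is no single-step f64-to-f16 conversion, so the value is narrowed to f32 | ||
| ; first and then to f16. | ||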
| ; GCN-LABEL: {{^}}fptrunc_f64_to_f16 | ||
| ; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}} | ||
| ; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}} | ||
| ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fptrunc_f64_to_f16( | ||
| half addrspace(1)* %r, | ||
| double addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load double, double addrspace(1)* %a | ||
| %r.val = fptrunc double %a.val to half | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16 | ||
| ; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}} | ||
| ; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]] | ||
| ; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]] | ||
| ; GCN-DAG: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fptrunc_v2f32_to_v2f16( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x float> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x float>, <2 x float> addrspace(1)* %a | ||
| %r.val = fptrunc <2 x float> %a.val to <2 x half> | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16 | ||
| ; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}} | ||
| ; GCN: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}} | ||
| ; GCN: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}} | ||
| ; GCN: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]] | ||
| ; GCN: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| define void @fptrunc_v2f64_to_v2f16( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x double> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x double>, <2 x double> addrspace(1)* %a | ||
| %r.val = fptrunc <2 x double> %a.val to <2 x half> | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,150 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| ; GCN-LABEL: {{^}}fsub_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] | ||
| ; SI: v_subrev_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_subrev_f16_e32 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fsub_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a, | ||
| half addrspace(1)* %b) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %r.val = fsub half %a.val, %b.val | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fsub_f16_imm_a | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], 0x3c00{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] | ||
| ; SI: v_subrev_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_sub_f16_e32 v[[R_F16:[0-9]+]], 0x3c00, v[[B_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fsub_f16_imm_a( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %b) { | ||
| entry: | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %r.val = fsub half 1.0, %b.val | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
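| ; A constant right-hand side of fsub is folded into an add of the negated value; | ||
| ; 0xc000 and 0xbc00 (in fsub_v2f16_imm_b below) are the half encodings of -2.0 | ||
| ; and -1.0. | ||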
| ; GCN-LABEL: {{^}}fsub_f16_imm_b | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], 0xc000{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 0xc000, v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fsub_f16_imm_b( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = fsub half %a.val, 2.0 | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fsub_v2f16 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] | ||
| ; SI: v_subrev_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_subrev_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_subrev_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] | ||
| ; VI: v_subrev_f16_e32 v[[R_F16_1:[0-9]+]], v[[B_F16_1]], v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fsub_v2f16( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a, | ||
| <2 x half> addrspace(1)* %b) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %b.val = load <2 x half>, <2 x half> addrspace(1)* %b | ||
| %r.val = fsub <2 x half> %a.val, %b.val | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fsub_v2f16_imm_a | ||
| ; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], 0x3c00{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], 0x4000{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] | ||
| ; SI: v_subrev_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_subrev_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], 0x3c00, v[[B_V2_F16]] | ||
| ; VI: v_sub_f16_e32 v[[R_F16_1:[0-9]+]], 0x4000, v[[B_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fsub_v2f16_imm_a( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %b) { | ||
| entry: | ||
| %b.val = load <2 x half>, <2 x half> addrspace(1)* %b | ||
| %r.val = fsub <2 x half> <half 1.0, half 2.0>, %b.val | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}fsub_v2f16_imm_b | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], 0x4000{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], 0x3c00{{$}} | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_subrev_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_subrev_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 0xc000, v[[A_V2_F16]] | ||
| ; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 0xbc00, v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fsub_v2f16_imm_b( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = fsub <2 x half> %a.val, <half 2.0, half 1.0> | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,155 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
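| ; Only the VI target is run: v_cmp_class_f16 is a 16-bit instruction that older | ||
| ; subtargets do not provide. | ||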
|
|
||
| declare half @llvm.fabs.f16(half %a) | ||
| declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b) | ||
|
|
||
| ; GCN-LABEL: {{^}}class_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: buffer_load_dword v[[B_I32:[0-9]+]] | ||
| ; VI: v_cmp_class_f16_e32 vcc, v[[A_F16]], v[[B_I32]] | ||
| ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]] | ||
| ; GCN: buffer_store_dword v[[R_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @class_f16( | ||
| i32 addrspace(1)* %r, | ||
| half addrspace(1)* %a, | ||
| i32 addrspace(1)* %b) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %b.val = load i32, i32 addrspace(1)* %b | ||
| %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 %b.val) | ||
| %r.val.sext = sext i1 %r.val to i32 | ||
| store i32 %r.val.sext, i32 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}class_f16_fabs | ||
| ; GCN: s_load_dword s[[SA_F16:[0-9]+]] | ||
| ; GCN: s_load_dword s[[SB_I32:[0-9]+]] | ||
| ; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]] | ||
| ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |v[[VA_F16]]|, s[[SB_I32]] | ||
| ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]] | ||
| ; GCN: buffer_store_dword v[[VR_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @class_f16_fabs( | ||
| i32 addrspace(1)* %r, | ||
| half %a.val, | ||
| i32 %b.val) { | ||
| entry: | ||
| %a.val.fabs = call half @llvm.fabs.f16(half %a.val) | ||
| %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs, i32 %b.val) | ||
| %r.val.sext = sext i1 %r.val to i32 | ||
| store i32 %r.val.sext, i32 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}class_f16_fneg | ||
| ; GCN: s_load_dword s[[SA_F16:[0-9]+]] | ||
| ; GCN: s_load_dword s[[SB_I32:[0-9]+]] | ||
| ; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]] | ||
| ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -v[[VA_F16]], s[[SB_I32]] | ||
| ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]] | ||
| ; GCN: buffer_store_dword v[[VR_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @class_f16_fneg( | ||
| i32 addrspace(1)* %r, | ||
| half %a.val, | ||
| i32 %b.val) { | ||
| entry: | ||
| %a.val.fneg = fsub half -0.0, %a.val | ||
| %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fneg, i32 %b.val) | ||
| %r.val.sext = sext i1 %r.val to i32 | ||
| store i32 %r.val.sext, i32 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}class_f16_fabs_fneg | ||
| ; GCN: s_load_dword s[[SA_F16:[0-9]+]] | ||
| ; GCN: s_load_dword s[[SB_I32:[0-9]+]] | ||
| ; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]] | ||
| ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|v[[VA_F16]]|, s[[SB_I32]] | ||
| ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]] | ||
| ; GCN: buffer_store_dword v[[VR_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @class_f16_fabs_fneg( | ||
| i32 addrspace(1)* %r, | ||
| half %a.val, | ||
| i32 %b.val) { | ||
| entry: | ||
| %a.val.fabs = call half @llvm.fabs.f16(half %a.val) | ||
| %a.val.fabs.fneg = fsub half -0.0, %a.val.fabs | ||
| %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs.fneg, i32 %b.val) | ||
| %r.val.sext = sext i1 %r.val to i32 | ||
| store i32 %r.val.sext, i32 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}class_f16_1 | ||
| ; GCN: s_load_dword s[[SA_F16:[0-9]+]] | ||
| ; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]] | ||
| ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], 1{{$}} | ||
| ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]] | ||
| ; GCN: buffer_store_dword v[[VR_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @class_f16_1( | ||
| i32 addrspace(1)* %r, | ||
| half %a.val) { | ||
| entry: | ||
| %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1) | ||
| %r.val.sext = sext i1 %r.val to i32 | ||
| store i32 %r.val.sext, i32 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}class_f16_64 | ||
| ; GCN: s_load_dword s[[SA_F16:[0-9]+]] | ||
| ; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]] | ||
| ; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], 64{{$}} | ||
| ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]] | ||
| ; GCN: buffer_store_dword v[[VR_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @class_f16_64( | ||
| i32 addrspace(1)* %r, | ||
| half %a.val) { | ||
| entry: | ||
| %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 64) | ||
| %r.val.sext = sext i1 %r.val to i32 | ||
| store i32 %r.val.sext, i32 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
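| ; The class mask has ten test bits, so 0x3ff (1023) enables every class; the | ||
| ; 0x1ff (511) mask in the next test leaves the top bit clear. | ||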
| ; GCN-LABEL: {{^}}class_f16_full_mask | ||
| ; GCN: s_load_dword s[[SA_F16:[0-9]+]] | ||
| ; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x3ff{{$}} | ||
| ; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]] | ||
| ; VI: v_cmp_class_f16_e32 vcc, v[[VA_F16]], v[[MASK]] | ||
| ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc | ||
| ; GCN: buffer_store_dword v[[VR_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @class_f16_full_mask( | ||
| i32 addrspace(1)* %r, | ||
| half %a.val) { | ||
| entry: | ||
| %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1023) | ||
| %r.val.sext = sext i1 %r.val to i32 | ||
| store i32 %r.val.sext, i32 addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}class_f16_nine_bit_mask | ||
| ; GCN: s_load_dword s[[SA_F16:[0-9]+]] | ||
| ; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x1ff{{$}} | ||
| ; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]] | ||
| ; VI: v_cmp_class_f16_e32 vcc, v[[VA_F16]], v[[MASK]] | ||
| ; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc | ||
| ; GCN: buffer_store_dword v[[VR_I32]] | ||
| ; GCN: s_endpgm | ||
| define void @class_f16_nine_bit_mask( | ||
| i32 addrspace(1)* %r, | ||
| half %a.val) { | ||
| entry: | ||
| %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 511) | ||
| %r.val.sext = sext i1 %r.val to i32 | ||
| store i32 %r.val.sext, i32 addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,18 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.amdgcn.cos.f16(half %a) | ||
|
|
||
| ; GCN-LABEL: {{^}}cos_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; VI: v_cos_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @cos_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.amdgcn.cos.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,129 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c) | ||
|
|
||
| ; GCN-LABEL: {{^}}div_fixup_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] | ||
| ; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @div_fixup_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a, | ||
| half addrspace(1)* %b, | ||
| half addrspace(1)* %c) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %c.val = load half, half addrspace(1)* %c | ||
| %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half %c.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}div_fixup_f16_imm_a | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] | ||
| ; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x4200{{$}} | ||
| ; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @div_fixup_f16_imm_a( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %b, | ||
| half addrspace(1)* %c) { | ||
| entry: | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %c.val = load half, half addrspace(1)* %c | ||
| %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half %c.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}div_fixup_f16_imm_b | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] | ||
| ; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x4200{{$}} | ||
| ; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @div_fixup_f16_imm_b( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a, | ||
| half addrspace(1)* %c) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %c.val = load half, half addrspace(1)* %c | ||
| %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half %c.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}div_fixup_f16_imm_c | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x4200{{$}} | ||
| ; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @div_fixup_f16_imm_c( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a, | ||
| half addrspace(1)* %b) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half 3.0) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}div_fixup_f16_imm_a_imm_b | ||
| ; VI: v_mov_b32_e32 v[[AB_F16:[0-9]+]], 0x4200{{$}} | ||
| ; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] | ||
| ; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[AB_F16]], v[[AB_F16]], v[[C_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @div_fixup_f16_imm_a_imm_b( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %c) { | ||
| entry: | ||
| %c.val = load half, half addrspace(1)* %c | ||
| %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half 3.0, half %c.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}div_fixup_f16_imm_b_imm_c | ||
| ; VI: v_mov_b32_e32 v[[BC_F16:[0-9]+]], 0x4200{{$}} | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[BC_F16]], v[[BC_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @div_fixup_f16_imm_b_imm_c( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half 3.0) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}div_fixup_f16_imm_a_imm_c | ||
| ; VI: v_mov_b32_e32 v[[AC_F16:[0-9]+]], 0x4200{{$}} | ||
| ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] | ||
| ; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[AC_F16]], v[[B_F16]], v[[AC_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @div_fixup_f16_imm_a_imm_c( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %b) { | ||
| entry: | ||
| %b.val = load half, half addrspace(1)* %b | ||
| %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half 3.0) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,18 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.amdgcn.fract.f16(half %a) | ||
|
|
||
| ; GCN-LABEL: {{^}}fract_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; VI: v_fract_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @fract_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.amdgcn.fract.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,18 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare i32 @llvm.amdgcn.frexp.exp.f16(half %a) | ||
|
|
||
| ; GCN-LABEL: {{^}}frexp_exp_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_I16]] | ||
| define void @frexp_exp_f16( | ||
| i16 addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call i32 @llvm.amdgcn.frexp.exp.f16(half %a.val) | ||
| %r.val.i16 = trunc i32 %r.val to i16 | ||
| store i16 %r.val.i16, i16 addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,18 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.amdgcn.frexp.mant.f16(half %a) | ||
|
|
||
| ; GCN-LABEL: {{^}}frexp_mant_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; VI: v_frexp_mant_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @frexp_mant_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.amdgcn.frexp.mant.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,45 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.amdgcn.ldexp.f16(half %a, i32 %b) | ||
|
|
||
| ; GCN-LABEL: {{^}}ldexp_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: buffer_load_dword v[[B_I32:[0-9]+]] | ||
| ; VI: v_ldexp_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_I32]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| define void @ldexp_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a, | ||
| i32 addrspace(1)* %b) { | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %b.val = load i32, i32 addrspace(1)* %b | ||
| %r.val = call half @llvm.amdgcn.ldexp.f16(half %a.val, i32 %b.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}ldexp_f16_imm_a | ||
| ; GCN: buffer_load_dword v[[B_I32:[0-9]+]] | ||
| ; VI: v_ldexp_f16_e32 v[[R_F16:[0-9]+]], 0x4000, v[[B_I32]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| define void @ldexp_f16_imm_a( | ||
| half addrspace(1)* %r, | ||
| i32 addrspace(1)* %b) { | ||
| %b.val = load i32, i32 addrspace(1)* %b | ||
| %r.val = call half @llvm.amdgcn.ldexp.f16(half 2.0, i32 %b.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
| ; GCN-LABEL: {{^}}ldexp_f16_imm_b | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; VI: v_ldexp_f16_e64 v[[R_F16:[0-9]+]], v[[A_F16]], 2{{$}} | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| define void @ldexp_f16_imm_b( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.amdgcn.ldexp.f16(half %a.val, i32 2) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,18 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.amdgcn.rcp.f16(half %a) | ||
|
|
||
| ; GCN-LABEL: {{^}}rcp_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; VI: v_rcp_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @rcp_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.amdgcn.rcp.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } |
| @@ -0,0 +1,18 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.amdgcn.rsq.f16(half %a) | ||
|
|
||
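| ; Make sure llvm.amdgcn.rsq.f16 selects directly to v_rsq_f16 on VI. | ||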
| ; GCN-LABEL: {{^}}rsq_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; VI: v_rsq_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @rsq_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.amdgcn.rsq.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.amdgcn.sin.f16(half %a) | ||
|
|
||
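| ; Make sure llvm.amdgcn.sin.f16 selects directly to v_sin_f16 on VI, with no 1/2pi scaling or fract step. | ||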
| ; GCN-LABEL: {{^}}sin_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; VI: v_sin_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @sin_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.amdgcn.sin.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.ceil.f16(half %a) | ||
| declare <2 x half> @llvm.ceil.v2f16(<2 x half> %a) | ||
|
|
||
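| ; On SI, f16 ceil is legalized via f32: convert up, v_ceil_f32, convert back. VI selects v_ceil_f16 directly. | ||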
| ; GCN-LABEL: {{^}}ceil_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_ceil_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_ceil_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @ceil_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.ceil.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
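| ; The <2 x half> case is scalarized: the high element is extracted with a 16-bit shift, each half is ceiled, and the results are repacked with and/lshl/or. | ||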
| ; GCN-LABEL: {{^}}ceil_v2f16 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_ceil_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_ceil_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_ceil_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; VI: v_ceil_f16_e32 v[[R_F16_1:[0-9]+]], v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @ceil_v2f16( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = call <2 x half> @llvm.ceil.v2f16(<2 x half> %a.val) | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.cos.f16(half %a) | ||
| declare <2 x half> @llvm.cos.v2f16(<2 x half> %a) | ||
|
|
||
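| ; f16 cos is expanded through f32 on both SI and VI: convert up, multiply by 1/2pi, v_fract_f32, v_cos_f32, convert back. | ||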
| ; GCN-LABEL: {{^}}cos_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; GCN: v_mul_f32_e32 v[[M_F32:[0-9]+]], {{1/2pi|0x3e22f983}}, v[[A_F32]] | ||
| ; GCN: v_fract_f32_e32 v[[F_F32:[0-9]+]], v[[M_F32]] | ||
| ; GCN: v_cos_f32_e32 v[[R_F32:[0-9]+]], v[[F_F32]] | ||
| ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @cos_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.cos.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
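| ; The <2 x half> case is scalarized; SI materializes 1/2pi (0x3e22f983) in a register, while VI folds it as a literal into the multiplies. | ||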
| ; GCN-LABEL: {{^}}cos_v2f16 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; SI: v_mov_b32_e32 v[[HALF_PIE:[0-9]+]], 0x3e22f983{{$}} | ||
| ; GCN: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; GCN: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_mul_f32_e32 v[[M_F32_0:[0-9]+]], v[[HALF_PIE]], v[[A_F32_0]] | ||
| ; VI: v_mul_f32_e32 v[[M_F32_0:[0-9]+]], 1/2pi, v[[A_F32_0]] | ||
| ; GCN: v_fract_f32_e32 v[[F_F32_0:[0-9]+]], v[[M_F32_0]] | ||
| ; SI: v_mul_f32_e32 v[[M_F32_1:[0-9]+]], v[[HALF_PIE]], v[[A_F32_1]] | ||
| ; VI: v_mul_f32_e32 v[[M_F32_1:[0-9]+]], 1/2pi, v[[A_F32_1]] | ||
| ; GCN: v_fract_f32_e32 v[[F_F32_1:[0-9]+]], v[[M_F32_1]] | ||
| ; GCN: v_cos_f32_e32 v[[R_F32_0:[0-9]+]], v[[F_F32_0]] | ||
| ; GCN: v_cos_f32_e32 v[[R_F32_1:[0-9]+]], v[[F_F32_1]] | ||
| ; GCN: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; GCN: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @cos_v2f16( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = call <2 x half> @llvm.cos.v2f16(<2 x half> %a.val) | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.exp2.f16(half %a) | ||
| declare <2 x half> @llvm.exp2.v2f16(<2 x half> %a) | ||
|
|
||
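| ; On SI, f16 exp2 is legalized via f32: convert up, v_exp_f32, convert back. VI selects v_exp_f16 directly. | ||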
| ; GCN-LABEL: {{^}}exp2_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_exp_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_exp_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @exp2_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.exp2.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
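| ; The vector case splits into two scalar exp2 operations and repacks the halves into one dword. | ||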
| ; GCN-LABEL: {{^}}exp2_v2f16 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_exp_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_exp_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_exp_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; VI: v_exp_f16_e32 v[[R_F16_1:[0-9]+]], v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @exp2_v2f16( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = call <2 x half> @llvm.exp2.v2f16(<2 x half> %a.val) | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s | ||
| ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s | ||
|
|
||
| declare half @llvm.floor.f16(half %a) | ||
| declare <2 x half> @llvm.floor.v2f16(<2 x half> %a) | ||
|
|
||
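| ; On SI, f16 floor is legalized via f32: convert up, v_floor_f32, convert back. VI selects v_floor_f16 directly. | ||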
| ; GCN-LABEL: {{^}}floor_f16 | ||
| ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] | ||
| ; SI: v_floor_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] | ||
| ; VI: v_floor_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]] | ||
| ; GCN: buffer_store_short v[[R_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @floor_f16( | ||
| half addrspace(1)* %r, | ||
| half addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load half, half addrspace(1)* %a | ||
| %r.val = call half @llvm.floor.f16(half %a.val) | ||
| store half %r.val, half addrspace(1)* %r | ||
| ret void | ||
| } | ||
|
|
||
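| ; The vector case splits into two scalar floor operations and repacks the halves into one dword. | ||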
| ; GCN-LABEL: {{^}}floor_v2f16 | ||
| ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] | ||
| ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] | ||
| ; SI: v_floor_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] | ||
| ; SI: v_floor_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]] | ||
| ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] | ||
| ; VI: v_floor_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]] | ||
| ; VI: v_floor_f16_e32 v[[R_F16_1:[0-9]+]], v[[A_F16_1]] | ||
| ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] | ||
| ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] | ||
| ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] | ||
| ; GCN: buffer_store_dword v[[R_V2_F16]] | ||
| ; GCN: s_endpgm | ||
| define void @floor_v2f16( | ||
| <2 x half> addrspace(1)* %r, | ||
| <2 x half> addrspace(1)* %a) { | ||
| entry: | ||
| %a.val = load <2 x half>, <2 x half> addrspace(1)* %a | ||
| %r.val = call <2 x half> @llvm.floor.v2f16(<2 x half> %a.val) | ||
| store <2 x half> %r.val, <2 x half> addrspace(1)* %r | ||
| ret void | ||
| } |