[AMDGPU] Narrow lshl from 64 to 32 bit if possible
Turn an expensive 64-bit shift into a 32-bit shift if the shift does not overflow int:
shl (ext x) => zext (shl x)

Differential Revision: https://reviews.llvm.org/D33367

llvm-svn: 303569
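
The transform rests on a simple identity: if the 32-bit source of the extension has at least C known-zero leading bits, no set bits are shifted out, so a 32-bit shift followed by a zero-extend produces the same value as the 64-bit shift of the extended operand. A minimal standalone C++ sketch of that reasoning (a hypothetical helper, using a concrete runtime value where the combine uses static known bits, and assuming the GCC/Clang clz builtin):

#include <cassert>
#include <cstdint>

// The combine fires only when the shift cannot push set bits out of the
// low 32 bits, i.e. leading-zero count of x >= shift amount.
static bool narrowShiftIsSafe(uint32_t x, unsigned c) {
  unsigned lz = x ? (unsigned)__builtin_clz(x) : 32u;
  return lz >= c;
}

int main() {
  uint32_t x = 0x3FFFFFFFu; // top two bits known zero
  unsigned c = 2;
  assert(narrowShiftIsSafe(x, c));
  uint64_t wide = (uint64_t)x << c;               // shl (zext x), c
  uint64_t narrow = (uint64_t)(uint32_t)(x << c); // zext (shl x, c)
  assert(wide == narrow);
  return 0;
}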
rampitec committed May 22, 2017
1 parent 80cb549 commit 5fa289f
Showing 14 changed files with 105 additions and 37 deletions.
44 changes: 33 additions & 11 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2595,27 +2595,49 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(

SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
-  if (N->getValueType(0) != MVT::i64)
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i64)
     return SDValue();

-  // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
-
-  // On some subtargets, 64-bit shift is a quarter rate instruction. In the
-  // common case, splitting this into a move and a 32-bit shift is faster and
-  // the same code size.
-  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!RHS)
     return SDValue();

-  unsigned RHSVal = RHS->getZExtValue();
-  if (RHSVal < 32)
-    return SDValue();

   SDValue LHS = N->getOperand(0);
+  unsigned RHSVal = RHS->getZExtValue();
+  if (!RHSVal)
+    return LHS;

   SDLoc SL(N);
   SelectionDAG &DAG = DCI.DAG;

+  switch (LHS->getOpcode()) {
+  default:
+    break;
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ANY_EXTEND: {
+    // shl (ext x) => zext (shl x), if shift does not overflow int
+    KnownBits Known;
+    SDValue X = LHS->getOperand(0);
+    DAG.computeKnownBits(X, Known);
+    unsigned LZ = Known.countMinLeadingZeros();
+    if (LZ < RHSVal)
+      break;
+    EVT XVT = X.getValueType();
+    SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
+    return DAG.getZExtOrTrunc(Shl, SL, VT);
+  }
+  }
+
+  // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
+
+  // On some subtargets, 64-bit shift is a quarter rate instruction. In the
+  // common case, splitting this into a move and a 32-bit shift is faster and
+  // the same code size.
+  if (RHSVal < 32)
+    return SDValue();
+
   SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);

   SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
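The fall-through path that remains, the build_pair split, rests on a second identity: a 64-bit left shift by C >= 32 zeroes the low word and places x << (C - 32) in the high word. A quick standalone check of that equivalence, as a hedged C++ sketch (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0x12345678u;
  for (unsigned c = 32; c < 64; ++c) {
    uint32_t hi = (uint32_t)x << (c - 32); // the narrow 32-bit shift
    uint64_t split = (uint64_t)hi << 32;   // build_pair (0, hi)
    assert(split == x << c);               // matches the wide 64-bit shift
  }
  return 0;
}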
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/add.i16.ll
@@ -84,11 +84,10 @@ define amdgpu_kernel void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i1

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i64:
-; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
-; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:{{[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -202,10 +202,10 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]

-; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
-; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
-; VI: v_add_u16_e32
-; VI: v_add_u16_e32
+; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI-DAG: v_add_u16_e32
+; VI-DAG: v_add_u16_e32

; VI: buffer_store_dwordx4
define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -50,7 +50,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
-; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], {{s[0-9]+}}
+; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -128,7 +128,7 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
-; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], {{s[0-9]+}}
+; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -135,7 +135,6 @@ define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
}

; FUNC-LABEL: {{^}}v_ctlz_i64:
-; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cmp_eq_u32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
@@ -145,7 +144,7 @@ define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
; GCN-DAG: v_cmp_ne_u32_e32 vcc, 0, [[OR]]
; GCN-DAG: v_cndmask_b32_e32 v[[CLTZ_LO:[0-9]+]], 64, v[[CTLZ:[0-9]+]], vcc
-; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
+; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI:[0-9]+]]{{\]}}
define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -121,8 +121,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
-; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
-; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
+; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}}
define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/ds_write2.ll
@@ -266,8 +266,8 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)*
}

; SI-LABEL: @simple_write2_one_val_f64
-; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
-; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; SI: s_endpgm
define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -845,10 +845,10 @@ define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(float addrspace(
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
-; GCN: v_min_f32
-; GCN: v_max_f32
-; GCN: v_min_f32
-; GCN: v_max_f32
+; GCN-DAG: v_min_f32
+; GCN-DAG: v_max_f32
+; GCN-DAG: v_min_f32
+; GCN-DAG: v_max_f32
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
@@ -356,6 +356,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)*

; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
+; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
@@ -371,6 +372,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace

; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
+; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
@@ -207,6 +207,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)*

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
+; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
@@ -222,6 +223,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
+; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
45 changes: 45 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
@@ -0,0 +1,45 @@
; RUN: llc -march=amdgcn < %s | FileCheck %s

; CHECK-LABEL: {{^}}zext_shl64_to_32:
; CHECK: s_lshl_b32
; CHECK-NOT: s_lshl_b64
define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
%and = and i32 %x, 1073741823
%ext = zext i32 %and to i64
%shl = shl i64 %ext, 2
store i64 %shl, i64 addrspace(1)* %out, align 4
ret void
}

; CHECK-LABEL: {{^}}sext_shl64_to_32:
; CHECK: s_lshl_b32
; CHECK-NOT: s_lshl_b64
define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
%and = and i32 %x, 536870911
%ext = sext i32 %and to i64
%shl = shl i64 %ext, 2
store i64 %shl, i64 addrspace(1)* %out, align 4
ret void
}

; CHECK-LABEL: {{^}}zext_shl64_overflow:
; CHECK: s_lshl_b64
; CHECK-NOT: s_lshl_b32
define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
%and = and i32 %x, 2147483647
%ext = zext i32 %and to i64
%shl = shl i64 %ext, 2
store i64 %shl, i64 addrspace(1)* %out, align 4
ret void
}

; CHECK-LABEL: {{^}}sext_shl64_overflow:
; CHECK: s_lshl_b64
; CHECK-NOT: s_lshl_b32
define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
%and = and i32 %x, 2147483647
%ext = sext i32 %and to i64
%shl = shl i64 %ext, 2
store i64 %shl, i64 addrspace(1)* %out, align 4
ret void
}
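
The masks in the new tests sit on either side of the known-bits threshold: and with 1073741823 (0x3FFFFFFF) proves two leading zero bits, exactly enough for a shift by 2, while 2147483647 (0x7FFFFFFF) proves only one, so the combine must not fire. The sext test's tighter mask 536870911 (0x1FFFFFFF) additionally keeps the shifted result clear of the sign bit, so sign- and zero-extension agree. A small C++ sketch of that boundary check (clz32 is a hypothetical helper; assumes the GCC/Clang clz builtin):

#include <cassert>
#include <cstdint>

static unsigned clz32(uint32_t v) { return v ? (unsigned)__builtin_clz(v) : 32u; }

int main() {
  // The combine requires clz(mask) >= shift amount (2 in these tests).
  assert(clz32(1073741823u) == 2); // 0x3FFFFFFF: narrows to s_lshl_b32
  assert(clz32(536870911u) == 3);  // 0x1FFFFFFF: narrows, even for sext
  assert(clz32(2147483647u) == 1); // 0x7FFFFFFF: stays s_lshl_b64
  return 0;
}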
11 changes: 5 additions & 6 deletions llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -299,10 +299,10 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)*
}

; GCN-LABEL: {{^}}and_not_mask_i64:
-; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
-; GCN: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
+; GCN: v_mov_b32_e32 v[[SHRHI:[0-9]+]], 0{{$}}
; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 20, v[[VALLO]]
-; GCN-DAG: v_and_b32_e32 v[[SHRLO]], 4, [[SHR]]
+; GCN-DAG: v_and_b32_e32 v[[SHRLO:[0-9]+]], 4, [[SHR]]
; GCN-NOT: v[[SHRLO]]
; GCN-NOT: v[[SHRHI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
@@ -360,10 +360,9 @@ define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspac
}

; GCN-LABEL: {{^}}v_uextract_bit_33_36_use_upper_half_shift_i64:
-; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:{{[0-9]+\]}}
; GCN: buffer_store_dword v[[ZERO]]
define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AMDGPU/srl.ll
@@ -201,7 +201,8 @@ define amdgpu_kernel void @s_lshr_32_i64(i64 addrspace(1)* %out, i64 %a) {

; GCN-LABEL: {{^}}v_lshr_32_i64:
; GCN-DAG: buffer_load_dword v[[HI_A:[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[VHI1:[0-9]+]], 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], v[[VHI1]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[HI_A]]:[[VHI]]{{\]}}
define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/sub.i16.ll
@@ -85,9 +85,9 @@ define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i1

; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64:
-; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
+; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI-DAG: v_subrev_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
