|
127 | 127 | ret void
|
128 | 128 | }
|
129 | 129 |
|
; sub_sube_commuted: loads %v from %arg[workitem.id.x], adds the sign-extended
; result of (workitem.id.x ugt workitem.id.y), subtracts the kernel argument %a,
; and stores 100 minus that value back to the same element.
; The check lines for both prefixes expect the compare result to be consumed as
; the last operand of a v_subbrev whose other sources are 0 and the loaded
; value, followed by a subtract involving a scalar register and an add of 0x64.
| 130 | +; GCN-LABEL: {{^}}sub_sube_commuted: |
| 131 | +; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} |
| 132 | +; GCN-DAG: buffer_load_dword [[V:v[0-9]+]], |
| 133 | +; GCN: v_subbrev_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]] |
| 134 | +; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]] |
| 135 | +; GCN: v_add_i32_e32 {{.*}}, 0x64, [[SUB]] |
| 136 | + |
| 137 | +; GFX9-LABEL: {{^}}sub_sube_commuted: |
| 138 | +; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} |
| 139 | +; GFX9-DAG: global_load_dword [[V:v[0-9]+]], |
| 140 | +; GFX9: v_subbrev_co_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]] |
| 141 | +; GFX9: v_sub_u32_e32 [[SUB:v[0-9]+]], s{{[0-9]+}}, [[SUBB]] |
| 142 | +; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[SUB]] |
| 143 | +define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
| 144 | +bb:
| 145 | + %x = tail call i32 @llvm.amdgcn.workitem.id.x()
| 146 | + %y = tail call i32 @llvm.amdgcn.workitem.id.y()
| 147 | + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x ; address of %arg[%x]
| 148 | + %v = load i32, i32 addrspace(1)* %gep, align 4
| 149 | + %cmp = icmp ugt i32 %x, %y ; unsigned %x > %y
| 150 | + %ext = sext i1 %cmp to i32 ; 0 when %cmp is false, -1 when true
| 151 | + %adde = add i32 %v, %ext ; %v, decremented by one when %cmp is true
| 152 | + %sub = sub i32 %adde, %a
| 153 | + %sub2 = sub i32 100, %sub ; 100 - (%v + %ext - %a); 100 is the 0x64 in the checks
| 154 | + store i32 %sub2, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
| 155 | + ret void
| 156 | +}
| 157 | + |
130 | 158 | ; GCN-LABEL: {{^}}sube_sub:
|
131 | 159 | ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
|
132 | 160 | ; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
|
|
0 commit comments