267 changes: 237 additions & 30 deletions llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
@@ -1,12 +1,35 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefixes=SI,GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI,GCN %s

; FIXME: Check something here. Currently it seems fabs + fneg aren't folded
; into 2 source modifiers, although theoretically that should work.
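; As an illustrative sketch (not one of the checked patterns; registers are
; hypothetical), folding both operations into VOP3 source modifiers would turn
; IR like:
;   %fabs = call double @llvm.fabs.f64(double %x)
;   %fneg = fsub double -0.0, %fabs
;   %fadd = fadd double %y, %fneg
; into a single instruction of the form:
;   v_add_f64 v[0:1], s[0:1], -|v[2:3]|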

; GCN-LABEL: {{^}}fneg_fabs_fadd_f64:
; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define amdgpu_kernel void @fneg_fabs_fadd_f64(ptr addrspace(1) %out, double %x, double %y) {
; SI-LABEL: fneg_fabs_fadd_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: v_mov_b32_e32 v0, s6
; SI-NEXT: v_mov_b32_e32 v1, s7
; SI-NEXT: v_add_f64 v[0:1], s[8:9], -|v[0:1]|
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabs_fadd_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s6
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: v_add_f64 v[0:1], s[0:1], -|v[0:1]|
; VI-NEXT: v_mov_b32_e32 v2, s4
; VI-NEXT: v_mov_b32_e32 v3, s5
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
%fadd = fadd double %y, %fsub
@@ -15,6 +38,29 @@ define amdgpu_kernel void @fneg_fabs_fadd_f64(ptr addrspace(1) %out, double %x,
}

define amdgpu_kernel void @v_fneg_fabs_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) %xptr, ptr addrspace(1) %yptr) {
; SI-LABEL: v_fneg_fabs_fadd_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_add_f64 v[0:1], s[4:5], -|s[4:5]|
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: v_fneg_fabs_fadd_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_f64 v[0:1], s[2:3], -|s[2:3]|
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%x = load double, ptr addrspace(1) %xptr, align 8
%y = load double, ptr addrspace(1) %xptr, align 8
%fabs = call double @llvm.fabs.f64(double %x)
@@ -24,68 +70,227 @@ define amdgpu_kernel void @v_fneg_fabs_fadd_f64(ptr addrspace(1) %out, ptr addrs
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_fmul_f64:
; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define amdgpu_kernel void @fneg_fabs_fmul_f64(ptr addrspace(1) %out, double %x, double %y) {
; SI-LABEL: fneg_fabs_fmul_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: v_mov_b32_e32 v0, s6
; SI-NEXT: v_mov_b32_e32 v1, s7
; SI-NEXT: v_mul_f64 v[0:1], s[8:9], -|v[0:1]|
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabs_fmul_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s6
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: v_mul_f64 v[0:1], s[0:1], -|v[0:1]|
; VI-NEXT: v_mov_b32_e32 v2, s4
; VI-NEXT: v_mov_b32_e32 v3, s5
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
%fmul = fmul double %y, %fsub
store double %fmul, ptr addrspace(1) %out, align 8
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_free_f64:
define amdgpu_kernel void @fneg_fabs_free_f64(ptr addrspace(1) %out, i64 %in) {
; SI-LABEL: fneg_fabs_free_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s3, 31
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabs_free_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_or_b32 s0, s3, 0x80000000
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v3, s0
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; VI-NEXT: s_endpgm
%bc = bitcast i64 %in to double
%fabs = call double @llvm.fabs.f64(double %bc)
%fsub = fsub double -0.000000e+00, %fabs
store double %fsub, ptr addrspace(1) %out
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_fn_free_f64:
; SI: s_bitset1_b32
; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define amdgpu_kernel void @fneg_fabs_fn_free_f64(ptr addrspace(1) %out, i64 %in) {
; SI-LABEL: fneg_fabs_fn_free_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s3, 31
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabs_fn_free_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_or_b32 s0, s3, 0x80000000
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v3, s0
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; VI-NEXT: s_endpgm
%bc = bitcast i64 %in to double
%fabs = call double @fabs(double %bc)
%fsub = fsub double -0.000000e+00, %fabs
store double %fsub, ptr addrspace(1) %out
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_f64:
; SI-DAG: s_load_dwordx2 s[[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x13
; VI-DAG: s_load_dwordx2 s[[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x4c
; GCN-DAG: s_bitset1_b32 s[[HI_X]], 31
; GCN-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
; GCN-DAG: v_mov_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]]
; GCN: buffer_store_dwordx2 v[[[LO_V]]:[[HI_V]]]
define amdgpu_kernel void @fneg_fabs_f64(ptr addrspace(1) %out, [8 x i32], double %in) {
; SI-LABEL: fneg_fabs_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s5, 31
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabs_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bitset1_b32 s3, 31
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%fabs = call double @llvm.fabs.f64(double %in)
%fsub = fsub double -0.000000e+00, %fabs
store double %fsub, ptr addrspace(1) %out, align 8
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_v2f64:
; GCN-NOT: 0x80000000
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabs_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
; SI-LABEL: fneg_fabs_v2f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s7, 31
; SI-NEXT: s_bitset1_b32 s5, 31
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_mov_b32_e32 v2, s6
; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: v_mov_b32_e32 v3, s7
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabs_v2f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_or_b32 s2, s7, 0x80000000
; VI-NEXT: s_or_b32 s3, s5, 0x80000000
; VI-NEXT: v_mov_b32_e32 v5, s1
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v2, s6
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v3, s2
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm
%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
%fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
store <2 x double> %fsub, ptr addrspace(1) %out
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_v4f64:
; GCN-NOT: 0x80000000
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabs_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
; SI-LABEL: fneg_fabs_v4f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s7, 31
; SI-NEXT: s_bitset1_b32 s11, 31
; SI-NEXT: s_bitset1_b32 s9, 31
; SI-NEXT: s_bitset1_b32 s5, 31
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v4, s4
; SI-NEXT: v_mov_b32_e32 v6, s6
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; SI-NEXT: v_mov_b32_e32 v5, s5
; SI-NEXT: v_mov_b32_e32 v7, s7
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabs_v4f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x44
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bitset1_b32 s7, 31
; VI-NEXT: s_bitset1_b32 s5, 31
; VI-NEXT: s_or_b32 s2, s11, 0x80000000
; VI-NEXT: s_or_b32 s3, s9, 0x80000000
; VI-NEXT: v_mov_b32_e32 v3, s2
; VI-NEXT: s_add_u32 s2, s0, 16
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: s_addc_u32 s3, s1, 0
; VI-NEXT: v_mov_b32_e32 v5, s3
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v2, s10
; VI-NEXT: v_mov_b32_e32 v4, s2
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: v_mov_b32_e32 v5, s1
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: v_mov_b32_e32 v2, s6
; VI-NEXT: v_mov_b32_e32 v3, s7
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm
%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
%fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
store <4 x double> %fsub, ptr addrspace(1) %out
@@ -96,3 +301,5 @@ declare double @fabs(double) readnone
declare double @llvm.fabs.f64(double) readnone
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
258 changes: 213 additions & 45 deletions llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -1,104 +1,270 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=VI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=R600,FUNC %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck --check-prefixes=SI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga < %s | FileCheck --check-prefixes=VI,FUNC %s

; FUNC-LABEL: {{^}}fneg_fabsf_fadd_f32:
; SI-NOT: and
; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define amdgpu_kernel void @fneg_fabsf_fadd_f32(ptr addrspace(1) %out, float %x, float %y) {
; SI-LABEL: fneg_fabsf_fadd_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: v_sub_f32_e64 v0, s3, |v0|
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabsf_fadd_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_sub_f32_e64 v2, s3, |v0|
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
%fadd = fadd float %y, %fsub
store float %fadd, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}fneg_fabsf_fmul_f32:
; SI-NOT: and
; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
; SI-NOT: and
define amdgpu_kernel void @fneg_fabsf_fmul_f32(ptr addrspace(1) %out, float %x, float %y) {
; SI-LABEL: fneg_fabsf_fmul_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: v_mul_f32_e64 v0, s3, -|v0|
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabsf_fmul_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mul_f32_e64 v2, s3, -|v0|
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
%fmul = fmul float %y, %fsub
store float %fmul, ptr addrspace(1) %out, align 4
ret void
}

; DAGCombiner will transform:
; (fabsf (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
; unless isFabsFree returns true
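; As a sketch (illustrative only; names are hypothetical), that combine
; rewrites IR such as:
;   %f = bitcast i32 %a to float
;   %fabs = call float @llvm.fabs.f32(float %f)
; into the integer form:
;   %masked = and i32 %a, 2147483647   ; 0x7FFFFFFF clears the sign bit
;   %fabs.bc = bitcast i32 %masked to float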

; FUNC-LABEL: {{^}}fneg_fabsf_free_f32:
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
; R600: -PV

; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; VI: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: fneg_fabsf_free_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_or_b32 s4, s2, 0x80000000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabsf_free_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bitset1_b32 s2, 31
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%bc = bitcast i32 %in to float
%fabs = call float @llvm.fabs.f32(float %bc)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}fneg_fabsf_fn_free_f32:
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
; R600: -PV

; SI: s_load_dwordx2 s[0:1], s[2:3], 0x9
define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: fneg_fabsf_fn_free_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s4, 31
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabsf_fn_free_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s4, s[2:3], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_or_b32 s2, s4, 0x80000000
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%bc = bitcast i32 %in to float
%fabs = call float @fabsf(float %bc)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}fneg_fabsf_f32:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define amdgpu_kernel void @fneg_fabsf_f32(ptr addrspace(1) %out, float %in) {
; SI-LABEL: fneg_fabsf_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_or_b32 s4, s2, 0x80000000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabsf_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bitset1_b32 s2, 31
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%fabs = call float @llvm.fabs.f32(float %in)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}v_fneg_fabsf_f32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define amdgpu_kernel void @v_fneg_fabsf_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_fneg_fabsf_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s10, s6
; SI-NEXT: s_mov_b32 s11, s7
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s8, s2
; SI-NEXT: s_mov_b32 s9, s3
; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_or_b32_e32 v0, 0x80000000, v0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: v_fneg_fabsf_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: flat_load_dword v2, v[0:1]
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_or_b32_e32 v2, 0x80000000, v2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%val = load float, ptr addrspace(1) %in, align 4
%fabs = call float @llvm.fabs.f32(float %val)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}fneg_fabsf_v2f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: -PV
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: -PV

; FIXME: In this case two uses of the constant should be folded
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
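; A sketch of the intended folding (assumed lowering, not a checked pattern):
; materialize the sign-bit mask once and reuse it for both elements, e.g.
;   s_mov_b32 s4, 0x80000000
;   s_or_b32 s2, s2, s4
;   s_or_b32 s3, s3, s4
; instead of encoding the constant separately for each use.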
define amdgpu_kernel void @fneg_fabsf_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; SI-LABEL: fneg_fabsf_v2f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s3, 31
; SI-NEXT: s_bitset1_b32 s2, 31
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabsf_v2f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bitset1_b32 s3, 31
; VI-NEXT: s_bitset1_b32 s2, 31
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
store <2 x float> %fsub, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}fneg_fabsf_v4f32:
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-LABEL: fneg_fabsf_v4f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s7, 31
; SI-NEXT: s_bitset1_b32 s6, 31
; SI-NEXT: s_bitset1_b32 s5, 31
; SI-NEXT: s_bitset1_b32 s4, 31
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: v_mov_b32_e32 v2, s6
; SI-NEXT: v_mov_b32_e32 v3, s7
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fneg_fabsf_v4f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_or_b32 s2, s7, 0x80000000
; VI-NEXT: s_or_b32 s3, s6, 0x80000000
; VI-NEXT: s_bitset1_b32 s5, 31
; VI-NEXT: s_bitset1_b32 s4, 31
; VI-NEXT: v_mov_b32_e32 v5, s1
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: v_mov_b32_e32 v2, s3
; VI-NEXT: v_mov_b32_e32 v3, s2
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
store <4 x float> %fsub, ptr addrspace(1) %out
@@ -112,3 +278,5 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; FUNC: {{.*}}
781 changes: 633 additions & 148 deletions llvm/test/CodeGen/AMDGPU/fneg.ll

Large diffs are not rendered by default.

428 changes: 342 additions & 86 deletions llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll

Large diffs are not rendered by default.

630 changes: 373 additions & 257 deletions llvm/test/CodeGen/AMDGPU/packed-op-sel.ll

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/spill-agpr.mir
@@ -1,7 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=regallocfast -o - %s | FileCheck -check-prefix=GFX908-SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -passes=regallocfast -o - %s | FileCheck -check-prefix=GFX908-SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=regallocfast,prologepilog -o - %s | FileCheck -check-prefix=GFX908-EXPANDED %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=regallocfast -o - %s | FileCheck -check-prefix=GFX90A-SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=regallocfast -o - %s | FileCheck -check-prefix=GFX90A-SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=regallocfast,prologepilog -o - %s | FileCheck -check-prefix=GFX90A-EXPANDED %s

---
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -passes=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s

# Make sure spill/restore of 192 bit registers works. We have to
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/spill224.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -passes=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s

# Make sure spill/restore of 224 bit registers works.
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/spill288.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -passes=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s

# Make sure spill/restore of 288 bit registers works.
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/spill320.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -passes=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s

# Make sure spill/restore of 320 bit registers works.
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/spill352.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -passes=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s

# Make sure spill/restore of 352 bit registers works.
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/spill384.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -passes=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s

# Make sure spill/restore of 384 bit registers works.
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck %s
# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=regallocfast -o - %s | FileCheck %s

---
name: bar
478 changes: 478 additions & 0 deletions llvm/test/CodeGen/AMDGPU/xor-r600.ll

Large diffs are not rendered by default.

766 changes: 666 additions & 100 deletions llvm/test/CodeGen/AMDGPU/xor.ll

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions llvm/test/CodeGen/ARM/regalloc-fast-rewrite-implicits.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=armv7-apple-ios -run-pass=regallocfast -o - %s | FileCheck %s
# RUN: llc -mtriple=armv7-apple-ios -passes=regallocfast -o - %s | FileCheck %s


# tBX_RET uses an implicit vreg with a sub-register. That implicit use will
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/LoongArch/global-address.ll
@@ -14,40 +14,40 @@ define void @foo() nounwind {
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA32NOPIC-NEXT: ld.w $a0, $a0, %got_pc_lo12(G)
; LA32NOPIC-NEXT: ld.w $a0, $a0, 0
; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
; LA32NOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
; LA32NOPIC-NEXT: addi.w $a0, $a0, %pc_lo12(g)
; LA32NOPIC-NEXT: ld.w $a0, $a0, 0
; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
; LA32NOPIC-NEXT: ret
;
; LA32PIC-LABEL: foo:
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA32PIC-NEXT: ld.w $a0, $a0, %got_pc_lo12(G)
; LA32PIC-NEXT: ld.w $a0, $a0, 0
; LA32PIC-NEXT: ld.w $zero, $a0, 0
; LA32PIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
; LA32PIC-NEXT: addi.w $a0, $a0, %pc_lo12(.Lg$local)
; LA32PIC-NEXT: ld.w $a0, $a0, 0
; LA32PIC-NEXT: ld.w $zero, $a0, 0
; LA32PIC-NEXT: ret
;
; LA64NOPIC-LABEL: foo:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA64NOPIC-NEXT: ld.d $a0, $a0, %got_pc_lo12(G)
; LA64NOPIC-NEXT: ld.w $a0, $a0, 0
; LA64NOPIC-NEXT: ld.w $zero, $a0, 0
; LA64NOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
; LA64NOPIC-NEXT: addi.d $a0, $a0, %pc_lo12(g)
; LA64NOPIC-NEXT: ld.w $a0, $a0, 0
; LA64NOPIC-NEXT: ld.w $zero, $a0, 0
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: foo:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA64PIC-NEXT: ld.d $a0, $a0, %got_pc_lo12(G)
; LA64PIC-NEXT: ld.w $a0, $a0, 0
; LA64PIC-NEXT: ld.w $zero, $a0, 0
; LA64PIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
; LA64PIC-NEXT: addi.d $a0, $a0, %pc_lo12(.Lg$local)
; LA64PIC-NEXT: ld.w $a0, $a0, 0
; LA64PIC-NEXT: ld.w $zero, $a0, 0
; LA64PIC-NEXT: ret
;
; LA64LARGENOPIC-LABEL: foo:
@@ -57,13 +57,13 @@ define void @foo() nounwind {
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0
; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0
; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGENOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %pc_lo12(g)
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %pc64_lo20(g)
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g)
; LA64LARGENOPIC-NEXT: add.d $a0, $t8, $a0
; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0
; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGENOPIC-NEXT: ret
;
; LA64LARGEPIC-LABEL: foo:
@@ -73,13 +73,13 @@ define void @foo() nounwind {
; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0
; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0
; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGEPIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(.Lg$local)
; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(.Lg$local)
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(.Lg$local)
; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0
; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0
; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGEPIC-NEXT: ret
%V = load volatile i32, ptr @G
%v = load volatile i32, ptr @g
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/LoongArch/intrinsic-la64.ll
@@ -178,7 +178,7 @@ entry:
define void @csrrd_d_noret() {
; CHECK-LABEL: csrrd_d_noret:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrrd $a0, 1
; CHECK-NEXT: csrrd $zero, 1
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1)
@@ -240,7 +240,7 @@ entry:
define void @iocsrrd_d_noret(i32 %a) {
; CHECK-LABEL: iocsrrd_d_noret:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: iocsrrd.d $a0, $a0
; CHECK-NEXT: iocsrrd.d $zero, $a0
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a)
@@ -290,7 +290,7 @@ entry:
define void @lddir_d_noret(i64 %a) {
; CHECK-LABEL: lddir_d_noret:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lddir $a0, $a0, 1
; CHECK-NEXT: lddir $zero, $a0, 1
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1)
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/LoongArch/intrinsic.ll
@@ -73,7 +73,7 @@ entry:
define void @movfcsr2gr_noret() nounwind {
; CHECK-LABEL: movfcsr2gr_noret:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movfcsr2gr $a0, $fcsr1
; CHECK-NEXT: movfcsr2gr $zero, $fcsr1
; CHECK-NEXT: ret
entry:
%res = call i32 @llvm.loongarch.movfcsr2gr(i32 1)
@@ -103,7 +103,7 @@ entry:
define void @csrrd_w_noret() {
; CHECK-LABEL: csrrd_w_noret:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrrd $a0, 1
; CHECK-NEXT: csrrd $zero, 1
; CHECK-NEXT: ret
entry:
%0 = tail call i32 @llvm.loongarch.csrrd.w(i32 1)
@@ -185,7 +185,7 @@ entry:
define void @iocsrrd_b_noret(i32 %a) {
; CHECK-LABEL: iocsrrd_b_noret:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: iocsrrd.b $a0, $a0
; CHECK-NEXT: iocsrrd.b $zero, $a0
; CHECK-NEXT: ret
entry:
%0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 %a)
@@ -195,7 +195,7 @@ entry:
define void @iocsrrd_h_noret(i32 %a) {
; CHECK-LABEL: iocsrrd_h_noret:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: iocsrrd.h $a0, $a0
; CHECK-NEXT: iocsrrd.h $zero, $a0
; CHECK-NEXT: ret
entry:
%0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 %a)
@@ -205,7 +205,7 @@ entry:
define void @iocsrrd_w_noret(i32 %a) {
; CHECK-LABEL: iocsrrd_w_noret:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: iocsrrd.w $a0, $a0
; CHECK-NEXT: iocsrrd.w $zero, $a0
; CHECK-NEXT: ret
entry:
%0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 %a)
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/LoongArch/ir-instruction/br.ll
@@ -21,7 +21,7 @@ define void @foo_br_eq(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: beq $a2, $a0, .LBB1_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB1_2: # %end
; LA32-NEXT: ret
;
@@ -31,7 +31,7 @@ define void @foo_br_eq(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: beq $a2, $a0, .LBB1_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB1_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -51,7 +51,7 @@ define void @foo_br_ne(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bne $a2, $a0, .LBB2_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB2_2: # %end
; LA32-NEXT: ret
;
@@ -61,7 +61,7 @@ define void @foo_br_ne(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bne $a2, $a0, .LBB2_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB2_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -81,7 +81,7 @@ define void @foo_br_slt(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: blt $a2, $a0, .LBB3_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB3_2: # %end
; LA32-NEXT: ret
;
@@ -91,7 +91,7 @@ define void @foo_br_slt(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: blt $a2, $a0, .LBB3_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB3_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -111,7 +111,7 @@ define void @foo_br_sge(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bge $a2, $a0, .LBB4_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB4_2: # %end
; LA32-NEXT: ret
;
@@ -121,7 +121,7 @@ define void @foo_br_sge(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bge $a2, $a0, .LBB4_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB4_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -141,7 +141,7 @@ define void @foo_br_ult(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bltu $a2, $a0, .LBB5_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB5_2: # %end
; LA32-NEXT: ret
;
@@ -151,7 +151,7 @@ define void @foo_br_ult(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bltu $a2, $a0, .LBB5_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB5_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -171,7 +171,7 @@ define void @foo_br_uge(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bgeu $a2, $a0, .LBB6_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB6_2: # %end
; LA32-NEXT: ret
;
@@ -181,7 +181,7 @@ define void @foo_br_uge(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bgeu $a2, $a0, .LBB6_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB6_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -202,7 +202,7 @@ define void @foo_br_sgt(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: blt $a0, $a2, .LBB7_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB7_2: # %end
; LA32-NEXT: ret
;
@@ -212,7 +212,7 @@ define void @foo_br_sgt(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: blt $a0, $a2, .LBB7_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB7_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -232,7 +232,7 @@ define void @foo_br_sle(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bge $a0, $a2, .LBB8_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB8_2: # %end
; LA32-NEXT: ret
;
@@ -242,7 +242,7 @@ define void @foo_br_sle(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bge $a0, $a2, .LBB8_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB8_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -262,7 +262,7 @@ define void @foo_br_ugt(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bltu $a0, $a2, .LBB9_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB9_2: # %end
; LA32-NEXT: ret
;
@@ -272,7 +272,7 @@ define void @foo_br_ugt(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bltu $a0, $a2, .LBB9_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB9_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -292,7 +292,7 @@ define void @foo_br_ule(i32 %a, ptr %b) nounwind {
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bgeu $a0, $a2, .LBB10_2
; LA32-NEXT: # %bb.1: # %test
; LA32-NEXT: ld.w $a0, $a1, 0
; LA32-NEXT: ld.w $zero, $a1, 0
; LA32-NEXT: .LBB10_2: # %end
; LA32-NEXT: ret
;
@@ -302,7 +302,7 @@ define void @foo_br_ule(i32 %a, ptr %b) nounwind {
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: bgeu $a0, $a2, .LBB10_2
; LA64-NEXT: # %bb.1: # %test
; LA64-NEXT: ld.w $a0, $a1, 0
; LA64-NEXT: ld.w $zero, $a1, 0
; LA64-NEXT: .LBB10_2: # %end
; LA64-NEXT: ret
%val = load volatile i32, ptr %b
@@ -321,11 +321,11 @@ end:
define void @foo_br_cc(ptr %a, i1 %cc) nounwind {
; ALL-LABEL: foo_br_cc:
; ALL: # %bb.0:
; ALL-NEXT: ld.w $a2, $a0, 0
; ALL-NEXT: ld.w $zero, $a0, 0
; ALL-NEXT: andi $a1, $a1, 1
; ALL-NEXT: bnez $a1, .LBB11_2
; ALL-NEXT: # %bb.1: # %test
; ALL-NEXT: ld.w $a0, $a0, 0
; ALL-NEXT: ld.w $zero, $a0, 0
; ALL-NEXT: .LBB11_2: # %end
; ALL-NEXT: ret
%val = load volatile i32, ptr %a
96 changes: 48 additions & 48 deletions llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
@@ -57,7 +57,7 @@ define i32 @load_store_global_array(i32 %a) nounwind {
; LA32NOPIC-NEXT: addi.w $a2, $a1, %pc_lo12(arr)
; LA32NOPIC-NEXT: ld.w $a1, $a2, 0
; LA32NOPIC-NEXT: st.w $a0, $a2, 0
; LA32NOPIC-NEXT: ld.w $a3, $a2, 36
; LA32NOPIC-NEXT: ld.w $zero, $a2, 36
; LA32NOPIC-NEXT: st.w $a0, $a2, 36
; LA32NOPIC-NEXT: move $a0, $a1
; LA32NOPIC-NEXT: ret
@@ -68,7 +68,7 @@ define i32 @load_store_global_array(i32 %a) nounwind {
; LA32PIC-NEXT: addi.w $a2, $a1, %pc_lo12(.Larr$local)
; LA32PIC-NEXT: ld.w $a1, $a2, 0
; LA32PIC-NEXT: st.w $a0, $a2, 0
; LA32PIC-NEXT: ld.w $a3, $a2, 36
; LA32PIC-NEXT: ld.w $zero, $a2, 36
; LA32PIC-NEXT: st.w $a0, $a2, 36
; LA32PIC-NEXT: move $a0, $a1
; LA32PIC-NEXT: ret
@@ -79,7 +79,7 @@ define i32 @load_store_global_array(i32 %a) nounwind {
; LA64NOPIC-NEXT: addi.d $a2, $a1, %pc_lo12(arr)
; LA64NOPIC-NEXT: ld.w $a1, $a2, 0
; LA64NOPIC-NEXT: st.w $a0, $a2, 0
; LA64NOPIC-NEXT: ld.w $a3, $a2, 36
; LA64NOPIC-NEXT: ld.w $zero, $a2, 36
; LA64NOPIC-NEXT: st.w $a0, $a2, 36
; LA64NOPIC-NEXT: move $a0, $a1
; LA64NOPIC-NEXT: ret
@@ -90,7 +90,7 @@ define i32 @load_store_global_array(i32 %a) nounwind {
; LA64PIC-NEXT: addi.d $a2, $a1, %pc_lo12(.Larr$local)
; LA64PIC-NEXT: ld.w $a1, $a2, 0
; LA64PIC-NEXT: st.w $a0, $a2, 0
; LA64PIC-NEXT: ld.w $a3, $a2, 36
; LA64PIC-NEXT: ld.w $zero, $a2, 36
; LA64PIC-NEXT: st.w $a0, $a2, 36
; LA64PIC-NEXT: move $a0, $a1
; LA64PIC-NEXT: ret
@@ -108,30 +108,30 @@ define i64 @ld_b(ptr %a) nounwind {
; LA32NOPIC-LABEL: ld_b:
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: ld.b $a2, $a0, 1
; LA32NOPIC-NEXT: ld.b $a0, $a0, 0
; LA32NOPIC-NEXT: ld.b $zero, $a0, 0
; LA32NOPIC-NEXT: srai.w $a1, $a2, 31
; LA32NOPIC-NEXT: move $a0, $a2
; LA32NOPIC-NEXT: ret
;
; LA32PIC-LABEL: ld_b:
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: ld.b $a2, $a0, 1
; LA32PIC-NEXT: ld.b $a0, $a0, 0
; LA32PIC-NEXT: ld.b $zero, $a0, 0
; LA32PIC-NEXT: srai.w $a1, $a2, 31
; LA32PIC-NEXT: move $a0, $a2
; LA32PIC-NEXT: ret
;
; LA64NOPIC-LABEL: ld_b:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: ld.b $a1, $a0, 1
; LA64NOPIC-NEXT: ld.b $a0, $a0, 0
; LA64NOPIC-NEXT: ld.b $zero, $a0, 0
; LA64NOPIC-NEXT: move $a0, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: ld_b:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: ld.b $a1, $a0, 1
; LA64PIC-NEXT: ld.b $a0, $a0, 0
; LA64PIC-NEXT: ld.b $zero, $a0, 0
; LA64PIC-NEXT: move $a0, $a1
; LA64PIC-NEXT: ret
%1 = getelementptr i8, ptr %a, i64 1
@@ -145,30 +145,30 @@ define i64 @ld_h(ptr %a) nounwind {
; LA32NOPIC-LABEL: ld_h:
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: ld.h $a2, $a0, 4
; LA32NOPIC-NEXT: ld.h $a0, $a0, 0
; LA32NOPIC-NEXT: ld.h $zero, $a0, 0
; LA32NOPIC-NEXT: srai.w $a1, $a2, 31
; LA32NOPIC-NEXT: move $a0, $a2
; LA32NOPIC-NEXT: ret
;
; LA32PIC-LABEL: ld_h:
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: ld.h $a2, $a0, 4
; LA32PIC-NEXT: ld.h $a0, $a0, 0
; LA32PIC-NEXT: ld.h $zero, $a0, 0
; LA32PIC-NEXT: srai.w $a1, $a2, 31
; LA32PIC-NEXT: move $a0, $a2
; LA32PIC-NEXT: ret
;
; LA64NOPIC-LABEL: ld_h:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: ld.h $a1, $a0, 4
; LA64NOPIC-NEXT: ld.h $a0, $a0, 0
; LA64NOPIC-NEXT: ld.h $zero, $a0, 0
; LA64NOPIC-NEXT: move $a0, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: ld_h:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: ld.h $a1, $a0, 4
; LA64PIC-NEXT: ld.h $a0, $a0, 0
; LA64PIC-NEXT: ld.h $zero, $a0, 0
; LA64PIC-NEXT: move $a0, $a1
; LA64PIC-NEXT: ret
%1 = getelementptr i16, ptr %a, i64 2
@@ -182,30 +182,30 @@ define i64 @ld_w(ptr %a) nounwind {
; LA32NOPIC-LABEL: ld_w:
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: ld.w $a2, $a0, 12
; LA32NOPIC-NEXT: ld.w $a0, $a0, 0
; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
; LA32NOPIC-NEXT: srai.w $a1, $a2, 31
; LA32NOPIC-NEXT: move $a0, $a2
; LA32NOPIC-NEXT: ret
;
; LA32PIC-LABEL: ld_w:
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: ld.w $a2, $a0, 12
; LA32PIC-NEXT: ld.w $a0, $a0, 0
; LA32PIC-NEXT: ld.w $zero, $a0, 0
; LA32PIC-NEXT: srai.w $a1, $a2, 31
; LA32PIC-NEXT: move $a0, $a2
; LA32PIC-NEXT: ret
;
; LA64NOPIC-LABEL: ld_w:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: ld.w $a1, $a0, 12
; LA64NOPIC-NEXT: ld.w $a0, $a0, 0
; LA64NOPIC-NEXT: ld.w $zero, $a0, 0
; LA64NOPIC-NEXT: move $a0, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: ld_w:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: ld.w $a1, $a0, 12
; LA64PIC-NEXT: ld.w $a0, $a0, 0
; LA64PIC-NEXT: ld.w $zero, $a0, 0
; LA64PIC-NEXT: move $a0, $a1
; LA64PIC-NEXT: ret
%1 = getelementptr i32, ptr %a, i64 3
@@ -220,31 +220,31 @@ define i64 @ld_d(ptr %a) nounwind {
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: ld.w $a1, $a0, 28
; LA32NOPIC-NEXT: ld.w $a2, $a0, 24
; LA32NOPIC-NEXT: ld.w $a3, $a0, 4
; LA32NOPIC-NEXT: ld.w $a0, $a0, 0
; LA32NOPIC-NEXT: ld.w $zero, $a0, 4
; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
; LA32NOPIC-NEXT: move $a0, $a2
; LA32NOPIC-NEXT: ret
;
; LA32PIC-LABEL: ld_d:
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: ld.w $a1, $a0, 28
; LA32PIC-NEXT: ld.w $a2, $a0, 24
; LA32PIC-NEXT: ld.w $a3, $a0, 4
; LA32PIC-NEXT: ld.w $a0, $a0, 0
; LA32PIC-NEXT: ld.w $zero, $a0, 4
; LA32PIC-NEXT: ld.w $zero, $a0, 0
; LA32PIC-NEXT: move $a0, $a2
; LA32PIC-NEXT: ret
;
; LA64NOPIC-LABEL: ld_d:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: ld.d $a1, $a0, 24
; LA64NOPIC-NEXT: ld.d $a0, $a0, 0
; LA64NOPIC-NEXT: ld.d $zero, $a0, 0
; LA64NOPIC-NEXT: move $a0, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: ld_d:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: ld.d $a1, $a0, 24
; LA64PIC-NEXT: ld.d $a0, $a0, 0
; LA64PIC-NEXT: ld.d $zero, $a0, 0
; LA64PIC-NEXT: move $a0, $a1
; LA64PIC-NEXT: ret
%1 = getelementptr i64, ptr %a, i64 3
@@ -375,7 +375,7 @@ define i64 @ldx_b(ptr %a, i64 %idx) nounwind {
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: add.w $a1, $a0, $a1
; LA32NOPIC-NEXT: ld.b $a2, $a1, 0
; LA32NOPIC-NEXT: ld.b $a0, $a0, 0
; LA32NOPIC-NEXT: ld.b $zero, $a0, 0
; LA32NOPIC-NEXT: srai.w $a1, $a2, 31
; LA32NOPIC-NEXT: move $a0, $a2
; LA32NOPIC-NEXT: ret
@@ -384,22 +384,22 @@ define i64 @ldx_b(ptr %a, i64 %idx) nounwind {
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: add.w $a1, $a0, $a1
; LA32PIC-NEXT: ld.b $a2, $a1, 0
; LA32PIC-NEXT: ld.b $a0, $a0, 0
; LA32PIC-NEXT: ld.b $zero, $a0, 0
; LA32PIC-NEXT: srai.w $a1, $a2, 31
; LA32PIC-NEXT: move $a0, $a2
; LA32PIC-NEXT: ret
;
; LA64NOPIC-LABEL: ldx_b:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: ldx.b $a1, $a0, $a1
; LA64NOPIC-NEXT: ld.b $a0, $a0, 0
; LA64NOPIC-NEXT: ld.b $zero, $a0, 0
; LA64NOPIC-NEXT: move $a0, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: ldx_b:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: ldx.b $a1, $a0, $a1
; LA64PIC-NEXT: ld.b $a0, $a0, 0
; LA64PIC-NEXT: ld.b $zero, $a0, 0
; LA64PIC-NEXT: move $a0, $a1
; LA64PIC-NEXT: ret
%1 = getelementptr i8, ptr %a, i64 %idx
@@ -414,7 +414,7 @@ define i64 @ldx_h(ptr %a, i64 %idx) nounwind {
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: alsl.w $a1, $a1, $a0, 1
; LA32NOPIC-NEXT: ld.h $a2, $a1, 0
; LA32NOPIC-NEXT: ld.h $a0, $a0, 0
; LA32NOPIC-NEXT: ld.h $zero, $a0, 0
; LA32NOPIC-NEXT: srai.w $a1, $a2, 31
; LA32NOPIC-NEXT: move $a0, $a2
; LA32NOPIC-NEXT: ret
@@ -423,7 +423,7 @@ define i64 @ldx_h(ptr %a, i64 %idx) nounwind {
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: alsl.w $a1, $a1, $a0, 1
; LA32PIC-NEXT: ld.h $a2, $a1, 0
; LA32PIC-NEXT: ld.h $a0, $a0, 0
; LA32PIC-NEXT: ld.h $zero, $a0, 0
; LA32PIC-NEXT: srai.w $a1, $a2, 31
; LA32PIC-NEXT: move $a0, $a2
; LA32PIC-NEXT: ret
@@ -432,15 +432,15 @@ define i64 @ldx_h(ptr %a, i64 %idx) nounwind {
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: slli.d $a1, $a1, 1
; LA64NOPIC-NEXT: ldx.h $a1, $a0, $a1
; LA64NOPIC-NEXT: ld.h $a0, $a0, 0
; LA64NOPIC-NEXT: ld.h $zero, $a0, 0
; LA64NOPIC-NEXT: move $a0, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: ldx_h:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: slli.d $a1, $a1, 1
; LA64PIC-NEXT: ldx.h $a1, $a0, $a1
; LA64PIC-NEXT: ld.h $a0, $a0, 0
; LA64PIC-NEXT: ld.h $zero, $a0, 0
; LA64PIC-NEXT: move $a0, $a1
; LA64PIC-NEXT: ret
%1 = getelementptr i16, ptr %a, i64 %idx
@@ -455,7 +455,7 @@ define i64 @ldx_w(ptr %a, i64 %idx) nounwind {
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: alsl.w $a1, $a1, $a0, 2
; LA32NOPIC-NEXT: ld.w $a2, $a1, 0
; LA32NOPIC-NEXT: ld.w $a0, $a0, 0
; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
; LA32NOPIC-NEXT: srai.w $a1, $a2, 31
; LA32NOPIC-NEXT: move $a0, $a2
; LA32NOPIC-NEXT: ret
@@ -464,7 +464,7 @@ define i64 @ldx_w(ptr %a, i64 %idx) nounwind {
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: alsl.w $a1, $a1, $a0, 2
; LA32PIC-NEXT: ld.w $a2, $a1, 0
; LA32PIC-NEXT: ld.w $a0, $a0, 0
; LA32PIC-NEXT: ld.w $zero, $a0, 0
; LA32PIC-NEXT: srai.w $a1, $a2, 31
; LA32PIC-NEXT: move $a0, $a2
; LA32PIC-NEXT: ret
@@ -473,15 +473,15 @@ define i64 @ldx_w(ptr %a, i64 %idx) nounwind {
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: slli.d $a1, $a1, 2
; LA64NOPIC-NEXT: ldx.w $a1, $a0, $a1
; LA64NOPIC-NEXT: ld.w $a0, $a0, 0
; LA64NOPIC-NEXT: ld.w $zero, $a0, 0
; LA64NOPIC-NEXT: move $a0, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: ldx_w:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: slli.d $a1, $a1, 2
; LA64PIC-NEXT: ldx.w $a1, $a0, $a1
; LA64PIC-NEXT: ld.w $a0, $a0, 0
; LA64PIC-NEXT: ld.w $zero, $a0, 0
; LA64PIC-NEXT: move $a0, $a1
; LA64PIC-NEXT: ret
%1 = getelementptr i32, ptr %a, i64 %idx
@@ -497,8 +497,8 @@ define i64 @ldx_d(ptr %a, i64 %idx) nounwind {
; LA32NOPIC-NEXT: alsl.w $a1, $a1, $a0, 3
; LA32NOPIC-NEXT: ld.w $a2, $a1, 0
; LA32NOPIC-NEXT: ld.w $a1, $a1, 4
; LA32NOPIC-NEXT: ld.w $a3, $a0, 0
; LA32NOPIC-NEXT: ld.w $a0, $a0, 4
; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
; LA32NOPIC-NEXT: ld.w $zero, $a0, 4
; LA32NOPIC-NEXT: move $a0, $a2
; LA32NOPIC-NEXT: ret
;
@@ -507,24 +507,24 @@ define i64 @ldx_d(ptr %a, i64 %idx) nounwind {
; LA32PIC-NEXT: alsl.w $a1, $a1, $a0, 3
; LA32PIC-NEXT: ld.w $a2, $a1, 0
; LA32PIC-NEXT: ld.w $a1, $a1, 4
; LA32PIC-NEXT: ld.w $a3, $a0, 0
; LA32PIC-NEXT: ld.w $a0, $a0, 4
; LA32PIC-NEXT: ld.w $zero, $a0, 0
; LA32PIC-NEXT: ld.w $zero, $a0, 4
; LA32PIC-NEXT: move $a0, $a2
; LA32PIC-NEXT: ret
;
; LA64NOPIC-LABEL: ldx_d:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: slli.d $a1, $a1, 3
; LA64NOPIC-NEXT: ldx.d $a1, $a0, $a1
; LA64NOPIC-NEXT: ld.d $a0, $a0, 0
; LA64NOPIC-NEXT: ld.d $zero, $a0, 0
; LA64NOPIC-NEXT: move $a0, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: ldx_d:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: slli.d $a1, $a1, 3
; LA64PIC-NEXT: ldx.d $a1, $a0, $a1
; LA64PIC-NEXT: ld.d $a0, $a0, 0
; LA64PIC-NEXT: ld.d $zero, $a0, 0
; LA64PIC-NEXT: move $a0, $a1
; LA64PIC-NEXT: ret
%1 = getelementptr i64, ptr %a, i64 %idx
@@ -855,7 +855,7 @@ define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
; LA32NOPIC-NEXT: ld.bu $a1, $a0, 1
; LA32NOPIC-NEXT: ld.bu $a3, $a0, 2
; LA32NOPIC-NEXT: sub.w $a2, $a3, $a1
; LA32NOPIC-NEXT: ld.b $a0, $a0, 0
; LA32NOPIC-NEXT: ld.b $zero, $a0, 0
; LA32NOPIC-NEXT: sltu $a0, $a3, $a1
; LA32NOPIC-NEXT: sub.w $a1, $zero, $a0
; LA32NOPIC-NEXT: move $a0, $a2
@@ -866,7 +866,7 @@ define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
; LA32PIC-NEXT: ld.bu $a1, $a0, 1
; LA32PIC-NEXT: ld.bu $a3, $a0, 2
; LA32PIC-NEXT: sub.w $a2, $a3, $a1
; LA32PIC-NEXT: ld.b $a0, $a0, 0
; LA32PIC-NEXT: ld.b $zero, $a0, 0
; LA32PIC-NEXT: sltu $a0, $a3, $a1
; LA32PIC-NEXT: sub.w $a1, $zero, $a0
; LA32PIC-NEXT: move $a0, $a2
@@ -876,15 +876,15 @@ define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: ld.bu $a1, $a0, 1
; LA64NOPIC-NEXT: ld.bu $a2, $a0, 2
; LA64NOPIC-NEXT: ld.b $a0, $a0, 0
; LA64NOPIC-NEXT: ld.b $zero, $a0, 0
; LA64NOPIC-NEXT: sub.d $a0, $a2, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: load_sext_zext_anyext_i1:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: ld.bu $a1, $a0, 1
; LA64PIC-NEXT: ld.bu $a2, $a0, 2
; LA64PIC-NEXT: ld.b $a0, $a0, 0
; LA64PIC-NEXT: ld.b $zero, $a0, 0
; LA64PIC-NEXT: sub.d $a0, $a2, $a1
; LA64PIC-NEXT: ret
;; sextload i1
@@ -906,31 +906,31 @@ define i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
; LA32NOPIC: # %bb.0:
; LA32NOPIC-NEXT: ld.bu $a1, $a0, 1
; LA32NOPIC-NEXT: ld.bu $a2, $a0, 2
; LA32NOPIC-NEXT: ld.b $a0, $a0, 0
; LA32NOPIC-NEXT: ld.b $zero, $a0, 0
; LA32NOPIC-NEXT: sub.w $a0, $a2, $a1
; LA32NOPIC-NEXT: ret
;
; LA32PIC-LABEL: load_sext_zext_anyext_i1_i16:
; LA32PIC: # %bb.0:
; LA32PIC-NEXT: ld.bu $a1, $a0, 1
; LA32PIC-NEXT: ld.bu $a2, $a0, 2
; LA32PIC-NEXT: ld.b $a0, $a0, 0
; LA32PIC-NEXT: ld.b $zero, $a0, 0
; LA32PIC-NEXT: sub.w $a0, $a2, $a1
; LA32PIC-NEXT: ret
;
; LA64NOPIC-LABEL: load_sext_zext_anyext_i1_i16:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: ld.bu $a1, $a0, 1
; LA64NOPIC-NEXT: ld.bu $a2, $a0, 2
; LA64NOPIC-NEXT: ld.b $a0, $a0, 0
; LA64NOPIC-NEXT: ld.b $zero, $a0, 0
; LA64NOPIC-NEXT: sub.d $a0, $a2, $a1
; LA64NOPIC-NEXT: ret
;
; LA64PIC-LABEL: load_sext_zext_anyext_i1_i16:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: ld.bu $a1, $a0, 1
; LA64PIC-NEXT: ld.bu $a2, $a0, 2
; LA64PIC-NEXT: ld.b $a0, $a0, 0
; LA64PIC-NEXT: ld.b $zero, $a0, 0
; LA64PIC-NEXT: sub.d $a0, $a2, $a1
; LA64PIC-NEXT: ret
;; sextload i1
458 changes: 458 additions & 0 deletions llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll

Large diffs are not rendered by default.

40 changes: 20 additions & 20 deletions llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
@@ -27,22 +27,22 @@ define void @foo() nounwind {
; MEDIUM_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G)
; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0
; MEDIUM_NO_SCH-NEXT: ld.d $zero, $a0, 0
; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g)
; MEDIUM_NO_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g)
; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0
; MEDIUM_NO_SCH-NEXT: ld.d $zero, $a0, 0
; MEDIUM_NO_SCH-NEXT: ori $a0, $zero, 1
; MEDIUM_NO_SCH-NEXT: pcaddu18i $ra, %call36(bar)
; MEDIUM_NO_SCH-NEXT: jirl $ra, $ra, 0
; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd)
; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd)
; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp
; MEDIUM_NO_SCH-NEXT: ldx.d $zero, $a0, $tp
; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld)
; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld)
; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp
; MEDIUM_NO_SCH-NEXT: ldx.d $zero, $a0, $tp
; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie)
; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie)
; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp
; MEDIUM_NO_SCH-NEXT: ldx.d $zero, $a0, $tp
; MEDIUM_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; MEDIUM_NO_SCH-NEXT: addi.d $sp, $sp, 16
; MEDIUM_NO_SCH-NEXT: ret
@@ -53,22 +53,22 @@ define void @foo() nounwind {
; MEDIUM_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; MEDIUM_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G)
; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0
; MEDIUM_SCH-NEXT: ld.d $zero, $a0, 0
; MEDIUM_SCH-NEXT: pcalau12i $a0, %pc_hi20(g)
; MEDIUM_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g)
; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0
; MEDIUM_SCH-NEXT: ld.d $zero, $a0, 0
; MEDIUM_SCH-NEXT: ori $a0, $zero, 1
; MEDIUM_SCH-NEXT: pcaddu18i $ra, %call36(bar)
; MEDIUM_SCH-NEXT: jirl $ra, $ra, 0
; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd)
; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd)
; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp
; MEDIUM_SCH-NEXT: ldx.d $zero, $a0, $tp
; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld)
; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld)
; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp
; MEDIUM_SCH-NEXT: ldx.d $zero, $a0, $tp
; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie)
; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie)
; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp
; MEDIUM_SCH-NEXT: ldx.d $zero, $a0, $tp
; MEDIUM_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; MEDIUM_SCH-NEXT: addi.d $sp, $sp, 16
; MEDIUM_SCH-NEXT: ret
@@ -82,13 +82,13 @@ define void @foo() nounwind {
; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0
; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0
; LARGE_NO_SCH-NEXT: ld.d $zero, $a0, 0
; LARGE_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g)
; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g)
; LARGE_NO_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g)
; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g)
; LARGE_NO_SCH-NEXT: add.d $a0, $t8, $a0
; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0
; LARGE_NO_SCH-NEXT: ld.d $zero, $a0, 0
; LARGE_NO_SCH-NEXT: ori $a0, $zero, 1
; LARGE_NO_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar)
; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar)
@@ -101,19 +101,19 @@ define void @foo() nounwind {
; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd)
; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd)
; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0
; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp
; LARGE_NO_SCH-NEXT: ldx.d $zero, $a0, $tp
; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld)
; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld)
; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld)
; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld)
; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0
; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp
; LARGE_NO_SCH-NEXT: ldx.d $zero, $a0, $tp
; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie)
; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie)
; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie)
; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie)
; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0
; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp
; LARGE_NO_SCH-NEXT: ldx.d $zero, $a0, $tp
; LARGE_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, 16
; LARGE_NO_SCH-NEXT: ret
@@ -127,13 +127,13 @@ define void @foo() nounwind {
; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0
; LARGE_SCH-NEXT: ld.d $a0, $a0, 0
; LARGE_SCH-NEXT: ld.d $zero, $a0, 0
; LARGE_SCH-NEXT: pcalau12i $a0, %pc_hi20(g)
; LARGE_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g)
; LARGE_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g)
; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g)
; LARGE_SCH-NEXT: add.d $a0, $t8, $a0
; LARGE_SCH-NEXT: ld.d $a0, $a0, 0
; LARGE_SCH-NEXT: ld.d $zero, $a0, 0
; LARGE_SCH-NEXT: ori $a0, $zero, 1
; LARGE_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar)
; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar)
Expand All @@ -146,19 +146,19 @@ define void @foo() nounwind {
; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd)
; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd)
; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0
; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp
; LARGE_SCH-NEXT: ldx.d $zero, $a0, $tp
; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld)
; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld)
; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld)
; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld)
; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0
; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp
; LARGE_SCH-NEXT: ldx.d $zero, $a0, $tp
; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie)
; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie)
; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie)
; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie)
; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0
; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp
; LARGE_SCH-NEXT: ldx.d $zero, $a0, $tp
; LARGE_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LARGE_SCH-NEXT: addi.d $sp, $sp, 16
; LARGE_SCH-NEXT: ret
73 changes: 34 additions & 39 deletions llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -742,12 +742,10 @@ define void @test_d8(ptr %P, ptr %S) nounwind {
; LA32F-NEXT: st.w $a2, $sp, 32 # 4-byte Folded Spill
; LA32F-NEXT: ld.w $a2, $a0, 52
; LA32F-NEXT: st.w $a2, $sp, 28 # 4-byte Folded Spill
; LA32F-NEXT: ld.w $a2, $a0, 40
; LA32F-NEXT: st.w $a2, $sp, 16 # 4-byte Folded Spill
; LA32F-NEXT: ld.w $s8, $a0, 40
; LA32F-NEXT: ld.w $a2, $a0, 44
; LA32F-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill
; LA32F-NEXT: ld.w $a2, $a0, 32
; LA32F-NEXT: st.w $a2, $sp, 0 # 4-byte Folded Spill
; LA32F-NEXT: st.w $a2, $sp, 16 # 4-byte Folded Spill
; LA32F-NEXT: ld.w $s3, $a0, 32
; LA32F-NEXT: ld.w $s4, $a0, 36
; LA32F-NEXT: ld.w $s5, $a0, 24
; LA32F-NEXT: ld.w $s6, $a0, 28
@@ -756,80 +754,77 @@ define void @test_d8(ptr %P, ptr %S) nounwind {
; LA32F-NEXT: ld.w $s7, $a0, 8
; LA32F-NEXT: ld.w $s0, $a0, 12
; LA32F-NEXT: ld.w $a2, $a0, 0
; LA32F-NEXT: ld.w $a3, $a0, 4
; LA32F-NEXT: ld.w $a4, $a0, 4
; LA32F-NEXT: move $fp, $a1
; LA32F-NEXT: lu12i.w $s8, 261888
; LA32F-NEXT: lu12i.w $a3, 261888
; LA32F-NEXT: move $a0, $a2
; LA32F-NEXT: move $a1, $a3
; LA32F-NEXT: move $a1, $a4
; LA32F-NEXT: move $a2, $zero
; LA32F-NEXT: move $a3, $s8
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: st.w $a0, $sp, 40 # 4-byte Folded Spill
; LA32F-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill
; LA32F-NEXT: lu12i.w $s3, 262144
; LA32F-NEXT: lu12i.w $a3, 262144
; LA32F-NEXT: move $a0, $s7
; LA32F-NEXT: move $a1, $s0
; LA32F-NEXT: move $a2, $zero
; LA32F-NEXT: move $a3, $s3
; LA32F-NEXT: move $s0, $a3
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: st.w $a0, $sp, 24 # 4-byte Folded Spill
; LA32F-NEXT: st.w $a1, $sp, 20 # 4-byte Folded Spill
; LA32F-NEXT: lu12i.w $s0, 262272
; LA32F-NEXT: lu12i.w $s7, 262272
; LA32F-NEXT: move $a0, $s1
; LA32F-NEXT: move $a1, $s2
; LA32F-NEXT: move $a2, $zero
; LA32F-NEXT: move $a3, $s0
; LA32F-NEXT: move $a3, $s7
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: st.w $a0, $sp, 8 # 4-byte Folded Spill
; LA32F-NEXT: st.w $a1, $sp, 4 # 4-byte Folded Spill
; LA32F-NEXT: lu12i.w $s7, 262400
; LA32F-NEXT: st.w $a0, $sp, 12 # 4-byte Folded Spill
; LA32F-NEXT: move $s2, $a1
; LA32F-NEXT: lu12i.w $a3, 262400
; LA32F-NEXT: move $a0, $s5
; LA32F-NEXT: move $a1, $s6
; LA32F-NEXT: move $a2, $zero
; LA32F-NEXT: move $a3, $s7
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: move $s5, $a0
; LA32F-NEXT: move $s6, $a1
; LA32F-NEXT: ld.w $a0, $sp, 0 # 4-byte Folded Reload
; LA32F-NEXT: move $a0, $s3
; LA32F-NEXT: move $a1, $s4
; LA32F-NEXT: move $a2, $zero
; LA32F-NEXT: move $a3, $s8
; LA32F-NEXT: lu12i.w $a3, 261888
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: move $s4, $a0
; LA32F-NEXT: move $s8, $a1
; LA32F-NEXT: ld.w $a0, $sp, 16 # 4-byte Folded Reload
; LA32F-NEXT: ld.w $a1, $sp, 12 # 4-byte Folded Reload
; LA32F-NEXT: move $s3, $a0
; LA32F-NEXT: move $s4, $a1
; LA32F-NEXT: move $a0, $s8
; LA32F-NEXT: ld.w $a1, $sp, 16 # 4-byte Folded Reload
; LA32F-NEXT: move $a2, $zero
; LA32F-NEXT: move $a3, $s3
; LA32F-NEXT: move $a3, $s0
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: move $s3, $a0
; LA32F-NEXT: move $s1, $a1
; LA32F-NEXT: move $s8, $a0
; LA32F-NEXT: move $s0, $a1
; LA32F-NEXT: ld.w $a0, $sp, 32 # 4-byte Folded Reload
; LA32F-NEXT: ld.w $a1, $sp, 28 # 4-byte Folded Reload
; LA32F-NEXT: move $a2, $zero
; LA32F-NEXT: move $a3, $s0
; LA32F-NEXT: move $a3, $s7
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: move $s0, $a0
; LA32F-NEXT: move $s2, $a1
; LA32F-NEXT: move $s7, $a0
; LA32F-NEXT: move $s1, $a1
; LA32F-NEXT: ld.w $a0, $sp, 48 # 4-byte Folded Reload
; LA32F-NEXT: ld.w $a1, $sp, 44 # 4-byte Folded Reload
; LA32F-NEXT: move $a2, $zero
; LA32F-NEXT: move $a3, $s7
; LA32F-NEXT: lu12i.w $a3, 262400
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: st.w $a0, $fp, 56
; LA32F-NEXT: st.w $a1, $fp, 60
; LA32F-NEXT: st.w $s0, $fp, 48
; LA32F-NEXT: st.w $s2, $fp, 52
; LA32F-NEXT: st.w $s3, $fp, 40
; LA32F-NEXT: st.w $s1, $fp, 44
; LA32F-NEXT: st.w $s4, $fp, 32
; LA32F-NEXT: st.w $s8, $fp, 36
; LA32F-NEXT: st.w $s7, $fp, 48
; LA32F-NEXT: st.w $s1, $fp, 52
; LA32F-NEXT: st.w $s8, $fp, 40
; LA32F-NEXT: st.w $s0, $fp, 44
; LA32F-NEXT: st.w $s3, $fp, 32
; LA32F-NEXT: st.w $s4, $fp, 36
; LA32F-NEXT: st.w $s5, $fp, 24
; LA32F-NEXT: st.w $s6, $fp, 28
; LA32F-NEXT: ld.w $a0, $sp, 8 # 4-byte Folded Reload
; LA32F-NEXT: ld.w $a0, $sp, 12 # 4-byte Folded Reload
; LA32F-NEXT: st.w $a0, $fp, 16
; LA32F-NEXT: ld.w $a0, $sp, 4 # 4-byte Folded Reload
; LA32F-NEXT: st.w $a0, $fp, 20
; LA32F-NEXT: st.w $s2, $fp, 20
; LA32F-NEXT: ld.w $a0, $sp, 24 # 4-byte Folded Reload
; LA32F-NEXT: st.w $a0, $fp, 8
; LA32F-NEXT: ld.w $a0, $sp, 20 # 4-byte Folded Reload
1 change: 1 addition & 0 deletions llvm/test/CodeGen/MIR/Generic/runPass.mir
@@ -1,6 +1,7 @@
# RUN: llc -run-pass=greedy -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -passes=regallocfast -o - %s | FileCheck %s

# Check that passes are initialized correctly, so that it's possible to
# use -run-pass.
1 change: 1 addition & 0 deletions llvm/test/CodeGen/PowerPC/spill-nor0.mir
@@ -1,4 +1,5 @@
# RUN: llc -o - %s -mtriple=powerpc64-- -run-pass=regallocfast | FileCheck %s
# RUN: llc -o - %s -mtriple=powerpc64-- -passes=regallocfast | FileCheck %s
---
# CHECK-LABEL: name: func
name: func
@@ -1,4 +1,5 @@
# RUN: llc -verify-machineinstrs -run-pass regallocfast -mtriple s390x-ibm-linux -o - %s | FileCheck %s
# RUN: llc -verify-machineinstrs -passes=regallocfast -mtriple s390x-ibm-linux -o - %s | FileCheck %s
--- |

@g_167 = external global [5 x i64], align 8
1 change: 1 addition & 0 deletions llvm/test/CodeGen/Thumb/high-reg-clobber.mir
@@ -2,6 +2,7 @@
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocbasic %s -o - | FileCheck %s
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST

...
---
1 change: 1 addition & 0 deletions llvm/test/CodeGen/Thumb2/high-reg-spill.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass regallocfast %s -o - | FileCheck %s
# RUN: llc -passes=regallocfast %s -o - | FileCheck %s

# This test examines register allocation and spilling with Fast Register
# Allocator. The test uses inline assembler that requests an input variable to
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/bug47278-eflags-error.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s
# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -passes=regallocfast -o - %s | FileCheck %s

# Test for correct management of allocatable and non-allocatable
# live-ins in fastregalloc
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/bug47278.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s
# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -passes=regallocfast -o - %s | FileCheck %s

# Make sure this case doesn't assert or try to assign $ecx to %1 on
# SHRD32rrCL
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/callbr-asm-outputs-regallocfast.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=regallocfast -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -passes=regallocfast -verify-machineinstrs %s -o - | FileCheck %s
--- |
; ModuleID = 'x.c'
source_filename = "x.c"
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/fastregalloc-selfloop.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=x86_64-- -run-pass=regallocfast -o - %s | FileCheck %s
# RUN: llc -mtriple=x86_64-- -passes=regallocfast -o - %s | FileCheck %s

...
---
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/fastregalloc-tied-undef.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=x86_64-- -run-pass=regallocfast -o - %s | FileCheck %s
# RUN: llc -mtriple=x86_64-- -passes=regallocfast -o - %s | FileCheck %s

# If the tied use is undef value, fastregalloc should free the def register.
# There is no reload needed for the undef value.
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=x86_64-grtev4-linux-gnu -run-pass=regallocfast -o - %s | FileCheck %s
# RUN: llc -mtriple=x86_64-grtev4-linux-gnu -passes=regallocfast -o - %s | FileCheck %s

# Bug 41973. Make sure %12 is detected as live out of %bb.0, even
# though the use is allocated before the def block %bb.3. Previously
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/statepoint-fastregalloc.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=x86_64-- -run-pass=regallocfast -o - %s | FileCheck %s
# RUN: llc -mtriple=x86_64-- -passes=regallocfast -o - %s | FileCheck %s

# Check that fastregalloc does not displace register assigned to tied def when
# RegMask operand is present. STATEPOINT is an example of such instruction.
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/virtreg-physreg-def-regallocfast.mir
@@ -1,4 +1,5 @@
# RUN: llc -o - -mtriple=x86_64-- -run-pass=regallocfast %s | FileCheck %s
# RUN: llc -o - -mtriple=x86_64-- -passes=regallocfast %s | FileCheck %s
# Fast regalloc used to not collect physical register definitions
# before walking and assigning the virtual definition.
# Therefore it was possible for a virtual definition to end up
7 changes: 2 additions & 5 deletions llvm/test/Transforms/InstCombine/abs-1.ll
@@ -852,11 +852,8 @@ define i8 @abs_diff_signed_sgt_nuw_extra_use3(i8 %a, i8 %b) {

define i32 @abs_diff_signed_slt_swap_wrong_pred1(i32 %a, i32 %b) {
; CHECK-LABEL: @abs_diff_signed_slt_swap_wrong_pred1(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[SUB_BA:%.*]] = sub nsw i32 [[B]], [[A]]
; CHECK-NEXT: [[SUB_AB:%.*]] = sub nsw i32 [[A]], [[B]]
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB_BA]], i32 [[SUB_AB]]
; CHECK-NEXT: ret i32 [[COND]]
; CHECK-NEXT: [[SUB_AB:%.*]] = sub nsw i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret i32 [[SUB_AB]]
;
%cmp = icmp eq i32 %a, %b
%sub_ba = sub nsw i32 %b, %a
48 changes: 43 additions & 5 deletions llvm/test/Transforms/InstCombine/select.ll
@@ -667,7 +667,7 @@ define i1 @test39(i1 %cond, double %x) {
; CHECK-LABEL: @test39(
; CHECK-NEXT: ret i1 true
;
%s = select i1 %cond, double %x, double 0x7FF0000000000000 ; RHS = +infty
%s = select i1 %cond, double %x, double 0x7FF0000000000000 ; RHS = +infty
%cmp = fcmp ule double %x, %s
ret i1 %cmp
}
@@ -1364,7 +1364,7 @@ define i32 @PR23757_ne(i32 %x, ptr %p) {
; CHECK-NEXT: ret i32 -2147483648
;
%cmp = icmp ne i32 %x, 2147483647
store i1 %cmp, ptr %p ; thwart predicate canonicalization
store i1 %cmp, ptr %p ; thwart predicate canonicalization
%add = add nsw i32 %x, 1
%sel = select i1 %cmp, i32 -2147483648, i32 %add
ret i32 %sel
@@ -1378,7 +1378,7 @@ define i32 @PR23757_ne_swapped(i32 %x, ptr %p) {
; CHECK-NEXT: ret i32 [[ADD]]
;
%cmp = icmp ne i32 %x, 2147483647
store i1 %cmp, ptr %p ; thwart predicate canonicalization
store i1 %cmp, ptr %p ; thwart predicate canonicalization
%add = add nsw i32 %x, 1
%sel = select i1 %cmp, i32 %add, i32 -2147483648
ret i32 %sel
@@ -2809,6 +2809,45 @@ define <2 x i8> @select_replacement_add_eq_vec_undef(<2 x i8> %x, <2 x i8> %y) {
ret <2 x i8> %sel
}

define <2 x i8> @select_replacement_add_eq_vec_undef_okay(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @select_replacement_add_eq_vec_undef_okay(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 1>
; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 2, i8 undef>, <2 x i8> [[Y:%.*]]
; CHECK-NEXT: ret <2 x i8> [[SEL]]
;
%cmp = icmp eq <2 x i8> %x, <i8 1, i8 1>
%add = add <2 x i8> %x, <i8 1, i8 undef>
%sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %y
ret <2 x i8> %sel
}


define <2 x i8> @select_replacement_add_eq_vec_undef_okay_todo(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @select_replacement_add_eq_vec_undef_okay_todo(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 undef>
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], <i8 1, i8 undef>
; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> [[Y:%.*]]
; CHECK-NEXT: ret <2 x i8> [[SEL]]
;
%cmp = icmp eq <2 x i8> %x, <i8 1, i8 undef>
%add = add <2 x i8> %x, <i8 1, i8 undef>
%sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %y
ret <2 x i8> %sel
}

define <2 x i8> @select_replacement_xor_eq_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
; CHECK-LABEL: @select_replacement_xor_eq_vec(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> zeroinitializer, <2 x i8> [[Z:%.*]]
; CHECK-NEXT: ret <2 x i8> [[SEL]]
;
%cmp = icmp eq <2 x i8> %x, %y
%add = xor <2 x i8> %x, %y
%sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %z
ret <2 x i8> %sel
}


define i8 @select_replacement_add_ne(i8 %x, i8 %y) {
; CHECK-LABEL: @select_replacement_add_ne(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 1
@@ -2865,8 +2904,7 @@ define i8 @select_replacement_sub_noundef_but_may_be_poison(i8 %x, i8 noundef %y
define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @select_replacement_sub(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[Y]]
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SUB]], i8 [[Z:%.*]]
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 0, i8 [[Z:%.*]]
; CHECK-NEXT: ret i8 [[SEL]]
;
%cmp = icmp eq i8 %x, %y
10 changes: 10 additions & 0 deletions llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
@@ -0,0 +1,10 @@
# REQUIRES: amdgpu-registered-target
# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>' --print-pipeline-passes %s | FileCheck %s --check-prefix=PASS
# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER

# PASS: regallocfast<filter=sgpr>
# BAD-FILTER: invalid regallocfast register filter 'bad-filter'

---
name: f
...
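The PASS and BAD-FILTER lines above pin down the accepted and rejected filter spellings. As a usage sketch, several filtered allocator runs can in principle be chained in one pipeline string, mirroring the staged sgpr/wwm/vgpr allocation in the AMDGPU codegen pipeline (the `wwm` and `vgpr` filter names and the chaining are assumptions for illustration, not something this test verifies):

# Hypothetical invocation; filter names other than 'sgpr' are assumed.
# llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes %s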
26 changes: 26 additions & 0 deletions llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp
@@ -24,6 +24,32 @@ class CoreAPIsStandardTest : public CoreAPIsBasedStandardTest {};

namespace {

class CustomError : public ErrorInfo<CustomError> {
public:
static char ID;
void log(raw_ostream &OS) const override { OS << "CustomError"; }
std::error_code convertToErrorCode() const override { return {}; }
};
char CustomError::ID = 0;

TEST_F(CoreAPIsStandardTest, ErrorReporter) {
// Check that errors reported via ExecutionSession::reportError are sent to
// the registered error reporter, and that the error reporter can hold
// uniquely owned state.

Error ReportedError = Error::success();

ES.setErrorReporter(
// Make sure error reporter can capture uniquely-owned state.
[&, State = std::make_unique<int>(42)](Error Err) {
ReportedError = joinErrors(std::move(Err), std::move(ReportedError));
});

ES.reportError(make_error<CustomError>());

EXPECT_THAT_ERROR(std::move(ReportedError), Failed<CustomError>());
}

TEST_F(CoreAPIsStandardTest, JITDylibAddToLinkOrder) {
// Check that the JITDylib::addToLinkOrder methods behave as expected.
auto &JD2 = ES.createBareJITDylib("JD2");
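The new test drives the reporter through ExecutionSession::reportError directly. In a client, a reporter would typically be installed right after constructing the session; a minimal sketch, assuming the in-process SelfExecutorProcessControl setup (error handling elided for brevity):

  // Sketch only: assumes <llvm/ExecutionEngine/Orc/Core.h> and
  // <llvm/ExecutionEngine/Orc/ExecutorProcessControl.h> are available.
  auto EPC = cantFail(SelfExecutorProcessControl::Create());
  ExecutionSession ES(std::move(EPC));
  ES.setErrorReporter([](Error Err) {
    // Route otherwise-unhandled JIT errors to stderr with a banner.
    logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: ");
  });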
1 change: 1 addition & 0 deletions llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn
@@ -34,6 +34,7 @@ static_library("LLVMLoongArchCodeGen") {
include_dirs = [ "." ]
sources = [
"LoongArchAsmPrinter.cpp",
"LoongArchDeadRegisterDefinitions.cpp",
"LoongArchExpandAtomicPseudoInsts.cpp",
"LoongArchExpandPseudoInsts.cpp",
"LoongArchFrameLowering.cpp",
7 changes: 4 additions & 3 deletions mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -4531,17 +4531,18 @@ struct FoldTensorCastProducerOp
if (!hasTensorCastOperand)
return failure();

SmallVector<Type, 4> newResultTypes;
newResultTypes.reserve(op->getNumResults());
SmallVector<Type, 4> newResultTypes(op->getResultTypes());
SmallVector<Value, 4> newOperands;
newOperands.reserve(op->getNumOperands());
// Assumes that the result has dpsInits followed by nonDpsInits.
int64_t dpsInitIdx = 0;
for (OpOperand &opOperand : op->getOpOperands()) {
auto tensorCastOp = opOperand.get().getDefiningOp<tensor::CastOp>();
bool fold = canFoldIntoConsumerOp(tensorCastOp);
newOperands.push_back(fold ? tensorCastOp.getOperand() : opOperand.get());
if (op.isDpsInit(&opOperand) &&
!llvm::isa<MemRefType>(newOperands.back().getType()))
newResultTypes.push_back(newOperands.back().getType());
newResultTypes[dpsInitIdx++] = newOperands.back().getType();
}

// Clone op.
15 changes: 15 additions & 0 deletions mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -2523,3 +2523,18 @@ func.func @dim_out_of_bounds() -> vector<7xi32> {
%16 = affine.vector_load %alloc_21[%c1, %c1, %dim] : memref<?x26x2xi32>, vector<7xi32>
return %16 : vector<7xi32>
}

// -----

// CHECK-LABEL: func.func @test_destination_multiple_result(
// CHECK-SAME: %[[ARG1:.*]]: tensor<2x2xf32>,
// CHECK-SAME: %[[ARG2:.*]]: tensor<2x2xf32>) -> index {
// CHECK: %[[RES:.*]]:2 = test.destination_style_op ins(%[[ARG1]] : tensor<2x2xf32>)
// CHECK-SAME: outs(%[[ARG2]] : tensor<2x2xf32>) -> tensor<2x2xf32>, index
// CHECK: return %[[RES]]#1 : index
func.func @test_destination_multiple_result(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> index {
%cast = tensor.cast %arg0 : tensor<2x2xf32> to tensor<?x2xf32>
%cast_0 = tensor.cast %arg1 : tensor<2x2xf32> to tensor<?x2xf32>
%0:2 = test.destination_style_op ins(%cast : tensor<?x2xf32>) outs(%cast_0 : tensor<?x2xf32>) -> tensor<?x2xf32>, index
return %0#1 : index
}