diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 443944408f339..202ddb0d21a28 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -1,149 +1,83 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s define float @dyn_extract_v8f32_const_s_v(i32 %sel) { -; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 -; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 -; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 -; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 -; GPRIDX-NEXT: s_mov_b32 s7, 4.0 -; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 -; GPRIDX-NEXT: s_mov_b32 s5, 2.0 -; GPRIDX-NEXT: s_mov_b32 s4, 1.0 -; GPRIDX-NEXT: s_mov_b64 s[12:13], exec -; GPRIDX-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1 -; GPRIDX-NEXT: v_readfirstlane_b32 s14, v0 -; GPRIDX-NEXT: s_mov_b32 m0, s14 -; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0 -; GPRIDX-NEXT: s_movrels_b32 s14, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s14 -; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc -; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc -; GPRIDX-NEXT: s_cbranch_execnz BB0_1 -; GPRIDX-NEXT: ; %bb.2: -; GPRIDX-NEXT: s_mov_b64 exec, s[12:13] -; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 -; GPRIDX-NEXT: s_setpc_b64 s[30:31] -; -; MOVREL-LABEL: dyn_extract_v8f32_const_s_v: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 -; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 -; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 -; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 -; MOVREL-NEXT: s_mov_b32 s7, 4.0 -; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 -; MOVREL-NEXT: s_mov_b32 s5, 2.0 -; MOVREL-NEXT: s_mov_b32 s4, 1.0 -; MOVREL-NEXT: s_mov_b64 s[12:13], exec -; MOVREL-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1 -; MOVREL-NEXT: v_readfirstlane_b32 s14, v0 -; MOVREL-NEXT: s_mov_b32 m0, s14 -; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0 -; MOVREL-NEXT: s_movrels_b32 s14, s4 -; MOVREL-NEXT: v_mov_b32_e32 v1, s14 -; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc -; MOVREL-NEXT: s_xor_b64 exec, exec, vcc -; MOVREL-NEXT: s_cbranch_execnz BB0_1 -; MOVREL-NEXT: ; %bb.2: -; MOVREL-NEXT: s_mov_b64 exec, s[12:13] -; MOVREL-NEXT: v_mov_b32_e32 v0, v1 -; MOVREL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: dyn_extract_v8f32_const_s_v: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s11, 0x41000000 +; GCN-NEXT: s_mov_b32 s10, 0x40e00000 +; GCN-NEXT: s_mov_b32 s9, 0x40c00000 +; GCN-NEXT: s_mov_b32 s8, 0x40a00000 +; GCN-NEXT: s_mov_b32 s7, 4.0 +; GCN-NEXT: s_mov_b32 s6, 0x40400000 +; GCN-NEXT: s_mov_b32 s5, 2.0 +; GCN-NEXT: s_mov_b32 s4, 1.0 +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s14, v0 +; GCN-NEXT: s_mov_b32 m0, s14 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0 +; GCN-NEXT: s_movrels_b32 s14, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s14 +; GCN-NEXT: s_and_saveexec_b64 vcc, vcc +; GCN-NEXT: s_xor_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execnz BB0_1 +; GCN-NEXT: ; %bb.2: +; GCN-NEXT: s_mov_b64 exec, s[12:13] +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x float> , i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s4, 1.0 -; GPRIDX-NEXT: s_mov_b32 m0, s2 -; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 -; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 -; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 -; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 -; GPRIDX-NEXT: s_mov_b32 s7, 4.0 -; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 -; GPRIDX-NEXT: s_mov_b32 s5, 2.0 -; GPRIDX-NEXT: s_movrels_b32 s0, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f32_const_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s4, 1.0 -; MOVREL-NEXT: s_mov_b32 m0, s2 -; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 -; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 -; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 -; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 -; MOVREL-NEXT: s_mov_b32 s7, 4.0 -; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 -; MOVREL-NEXT: s_mov_b32 s5, 2.0 -; MOVREL-NEXT: s_movrels_b32 s0, s4 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f32_const_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s4, 1.0 +; GCN-NEXT: s_mov_b32 m0, s2 +; GCN-NEXT: s_mov_b32 s11, 0x41000000 +; GCN-NEXT: s_mov_b32 s10, 0x40e00000 +; GCN-NEXT: s_mov_b32 s9, 0x40c00000 +; GCN-NEXT: s_mov_b32 s8, 0x40a00000 +; GCN-NEXT: s_mov_b32 s7, 4.0 +; GCN-NEXT: s_mov_b32 s6, 0x40400000 +; GCN-NEXT: s_mov_b32 s5, 2.0 +; GCN-NEXT: s_movrels_b32 s0, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> , i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) { -; GPRIDX-LABEL: dyn_extract_v8f32_s_v: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b64 s[8:9], exec -; GPRIDX-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1 -; GPRIDX-NEXT: v_readfirstlane_b32 s10, v0 -; GPRIDX-NEXT: s_mov_b32 m0, s10 -; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0 -; GPRIDX-NEXT: s_movrels_b32 s10, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s10 -; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc -; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc -; GPRIDX-NEXT: s_cbranch_execnz BB2_1 -; GPRIDX-NEXT: ; %bb.2: -; GPRIDX-NEXT: s_mov_b64 exec, s[8:9] -; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f32_s_v: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b64 s[8:9], exec -; MOVREL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1 -; MOVREL-NEXT: v_readfirstlane_b32 s10, v0 -; MOVREL-NEXT: s_mov_b32 m0, s10 -; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0 -; MOVREL-NEXT: s_movrels_b32 s10, s0 -; MOVREL-NEXT: v_mov_b32_e32 v1, s10 -; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc -; MOVREL-NEXT: s_xor_b64 exec, exec, vcc -; MOVREL-NEXT: s_cbranch_execnz BB2_1 -; MOVREL-NEXT: ; %bb.2: -; MOVREL-NEXT: s_mov_b64 exec, s[8:9] -; MOVREL-NEXT: v_mov_b32_e32 v0, v1 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f32_s_v: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b64 s[8:9], exec +; GCN-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s10, v0 +; GCN-NEXT: s_mov_b32 m0, s10 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0 +; GCN-NEXT: s_movrels_b32 s10, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s10 +; GCN-NEXT: s_and_saveexec_b64 vcc, vcc +; GCN-NEXT: s_xor_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execnz BB2_1 +; GCN-NEXT: ; %bb.2: +; GCN-NEXT: s_mov_b64 exec, s[8:9] +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> %vec, i32 %sel ret float %ext @@ -208,94 +142,52 @@ entry: } define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f32_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 m0, s10 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_movrels_b32 s0, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f32_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 m0, s10 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_movrels_b32 s0, s0 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f32_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 m0, s10 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_movrels_b32 s0, s0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> %vec, i32 %sel ret float %ext } define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { -; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 -; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 -; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 -; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 -; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 -; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 -; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 -; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 -; GPRIDX-NEXT: s_mov_b64 s[20:21], exec -; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 -; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0 -; GPRIDX-NEXT: s_lshl_b32 m0, s22, 1 -; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 -; GPRIDX-NEXT: s_movrels_b32 s22, s4 -; GPRIDX-NEXT: s_movrels_b32 s23, s5 -; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc -; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc -; GPRIDX-NEXT: s_cbranch_execnz BB6_1 -; GPRIDX-NEXT: ; %bb.2: -; GPRIDX-NEXT: s_mov_b64 exec, s[20:21] -; GPRIDX-NEXT: v_mov_b32_e32 v0, s22 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s23 -; GPRIDX-NEXT: s_setpc_b64 s[30:31] -; -; MOVREL-LABEL: dyn_extract_v8i64_const_s_v: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: s_mov_b64 s[18:19], 8 -; MOVREL-NEXT: s_mov_b64 s[16:17], 7 -; MOVREL-NEXT: s_mov_b64 s[14:15], 6 -; MOVREL-NEXT: s_mov_b64 s[12:13], 5 -; MOVREL-NEXT: s_mov_b64 s[10:11], 4 -; MOVREL-NEXT: s_mov_b64 s[8:9], 3 -; MOVREL-NEXT: s_mov_b64 s[6:7], 2 -; MOVREL-NEXT: s_mov_b64 s[4:5], 1 -; MOVREL-NEXT: s_mov_b64 s[20:21], exec -; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 -; MOVREL-NEXT: v_readfirstlane_b32 s22, v0 -; MOVREL-NEXT: s_lshl_b32 m0, s22, 1 -; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 -; MOVREL-NEXT: s_movrels_b32 s22, s4 -; MOVREL-NEXT: s_movrels_b32 s23, s5 -; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc -; MOVREL-NEXT: s_xor_b64 exec, exec, vcc -; MOVREL-NEXT: s_cbranch_execnz BB6_1 -; MOVREL-NEXT: ; %bb.2: -; MOVREL-NEXT: s_mov_b64 exec, s[20:21] -; MOVREL-NEXT: v_mov_b32_e32 v0, s22 -; MOVREL-NEXT: v_mov_b32_e32 v1, s23 -; MOVREL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: dyn_extract_v8i64_const_s_v: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[18:19], 8 +; GCN-NEXT: s_mov_b64 s[16:17], 7 +; GCN-NEXT: s_mov_b64 s[14:15], 6 +; GCN-NEXT: s_mov_b64 s[12:13], 5 +; GCN-NEXT: s_mov_b64 s[10:11], 4 +; GCN-NEXT: s_mov_b64 s[8:9], 3 +; GCN-NEXT: s_mov_b64 s[6:7], 2 +; GCN-NEXT: s_mov_b64 s[4:5], 1 +; GCN-NEXT: s_mov_b64 s[20:21], exec +; GCN-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s22, v0 +; GCN-NEXT: s_lshl_b32 m0, s22, 1 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 +; GCN-NEXT: s_movrels_b32 s22, s4 +; GCN-NEXT: s_movrels_b32 s23, s5 +; GCN-NEXT: s_and_saveexec_b64 vcc, vcc +; GCN-NEXT: s_xor_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execnz BB6_1 +; GCN-NEXT: ; %bb.2: +; GCN-NEXT: s_mov_b64 exec, s[20:21] +; GCN-NEXT: v_mov_b32_e32 v0, s22 +; GCN-NEXT: v_mov_b32_e32 v1, s23 +; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i64> , i32 %sel ret i64 %ext @@ -543,35 +435,20 @@ entry: } define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 m0, s10 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_movrels_b32 s0, s3 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 m0, s10 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_movrels_b32 s0, s3 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f32_s_s_offset3: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 m0, s10 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_movrels_b32 s0, s3 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 3 %ext = extractelement <8 x float> %vec, i32 %add @@ -620,49 +497,27 @@ entry: } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset1: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 m0, s18 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s8, s10 -; GPRIDX-NEXT: s_mov_b32 s9, s11 -; GPRIDX-NEXT: s_mov_b32 s10, s12 -; GPRIDX-NEXT: s_mov_b32 s11, s13 -; GPRIDX-NEXT: s_mov_b32 s12, s14 -; GPRIDX-NEXT: s_mov_b32 s13, s15 -; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[2:3] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 m0, s18 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 s8, s10 -; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: s_mov_b32 s10, s12 -; MOVREL-NEXT: s_mov_b32 s11, s13 -; MOVREL-NEXT: s_mov_b32 s12, s14 -; MOVREL-NEXT: s_mov_b32 s13, s15 -; MOVREL-NEXT: s_mov_b32 s14, s16 -; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[2:3] -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f64_s_s_offset1: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 m0, s18 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_mov_b32 s15, s17 +; GCN-NEXT: s_movrels_b64 s[0:1], s[2:3] +; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 1 %ext = extractelement <8 x double> %vec, i32 %add @@ -670,49 +525,27 @@ entry: } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset2: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 m0, s18 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s8, s10 -; GPRIDX-NEXT: s_mov_b32 s9, s11 -; GPRIDX-NEXT: s_mov_b32 s10, s12 -; GPRIDX-NEXT: s_mov_b32 s11, s13 -; GPRIDX-NEXT: s_mov_b32 s12, s14 -; GPRIDX-NEXT: s_mov_b32 s13, s15 -; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 m0, s18 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 s8, s10 -; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: s_mov_b32 s10, s12 -; MOVREL-NEXT: s_mov_b32 s11, s13 -; MOVREL-NEXT: s_mov_b32 s12, s14 -; MOVREL-NEXT: s_mov_b32 s13, s15 -; MOVREL-NEXT: s_mov_b32 s14, s16 -; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5] -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f64_s_s_offset2: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 m0, s18 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_mov_b32 s15, s17 +; GCN-NEXT: s_movrels_b64 s[0:1], s[4:5] +; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 2 %ext = extractelement <8 x double> %vec, i32 %add @@ -720,7 +553,119 @@ entry: } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset3: +; GCN-LABEL: dyn_extract_v8f64_s_s_offset3: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 m0, s18 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_mov_b32 s15, s17 +; GCN-NEXT: s_movrels_b64 s[0:1], s[6:7] +; GCN-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 3 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { +; GCN-LABEL: dyn_extract_v8f64_s_s_offset4: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 m0, s18 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_mov_b32 s15, s17 +; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9] +; GCN-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 4 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { +; GCN-LABEL: dyn_extract_v8f64_s_s_offset5: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 m0, s18 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_mov_b32 s15, s17 +; GCN-NEXT: s_movrels_b64 s[0:1], s[10:11] +; GCN-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 5 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { +; GCN-LABEL: dyn_extract_v8f64_s_s_offset6: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b32 m0, s18 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_mov_b32 s15, s17 +; GCN-NEXT: s_movrels_b64 s[0:1], s[12:13] +; GCN-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 6 + %ext = extractelement <8 x double> %vec, i32 %add + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 @@ -730,7 +675,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 @@ -739,10 +683,12 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[6:7] +; GPRIDX-NEXT: s_mov_b32 m0, s18 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] ; GPRIDX-NEXT: ; return to shader part epilog ; -; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3: +; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 @@ -752,7 +698,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 @@ -761,259 +706,37 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[6:7] +; MOVREL-NEXT: s_mov_b32 m0, s18 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] ; MOVREL-NEXT: ; return to shader part epilog entry: - %add = add i32 %sel, 3 - %ext = extractelement <8 x double> %vec, i32 %add - ret double %ext -} - -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset4: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s8, s10 -; GPRIDX-NEXT: s_mov_b32 s9, s11 -; GPRIDX-NEXT: s_mov_b32 m0, s18 -; GPRIDX-NEXT: s_mov_b32 s10, s12 -; GPRIDX-NEXT: s_mov_b32 s11, s13 -; GPRIDX-NEXT: s_mov_b32 s12, s14 -; GPRIDX-NEXT: s_mov_b32 s13, s15 -; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[8:9] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 s8, s10 -; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: s_mov_b32 m0, s18 -; MOVREL-NEXT: s_mov_b32 s10, s12 -; MOVREL-NEXT: s_mov_b32 s11, s13 -; MOVREL-NEXT: s_mov_b32 s12, s14 -; MOVREL-NEXT: s_mov_b32 s13, s15 -; MOVREL-NEXT: s_mov_b32 s14, s16 -; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[8:9] -; MOVREL-NEXT: ; return to shader part epilog -entry: - %add = add i32 %sel, 4 - %ext = extractelement <8 x double> %vec, i32 %add - ret double %ext -} - -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset5: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s8, s10 -; GPRIDX-NEXT: s_mov_b32 s9, s11 -; GPRIDX-NEXT: s_mov_b32 s10, s12 -; GPRIDX-NEXT: s_mov_b32 s11, s13 -; GPRIDX-NEXT: s_mov_b32 m0, s18 -; GPRIDX-NEXT: s_mov_b32 s12, s14 -; GPRIDX-NEXT: s_mov_b32 s13, s15 -; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[10:11] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 s8, s10 -; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: s_mov_b32 s10, s12 -; MOVREL-NEXT: s_mov_b32 s11, s13 -; MOVREL-NEXT: s_mov_b32 m0, s18 -; MOVREL-NEXT: s_mov_b32 s12, s14 -; MOVREL-NEXT: s_mov_b32 s13, s15 -; MOVREL-NEXT: s_mov_b32 s14, s16 -; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[10:11] -; MOVREL-NEXT: ; return to shader part epilog -entry: - %add = add i32 %sel, 5 - %ext = extractelement <8 x double> %vec, i32 %add - ret double %ext -} - -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset6: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s8, s10 -; GPRIDX-NEXT: s_mov_b32 s9, s11 -; GPRIDX-NEXT: s_mov_b32 s10, s12 -; GPRIDX-NEXT: s_mov_b32 s11, s13 -; GPRIDX-NEXT: s_mov_b32 s12, s14 -; GPRIDX-NEXT: s_mov_b32 s13, s15 -; GPRIDX-NEXT: s_mov_b32 m0, s18 -; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[12:13] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 s8, s10 -; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: s_mov_b32 s10, s12 -; MOVREL-NEXT: s_mov_b32 s11, s13 -; MOVREL-NEXT: s_mov_b32 s12, s14 -; MOVREL-NEXT: s_mov_b32 s13, s15 -; MOVREL-NEXT: s_mov_b32 m0, s18 -; MOVREL-NEXT: s_mov_b32 s14, s16 -; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[12:13] -; MOVREL-NEXT: ; return to shader part epilog -entry: - %add = add i32 %sel, 6 - %ext = extractelement <8 x double> %vec, i32 %add - ret double %ext -} - -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s8, s10 -; GPRIDX-NEXT: s_mov_b32 s9, s11 -; GPRIDX-NEXT: s_mov_b32 s10, s12 -; GPRIDX-NEXT: s_mov_b32 s11, s13 -; GPRIDX-NEXT: s_mov_b32 s12, s14 -; GPRIDX-NEXT: s_mov_b32 s13, s15 -; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_mov_b32 m0, s18 -; GPRIDX-NEXT: s_nop 0 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 s8, s10 -; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: s_mov_b32 s10, s12 -; MOVREL-NEXT: s_mov_b32 s11, s13 -; MOVREL-NEXT: s_mov_b32 s12, s14 -; MOVREL-NEXT: s_mov_b32 s13, s15 -; MOVREL-NEXT: s_mov_b32 s14, s16 -; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_mov_b32 m0, s18 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] -; MOVREL-NEXT: ; return to shader part epilog -entry: - %add = add i32 %sel, 7 + %add = add i32 %sel, 7 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offsetm1: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_i32 m0, s18, -1 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s8, s10 -; GPRIDX-NEXT: s_mov_b32 s9, s11 -; GPRIDX-NEXT: s_mov_b32 s10, s12 -; GPRIDX-NEXT: s_mov_b32 s11, s13 -; GPRIDX-NEXT: s_mov_b32 s12, s14 -; GPRIDX-NEXT: s_mov_b32 s13, s15 -; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: s_mov_b32 s15, s17 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v8f64_s_s_offsetm1: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_i32 m0, s18, -1 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 s8, s10 -; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: s_mov_b32 s10, s12 -; MOVREL-NEXT: s_mov_b32 s11, s13 -; MOVREL-NEXT: s_mov_b32 s12, s14 -; MOVREL-NEXT: s_mov_b32 s13, s15 -; MOVREL-NEXT: s_mov_b32 s14, s16 -; MOVREL-NEXT: s_mov_b32 s15, s17 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_add_i32 m0, s18, -1 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_mov_b32 s15, s17 +; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, -1 %ext = extractelement <8 x double> %vec, i32 %add @@ -1310,260 +1033,140 @@ entry: } define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v16f32_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s4, 1.0 -; GPRIDX-NEXT: s_mov_b32 m0, s2 -; GPRIDX-NEXT: s_mov_b32 s19, 0x41800000 -; GPRIDX-NEXT: s_mov_b32 s18, 0x41700000 -; GPRIDX-NEXT: s_mov_b32 s17, 0x41600000 -; GPRIDX-NEXT: s_mov_b32 s16, 0x41500000 -; GPRIDX-NEXT: s_mov_b32 s15, 0x41400000 -; GPRIDX-NEXT: s_mov_b32 s14, 0x41300000 -; GPRIDX-NEXT: s_mov_b32 s13, 0x41200000 -; GPRIDX-NEXT: s_mov_b32 s12, 0x41100000 -; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 -; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 -; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 -; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 -; GPRIDX-NEXT: s_mov_b32 s7, 4.0 -; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 -; GPRIDX-NEXT: s_mov_b32 s5, 2.0 -; GPRIDX-NEXT: s_movrels_b32 s0, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v16f32_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s4, 1.0 -; MOVREL-NEXT: s_mov_b32 m0, s2 -; MOVREL-NEXT: s_mov_b32 s19, 0x41800000 -; MOVREL-NEXT: s_mov_b32 s18, 0x41700000 -; MOVREL-NEXT: s_mov_b32 s17, 0x41600000 -; MOVREL-NEXT: s_mov_b32 s16, 0x41500000 -; MOVREL-NEXT: s_mov_b32 s15, 0x41400000 -; MOVREL-NEXT: s_mov_b32 s14, 0x41300000 -; MOVREL-NEXT: s_mov_b32 s13, 0x41200000 -; MOVREL-NEXT: s_mov_b32 s12, 0x41100000 -; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 -; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 -; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 -; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 -; MOVREL-NEXT: s_mov_b32 s7, 4.0 -; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 -; MOVREL-NEXT: s_mov_b32 s5, 2.0 -; MOVREL-NEXT: s_movrels_b32 s0, s4 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v16f32_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s4, 1.0 +; GCN-NEXT: s_mov_b32 m0, s2 +; GCN-NEXT: s_mov_b32 s19, 0x41800000 +; GCN-NEXT: s_mov_b32 s18, 0x41700000 +; GCN-NEXT: s_mov_b32 s17, 0x41600000 +; GCN-NEXT: s_mov_b32 s16, 0x41500000 +; GCN-NEXT: s_mov_b32 s15, 0x41400000 +; GCN-NEXT: s_mov_b32 s14, 0x41300000 +; GCN-NEXT: s_mov_b32 s13, 0x41200000 +; GCN-NEXT: s_mov_b32 s12, 0x41100000 +; GCN-NEXT: s_mov_b32 s11, 0x41000000 +; GCN-NEXT: s_mov_b32 s10, 0x40e00000 +; GCN-NEXT: s_mov_b32 s9, 0x40c00000 +; GCN-NEXT: s_mov_b32 s8, 0x40a00000 +; GCN-NEXT: s_mov_b32 s7, 4.0 +; GCN-NEXT: s_mov_b32 s6, 0x40400000 +; GCN-NEXT: s_mov_b32 s5, 2.0 +; GCN-NEXT: s_movrels_b32 s0, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <16 x float> , i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v32f32_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s36, 1.0 -; GPRIDX-NEXT: s_mov_b32 m0, s2 -; GPRIDX-NEXT: s_mov_b32 s67, 0x42000000 -; GPRIDX-NEXT: s_mov_b32 s66, 0x41f80000 -; GPRIDX-NEXT: s_mov_b32 s65, 0x41f00000 -; GPRIDX-NEXT: s_mov_b32 s64, 0x41e80000 -; GPRIDX-NEXT: s_mov_b32 s63, 0x41e00000 -; GPRIDX-NEXT: s_mov_b32 s62, 0x41d80000 -; GPRIDX-NEXT: s_mov_b32 s61, 0x41d00000 -; GPRIDX-NEXT: s_mov_b32 s60, 0x41c80000 -; GPRIDX-NEXT: s_mov_b32 s59, 0x41c00000 -; GPRIDX-NEXT: s_mov_b32 s58, 0x41b80000 -; GPRIDX-NEXT: s_mov_b32 s57, 0x41b00000 -; GPRIDX-NEXT: s_mov_b32 s56, 0x41a80000 -; GPRIDX-NEXT: s_mov_b32 s55, 0x41a00000 -; GPRIDX-NEXT: s_mov_b32 s54, 0x41980000 -; GPRIDX-NEXT: s_mov_b32 s53, 0x41900000 -; GPRIDX-NEXT: s_mov_b32 s52, 0x41880000 -; GPRIDX-NEXT: s_mov_b32 s51, 0x41800000 -; GPRIDX-NEXT: s_mov_b32 s50, 0x41700000 -; GPRIDX-NEXT: s_mov_b32 s49, 0x41600000 -; GPRIDX-NEXT: s_mov_b32 s48, 0x41500000 -; GPRIDX-NEXT: s_mov_b32 s47, 0x41400000 -; GPRIDX-NEXT: s_mov_b32 s46, 0x41300000 -; GPRIDX-NEXT: s_mov_b32 s45, 0x41200000 -; GPRIDX-NEXT: s_mov_b32 s44, 0x41100000 -; GPRIDX-NEXT: s_mov_b32 s43, 0x41000000 -; GPRIDX-NEXT: s_mov_b32 s42, 0x40e00000 -; GPRIDX-NEXT: s_mov_b32 s41, 0x40c00000 -; GPRIDX-NEXT: s_mov_b32 s40, 0x40a00000 -; GPRIDX-NEXT: s_mov_b32 s39, 4.0 -; GPRIDX-NEXT: s_mov_b32 s38, 0x40400000 -; GPRIDX-NEXT: s_mov_b32 s37, 2.0 -; GPRIDX-NEXT: s_movrels_b32 s0, s36 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v32f32_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s36, 1.0 -; MOVREL-NEXT: s_mov_b32 m0, s2 -; MOVREL-NEXT: s_mov_b32 s67, 0x42000000 -; MOVREL-NEXT: s_mov_b32 s66, 0x41f80000 -; MOVREL-NEXT: s_mov_b32 s65, 0x41f00000 -; MOVREL-NEXT: s_mov_b32 s64, 0x41e80000 -; MOVREL-NEXT: s_mov_b32 s63, 0x41e00000 -; MOVREL-NEXT: s_mov_b32 s62, 0x41d80000 -; MOVREL-NEXT: s_mov_b32 s61, 0x41d00000 -; MOVREL-NEXT: s_mov_b32 s60, 0x41c80000 -; MOVREL-NEXT: s_mov_b32 s59, 0x41c00000 -; MOVREL-NEXT: s_mov_b32 s58, 0x41b80000 -; MOVREL-NEXT: s_mov_b32 s57, 0x41b00000 -; MOVREL-NEXT: s_mov_b32 s56, 0x41a80000 -; MOVREL-NEXT: s_mov_b32 s55, 0x41a00000 -; MOVREL-NEXT: s_mov_b32 s54, 0x41980000 -; MOVREL-NEXT: s_mov_b32 s53, 0x41900000 -; MOVREL-NEXT: s_mov_b32 s52, 0x41880000 -; MOVREL-NEXT: s_mov_b32 s51, 0x41800000 -; MOVREL-NEXT: s_mov_b32 s50, 0x41700000 -; MOVREL-NEXT: s_mov_b32 s49, 0x41600000 -; MOVREL-NEXT: s_mov_b32 s48, 0x41500000 -; MOVREL-NEXT: s_mov_b32 s47, 0x41400000 -; MOVREL-NEXT: s_mov_b32 s46, 0x41300000 -; MOVREL-NEXT: s_mov_b32 s45, 0x41200000 -; MOVREL-NEXT: s_mov_b32 s44, 0x41100000 -; MOVREL-NEXT: s_mov_b32 s43, 0x41000000 -; MOVREL-NEXT: s_mov_b32 s42, 0x40e00000 -; MOVREL-NEXT: s_mov_b32 s41, 0x40c00000 -; MOVREL-NEXT: s_mov_b32 s40, 0x40a00000 -; MOVREL-NEXT: s_mov_b32 s39, 4.0 -; MOVREL-NEXT: s_mov_b32 s38, 0x40400000 -; MOVREL-NEXT: s_mov_b32 s37, 2.0 -; MOVREL-NEXT: s_movrels_b32 s0, s36 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v32f32_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s36, 1.0 +; GCN-NEXT: s_mov_b32 m0, s2 +; GCN-NEXT: s_mov_b32 s67, 0x42000000 +; GCN-NEXT: s_mov_b32 s66, 0x41f80000 +; GCN-NEXT: s_mov_b32 s65, 0x41f00000 +; GCN-NEXT: s_mov_b32 s64, 0x41e80000 +; GCN-NEXT: s_mov_b32 s63, 0x41e00000 +; GCN-NEXT: s_mov_b32 s62, 0x41d80000 +; GCN-NEXT: s_mov_b32 s61, 0x41d00000 +; GCN-NEXT: s_mov_b32 s60, 0x41c80000 +; GCN-NEXT: s_mov_b32 s59, 0x41c00000 +; GCN-NEXT: s_mov_b32 s58, 0x41b80000 +; GCN-NEXT: s_mov_b32 s57, 0x41b00000 +; GCN-NEXT: s_mov_b32 s56, 0x41a80000 +; GCN-NEXT: s_mov_b32 s55, 0x41a00000 +; GCN-NEXT: s_mov_b32 s54, 0x41980000 +; GCN-NEXT: s_mov_b32 s53, 0x41900000 +; GCN-NEXT: s_mov_b32 s52, 0x41880000 +; GCN-NEXT: s_mov_b32 s51, 0x41800000 +; GCN-NEXT: s_mov_b32 s50, 0x41700000 +; GCN-NEXT: s_mov_b32 s49, 0x41600000 +; GCN-NEXT: s_mov_b32 s48, 0x41500000 +; GCN-NEXT: s_mov_b32 s47, 0x41400000 +; GCN-NEXT: s_mov_b32 s46, 0x41300000 +; GCN-NEXT: s_mov_b32 s45, 0x41200000 +; GCN-NEXT: s_mov_b32 s44, 0x41100000 +; GCN-NEXT: s_mov_b32 s43, 0x41000000 +; GCN-NEXT: s_mov_b32 s42, 0x40e00000 +; GCN-NEXT: s_mov_b32 s41, 0x40c00000 +; GCN-NEXT: s_mov_b32 s40, 0x40a00000 +; GCN-NEXT: s_mov_b32 s39, 4.0 +; GCN-NEXT: s_mov_b32 s38, 0x40400000 +; GCN-NEXT: s_mov_b32 s37, 2.0 +; GCN-NEXT: s_movrels_b32 s0, s36 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <32 x float> , i32 %sel ret float %ext } define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v16f64_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s66, 0 -; GPRIDX-NEXT: s_mov_b64 s[36:37], 1.0 -; GPRIDX-NEXT: s_mov_b32 m0, s2 -; GPRIDX-NEXT: s_mov_b32 s67, 0x40300000 -; GPRIDX-NEXT: s_mov_b32 s65, 0x402e0000 -; GPRIDX-NEXT: s_mov_b32 s64, s66 -; GPRIDX-NEXT: s_mov_b32 s63, 0x402c0000 -; GPRIDX-NEXT: s_mov_b32 s62, s66 -; GPRIDX-NEXT: s_mov_b32 s61, 0x402a0000 -; GPRIDX-NEXT: s_mov_b32 s60, s66 -; GPRIDX-NEXT: s_mov_b32 s59, 0x40280000 -; GPRIDX-NEXT: s_mov_b32 s58, s66 -; GPRIDX-NEXT: s_mov_b32 s57, 0x40260000 -; GPRIDX-NEXT: s_mov_b32 s56, s66 -; GPRIDX-NEXT: s_mov_b32 s55, 0x40240000 -; GPRIDX-NEXT: s_mov_b32 s54, s66 -; GPRIDX-NEXT: s_mov_b32 s53, 0x40220000 -; GPRIDX-NEXT: s_mov_b32 s52, s66 -; GPRIDX-NEXT: s_mov_b32 s51, 0x40200000 -; GPRIDX-NEXT: s_mov_b32 s50, s66 -; GPRIDX-NEXT: s_mov_b32 s49, 0x401c0000 -; GPRIDX-NEXT: s_mov_b32 s48, s66 -; GPRIDX-NEXT: s_mov_b32 s47, 0x40180000 -; GPRIDX-NEXT: s_mov_b32 s46, s66 -; GPRIDX-NEXT: s_mov_b32 s45, 0x40140000 -; GPRIDX-NEXT: s_mov_b32 s44, s66 -; GPRIDX-NEXT: s_mov_b64 s[42:43], 4.0 -; GPRIDX-NEXT: s_mov_b32 s41, 0x40080000 -; GPRIDX-NEXT: s_mov_b32 s40, s66 -; GPRIDX-NEXT: s_mov_b64 s[38:39], 2.0 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[36:37] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v16f64_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s66, 0 -; MOVREL-NEXT: s_mov_b64 s[36:37], 1.0 -; MOVREL-NEXT: s_mov_b32 m0, s2 -; MOVREL-NEXT: s_mov_b32 s67, 0x40300000 -; MOVREL-NEXT: s_mov_b32 s65, 0x402e0000 -; MOVREL-NEXT: s_mov_b32 s64, s66 -; MOVREL-NEXT: s_mov_b32 s63, 0x402c0000 -; MOVREL-NEXT: s_mov_b32 s62, s66 -; MOVREL-NEXT: s_mov_b32 s61, 0x402a0000 -; MOVREL-NEXT: s_mov_b32 s60, s66 -; MOVREL-NEXT: s_mov_b32 s59, 0x40280000 -; MOVREL-NEXT: s_mov_b32 s58, s66 -; MOVREL-NEXT: s_mov_b32 s57, 0x40260000 -; MOVREL-NEXT: s_mov_b32 s56, s66 -; MOVREL-NEXT: s_mov_b32 s55, 0x40240000 -; MOVREL-NEXT: s_mov_b32 s54, s66 -; MOVREL-NEXT: s_mov_b32 s53, 0x40220000 -; MOVREL-NEXT: s_mov_b32 s52, s66 -; MOVREL-NEXT: s_mov_b32 s51, 0x40200000 -; MOVREL-NEXT: s_mov_b32 s50, s66 -; MOVREL-NEXT: s_mov_b32 s49, 0x401c0000 -; MOVREL-NEXT: s_mov_b32 s48, s66 -; MOVREL-NEXT: s_mov_b32 s47, 0x40180000 -; MOVREL-NEXT: s_mov_b32 s46, s66 -; MOVREL-NEXT: s_mov_b32 s45, 0x40140000 -; MOVREL-NEXT: s_mov_b32 s44, s66 -; MOVREL-NEXT: s_mov_b64 s[42:43], 4.0 -; MOVREL-NEXT: s_mov_b32 s41, 0x40080000 -; MOVREL-NEXT: s_mov_b32 s40, s66 -; MOVREL-NEXT: s_mov_b64 s[38:39], 2.0 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[36:37] -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v16f64_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s66, 0 +; GCN-NEXT: s_mov_b64 s[36:37], 1.0 +; GCN-NEXT: s_mov_b32 m0, s2 +; GCN-NEXT: s_mov_b32 s67, 0x40300000 +; GCN-NEXT: s_mov_b32 s65, 0x402e0000 +; GCN-NEXT: s_mov_b32 s64, s66 +; GCN-NEXT: s_mov_b32 s63, 0x402c0000 +; GCN-NEXT: s_mov_b32 s62, s66 +; GCN-NEXT: s_mov_b32 s61, 0x402a0000 +; GCN-NEXT: s_mov_b32 s60, s66 +; GCN-NEXT: s_mov_b32 s59, 0x40280000 +; GCN-NEXT: s_mov_b32 s58, s66 +; GCN-NEXT: s_mov_b32 s57, 0x40260000 +; GCN-NEXT: s_mov_b32 s56, s66 +; GCN-NEXT: s_mov_b32 s55, 0x40240000 +; GCN-NEXT: s_mov_b32 s54, s66 +; GCN-NEXT: s_mov_b32 s53, 0x40220000 +; GCN-NEXT: s_mov_b32 s52, s66 +; GCN-NEXT: s_mov_b32 s51, 0x40200000 +; GCN-NEXT: s_mov_b32 s50, s66 +; GCN-NEXT: s_mov_b32 s49, 0x401c0000 +; GCN-NEXT: s_mov_b32 s48, s66 +; GCN-NEXT: s_mov_b32 s47, 0x40180000 +; GCN-NEXT: s_mov_b32 s46, s66 +; GCN-NEXT: s_mov_b32 s45, 0x40140000 +; GCN-NEXT: s_mov_b32 s44, s66 +; GCN-NEXT: s_mov_b64 s[42:43], 4.0 +; GCN-NEXT: s_mov_b32 s41, 0x40080000 +; GCN-NEXT: s_mov_b32 s40, s66 +; GCN-NEXT: s_mov_b64 s[38:39], 2.0 +; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37] +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <16 x double> , i32 %sel ret double %ext } define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) { -; GPRIDX-LABEL: dyn_extract_v6f32_s_v: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b64 s[6:7], exec -; GPRIDX-NEXT: BB33_1: ; =>This Inner Loop Header: Depth=1 -; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0 -; GPRIDX-NEXT: s_mov_b32 m0, s8 -; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s8, v0 -; GPRIDX-NEXT: s_movrels_b32 s8, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s8 -; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc -; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc -; GPRIDX-NEXT: s_cbranch_execnz BB33_1 -; GPRIDX-NEXT: ; %bb.2: -; GPRIDX-NEXT: s_mov_b64 exec, s[6:7] -; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v6f32_s_v: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b64 s[6:7], exec -; MOVREL-NEXT: BB33_1: ; =>This Inner Loop Header: Depth=1 -; MOVREL-NEXT: v_readfirstlane_b32 s8, v0 -; MOVREL-NEXT: s_mov_b32 m0, s8 -; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v0 -; MOVREL-NEXT: s_movrels_b32 s8, s0 -; MOVREL-NEXT: v_mov_b32_e32 v1, s8 -; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc -; MOVREL-NEXT: s_xor_b64 exec, exec, vcc -; MOVREL-NEXT: s_cbranch_execnz BB33_1 -; MOVREL-NEXT: ; %bb.2: -; MOVREL-NEXT: s_mov_b64 exec, s[6:7] -; MOVREL-NEXT: v_mov_b32_e32 v0, v1 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v6f32_s_v: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: BB33_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s8, v0 +; GCN-NEXT: s_mov_b32 m0, s8 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s8, v0 +; GCN-NEXT: s_movrels_b32 s8, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s8 +; GCN-NEXT: s_and_saveexec_b64 vcc, vcc +; GCN-NEXT: s_xor_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execnz BB33_1 +; GCN-NEXT: ; %bb.2: +; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext @@ -1628,84 +1231,47 @@ entry: } define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v6f32_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 m0, s8 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_movrels_b32 s0, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v6f32_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 m0, s8 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_movrels_b32 s0, s0 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v6f32_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 m0, s8 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_movrels_b32 s0, s0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) { -; GPRIDX-LABEL: dyn_extract_v7f32_s_v: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b64 s[8:9], exec -; GPRIDX-NEXT: BB37_1: ; =>This Inner Loop Header: Depth=1 -; GPRIDX-NEXT: v_readfirstlane_b32 s7, v0 -; GPRIDX-NEXT: s_mov_b32 m0, s7 -; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s7, v0 -; GPRIDX-NEXT: s_movrels_b32 s7, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s7 -; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc -; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc -; GPRIDX-NEXT: s_cbranch_execnz BB37_1 -; GPRIDX-NEXT: ; %bb.2: -; GPRIDX-NEXT: s_mov_b64 exec, s[8:9] -; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v7f32_s_v: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b64 s[8:9], exec -; MOVREL-NEXT: BB37_1: ; =>This Inner Loop Header: Depth=1 -; MOVREL-NEXT: v_readfirstlane_b32 s7, v0 -; MOVREL-NEXT: s_mov_b32 m0, s7 -; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s7, v0 -; MOVREL-NEXT: s_movrels_b32 s7, s0 -; MOVREL-NEXT: v_mov_b32_e32 v1, s7 -; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc -; MOVREL-NEXT: s_xor_b64 exec, exec, vcc -; MOVREL-NEXT: s_cbranch_execnz BB37_1 -; MOVREL-NEXT: ; %bb.2: -; MOVREL-NEXT: s_mov_b64 exec, s[8:9] -; MOVREL-NEXT: v_mov_b32_e32 v0, v1 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v7f32_s_v: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b64 s[8:9], exec +; GCN-NEXT: BB37_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s7, v0 +; GCN-NEXT: s_mov_b32 m0, s7 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s7, v0 +; GCN-NEXT: s_movrels_b32 s7, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: s_and_saveexec_b64 vcc, vcc +; GCN-NEXT: s_xor_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execnz BB37_1 +; GCN-NEXT: ; %bb.2: +; GCN-NEXT: s_mov_b64 exec, s[8:9] +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext @@ -1770,94 +1336,52 @@ entry: } define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v7f32_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 m0, s9 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_movrels_b32 s0, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v7f32_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 m0, s9 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_movrels_b32 s0, s0 -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v7f32_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 m0, s9 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_movrels_b32 s0, s0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) { -; GPRIDX-LABEL: dyn_extract_v6f64_s_v: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s16, s2 -; GPRIDX-NEXT: s_mov_b32 s17, s3 -; GPRIDX-NEXT: s_mov_b32 s18, s4 -; GPRIDX-NEXT: s_mov_b32 s19, s5 -; GPRIDX-NEXT: s_mov_b32 s20, s6 -; GPRIDX-NEXT: s_mov_b32 s21, s7 -; GPRIDX-NEXT: s_mov_b32 s22, s8 -; GPRIDX-NEXT: s_mov_b32 s23, s9 -; GPRIDX-NEXT: s_mov_b32 s24, s10 -; GPRIDX-NEXT: s_mov_b32 s25, s11 -; GPRIDX-NEXT: s_mov_b32 s26, s12 -; GPRIDX-NEXT: s_mov_b32 s27, s13 -; GPRIDX-NEXT: s_mov_b64 s[2:3], exec -; GPRIDX-NEXT: BB41_1: ; =>This Inner Loop Header: Depth=1 -; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 -; GPRIDX-NEXT: s_lshl_b32 m0, s0, 1 -; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 -; GPRIDX-NEXT: s_movrels_b32 s0, s16 -; GPRIDX-NEXT: s_movrels_b32 s1, s17 -; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc -; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc -; GPRIDX-NEXT: s_cbranch_execnz BB41_1 -; GPRIDX-NEXT: ; %bb.2: -; GPRIDX-NEXT: s_mov_b64 exec, s[2:3] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v6f64_s_v: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s16, s2 -; MOVREL-NEXT: s_mov_b32 s17, s3 -; MOVREL-NEXT: s_mov_b32 s18, s4 -; MOVREL-NEXT: s_mov_b32 s19, s5 -; MOVREL-NEXT: s_mov_b32 s20, s6 -; MOVREL-NEXT: s_mov_b32 s21, s7 -; MOVREL-NEXT: s_mov_b32 s22, s8 -; MOVREL-NEXT: s_mov_b32 s23, s9 -; MOVREL-NEXT: s_mov_b32 s24, s10 -; MOVREL-NEXT: s_mov_b32 s25, s11 -; MOVREL-NEXT: s_mov_b32 s26, s12 -; MOVREL-NEXT: s_mov_b32 s27, s13 -; MOVREL-NEXT: s_mov_b64 s[2:3], exec -; MOVREL-NEXT: BB41_1: ; =>This Inner Loop Header: Depth=1 -; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 -; MOVREL-NEXT: s_lshl_b32 m0, s0, 1 -; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 -; MOVREL-NEXT: s_movrels_b32 s0, s16 -; MOVREL-NEXT: s_movrels_b32 s1, s17 -; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc -; MOVREL-NEXT: s_xor_b64 exec, exec, vcc -; MOVREL-NEXT: s_cbranch_execnz BB41_1 -; MOVREL-NEXT: ; %bb.2: -; MOVREL-NEXT: s_mov_b64 exec, s[2:3] -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v6f64_s_v: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s16, s2 +; GCN-NEXT: s_mov_b32 s17, s3 +; GCN-NEXT: s_mov_b32 s18, s4 +; GCN-NEXT: s_mov_b32 s19, s5 +; GCN-NEXT: s_mov_b32 s20, s6 +; GCN-NEXT: s_mov_b32 s21, s7 +; GCN-NEXT: s_mov_b32 s22, s8 +; GCN-NEXT: s_mov_b32 s23, s9 +; GCN-NEXT: s_mov_b32 s24, s10 +; GCN-NEXT: s_mov_b32 s25, s11 +; GCN-NEXT: s_mov_b32 s26, s12 +; GCN-NEXT: s_mov_b32 s27, s13 +; GCN-NEXT: s_mov_b64 s[2:3], exec +; GCN-NEXT: BB41_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s0, v0 +; GCN-NEXT: s_lshl_b32 m0, s0, 1 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 +; GCN-NEXT: s_movrels_b32 s0, s16 +; GCN-NEXT: s_movrels_b32 s1, s17 +; GCN-NEXT: s_and_saveexec_b64 vcc, vcc +; GCN-NEXT: s_xor_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execnz BB41_1 +; GCN-NEXT: ; %bb.2: +; GCN-NEXT: s_mov_b64 exec, s[2:3] +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext @@ -1934,106 +1458,58 @@ entry: } define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v6f64_s_s: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 m0, s14 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s8, s10 -; GPRIDX-NEXT: s_mov_b32 s9, s11 -; GPRIDX-NEXT: s_mov_b32 s10, s12 -; GPRIDX-NEXT: s_mov_b32 s11, s13 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v6f64_s_s: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 m0, s14 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 s8, s10 -; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: s_mov_b32 s10, s12 -; MOVREL-NEXT: s_mov_b32 s11, s13 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v6f64_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 m0, s14 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) { -; GPRIDX-LABEL: dyn_extract_v7f64_s_v: -; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s16, s2 -; GPRIDX-NEXT: s_mov_b32 s17, s3 -; GPRIDX-NEXT: s_mov_b32 s18, s4 -; GPRIDX-NEXT: s_mov_b32 s19, s5 -; GPRIDX-NEXT: s_mov_b32 s20, s6 -; GPRIDX-NEXT: s_mov_b32 s21, s7 -; GPRIDX-NEXT: s_mov_b32 s22, s8 -; GPRIDX-NEXT: s_mov_b32 s23, s9 -; GPRIDX-NEXT: s_mov_b32 s24, s10 -; GPRIDX-NEXT: s_mov_b32 s25, s11 -; GPRIDX-NEXT: s_mov_b32 s26, s12 -; GPRIDX-NEXT: s_mov_b32 s27, s13 -; GPRIDX-NEXT: s_mov_b32 s28, s14 -; GPRIDX-NEXT: s_mov_b32 s29, s15 -; GPRIDX-NEXT: s_mov_b64 s[2:3], exec -; GPRIDX-NEXT: BB45_1: ; =>This Inner Loop Header: Depth=1 -; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 -; GPRIDX-NEXT: s_lshl_b32 m0, s0, 1 -; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 -; GPRIDX-NEXT: s_movrels_b32 s0, s16 -; GPRIDX-NEXT: s_movrels_b32 s1, s17 -; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc -; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc -; GPRIDX-NEXT: s_cbranch_execnz BB45_1 -; GPRIDX-NEXT: ; %bb.2: -; GPRIDX-NEXT: s_mov_b64 exec, s[2:3] -; GPRIDX-NEXT: ; return to shader part epilog -; -; MOVREL-LABEL: dyn_extract_v7f64_s_v: -; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s16, s2 -; MOVREL-NEXT: s_mov_b32 s17, s3 -; MOVREL-NEXT: s_mov_b32 s18, s4 -; MOVREL-NEXT: s_mov_b32 s19, s5 -; MOVREL-NEXT: s_mov_b32 s20, s6 -; MOVREL-NEXT: s_mov_b32 s21, s7 -; MOVREL-NEXT: s_mov_b32 s22, s8 -; MOVREL-NEXT: s_mov_b32 s23, s9 -; MOVREL-NEXT: s_mov_b32 s24, s10 -; MOVREL-NEXT: s_mov_b32 s25, s11 -; MOVREL-NEXT: s_mov_b32 s26, s12 -; MOVREL-NEXT: s_mov_b32 s27, s13 -; MOVREL-NEXT: s_mov_b32 s28, s14 -; MOVREL-NEXT: s_mov_b32 s29, s15 -; MOVREL-NEXT: s_mov_b64 s[2:3], exec -; MOVREL-NEXT: BB45_1: ; =>This Inner Loop Header: Depth=1 -; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 -; MOVREL-NEXT: s_lshl_b32 m0, s0, 1 -; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 -; MOVREL-NEXT: s_movrels_b32 s0, s16 -; MOVREL-NEXT: s_movrels_b32 s1, s17 -; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc -; MOVREL-NEXT: s_xor_b64 exec, exec, vcc -; MOVREL-NEXT: s_cbranch_execnz BB45_1 -; MOVREL-NEXT: ; %bb.2: -; MOVREL-NEXT: s_mov_b64 exec, s[2:3] -; MOVREL-NEXT: ; return to shader part epilog +; GCN-LABEL: dyn_extract_v7f64_s_v: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s16, s2 +; GCN-NEXT: s_mov_b32 s17, s3 +; GCN-NEXT: s_mov_b32 s18, s4 +; GCN-NEXT: s_mov_b32 s19, s5 +; GCN-NEXT: s_mov_b32 s20, s6 +; GCN-NEXT: s_mov_b32 s21, s7 +; GCN-NEXT: s_mov_b32 s22, s8 +; GCN-NEXT: s_mov_b32 s23, s9 +; GCN-NEXT: s_mov_b32 s24, s10 +; GCN-NEXT: s_mov_b32 s25, s11 +; GCN-NEXT: s_mov_b32 s26, s12 +; GCN-NEXT: s_mov_b32 s27, s13 +; GCN-NEXT: s_mov_b32 s28, s14 +; GCN-NEXT: s_mov_b32 s29, s15 +; GCN-NEXT: s_mov_b64 s[2:3], exec +; GCN-NEXT: BB45_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s0, v0 +; GCN-NEXT: s_lshl_b32 m0, s0, 1 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 +; GCN-NEXT: s_movrels_b32 s0, s16 +; GCN-NEXT: s_movrels_b32 s1, s17 +; GCN-NEXT: s_and_saveexec_b64 vcc, vcc +; GCN-NEXT: s_xor_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execnz BB45_1 +; GCN-NEXT: ; %bb.2: +; GCN-NEXT: s_mov_b64 exec, s[2:3] +; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x double> %vec, i32 %sel ret double %ext @@ -2110,46 +1586,466 @@ entry: } define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) { -; GPRIDX-LABEL: dyn_extract_v7f64_s_s: +; GCN-LABEL: dyn_extract_v7f64_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 m0, s16 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GCN-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <7 x double> %vec, i32 %sel + ret double %ext +} + +define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v5f64_s_s: +; GPRIDX: .amd_kernel_code_t +; GPRIDX-NEXT: amd_code_version_major = 1 +; GPRIDX-NEXT: amd_code_version_minor = 2 +; GPRIDX-NEXT: amd_machine_kind = 1 +; GPRIDX-NEXT: amd_machine_version_major = 9 +; GPRIDX-NEXT: amd_machine_version_minor = 0 +; GPRIDX-NEXT: amd_machine_version_stepping = 0 +; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 +; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 +; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 +; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 2 +; GPRIDX-NEXT: priority = 0 +; GPRIDX-NEXT: float_mode = 240 +; GPRIDX-NEXT: priv = 0 +; GPRIDX-NEXT: enable_dx10_clamp = 1 +; GPRIDX-NEXT: debug_mode = 0 +; GPRIDX-NEXT: enable_ieee_mode = 1 +; GPRIDX-NEXT: enable_wgp_mode = 0 +; GPRIDX-NEXT: enable_mem_ordered = 0 +; GPRIDX-NEXT: enable_fwd_progress = 0 +; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; GPRIDX-NEXT: user_sgpr_count = 6 +; GPRIDX-NEXT: enable_trap_handler = 0 +; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 +; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 +; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 +; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 +; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 +; GPRIDX-NEXT: enable_exception_msb = 0 +; GPRIDX-NEXT: granulated_lds_size = 0 +; GPRIDX-NEXT: enable_exception = 0 +; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 +; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 +; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 +; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 +; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 +; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 +; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GPRIDX-NEXT: enable_wavefront_size32 = 0 +; GPRIDX-NEXT: enable_ordered_append_gds = 0 +; GPRIDX-NEXT: private_element_size = 1 +; GPRIDX-NEXT: is_ptr64 = 1 +; GPRIDX-NEXT: is_dynamic_callstack = 0 +; GPRIDX-NEXT: is_debug_enabled = 0 +; GPRIDX-NEXT: is_xnack_enabled = 0 +; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 +; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 +; GPRIDX-NEXT: gds_segment_byte_size = 0 +; GPRIDX-NEXT: kernarg_segment_byte_size = 28 +; GPRIDX-NEXT: workgroup_fbarrier_count = 0 +; GPRIDX-NEXT: wavefront_sgpr_count = 24 +; GPRIDX-NEXT: workitem_vgpr_count = 4 +; GPRIDX-NEXT: reserved_vgpr_first = 0 +; GPRIDX-NEXT: reserved_vgpr_count = 0 +; GPRIDX-NEXT: reserved_sgpr_first = 0 +; GPRIDX-NEXT: reserved_sgpr_count = 0 +; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 +; GPRIDX-NEXT: kernarg_segment_alignment = 4 +; GPRIDX-NEXT: group_segment_alignment = 4 +; GPRIDX-NEXT: private_segment_alignment = 4 +; GPRIDX-NEXT: wavefront_size = 6 +; GPRIDX-NEXT: call_convention = -1 +; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 +; GPRIDX-NEXT: .end_amd_kernel_code_t +; GPRIDX-NEXT: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GPRIDX-NEXT: s_load_dword s2, s[4:5], 0x8 +; GPRIDX-NEXT: s_mov_b32 s16, 0 +; GPRIDX-NEXT: s_mov_b64 s[8:9], 1.0 +; GPRIDX-NEXT: s_mov_b32 s17, 0x40140000 +; GPRIDX-NEXT: s_mov_b64 s[14:15], 4.0 +; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b32 m0, s2 +; GPRIDX-NEXT: s_mov_b32 s13, 0x40080000 +; GPRIDX-NEXT: s_mov_b32 s12, s16 +; GPRIDX-NEXT: s_mov_b64 s[10:11], 2.0 +; GPRIDX-NEXT: s_movrels_b64 s[2:3], s[8:9] +; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 +; GPRIDX-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GPRIDX-NEXT: s_endpgm +; +; MOVREL-LABEL: dyn_extract_v5f64_s_s: +; MOVREL: .amd_kernel_code_t +; MOVREL-NEXT: amd_code_version_major = 1 +; MOVREL-NEXT: amd_code_version_minor = 2 +; MOVREL-NEXT: amd_machine_kind = 1 +; MOVREL-NEXT: amd_machine_version_major = 8 +; MOVREL-NEXT: amd_machine_version_minor = 0 +; MOVREL-NEXT: amd_machine_version_stepping = 3 +; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 +; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 +; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 +; MOVREL-NEXT: granulated_wavefront_sgpr_count = 2 +; MOVREL-NEXT: priority = 0 +; MOVREL-NEXT: float_mode = 240 +; MOVREL-NEXT: priv = 0 +; MOVREL-NEXT: enable_dx10_clamp = 1 +; MOVREL-NEXT: debug_mode = 0 +; MOVREL-NEXT: enable_ieee_mode = 1 +; MOVREL-NEXT: enable_wgp_mode = 0 +; MOVREL-NEXT: enable_mem_ordered = 0 +; MOVREL-NEXT: enable_fwd_progress = 0 +; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; MOVREL-NEXT: user_sgpr_count = 6 +; MOVREL-NEXT: enable_trap_handler = 0 +; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 +; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 +; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 +; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 +; MOVREL-NEXT: enable_vgpr_workitem_id = 0 +; MOVREL-NEXT: enable_exception_msb = 0 +; MOVREL-NEXT: granulated_lds_size = 0 +; MOVREL-NEXT: enable_exception = 0 +; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 +; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 +; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 +; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 +; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 +; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 +; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; MOVREL-NEXT: enable_wavefront_size32 = 0 +; MOVREL-NEXT: enable_ordered_append_gds = 0 +; MOVREL-NEXT: private_element_size = 1 +; MOVREL-NEXT: is_ptr64 = 1 +; MOVREL-NEXT: is_dynamic_callstack = 0 +; MOVREL-NEXT: is_debug_enabled = 0 +; MOVREL-NEXT: is_xnack_enabled = 0 +; MOVREL-NEXT: workitem_private_segment_byte_size = 0 +; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 +; MOVREL-NEXT: gds_segment_byte_size = 0 +; MOVREL-NEXT: kernarg_segment_byte_size = 28 +; MOVREL-NEXT: workgroup_fbarrier_count = 0 +; MOVREL-NEXT: wavefront_sgpr_count = 24 +; MOVREL-NEXT: workitem_vgpr_count = 4 +; MOVREL-NEXT: reserved_vgpr_first = 0 +; MOVREL-NEXT: reserved_vgpr_count = 0 +; MOVREL-NEXT: reserved_sgpr_first = 0 +; MOVREL-NEXT: reserved_sgpr_count = 0 +; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 +; MOVREL-NEXT: kernarg_segment_alignment = 4 +; MOVREL-NEXT: group_segment_alignment = 4 +; MOVREL-NEXT: private_segment_alignment = 4 +; MOVREL-NEXT: wavefront_size = 6 +; MOVREL-NEXT: call_convention = -1 +; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 +; MOVREL-NEXT: .end_amd_kernel_code_t +; MOVREL-NEXT: ; %bb.0: ; %entry +; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; MOVREL-NEXT: s_load_dword s2, s[4:5], 0x8 +; MOVREL-NEXT: s_mov_b32 s16, 0 +; MOVREL-NEXT: s_mov_b64 s[8:9], 1.0 +; MOVREL-NEXT: s_mov_b32 s17, 0x40140000 +; MOVREL-NEXT: s_mov_b64 s[14:15], 4.0 +; MOVREL-NEXT: s_waitcnt lgkmcnt(0) +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: s_mov_b32 s13, 0x40080000 +; MOVREL-NEXT: s_mov_b32 s12, s16 +; MOVREL-NEXT: s_mov_b64 s[10:11], 2.0 +; MOVREL-NEXT: s_movrels_b64 s[2:3], s[8:9] +; MOVREL-NEXT: v_mov_b32_e32 v0, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s1 +; MOVREL-NEXT: v_mov_b32_e32 v1, s3 +; MOVREL-NEXT: v_mov_b32_e32 v2, s0 +; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; MOVREL-NEXT: s_endpgm +entry: + %ext = extractelement <5 x double> , i32 %sel + store double %ext, double addrspace(1)* %out + ret void +} + +define float @dyn_extract_v15f32_const_s_v(i32 %sel) { +; GCN-LABEL: dyn_extract_v15f32_const_s_v: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s18, 0x41700000 +; GCN-NEXT: s_mov_b32 s17, 0x41600000 +; GCN-NEXT: s_mov_b32 s16, 0x41500000 +; GCN-NEXT: s_mov_b32 s15, 0x41400000 +; GCN-NEXT: s_mov_b32 s14, 0x41300000 +; GCN-NEXT: s_mov_b32 s13, 0x41200000 +; GCN-NEXT: s_mov_b32 s12, 0x41100000 +; GCN-NEXT: s_mov_b32 s11, 0x41000000 +; GCN-NEXT: s_mov_b32 s10, 0x40e00000 +; GCN-NEXT: s_mov_b32 s9, 0x40c00000 +; GCN-NEXT: s_mov_b32 s8, 0x40a00000 +; GCN-NEXT: s_mov_b32 s7, 4.0 +; GCN-NEXT: s_mov_b32 s6, 0x40400000 +; GCN-NEXT: s_mov_b32 s5, 2.0 +; GCN-NEXT: s_mov_b32 s4, 1.0 +; GCN-NEXT: s_mov_b64 s[20:21], exec +; GCN-NEXT: BB50_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s19, v0 +; GCN-NEXT: s_mov_b32 m0, s19 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s19, v0 +; GCN-NEXT: s_movrels_b32 s19, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s19 +; GCN-NEXT: s_and_saveexec_b64 vcc, vcc +; GCN-NEXT: s_xor_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execnz BB50_1 +; GCN-NEXT: ; %bb.2: +; GCN-NEXT: s_mov_b64 exec, s[20:21] +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <15 x float> , i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) { +; GCN-LABEL: dyn_extract_v15f32_const_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s4, 1.0 +; GCN-NEXT: s_mov_b32 m0, s2 +; GCN-NEXT: s_mov_b32 s18, 0x41700000 +; GCN-NEXT: s_mov_b32 s17, 0x41600000 +; GCN-NEXT: s_mov_b32 s16, 0x41500000 +; GCN-NEXT: s_mov_b32 s15, 0x41400000 +; GCN-NEXT: s_mov_b32 s14, 0x41300000 +; GCN-NEXT: s_mov_b32 s13, 0x41200000 +; GCN-NEXT: s_mov_b32 s12, 0x41100000 +; GCN-NEXT: s_mov_b32 s11, 0x41000000 +; GCN-NEXT: s_mov_b32 s10, 0x40e00000 +; GCN-NEXT: s_mov_b32 s9, 0x40c00000 +; GCN-NEXT: s_mov_b32 s8, 0x40a00000 +; GCN-NEXT: s_mov_b32 s7, 4.0 +; GCN-NEXT: s_mov_b32 s6, 0x40400000 +; GCN-NEXT: s_mov_b32 s5, 2.0 +; GCN-NEXT: s_movrels_b32 s0, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <15 x float> , i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) { +; GCN-LABEL: dyn_extract_v15f32_s_v: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b64 s[16:17], exec +; GCN-NEXT: BB52_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s15, v0 +; GCN-NEXT: s_mov_b32 m0, s15 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s15, v0 +; GCN-NEXT: s_movrels_b32 s15, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s15 +; GCN-NEXT: s_and_saveexec_b64 vcc, vcc +; GCN-NEXT: s_xor_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execnz BB52_1 +; GCN-NEXT: ; %bb.2: +; GCN-NEXT: s_mov_b64 exec, s[16:17] +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <15 x float> %vec, i32 %sel + ret float %ext +} + +define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v15f32_v_v: ; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 m0, s16 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s8, s10 -; GPRIDX-NEXT: s_mov_b32 s9, s11 -; GPRIDX-NEXT: s_mov_b32 s10, s12 -; GPRIDX-NEXT: s_mov_b32 s11, s13 -; GPRIDX-NEXT: s_mov_b32 s12, s14 -; GPRIDX-NEXT: s_mov_b32 s13, s15 -; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB53_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v15 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v15 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB53_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v15f32_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB53_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v15 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v15 +; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB53_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v16 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <15 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v15f32_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: ; return to shader part epilog ; -; MOVREL-LABEL: dyn_extract_v7f64_s_s: +; MOVREL-LABEL: dyn_extract_v15f32_v_s: ; MOVREL: ; %bb.0: ; %entry -; MOVREL-NEXT: s_mov_b32 s0, s2 -; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_mov_b32 m0, s16 -; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 -; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 -; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 -; MOVREL-NEXT: s_mov_b32 s8, s10 -; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: s_mov_b32 s10, s12 -; MOVREL-NEXT: s_mov_b32 s11, s13 -; MOVREL-NEXT: s_mov_b32 s12, s14 -; MOVREL-NEXT: s_mov_b32 s13, s15 -; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 ; MOVREL-NEXT: ; return to shader part epilog entry: - %ext = extractelement <7 x double> %vec, i32 %sel - ret double %ext + %ext = extractelement <15 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) { +; GCN-LABEL: dyn_extract_v15f32_s_s: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 m0, s17 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_movrels_b32 s0, s0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <15 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) { +; GCN-LABEL: dyn_extract_v15f32_s_s_offset3: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 m0, s17 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: s_mov_b32 s12, s14 +; GCN-NEXT: s_mov_b32 s13, s15 +; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: s_movrels_b32 s0, s3 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog +entry: + %add = add i32 %sel, 3 + %ext = extractelement <15 x float> %vec, i32 %add + ret float %ext +} + +define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB57_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v15 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v15 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v16, v3 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB57_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB57_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v15 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v15 +; MOVREL-NEXT: v_movrels_b32_e32 v16, v3 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB57_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v16 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %add = add i32 %sel, 3 + %ext = extractelement <15 x float> %vec, i32 %add + ret float %ext }