diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fd1259b693bc0..9970ba20610ba 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4034,6 +4034,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { getShiftAmountTy(N0.getValueType())))); } + // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the + // hi result is in use in case we hit this mid-legalization. + for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) { + if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) { + SDVTList LoHiVT = DAG.getVTList(VT, VT); + // TODO: Can we match commutable operands with getNodeIfExists? + if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1})) + if (LoHi->hasAnyUseOfValue(1)) + return SDValue(LoHi, 0); + if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0})) + if (LoHi->hasAnyUseOfValue(1)) + return SDValue(LoHi, 0); + } + } + // Try to transform: // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub. // mul x, (2^N + 1) --> add (shl x, N), x diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll index 0a62c42969bb9..fdcceea353bcb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll @@ -34,21 +34,19 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) { ; GFX9-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v2, 0 -; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v2, 0 -; GFX9-NEXT: v_mov_b32_e32 v10, v1 -; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v10, v6 -; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v7, vcc -; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v3, 0 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v10, v8 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v9, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, v4, v2 -; GFX9-NEXT: v_mul_lo_u32 v5, v5, v3 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v6 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v7, vcc +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v5, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v4, v2, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v2, v5 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v9, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v6, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; GFX9-NEXT: v_add3_u32 v1, v1, v5, v4 +; GFX9-NEXT: v_add3_u32 v1, v1, v5, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -60,19 +58,17 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) { ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 ; GFX10-NEXT: v_mad_u64_u32 v[6:7], s4, v4, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[9:10], s4, v5, v2, 0 -; GFX10-NEXT: v_mad_u64_u32 v[11:12], s4, v5, v3, 0 -; GFX10-NEXT: v_mov_b32_e32 v8, v1 -; GFX10-NEXT: v_mul_lo_u32 v5, v5, v2 -; GFX10-NEXT: v_mul_lo_u32 v4, v4, v3 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v8, v6 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo -; GFX10-NEXT: v_add3_u32 v1, v1, v4, v5 -; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v6, v9 -; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v7, v10, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v12, vcc_lo -; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v11 -; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[8:9], s4, v5, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s4, v5, v3, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, v1 +; GFX10-NEXT: v_add3_u32 v1, v1, v6, v8 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v4, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v7, vcc_lo +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v4, v8 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v5, v9, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3] ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -85,23 +81,21 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) { ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 ; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v4, v3, 0 -; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v5, v2, 0 -; GFX11-NEXT: v_mad_u64_u32 v[11:12], null, v5, v3, 0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_mov_b32_e32 v8, v1 -; GFX11-NEXT: v_mul_lo_u32 v5, v5, v2 -; GFX11-NEXT: v_mul_lo_u32 v4, v4, v3 -; GFX11-NEXT: v_add_co_u32 v6, vcc_lo, v8, v6 -; GFX11-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_add3_u32 v1, v1, v4, v5 -; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v6, v9 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v7, v10, vcc_lo -; GFX11-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v12, vcc_lo -; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v11 +; GFX11-NEXT: v_mad_u64_u32 v[8:9], null, v5, v2, 0 +; GFX11-NEXT: v_mad_u64_u32 v[10:11], null, v5, v3, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_mov_b32_e32 v4, v1 +; GFX11-NEXT: v_add3_u32 v1, v1, v6, v8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v4, v6 +; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v7, vcc_lo +; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v3, v9, vcc_lo +; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v11, vcc_lo +; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v10 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo +; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX11-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3] ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -157,28 +151,26 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) { ; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v3, 0 ; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v10, v1 -; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v10, v6 -; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v7, vcc -; GFX9-NEXT: v_mad_i64_i32 v[6:7], s[4:5], v4, v3, 0 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v10, v8 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v9, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v8, v6 +; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v10, v6 +; GFX9-NEXT: v_mad_i64_i32 v[10:11], s[4:5], v4, v3, 0 ; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc -; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, v6, v2 -; GFX9-NEXT: v_subbrev_co_u32_e32 v9, vcc, 0, v7, vcc +; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v9, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v11, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v10 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v7, v2 +; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v9, vcc ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc -; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, v6, v5 -; GFX9-NEXT: v_mul_lo_u32 v4, v4, v2 -; GFX9-NEXT: v_mul_lo_u32 v5, v5, v3 -; GFX9-NEXT: v_subbrev_co_u32_e32 v9, vcc, 0, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v10, vcc +; GFX9-NEXT: v_sub_co_u32_e32 v5, vcc, v2, v5 +; GFX9-NEXT: v_subbrev_co_u32_e32 v7, vcc, 0, v4, vcc ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 0, v3 -; GFX9-NEXT: v_add3_u32 v1, v1, v5, v4 +; GFX9-NEXT: v_add3_u32 v1, v1, v6, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v9, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc ; GFX9-NEXT: v_mov_b32_e32 v5, v4 ; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, v[2:3], v[4:5] ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc @@ -195,21 +187,19 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) { ; GFX10-NEXT: v_mad_u64_u32 v[9:10], s4, v5, v2, 0 ; GFX10-NEXT: v_mad_i64_i32 v[11:12], s4, v5, v3, 0 ; GFX10-NEXT: v_mov_b32_e32 v8, v1 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v8, v6 +; GFX10-NEXT: v_add3_u32 v1, v1, v6, v9 +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v6 ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v8, v5, v2 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v9 -; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v7, v10, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v12, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v9, v4, v3 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v11 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v6, v2 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v10, vcc_lo, 0, v7, vcc_lo +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v9 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v10, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v12, vcc_lo +; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v7, v11 +; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v8, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v7, v2 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v10, vcc_lo, 0, v8, vcc_lo ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0, v5 -; GFX10-NEXT: v_add3_u32 v1, v1, v9, v8 -; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v5, v7, v10, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v10, vcc_lo ; GFX10-NEXT: v_ashrrev_i32_e32 v2, 31, v1 ; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v6, v4 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, 0, v5, vcc_lo @@ -231,36 +221,34 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) { ; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v4, v3, 0 ; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v5, v2, 0 ; GFX11-NEXT: v_mad_i64_i32 v[11:12], null, v5, v3, 0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_mov_b32_e32 v8, v1 -; GFX11-NEXT: v_add_co_u32 v6, vcc_lo, v8, v6 -; GFX11-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo -; GFX11-NEXT: v_mul_lo_u32 v8, v5, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_add_co_u32 v6, vcc_lo, v6, v9 -; GFX11-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v7, v10, vcc_lo -; GFX11-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v12, vcc_lo -; GFX11-NEXT: v_mul_lo_u32 v9, v4, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_add_co_u32 v6, vcc_lo, v6, v11 +; GFX11-NEXT: v_add3_u32 v1, v1, v6, v9 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v8, v6 ; GFX11-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo +; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v8, v9 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v10, vcc_lo +; GFX11-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v12, vcc_lo +; GFX11-NEXT: v_add_co_u32 v7, vcc_lo, v7, v11 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v6, v2 -; GFX11-NEXT: v_subrev_co_ci_u32_e32 v10, vcc_lo, 0, v7, vcc_lo +; GFX11-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v8, vcc_lo +; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v7, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_subrev_co_ci_u32_e32 v10, vcc_lo, 0, v8, vcc_lo ; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0, v5 -; GFX11-NEXT: v_add3_u32 v1, v1, v9, v8 +; GFX11-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e32 v5, v7, v10, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v5, v8, v10, vcc_lo ; GFX11-NEXT: v_ashrrev_i32_e32 v2, 31, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v6, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, 0, v5, vcc_lo ; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_mov_b32_e32 v3, v2 -; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_cndmask_b32 v4, v6, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_dual_cndmask_b32 v4, v6, v4 :: v_dual_cndmask_b32 v5, v5, v7 ; GFX11-NEXT: v_cmp_ne_u64_e32 vcc_lo, v[4:5], v[2:3] ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll index b7b90b72d19ae..9bac05eccabd5 100644 --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -437,18 +437,14 @@ define i64 @combine_mul_umul_lohi_i64(i64 %a, i64 %b) { ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: mulq %rsi -; SSE-NEXT: imulq %rsi, %rdi -; SSE-NEXT: xorq %rdx, %rdi -; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: xorq %rdx, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_umul_lohi_i64: ; AVX: # %bb.0: ; AVX-NEXT: movq %rdi, %rax ; AVX-NEXT: mulq %rsi -; AVX-NEXT: imulq %rsi, %rdi -; AVX-NEXT: xorq %rdx, %rdi -; AVX-NEXT: movq %rdi, %rax +; AVX-NEXT: xorq %rdx, %rax ; AVX-NEXT: retq %a128 = zext i64 %a to i128 %b128 = zext i64 %b to i128 @@ -465,18 +461,14 @@ define i64 @combine_mul_smul_lohi_commute_i64(i64 %a, i64 %b) { ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: imulq %rsi -; SSE-NEXT: imulq %rdi, %rsi -; SSE-NEXT: xorq %rdx, %rsi -; SSE-NEXT: movq %rsi, %rax +; SSE-NEXT: xorq %rdx, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_smul_lohi_commute_i64: ; AVX: # %bb.0: ; AVX-NEXT: movq %rdi, %rax ; AVX-NEXT: imulq %rsi -; AVX-NEXT: imulq %rdi, %rsi -; AVX-NEXT: xorq %rdx, %rsi -; AVX-NEXT: movq %rsi, %rax +; AVX-NEXT: xorq %rdx, %rax ; AVX-NEXT: retq %a128 = sext i64 %a to i128 %b128 = sext i64 %b to i128 @@ -491,22 +483,18 @@ define i64 @combine_mul_smul_lohi_commute_i64(i64 %a, i64 %b) { define i64 @combine_mul_umul_lohi_const_i64(i64 %h) { ; SSE-LABEL: combine_mul_umul_lohi_const_i64: ; SSE: # %bb.0: -; SSE-NEXT: movabsq $-4265267296055464877, %rcx # imm = 0xC4CEB9FE1A85EC53 ; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movabsq $-4265267296055464877, %rcx # imm = 0xC4CEB9FE1A85EC53 ; SSE-NEXT: mulq %rcx -; SSE-NEXT: imulq %rdi, %rcx -; SSE-NEXT: xorq %rdx, %rcx -; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: xorq %rdx, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_umul_lohi_const_i64: ; AVX: # %bb.0: -; AVX-NEXT: movabsq $-4265267296055464877, %rcx # imm = 0xC4CEB9FE1A85EC53 ; AVX-NEXT: movq %rdi, %rax +; AVX-NEXT: movabsq $-4265267296055464877, %rcx # imm = 0xC4CEB9FE1A85EC53 ; AVX-NEXT: mulq %rcx -; AVX-NEXT: imulq %rdi, %rcx -; AVX-NEXT: xorq %rdx, %rcx -; AVX-NEXT: movq %rcx, %rax +; AVX-NEXT: xorq %rdx, %rax ; AVX-NEXT: retq %h128 = zext i64 %h to i128 %m128 = mul nuw i128 %h128, 14181476777654086739 @@ -520,30 +508,26 @@ define i64 @combine_mul_umul_lohi_const_i64(i64 %h) { define i64 @combine_mul_smul_lohi_const_i64(i64 %h) { ; SSE-LABEL: combine_mul_smul_lohi_const_i64: ; SSE: # %bb.0: -; SSE-NEXT: movq %rdi, %rsi -; SSE-NEXT: sarq $63, %rsi -; SSE-NEXT: movabsq $-4265267296055464877, %rcx # imm = 0xC4CEB9FE1A85EC53 ; SSE-NEXT: movq %rdi, %rax -; SSE-NEXT: mulq %rcx -; SSE-NEXT: imulq %rcx, %rsi -; SSE-NEXT: addq %rdx, %rsi -; SSE-NEXT: imulq %rdi, %rcx -; SSE-NEXT: xorq %rsi, %rcx -; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: movq %rdi, %rcx +; SSE-NEXT: sarq $63, %rcx +; SSE-NEXT: movabsq $-4265267296055464877, %rsi # imm = 0xC4CEB9FE1A85EC53 +; SSE-NEXT: mulq %rsi +; SSE-NEXT: imulq %rsi, %rcx +; SSE-NEXT: addq %rdx, %rcx +; SSE-NEXT: xorq %rcx, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_smul_lohi_const_i64: ; AVX: # %bb.0: -; AVX-NEXT: movq %rdi, %rsi -; AVX-NEXT: sarq $63, %rsi -; AVX-NEXT: movabsq $-4265267296055464877, %rcx # imm = 0xC4CEB9FE1A85EC53 ; AVX-NEXT: movq %rdi, %rax -; AVX-NEXT: mulq %rcx -; AVX-NEXT: imulq %rcx, %rsi -; AVX-NEXT: addq %rdx, %rsi -; AVX-NEXT: imulq %rdi, %rcx -; AVX-NEXT: xorq %rsi, %rcx -; AVX-NEXT: movq %rcx, %rax +; AVX-NEXT: movq %rdi, %rcx +; AVX-NEXT: sarq $63, %rcx +; AVX-NEXT: movabsq $-4265267296055464877, %rsi # imm = 0xC4CEB9FE1A85EC53 +; AVX-NEXT: mulq %rsi +; AVX-NEXT: imulq %rsi, %rcx +; AVX-NEXT: addq %rdx, %rcx +; AVX-NEXT: xorq %rcx, %rax ; AVX-NEXT: retq %h128 = sext i64 %h to i128 %m128 = mul nsw i128 %h128, 14181476777654086739 diff --git a/llvm/test/CodeGen/X86/muloti.ll b/llvm/test/CodeGen/X86/muloti.ll index 60a2e21dcd03d..a184d49ce75a7 100644 --- a/llvm/test/CodeGen/X86/muloti.ll +++ b/llvm/test/CodeGen/X86/muloti.ll @@ -13,54 +13,51 @@ define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nou ; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: .cfi_offset %rbx, -24 ; CHECK-NEXT: .cfi_offset %r14, -16 -; CHECK-NEXT: movq %rdx, %r11 +; CHECK-NEXT: movq %rdx, %r10 ; CHECK-NEXT: movq %rdi, %r9 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: sarq $63, %rbx -; CHECK-NEXT: movq %rdx, %rdi -; CHECK-NEXT: imulq %rbx, %rdi -; CHECK-NEXT: movq %rdx, %rax -; CHECK-NEXT: mulq %rbx -; CHECK-NEXT: movq %rax, %r8 -; CHECK-NEXT: imulq %rcx, %rbx -; CHECK-NEXT: addq %rdi, %rbx -; CHECK-NEXT: addq %rdx, %rbx -; CHECK-NEXT: movq %rcx, %rdi +; CHECK-NEXT: movq %rsi, %rdi ; CHECK-NEXT: sarq $63, %rdi -; CHECK-NEXT: movq %rdi, %r14 +; CHECK-NEXT: movq %rcx, %r11 +; CHECK-NEXT: imulq %rdi, %r11 +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: mulq %rdi +; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: addq %rax, %r11 +; CHECK-NEXT: addq %rdx, %r11 +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: sarq $63, %rax +; CHECK-NEXT: movq %rax, %r14 ; CHECK-NEXT: imulq %rsi, %r14 -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: mulq %r9 -; CHECK-NEXT: movq %rax, %r10 -; CHECK-NEXT: imulq %r9, %rdi -; CHECK-NEXT: addq %r14, %rdi -; CHECK-NEXT: addq %rdx, %rdi -; CHECK-NEXT: addq %r8, %r10 -; CHECK-NEXT: adcq %rbx, %rdi -; CHECK-NEXT: movq %r9, %rax -; CHECK-NEXT: mulq %r11 -; CHECK-NEXT: movq %rdx, %rbx ; CHECK-NEXT: movq %rax, %r8 -; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: mulq %r11 +; CHECK-NEXT: addq %rax, %r14 +; CHECK-NEXT: addq %rdx, %r14 +; CHECK-NEXT: addq %rdi, %r8 +; CHECK-NEXT: adcq %r11, %r14 +; CHECK-NEXT: movq %r9, %rax +; CHECK-NEXT: mulq %r10 ; CHECK-NEXT: movq %rdx, %r11 -; CHECK-NEXT: movq %rax, %r14 -; CHECK-NEXT: addq %rbx, %r14 -; CHECK-NEXT: adcq $0, %r11 +; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: mulq %r10 +; CHECK-NEXT: movq %rdx, %r10 +; CHECK-NEXT: movq %rax, %rbx +; CHECK-NEXT: addq %r11, %rbx +; CHECK-NEXT: adcq $0, %r10 ; CHECK-NEXT: movq %r9, %rax ; CHECK-NEXT: mulq %rcx -; CHECK-NEXT: movq %rdx, %rbx +; CHECK-NEXT: movq %rdx, %r11 ; CHECK-NEXT: movq %rax, %r9 -; CHECK-NEXT: addq %r14, %r9 -; CHECK-NEXT: adcq %r11, %rbx +; CHECK-NEXT: addq %rbx, %r9 +; CHECK-NEXT: adcq %r10, %r11 ; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %r11d +; CHECK-NEXT: movzbl %al, %r10d ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: mulq %rcx -; CHECK-NEXT: addq %rbx, %rax -; CHECK-NEXT: adcq %r11, %rdx -; CHECK-NEXT: addq %r10, %rax -; CHECK-NEXT: adcq %rdi, %rdx +; CHECK-NEXT: addq %r11, %rax +; CHECK-NEXT: adcq %r10, %rdx +; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: adcq %r14, %rdx ; CHECK-NEXT: movq %r9, %rcx ; CHECK-NEXT: sarq $63, %rcx ; CHECK-NEXT: xorq %rcx, %rdx @@ -68,7 +65,7 @@ define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nou ; CHECK-NEXT: orq %rdx, %rcx ; CHECK-NEXT: jne LBB0_1 ; CHECK-NEXT: ## %bb.2: ## %nooverflow -; CHECK-NEXT: movq %r8, %rax +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movq %r9, %rdx ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r14 diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll b/llvm/test/CodeGen/X86/smul-with-overflow.ll index 83b9460c7dae3..6d8b83824a6d5 100644 --- a/llvm/test/CodeGen/X86/smul-with-overflow.ll +++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll @@ -191,7 +191,7 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $184, %esp +; X86-NEXT: subl $192, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: negl %eax @@ -199,172 +199,139 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: negl %eax -; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %eax, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebx +; X86-NEXT: mull %esi ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %edx, %ecx -; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: mull %esi +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: addl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X86-NEXT: adcl %edx, %esi -; X86-NEXT: setb %bl -; X86-NEXT: addl %eax, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %edx, %eax +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: addl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %esi, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill ; X86-NEXT: adcl $0, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebp -; X86-NEXT: movl %eax, %edi +; X86-NEXT: mull %ebx +; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %edx, %edi -; X86-NEXT: adcl $0, %ebx +; X86-NEXT: addl %edx, %ebp +; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebp +; X86-NEXT: mull %ebx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %eax, %ebp +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: setb %bl ; X86-NEXT: addl %eax, %edi -; X86-NEXT: adcl %edx, %ebx -; X86-NEXT: setb %cl -; X86-NEXT: addl %eax, %ebx -; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %edx, %eax -; X86-NEXT: movl %esi, %ecx +; X86-NEXT: movl %esi, %edx ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %eax, %edx +; X86-NEXT: adcl %ebp, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, %esi ; X86-NEXT: adcl $0, %esi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: addl (%esp), %esi # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: setb %al -; X86-NEXT: addl %ecx, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %edi, %esi +; X86-NEXT: addl %edx, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ebp, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: adcl %ebx, %eax +; X86-NEXT: adcl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %edi, %ebp +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: adcl %esi, %ecx -; X86-NEXT: setb %bl -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %eax, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi +; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: adcl %ebx, %esi +; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %ebx, %ebp -; X86-NEXT: setb %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ebp, %ebx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: adcl (%esp), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, (%esp) # 4-byte Spill -; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %eax, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %edi +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ebp, %edi +; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %edi, %esi -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: adcl %ebp, %ecx -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %ecx, %ebp -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X86-NEXT: adcl %eax, %edi -; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl (%esp), %esi # 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: adcl $0, %edi +; X86-NEXT: addl %edi, %ebp +; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: adcl %eax, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: setb (%esp) # 1-byte Folded Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi @@ -372,40 +339,71 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: adcl %edi, %esi ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %esi, %edi ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X86-NEXT: adcl %eax, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: addl %ebp, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: adcl %edi, %edx -; X86-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebx -; X86-NEXT: adcl $0, %esi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %eax, %ecx +; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, %edi +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: addl (%esp), %edi # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: setb (%esp) # 1-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %esi, %ebp +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %esi +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: adcl %ebx, %esi +; X86-NEXT: setb %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movzbl %bl, %esi +; X86-NEXT: adcl %esi, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: addl %edi, %ebx +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: adcl %ecx, %esi +; X86-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X86-NEXT: adcl %ecx, %eax +; X86-NEXT: adcl $0, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -413,411 +411,401 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %ecx, %ebp ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %eax, %ebx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %edi, %ebx +; X86-NEXT: adcl $0, %esi +; X86-NEXT: addl %edi, %ecx +; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: adcl %eax, %esi ; X86-NEXT: setb %al -; X86-NEXT: addl %esi, %ebx +; X86-NEXT: addl %ebx, %esi ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: adcl %edx, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, %ebp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, %edi ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %esi, %ecx -; X86-NEXT: adcl $0, %edx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %ebx, %ecx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: adcl $0, %ebx ; X86-NEXT: addl %ebp, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %esi, %edx -; X86-NEXT: setb %cl -; X86-NEXT: addl %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movzbl %cl, %esi -; X86-NEXT: adcl %edi, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: addl %edx, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: adcl %esi, %eax -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NEXT: adcl $0, %edx -; X86-NEXT: addl %ebp, %edi +; X86-NEXT: adcl %eax, %ebx +; X86-NEXT: setb %al +; X86-NEXT: addl %edi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl %al, %ebp +; X86-NEXT: adcl %edx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: addl %ebx, %edx +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: adcl %ebp, %eax ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %edi +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: addl %edi, %ecx +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: adcl %ebx, %ebp +; X86-NEXT: setb %cl +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movzbl %cl, %ebx +; X86-NEXT: adcl %esi, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: adcl $0, %edi ; X86-NEXT: adcl $0, %eax -; X86-NEXT: addl %esi, %ecx -; X86-NEXT: adcl %edx, %eax -; X86-NEXT: setb %dl -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movzbl %dl, %esi -; X86-NEXT: adcl %ebx, %esi -; X86-NEXT: movl (%esp), %ebx # 4-byte Reload ; X86-NEXT: adcl $0, %ebx -; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl %ebp, %ecx ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: adcl $0, %eax -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: adcl $0, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: movl %eax, %edi ; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %edx, %ebp -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: addl %eax, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %edx, %ebx -; X86-NEXT: setb %cl -; X86-NEXT: addl %eax, %ebx -; X86-NEXT: movzbl %cl, %ecx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: addl %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: setb %bl +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: movzbl %bl, %edi +; X86-NEXT: adcl %edx, %edi ; X86-NEXT: movl %eax, %edx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %ebx, %edx -; X86-NEXT: adcl %ecx, %ebp -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: adcl %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, %eax +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: addl %edi, %edx +; X86-NEXT: addl %ebp, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl %edi, %ebp -; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %edi, %edx ; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: adcl $0, %eax -; X86-NEXT: addl %esi, %edx -; X86-NEXT: adcl %ecx, %eax -; X86-NEXT: setb %bl -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %edx -; X86-NEXT: adcl %edi, %edx -; X86-NEXT: movzbl %bl, %eax +; X86-NEXT: addl %eax, %esi +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: setb %al +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: addl %edi, %esi +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: adcl %esi, %edx +; X86-NEXT: movzbl %al, %eax ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X86-NEXT: adcl %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: addl %ecx, %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: movl %edi, %edx ; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: addl %ebx, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %eax, %edx -; X86-NEXT: setb %al +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: setb %cl ; X86-NEXT: addl %esi, %edx -; X86-NEXT: movzbl %al, %eax +; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %edi, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %esi, %edi -; X86-NEXT: setb %al -; X86-NEXT: addl %ebp, %edi -; X86-NEXT: movzbl %al, %ebp -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: addl %edi, %eax ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: adcl %ebp, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %esi ; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %edi, %esi +; X86-NEXT: setb %cl +; X86-NEXT: addl %ebx, %esi +; X86-NEXT: movzbl %cl, %ecx +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: adcl %ecx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %edi +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: adcl $0, %ebx -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %edi -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: addl %esi, %edi -; X86-NEXT: adcl %ebx, %ebp +; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: addl %edi, %esi +; X86-NEXT: adcl %ebx, %ecx ; X86-NEXT: setb %bl -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, (%esp) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: imull %eax, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: addl %esi, %eax -; X86-NEXT: adcl %ebx, %edx +; X86-NEXT: addl %eax, %esi +; X86-NEXT: adcl %ebp, %ecx +; X86-NEXT: movzbl %bl, %edi +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: adcl %ebp, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %eax, %ebx -; X86-NEXT: adcl %edx, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: adcl %ebx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: addl %eax, %eax ; X86-NEXT: adcl %edx, %ebx -; X86-NEXT: addl %edi, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %ebp, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: addl %eax, %edi -; X86-NEXT: adcl $0, %eax +; X86-NEXT: movl %edi, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: addl %edx, %edi -; X86-NEXT: movl %edi, %ebx +; X86-NEXT: adcl %ebx, %edx +; X86-NEXT: addl %esi, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl %edi, %eax -; X86-NEXT: setb %cl +; X86-NEXT: adcl %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: addl %edx, %eax -; X86-NEXT: movzbl %cl, %edx -; X86-NEXT: adcl %edi, %edx -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: addl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: adcl %edx, %ecx -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: adcl $0, %eax ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, %ecx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: adcl %ebx, %edx +; X86-NEXT: setb %al +; X86-NEXT: addl %esi, %edx +; X86-NEXT: movzbl %al, %edi +; X86-NEXT: adcl %ebx, %edi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: adcl %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: addl %ebp, %ecx -; X86-NEXT: adcl $0, %ebp +; X86-NEXT: addl %ebp, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: addl %ebx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl %edi, %ebp -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: addl %ebx, %ebp -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X86-NEXT: adcl %edi, %ebx +; X86-NEXT: adcl %ebx, %ecx +; X86-NEXT: setb %al +; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl %al, %ebp +; X86-NEXT: adcl %ebx, %ebp ; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl %ecx, (%esp) # 4-byte Folded Spill -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: adcl $0, %edi -; X86-NEXT: addl %eax, %esi -; X86-NEXT: adcl %edx, %edi +; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: adcl %edi, %ebx ; X86-NEXT: setb %al -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: adcl %ecx, %edi +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: adcl %edx, %ebx ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: adcl %ebp, %eax +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: imull %edx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %edx, %ecx -; X86-NEXT: addl %ebp, %ecx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: imull %edx, %ebp -; X86-NEXT: imull {{[0-9]+}}(%esp), %edx -; X86-NEXT: addl %ebp, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: addl %edx, %esi +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: adcl %edi, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: addl %eax, %ebx -; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: addl %edx, %eax +; X86-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl %eax, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: adcl %ebx, %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %esi, %eax ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: addl %esi, %ecx +; X86-NEXT: adcl %ebp, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: sarl $31, %ecx -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: xorl %ecx, %ebx -; X86-NEXT: orl %eax, %ebx -; X86-NEXT: xorl %ecx, %edi -; X86-NEXT: orl %ebx, %edi -; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: xorl %ecx, %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: xorl %ecx, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: xorl %ecx, %edx -; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl %edx, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: xorl %edx, %ebp +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: orl %ebp, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl %edx, %ecx ; X86-NEXT: orl %esi, %ecx -; X86-NEXT: orl %edi, %ecx -; X86-NEXT: orl %edx, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: orl %eax, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: movl %edi, %edx -; X86-NEXT: andl $1, %edx -; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: andl $1, %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: negl %eax -; X86-NEXT: xorl %eax, %ebp +; X86-NEXT: xorl %eax, %ebx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: xorl %eax, %esi -; X86-NEXT: orl %ebp, %esi +; X86-NEXT: orl %ebx, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: xorl %eax, %ebx ; X86-NEXT: orl %esi, %ebx ; X86-NEXT: xorl %edi, %eax ; X86-NEXT: orl %ebx, %eax -; X86-NEXT: orl %ecx, %eax +; X86-NEXT: orl %edx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, 4(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, 8(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, 12(%eax) -; X86-NEXT: movb %dl, 16(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: movb %cl, 16(%eax) ; X86-NEXT: setne 20(%eax) -; X86-NEXT: addl $184, %esp +; X86-NEXT: addl $192, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -832,189 +820,170 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X64-NEXT: pushq %r13 ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r8, %r12 -; X64-NEXT: movq %rcx, %r15 -; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rsi, %r8 +; X64-NEXT: movq %r8, %r13 +; X64-NEXT: movq %rcx, %r10 +; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rsi, %rbx ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; X64-NEXT: andl $1, %esi -; X64-NEXT: negq %rsi -; X64-NEXT: andl $1, %r15d -; X64-NEXT: negq %r15 -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: andl $1, %r11d +; X64-NEXT: negq %r11 +; X64-NEXT: andl $1, %r10d +; X64-NEXT: negq %r10 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %rdx, %rbx -; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %r15, %rax +; X64-NEXT: addq %rdx, %rdi +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %rax, %rbx +; X64-NEXT: addq %rax, %rdi +; X64-NEXT: adcq %rdx, %r8 +; X64-NEXT: setb %cl +; X64-NEXT: movzbl %cl, %ecx +; X64-NEXT: addq %rax, %r8 ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: setb %dil -; X64-NEXT: movzbl %dil, %r13d -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %r13 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rdi, %r11 -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rdx, %r13 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %r14, %rbp +; X64-NEXT: adcq $0, %r13 +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: addq %r11, %rax +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: addq %rbp, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r10, %rdi -; X64-NEXT: setb %r11b -; X64-NEXT: movq %r14, %rax +; X64-NEXT: adcq %r13, %rsi +; X64-NEXT: setb %bpl +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movzbl %r11b, %edx -; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: addq %rbp, %rax -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: adcq %rbx, %r10 +; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: movzbl %bpl, %edx +; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: addq %r12, %rax +; X64-NEXT: movq %r12, %r9 +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: adcq %rdi, %r14 +; X64-NEXT: adcq $0, %r8 ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: adcq $0, %r13 -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %r11, %rbp -; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: mulq %rbx +; X64-NEXT: movq %rdx, %r13 +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %r13, %r15 +; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq $0, %r11 -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %r8, %rbp -; X64-NEXT: adcq %rax, %r11 +; X64-NEXT: adcq $0, %rbp +; X64-NEXT: addq %rbx, %r15 +; X64-NEXT: adcq %r13, %rbp ; X64-NEXT: setb %al -; X64-NEXT: addq %r9, %r11 -; X64-NEXT: movzbl %al, %r14d -; X64-NEXT: adcq %rdx, %r14 -; X64-NEXT: addq %r8, %rdi -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r10, %rbp -; X64-NEXT: adcq $0, %r11 +; X64-NEXT: addq %rdi, %rbp +; X64-NEXT: movzbl %al, %r12d +; X64-NEXT: adcq %rdx, %r12 +; X64-NEXT: addq %rbx, %rsi +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r14, %r15 +; X64-NEXT: adcq $0, %rbp +; X64-NEXT: adcq $0, %r12 +; X64-NEXT: addq %r8, %rbp +; X64-NEXT: adcq %rcx, %r12 +; X64-NEXT: setb %cl +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rdx, %r8 +; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: adcq $0, %r14 -; X64-NEXT: addq %rcx, %r11 -; X64-NEXT: adcq %r13, %r14 +; X64-NEXT: addq %rax, %r8 +; X64-NEXT: adcq %rdx, %r14 ; X64-NEXT: setb %dil -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rdx, %rbx -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: addq %rax, %rbx -; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: setb %cl -; X64-NEXT: addq %rax, %r10 -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %rax, %r11 -; X64-NEXT: adcq %r14, %rbx -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: adcq %rax, %r10 -; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %r14 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: addq %rax, %r14 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: movq %r8, %rcx +; X64-NEXT: movzbl %dil, %esi +; X64-NEXT: adcq %rdx, %rsi +; X64-NEXT: addq %rax, %rbp +; X64-NEXT: adcq %r12, %r8 +; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: adcq %rax, %r14 +; X64-NEXT: adcq $0, %rsi +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: addq %rax, %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: addq %r9, %r14 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %r9, %rdi ; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: movq %rax, %rdx ; X64-NEXT: setb %al -; X64-NEXT: addq %rdi, %rcx -; X64-NEXT: movzbl %al, %edi -; X64-NEXT: adcq %r8, %rdi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: imulq %r15, %rax -; X64-NEXT: imulq %r15, %r12 -; X64-NEXT: addq %rax, %r12 -; X64-NEXT: addq %rdx, %r12 -; X64-NEXT: movq %r15, %rax -; X64-NEXT: imulq %rsi -; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r9, %r8 -; X64-NEXT: addq %rax, %r8 +; X64-NEXT: addq %rsi, %rcx +; X64-NEXT: movzbl %al, %esi +; X64-NEXT: adcq %rdx, %rsi +; X64-NEXT: movq %r10, %rax +; X64-NEXT: imulq %r11 +; X64-NEXT: movq %r9, %r11 +; X64-NEXT: addq %rax, %r11 +; X64-NEXT: movq %rdi, %r12 ; X64-NEXT: adcq %rdx, %r12 -; X64-NEXT: addq %rcx, %r8 -; X64-NEXT: adcq %rdi, %r12 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; X64-NEXT: movq %r13, %r15 +; X64-NEXT: addq %rcx, %r11 +; X64-NEXT: adcq %rsi, %r12 +; X64-NEXT: movq %rbx, %r10 +; X64-NEXT: addq %r13, %r10 +; X64-NEXT: adcq $0, %r13 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: addq %rcx, %r15 -; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: addq %rdi, %r15 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: adcq %rdx, %rcx +; X64-NEXT: addq %rcx, %r10 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: adcq %rsi, %r13 ; X64-NEXT: setb %r9b -; X64-NEXT: addq %rdi, %rcx -; X64-NEXT: movzbl %r9b, %edi -; X64-NEXT: adcq %rdx, %rdi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: imulq %rsi, %rdx -; X64-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: addq %rdx, %rsi -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: addq %rcx, %r13 +; X64-NEXT: movzbl %r9b, %ecx +; X64-NEXT: adcq %rsi, %rcx +; X64-NEXT: addq %rbx, %rax +; X64-NEXT: adcq %r10, %rdx ; X64-NEXT: addq %r13, %rax -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: addq %rcx, %rax -; X64-NEXT: adcq %rdi, %rsi -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; X64-NEXT: adcq %r14, %r15 -; X64-NEXT: adcq %r8, %rax -; X64-NEXT: adcq %r12, %rsi -; X64-NEXT: addq %r11, %r13 -; X64-NEXT: adcq %rbx, %r15 -; X64-NEXT: adcq %r10, %rax -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rbp, %rcx +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; X64-NEXT: adcq %rdi, %r10 +; X64-NEXT: adcq %r11, %rax +; X64-NEXT: adcq %r12, %rdx +; X64-NEXT: addq %rbp, %rbx +; X64-NEXT: adcq %r8, %r10 +; X64-NEXT: adcq %r14, %rax +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: movq %r15, %rcx ; X64-NEXT: sarq $63, %rcx -; X64-NEXT: xorq %rcx, %rsi -; X64-NEXT: xorq %rcx, %r15 -; X64-NEXT: orq %rsi, %r15 +; X64-NEXT: xorq %rcx, %rdx +; X64-NEXT: xorq %rcx, %r10 +; X64-NEXT: orq %rdx, %r10 ; X64-NEXT: xorq %rcx, %rax -; X64-NEXT: orq %r15, %rax -; X64-NEXT: xorq %r13, %rcx +; X64-NEXT: orq %r10, %rax +; X64-NEXT: xorq %rbx, %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movl %eax, %esi ; X64-NEXT: andl $1, %esi ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: negq %rdx -; X64-NEXT: xorq %rdx, %rbp +; X64-NEXT: xorq %rdx, %r15 ; X64-NEXT: xorq %rax, %rdx -; X64-NEXT: orq %rbp, %rdx +; X64-NEXT: orq %r15, %rdx ; X64-NEXT: orq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll index 0532916b1e4ca..fc5a54b3cf4ce 100644 --- a/llvm/test/CodeGen/X86/smul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll @@ -376,68 +376,65 @@ define i64 @func5(i64 %x, i64 %y) { ; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: imull %edx, %ecx +; X86-NEXT: mull %edx ; X86-NEXT: movl %eax, %edi -; X86-NEXT: imull %ebx, %edi -; X86-NEXT: mull %ebx +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %edx, %ecx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: sarl $31, %eax ; X86-NEXT: movl %eax, %esi -; X86-NEXT: imull %ebp, %ebx -; X86-NEXT: addl %edi, %ebx -; X86-NEXT: addl %edx, %ebx -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl %edi, %edx -; X86-NEXT: imull %ecx, %edx +; X86-NEXT: imull %ebx, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %edi, %eax -; X86-NEXT: imull %ebp, %edi -; X86-NEXT: addl %edx, %edi ; X86-NEXT: mull %ebp -; X86-NEXT: addl %edx, %edi -; X86-NEXT: addl %esi, %eax +; X86-NEXT: addl %eax, %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: addl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %ebx, %edi +; X86-NEXT: adcl %ecx, %esi ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %esi +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ebp -; X86-NEXT: addl %eax, %ebx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %eax, %ebx -; X86-NEXT: adcl %ebp, %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: adcl %ebp, %edi ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: mull %ebp -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X86-NEXT: adcl %esi, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload ; X86-NEXT: adcl %edi, %edx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: xorl %edi, %edx -; X86-NEXT: xorl %eax, %edi -; X86-NEXT: xorl %ebp, %ecx -; X86-NEXT: sarl $31, %ecx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: adcl %esi, %edx ; X86-NEXT: movl %ecx, %esi -; X86-NEXT: xorl $2147483647, %esi # imm = 0x7FFFFFFF -; X86-NEXT: orl %edx, %edi -; X86-NEXT: notl %ecx -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovel %ebx, %esi -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl %esi, %edx +; X86-NEXT: sarl $31, %esi +; X86-NEXT: xorl %esi, %edx +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: xorl %ebp, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: xorl $2147483647, %edi # imm = 0x7FFFFFFF +; X86-NEXT: orl %edx, %esi +; X86-NEXT: notl %ebx +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: cmovel %ecx, %edi +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %edi, %edx ; X86-NEXT: addl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 20 ; X86-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll index fc40c539f37c7..7113aaf7e83ed 100644 --- a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll @@ -14,61 +14,58 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X64-NEXT: .cfi_offset %rbx, -32 ; X64-NEXT: .cfi_offset %r14, -24 ; X64-NEXT: .cfi_offset %r15, -16 -; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rdi, %r10 -; X64-NEXT: movq %rsi, %r14 -; X64-NEXT: sarq $63, %r14 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: imulq %r14, %rdi -; X64-NEXT: movq %rdx, %rax -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: imulq %rcx, %r14 -; X64-NEXT: addq %rdi, %r14 -; X64-NEXT: addq %rdx, %r14 -; X64-NEXT: movq %rcx, %rdi +; X64-NEXT: movq %rsi, %rdi ; X64-NEXT: sarq $63, %rdi -; X64-NEXT: movq %rdi, %r15 +; X64-NEXT: movq %rcx, %rbx +; X64-NEXT: imulq %rdi, %rbx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %rax, %rbx +; X64-NEXT: addq %rdx, %rbx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: sarq $63, %rax +; X64-NEXT: movq %rax, %r15 ; X64-NEXT: imulq %rsi, %r15 -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r10 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: imulq %r10, %rdi -; X64-NEXT: addq %r15, %rdi -; X64-NEXT: addq %rdx, %rdi -; X64-NEXT: addq %r9, %r11 -; X64-NEXT: adcq %r14, %rdi -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %rbx -; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %rbx +; X64-NEXT: addq %rax, %r15 +; X64-NEXT: addq %rdx, %r15 +; X64-NEXT: addq %rdi, %r9 +; X64-NEXT: adcq %rbx, %r15 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %r14, %r15 -; X64-NEXT: adcq $0, %rbx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rbx, %r14 +; X64-NEXT: adcq $0, %r11 ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %r15, %r10 -; X64-NEXT: adcq %rbx, %r14 +; X64-NEXT: addq %r14, %r10 +; X64-NEXT: adcq %r11, %rbx ; X64-NEXT: setb %al -; X64-NEXT: movzbl %al, %ebx +; X64-NEXT: movzbl %al, %r11d ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: addq %r14, %rax -; X64-NEXT: adcq %rbx, %rdx -; X64-NEXT: addq %r11, %rax -; X64-NEXT: adcq %rdi, %rdx +; X64-NEXT: addq %rbx, %rax +; X64-NEXT: adcq %r11, %rdx +; X64-NEXT: addq %r9, %rax +; X64-NEXT: adcq %r15, %rdx ; X64-NEXT: movq %r10, 8(%r8) ; X64-NEXT: sarq $63, %r10 ; X64-NEXT: xorq %r10, %rdx ; X64-NEXT: xorq %rax, %r10 ; X64-NEXT: orq %rdx, %r10 ; X64-NEXT: setne %al -; X64-NEXT: movq %r9, (%r8) +; X64-NEXT: movq %rdi, (%r8) ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r14 ; X64-NEXT: popq %r15 @@ -84,8 +81,8 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: pushl %esi ; X86-NEXT: .cfi_def_cfa_offset 20 -; X86-NEXT: subl $52, %esp -; X86-NEXT: .cfi_def_cfa_offset 72 +; X86-NEXT: subl $60, %esp +; X86-NEXT: .cfi_def_cfa_offset 80 ; X86-NEXT: .cfi_offset %esi, -20 ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 @@ -102,19 +99,20 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %edi ; X86-NEXT: addl %ecx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: adcl $0, %esi ; X86-NEXT: movl %ebx, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %edi, %ebp -; X86-NEXT: adcl %esi, %ebx -; X86-NEXT: setb %cl +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %esi, %ebp +; X86-NEXT: setb %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ebx, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill -; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -126,197 +124,181 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %esi, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %esi, %ebp ; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edi, %esi +; X86-NEXT: adcl %edi, %ebx ; X86-NEXT: setb %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: mull %edx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %esi, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %ebx, %esi ; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: adcl %ebp, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %eax, %edi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %ebp, %ecx -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: addl %ebx, %ecx +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, %ebx ; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: adcl %esi, %ebx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %ebp, %ebx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: addl %ebx, %ecx ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %esi -; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %eax, %ebp +; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload ; X86-NEXT: setb (%esp) ## 1-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: adcl %ebx, %ebp -; X86-NEXT: setb %bl +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %esi, %ebx +; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: mull %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: adcl %edi, %esi +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ebp, %eax -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %eax, %edx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 1-byte Folded Reload +; X86-NEXT: adcl %esi, %edx ; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl %esi, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movzbl (%esp), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %ebp, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movzbl (%esp), %ecx ## 1-byte Folded Reload +; X86-NEXT: adcl %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl %esi, %edi -; X86-NEXT: imull {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: mull %ebp -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: imull %esi, %ebp -; X86-NEXT: addl %edi, %ebp -; X86-NEXT: addl %edx, %ebp -; X86-NEXT: movl %esi, %edx +; X86-NEXT: sarl $31, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %eax, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: imull %esi, %ebx -; X86-NEXT: addl %edx, %ebx -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %edi -; X86-NEXT: addl %edx, %ebx -; X86-NEXT: addl %eax, %ecx -; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %ebp, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %ecx, %ebp +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: addl %esi, %ebp +; X86-NEXT: movl %ebp, (%esp) ## 4-byte Spill +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %ecx, %ebx +; X86-NEXT: setb %cl +; X86-NEXT: movl %edi, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi +; X86-NEXT: imull %eax, %esi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %eax, %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: adcl (%esp), %esi ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: movzbl %cl, %ecx +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: addl %eax, %ebx +; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sarl $31, %eax ; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %edi, %esi +; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, %ebp -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: addl %ecx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edi, %ebp -; X86-NEXT: setb %bl -; X86-NEXT: addl %eax, %ebp -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %edx, %eax -; X86-NEXT: addl (%esp), %ebp ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: movl %eax, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: addl %edx, %edi +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: addl %eax, %edi +; X86-NEXT: adcl %edx, %ebp +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: mull %ecx -; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill -; X86-NEXT: movl %edx, %edi ; X86-NEXT: imull %esi, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: imull %esi, %ebx -; X86-NEXT: addl %ecx, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: imull %esi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %esi, %ecx -; X86-NEXT: addl %edx, %ecx ; X86-NEXT: mull %esi ; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: addl %edi, %ebx -; X86-NEXT: movl (%esp), %eax ## 4-byte Reload -; X86-NEXT: addl %eax, %esi -; X86-NEXT: adcl %ebx, %ecx -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %edi, %ebx -; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %eax, %ebx -; X86-NEXT: adcl %edx, %edi -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill -; X86-NEXT: addl %eax, %edi -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %edx, %eax -; X86-NEXT: addl %esi, %edi -; X86-NEXT: adcl %ecx, %eax -; X86-NEXT: movl (%esp), %esi ## 4-byte Reload +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl %esi, %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: adcl %edi, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 1-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: addl %esi, %ebp +; X86-NEXT: adcl %eax, %edx +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload -; X86-NEXT: adcl %ebp, %edi -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: adcl (%esp), %edi ## 4-byte Folded Reload +; X86-NEXT: adcl %ebx, %ebp +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: sarl $31, %ecx -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: xorl %ecx, %ebx -; X86-NEXT: orl %eax, %ebx -; X86-NEXT: xorl %ecx, %edi -; X86-NEXT: xorl %esi, %ecx -; X86-NEXT: orl %edi, %ecx -; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: xorl %eax, %ebp +; X86-NEXT: xorl %esi, %eax +; X86-NEXT: orl %ebp, %eax +; X86-NEXT: orl %edi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: movl %ecx, 12(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload @@ -324,7 +306,7 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: setne %al -; X86-NEXT: addl $52, %esp +; X86-NEXT: addl $60, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -358,232 +340,214 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X64-NEXT: .cfi_offset %r14, -32 ; X64-NEXT: .cfi_offset %r15, -24 ; X64-NEXT: .cfi_offset %rbp, -16 -; X64-NEXT: movq %r8, %rbx -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rsi, %r15 +; X64-NEXT: movq %rcx, %r15 +; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %rsi, %r11 ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: movq %rax, %r10 ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rsi, %r8 +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r14, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %r12 -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %r8, %r10 +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rbx, %r14 ; X64-NEXT: adcq %rcx, %r12 ; X64-NEXT: setb %al ; X64-NEXT: movzbl %al, %ecx -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %r12, %r14 +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: addq %r12, %rsi ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rbx -; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %rbx +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %r8, %r13 +; X64-NEXT: addq %rbx, %r13 ; X64-NEXT: adcq $0, %r12 ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %r9, %rsi ; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: adcq %r12, %r11 -; X64-NEXT: setb %cl -; X64-NEXT: movq %r15, %r9 -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %r11, %r8 -; X64-NEXT: movzbl %cl, %eax -; X64-NEXT: adcq %rax, %rbp -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Folded Reload -; X64-NEXT: adcq %r10, %rbp -; X64-NEXT: adcq $0, %r14 -; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Spill -; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: adcq %r12, %rbx +; X64-NEXT: setb %r8b +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %r13 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %rbx, %rbp +; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: adcq %rax, %r13 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; X64-NEXT: addq %r10, %rbp +; X64-NEXT: adcq %r14, %r13 +; X64-NEXT: adcq $0, %rsi +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq %r9, %rax -; X64-NEXT: movq %r9, %rsi -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r13 +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: movq %r11, %rbx +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %r11, %r9 -; X64-NEXT: adcq $0, %r13 +; X64-NEXT: addq %r8, %r9 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: addq %r9, %rax -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: adcq %r13, %r10 +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: adcq %r10, %r11 ; X64-NEXT: setb %cl -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %r10, %r13 +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %r11, %r8 ; X64-NEXT: movzbl %cl, %eax -; X64-NEXT: adcq %rax, %r11 -; X64-NEXT: addq %r8, %rdi +; X64-NEXT: adcq %rax, %r10 +; X64-NEXT: addq %rbp, %r14 +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: adcq %r13, %rdi ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: adcq %rbp, %r9 -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: adcq $0, %r13 -; X64-NEXT: adcq $0, %r11 -; X64-NEXT: addq %r14, %r13 -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 ## 8-byte Folded Reload -; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Folded Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 ## 8-byte Reload -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: adcq $0, %r10 +; X64-NEXT: addq %rsi, %r8 +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 ## 8-byte Folded Reload +; X64-NEXT: setb %cl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 ## 8-byte Reload +; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 ## 8-byte Reload -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rsi, %r8 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rsi, %r9 ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: addq %r8, %rsi -; X64-NEXT: adcq %rdi, %r9 -; X64-NEXT: setb %r8b -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: addq %r9, %rax +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: adcq %rdi, %r11 +; X64-NEXT: setb %sil +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %r9, %r14 -; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: movq %rax, %r13 +; X64-NEXT: addq %r11, %r13 +; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: adcq %rax, %rbp -; X64-NEXT: addq %r13, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: adcq %r11, %rsi -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 1-byte Folded Reload -; X64-NEXT: adcq %rax, %r14 +; X64-NEXT: addq %r8, %r14 +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: adcq %r10, %r9 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: adcq %rax, %r13 ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %r10, %r13 -; X64-NEXT: sarq $63, %r13 -; X64-NEXT: movq %r13, %rcx -; X64-NEXT: imulq %r12, %rcx -; X64-NEXT: movq %r13, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: imulq %r13, %r15 -; X64-NEXT: addq %rcx, %r15 -; X64-NEXT: addq %rdx, %r15 -; X64-NEXT: movq %r13, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload -; X64-NEXT: imulq %rsi, %rcx -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: imulq %r13, %rbx -; X64-NEXT: addq %rcx, %rbx -; X64-NEXT: mulq %r13 -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: movq %r15, %r8 +; X64-NEXT: sarq $63, %r8 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: addq %rdx, %rbx -; X64-NEXT: addq %rax, %r8 -; X64-NEXT: adcq %r15, %rbx -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r13 -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %r9, %r15 -; X64-NEXT: movq %rdx, %r13 -; X64-NEXT: adcq $0, %r13 -; X64-NEXT: addq %r11, %r15 -; X64-NEXT: adcq %r9, %r13 -; X64-NEXT: setb %cl -; X64-NEXT: addq %rax, %r13 -; X64-NEXT: movzbl %cl, %r9d -; X64-NEXT: adcq %rdx, %r9 -; X64-NEXT: addq %r8, %r13 -; X64-NEXT: adcq %rbx, %r9 +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: addq %r9, %r10 +; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: adcq $0, %r14 +; X64-NEXT: addq %rsi, %r10 +; X64-NEXT: movq %rsi, %rdi +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: adcq %r9, %r14 +; X64-NEXT: setb %sil +; X64-NEXT: movq %r8, %r9 +; X64-NEXT: imulq %r12, %r9 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq {{[0-9]+}}(%rsp) +; X64-NEXT: addq %rax, %r9 +; X64-NEXT: addq %rdx, %r9 +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: adcq %r10, %r9 +; X64-NEXT: addq %r11, %r14 +; X64-NEXT: movzbl %sil, %edi +; X64-NEXT: adcq %rcx, %rdi +; X64-NEXT: addq %rax, %r14 +; X64-NEXT: adcq %r9, %rdi ; X64-NEXT: sarq $63, %r12 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: imulq %r12, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload -; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: imulq %r12, %rbx -; X64-NEXT: addq %rcx, %rbx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload -; X64-NEXT: movq %r8, %rdx -; X64-NEXT: imulq %r12, %rdx -; X64-NEXT: imulq %r12, %r10 -; X64-NEXT: addq %rdx, %r10 -; X64-NEXT: movq %r10, %rcx ; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rax, %r8 +; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %rdx, %rcx -; X64-NEXT: addq %r10, %rbx -; X64-NEXT: addq %rsi, %r8 -; X64-NEXT: adcq %rbx, %rcx -; X64-NEXT: movq %rsi, %rbx -; X64-NEXT: addq %r10, %rbx -; X64-NEXT: adcq $0, %r10 +; X64-NEXT: adcq $0, %r11 ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: addq %rax, %rbx -; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: setb %r12b -; X64-NEXT: addq %rax, %r10 -; X64-NEXT: movzbl %r12b, %eax -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %r8, %r10 -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: addq %r11, %rsi -; X64-NEXT: adcq %r15, %rbx -; X64-NEXT: adcq %r13, %r10 -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: mulq %rbx +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rax, %rcx +; X64-NEXT: adcq %rdx, %r11 +; X64-NEXT: setb %bl +; X64-NEXT: imulq %r12, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload +; X64-NEXT: mulq %r12 +; X64-NEXT: addq %rax, %r15 +; X64-NEXT: addq %rdx, %r15 +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: adcq %rcx, %r15 +; X64-NEXT: addq %r9, %r11 +; X64-NEXT: movzbl %bl, %edx +; X64-NEXT: adcq %r8, %rdx +; X64-NEXT: addq %rax, %r11 +; X64-NEXT: adcq %r15, %rdx ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Folded Reload -; X64-NEXT: adcq %r14, %r10 -; X64-NEXT: adcq %rbp, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: sarq $63, %rdx -; X64-NEXT: xorq %rdx, %rax -; X64-NEXT: xorq %rdx, %rbx -; X64-NEXT: orq %rax, %rbx -; X64-NEXT: xorq %rdx, %r10 -; X64-NEXT: xorq %rsi, %rdx -; X64-NEXT: orq %r10, %rdx -; X64-NEXT: orq %rbx, %rdx +; X64-NEXT: adcq %r10, %rcx +; X64-NEXT: adcq %r14, %r11 +; X64-NEXT: adcq %rdi, %rdx +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Folded Reload +; X64-NEXT: adcq %r13, %r11 +; X64-NEXT: adcq %rbp, %rdx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: sarq $63, %rax +; X64-NEXT: xorq %rax, %rdx +; X64-NEXT: xorq %rax, %rcx +; X64-NEXT: orq %rdx, %rcx +; X64-NEXT: xorq %rax, %r11 +; X64-NEXT: xorq %rsi, %rax +; X64-NEXT: orq %r11, %rax +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax -; X64-NEXT: movq %rcx, 24(%rax) +; X64-NEXT: movq %rdi, 24(%rax) ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload ; X64-NEXT: movq %rcx, (%rax) ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload @@ -609,8 +573,8 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: pushl %esi ; X86-NEXT: .cfi_def_cfa_offset 20 -; X86-NEXT: subl $152, %esp -; X86-NEXT: .cfi_def_cfa_offset 172 +; X86-NEXT: subl $156, %esp +; X86-NEXT: .cfi_def_cfa_offset 176 ; X86-NEXT: .cfi_offset %esi, -20 ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 @@ -644,66 +608,65 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi +; X86-NEXT: mull %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %edi +; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %ecx, %ebp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %ecx, %esi ; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: addl %esi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edi, %esi -; X86-NEXT: setb %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %esi, %edi -; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: adcl %edi, %ebp +; X86-NEXT: setb %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull %ebx ; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %ebx ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: addl %esi, %ebx ; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movl %eax, %ebx -; X86-NEXT: adcl %ebp, %ecx +; X86-NEXT: adcl %ebp, %edi ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %edi, %esi ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload ; X86-NEXT: adcl %eax, %ebp -; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %esi @@ -772,43 +735,43 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: mull %ebp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi +; X86-NEXT: mull %ebp ; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: addl %ecx, %ebp ; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: mull %ebx +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %esi ; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl %edi, %esi -; X86-NEXT: setb %cl +; X86-NEXT: setb %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %esi, %edi -; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: mull %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %ecx +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: addl %esi, %ebx @@ -816,25 +779,25 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movl %eax, %ebx -; X86-NEXT: adcl %ebp, %ecx +; X86-NEXT: adcl %ebp, %edi ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %ecx, %esi +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %edi, %ebp ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebp -; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl %eax, %esi +; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl $0, %esi ; X86-NEXT: adcl $0, %ebp -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: adcl (%esp), %ebp ## 4-byte Folded Reload +; X86-NEXT: adcl $0, %esi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: adcl (%esp), %esi ## 4-byte Folded Reload ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -857,83 +820,80 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ecx, %ebx ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebx +; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload -; X86-NEXT: addl %esi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: adcl %ebp, %edx +; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload +; X86-NEXT: adcl %esi, %edi ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %edi -; X86-NEXT: adcl $0, %ebx +; X86-NEXT: adcl %eax, %ebx +; X86-NEXT: adcl $0, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %esi, %ebp -; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %esi, %edi ; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %ecx, %edi +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: addl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %ebx, %esi -; X86-NEXT: setb %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: setb %bl +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: mull %edi -; X86-NEXT: addl %esi, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %esi, %ecx -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %edi -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %ebp, %edi -; X86-NEXT: setb %cl -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %esi, %ebp +; X86-NEXT: setb %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %edi ; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %edi, %esi +; X86-NEXT: addl %ebp, %esi ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload @@ -942,67 +902,68 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %ecx ; X86-NEXT: movl %edx, %ebp -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: adcl %edi, %ebp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %ebx, %edi +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movl %eax, %edi +; X86-NEXT: adcl %ebp, %ecx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ecx, %ebx ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %edi +; X86-NEXT: adcl %eax, %ebp ; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl $0, %ecx -; X86-NEXT: adcl $0, %edi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %esi, %ebp -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: mull %edx +; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %ebp, %eax -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: adcl %ebx, %esi -; X86-NEXT: setb %bl +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %ecx, %edi +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movzbl %bl, %esi -; X86-NEXT: adcl %esi, %edx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload +; X86-NEXT: addl %ebx, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: adcl %edi, %ebp +; X86-NEXT: adcl %ebp, %ebx ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload ; X86-NEXT: adcl %ecx, %eax ; X86-NEXT: adcl $0, %edx @@ -1015,9 +976,9 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: movl %ebx, %edx +; X86-NEXT: movl %edi, %edx ; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %ebx, %ecx ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: adcl $0, %eax ; X86-NEXT: adcl $0, %esi @@ -1030,131 +991,132 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ebx -; X86-NEXT: setb %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebp +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %edi, %ecx +; X86-NEXT: setb %bl +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %esi +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill -; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %edi, %ebp -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %ebx, %ecx -; X86-NEXT: setb %bl -; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill -; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %ecx, %esi -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl %esi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %edi, %esi +; X86-NEXT: setb %bl +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: movzbl %bl, %eax +; X86-NEXT: adcl %eax, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill +; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %ebx ; X86-NEXT: movl %edx, %ebx -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: adcl %ebp, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %esi, %ebp +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: adcl %ebx, %edi ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %ebx, %ebp +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %edi, %esi ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ecx -; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: adcl $0, %ecx -; X86-NEXT: addl (%esp), %ebp ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: adcl %eax, %ebx +; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %esi, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %ecx, %ebp ; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: mull %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edi, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: adcl %edi, %ecx ; X86-NEXT: setb (%esp) ## 1-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %esi, %ebx -; X86-NEXT: movzbl (%esp), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; X86-NEXT: addl %ebp, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; X86-NEXT: adcl %ecx, %esi -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movzbl (%esp), %ecx ## 1-byte Folded Reload +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: adcl %ebx, %ebp +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 1-byte Folded Reload +; X86-NEXT: adcl %esi, %eax +; X86-NEXT: movl %eax, %esi ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload ; X86-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill @@ -1165,369 +1127,335 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload ; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: sarl $31, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi +; X86-NEXT: movl %edi, %ebp +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: addl %esi, %ebx -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ebp -; X86-NEXT: setb %cl -; X86-NEXT: addl %eax, %ebp -; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %eax, %ebx +; X86-NEXT: setb %al +; X86-NEXT: addl %edi, %ebx +; X86-NEXT: movzbl %al, %eax ; X86-NEXT: adcl %edx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %esi +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: addl %esi, %edx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: adcl $0, %esi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %edi, (%esp) ## 4-byte Spill +; X86-NEXT: addl %edi, %edx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: adcl $0, %edi +; X86-NEXT: addl %ecx, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %eax, %esi -; X86-NEXT: setb %dl -; X86-NEXT: addl %ecx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movzbl %dl, %edx -; X86-NEXT: adcl %ebx, %edx +; X86-NEXT: adcl %eax, %edi +; X86-NEXT: setb %al +; X86-NEXT: addl %ebp, %edi +; X86-NEXT: movzbl %al, %edx +; X86-NEXT: adcl %esi, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload -; X86-NEXT: addl %esi, %ecx -; X86-NEXT: movl (%esp), %eax ## 4-byte Reload +; X86-NEXT: addl %edi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload ; X86-NEXT: adcl %edx, %eax -; X86-NEXT: movl %ebp, %ebx -; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; X86-NEXT: adcl $0, %esi ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload -; X86-NEXT: adcl $0, %ecx +; X86-NEXT: adcl $0, %edi ; X86-NEXT: adcl $0, %edx -; X86-NEXT: addl %ebx, %ecx +; X86-NEXT: addl %ebp, %edi ; X86-NEXT: adcl %esi, %edx ; X86-NEXT: setb %al -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl (%esp), %edx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: adcl %ebp, %eax +; X86-NEXT: adcl %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: mull %ecx -; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: imull %edi, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: imull %edi, %esi -; X86-NEXT: addl %ecx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: addl %edx, %ebp +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: addl %eax, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %edx, %esi +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: imull %ecx, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: imull %edi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %edi, %ecx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: mull %edi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: addl %ebp, %esi -; X86-NEXT: movl (%esp), %eax ## 4-byte Reload +; X86-NEXT: mull %ecx ; X86-NEXT: addl %eax, %ebx -; X86-NEXT: adcl %esi, %ecx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %ebp, %esi -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: movl %edi, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %edx, %ebx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: adcl %ebp, %ebx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 1-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload ; X86-NEXT: addl %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edx, %ebp +; X86-NEXT: adcl %ebx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl (%esp), %eax ## 4-byte Reload +; X86-NEXT: addl %eax, %ebx +; X86-NEXT: adcl $0, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill -; X86-NEXT: addl %eax, %ebp -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %edx, %eax -; X86-NEXT: addl %ebx, %ebp -; X86-NEXT: adcl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: imull %edi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %edi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload ; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: imull %edi, %edx -; X86-NEXT: imull {{[0-9]+}}(%esp), %edi -; X86-NEXT: addl %edx, %edi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl %ecx, %edi -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: addl %ebx, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: adcl %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl (%esp), %ebx ## 4-byte Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: addl %edx, %ebx +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: movl %eax, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; X86-NEXT: adcl %eax, %edx -; X86-NEXT: setb %cl -; X86-NEXT: addl %ebx, %edx -; X86-NEXT: movzbl %cl, %ecx -; X86-NEXT: adcl %eax, %ecx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: adcl %edi, %ecx -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload -; X86-NEXT: addl %esi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: adcl %ebp, %edx -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: adcl %esi, %ebx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: adcl %ebp, %esi +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %eax, %esi ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: addl %edx, %ecx -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %edi, %eax +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edx, %esi -; X86-NEXT: setb %bl -; X86-NEXT: addl %eax, %esi -; X86-NEXT: movzbl %bl, %edi ; X86-NEXT: adcl %edx, %edi +; X86-NEXT: setb %bl +; X86-NEXT: addl %eax, %edi +; X86-NEXT: movzbl %bl, %ebx +; X86-NEXT: adcl %edx, %ebx ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: addl %esi, %eax +; X86-NEXT: addl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: adcl %edi, %eax +; X86-NEXT: adcl %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl $0, %esi ; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: addl %edx, %ebp -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl %edx, %ebx +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %eax, %ebp -; X86-NEXT: adcl %edx, %ebx -; X86-NEXT: setb %cl +; X86-NEXT: movl %eax, %esi ; X86-NEXT: addl %eax, %ebx -; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: adcl %edx, %ebp +; X86-NEXT: setb %al +; X86-NEXT: addl %esi, %ebp +; X86-NEXT: movzbl %al, %eax ; X86-NEXT: adcl %edx, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: movl %ebx, %edx +; X86-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: movl %ebp, %edx ; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; X86-NEXT: adcl $0, %eax -; X86-NEXT: addl %esi, %edx -; X86-NEXT: adcl %edi, %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: setb %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; X86-NEXT: addl %edi, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %ebp, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: adcl %ebx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; X86-NEXT: imull %esi, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %esi, %eax -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: imull %esi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %esi, %ecx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; X86-NEXT: addl %esi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: addl %edx, %edi +; X86-NEXT: addl %edi, %edx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: setb %dl +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; X86-NEXT: movl %edi, %eax -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: addl %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ebp -; X86-NEXT: setb %al -; X86-NEXT: addl %edi, %ebp -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: adcl %ebx, %eax -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload -; X86-NEXT: adcl %ecx, %eax +; X86-NEXT: adcl %ebx, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movzbl %dl, %ecx +; X86-NEXT: adcl %ebp, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: imull %ebx, %ecx -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: imull %ebx, %esi +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; X86-NEXT: addl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %ecx, %edi +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: addl %ecx, %esi -; X86-NEXT: addl %edx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: adcl %edx, %esi +; X86-NEXT: addl %ebx, %edi +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload +; X86-NEXT: adcl %ebp, %eax +; X86-NEXT: addl %ecx, %edi +; X86-NEXT: adcl %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: imull %ebx, %esi -; X86-NEXT: addl %ecx, %esi -; X86-NEXT: mull %ebx -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: addl %edx, %esi -; X86-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %edi, %eax -; X86-NEXT: adcl $0, %edx -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: adcl %edi, %edx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: addl %ebx, %ebp +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: addl %esi, %ebp +; X86-NEXT: adcl %ebx, %ecx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: adcl %esi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; X86-NEXT: addl %esi, %ebx -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: adcl %ebp, %edx -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %eax, %ebx +; X86-NEXT: addl %edx, %ebx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl %ebp, %ebx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 1-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: adcl %ebx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; X86-NEXT: addl %eax, %esi ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: adcl (%esp), %ebx ## 4-byte Folded Reload -; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: adcl %edi, %ecx ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl (%esp), %ebx ## 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: adcl (%esp), %ecx ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: xorl %esi, %edi -; X86-NEXT: xorl %esi, %edx -; X86-NEXT: orl %edi, %edx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: xorl %esi, %edi -; X86-NEXT: orl %edx, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: xorl %esi, %edx -; X86-NEXT: orl %edi, %edx +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: xorl %esi, %ebx -; X86-NEXT: xorl %esi, %ecx -; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: xorl %esi, %eax +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: xorl %eax, %ebp +; X86-NEXT: orl %edx, %ebp +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: orl %ebp, %eax ; X86-NEXT: orl %ecx, %eax -; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: orl %eax, %esi -; X86-NEXT: orl %edx, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebp, 28(%eax) +; X86-NEXT: movl %ebx, 28(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload @@ -1543,7 +1471,7 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: movl %ecx, 24(%eax) ; X86-NEXT: setne %al -; X86-NEXT: addl $152, %esp +; X86-NEXT: addl $156, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll index 7bb7c9b481d39..fef44e062fd3b 100644 --- a/llvm/test/CodeGen/X86/vec_smulo.ll +++ b/llvm/test/CodeGen/X86/vec_smulo.ll @@ -3297,112 +3297,107 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE2-NEXT: pushq %r12 ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: movq %r8, %r14 +; SSE2-NEXT: movq %rcx, %r13 ; SSE2-NEXT: movq %rdx, %r8 ; SSE2-NEXT: movq %rsi, %r11 ; SSE2-NEXT: movq %rdi, %r10 ; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rbp -; SSE2-NEXT: movq %r11, %r12 -; SSE2-NEXT: sarq $63, %r12 -; SSE2-NEXT: movq %r14, %rbx -; SSE2-NEXT: imulq %r12, %rbx +; SSE2-NEXT: movq %r11, %rcx +; SSE2-NEXT: sarq $63, %rcx +; SSE2-NEXT: movq %r9, %r15 +; SSE2-NEXT: imulq %rcx, %r15 ; SSE2-NEXT: movq %r14, %rax -; SSE2-NEXT: mulq %r12 +; SSE2-NEXT: mulq %rcx ; SSE2-NEXT: movq %rax, %rdi -; SSE2-NEXT: imulq %r9, %r12 -; SSE2-NEXT: addq %rbx, %r12 -; SSE2-NEXT: addq %rdx, %r12 -; SSE2-NEXT: movq %r9, %rbx -; SSE2-NEXT: sarq $63, %rbx -; SSE2-NEXT: movq %rbx, %r13 -; SSE2-NEXT: imulq %r11, %r13 -; SSE2-NEXT: movq %rbx, %rax +; SSE2-NEXT: addq %rax, %r15 +; SSE2-NEXT: addq %rdx, %r15 +; SSE2-NEXT: movq %r9, %rax +; SSE2-NEXT: sarq $63, %rax +; SSE2-NEXT: movq %rax, %rcx +; SSE2-NEXT: imulq %r11, %rcx ; SSE2-NEXT: mulq %r10 -; SSE2-NEXT: movq %rax, %r15 -; SSE2-NEXT: imulq %r10, %rbx -; SSE2-NEXT: addq %r13, %rbx -; SSE2-NEXT: addq %rdx, %rbx -; SSE2-NEXT: addq %rdi, %r15 -; SSE2-NEXT: adcq %r12, %rbx +; SSE2-NEXT: movq %rax, %rbx +; SSE2-NEXT: addq %rax, %rcx +; SSE2-NEXT: addq %rdx, %rcx +; SSE2-NEXT: addq %rdi, %rbx +; SSE2-NEXT: adcq %r15, %rcx ; SSE2-NEXT: movq %r10, %rax ; SSE2-NEXT: mulq %r14 -; SSE2-NEXT: movq %rdx, %r12 +; SSE2-NEXT: movq %rdx, %r15 ; SSE2-NEXT: movq %rax, %rdi ; SSE2-NEXT: movq %r11, %rax ; SSE2-NEXT: mulq %r14 ; SSE2-NEXT: movq %rdx, %r14 -; SSE2-NEXT: movq %rax, %r13 -; SSE2-NEXT: addq %r12, %r13 +; SSE2-NEXT: movq %rax, %r12 +; SSE2-NEXT: addq %r15, %r12 ; SSE2-NEXT: adcq $0, %r14 ; SSE2-NEXT: movq %r10, %rax ; SSE2-NEXT: mulq %r9 -; SSE2-NEXT: movq %rdx, %r12 +; SSE2-NEXT: movq %rdx, %r15 ; SSE2-NEXT: movq %rax, %r10 -; SSE2-NEXT: addq %r13, %r10 -; SSE2-NEXT: adcq %r14, %r12 +; SSE2-NEXT: addq %r12, %r10 +; SSE2-NEXT: adcq %r14, %r15 ; SSE2-NEXT: setb %al ; SSE2-NEXT: movzbl %al, %r14d ; SSE2-NEXT: movq %r11, %rax ; SSE2-NEXT: mulq %r9 -; SSE2-NEXT: addq %r12, %rax -; SSE2-NEXT: adcq %r14, %rdx ; SSE2-NEXT: addq %r15, %rax -; SSE2-NEXT: adcq %rbx, %rdx -; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; SSE2-NEXT: movq %r10, 8(%r12) +; SSE2-NEXT: adcq %r14, %rdx +; SSE2-NEXT: addq %rbx, %rax +; SSE2-NEXT: adcq %rcx, %rdx +; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; SSE2-NEXT: movq %r10, 8(%r15) ; SSE2-NEXT: sarq $63, %r10 ; SSE2-NEXT: xorq %r10, %rdx ; SSE2-NEXT: xorq %rax, %r10 -; SSE2-NEXT: xorl %r15d, %r15d +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: orq %rdx, %r10 -; SSE2-NEXT: setne %r15b -; SSE2-NEXT: movq %rcx, %rbx -; SSE2-NEXT: sarq $63, %rbx -; SSE2-NEXT: movq %rsi, %r10 -; SSE2-NEXT: imulq %rbx, %r10 +; SSE2-NEXT: setne %cl +; SSE2-NEXT: movq %r13, %r9 +; SSE2-NEXT: sarq $63, %r9 +; SSE2-NEXT: movq %rbp, %r11 +; SSE2-NEXT: imulq %r9, %r11 ; SSE2-NEXT: movq %rsi, %rax -; SSE2-NEXT: mulq %rbx +; SSE2-NEXT: mulq %r9 ; SSE2-NEXT: movq %rax, %r9 -; SSE2-NEXT: imulq %rbp, %rbx -; SSE2-NEXT: addq %r10, %rbx -; SSE2-NEXT: addq %rdx, %rbx -; SSE2-NEXT: movq %rbp, %r10 -; SSE2-NEXT: sarq $63, %r10 -; SSE2-NEXT: movq %r10, %r14 -; SSE2-NEXT: imulq %rcx, %r14 -; SSE2-NEXT: movq %r10, %rax +; SSE2-NEXT: addq %rax, %r11 +; SSE2-NEXT: addq %rdx, %r11 +; SSE2-NEXT: movq %rbp, %rax +; SSE2-NEXT: sarq $63, %rax +; SSE2-NEXT: movq %rax, %r14 +; SSE2-NEXT: imulq %r13, %r14 ; SSE2-NEXT: mulq %r8 -; SSE2-NEXT: movq %rax, %r11 -; SSE2-NEXT: imulq %r8, %r10 -; SSE2-NEXT: addq %r14, %r10 -; SSE2-NEXT: addq %rdx, %r10 -; SSE2-NEXT: addq %r9, %r11 -; SSE2-NEXT: adcq %rbx, %r10 +; SSE2-NEXT: movq %rax, %r10 +; SSE2-NEXT: addq %rax, %r14 +; SSE2-NEXT: addq %rdx, %r14 +; SSE2-NEXT: addq %r9, %r10 +; SSE2-NEXT: adcq %r11, %r14 ; SSE2-NEXT: movq %r8, %rax ; SSE2-NEXT: mulq %rsi ; SSE2-NEXT: movq %rdx, %r9 -; SSE2-NEXT: movq %rax, %rbx -; SSE2-NEXT: movq %rcx, %rax +; SSE2-NEXT: movq %rax, %r11 +; SSE2-NEXT: movq %r13, %rax ; SSE2-NEXT: mulq %rsi ; SSE2-NEXT: movq %rdx, %rsi -; SSE2-NEXT: movq %rax, %r14 -; SSE2-NEXT: addq %r9, %r14 +; SSE2-NEXT: movq %rax, %rbx +; SSE2-NEXT: addq %r9, %rbx ; SSE2-NEXT: adcq $0, %rsi ; SSE2-NEXT: movq %r8, %rax ; SSE2-NEXT: mulq %rbp ; SSE2-NEXT: movq %rdx, %r8 ; SSE2-NEXT: movq %rax, %r9 -; SSE2-NEXT: addq %r14, %r9 +; SSE2-NEXT: addq %rbx, %r9 ; SSE2-NEXT: adcq %rsi, %r8 ; SSE2-NEXT: setb %al ; SSE2-NEXT: movzbl %al, %esi -; SSE2-NEXT: movq %rcx, %rax +; SSE2-NEXT: movq %r13, %rax ; SSE2-NEXT: mulq %rbp ; SSE2-NEXT: addq %r8, %rax ; SSE2-NEXT: adcq %rsi, %rdx -; SSE2-NEXT: addq %r11, %rax -; SSE2-NEXT: adcq %r10, %rdx -; SSE2-NEXT: movq %r9, 24(%r12) +; SSE2-NEXT: addq %r10, %rax +; SSE2-NEXT: adcq %r14, %rdx +; SSE2-NEXT: movq %r9, 24(%r15) ; SSE2-NEXT: sarq $63, %r9 ; SSE2-NEXT: xorq %r9, %rdx ; SSE2-NEXT: xorq %rax, %r9 @@ -3411,11 +3406,11 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE2-NEXT: setne %al ; SSE2-NEXT: negl %eax ; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: negl %r15d -; SSE2-NEXT: movd %r15d, %xmm0 +; SSE2-NEXT: negl %ecx +; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movq %rbx, 16(%r12) -; SSE2-NEXT: movq %rdi, (%r12) +; SSE2-NEXT: movq %r11, 16(%r15) +; SSE2-NEXT: movq %rdi, (%r15) ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %r12 ; SSE2-NEXT: popq %r13 @@ -3433,112 +3428,107 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSSE3-NEXT: pushq %r12 ; SSSE3-NEXT: pushq %rbx ; SSSE3-NEXT: movq %r8, %r14 +; SSSE3-NEXT: movq %rcx, %r13 ; SSSE3-NEXT: movq %rdx, %r8 ; SSSE3-NEXT: movq %rsi, %r11 ; SSSE3-NEXT: movq %rdi, %r10 ; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %rbp -; SSSE3-NEXT: movq %r11, %r12 -; SSSE3-NEXT: sarq $63, %r12 -; SSSE3-NEXT: movq %r14, %rbx -; SSSE3-NEXT: imulq %r12, %rbx +; SSSE3-NEXT: movq %r11, %rcx +; SSSE3-NEXT: sarq $63, %rcx +; SSSE3-NEXT: movq %r9, %r15 +; SSSE3-NEXT: imulq %rcx, %r15 ; SSSE3-NEXT: movq %r14, %rax -; SSSE3-NEXT: mulq %r12 +; SSSE3-NEXT: mulq %rcx ; SSSE3-NEXT: movq %rax, %rdi -; SSSE3-NEXT: imulq %r9, %r12 -; SSSE3-NEXT: addq %rbx, %r12 -; SSSE3-NEXT: addq %rdx, %r12 -; SSSE3-NEXT: movq %r9, %rbx -; SSSE3-NEXT: sarq $63, %rbx -; SSSE3-NEXT: movq %rbx, %r13 -; SSSE3-NEXT: imulq %r11, %r13 -; SSSE3-NEXT: movq %rbx, %rax +; SSSE3-NEXT: addq %rax, %r15 +; SSSE3-NEXT: addq %rdx, %r15 +; SSSE3-NEXT: movq %r9, %rax +; SSSE3-NEXT: sarq $63, %rax +; SSSE3-NEXT: movq %rax, %rcx +; SSSE3-NEXT: imulq %r11, %rcx ; SSSE3-NEXT: mulq %r10 -; SSSE3-NEXT: movq %rax, %r15 -; SSSE3-NEXT: imulq %r10, %rbx -; SSSE3-NEXT: addq %r13, %rbx -; SSSE3-NEXT: addq %rdx, %rbx -; SSSE3-NEXT: addq %rdi, %r15 -; SSSE3-NEXT: adcq %r12, %rbx +; SSSE3-NEXT: movq %rax, %rbx +; SSSE3-NEXT: addq %rax, %rcx +; SSSE3-NEXT: addq %rdx, %rcx +; SSSE3-NEXT: addq %rdi, %rbx +; SSSE3-NEXT: adcq %r15, %rcx ; SSSE3-NEXT: movq %r10, %rax ; SSSE3-NEXT: mulq %r14 -; SSSE3-NEXT: movq %rdx, %r12 +; SSSE3-NEXT: movq %rdx, %r15 ; SSSE3-NEXT: movq %rax, %rdi ; SSSE3-NEXT: movq %r11, %rax ; SSSE3-NEXT: mulq %r14 ; SSSE3-NEXT: movq %rdx, %r14 -; SSSE3-NEXT: movq %rax, %r13 -; SSSE3-NEXT: addq %r12, %r13 +; SSSE3-NEXT: movq %rax, %r12 +; SSSE3-NEXT: addq %r15, %r12 ; SSSE3-NEXT: adcq $0, %r14 ; SSSE3-NEXT: movq %r10, %rax ; SSSE3-NEXT: mulq %r9 -; SSSE3-NEXT: movq %rdx, %r12 +; SSSE3-NEXT: movq %rdx, %r15 ; SSSE3-NEXT: movq %rax, %r10 -; SSSE3-NEXT: addq %r13, %r10 -; SSSE3-NEXT: adcq %r14, %r12 +; SSSE3-NEXT: addq %r12, %r10 +; SSSE3-NEXT: adcq %r14, %r15 ; SSSE3-NEXT: setb %al ; SSSE3-NEXT: movzbl %al, %r14d ; SSSE3-NEXT: movq %r11, %rax ; SSSE3-NEXT: mulq %r9 -; SSSE3-NEXT: addq %r12, %rax -; SSSE3-NEXT: adcq %r14, %rdx ; SSSE3-NEXT: addq %r15, %rax -; SSSE3-NEXT: adcq %rbx, %rdx -; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; SSSE3-NEXT: movq %r10, 8(%r12) +; SSSE3-NEXT: adcq %r14, %rdx +; SSSE3-NEXT: addq %rbx, %rax +; SSSE3-NEXT: adcq %rcx, %rdx +; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; SSSE3-NEXT: movq %r10, 8(%r15) ; SSSE3-NEXT: sarq $63, %r10 ; SSSE3-NEXT: xorq %r10, %rdx ; SSSE3-NEXT: xorq %rax, %r10 -; SSSE3-NEXT: xorl %r15d, %r15d +; SSSE3-NEXT: xorl %ecx, %ecx ; SSSE3-NEXT: orq %rdx, %r10 -; SSSE3-NEXT: setne %r15b -; SSSE3-NEXT: movq %rcx, %rbx -; SSSE3-NEXT: sarq $63, %rbx -; SSSE3-NEXT: movq %rsi, %r10 -; SSSE3-NEXT: imulq %rbx, %r10 +; SSSE3-NEXT: setne %cl +; SSSE3-NEXT: movq %r13, %r9 +; SSSE3-NEXT: sarq $63, %r9 +; SSSE3-NEXT: movq %rbp, %r11 +; SSSE3-NEXT: imulq %r9, %r11 ; SSSE3-NEXT: movq %rsi, %rax -; SSSE3-NEXT: mulq %rbx +; SSSE3-NEXT: mulq %r9 ; SSSE3-NEXT: movq %rax, %r9 -; SSSE3-NEXT: imulq %rbp, %rbx -; SSSE3-NEXT: addq %r10, %rbx -; SSSE3-NEXT: addq %rdx, %rbx -; SSSE3-NEXT: movq %rbp, %r10 -; SSSE3-NEXT: sarq $63, %r10 -; SSSE3-NEXT: movq %r10, %r14 -; SSSE3-NEXT: imulq %rcx, %r14 -; SSSE3-NEXT: movq %r10, %rax +; SSSE3-NEXT: addq %rax, %r11 +; SSSE3-NEXT: addq %rdx, %r11 +; SSSE3-NEXT: movq %rbp, %rax +; SSSE3-NEXT: sarq $63, %rax +; SSSE3-NEXT: movq %rax, %r14 +; SSSE3-NEXT: imulq %r13, %r14 ; SSSE3-NEXT: mulq %r8 -; SSSE3-NEXT: movq %rax, %r11 -; SSSE3-NEXT: imulq %r8, %r10 -; SSSE3-NEXT: addq %r14, %r10 -; SSSE3-NEXT: addq %rdx, %r10 -; SSSE3-NEXT: addq %r9, %r11 -; SSSE3-NEXT: adcq %rbx, %r10 +; SSSE3-NEXT: movq %rax, %r10 +; SSSE3-NEXT: addq %rax, %r14 +; SSSE3-NEXT: addq %rdx, %r14 +; SSSE3-NEXT: addq %r9, %r10 +; SSSE3-NEXT: adcq %r11, %r14 ; SSSE3-NEXT: movq %r8, %rax ; SSSE3-NEXT: mulq %rsi ; SSSE3-NEXT: movq %rdx, %r9 -; SSSE3-NEXT: movq %rax, %rbx -; SSSE3-NEXT: movq %rcx, %rax +; SSSE3-NEXT: movq %rax, %r11 +; SSSE3-NEXT: movq %r13, %rax ; SSSE3-NEXT: mulq %rsi ; SSSE3-NEXT: movq %rdx, %rsi -; SSSE3-NEXT: movq %rax, %r14 -; SSSE3-NEXT: addq %r9, %r14 +; SSSE3-NEXT: movq %rax, %rbx +; SSSE3-NEXT: addq %r9, %rbx ; SSSE3-NEXT: adcq $0, %rsi ; SSSE3-NEXT: movq %r8, %rax ; SSSE3-NEXT: mulq %rbp ; SSSE3-NEXT: movq %rdx, %r8 ; SSSE3-NEXT: movq %rax, %r9 -; SSSE3-NEXT: addq %r14, %r9 +; SSSE3-NEXT: addq %rbx, %r9 ; SSSE3-NEXT: adcq %rsi, %r8 ; SSSE3-NEXT: setb %al ; SSSE3-NEXT: movzbl %al, %esi -; SSSE3-NEXT: movq %rcx, %rax +; SSSE3-NEXT: movq %r13, %rax ; SSSE3-NEXT: mulq %rbp ; SSSE3-NEXT: addq %r8, %rax ; SSSE3-NEXT: adcq %rsi, %rdx -; SSSE3-NEXT: addq %r11, %rax -; SSSE3-NEXT: adcq %r10, %rdx -; SSSE3-NEXT: movq %r9, 24(%r12) +; SSSE3-NEXT: addq %r10, %rax +; SSSE3-NEXT: adcq %r14, %rdx +; SSSE3-NEXT: movq %r9, 24(%r15) ; SSSE3-NEXT: sarq $63, %r9 ; SSSE3-NEXT: xorq %r9, %rdx ; SSSE3-NEXT: xorq %rax, %r9 @@ -3547,11 +3537,11 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSSE3-NEXT: setne %al ; SSSE3-NEXT: negl %eax ; SSSE3-NEXT: movd %eax, %xmm1 -; SSSE3-NEXT: negl %r15d -; SSSE3-NEXT: movd %r15d, %xmm0 +; SSSE3-NEXT: negl %ecx +; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSSE3-NEXT: movq %rbx, 16(%r12) -; SSSE3-NEXT: movq %rdi, (%r12) +; SSSE3-NEXT: movq %r11, 16(%r15) +; SSSE3-NEXT: movq %rdi, (%r15) ; SSSE3-NEXT: popq %rbx ; SSSE3-NEXT: popq %r12 ; SSSE3-NEXT: popq %r13 @@ -3569,112 +3559,107 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE41-NEXT: pushq %r12 ; SSE41-NEXT: pushq %rbx ; SSE41-NEXT: movq %r8, %r14 +; SSE41-NEXT: movq %rcx, %r13 ; SSE41-NEXT: movq %rdx, %r8 ; SSE41-NEXT: movq %rsi, %r11 ; SSE41-NEXT: movq %rdi, %r10 ; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %rbp -; SSE41-NEXT: movq %r11, %r12 -; SSE41-NEXT: sarq $63, %r12 -; SSE41-NEXT: movq %r14, %rbx -; SSE41-NEXT: imulq %r12, %rbx +; SSE41-NEXT: movq %r11, %rcx +; SSE41-NEXT: sarq $63, %rcx +; SSE41-NEXT: movq %r9, %r15 +; SSE41-NEXT: imulq %rcx, %r15 ; SSE41-NEXT: movq %r14, %rax -; SSE41-NEXT: mulq %r12 +; SSE41-NEXT: mulq %rcx ; SSE41-NEXT: movq %rax, %rdi -; SSE41-NEXT: imulq %r9, %r12 -; SSE41-NEXT: addq %rbx, %r12 -; SSE41-NEXT: addq %rdx, %r12 -; SSE41-NEXT: movq %r9, %rbx -; SSE41-NEXT: sarq $63, %rbx -; SSE41-NEXT: movq %rbx, %r13 -; SSE41-NEXT: imulq %r11, %r13 -; SSE41-NEXT: movq %rbx, %rax +; SSE41-NEXT: addq %rax, %r15 +; SSE41-NEXT: addq %rdx, %r15 +; SSE41-NEXT: movq %r9, %rax +; SSE41-NEXT: sarq $63, %rax +; SSE41-NEXT: movq %rax, %rcx +; SSE41-NEXT: imulq %r11, %rcx ; SSE41-NEXT: mulq %r10 -; SSE41-NEXT: movq %rax, %r15 -; SSE41-NEXT: imulq %r10, %rbx -; SSE41-NEXT: addq %r13, %rbx -; SSE41-NEXT: addq %rdx, %rbx -; SSE41-NEXT: addq %rdi, %r15 -; SSE41-NEXT: adcq %r12, %rbx +; SSE41-NEXT: movq %rax, %rbx +; SSE41-NEXT: addq %rax, %rcx +; SSE41-NEXT: addq %rdx, %rcx +; SSE41-NEXT: addq %rdi, %rbx +; SSE41-NEXT: adcq %r15, %rcx ; SSE41-NEXT: movq %r10, %rax ; SSE41-NEXT: mulq %r14 -; SSE41-NEXT: movq %rdx, %r12 +; SSE41-NEXT: movq %rdx, %r15 ; SSE41-NEXT: movq %rax, %rdi ; SSE41-NEXT: movq %r11, %rax ; SSE41-NEXT: mulq %r14 ; SSE41-NEXT: movq %rdx, %r14 -; SSE41-NEXT: movq %rax, %r13 -; SSE41-NEXT: addq %r12, %r13 +; SSE41-NEXT: movq %rax, %r12 +; SSE41-NEXT: addq %r15, %r12 ; SSE41-NEXT: adcq $0, %r14 ; SSE41-NEXT: movq %r10, %rax ; SSE41-NEXT: mulq %r9 -; SSE41-NEXT: movq %rdx, %r12 +; SSE41-NEXT: movq %rdx, %r15 ; SSE41-NEXT: movq %rax, %r10 -; SSE41-NEXT: addq %r13, %r10 -; SSE41-NEXT: adcq %r14, %r12 +; SSE41-NEXT: addq %r12, %r10 +; SSE41-NEXT: adcq %r14, %r15 ; SSE41-NEXT: setb %al ; SSE41-NEXT: movzbl %al, %r14d ; SSE41-NEXT: movq %r11, %rax ; SSE41-NEXT: mulq %r9 -; SSE41-NEXT: addq %r12, %rax -; SSE41-NEXT: adcq %r14, %rdx ; SSE41-NEXT: addq %r15, %rax -; SSE41-NEXT: adcq %rbx, %rdx -; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; SSE41-NEXT: movq %r10, 8(%r12) +; SSE41-NEXT: adcq %r14, %rdx +; SSE41-NEXT: addq %rbx, %rax +; SSE41-NEXT: adcq %rcx, %rdx +; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; SSE41-NEXT: movq %r10, 8(%r15) ; SSE41-NEXT: sarq $63, %r10 ; SSE41-NEXT: xorq %r10, %rdx ; SSE41-NEXT: xorq %rax, %r10 -; SSE41-NEXT: xorl %r15d, %r15d +; SSE41-NEXT: xorl %ecx, %ecx ; SSE41-NEXT: orq %rdx, %r10 -; SSE41-NEXT: setne %r15b -; SSE41-NEXT: movq %rcx, %rbx -; SSE41-NEXT: sarq $63, %rbx -; SSE41-NEXT: movq %rsi, %r10 -; SSE41-NEXT: imulq %rbx, %r10 +; SSE41-NEXT: setne %cl +; SSE41-NEXT: movq %r13, %r9 +; SSE41-NEXT: sarq $63, %r9 +; SSE41-NEXT: movq %rbp, %r11 +; SSE41-NEXT: imulq %r9, %r11 ; SSE41-NEXT: movq %rsi, %rax -; SSE41-NEXT: mulq %rbx +; SSE41-NEXT: mulq %r9 ; SSE41-NEXT: movq %rax, %r9 -; SSE41-NEXT: imulq %rbp, %rbx -; SSE41-NEXT: addq %r10, %rbx -; SSE41-NEXT: addq %rdx, %rbx -; SSE41-NEXT: movq %rbp, %r10 -; SSE41-NEXT: sarq $63, %r10 -; SSE41-NEXT: movq %r10, %r14 -; SSE41-NEXT: imulq %rcx, %r14 -; SSE41-NEXT: movq %r10, %rax +; SSE41-NEXT: addq %rax, %r11 +; SSE41-NEXT: addq %rdx, %r11 +; SSE41-NEXT: movq %rbp, %rax +; SSE41-NEXT: sarq $63, %rax +; SSE41-NEXT: movq %rax, %r14 +; SSE41-NEXT: imulq %r13, %r14 ; SSE41-NEXT: mulq %r8 -; SSE41-NEXT: movq %rax, %r11 -; SSE41-NEXT: imulq %r8, %r10 -; SSE41-NEXT: addq %r14, %r10 -; SSE41-NEXT: addq %rdx, %r10 -; SSE41-NEXT: addq %r9, %r11 -; SSE41-NEXT: adcq %rbx, %r10 +; SSE41-NEXT: movq %rax, %r10 +; SSE41-NEXT: addq %rax, %r14 +; SSE41-NEXT: addq %rdx, %r14 +; SSE41-NEXT: addq %r9, %r10 +; SSE41-NEXT: adcq %r11, %r14 ; SSE41-NEXT: movq %r8, %rax ; SSE41-NEXT: mulq %rsi ; SSE41-NEXT: movq %rdx, %r9 -; SSE41-NEXT: movq %rax, %rbx -; SSE41-NEXT: movq %rcx, %rax +; SSE41-NEXT: movq %rax, %r11 +; SSE41-NEXT: movq %r13, %rax ; SSE41-NEXT: mulq %rsi ; SSE41-NEXT: movq %rdx, %rsi -; SSE41-NEXT: movq %rax, %r14 -; SSE41-NEXT: addq %r9, %r14 +; SSE41-NEXT: movq %rax, %rbx +; SSE41-NEXT: addq %r9, %rbx ; SSE41-NEXT: adcq $0, %rsi ; SSE41-NEXT: movq %r8, %rax ; SSE41-NEXT: mulq %rbp ; SSE41-NEXT: movq %rdx, %r8 ; SSE41-NEXT: movq %rax, %r9 -; SSE41-NEXT: addq %r14, %r9 +; SSE41-NEXT: addq %rbx, %r9 ; SSE41-NEXT: adcq %rsi, %r8 ; SSE41-NEXT: setb %al ; SSE41-NEXT: movzbl %al, %esi -; SSE41-NEXT: movq %rcx, %rax +; SSE41-NEXT: movq %r13, %rax ; SSE41-NEXT: mulq %rbp ; SSE41-NEXT: addq %r8, %rax ; SSE41-NEXT: adcq %rsi, %rdx -; SSE41-NEXT: addq %r11, %rax -; SSE41-NEXT: adcq %r10, %rdx -; SSE41-NEXT: movq %r9, 24(%r12) +; SSE41-NEXT: addq %r10, %rax +; SSE41-NEXT: adcq %r14, %rdx +; SSE41-NEXT: movq %r9, 24(%r15) ; SSE41-NEXT: sarq $63, %r9 ; SSE41-NEXT: xorq %r9, %rdx ; SSE41-NEXT: xorq %rax, %r9 @@ -3682,11 +3667,11 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE41-NEXT: orq %rdx, %r9 ; SSE41-NEXT: setne %al ; SSE41-NEXT: negl %eax -; SSE41-NEXT: negl %r15d -; SSE41-NEXT: movd %r15d, %xmm0 +; SSE41-NEXT: negl %ecx +; SSE41-NEXT: movd %ecx, %xmm0 ; SSE41-NEXT: pinsrd $1, %eax, %xmm0 -; SSE41-NEXT: movq %rbx, 16(%r12) -; SSE41-NEXT: movq %rdi, (%r12) +; SSE41-NEXT: movq %r11, 16(%r15) +; SSE41-NEXT: movq %rdi, (%r15) ; SSE41-NEXT: popq %rbx ; SSE41-NEXT: popq %r12 ; SSE41-NEXT: popq %r13 @@ -3704,112 +3689,107 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX-NEXT: pushq %r12 ; AVX-NEXT: pushq %rbx ; AVX-NEXT: movq %r8, %r14 +; AVX-NEXT: movq %rcx, %r13 ; AVX-NEXT: movq %rdx, %r8 ; AVX-NEXT: movq %rsi, %r11 ; AVX-NEXT: movq %rdi, %r10 ; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rbp -; AVX-NEXT: movq %r11, %r12 -; AVX-NEXT: sarq $63, %r12 -; AVX-NEXT: movq %r14, %rbx -; AVX-NEXT: imulq %r12, %rbx +; AVX-NEXT: movq %r11, %rcx +; AVX-NEXT: sarq $63, %rcx +; AVX-NEXT: movq %r9, %r15 +; AVX-NEXT: imulq %rcx, %r15 ; AVX-NEXT: movq %r14, %rax -; AVX-NEXT: mulq %r12 +; AVX-NEXT: mulq %rcx ; AVX-NEXT: movq %rax, %rdi -; AVX-NEXT: imulq %r9, %r12 -; AVX-NEXT: addq %rbx, %r12 -; AVX-NEXT: addq %rdx, %r12 -; AVX-NEXT: movq %r9, %rbx -; AVX-NEXT: sarq $63, %rbx -; AVX-NEXT: movq %rbx, %r13 -; AVX-NEXT: imulq %r11, %r13 -; AVX-NEXT: movq %rbx, %rax +; AVX-NEXT: addq %rax, %r15 +; AVX-NEXT: addq %rdx, %r15 +; AVX-NEXT: movq %r9, %rax +; AVX-NEXT: sarq $63, %rax +; AVX-NEXT: movq %rax, %rcx +; AVX-NEXT: imulq %r11, %rcx ; AVX-NEXT: mulq %r10 -; AVX-NEXT: movq %rax, %r15 -; AVX-NEXT: imulq %r10, %rbx -; AVX-NEXT: addq %r13, %rbx -; AVX-NEXT: addq %rdx, %rbx -; AVX-NEXT: addq %rdi, %r15 -; AVX-NEXT: adcq %r12, %rbx +; AVX-NEXT: movq %rax, %rbx +; AVX-NEXT: addq %rax, %rcx +; AVX-NEXT: addq %rdx, %rcx +; AVX-NEXT: addq %rdi, %rbx +; AVX-NEXT: adcq %r15, %rcx ; AVX-NEXT: movq %r10, %rax ; AVX-NEXT: mulq %r14 -; AVX-NEXT: movq %rdx, %r12 +; AVX-NEXT: movq %rdx, %r15 ; AVX-NEXT: movq %rax, %rdi ; AVX-NEXT: movq %r11, %rax ; AVX-NEXT: mulq %r14 ; AVX-NEXT: movq %rdx, %r14 -; AVX-NEXT: movq %rax, %r13 -; AVX-NEXT: addq %r12, %r13 +; AVX-NEXT: movq %rax, %r12 +; AVX-NEXT: addq %r15, %r12 ; AVX-NEXT: adcq $0, %r14 ; AVX-NEXT: movq %r10, %rax ; AVX-NEXT: mulq %r9 -; AVX-NEXT: movq %rdx, %r12 +; AVX-NEXT: movq %rdx, %r15 ; AVX-NEXT: movq %rax, %r10 -; AVX-NEXT: addq %r13, %r10 -; AVX-NEXT: adcq %r14, %r12 +; AVX-NEXT: addq %r12, %r10 +; AVX-NEXT: adcq %r14, %r15 ; AVX-NEXT: setb %al ; AVX-NEXT: movzbl %al, %r14d ; AVX-NEXT: movq %r11, %rax ; AVX-NEXT: mulq %r9 -; AVX-NEXT: addq %r12, %rax -; AVX-NEXT: adcq %r14, %rdx ; AVX-NEXT: addq %r15, %rax -; AVX-NEXT: adcq %rbx, %rdx -; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; AVX-NEXT: movq %r10, 8(%r12) +; AVX-NEXT: adcq %r14, %rdx +; AVX-NEXT: addq %rbx, %rax +; AVX-NEXT: adcq %rcx, %rdx +; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; AVX-NEXT: movq %r10, 8(%r15) ; AVX-NEXT: sarq $63, %r10 ; AVX-NEXT: xorq %r10, %rdx ; AVX-NEXT: xorq %rax, %r10 -; AVX-NEXT: xorl %r15d, %r15d +; AVX-NEXT: xorl %ecx, %ecx ; AVX-NEXT: orq %rdx, %r10 -; AVX-NEXT: setne %r15b -; AVX-NEXT: movq %rcx, %rbx -; AVX-NEXT: sarq $63, %rbx -; AVX-NEXT: movq %rsi, %r10 -; AVX-NEXT: imulq %rbx, %r10 +; AVX-NEXT: setne %cl +; AVX-NEXT: movq %r13, %r9 +; AVX-NEXT: sarq $63, %r9 +; AVX-NEXT: movq %rbp, %r11 +; AVX-NEXT: imulq %r9, %r11 ; AVX-NEXT: movq %rsi, %rax -; AVX-NEXT: mulq %rbx +; AVX-NEXT: mulq %r9 ; AVX-NEXT: movq %rax, %r9 -; AVX-NEXT: imulq %rbp, %rbx -; AVX-NEXT: addq %r10, %rbx -; AVX-NEXT: addq %rdx, %rbx -; AVX-NEXT: movq %rbp, %r10 -; AVX-NEXT: sarq $63, %r10 -; AVX-NEXT: movq %r10, %r14 -; AVX-NEXT: imulq %rcx, %r14 -; AVX-NEXT: movq %r10, %rax +; AVX-NEXT: addq %rax, %r11 +; AVX-NEXT: addq %rdx, %r11 +; AVX-NEXT: movq %rbp, %rax +; AVX-NEXT: sarq $63, %rax +; AVX-NEXT: movq %rax, %r14 +; AVX-NEXT: imulq %r13, %r14 ; AVX-NEXT: mulq %r8 -; AVX-NEXT: movq %rax, %r11 -; AVX-NEXT: imulq %r8, %r10 -; AVX-NEXT: addq %r14, %r10 -; AVX-NEXT: addq %rdx, %r10 -; AVX-NEXT: addq %r9, %r11 -; AVX-NEXT: adcq %rbx, %r10 +; AVX-NEXT: movq %rax, %r10 +; AVX-NEXT: addq %rax, %r14 +; AVX-NEXT: addq %rdx, %r14 +; AVX-NEXT: addq %r9, %r10 +; AVX-NEXT: adcq %r11, %r14 ; AVX-NEXT: movq %r8, %rax ; AVX-NEXT: mulq %rsi ; AVX-NEXT: movq %rdx, %r9 -; AVX-NEXT: movq %rax, %rbx -; AVX-NEXT: movq %rcx, %rax +; AVX-NEXT: movq %rax, %r11 +; AVX-NEXT: movq %r13, %rax ; AVX-NEXT: mulq %rsi ; AVX-NEXT: movq %rdx, %rsi -; AVX-NEXT: movq %rax, %r14 -; AVX-NEXT: addq %r9, %r14 +; AVX-NEXT: movq %rax, %rbx +; AVX-NEXT: addq %r9, %rbx ; AVX-NEXT: adcq $0, %rsi ; AVX-NEXT: movq %r8, %rax ; AVX-NEXT: mulq %rbp ; AVX-NEXT: movq %rdx, %r8 ; AVX-NEXT: movq %rax, %r9 -; AVX-NEXT: addq %r14, %r9 +; AVX-NEXT: addq %rbx, %r9 ; AVX-NEXT: adcq %rsi, %r8 ; AVX-NEXT: setb %al ; AVX-NEXT: movzbl %al, %esi -; AVX-NEXT: movq %rcx, %rax +; AVX-NEXT: movq %r13, %rax ; AVX-NEXT: mulq %rbp ; AVX-NEXT: addq %r8, %rax ; AVX-NEXT: adcq %rsi, %rdx -; AVX-NEXT: addq %r11, %rax -; AVX-NEXT: adcq %r10, %rdx -; AVX-NEXT: movq %r9, 24(%r12) +; AVX-NEXT: addq %r10, %rax +; AVX-NEXT: adcq %r14, %rdx +; AVX-NEXT: movq %r9, 24(%r15) ; AVX-NEXT: sarq $63, %r9 ; AVX-NEXT: xorq %r9, %rdx ; AVX-NEXT: xorq %rax, %r9 @@ -3817,11 +3797,11 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX-NEXT: orq %rdx, %r9 ; AVX-NEXT: setne %al ; AVX-NEXT: negl %eax -; AVX-NEXT: negl %r15d -; AVX-NEXT: vmovd %r15d, %xmm0 +; AVX-NEXT: negl %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX-NEXT: movq %rbx, 16(%r12) -; AVX-NEXT: movq %rdi, (%r12) +; AVX-NEXT: movq %r11, 16(%r15) +; AVX-NEXT: movq %rdi, (%r15) ; AVX-NEXT: popq %rbx ; AVX-NEXT: popq %r12 ; AVX-NEXT: popq %r13 @@ -3838,113 +3818,104 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512F-NEXT: pushq %r13 ; AVX512F-NEXT: pushq %r12 ; AVX512F-NEXT: pushq %rbx -; AVX512F-NEXT: movq %r9, %rbp ; AVX512F-NEXT: movq %rcx, %r11 ; AVX512F-NEXT: movq %rdx, %r10 -; AVX512F-NEXT: movq %rsi, %r9 -; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r15 -; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; AVX512F-NEXT: movq %rcx, %r12 -; AVX512F-NEXT: sarq $63, %r12 -; AVX512F-NEXT: movq %r15, %rbx -; AVX512F-NEXT: imulq %r12, %rbx -; AVX512F-NEXT: movq %r15, %rax -; AVX512F-NEXT: mulq %r12 -; AVX512F-NEXT: movq %rax, %rcx -; AVX512F-NEXT: imulq %rsi, %r12 -; AVX512F-NEXT: addq %rbx, %r12 +; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; AVX512F-NEXT: sarq $63, %rcx +; AVX512F-NEXT: movq %rbp, %r12 +; AVX512F-NEXT: imulq %rcx, %r12 +; AVX512F-NEXT: movq %r14, %rax +; AVX512F-NEXT: mulq %rcx +; AVX512F-NEXT: movq %rax, %r15 +; AVX512F-NEXT: addq %rax, %r12 ; AVX512F-NEXT: addq %rdx, %r12 -; AVX512F-NEXT: movq %rsi, %rbx -; AVX512F-NEXT: sarq $63, %rbx -; AVX512F-NEXT: movq %rbx, %r13 -; AVX512F-NEXT: imulq %r11, %r13 -; AVX512F-NEXT: movq %rbx, %rax +; AVX512F-NEXT: movq %rbp, %rax +; AVX512F-NEXT: sarq $63, %rax +; AVX512F-NEXT: movq %rax, %rcx +; AVX512F-NEXT: imulq %r11, %rcx ; AVX512F-NEXT: mulq %r10 -; AVX512F-NEXT: movq %rax, %r14 -; AVX512F-NEXT: imulq %r10, %rbx -; AVX512F-NEXT: addq %r13, %rbx -; AVX512F-NEXT: addq %rdx, %rbx -; AVX512F-NEXT: addq %rcx, %r14 -; AVX512F-NEXT: adcq %r12, %rbx +; AVX512F-NEXT: movq %rax, %rbx +; AVX512F-NEXT: addq %rax, %rcx +; AVX512F-NEXT: addq %rdx, %rcx +; AVX512F-NEXT: addq %r15, %rbx +; AVX512F-NEXT: adcq %r12, %rcx ; AVX512F-NEXT: movq %r10, %rax -; AVX512F-NEXT: mulq %r15 -; AVX512F-NEXT: movq %rdx, %r12 -; AVX512F-NEXT: movq %rax, %rcx -; AVX512F-NEXT: movq %r11, %rax -; AVX512F-NEXT: mulq %r15 +; AVX512F-NEXT: mulq %r14 ; AVX512F-NEXT: movq %rdx, %r15 -; AVX512F-NEXT: movq %rax, %r13 -; AVX512F-NEXT: addq %r12, %r13 -; AVX512F-NEXT: adcq $0, %r15 +; AVX512F-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512F-NEXT: movq %r11, %rax +; AVX512F-NEXT: mulq %r14 +; AVX512F-NEXT: movq %rdx, %r14 +; AVX512F-NEXT: movq %rax, %r12 +; AVX512F-NEXT: addq %r15, %r12 +; AVX512F-NEXT: adcq $0, %r14 ; AVX512F-NEXT: movq %r10, %rax -; AVX512F-NEXT: mulq %rsi -; AVX512F-NEXT: movq %rdx, %r12 +; AVX512F-NEXT: mulq %rbp +; AVX512F-NEXT: movq %rdx, %r15 ; AVX512F-NEXT: movq %rax, %r10 -; AVX512F-NEXT: addq %r13, %r10 -; AVX512F-NEXT: adcq %r15, %r12 +; AVX512F-NEXT: addq %r12, %r10 +; AVX512F-NEXT: adcq %r14, %r15 ; AVX512F-NEXT: setb %al -; AVX512F-NEXT: movzbl %al, %r15d +; AVX512F-NEXT: movzbl %al, %r14d ; AVX512F-NEXT: movq %r11, %rax -; AVX512F-NEXT: mulq %rsi -; AVX512F-NEXT: addq %r12, %rax -; AVX512F-NEXT: adcq %r15, %rdx -; AVX512F-NEXT: addq %r14, %rax -; AVX512F-NEXT: adcq %rbx, %rdx -; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; AVX512F-NEXT: movq %r10, 24(%r12) +; AVX512F-NEXT: mulq %rbp +; AVX512F-NEXT: addq %r15, %rax +; AVX512F-NEXT: adcq %r14, %rdx +; AVX512F-NEXT: addq %rbx, %rax +; AVX512F-NEXT: adcq %rcx, %rdx +; AVX512F-NEXT: movq %r10, 24(%r13) ; AVX512F-NEXT: sarq $63, %r10 ; AVX512F-NEXT: xorq %r10, %rdx ; AVX512F-NEXT: xorq %rax, %r10 ; AVX512F-NEXT: orq %rdx, %r10 ; AVX512F-NEXT: setne %al ; AVX512F-NEXT: kmovw %eax, %k0 -; AVX512F-NEXT: movq %r9, %rsi -; AVX512F-NEXT: sarq $63, %rsi -; AVX512F-NEXT: movq %r8, %r11 -; AVX512F-NEXT: imulq %rsi, %r11 +; AVX512F-NEXT: movq %rsi, %rcx +; AVX512F-NEXT: sarq $63, %rcx +; AVX512F-NEXT: movq %r9, %rbx +; AVX512F-NEXT: imulq %rcx, %rbx ; AVX512F-NEXT: movq %r8, %rax -; AVX512F-NEXT: mulq %rsi +; AVX512F-NEXT: mulq %rcx ; AVX512F-NEXT: movq %rax, %r10 -; AVX512F-NEXT: imulq %rbp, %rsi -; AVX512F-NEXT: addq %r11, %rsi -; AVX512F-NEXT: addq %rdx, %rsi -; AVX512F-NEXT: movq %rbp, %r11 -; AVX512F-NEXT: sarq $63, %r11 -; AVX512F-NEXT: movq %r11, %r14 -; AVX512F-NEXT: imulq %r9, %r14 -; AVX512F-NEXT: movq %r11, %rax +; AVX512F-NEXT: addq %rax, %rbx +; AVX512F-NEXT: addq %rdx, %rbx +; AVX512F-NEXT: movq %r9, %rax +; AVX512F-NEXT: sarq $63, %rax +; AVX512F-NEXT: movq %rax, %rcx +; AVX512F-NEXT: imulq %rsi, %rcx ; AVX512F-NEXT: mulq %rdi -; AVX512F-NEXT: movq %rax, %rbx -; AVX512F-NEXT: imulq %rdi, %r11 -; AVX512F-NEXT: addq %r14, %r11 -; AVX512F-NEXT: addq %rdx, %r11 -; AVX512F-NEXT: addq %r10, %rbx -; AVX512F-NEXT: adcq %rsi, %r11 +; AVX512F-NEXT: movq %rax, %r11 +; AVX512F-NEXT: addq %rax, %rcx +; AVX512F-NEXT: addq %rdx, %rcx +; AVX512F-NEXT: addq %r10, %r11 +; AVX512F-NEXT: adcq %rbx, %rcx ; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: mulq %r8 ; AVX512F-NEXT: movq %rdx, %r10 -; AVX512F-NEXT: movq %rax, %r14 -; AVX512F-NEXT: movq %r9, %rax +; AVX512F-NEXT: movq %rax, %rbx +; AVX512F-NEXT: movq %rsi, %rax ; AVX512F-NEXT: mulq %r8 ; AVX512F-NEXT: movq %rdx, %r8 -; AVX512F-NEXT: movq %rax, %r15 -; AVX512F-NEXT: addq %r10, %r15 +; AVX512F-NEXT: movq %rax, %r14 +; AVX512F-NEXT: addq %r10, %r14 ; AVX512F-NEXT: adcq $0, %r8 ; AVX512F-NEXT: movq %rdi, %rax -; AVX512F-NEXT: mulq %rbp +; AVX512F-NEXT: mulq %r9 ; AVX512F-NEXT: movq %rdx, %rdi ; AVX512F-NEXT: movq %rax, %r10 -; AVX512F-NEXT: addq %r15, %r10 +; AVX512F-NEXT: addq %r14, %r10 ; AVX512F-NEXT: adcq %r8, %rdi ; AVX512F-NEXT: setb %al -; AVX512F-NEXT: movzbl %al, %esi -; AVX512F-NEXT: movq %r9, %rax -; AVX512F-NEXT: mulq %rbp +; AVX512F-NEXT: movzbl %al, %r8d +; AVX512F-NEXT: movq %rsi, %rax +; AVX512F-NEXT: mulq %r9 ; AVX512F-NEXT: addq %rdi, %rax -; AVX512F-NEXT: adcq %rsi, %rdx -; AVX512F-NEXT: addq %rbx, %rax -; AVX512F-NEXT: adcq %r11, %rdx -; AVX512F-NEXT: movq %r10, 8(%r12) +; AVX512F-NEXT: adcq %r8, %rdx +; AVX512F-NEXT: addq %r11, %rax +; AVX512F-NEXT: adcq %rcx, %rdx +; AVX512F-NEXT: movq %r10, 8(%r13) ; AVX512F-NEXT: sarq $63, %r10 ; AVX512F-NEXT: xorq %r10, %rdx ; AVX512F-NEXT: xorq %rax, %r10 @@ -3956,8 +3927,9 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512F-NEXT: korw %k0, %k1, %k1 ; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512F-NEXT: movq %rcx, 16(%r12) -; AVX512F-NEXT: movq %r14, (%r12) +; AVX512F-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512F-NEXT: movq %rax, 16(%r13) +; AVX512F-NEXT: movq %rbx, (%r13) ; AVX512F-NEXT: popq %rbx ; AVX512F-NEXT: popq %r12 ; AVX512F-NEXT: popq %r13 @@ -3974,113 +3946,104 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512BW-NEXT: pushq %r13 ; AVX512BW-NEXT: pushq %r12 ; AVX512BW-NEXT: pushq %rbx -; AVX512BW-NEXT: movq %r9, %rbp ; AVX512BW-NEXT: movq %rcx, %r11 ; AVX512BW-NEXT: movq %rdx, %r10 -; AVX512BW-NEXT: movq %rsi, %r9 -; AVX512BW-NEXT: movq {{[0-9]+}}(%rsp), %r15 -; AVX512BW-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; AVX512BW-NEXT: movq %rcx, %r12 -; AVX512BW-NEXT: sarq $63, %r12 -; AVX512BW-NEXT: movq %r15, %rbx -; AVX512BW-NEXT: imulq %r12, %rbx -; AVX512BW-NEXT: movq %r15, %rax -; AVX512BW-NEXT: mulq %r12 -; AVX512BW-NEXT: movq %rax, %rcx -; AVX512BW-NEXT: imulq %rsi, %r12 -; AVX512BW-NEXT: addq %rbx, %r12 +; AVX512BW-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; AVX512BW-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; AVX512BW-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; AVX512BW-NEXT: sarq $63, %rcx +; AVX512BW-NEXT: movq %rbp, %r12 +; AVX512BW-NEXT: imulq %rcx, %r12 +; AVX512BW-NEXT: movq %r14, %rax +; AVX512BW-NEXT: mulq %rcx +; AVX512BW-NEXT: movq %rax, %r15 +; AVX512BW-NEXT: addq %rax, %r12 ; AVX512BW-NEXT: addq %rdx, %r12 -; AVX512BW-NEXT: movq %rsi, %rbx -; AVX512BW-NEXT: sarq $63, %rbx -; AVX512BW-NEXT: movq %rbx, %r13 -; AVX512BW-NEXT: imulq %r11, %r13 -; AVX512BW-NEXT: movq %rbx, %rax +; AVX512BW-NEXT: movq %rbp, %rax +; AVX512BW-NEXT: sarq $63, %rax +; AVX512BW-NEXT: movq %rax, %rcx +; AVX512BW-NEXT: imulq %r11, %rcx ; AVX512BW-NEXT: mulq %r10 -; AVX512BW-NEXT: movq %rax, %r14 -; AVX512BW-NEXT: imulq %r10, %rbx -; AVX512BW-NEXT: addq %r13, %rbx -; AVX512BW-NEXT: addq %rdx, %rbx -; AVX512BW-NEXT: addq %rcx, %r14 -; AVX512BW-NEXT: adcq %r12, %rbx +; AVX512BW-NEXT: movq %rax, %rbx +; AVX512BW-NEXT: addq %rax, %rcx +; AVX512BW-NEXT: addq %rdx, %rcx +; AVX512BW-NEXT: addq %r15, %rbx +; AVX512BW-NEXT: adcq %r12, %rcx ; AVX512BW-NEXT: movq %r10, %rax -; AVX512BW-NEXT: mulq %r15 -; AVX512BW-NEXT: movq %rdx, %r12 -; AVX512BW-NEXT: movq %rax, %rcx -; AVX512BW-NEXT: movq %r11, %rax -; AVX512BW-NEXT: mulq %r15 +; AVX512BW-NEXT: mulq %r14 ; AVX512BW-NEXT: movq %rdx, %r15 -; AVX512BW-NEXT: movq %rax, %r13 -; AVX512BW-NEXT: addq %r12, %r13 -; AVX512BW-NEXT: adcq $0, %r15 +; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512BW-NEXT: movq %r11, %rax +; AVX512BW-NEXT: mulq %r14 +; AVX512BW-NEXT: movq %rdx, %r14 +; AVX512BW-NEXT: movq %rax, %r12 +; AVX512BW-NEXT: addq %r15, %r12 +; AVX512BW-NEXT: adcq $0, %r14 ; AVX512BW-NEXT: movq %r10, %rax -; AVX512BW-NEXT: mulq %rsi -; AVX512BW-NEXT: movq %rdx, %r12 +; AVX512BW-NEXT: mulq %rbp +; AVX512BW-NEXT: movq %rdx, %r15 ; AVX512BW-NEXT: movq %rax, %r10 -; AVX512BW-NEXT: addq %r13, %r10 -; AVX512BW-NEXT: adcq %r15, %r12 +; AVX512BW-NEXT: addq %r12, %r10 +; AVX512BW-NEXT: adcq %r14, %r15 ; AVX512BW-NEXT: setb %al -; AVX512BW-NEXT: movzbl %al, %r15d +; AVX512BW-NEXT: movzbl %al, %r14d ; AVX512BW-NEXT: movq %r11, %rax -; AVX512BW-NEXT: mulq %rsi -; AVX512BW-NEXT: addq %r12, %rax -; AVX512BW-NEXT: adcq %r15, %rdx -; AVX512BW-NEXT: addq %r14, %rax -; AVX512BW-NEXT: adcq %rbx, %rdx -; AVX512BW-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; AVX512BW-NEXT: movq %r10, 24(%r12) +; AVX512BW-NEXT: mulq %rbp +; AVX512BW-NEXT: addq %r15, %rax +; AVX512BW-NEXT: adcq %r14, %rdx +; AVX512BW-NEXT: addq %rbx, %rax +; AVX512BW-NEXT: adcq %rcx, %rdx +; AVX512BW-NEXT: movq %r10, 24(%r13) ; AVX512BW-NEXT: sarq $63, %r10 ; AVX512BW-NEXT: xorq %r10, %rdx ; AVX512BW-NEXT: xorq %rax, %r10 ; AVX512BW-NEXT: orq %rdx, %r10 ; AVX512BW-NEXT: setne %al ; AVX512BW-NEXT: kmovd %eax, %k0 -; AVX512BW-NEXT: movq %r9, %rsi -; AVX512BW-NEXT: sarq $63, %rsi -; AVX512BW-NEXT: movq %r8, %r11 -; AVX512BW-NEXT: imulq %rsi, %r11 +; AVX512BW-NEXT: movq %rsi, %rcx +; AVX512BW-NEXT: sarq $63, %rcx +; AVX512BW-NEXT: movq %r9, %rbx +; AVX512BW-NEXT: imulq %rcx, %rbx ; AVX512BW-NEXT: movq %r8, %rax -; AVX512BW-NEXT: mulq %rsi +; AVX512BW-NEXT: mulq %rcx ; AVX512BW-NEXT: movq %rax, %r10 -; AVX512BW-NEXT: imulq %rbp, %rsi -; AVX512BW-NEXT: addq %r11, %rsi -; AVX512BW-NEXT: addq %rdx, %rsi -; AVX512BW-NEXT: movq %rbp, %r11 -; AVX512BW-NEXT: sarq $63, %r11 -; AVX512BW-NEXT: movq %r11, %r14 -; AVX512BW-NEXT: imulq %r9, %r14 -; AVX512BW-NEXT: movq %r11, %rax +; AVX512BW-NEXT: addq %rax, %rbx +; AVX512BW-NEXT: addq %rdx, %rbx +; AVX512BW-NEXT: movq %r9, %rax +; AVX512BW-NEXT: sarq $63, %rax +; AVX512BW-NEXT: movq %rax, %rcx +; AVX512BW-NEXT: imulq %rsi, %rcx ; AVX512BW-NEXT: mulq %rdi -; AVX512BW-NEXT: movq %rax, %rbx -; AVX512BW-NEXT: imulq %rdi, %r11 -; AVX512BW-NEXT: addq %r14, %r11 -; AVX512BW-NEXT: addq %rdx, %r11 -; AVX512BW-NEXT: addq %r10, %rbx -; AVX512BW-NEXT: adcq %rsi, %r11 +; AVX512BW-NEXT: movq %rax, %r11 +; AVX512BW-NEXT: addq %rax, %rcx +; AVX512BW-NEXT: addq %rdx, %rcx +; AVX512BW-NEXT: addq %r10, %r11 +; AVX512BW-NEXT: adcq %rbx, %rcx ; AVX512BW-NEXT: movq %rdi, %rax ; AVX512BW-NEXT: mulq %r8 ; AVX512BW-NEXT: movq %rdx, %r10 -; AVX512BW-NEXT: movq %rax, %r14 -; AVX512BW-NEXT: movq %r9, %rax +; AVX512BW-NEXT: movq %rax, %rbx +; AVX512BW-NEXT: movq %rsi, %rax ; AVX512BW-NEXT: mulq %r8 ; AVX512BW-NEXT: movq %rdx, %r8 -; AVX512BW-NEXT: movq %rax, %r15 -; AVX512BW-NEXT: addq %r10, %r15 +; AVX512BW-NEXT: movq %rax, %r14 +; AVX512BW-NEXT: addq %r10, %r14 ; AVX512BW-NEXT: adcq $0, %r8 ; AVX512BW-NEXT: movq %rdi, %rax -; AVX512BW-NEXT: mulq %rbp +; AVX512BW-NEXT: mulq %r9 ; AVX512BW-NEXT: movq %rdx, %rdi ; AVX512BW-NEXT: movq %rax, %r10 -; AVX512BW-NEXT: addq %r15, %r10 +; AVX512BW-NEXT: addq %r14, %r10 ; AVX512BW-NEXT: adcq %r8, %rdi ; AVX512BW-NEXT: setb %al -; AVX512BW-NEXT: movzbl %al, %esi -; AVX512BW-NEXT: movq %r9, %rax -; AVX512BW-NEXT: mulq %rbp +; AVX512BW-NEXT: movzbl %al, %r8d +; AVX512BW-NEXT: movq %rsi, %rax +; AVX512BW-NEXT: mulq %r9 ; AVX512BW-NEXT: addq %rdi, %rax -; AVX512BW-NEXT: adcq %rsi, %rdx -; AVX512BW-NEXT: addq %rbx, %rax -; AVX512BW-NEXT: adcq %r11, %rdx -; AVX512BW-NEXT: movq %r10, 8(%r12) +; AVX512BW-NEXT: adcq %r8, %rdx +; AVX512BW-NEXT: addq %r11, %rax +; AVX512BW-NEXT: adcq %rcx, %rdx +; AVX512BW-NEXT: movq %r10, 8(%r13) ; AVX512BW-NEXT: sarq $63, %r10 ; AVX512BW-NEXT: xorq %r10, %rdx ; AVX512BW-NEXT: xorq %rax, %r10 @@ -4092,8 +4055,9 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512BW-NEXT: korw %k0, %k1, %k1 ; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512BW-NEXT: movq %rcx, 16(%r12) -; AVX512BW-NEXT: movq %r14, (%r12) +; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512BW-NEXT: movq %rax, 16(%r13) +; AVX512BW-NEXT: movq %rbx, (%r13) ; AVX512BW-NEXT: popq %rbx ; AVX512BW-NEXT: popq %r12 ; AVX512BW-NEXT: popq %r13 diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll index 8d68303300ec6..cbbe089c80192 100644 --- a/llvm/test/CodeGen/X86/xmulo.ll +++ b/llvm/test/CodeGen/X86/xmulo.ll @@ -212,68 +212,66 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { ; WIN32-NEXT: pushl %ebx ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi -; WIN32-NEXT: subl $8, %esp -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: movl %ecx, %ebx -; WIN32-NEXT: sarl $31, %ebx -; WIN32-NEXT: movl %ebp, %esi -; WIN32-NEXT: imull %ebx, %esi -; WIN32-NEXT: movl %ebp, %eax -; WIN32-NEXT: mull %ebx -; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: subl $12, %esp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: imull %eax, %ebx -; WIN32-NEXT: addl %esi, %ebx -; WIN32-NEXT: addl %edx, %ebx -; WIN32-NEXT: movl %eax, %esi -; WIN32-NEXT: sarl $31, %esi -; WIN32-NEXT: movl %esi, %edx -; WIN32-NEXT: imull %ecx, %edx -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: movl %esi, %eax -; WIN32-NEXT: imull %ecx, %esi -; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl %ebx, %edi +; WIN32-NEXT: imull %ecx, %edi ; WIN32-NEXT: mull %ecx -; WIN32-NEXT: addl %edx, %esi -; WIN32-NEXT: addl %edi, %eax -; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill -; WIN32-NEXT: adcl %ebx, %esi -; WIN32-NEXT: movl %ecx, %eax +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: addl %eax, %edi +; WIN32-NEXT: addl %edx, %edi +; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: sarl $31, %eax +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: imull %ebp, %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; WIN32-NEXT: mull %ebp +; WIN32-NEXT: addl %eax, %ecx +; WIN32-NEXT: addl %edx, %ecx +; WIN32-NEXT: addl %esi, %eax +; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: adcl %edi, %ecx +; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: mull %esi ; WIN32-NEXT: movl %edx, %ebx ; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: mull %ebp +; WIN32-NEXT: mull %esi ; WIN32-NEXT: movl %edx, %edi -; WIN32-NEXT: movl %eax, %ecx -; WIN32-NEXT: addl %ebx, %ecx +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: addl %ebx, %esi ; WIN32-NEXT: adcl $0, %edi -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %ebp, %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: movl %edx, %ebx -; WIN32-NEXT: movl %eax, %ebp -; WIN32-NEXT: addl %ecx, %ebp -; WIN32-NEXT: adcl %edi, %ebx -; WIN32-NEXT: setb %cl +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: movl %eax, %ebx +; WIN32-NEXT: addl %esi, %ebx +; WIN32-NEXT: adcl %edi, %ebp +; WIN32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: addl %ebx, %eax -; WIN32-NEXT: movzbl %cl, %ecx -; WIN32-NEXT: adcl %ecx, %edx -; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; WIN32-NEXT: addl %ebp, %eax +; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; WIN32-NEXT: adcl %esi, %edx -; WIN32-NEXT: movl %ebp, %ecx +; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; WIN32-NEXT: adcl %ecx, %edx +; WIN32-NEXT: movl %ebx, %ecx ; WIN32-NEXT: sarl $31, %ecx ; WIN32-NEXT: xorl %ecx, %edx ; WIN32-NEXT: xorl %eax, %ecx ; WIN32-NEXT: orl %edx, %ecx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl %ebp, 4(%eax) +; WIN32-NEXT: movl %ebx, 4(%eax) ; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; WIN32-NEXT: movl %ecx, (%eax) ; WIN32-NEXT: setne %al -; WIN32-NEXT: addl $8, %esp +; WIN32-NEXT: addl $12, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi ; WIN32-NEXT: popl %ebx @@ -572,65 +570,65 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) { ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi ; WIN32-NEXT: pushl %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: sarl $31, %ecx -; WIN32-NEXT: movl %ebp, %edi -; WIN32-NEXT: imull %ecx, %edi -; WIN32-NEXT: movl %ebp, %eax -; WIN32-NEXT: mull %ecx -; WIN32-NEXT: movl %eax, %esi -; WIN32-NEXT: imull %ebx, %ecx -; WIN32-NEXT: addl %edi, %ecx +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: movl %ecx, %edi +; WIN32-NEXT: sarl $31, %edx +; WIN32-NEXT: movl %esi, %ecx +; WIN32-NEXT: imull %edx, %ecx +; WIN32-NEXT: mull %edx +; WIN32-NEXT: movl %eax, %ebp +; WIN32-NEXT: addl %eax, %ecx ; WIN32-NEXT: addl %edx, %ecx -; WIN32-NEXT: sarl $31, %ebx -; WIN32-NEXT: movl %ebx, %edx -; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: sarl $31, %eax +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: imull %edi, %esi ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi -; WIN32-NEXT: movl %ebx, %eax -; WIN32-NEXT: imull %edi, %ebx -; WIN32-NEXT: addl %edx, %ebx ; WIN32-NEXT: mull %edi -; WIN32-NEXT: addl %edx, %ebx -; WIN32-NEXT: addl %esi, %eax +; WIN32-NEXT: addl %eax, %esi +; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: addl %ebp, %eax ; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill -; WIN32-NEXT: adcl %ecx, %ebx +; WIN32-NEXT: adcl %ecx, %esi ; WIN32-NEXT: movl %edi, %eax -; WIN32-NEXT: mull %ebp -; WIN32-NEXT: movl %edx, %esi -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: mull %ebp -; WIN32-NEXT: movl %edx, %ecx -; WIN32-NEXT: movl %eax, %edi -; WIN32-NEXT: addl %esi, %edi -; WIN32-NEXT: adcl $0, %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %edx, %ebp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: addl %ebp, %ecx +; WIN32-NEXT: adcl $0, %ebx +; WIN32-NEXT: movl %edi, %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: movl %edx, %ebp -; WIN32-NEXT: movl %eax, %esi -; WIN32-NEXT: addl %edi, %esi -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi -; WIN32-NEXT: adcl %ecx, %ebp +; WIN32-NEXT: movl %edx, %edi +; WIN32-NEXT: movl %eax, %ebp +; WIN32-NEXT: addl %ecx, %ebp +; WIN32-NEXT: adcl %ebx, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; WIN32-NEXT: setb %cl -; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: movl %ebx, %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: addl %ebp, %eax +; WIN32-NEXT: addl %edi, %eax ; WIN32-NEXT: movzbl %cl, %ecx ; WIN32-NEXT: adcl %ecx, %edx ; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload -; WIN32-NEXT: adcl %ebx, %edx -; WIN32-NEXT: sarl $31, %esi -; WIN32-NEXT: xorl %esi, %edx -; WIN32-NEXT: xorl %eax, %esi +; WIN32-NEXT: adcl %esi, %edx +; WIN32-NEXT: sarl $31, %ebp +; WIN32-NEXT: xorl %ebp, %edx +; WIN32-NEXT: xorl %eax, %ebp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: orl %edx, %esi +; WIN32-NEXT: orl %edx, %ebp ; WIN32-NEXT: jne LBB12_2 ; WIN32-NEXT: # %bb.1: ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; WIN32-NEXT: LBB12_2: -; WIN32-NEXT: movl %edi, %edx +; WIN32-NEXT: movl %ebx, %edx ; WIN32-NEXT: addl $4, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi @@ -991,57 +989,54 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) { ; WIN32-NEXT: pushl %ebx ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi -; WIN32-NEXT: pushl %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: movl %ecx, %edi -; WIN32-NEXT: sarl $31, %edi -; WIN32-NEXT: movl %ebp, %esi -; WIN32-NEXT: imull %edi, %esi -; WIN32-NEXT: movl %ebp, %eax -; WIN32-NEXT: mull %edi -; WIN32-NEXT: movl %eax, %ebx +; WIN32-NEXT: subl $8, %esp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: imull %eax, %edi -; WIN32-NEXT: addl %esi, %edi -; WIN32-NEXT: addl %edx, %edi -; WIN32-NEXT: movl %eax, %esi -; WIN32-NEXT: sarl $31, %esi -; WIN32-NEXT: movl %esi, %edx -; WIN32-NEXT: imull %ecx, %edx -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: movl %edx, %edi +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl %ebx, %esi ; WIN32-NEXT: imull %ecx, %esi -; WIN32-NEXT: addl %edx, %esi ; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %eax, %ebp +; WIN32-NEXT: addl %eax, %esi ; WIN32-NEXT: addl %edx, %esi -; WIN32-NEXT: addl %ebx, %eax -; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill -; WIN32-NEXT: adcl %edi, %esi -; WIN32-NEXT: movl %ecx, %eax -; WIN32-NEXT: movl %ecx, %ebx -; WIN32-NEXT: mull %ebp +; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: sarl $31, %eax +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: imull %edi, %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: mull %ebx +; WIN32-NEXT: addl %eax, %ecx +; WIN32-NEXT: addl %edx, %ecx +; WIN32-NEXT: addl %ebp, %eax +; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: adcl %esi, %ecx +; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: mull %esi ; WIN32-NEXT: movl %edx, %edi ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: mull %ebp -; WIN32-NEXT: movl %edx, %ebp -; WIN32-NEXT: movl %eax, %ecx -; WIN32-NEXT: addl %edi, %ecx -; WIN32-NEXT: adcl $0, %ebp +; WIN32-NEXT: mull %esi +; WIN32-NEXT: movl %edx, %esi +; WIN32-NEXT: movl %eax, %ebp +; WIN32-NEXT: addl %edi, %ebp +; WIN32-NEXT: adcl $0, %esi ; WIN32-NEXT: movl %ebx, %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) ; WIN32-NEXT: movl %edx, %ebx ; WIN32-NEXT: movl %eax, %edi -; WIN32-NEXT: addl %ecx, %edi -; WIN32-NEXT: adcl %ebp, %ebx -; WIN32-NEXT: setb %cl +; WIN32-NEXT: addl %ebp, %edi +; WIN32-NEXT: adcl %esi, %ebx +; WIN32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) ; WIN32-NEXT: addl %ebx, %eax -; WIN32-NEXT: movzbl %cl, %ecx -; WIN32-NEXT: adcl %ecx, %edx -; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; WIN32-NEXT: adcl %esi, %edx +; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; WIN32-NEXT: adcl %ecx, %edx ; WIN32-NEXT: sarl $31, %edi ; WIN32-NEXT: xorl %edi, %edx ; WIN32-NEXT: xorl %eax, %edi @@ -1050,7 +1045,7 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) { ; WIN32-NEXT: # %bb.3: # %continue ; WIN32-NEXT: movb $1, %al ; WIN32-NEXT: LBB18_2: # %overflow -; WIN32-NEXT: addl $4, %esp +; WIN32-NEXT: addl $8, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi ; WIN32-NEXT: popl %ebx @@ -1699,69 +1694,68 @@ define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) { ; WIN32-NEXT: pushl %ebx ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi -; WIN32-NEXT: subl $16, %esp -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: subl $20, %esp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl (%eax), %esi -; WIN32-NEXT: movl 4(%eax), %ebp -; WIN32-NEXT: sarl $31, %ebx -; WIN32-NEXT: movl %ebx, %ecx -; WIN32-NEXT: imull %ebp, %ecx -; WIN32-NEXT: movl %ebx, %eax -; WIN32-NEXT: imull %esi, %ebx -; WIN32-NEXT: addl %ecx, %ebx -; WIN32-NEXT: mull %esi -; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; WIN32-NEXT: addl %edx, %ebx -; WIN32-NEXT: movl %ebp, %ecx -; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl (%eax), %edx +; WIN32-NEXT: movl %edx, (%esp) # 4-byte Spill +; WIN32-NEXT: movl 4(%eax), %ebx +; WIN32-NEXT: movl %ecx, %eax +; WIN32-NEXT: sarl $31, %eax +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: imull %ebx, %esi +; WIN32-NEXT: mull %edx +; WIN32-NEXT: movl %eax, %ebp +; WIN32-NEXT: addl %eax, %esi +; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: movl %ebx, %edi +; WIN32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: sarl $31, %edi +; WIN32-NEXT: imull %edi, %ecx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl %eax, %edi -; WIN32-NEXT: imull %ecx, %edi -; WIN32-NEXT: mull %ecx -; WIN32-NEXT: imull {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: addl %edi, %ecx +; WIN32-NEXT: mull %edi +; WIN32-NEXT: addl %eax, %ecx ; WIN32-NEXT: addl %edx, %ecx -; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; WIN32-NEXT: adcl %ebx, %ecx +; WIN32-NEXT: addl %eax, %ebp +; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: adcl %esi, %ecx +; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload ; WIN32-NEXT: movl %esi, %eax ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi ; WIN32-NEXT: mull %edi -; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: movl %ebx, %eax ; WIN32-NEXT: mull %edi ; WIN32-NEXT: movl %edx, %ebp ; WIN32-NEXT: movl %eax, %edi -; WIN32-NEXT: addl %ebx, %edi +; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; WIN32-NEXT: adcl $0, %ebp ; WIN32-NEXT: movl %esi, %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: movl %edx, %ebx -; WIN32-NEXT: movl %eax, %esi -; WIN32-NEXT: addl %edi, %esi -; WIN32-NEXT: adcl %ebp, %ebx -; WIN32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; WIN32-NEXT: movl %edx, %esi +; WIN32-NEXT: movl %eax, %ebx +; WIN32-NEXT: addl %edi, %ebx +; WIN32-NEXT: adcl %ebp, %esi +; WIN32-NEXT: setb (%esp) # 1-byte Folded Spill ; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: addl %ebx, %eax -; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; WIN32-NEXT: adcl %edi, %edx +; WIN32-NEXT: addl %esi, %eax +; WIN32-NEXT: movzbl (%esp), %esi # 1-byte Folded Reload +; WIN32-NEXT: adcl %esi, %edx ; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; WIN32-NEXT: adcl %ecx, %edx -; WIN32-NEXT: movl %esi, %ecx +; WIN32-NEXT: movl %ebx, %ecx ; WIN32-NEXT: sarl $31, %ecx ; WIN32-NEXT: xorl %ecx, %edx ; WIN32-NEXT: xorl %eax, %ecx ; WIN32-NEXT: orl %edx, %ecx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl %esi, 4(%eax) +; WIN32-NEXT: movl %ebx, 4(%eax) ; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; WIN32-NEXT: movl %ecx, (%eax) ; WIN32-NEXT: setne %al -; WIN32-NEXT: addl $16, %esp +; WIN32-NEXT: addl $20, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi ; WIN32-NEXT: popl %ebx @@ -1810,62 +1804,58 @@ define zeroext i1 @smuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) { ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl (%eax), %ebx ; WIN32-NEXT: movl 4(%eax), %ebp -; WIN32-NEXT: movl %ecx, %edi -; WIN32-NEXT: sarl $31, %edi -; WIN32-NEXT: movl %ebx, %esi -; WIN32-NEXT: imull %edi, %esi -; WIN32-NEXT: movl %ebx, %eax -; WIN32-NEXT: mull %edi -; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill -; WIN32-NEXT: imull %ebp, %edi -; WIN32-NEXT: addl %esi, %edi -; WIN32-NEXT: addl %edx, %edi -; WIN32-NEXT: movl %ebp, %esi ; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; WIN32-NEXT: sarl $31, %esi -; WIN32-NEXT: movl %esi, %edx -; WIN32-NEXT: imull %ecx, %edx -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: movl %esi, %eax -; WIN32-NEXT: imull %ecx, %esi -; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl %ebp, %edi +; WIN32-NEXT: imull %ecx, %edi +; WIN32-NEXT: movl %ebx, %eax ; WIN32-NEXT: mull %ecx -; WIN32-NEXT: addl %edx, %esi -; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: addl %eax, %edi +; WIN32-NEXT: addl %edx, %edi +; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: sarl $31, %eax +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: imull {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; WIN32-NEXT: mull %ebp +; WIN32-NEXT: addl %eax, %ecx +; WIN32-NEXT: addl %edx, %ecx +; WIN32-NEXT: addl %esi, %eax ; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; WIN32-NEXT: adcl %edi, %esi -; WIN32-NEXT: movl %ecx, %eax +; WIN32-NEXT: adcl %edi, %ecx +; WIN32-NEXT: movl %ebp, %eax ; WIN32-NEXT: mull %ebx -; WIN32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull %ebx ; WIN32-NEXT: movl %edx, %edi -; WIN32-NEXT: movl %eax, %ecx -; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: addl %ebp, %esi ; WIN32-NEXT: adcl $0, %edi ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: mull %ebp -; WIN32-NEXT: movl %edx, %ebx -; WIN32-NEXT: movl %eax, %ebp -; WIN32-NEXT: addl %ecx, %ebp -; WIN32-NEXT: adcl %edi, %ebx -; WIN32-NEXT: setb %cl +; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: movl %eax, %ebx +; WIN32-NEXT: addl %esi, %ebx +; WIN32-NEXT: adcl %edi, %ebp +; WIN32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; WIN32-NEXT: addl %ebx, %eax -; WIN32-NEXT: movzbl %cl, %ecx -; WIN32-NEXT: adcl %ecx, %edx -; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; WIN32-NEXT: addl %ebp, %eax +; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; WIN32-NEXT: adcl %esi, %edx -; WIN32-NEXT: movl %ebp, %ecx +; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; WIN32-NEXT: adcl %ecx, %edx +; WIN32-NEXT: movl %ebx, %ecx ; WIN32-NEXT: sarl $31, %ecx ; WIN32-NEXT: xorl %ecx, %edx ; WIN32-NEXT: xorl %eax, %ecx ; WIN32-NEXT: orl %edx, %ecx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl %ebp, 4(%eax) -; WIN32-NEXT: movl (%esp), %ecx # 4-byte Reload +; WIN32-NEXT: movl %ebx, 4(%eax) +; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; WIN32-NEXT: movl %ecx, (%eax) ; WIN32-NEXT: setne %al ; WIN32-NEXT: addl $16, %esp