Skip to content

Commit

Permalink
Add new insvti_lowpart_1 and insvdi_lowpart_1 patterns.
Browse files Browse the repository at this point in the history
This patch implements another of Uros' suggestions, to investigate an
insvti_lowpart_1 pattern to improve TImode parameter passing on x86_64.
In PR 88873, the RTL the middle-end expands for passing V2DF in TImode
is subtly different from what it does for V2DI in TImode, sufficiently so
that my explanations for why insvti_lowpart_1 isn't required don't apply
in this case.

This patch adds an insvti_lowpart_1 pattern, complementing the existing
insvti_highpart_1 pattern, and also a 32-bit variant, insvdi_lowpart_1.
Because the middle-end represents 128-bit constants using CONST_WIDE_INT
and 64-bit constants using CONST_INT, it's easiest to treat these as
different patterns, rather than attempt <dwi> parameterization.

This patch also includes a peephole2 (actually a pair) to transform
xchg instructions into mov instructions, when one of the destinations
is unused.  This optimization is required to produce the optimal code
sequences below.

For the 64-bit case:

__int128 foo(__int128 x, unsigned long long y)
{
  __int128 m = ~((__int128)~0ull);
  __int128 t = x & m;
  __int128 r = t | y;
  return r;
}

Before:
        xchgq   %rdi, %rsi
        movq    %rdx, %rax
        xorl    %esi, %esi
        xorl    %edx, %edx
        orq     %rsi, %rax
        orq     %rdi, %rdx
        ret

After:
        movq    %rdx, %rax
        movq    %rsi, %rdx
        ret

For the 32-bit case:

long long bar(long long x, int y)
{
  long long mask = ~0ull << 32;
  long long t = x & mask;
  long long r = t | (unsigned int)y;
  return r;
}

Before:
        pushl   %ebx
        movl    12(%esp), %edx
        xorl    %ebx, %ebx
        xorl    %eax, %eax
        movl    16(%esp), %ecx
        orl     %ebx, %edx
        popl    %ebx
        orl     %ecx, %eax
        ret

After:
        movl    12(%esp), %eax
        movl    8(%esp), %edx
        ret

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?

2023-07-09  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386.md (peephole2): Transform xchg insn with a
        REG_UNUSED note to a (simple) move.
        (*insvti_lowpart_1): New define_insn_and_split.
        (*insvdi_lowpart_1): Likewise.

gcc/testsuite/ChangeLog
        * gcc.target/i386/insvdi_lowpart-1.c: New test case.
        * gcc.target/i386/insvti_lowpart-1.c: Likewise.

Cheers,
Roger
  • Loading branch information
rogersayle authored and ouuleilei-bot committed Jul 9, 2023
1 parent c2d62cd commit 4f73967
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 0 deletions.
66 changes: 66 additions & 0 deletions gcc/config/i386/i386.md
Original file line number Diff line number Diff line change
Expand Up @@ -3175,6 +3175,30 @@
[(parallel [(set (match_dup 1) (match_dup 2))
(set (match_dup 2) (match_dup 1))])])

;; Convert xchg with a REG_UNUSED note to a mov (variant #1).
;; Matches a register-register exchange, written as a PARALLEL of two SETs
;; that swap operands 0 and 1.  When peep2_reg_dead_p reports operand 0 as
;; dead, only one half of the exchange is still needed, so the replacement
;; is the single move that gives operand 1 the old value of operand 0.
;; The first part of the condition leaves the exchange alone when either
;; operand is AX_REG, optimize_size >= 2 and the insn is optimized for size.
;; NOTE(review): presumably this avoids undoing AX_REG exchanges that are
;; created on purpose for -Oz elsewhere in this file -- confirm.
(define_peephole2
[(parallel [(set (match_operand:SWI 0 "general_reg_operand")
(match_operand:SWI 1 "general_reg_operand"))
(set (match_dup 1) (match_dup 0))])]
"((REGNO (operands[0]) != AX_REG
&& REGNO (operands[1]) != AX_REG)
|| optimize_size < 2
|| !optimize_insn_for_size_p ())
&& peep2_reg_dead_p (1, operands[0])"
[(set (match_dup 1) (match_dup 0))])

;; Convert xchg with a REG_UNUSED note to a mov (variant #2).
;; Matches the same register-register exchange (a PARALLEL of two SETs that
;; swap operands 0 and 1) under the same AX_REG / size-optimization guard,
;; but handles the case where peep2_reg_dead_p reports operand 1 as dead.
;; The replacement is then the single move that gives operand 0 the old
;; value of operand 1.
(define_peephole2
[(parallel [(set (match_operand:SWI 0 "general_reg_operand")
(match_operand:SWI 1 "general_reg_operand"))
(set (match_dup 1) (match_dup 0))])]
"((REGNO (operands[0]) != AX_REG
&& REGNO (operands[1]) != AX_REG)
|| optimize_size < 2
|| !optimize_insn_for_size_p ())
&& peep2_reg_dead_p (1, operands[1])"
[(set (match_dup 0) (match_dup 1))])

;; Convert moves to/from AX_REG into xchg with -Oz.
(define_peephole2
[(set (match_operand:SWI48 0 "general_reg_operand")
Expand Down Expand Up @@ -3505,6 +3529,48 @@
split_double_concat (TImode, operands[0], operands[4], operands[2]);
DONE;
})

;; Insert a DImode value into the low half of a TImode value (64-bit only).
;; Recognizes any_or_plus of (operand 1 AND operand 3) with the
;; zero-extension of operand 2.  The insn condition requires operand 3 to
;; be a two-element CONST_WIDE_INT whose element 0 is 0 and element 1 is -1,
;; i.e. a mask that clears the low 64 bits and keeps the high 64 bits.
;; NOTE(review): any_or_plus is an iterator defined outside this view;
;; presumably it covers ior/xor/plus, which all agree here because the two
;; inputs have no set bits in common -- confirm.
;; The output template is "#", so no assembly is emitted for the insn
;; itself; after reload it is split by taking the DImode high part of
;; operand 1 as operands[4] and calling split_double_concat with operand 2
;; and operands[4].
;; NOTE(review): split_double_concat is defined elsewhere; the argument
;; order suggests (mode, dest, low half, high half) -- confirm.
(define_insn_and_split "*insvti_lowpart_1"
[(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
(any_or_plus:TI
(and:TI
(match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
(match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
(zero_extend:TI
(match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))))]
"TARGET_64BIT
&& CONST_WIDE_INT_P (operands[3])
&& CONST_WIDE_INT_NUNITS (operands[3]) == 2
&& CONST_WIDE_INT_ELT (operands[3], 0) == 0
&& CONST_WIDE_INT_ELT (operands[3], 1) == -1"
"#"
"&& reload_completed"
[(const_int 0)]
{
operands[4] = gen_highpart (DImode, operands[1]);
split_double_concat (TImode, operands[0], operands[2], operands[4]);
DONE;
})

;; Insert an SImode value into the low half of a DImode value (32-bit only).
;; Recognizes any_or_plus of (operand 1 AND operand 3) with the
;; zero-extension of operand 2.  The insn condition requires !TARGET_64BIT
;; and operand 3 to be the CONST_INT 0xffffffff00000000, i.e. a mask that
;; clears the low 32 bits and keeps the high 32 bits.
;; NOTE(review): any_or_plus is an iterator defined outside this view;
;; presumably it covers ior/xor/plus, which all agree here because the two
;; inputs have no set bits in common -- confirm.
;; The output template is "#", so no assembly is emitted for the insn
;; itself; after reload it is split by taking the SImode high part of
;; operand 1 as operands[4] and calling split_double_concat with operand 2
;; and operands[4].
;; NOTE(review): split_double_concat is defined elsewhere; the argument
;; order suggests (mode, dest, low half, high half) -- confirm.
(define_insn_and_split "*insvdi_lowpart_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "=ro,r,r,&r")
(any_or_plus:DI
(and:DI
(match_operand:DI 1 "nonimmediate_operand" "r,m,r,m")
(match_operand:DI 3 "const_int_operand" "n,n,n,n"))
(zero_extend:DI
(match_operand:SI 2 "nonimmediate_operand" "r,r,m,m"))))]
"!TARGET_64BIT
&& CONST_INT_P (operands[3])
&& UINTVAL (operands[3]) == 0xffffffff00000000ll"
"#"
"&& reload_completed"
[(const_int 0)]
{
operands[4] = gen_highpart (SImode, operands[1]);
split_double_concat (DImode, operands[0], operands[2], operands[4]);
DONE;
})

;; Floating point push instructions.

Expand Down
13 changes: 13 additions & 0 deletions gcc/testsuite/gcc.target/i386/insvdi_lowpart-1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/* { dg-do compile { target ia32 } } */
/* { dg-options "-O2" } */

/* Return X with its low 32 bits replaced by Y, treated as unsigned; the
   high 32 bits of X are kept.  The computation is deliberately spelled as
   separate mask / and / or steps, since this file is a compile-only test
   whose generated assembly is what gets checked.  */
long long foo(long long x, int y)
{
long long mask = ~0ull << 32;	/* High 32 bits set, low 32 bits clear.  */
long long t = x & mask;	/* Keep only the high half of X.  */
long long r = t | (unsigned int)y;	/* Cast makes Y zero-extend.  */
return r;
}

/* { dg-final { scan-assembler-not "xorl" } } */
/* { dg-final { scan-assembler-not "orl" } } */
13 changes: 13 additions & 0 deletions gcc/testsuite/gcc.target/i386/insvti_lowpart-1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/* { dg-do compile { target int128 } } */
/* { dg-options "-O2" } */

/* Return X with its low 64 bits replaced by Y; the high 64 bits of X are
   kept.  The computation is deliberately spelled as separate mask / and /
   or steps, since this file is a compile-only test whose generated
   assembly is what gets checked.  */
__int128 foo(__int128 x, unsigned long long y)
{
__int128 m = ~((__int128)~0ull);	/* High 64 bits set, low 64 bits clear.  */
__int128 t = x & m;	/* Keep only the high half of X.  */
__int128 r = t | y;	/* Y is unsigned, so it zero-extends.  */
return r;
}

/* { dg-final { scan-assembler-not "xorl" } } */
/* { dg-final { scan-assembler-not "orq" } } */

0 comments on commit 4f73967

Please sign in to comment.