Skip to content

Commit

Permalink
[X86][CodeGen] Add missing patterns for APX NDD instructions about en…
Browse files Browse the repository at this point in the history
…coding trick
  • Loading branch information
KanRobert committed Mar 1, 2024
1 parent b8e0f3e commit 924ad19
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 41 deletions.
81 changes: 58 additions & 23 deletions llvm/lib/Target/X86/X86InstrCompiler.td
Original file line number Diff line number Diff line change
Expand Up @@ -1493,27 +1493,71 @@ def : Pat<(xor GR32:$src1, -2147483648),

// Odd encoding trick: -128 fits into an 8-bit immediate field while
// +128 doesn't, so in this special case use a sub instead of an add.
def : Pat<(add GR16:$src1, 128),
(SUB16ri GR16:$src1, -128)>;
// Register forms of the encoding trick, selected only when the NoNDD
// predicate holds: an add of the constant 128 is emitted as a subtract of
// -128 for the 16-, 32- and 64-bit register classes.
let Predicates = [NoNDD] in {
def : Pat<(add GR16:$src1, 128),
(SUB16ri GR16:$src1, -128)>;
def : Pat<(add GR32:$src1, 128),
(SUB32ri GR32:$src1, -128)>;
def : Pat<(add GR64:$src1, 128),
(SUB64ri32 GR64:$src1, -128)>;

// The same rewrite for the X86add_flag_nocf node.
// NOTE(review): presumably this is the flag-producing add whose carry flag is
// not consumed, which is what makes swapping add for sub safe -- confirm
// against the node's definition.
def : Pat<(X86add_flag_nocf GR16:$src1, 128),
(SUB16ri GR16:$src1, -128)>;
def : Pat<(X86add_flag_nocf GR32:$src1, 128),
(SUB32ri GR32:$src1, -128)>;
def : Pat<(X86add_flag_nocf GR64:$src1, 128),
(SUB64ri32 GR64:$src1, -128)>;
}
// HasNDD counterparts of the register patterns: the same add-128 inputs are
// matched, but the _ND variants of the subtract instructions are selected.
// NOTE(review): _ND presumably denotes the APX new-data-destination forms,
// which write the result to a separate destination register -- confirm
// against the instruction definitions.
let Predicates = [HasNDD] in {
def : Pat<(add GR16:$src1, 128),
(SUB16ri_ND GR16:$src1, -128)>;
def : Pat<(add GR32:$src1, 128),
(SUB32ri_ND GR32:$src1, -128)>;
def : Pat<(add GR64:$src1, 128),
(SUB64ri32_ND GR64:$src1, -128)>;

// The same rewrite for the X86add_flag_nocf node.
def : Pat<(X86add_flag_nocf GR16:$src1, 128),
(SUB16ri_ND GR16:$src1, -128)>;
def : Pat<(X86add_flag_nocf GR32:$src1, 128),
(SUB32ri_ND GR32:$src1, -128)>;
def : Pat<(X86add_flag_nocf GR64:$src1, 128),
(SUB64ri32_ND GR64:$src1, -128)>;
}
def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
(SUB16mi addr:$dst, -128)>;

def : Pat<(add GR32:$src1, 128),
(SUB32ri GR32:$src1, -128)>;
def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
(SUB32mi addr:$dst, -128)>;

def : Pat<(add GR64:$src1, 128),
(SUB64ri32 GR64:$src1, -128)>;
def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
(SUB64mi32 addr:$dst, -128)>;
// Memory-source forms under HasNDD: an add of 128 to a loaded value, with no
// store back to the address, selects the _ND subtract that takes the memory
// operand directly and uses the immediate -128.
let Predicates = [HasNDD] in {
def : Pat<(add (loadi16 addr:$src), 128),
(SUB16mi_ND addr:$src, -128)>;
def : Pat<(add (loadi32 addr:$src), 128),
(SUB32mi_ND addr:$src, -128)>;
def : Pat<(add (loadi64 addr:$src), 128),
(SUB64mi32_ND addr:$src, -128)>;
}

def : Pat<(X86add_flag_nocf GR16:$src1, 128),
(SUB16ri GR16:$src1, -128)>;
def : Pat<(X86add_flag_nocf GR32:$src1, 128),
(SUB32ri GR32:$src1, -128)>;
def : Pat<(X86add_flag_nocf GR64:$src1, 128),
(SUB64ri32 GR64:$src1, -128)>;
// The same trick applies for 32-bit immediate fields in 64-bit
// instructions: the added constant 0x0000000080000000 is +2^31, while the
// subtracted constant 0xffffffff80000000 is -2^31 in 64-bit two's complement.
// These are the register forms selected when the NoNDD predicate holds.
let Predicates = [NoNDD] in {
def : Pat<(add GR64:$src1, 0x0000000080000000),
(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
}
// HasNDD counterpart for the 32-bit immediate case: a 64-bit add of
// 0x0000000080000000 (+2^31) selects the _ND subtract with the immediate
// 0xffffffff80000000 (-2^31), for both the add and X86add_flag_nocf nodes.
let Predicates = [HasNDD] in {
def : Pat<(add GR64:$src1, 0x0000000080000000),
(SUB64ri32_ND GR64:$src1, 0xffffffff80000000)>;
def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
(SUB64ri32_ND GR64:$src1, 0xffffffff80000000)>;
}
def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
// Memory-source form under HasNDD for the 32-bit immediate case: a 64-bit add
// of 0x0000000080000000 to a loaded value, with no store back, selects the
// _ND subtract on the memory operand with the immediate 0xffffffff80000000.
let Predicates = [HasNDD] in {
def : Pat<(add(loadi64 addr:$src), 0x0000000080000000),
(SUB64mi32_ND addr:$src, 0xffffffff80000000)>;
}

// Depositing value to 8/16 bit subreg:
def : Pat<(or (and GR64:$dst, -256),
Expand All @@ -1532,15 +1576,6 @@ def : Pat<(or (and GR32:$dst, -65536),
(i32 (zextloadi16 addr:$src))),
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;

// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
def : Pat<(add GR64:$src1, 0x0000000080000000),
(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;

// To avoid needing to materialize an immediate in a register, use a 32-bit and
// with implicit zero-extension instead of a 64-bit and if the immediate has at
// least 32 bits of leading zeros. If in addition the last 32 bits can be
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/apx/sub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -89,31 +89,31 @@ entry:
define i16 @sub16ri8(i16 noundef %a) {
; CHECK-LABEL: sub16ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addl $-123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x85]
; CHECK-NEXT: subl $-128, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xef,0x80]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%sub = sub i16 %a, 123
%sub = sub i16 %a, -128

This comment has been minimized.

Copy link
@phoebewang

phoebewang Mar 1, 2024

Contributor

The pattern is for `add 128`, so why is the test written as `sub -128`?

This comment has been minimized.

Copy link
@KanRobert

KanRobert Mar 1, 2024

Author Contributor

In the patterns defined in TableGen, `add` is the name of the SelectionDAG node, not the operation in the IR.

The IR `sub` op is always represented with an `add` node, as this DAG combiner log shows:

Combining: t6: i64 = sub nsw t4, Constant:i64<-2147483648>
Creating constant: t11: i64 = Constant<2147483648>
Creating new node: t12: i64 = add t4, Constant:i64<2147483648>
 ... into: t12: i64 = add t4, Constant:i64<2147483648>
ret i16 %sub
}

define i32 @sub32ri8(i32 noundef %a) {
; CHECK-LABEL: sub32ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addl $-123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x85]
; CHECK-NEXT: subl $-128, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xef,0x80]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%sub = sub i32 %a, 123
%sub = sub i32 %a, -128
ret i32 %sub
}

define i64 @sub64ri8(i64 noundef %a) {
; CHECK-LABEL: sub64ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addq $-123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xc7,0x85]
; CHECK-NEXT: subq $-128, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xef,0x80]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%sub = sub i64 %a, 123
%sub = sub i64 %a, -128
ret i64 %sub
}

Expand Down Expand Up @@ -153,11 +153,11 @@ entry:
define i64 @sub64ri(i64 noundef %a) {
; CHECK-LABEL: sub64ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addq $-123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xc7,0xc0,0x1d,0xfe,0xff]
; CHECK-NEXT: # imm = 0xFFFE1DC0
; CHECK-NEXT: subq $-2147483648, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xef,0x00,0x00,0x00,0x80]
; CHECK-NEXT: # imm = 0x80000000
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%sub = sub i64 %a, 123456
%sub = sub i64 %a, -2147483648
ret i64 %sub
}

Expand Down Expand Up @@ -211,34 +211,34 @@ define i16 @sub16mi8(ptr %a) {
; CHECK-LABEL: sub16mi8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
; CHECK-NEXT: addl $-123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x85]
; CHECK-NEXT: subl $-128, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe8,0x80]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%t= load i16, ptr %a
%sub = sub nsw i16 %t, 123
%sub = sub nsw i16 %t, -128
ret i16 %sub
}

define i32 @sub32mi8(ptr %a) {
; CHECK-LABEL: sub32mi8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addl $-123, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x07,0x85]
; CHECK-NEXT: subl $-128, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x2f,0x80]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%t= load i32, ptr %a
%sub = sub nsw i32 %t, 123
%sub = sub nsw i32 %t, -128
ret i32 %sub
}

define i64 @sub64mi8(ptr %a) {
; CHECK-LABEL: sub64mi8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addq $-123, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x07,0x85]
; CHECK-NEXT: subq $-128, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x2f,0x80]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%t= load i64, ptr %a
%sub = sub nsw i64 %t, 123
%sub = sub nsw i64 %t, -128
ret i64 %sub
}

Expand Down Expand Up @@ -282,12 +282,12 @@ entry:
define i64 @sub64mi(ptr %a) {
; CHECK-LABEL: sub64mi:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addq $-123456, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x07,0xc0,0x1d,0xfe,0xff]
; CHECK-NEXT: # imm = 0xFFFE1DC0
; CHECK-NEXT: subq $-2147483648, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x2f,0x00,0x00,0x00,0x80]
; CHECK-NEXT: # imm = 0x80000000
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%t= load i64, ptr %a
%sub = sub nsw i64 %t, 123456
%sub = sub nsw i64 %t, -2147483648
ret i64 %sub
}

Expand Down

0 comments on commit 924ad19

Please sign in to comment.