Skip to content

Commit

Permalink
[ISel] Add pattern matching for depositing subreg value (#75978)
Browse files Browse the repository at this point in the history
Depositing a value into the lowest byte/word of a wider integer is a common
code pattern. This patch improves code generation for it by avoiding the
redundant AND and OR operations.
  • Loading branch information
david-xl committed Dec 21, 2023
1 parent 6a870cc commit f44079d
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 0 deletions.
17 changes: 17 additions & 0 deletions llvm/lib/Target/X86/X86InstrCompiler.td
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,23 @@ def : Pat<(X86add_flag_nocf GR32:$src1, 128),
def : Pat<(X86add_flag_nocf GR64:$src1, 128),
(SUB64ri32 GR64:$src1, -128)>;

// Depositing value to 8/16 bit subreg:
// Each pattern below matches `(dst & mask) | zext(load)`, where the mask
// clears exactly the low bits that the zero-extended load can set
// (-256 clears the low 8 bits, -65536 clears the low 16 bits). The selected
// code copies $dst and inserts the narrow load (MOV8rm / MOV16rm) into the
// copy's sub_8bit / sub_16bit subregister instead of emitting the AND and OR.
// NOTE(review): the result is produced by a partial (8/16-bit) register
// write; confirm this is acceptable on targets where such writes are costly.

// 64-bit destination, low 8 bits replaced by the loaded byte.
def : Pat<(or (and GR64:$dst, -256),
(i64 (zextloadi8 addr:$src))),
(INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;

// 32-bit destination, low 8 bits replaced by the loaded byte.
def : Pat<(or (and GR32:$dst, -256),
(i32 (zextloadi8 addr:$src))),
(INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;

// 64-bit destination, low 16 bits replaced by the loaded word.
def : Pat<(or (and GR64:$dst, -65536),
(i64 (zextloadi16 addr:$src))),
(INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;

// 32-bit destination, low 16 bits replaced by the loaded word.
def : Pat<(or (and GR32:$dst, -65536),
(i32 (zextloadi16 addr:$src))),
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;

// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
def : Pat<(add GR64:$src1, 0x0000000080000000),
Expand Down
93 changes: 93 additions & 0 deletions llvm/test/CodeGen/X86/insert.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64

; Replaces the low 8 bits of the i64 argument %res with the byte loaded from
; %byte, i.e. (%res & -256) | zext(load i8), and returns the result.
; The x86_64 expectations are a register copy followed by a single byte load
; into %al. The i386 expectations are three stack loads followed by a byte
; load into %al. Neither run expects a separate and/or instruction.
define i64 @sub8(i64 noundef %res, ptr %byte) {
; X86-LABEL: sub8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb (%ecx), %al
; X86-NEXT: retl
;
; X64-LABEL: sub8:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movb (%rsi), %al
; X64-NEXT: retq
entry:
%and = and i64 %res, -256
%d = load i8, ptr %byte, align 1
%conv2 = zext i8 %d to i64
%or = or i64 %and, %conv2
ret i64 %or
}

; Replaces the low 16 bits of the i64 argument %res with the 16-bit value
; loaded from %byte, i.e. (%res & -65536) | zext(load i16), and returns it.
; The pointer parameter is named %byte but is loaded as an i16 with align 1.
; The x86_64 expectations are a register copy followed by a single 16-bit
; load into %ax. The i386 expectations still combine the halves with
; movzwl/shll/orl rather than a 16-bit deposit.
define i64 @sub16(i64 noundef %res, ptr %byte) {
; X86-LABEL: sub16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll $16, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub16:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movw (%rsi), %ax
; X64-NEXT: retq
entry:
%and = and i64 %res, -65536
%d = load i16, ptr %byte, align 1
%conv2 = zext i16 %d to i64
%or = or i64 %and, %conv2
ret i64 %or
}

; 32-bit variant of the byte deposit: returns (%res & -256) | zext(load i8)
; for an i32 %res. Both runs expect %res to be placed in %eax and the byte
; then loaded directly into %al, with no separate and/or instruction.
define i32 @sub8_32(i32 noundef %res, ptr %byte) {
; X86-LABEL: sub8_32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb (%ecx), %al
; X86-NEXT: retl
;
; X64-LABEL: sub8_32:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movb (%rsi), %al
; X64-NEXT: retq
entry:
%and = and i32 %res, -256
%d = load i8, ptr %byte, align 1
%conv2 = zext i8 %d to i32
%or = or i32 %and, %conv2
ret i32 %or
}

; 32-bit variant of the 16-bit deposit: returns
; (%res & -65536) | zext(load i16) for an i32 %res.
; The pointer parameter is named %byte but is loaded as an i16 with align 1.
; The x86_64 expectations are a register copy followed by a single 16-bit
; load into %ax. The i386 expectations still use movzwl/shll/orl rather than
; a 16-bit deposit.
define i32 @sub16_32(i32 noundef %res, ptr %byte) {
; X86-LABEL: sub16_32:
; X86: # %bb.0: # %entry
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll $16, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub16_32:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movw (%rsi), %ax
; X64-NEXT: retq
entry:
%and = and i32 %res, -65536
%d = load i16, ptr %byte, align 1
%conv2 = zext i16 %d to i32
%or = or i32 %and, %conv2
ret i32 %or
}

0 comments on commit f44079d

Please sign in to comment.