diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 457833f8cc331..c77c77ee4a3ee 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1515,6 +1515,23 @@ def : Pat<(X86add_flag_nocf GR32:$src1, 128),
 def : Pat<(X86add_flag_nocf GR64:$src1, 128),
           (SUB64ri32 GR64:$src1, -128)>;
 
+// Depositing a value into an 8/16-bit subreg:
+def : Pat<(or (and GR64:$dst, -256),
+              (i64 (zextloadi8 addr:$src))),
+          (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(or (and GR32:$dst, -256),
+              (i32 (zextloadi8 addr:$src))),
+          (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(or (and GR64:$dst, -65536),
+              (i64 (zextloadi16 addr:$src))),
+          (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
+def : Pat<(or (and GR32:$dst, -65536),
+              (i32 (zextloadi16 addr:$src))),
+          (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
 // The same trick applies for 32-bit immediate fields in 64-bit
 // instructions.
 def : Pat<(add GR64:$src1, 0x0000000080000000),
diff --git a/llvm/test/CodeGen/X86/insert.ll b/llvm/test/CodeGen/X86/insert.ll
new file mode 100644
index 0000000000000..381de2ecaa164
--- /dev/null
+++ b/llvm/test/CodeGen/X86/insert.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
+
+define i64 @sub8(i64 noundef %res, ptr %byte) {
+; X86-LABEL: sub8:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb (%ecx), %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub8:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movb (%rsi), %al
+; X64-NEXT:    retq
+entry:
+  %and = and i64 %res, -256
+  %d = load i8, ptr %byte, align 1
+  %conv2 = zext i8 %d to i64
+  %or = or i64 %and, %conv2
+  ret i64 %or
+}
+
+define i64 @sub16(i64 noundef %res, ptr %byte) {
+; X86-LABEL: sub16:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll $16, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl (%eax), %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub16:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movw (%rsi), %ax
+; X64-NEXT:    retq
+entry:
+  %and = and i64 %res, -65536
+  %d = load i16, ptr %byte, align 1
+  %conv2 = zext i16 %d to i64
+  %or = or i64 %and, %conv2
+  ret i64 %or
+}
+
+define i32 @sub8_32(i32 noundef %res, ptr %byte) {
+; X86-LABEL: sub8_32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb (%ecx), %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub8_32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movb (%rsi), %al
+; X64-NEXT:    retq
+entry:
+  %and = and i32 %res, -256
+  %d = load i8, ptr %byte, align 1
+  %conv2 = zext i8 %d to i32
+  %or = or i32 %and, %conv2
+  ret i32 %or
+}
+
+define i32 @sub16_32(i32 noundef %res, ptr %byte) {
+; X86-LABEL: sub16_32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll $16, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl (%eax), %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub16_32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movw (%rsi), %ax
+; X64-NEXT:    retq
+entry:
+  %and = and i32 %res, -65536
+  %d = load i16, ptr %byte, align 1
+  %conv2 = zext i16 %d to i32
+  %or = or i32 %and, %conv2
+  ret i32 %or
+}
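
Note (editorial sketch, not part of the patch): the new patterns match the DAG idiom "(or (and X, mask), (zextload mem))" and select it as a partial-register load via INSERT_SUBREG, so no AND/OR merge is emitted. Below is a minimal LLVM IR reproducer of the i64/i8 case; the function name @deposit_low8 is made up for illustration, and the x86-64 output shown is the sequence the sub8 test above checks for.

  ; Deposit a loaded byte into the low 8 bits of %res.
  define i64 @deposit_low8(i64 %res, ptr %p) {
    %hi  = and i64 %res, -256    ; clear the low byte
    %b   = load i8, ptr %p       ; byte to deposit
    %ext = zext i8 %b to i64
    %out = or i64 %hi, %ext      ; matched by the new sub_8bit pattern
    ret i64 %out
  }

  ; llc -mtriple=x86_64-unknown-unknown selects:
  ;   movq %rdi, %rax            # copy %res into the return register
  ;   movb (%rsi), %al           # MOV8rm writes %al, leaving the upper bits of %rax intact
  ;   retq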