Skip to content

Commit

Permalink
[X86] Add tests showing failure to combine consecutive loads + FSHL into a single load
Browse files Browse the repository at this point in the history

Similar to some of the regressions seen in D75114
  • Loading branch information
RKSimon committed Mar 4, 2020
1 parent 5b1e5b4 commit 4c411d2
Showing 1 changed file with 151 additions and 0 deletions.
151 changes: 151 additions & 0 deletions llvm/test/CodeGen/X86/fshl.ll
Expand Up @@ -524,6 +524,157 @@ define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
ret i64 %tmp
}

;
; Combine Consecutive Loads
;

; Degenerate i8 case. llvm.fshl takes its shift amount modulo the bit width,
; so shifting an i8 funnel by 8 is a shift by 0 and the call yields its first
; operand, the byte loaded from %p + 1. The expected code on both the 32-bit
; and 64-bit targets is therefore one byte load from offset 1; the load from
; %p itself does not appear in the output.
define i8 @combine_fshl_load_i8(i8* %p) nounwind {
; X86-LABEL: combine_fshl_load_i8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb 1(%eax), %al
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshl_load_i8:
; X64:       # %bb.0:
; X64-NEXT:    movb 1(%rdi), %al
; X64-NEXT:    retq
  %hi.addr = getelementptr i8, i8* %p, i32 1
  %lo = load i8, i8* %p
  %hi = load i8, i8* %hi.addr
  %funnel = call i8 @llvm.fshl.i8(i8 %hi, i8 %lo, i8 8)
  ret i8 %funnel
}

; Funnel-shifts two adjacent i16 elements (element 0 as the low input, element
; 1 as the high input) left by 8, i.e. (%hi << 8) | (%lo >> 8). On a
; little-endian target that value equals the two bytes at offsets 1 and 2 from
; %p, yet every configuration below still expects two separate loads:
;  - the *-FAST blocks load both words and merge them with shldw;
;  - the *-SLOW blocks load the word at offset 2 and the byte at offset 1 and
;    merge them with shll + orl.
define i16 @combine_fshl_load_i16(i16* %p) nounwind {
; X86-FAST-LABEL: combine_fshl_load_i16:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movzwl (%eax), %ecx
; X86-FAST-NEXT:    movzwl 2(%eax), %eax
; X86-FAST-NEXT:    shldw $8, %cx, %ax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: combine_fshl_load_i16:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    movzwl 2(%ecx), %eax
; X86-SLOW-NEXT:    movzbl 1(%ecx), %ecx
; X86-SLOW-NEXT:    shll $8, %eax
; X86-SLOW-NEXT:    orl %ecx, %eax
; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: combine_fshl_load_i16:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movzwl (%rdi), %ecx
; X64-FAST-NEXT:    movzwl 2(%rdi), %eax
; X64-FAST-NEXT:    shldw $8, %cx, %ax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: combine_fshl_load_i16:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movzwl 2(%rdi), %eax
; X64-SLOW-NEXT:    movzbl 1(%rdi), %ecx
; X64-SLOW-NEXT:    shll $8, %eax
; X64-SLOW-NEXT:    orl %ecx, %eax
; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT:    retq
  %lo.addr = getelementptr i16, i16* %p, i32 0
  %hi.addr = getelementptr i16, i16* %p, i32 1
  %lo = load i16, i16* %lo.addr
  %hi = load i16, i16* %hi.addr
  %funnel = call i16 @llvm.fshl.i16(i16 %hi, i16 %lo, i16 8)
  ret i16 %funnel
}

; Funnel-shifts two adjacent i32 elements (element 2 at byte offset 8 as the
; low input, element 3 at byte offset 12 as the high input) left by 8, i.e.
; (%hi << 8) | (%lo >> 24). On a little-endian target that is the 32-bit value
; stored at byte offset 11.
;  - the *-SLOW blocks expect exactly that: one movl from offset 11;
;  - the *-FAST blocks still expect two loads merged with shldl.
define i32 @combine_fshl_load_i32(i32* %p) nounwind {
; X86-FAST-LABEL: combine_fshl_load_i32:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movl 8(%eax), %ecx
; X86-FAST-NEXT:    movl 12(%eax), %eax
; X86-FAST-NEXT:    shldl $8, %ecx, %eax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: combine_fshl_load_i32:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl 11(%eax), %eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: combine_fshl_load_i32:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl 8(%rdi), %ecx
; X64-FAST-NEXT:    movl 12(%rdi), %eax
; X64-FAST-NEXT:    shldl $8, %ecx, %eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: combine_fshl_load_i32:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movl 11(%rdi), %eax
; X64-SLOW-NEXT:    retq
  %lo.addr = getelementptr i32, i32* %p, i32 2
  %hi.addr = getelementptr i32, i32* %p, i32 3
  %lo = load i32, i32* %lo.addr
  %hi = load i32, i32* %hi.addr
  %funnel = call i32 @llvm.fshl.i32(i32 %hi, i32 %lo, i32 8)
  ret i32 %funnel
}

; Funnel-shifts two adjacent i64 elements (element 1 at byte offset 8 as the
; low input, element 2 at byte offset 16 as the high input) left by 24, i.e.
; (%hi << 24) | (%lo >> 40). On a little-endian target that is the 64-bit
; value stored at byte offset 13, but no configuration below folds the loads:
;  - the 64-bit blocks load both quadwords and merge them with shldq, or with
;    shrq + shlq + orq in the *-SLOW variant;
;  - the 32-bit blocks build the edx:eax result from three 32-bit loads at
;    offsets 12, 16 and 20, merged with shldl/shrdl, or with shift + or pairs
;    in the *-SLOW variant.
define i64 @combine_fshl_load_i64(i64* %p) nounwind {
; X86-FAST-LABEL: combine_fshl_load_i64:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    pushl %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movl 12(%ecx), %eax
; X86-FAST-NEXT:    movl 16(%ecx), %esi
; X86-FAST-NEXT:    movl 20(%ecx), %edx
; X86-FAST-NEXT:    shldl $24, %esi, %edx
; X86-FAST-NEXT:    shrdl $8, %esi, %eax
; X86-FAST-NEXT:    popl %esi
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: combine_fshl_load_i64:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    pushl %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl 20(%eax), %edx
; X86-SLOW-NEXT:    movl 12(%eax), %ecx
; X86-SLOW-NEXT:    movl 16(%eax), %esi
; X86-SLOW-NEXT:    shrl $8, %ecx
; X86-SLOW-NEXT:    movl %esi, %eax
; X86-SLOW-NEXT:    shll $24, %eax
; X86-SLOW-NEXT:    orl %ecx, %eax
; X86-SLOW-NEXT:    shrl $8, %esi
; X86-SLOW-NEXT:    shll $24, %edx
; X86-SLOW-NEXT:    orl %esi, %edx
; X86-SLOW-NEXT:    popl %esi
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: combine_fshl_load_i64:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movq 8(%rdi), %rcx
; X64-FAST-NEXT:    movq 16(%rdi), %rax
; X64-FAST-NEXT:    shldq $24, %rcx, %rax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: combine_fshl_load_i64:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movq 8(%rdi), %rcx
; X64-SLOW-NEXT:    movq 16(%rdi), %rax
; X64-SLOW-NEXT:    shrq $40, %rcx
; X64-SLOW-NEXT:    shlq $24, %rax
; X64-SLOW-NEXT:    orq %rcx, %rax
; X64-SLOW-NEXT:    retq
  %lo.addr = getelementptr i64, i64* %p, i64 1
  %hi.addr = getelementptr i64, i64* %p, i64 2
  %lo = load i64, i64* %lo.addr
  %hi = load i64, i64* %hi.addr
  %funnel = call i64 @llvm.fshl.i64(i64 %hi, i64 %lo, i64 24)
  ret i64 %funnel
}

; Module flags: a single flag named "ProfileSummary" (behavior flag 1) whose
; value is node !1. The nodes !2 through !9 that !1 lists are defined
; elsewhere in this file.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
Expand Down

0 comments on commit 4c411d2

Please sign in to comment.