Skip to content

Commit

Permalink
[X86] Remove some InstRWs for plain store instructions on Sandy Bridge.
Browse files (browse the repository at this point in the history)
We were forcing the latency of these instructions to 5 cycles, but every other scheduler model had them as 1 cycle. I'm sure I didn't get everything, but this gets a big portion.

llvm-svn: 329339
  • Loading branch information
topperc committed Apr 5, 2018
1 parent 650fd6c commit c6bb36a
Show file tree
Hide file tree
Showing 18 changed files with 165 additions and 201 deletions.
3 changes: 0 additions & 3 deletions llvm/lib/Target/X86/X86SchedBroadwell.td
Original file line number Diff line number Diff line change
Expand Up @@ -663,9 +663,6 @@ def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
"MOV(16|32|64)mr",
"MOV8mi",
"MOV8mr",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP32m",
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/X86/X86SchedHaswell.td
Original file line number Diff line number Diff line change
Expand Up @@ -633,9 +633,6 @@ def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm",
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
"MOV(16|32|64)mr",
"MOV8mi",
"MOV8mr",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP32m",
Expand Down
33 changes: 5 additions & 28 deletions llvm/lib/Target/X86/X86SchedSandyBridge.td
Original file line number Diff line number Diff line change
Expand Up @@ -925,30 +925,8 @@ def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup33], (instregex "MOV(8|16|32|64)mr",
"MOVNTI_64mr",
"MOVNTImr",
"PUSH64i8",
"PUSH(16|32|64)r",
"VEXTRACTF128mr",
"(V?)MOVAPD(Y?)mr",
"(V?)MOVAPS(Y?)mr",
"(V?)MOVDQA(Y?)mr",
"(V?)MOVDQU(Y?)mr",
"(V?)MOVHPDmr",
"(V?)MOVHPSmr",
"(V?)MOVLPDmr",
"(V?)MOVLPSmr",
"(V?)MOVNTDQ(Y?)mr",
"(V?)MOVNTPD(Y?)mr",
"(V?)MOVNTPS(Y?)mr",
"(V?)MOVPDI2DImr",
"(V?)MOVPQI2QImr",
"(V?)MOVPQIto64mr",
"(V?)MOVSDmr",
"(V?)MOVSSmr",
"(V?)MOVUPD(Y?)mr",
"(V?)MOVUPS(Y?)mr")>;
def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8",
"PUSH(16|32|64)r")>;

def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> {
let Latency = 7;
Expand Down Expand Up @@ -998,7 +976,7 @@ def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr",
"VMASKMOVPS(Y?)mr")>;

def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let Latency = 5;
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
Expand All @@ -1018,8 +996,7 @@ def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi",
"STOSB",
def: InstRW<[SBWriteResGroup40], (instregex "STOSB",
"STOSL",
"STOSQ",
"STOSW")>;
Expand All @@ -1039,7 +1016,7 @@ def SBWriteResGroup42 : SchedWriteRes<[SBPort05,SBPort015]> {
def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG(8|16|32|64)rr")>;

def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let Latency = 5;
let Latency = 3;
let NumMicroOps = 4;
let ResourceCycles = [1,1,2];
}
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/X86/X86SchedSkylakeClient.td
Original file line number Diff line number Diff line change
Expand Up @@ -667,8 +667,6 @@ def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm",
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
"MOV(8|16|32|64)mr",
"MOV8mi",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP32m",
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/X86/X86SchedSkylakeServer.td
Original file line number Diff line number Diff line change
Expand Up @@ -1331,9 +1331,6 @@ def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm",
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
"MOV(16|32|64)mr",
"MOV8mi",
"MOV8mr",
"MOVAPDmr",
"MOVAPSmr",
"MOVDQAmr",
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/avx-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1662,14 +1662,14 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa
; GENERIC-LABEL: test_extractf128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_extractf128:
; SANDY: # %bb.0:
; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: vzeroupper # sched: [100:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
Expand Down Expand Up @@ -2526,14 +2526,14 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movapd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movapd:
Expand Down Expand Up @@ -2588,14 +2588,14 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movaps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movaps:
Expand Down Expand Up @@ -2816,15 +2816,15 @@ define void @test_movntdq(<4 x i64> %a0, <4 x i64> *%a1) {
; GENERIC-LABEL: test_movntdq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movntdq:
; SANDY: # %bb.0:
; SANDY-NEXT: #APP
; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: vzeroupper # sched: [100:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
Expand Down Expand Up @@ -2883,13 +2883,13 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) {
; GENERIC-LABEL: test_movntpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movntpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movntpd:
Expand Down Expand Up @@ -2936,13 +2936,13 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) {
; GENERIC-LABEL: test_movntps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movntps:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movntps:
Expand Down Expand Up @@ -3116,16 +3116,16 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movupd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movupd:
Expand Down Expand Up @@ -3180,16 +3180,16 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movups:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movups:
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/X86/avx512-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1937,7 +1937,7 @@ define void @f32tof64_loadstore() {
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32tof64_loadstore:
Expand All @@ -1960,7 +1960,7 @@ define void @f64tof32_loadstore() nounwind uwtable {
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f64tof32_loadstore:
Expand Down Expand Up @@ -5934,7 +5934,7 @@ define <4 x i32> @mov_test4(i32* %x) {
define void @mov_test5(float %x, float* %y) {
; GENERIC-LABEL: mov_test5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test5:
Expand All @@ -5948,7 +5948,7 @@ define void @mov_test5(float %x, float* %y) {
define void @mov_test6(double %x, double* %y) {
; GENERIC-LABEL: mov_test6:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test6:
Expand Down Expand Up @@ -6943,8 +6943,8 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
define i8 @conv1(<8 x i1>* %R) {
; GENERIC-LABEL: conv1:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: movb $-1, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: movb $-1, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
Expand Down Expand Up @@ -7512,7 +7512,7 @@ define void @f1(i32 %c) {
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
; GENERIC-NEXT: xorl $1, %edi # sched: [1:0.33]
; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [5:1.00]
; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: jmp f2 # TAILCALL
;
; SKX-LABEL: f1:
Expand All @@ -7536,7 +7536,7 @@ define void @store_i16_i1(i16 %x, i1 *%y) {
; GENERIC-LABEL: store_i16_i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
; GENERIC-NEXT: movb %dil, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: store_i16_i1:
Expand All @@ -7553,7 +7553,7 @@ define void @store_i8_i1(i8 %x, i1 *%y) {
; GENERIC-LABEL: store_i8_i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
; GENERIC-NEXT: movb %dil, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: store_i8_i1:
Expand Down Expand Up @@ -8698,7 +8698,7 @@ define <16 x float> @broadcast_ss_spill(float %x) {
; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 32
; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00]
; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f32
; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
Expand Down Expand Up @@ -8728,7 +8728,7 @@ define <8 x double> @broadcast_sd_spill(double %x) {
; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 32
; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00]
; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f64
; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
Expand Down
23 changes: 11 additions & 12 deletions llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,22 @@ define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* noca
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: paddd (%ecx), %xmm0
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movdqa %xmm0, (%ecx)
; CHECK-NEXT: movl (%ecx), %esi
; CHECK-NEXT: movl 4(%ecx), %edi
; CHECK-NEXT: shll $4, %edx
; CHECK-NEXT: movl 8(%ecx), %ebx
; CHECK-NEXT: movl 12(%ecx), %ecx
; CHECK-NEXT: movl %esi, 12(%eax,%edx)
; CHECK-NEXT: movl %edi, (%eax,%edx)
; CHECK-NEXT: movl %ebx, 8(%eax,%edx)
; CHECK-NEXT: movl %ecx, 4(%eax,%edx)
; CHECK-NEXT: paddd (%edx), %xmm0
; CHECK-NEXT: movdqa %xmm0, (%edx)
; CHECK-NEXT: movl (%edx), %esi
; CHECK-NEXT: movl 4(%edx), %edi
; CHECK-NEXT: shll $4, %ecx
; CHECK-NEXT: movl 8(%edx), %ebx
; CHECK-NEXT: movl 12(%edx), %edx
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
; CHECK-NEXT: movl %edi, (%eax,%ecx)
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
; CHECK-NEXT: movl %edx, 4(%eax,%ecx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
; CHECK-NEXT: ## -- End function
entry:
%0 = bitcast i32* %y to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 16
Expand Down
Loading

0 comments on commit c6bb36a

Please sign in to comment.