diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll
index 1347026e3e17b..b31e864b67695 100644
--- a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \
 ; RUN: | FileCheck --check-prefix=CHECK-LE %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mattr=+vsx < %s \
@@ -8,36 +9,127 @@
 ; RUN: | FileCheck --check-prefix=CHECK-P9BE %s
 
 define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind {
-  %added = fadd <2 x double> %x, %y
-  %call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone
-  %res1 = extractelement <2 x double> %call, i32 0
-  %res2 = extractelement <2 x double> %call, i32 1
-  %ret = fsub double %res1, %res2
-  ret double %ret
-
 ; CHECK-LE-LABEL: splat_swap:
-; CHECK-LE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
-; CHECK-LE-NEXT: xxswapd [[XREG2]], [[XREG1]]
-; CHECK-LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]]
-; CHECK-LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
+; CHECK-LE: # %bb.0:
+; CHECK-LE-NEXT: mflr 0
+; CHECK-LE-NEXT: std 0, 16(1)
+; CHECK-LE-NEXT: stdu 1, -80(1)
+; CHECK-LE-NEXT: li 3, 64
+; CHECK-LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; CHECK-LE-NEXT: xvadddp 63, 34, 35
+; CHECK-LE-NEXT: xxlor 1, 63, 63
+; CHECK-LE-NEXT: bl rint
+; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: xxswapd 0, 63
+; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-LE-NEXT: li 3, 48
+; CHECK-LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; CHECK-LE-NEXT: fmr 1, 0
+; CHECK-LE-NEXT: bl rint
+; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: li 3, 48
+; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; CHECK-LE-NEXT: li 3, 64
+; CHECK-LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; CHECK-LE-NEXT: xxmrghd 0, 0, 1
+; CHECK-LE-NEXT: xxswapd 1, 0
+; CHECK-LE-NEXT: xssubdp 1, 1, 0
+; CHECK-LE-NEXT: addi 1, 1, 80
+; CHECK-LE-NEXT: ld 0, 16(1)
+; CHECK-LE-NEXT: mtlr 0
+; CHECK-LE-NEXT: blr
 ;
 ; CHECK-BE-LABEL: splat_swap:
-; CHECK-BE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
-; CHECK-BE-NEXT: xxswapd [[XREG2]], [[XREG1]]
-; CHECK-BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]]
-; CHECK-BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
+; CHECK-BE: # %bb.0:
+; CHECK-BE-NEXT: mflr 0
+; CHECK-BE-NEXT: std 0, 16(1)
+; CHECK-BE-NEXT: stdu 1, -160(1)
+; CHECK-BE-NEXT: li 3, 144
+; CHECK-BE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; CHECK-BE-NEXT: xvadddp 63, 34, 35
+; CHECK-BE-NEXT: xxlor 1, 63, 63
+; CHECK-BE-NEXT: bl rint
+; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-BE-NEXT: li 3, 128
+; CHECK-BE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; CHECK-BE-NEXT: xxswapd 1, 63
+; CHECK-BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-BE-NEXT: bl rint
+; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: li 3, 128
+; CHECK-BE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-BE-NEXT: li 3, 144
+; CHECK-BE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; CHECK-BE-NEXT: xxmrghd 0, 0, 1
+; CHECK-BE-NEXT: xxswapd 1, 0
+; CHECK-BE-NEXT: xssubdp 1, 0, 1
+; CHECK-BE-NEXT: addi 1, 1, 160
+; CHECK-BE-NEXT: ld 0, 16(1)
+; CHECK-BE-NEXT: mtlr 0
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-P9LE-LABEL: splat_swap:
-; CHECK-P9LE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
-; CHECK-P9LE: xxswapd [[XREG2]], [[XREG1]]
-; CHECK-P9LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]]
-; CHECK-P9LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
+; CHECK-P9LE: # %bb.0:
+; CHECK-P9LE-NEXT: mflr 0
+; CHECK-P9LE-NEXT: std 0, 16(1)
+; CHECK-P9LE-NEXT: stdu 1, -64(1)
+; CHECK-P9LE-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
+; CHECK-P9LE-NEXT: xvadddp 63, 34, 35
+; CHECK-P9LE-NEXT: xscpsgndp 1, 63, 63
+; CHECK-P9LE-NEXT: bl rint
+; CHECK-P9LE-NEXT: nop
+; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-P9LE-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
+; CHECK-P9LE-NEXT: xxswapd 1, 63
+; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-P9LE-NEXT: bl rint
+; CHECK-P9LE-NEXT: nop
+; CHECK-P9LE-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
+; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-P9LE-NEXT: xxmrghd 0, 0, 1
+; CHECK-P9LE-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; CHECK-P9LE-NEXT: xxswapd 1, 0
+; CHECK-P9LE-NEXT: xssubdp 1, 1, 0
+; CHECK-P9LE-NEXT: addi 1, 1, 64
+; CHECK-P9LE-NEXT: ld 0, 16(1)
+; CHECK-P9LE-NEXT: mtlr 0
+; CHECK-P9LE-NEXT: blr
 ;
 ; CHECK-P9BE-LABEL: splat_swap:
-; CHECK-P9BE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
-; CHECK-P9BE: xxswapd [[XREG2]], [[XREG1]]
-; CHECK-P9BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]]
-; CHECK-P9BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
+; CHECK-P9BE: # %bb.0:
+; CHECK-P9BE-NEXT: mflr 0
+; CHECK-P9BE-NEXT: std 0, 16(1)
+; CHECK-P9BE-NEXT: stdu 1, -144(1)
+; CHECK-P9BE-NEXT: stxv 63, 128(1) # 16-byte Folded Spill
+; CHECK-P9BE-NEXT: xvadddp 63, 34, 35
+; CHECK-P9BE-NEXT: xscpsgndp 1, 63, 63
+; CHECK-P9BE-NEXT: bl rint
+; CHECK-P9BE-NEXT: nop
+; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-P9BE-NEXT: stxv 1, 112(1) # 16-byte Folded Spill
+; CHECK-P9BE-NEXT: xxswapd 1, 63
+; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-P9BE-NEXT: bl rint
+; CHECK-P9BE-NEXT: nop
+; CHECK-P9BE-NEXT: lxv 0, 112(1) # 16-byte Folded Reload
+; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-P9BE-NEXT: xxmrghd 0, 0, 1
+; CHECK-P9BE-NEXT: lxv 63, 128(1) # 16-byte Folded Reload
+; CHECK-P9BE-NEXT: xxswapd 1, 0
+; CHECK-P9BE-NEXT: xssubdp 1, 0, 1
+; CHECK-P9BE-NEXT: addi 1, 1, 144
+; CHECK-P9BE-NEXT: ld 0, 16(1)
+; CHECK-P9BE-NEXT: mtlr 0
+; CHECK-P9BE-NEXT: blr
+  %added = fadd <2 x double> %x, %y
+  %call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone
+  %res1 = extractelement <2 x double> %call, i32 0
+  %res2 = extractelement <2 x double> %call, i32 1
+  %ret = fsub double %res1, %res2
+  ret double %ret
 }
 
 declare <2 x double> @llvm.rint.v2f64(<2 x double>)