Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Power9] Exploit vector integer extend instructions
This patch adds build vector patterns to exploit the vector integer extend instructions:

- vextsb2w - Vector Extend Sign Byte To Word
- vextsb2d - Vector Extend Sign Byte To Doubleword
- vextsh2w - Vector Extend Sign Halfword To Word
- vextsh2d - Vector Extend Sign Halfword To Doubleword
- vextsw2d - Vector Extend Sign Word To Doubleword

Differential Revision: https://reviews.llvm.org/D33510

llvm-svn: 304992
- Loading branch information
Showing 2 changed files with 141 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9 | ||
; Each function in this test builds a vector from sign-extended elements of a | ||
; narrower input vector. The PWR9 check lines expect every such build to be | ||
; emitted as one vector-extend-sign instruction followed by blr. | ||
; Only the little-endian triple below is exercised by the RUN line above. | ||
target triple = "powerpc64le-unknown-linux-gnu" | ||
|
||
; Byte -> word: sign-extends bytes 0, 4, 8 and 12 of %a to i32 and packs them, | ||
; in that order, into lanes 0-3 of the <4 x i32> result. | ||
; The PWR9 checks require the whole sequence to become a single vextsb2w. | ||
define <4 x i32> @vextsb2w(<16 x i8> %a) { | ||
; PWR9-LABEL: vextsb2w: | ||
; PWR9: # BB#0: # %entry | ||
; PWR9-NEXT: vextsb2w 2, 2 | ||
; PWR9-NEXT: blr | ||
entry: | ||
; lane 0 <- sext(byte 0) | ||
%vecext = extractelement <16 x i8> %a, i32 0 | ||
%conv = sext i8 %vecext to i32 | ||
%vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 | ||
; lane 1 <- sext(byte 4) | ||
%vecext1 = extractelement <16 x i8> %a, i32 4 | ||
%conv2 = sext i8 %vecext1 to i32 | ||
%vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 | ||
; lane 2 <- sext(byte 8) | ||
%vecext4 = extractelement <16 x i8> %a, i32 8 | ||
%conv5 = sext i8 %vecext4 to i32 | ||
%vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 | ||
; lane 3 <- sext(byte 12) | ||
%vecext7 = extractelement <16 x i8> %a, i32 12 | ||
%conv8 = sext i8 %vecext7 to i32 | ||
%vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 | ||
ret <4 x i32> %vecinit9 | ||
} | ||
|
||
; Byte -> doubleword: sign-extends bytes 0 and 8 of %a to i64 and packs them, | ||
; in that order, into lanes 0-1 of the <2 x i64> result. | ||
; The PWR9 checks require the whole sequence to become a single vextsb2d. | ||
define <2 x i64> @vextsb2d(<16 x i8> %a) { | ||
; PWR9-LABEL: vextsb2d: | ||
; PWR9: # BB#0: # %entry | ||
; PWR9-NEXT: vextsb2d 2, 2 | ||
; PWR9-NEXT: blr | ||
entry: | ||
; lane 0 <- sext(byte 0) | ||
%vecext = extractelement <16 x i8> %a, i32 0 | ||
%conv = sext i8 %vecext to i64 | ||
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 | ||
; lane 1 <- sext(byte 8) | ||
%vecext1 = extractelement <16 x i8> %a, i32 8 | ||
%conv2 = sext i8 %vecext1 to i64 | ||
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 | ||
ret <2 x i64> %vecinit3 | ||
} | ||
|
||
; Halfword -> word: sign-extends halfwords 0, 2, 4 and 6 of %a to i32 and packs | ||
; them, in that order, into lanes 0-3 of the <4 x i32> result. | ||
; The PWR9 checks require the whole sequence to become a single vextsh2w. | ||
define <4 x i32> @vextsh2w(<8 x i16> %a) { | ||
; PWR9-LABEL: vextsh2w: | ||
; PWR9: # BB#0: # %entry | ||
; PWR9-NEXT: vextsh2w 2, 2 | ||
; PWR9-NEXT: blr | ||
entry: | ||
; lane 0 <- sext(halfword 0) | ||
%vecext = extractelement <8 x i16> %a, i32 0 | ||
%conv = sext i16 %vecext to i32 | ||
%vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 | ||
; lane 1 <- sext(halfword 2) | ||
%vecext1 = extractelement <8 x i16> %a, i32 2 | ||
%conv2 = sext i16 %vecext1 to i32 | ||
%vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 | ||
; lane 2 <- sext(halfword 4) | ||
%vecext4 = extractelement <8 x i16> %a, i32 4 | ||
%conv5 = sext i16 %vecext4 to i32 | ||
%vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 | ||
; lane 3 <- sext(halfword 6) | ||
%vecext7 = extractelement <8 x i16> %a, i32 6 | ||
%conv8 = sext i16 %vecext7 to i32 | ||
%vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 | ||
ret <4 x i32> %vecinit9 | ||
} | ||
|
||
; Halfword -> doubleword: sign-extends halfwords 0 and 4 of %a to i64 and packs | ||
; them, in that order, into lanes 0-1 of the <2 x i64> result. | ||
; The PWR9 checks require the whole sequence to become a single vextsh2d. | ||
define <2 x i64> @vextsh2d(<8 x i16> %a) { | ||
; PWR9-LABEL: vextsh2d: | ||
; PWR9: # BB#0: # %entry | ||
; PWR9-NEXT: vextsh2d 2, 2 | ||
; PWR9-NEXT: blr | ||
entry: | ||
; lane 0 <- sext(halfword 0) | ||
%vecext = extractelement <8 x i16> %a, i32 0 | ||
%conv = sext i16 %vecext to i64 | ||
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 | ||
; lane 1 <- sext(halfword 4) | ||
%vecext1 = extractelement <8 x i16> %a, i32 4 | ||
%conv2 = sext i16 %vecext1 to i64 | ||
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 | ||
ret <2 x i64> %vecinit3 | ||
} | ||
|
||
; Word -> doubleword: sign-extends words 0 and 2 of %a to i64 and packs them, | ||
; in that order, into lanes 0-1 of the <2 x i64> result. | ||
; The PWR9 checks require the whole sequence to become a single vextsw2d. | ||
define <2 x i64> @vextsw2d(<4 x i32> %a) { | ||
; PWR9-LABEL: vextsw2d: | ||
; PWR9: # BB#0: # %entry | ||
; PWR9-NEXT: vextsw2d 2, 2 | ||
; PWR9-NEXT: blr | ||
entry: | ||
; lane 0 <- sext(word 0) | ||
%vecext = extractelement <4 x i32> %a, i32 0 | ||
%conv = sext i32 %vecext to i64 | ||
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 | ||
; lane 1 <- sext(word 2) | ||
%vecext1 = extractelement <4 x i32> %a, i32 2 | ||
%conv2 = sext i32 %vecext1 to i64 | ||
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 | ||
ret <2 x i64> %vecinit3 | ||
} |