Skip to content

Commit

Permalink
[PowerPC] Improve codegen for vector fp to int widening conversions
Browse files Browse the repository at this point in the history
We currently do not utilize instructions that convert single
precision vectors to doubleword integer vectors. These conversions
come up in code occasionally and this improvement allows us to
open code some functions that need to be added to altivec.h.
  • Loading branch information
nemanjai committed Apr 22, 2021
1 parent 28b6726 commit 092619c
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 0 deletions.
32 changes: 32 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrVSX.td
Expand Up @@ -2899,6 +2899,22 @@ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
(f64 (fpextend (extractelt v4f32:$B, 3))))),
(v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
(XXPERMDI $A, $B, 3), 1)))>;
// Signed float -> i64 widening conversion of lanes 0 and 2 of $A.
// The source DAG is fp_to_sint applied to a two-element build_vector whose
// operands are the fpextend'ed lanes; it is selected to a single XVCVSPSXDS
// on the unmodified input.
// NOTE(review): the enclosing predicate block is not visible in this hunk;
// the direct 0/2 mapping suggests the big-endian section -- confirm.
def : Pat<(v2i64 (fp_to_sint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 2)))))),
(v2i64 (XVCVSPSXDS $A))>;
// Unsigned float -> i64 widening conversion of lanes 0 and 2 of $A.
// Same shape as the signed form, but matches fp_to_uint and selects
// XVCVSPUXDS on the unmodified input.
// NOTE(review): enclosing predicate block not visible here; presumably the
// big-endian section -- confirm.
def : Pat<(v2i64 (fp_to_uint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 2)))))),
(v2i64 (XVCVSPUXDS $A))>;
// Signed float -> i64 widening conversion of lanes 1 and 3 of $A.
// The input is first passed through XXSLDWI $A, $A, 1 (both operands are the
// same register, shift amount of one word) and the result feeds XVCVSPSXDS.
// Presumably the shift moves lanes 1/3 into the word positions the conversion
// reads -- verify against the ISA description of XVCVSPSXDS.
def : Pat<(v2i64 (fp_to_sint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3)))))),
(v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
// Unsigned float -> i64 widening conversion of lanes 1 and 3 of $A.
// Matches fp_to_uint over the fpextend'ed lanes and selects XVCVSPUXDS on the
// result of XXSLDWI $A, $A, 1 (same register for both shift operands, one
// word shift).
// Presumably the shift repositions lanes 1/3 for the conversion -- verify
// against the ISA description of XVCVSPUXDS.
def : Pat<(v2i64 (fp_to_uint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3)))))),
(v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
// Selects XVCVSXWDP on the unmodified $A for the WToDPExtractConv.BV02S
// source fragment. The fragment itself is defined outside this hunk.
def : Pat<WToDPExtractConv.BV02S,
(v2f64 (XVCVSXWDP $A))>;
def : Pat<WToDPExtractConv.BV13S,
Expand Down Expand Up @@ -3008,6 +3024,22 @@ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
// Builds a v2f64 from lane 3 of $A and lane 3 of $B, each fpextend'ed to f64.
// Selected as XVCVSPDP applied to XXPERMDI $B, $A, 0 (note the operand order:
// $B first, then $A).
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
(f64 (fpextend (extractelt v4f32:$B, 3))))),
(v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
// Signed float -> i64 widening conversion of lanes 1 and 3 of $A.
// In this section the 1/3 lane pair is the one that maps straight onto
// XVCVSPSXDS with no preparatory shift.
// NOTE(review): the enclosing predicate block is not visible in this hunk;
// the direct 1/3 mapping suggests the little-endian section -- confirm.
def : Pat<(v2i64 (fp_to_sint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3)))))),
(v2i64 (XVCVSPSXDS $A))>;
// Unsigned float -> i64 widening conversion of lanes 1 and 3 of $A.
// Matches fp_to_uint over the fpextend'ed lanes and selects XVCVSPUXDS on the
// unmodified input.
// NOTE(review): enclosing predicate block not visible here; presumably the
// little-endian section -- confirm.
def : Pat<(v2i64 (fp_to_uint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3)))))),
(v2i64 (XVCVSPUXDS $A))>;
// Signed float -> i64 widening conversion of lanes 0 and 2 of $A.
// Here the 0/2 lane pair needs XXSLDWI $A, $A, 1 (same register for both
// operands, one word shift) before XVCVSPSXDS.
// Presumably the shift moves lanes 0/2 into the word positions the conversion
// reads in this lane numbering -- verify against the ISA description.
def : Pat<(v2i64 (fp_to_sint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 2)))))),
(v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
// Unsigned float -> i64 widening conversion of lanes 0 and 2 of $A.
// Matches fp_to_uint over the fpextend'ed lanes and selects XVCVSPUXDS on the
// result of XXSLDWI $A, $A, 1 (same register for both operands, one word
// shift).
// Presumably the shift repositions lanes 0/2 for the conversion -- verify
// against the ISA description of XVCVSPUXDS.
def : Pat<(v2i64 (fp_to_uint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 2)))))),
(v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
// Selects XVCVSXWDP on XXSLDWI $A, $A, 1 for the WToDPExtractConv.BV02S
// source fragment. The fragment itself is defined outside this hunk.
def : Pat<WToDPExtractConv.BV02S,
(v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV13S,
Expand Down
128 changes: 128 additions & 0 deletions llvm/test/CodeGen/PowerPC/build-vector-tests.ll
Expand Up @@ -6532,3 +6532,131 @@ entry:
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
ret <2 x double> %vecinit3
}

; Builds a <2 x i64> from lanes 1 and 3 of %a, converting each float with
; fptosi (signed) and inserting the results into lanes 0 and 1 respectively.
; The expectations below want the little-endian runs to emit a lone
; xvcvspsxds, and the big-endian runs to emit xxsldwi (shift by one word)
; followed by xvcvspsxds.
define dso_local <2 x i64> @test_xvcvspsxds13(<4 x float> %a) local_unnamed_addr {
; P9BE-LABEL: test_xvcvspsxds13:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
; P9BE-NEXT: xvcvspsxds v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: test_xvcvspsxds13:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xvcvspsxds v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: test_xvcvspsxds13:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
; P8BE-NEXT: xvcvspsxds v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: test_xvcvspsxds13:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xvcvspsxds v2, v2
; P8LE-NEXT: blr
entry:
%vecext = extractelement <4 x float> %a, i32 1
%conv = fptosi float %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <4 x float> %a, i32 3
%conv2 = fptosi float %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}

; Builds a <2 x i64> from lanes 1 and 3 of %a, converting each float with
; fptoui (unsigned) and inserting the results into lanes 0 and 1 respectively.
; The expectations below want the little-endian runs to emit a lone
; xvcvspuxds, and the big-endian runs to emit xxsldwi (shift by one word)
; followed by xvcvspuxds.
define dso_local <2 x i64> @test_xvcvspuxds13(<4 x float> %a) local_unnamed_addr {
; P9BE-LABEL: test_xvcvspuxds13:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
; P9BE-NEXT: xvcvspuxds v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: test_xvcvspuxds13:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xvcvspuxds v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: test_xvcvspuxds13:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
; P8BE-NEXT: xvcvspuxds v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: test_xvcvspuxds13:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xvcvspuxds v2, v2
; P8LE-NEXT: blr
entry:
%vecext = extractelement <4 x float> %a, i32 1
%conv = fptoui float %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <4 x float> %a, i32 3
%conv2 = fptoui float %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}

; Builds a <2 x i64> from lanes 0 and 2 of %a, converting each float with
; fptosi (signed) and inserting the results into lanes 0 and 1 respectively.
; The expectations below want the big-endian runs to emit a lone xvcvspsxds,
; and the little-endian runs to emit xxsldwi (shift by one word) followed by
; xvcvspsxds.
define dso_local <2 x i64> @test_xvcvspsxds02(<4 x float> %a) local_unnamed_addr {
; P9BE-LABEL: test_xvcvspsxds02:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xvcvspsxds v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: test_xvcvspsxds02:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
; P9LE-NEXT: xvcvspsxds v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: test_xvcvspsxds02:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xvcvspsxds v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: test_xvcvspsxds02:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
; P8LE-NEXT: xvcvspsxds v2, vs0
; P8LE-NEXT: blr
entry:
%vecext = extractelement <4 x float> %a, i32 0
%conv = fptosi float %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <4 x float> %a, i32 2
%conv2 = fptosi float %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}

; Builds a <2 x i64> from lanes 0 and 2 of %a, converting each float with
; fptoui (unsigned) and inserting the results into lanes 0 and 1 respectively.
; The expectations below want the big-endian runs to emit a lone xvcvspuxds,
; and the little-endian runs to emit xxsldwi (shift by one word) followed by
; xvcvspuxds.
define dso_local <2 x i64> @test_xvcvspuxds02(<4 x float> %a) local_unnamed_addr {
; P9BE-LABEL: test_xvcvspuxds02:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xvcvspuxds v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: test_xvcvspuxds02:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
; P9LE-NEXT: xvcvspuxds v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: test_xvcvspuxds02:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xvcvspuxds v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: test_xvcvspuxds02:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
; P8LE-NEXT: xvcvspuxds v2, vs0
; P8LE-NEXT: blr
entry:
%vecext = extractelement <4 x float> %a, i32 0
%conv = fptoui float %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <4 x float> %a, i32 2
%conv2 = fptoui float %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}

0 comments on commit 092619c

Please sign in to comment.