Skip to content

Commit

Permalink
[Hexagon] Switch vunpackub->op->vpackeb pattern to vzb/vshuffeb
Browse files Browse the repository at this point in the history
V6_vzb and V6_vshuffeb can use any 2 resources in a packet, while
V6_vunpackub/V6_vpackeb both need a shift resource.

Also, add patterns for shifting vectors of i8 by per-element (vector) shift amounts.
  • Loading branch information
Krzysztof Parzyszek committed Oct 12, 2022
1 parent b2674de commit 7963216
Show file tree
Hide file tree
Showing 5 changed files with 488 additions and 106 deletions.
34 changes: 22 additions & 12 deletions llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
Original file line number Diff line number Diff line change
Expand Up @@ -634,14 +634,14 @@ let Predicates = [UseHVX] in {
(LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;

def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
(V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
(V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
(V6_vshuffeb (V6_vaslh (HiVec (V6_vzb HvxVR:$Vs)), I32:$Rt),
(V6_vaslh (LoVec (V6_vzb HvxVR:$Vs)), I32:$Rt))>;
def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt),
(V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt),
(V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>;
(V6_vshuffeb (V6_vasrh (HiVec (V6_vsb HvxVR:$Vs)), I32:$Rt),
(V6_vasrh (LoVec (V6_vsb HvxVR:$Vs)), I32:$Rt))>;
def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt),
(V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
(V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
(V6_vshuffeb (V6_vlsrh (HiVec (V6_vzb HvxVR:$Vs)), I32:$Rt),
(V6_vlsrh (LoVec (V6_vzb HvxVR:$Vs)), I32:$Rt))>;

def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>;
def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>;
Expand All @@ -655,6 +655,16 @@ let Predicates = [UseHVX] in {
def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)),
(V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;

// Shifts of i8 vectors by per-element (vector) shift amounts.
// Unlike the i16/i32 patterns, which each select a single instruction, the
// i8 forms are expanded: both operands are widened with V6_vzb/V6_vsb, the
// Hi and Lo halves of each widened result are shifted separately with the
// halfword instruction, and V6_vshuffeb combines the two halfword results
// back into a single byte vector.
// NOTE(review): presumably V6_vzb zero-extends and V6_vsb sign-extends each
// byte to a halfword, producing a vector pair -- confirm against the HVX
// instruction reference.

// shl: the value and the shift amount are both widened with V6_vzb.
def: Pat<(shl HVI8:$Vs, HVI8:$Vt),
(V6_vshuffeb (V6_vaslhv (HiVec (V6_vzb $Vs)), (HiVec (V6_vzb $Vt))),
(V6_vaslhv (LoVec (V6_vzb $Vs)), (LoVec (V6_vzb $Vt))))>;
// sra: the value is widened with V6_vsb (presumably so the halfword
// arithmetic shift sees the original sign); the shift amount still uses
// V6_vzb.
def: Pat<(sra HVI8:$Vs, HVI8:$Vt),
(V6_vshuffeb (V6_vasrhv (HiVec (V6_vsb $Vs)), (HiVec (V6_vzb $Vt))),
(V6_vasrhv (LoVec (V6_vsb $Vs)), (LoVec (V6_vzb $Vt))))>;
// srl: the value and the shift amount are both widened with V6_vzb.
def: Pat<(srl HVI8:$Vs, HVI8:$Vt),
(V6_vshuffeb (V6_vlsrhv (HiVec (V6_vzb $Vs)), (HiVec (V6_vzb $Vt))),
(V6_vlsrhv (LoVec (V6_vzb $Vs)), (LoVec (V6_vzb $Vt))))>;

def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>;
Expand All @@ -676,22 +686,22 @@ let Predicates = [UseHVX] in {
}

def: Pat<(VecI8 (ctpop HVI8:$Vs)),
(V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))),
(V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>;
(V6_vshuffeb (V6_vpopcounth (HiVec (V6_vzb HvxVR:$Vs))),
(V6_vpopcounth (LoVec (V6_vzb HvxVR:$Vs))))>;
def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>;
def: Pat<(VecI32 (ctpop HVI32:$Vs)),
(V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))),
(HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>;

let Predicates = [UseHVX,UseHVXV60] in
def: Pat<(VecI8 (ctlz HVI8:$Vs)),
(V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
(V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
(V6_vsubb (V6_vshuffeb (V6_vcl0h (HiVec (V6_vzb HvxVR:$Vs))),
(V6_vcl0h (LoVec (V6_vzb HvxVR:$Vs)))),
(V60splatib (i32 0x08)))>;
let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in
def: Pat<(VecI8 (ctlz HVI8:$Vs)),
(V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
(V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
(V6_vsubb (V6_vshuffeb (V6_vcl0h (HiVec (V6_vzb HvxVR:$Vs))),
(V6_vcl0h (LoVec (V6_vzb HvxVR:$Vs)))),
(V62splatib (i32 0x08)))>;

def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>;
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/Hexagon/autohvx/bitcount-128b.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
; RUN: llc -march=hexagon < %s | FileCheck %s

; CHECK-LABEL: f0
; CHECK: v[[V00:[0-9]+]]:[[V01:[0-9]+]].uh = vunpack(v0.ub)
; CHECK: v[[V00:[0-9]+]]:[[V01:[0-9]+]].uh = vzxt(v0.ub)
; CHECK-DAG: v[[V02:[0-9]+]].h = vpopcount(v[[V00]].h)
; CHECK-DAG: v[[V03:[0-9]+]].h = vpopcount(v[[V01]].h)
; CHECK: v0.b = vpacke(v[[V02]].h,v[[V03]].h)
; CHECK: v0.b = vshuffe(v[[V02]].b,v[[V03]].b)
define <128 x i8> @f0(<128 x i8> %a0) #0 {
%t0 = call <128 x i8> @llvm.ctpop.v128i8(<128 x i8> %a0)
ret <128 x i8> %t0
Expand All @@ -28,11 +28,11 @@ define <32 x i32> @f2(<32 x i32> %a0) #0 {

; CHECK-LABEL: f3
; CHECK-DAG: r[[R30:[0-9]+]] = ##134744072
; CHECK-DAG: v[[V31:[0-9]+]]:[[V32:[0-9]+]].uh = vunpack(v0.ub)
; CHECK-DAG: v[[V31:[0-9]+]]:[[V32:[0-9]+]].uh = vzxt(v0.ub)
; CHECK: v[[V33:[0-9]+]] = vsplat(r[[R30]])
; CHECK-DAG: v[[V34:[0-9]+]].uh = vcl0(v[[V31]].uh)
; CHECK-DAG: v[[V35:[0-9]+]].uh = vcl0(v[[V32]].uh)
; CHECK: v[[V36:[0-9]+]].b = vpacke(v[[V34]].h,v[[V35]].h)
; CHECK: v[[V36:[0-9]+]].b = vshuffe(v[[V34]].b,v[[V35]].b)
; CHECK: v0.b = vsub(v[[V36]].b,v[[V33]].b)
define <128 x i8> @f3(<128 x i8> %a0) #0 {
%t0 = call <128 x i8> @llvm.ctlz.v128i8(<128 x i8> %a0)
Expand Down Expand Up @@ -64,10 +64,10 @@ define <32 x i32> @f5(<32 x i32> %a0) #0 {
; CHECK: v[[V65:[0-9]+]].b = vsub(v0.b,v[[V63]].b)
; CHECK: v[[V66:[0-9]+]] = vand(v[[V61]],v[[V65]])
; Ctlz:
; CHECK: v[[V67:[0-9]+]]:[[V68:[0-9]+]].uh = vunpack(v[[V66]].ub)
; CHECK: v[[V67:[0-9]+]]:[[V68:[0-9]+]].uh = vzxt(v[[V66]].ub)
; CHECK: v[[V69:[0-9]+]].uh = vcl0(v[[V68]].uh)
; CHECK: v[[V6A:[0-9]+]].uh = vcl0(v[[V67]].uh)
; CHECK: v[[V6B:[0-9]+]].b = vpacke(v[[V6A]].h,v[[V69]].h)
; CHECK: v[[V6B:[0-9]+]].b = vshuffe(v[[V6A]].b,v[[V69]].b)
; CHECK: v[[V6C:[0-9]+]].b = vsub(v[[V6B]].b,v[[V64]].b)
; CHECK: v0.b = vsub(v[[V64]].b,v[[V6C]].b)
define <128 x i8> @f6(<128 x i8> %a0) #0 {
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/Hexagon/autohvx/bitcount-64b.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
; RUN: llc -march=hexagon < %s | FileCheck %s

; CHECK-LABEL: f0
; CHECK: v[[V00:[0-9]+]]:[[V01:[0-9]+]].uh = vunpack(v0.ub)
; CHECK: v[[V00:[0-9]+]]:[[V01:[0-9]+]].uh = vzxt(v0.ub)
; CHECK-DAG: v[[V02:[0-9]+]].h = vpopcount(v[[V00]].h)
; CHECK-DAG: v[[V03:[0-9]+]].h = vpopcount(v[[V01]].h)
; CHECK: v0.b = vpacke(v[[V02]].h,v[[V03]].h)
; CHECK: v0.b = vshuffe(v[[V02]].b,v[[V03]].b)
define <64 x i8> @f0(<64 x i8> %a0) #0 {
%t0 = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a0)
ret <64 x i8> %t0
Expand All @@ -28,11 +28,11 @@ define <16 x i32> @f2(<16 x i32> %a0) #0 {

; CHECK-LABEL: f3
; CHECK-DAG: r[[R30:[0-9]+]] = ##134744072
; CHECK-DAG: v[[V31:[0-9]+]]:[[V32:[0-9]+]].uh = vunpack(v0.ub)
; CHECK-DAG: v[[V31:[0-9]+]]:[[V32:[0-9]+]].uh = vzxt(v0.ub)
; CHECK: v[[V33:[0-9]+]] = vsplat(r[[R30]])
; CHECK-DAG: v[[V34:[0-9]+]].uh = vcl0(v[[V31]].uh)
; CHECK-DAG: v[[V35:[0-9]+]].uh = vcl0(v[[V32]].uh)
; CHECK: v[[V36:[0-9]+]].b = vpacke(v[[V34]].h,v[[V35]].h)
; CHECK: v[[V36:[0-9]+]].b = vshuffe(v[[V34]].b,v[[V35]].b)
; CHECK: v0.b = vsub(v[[V36]].b,v[[V33]].b)
define <64 x i8> @f3(<64 x i8> %a0) #0 {
%t0 = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a0)
Expand Down Expand Up @@ -64,10 +64,10 @@ define <16 x i32> @f5(<16 x i32> %a0) #0 {
; CHECK: v[[V65:[0-9]+]].b = vsub(v0.b,v[[V63]].b)
; CHECK: v[[V66:[0-9]+]] = vand(v[[V61]],v[[V65]])
; Ctlz:
; CHECK: v[[V67:[0-9]+]]:[[V68:[0-9]+]].uh = vunpack(v[[V66]].ub)
; CHECK: v[[V67:[0-9]+]]:[[V68:[0-9]+]].uh = vzxt(v[[V66]].ub)
; CHECK: v[[V69:[0-9]+]].uh = vcl0(v[[V68]].uh)
; CHECK: v[[V6A:[0-9]+]].uh = vcl0(v[[V67]].uh)
; CHECK: v[[V6B:[0-9]+]].b = vpacke(v[[V6A]].h,v[[V69]].h)
; CHECK: v[[V6B:[0-9]+]].b = vshuffe(v[[V6A]].b,v[[V69]].b)
; CHECK: v[[V6C:[0-9]+]].b = vsub(v[[V6B]].b,v[[V64]].b)
; CHECK: v0.b = vsub(v[[V64]].b,v[[V6C]].b)
define <64 x i8> @f6(<64 x i8> %a0) #0 {
Expand Down

0 comments on commit 7963216

Please sign in to comment.