[LoongArch] Add patterns to support vector type average instructions generation #161079
Conversation
@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

Patch is 24.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161079.diff

4 Files Affected:
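For context before the diff: the new patterns select on the plain add-then-shift IR the middle end emits for a floor average. A minimal sketch of that shape, mirroring the vavg_b test updated below (function name is illustrative):

```llvm
; Signed floor average: (a + b) >> 1 via an arithmetic shift right by a
; splat of 1. With the patterns in this patch, +lsx selects vavg.b here
; instead of vadd.b + vsrai.b (see the vavg_b test below).
define void @vavg_b_sketch(ptr %res, ptr %a, ptr %b) nounwind {
entry:
  %va = load <16 x i8>, ptr %a
  %vb = load <16 x i8>, ptr %b
  %add = add <16 x i8> %va, %vb
  %shr = ashr <16 x i8> %add, splat (i8 1)
  store <16 x i8> %shr, ptr %res
  ret void
}
```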
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990ba1234..6eb68129d9dba 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2015,6 +2015,56 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
(XVFTINTRZ_LU_D v4f64:$vj)),
sub_128)>;
+// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU}
+def : Pat<(sra (v32i8 (add v32i8:$xj, v32i8:$xk)), (v32i8 (vsplat_imm_eq_1))),
+ (XVAVG_B v32i8:$xj, v32i8:$xk)>;
+def : Pat<(sra (v16i16 (add v16i16:$xj, v16i16:$xk)), (v16i16 (vsplat_imm_eq_1))),
+ (XVAVG_H v16i16:$xj, v16i16:$xk)>;
+def : Pat<(sra (v8i32 (add v8i32:$xj, v8i32:$xk)), (v8i32 (vsplat_imm_eq_1))),
+ (XVAVG_W v8i32:$xj, v8i32:$xk)>;
+def : Pat<(sra (v4i64 (add v4i64:$xj, v4i64:$xk)), (v4i64 (vsplat_imm_eq_1))),
+ (XVAVG_D v4i64:$xj, v4i64:$xk)>;
+def : Pat<(srl (v32i8 (add v32i8:$xj, v32i8:$xk)), (v32i8 (vsplat_imm_eq_1))),
+ (XVAVG_BU v32i8:$xj, v32i8:$xk)>;
+def : Pat<(srl (v16i16 (add v16i16:$xj, v16i16:$xk)), (v16i16 (vsplat_imm_eq_1))),
+ (XVAVG_HU v16i16:$xj, v16i16:$xk)>;
+def : Pat<(srl (v8i32 (add v8i32:$xj, v8i32:$xk)), (v8i32 (vsplat_imm_eq_1))),
+ (XVAVG_WU v8i32:$xj, v8i32:$xk)>;
+def : Pat<(srl (v4i64 (add v4i64:$xj, v4i64:$xk)), (v4i64 (vsplat_imm_eq_1))),
+ (XVAVG_DU v4i64:$xj, v4i64:$xk)>;
+def : Pat<(sra (v32i8 (add (v32i8 (add v32i8:$vj, v32i8:$vk)),
+ (v32i8 (vsplat_imm_eq_1)))),
+ (v32i8 (vsplat_imm_eq_1))),
+ (XVAVGR_B v32i8:$vj, v32i8:$vk)>;
+def : Pat<(sra (v16i16 (add (v16i16 (add v16i16:$vj, v16i16:$vk)),
+ (v16i16 (vsplat_imm_eq_1)))),
+ (v16i16 (vsplat_imm_eq_1))),
+ (XVAVGR_H v16i16:$vj, v16i16:$vk)>;
+def : Pat<(sra (v8i32 (add (v8i32 (add v8i32:$vj, v8i32:$vk)),
+ (v8i32 (vsplat_imm_eq_1)))),
+ (v8i32 (vsplat_imm_eq_1))),
+ (XVAVGR_W v8i32:$vj, v8i32:$vk)>;
+def : Pat<(sra (v4i64 (add (v4i64 (add v4i64:$vj, v4i64:$vk)),
+ (v4i64 (vsplat_imm_eq_1)))),
+ (v4i64 (vsplat_imm_eq_1))),
+ (XVAVGR_D v4i64:$vj, v4i64:$vk)>;
+def : Pat<(srl (v32i8 (add (v32i8 (add v32i8:$vj, v32i8:$vk)),
+ (v32i8 (vsplat_imm_eq_1)))),
+ (v32i8 (vsplat_imm_eq_1))),
+ (XVAVGR_BU v32i8:$vj, v32i8:$vk)>;
+def : Pat<(srl (v16i16 (add (v16i16 (add v16i16:$vj, v16i16:$vk)),
+ (v16i16 (vsplat_imm_eq_1)))),
+ (v16i16 (vsplat_imm_eq_1))),
+ (XVAVGR_HU v16i16:$vj, v16i16:$vk)>;
+def : Pat<(srl (v8i32 (add (v8i32 (add v8i32:$vj, v8i32:$vk)),
+ (v8i32 (vsplat_imm_eq_1)))),
+ (v8i32 (vsplat_imm_eq_1))),
+ (XVAVGR_WU v8i32:$vj, v8i32:$vk)>;
+def : Pat<(srl (v4i64 (add (v4i64 (add v4i64:$vj, v4i64:$vk)),
+ (v4i64 (vsplat_imm_eq_1)))),
+ (v4i64 (vsplat_imm_eq_1))),
+ (XVAVGR_DU v4i64:$vj, v4i64:$vk)>;
+
// XVABSD_{B/H/W/D}[U]
defm : PatXrXr<abds, "XVABSD">;
defm : PatXrXrU<abdu, "XVABSD">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index b0eb51a92c6c6..169f0d56c223e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2153,6 +2153,56 @@ def : Pat<(f32 f32imm_vldi:$in),
def : Pat<(f64 f64imm_vldi:$in),
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
+// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU}
+def : Pat<(sra (v16i8 (add v16i8:$vj, v16i8:$vk)), (v16i8 (vsplat_imm_eq_1))),
+ (VAVG_B v16i8:$vj, v16i8:$vk)>;
+def : Pat<(sra (v8i16 (add v8i16:$vj, v8i16:$vk)), (v8i16 (vsplat_imm_eq_1))),
+ (VAVG_H v8i16:$vj, v8i16:$vk)>;
+def : Pat<(sra (v4i32 (add v4i32:$vj, v4i32:$vk)), (v4i32 (vsplat_imm_eq_1))),
+ (VAVG_W v4i32:$vj, v4i32:$vk)>;
+def : Pat<(sra (v2i64 (add v2i64:$vj, v2i64:$vk)), (v2i64 (vsplat_imm_eq_1))),
+ (VAVG_D v2i64:$vj, v2i64:$vk)>;
+def : Pat<(srl (v16i8 (add v16i8:$vj, v16i8:$vk)), (v16i8 (vsplat_imm_eq_1))),
+ (VAVG_BU v16i8:$vj, v16i8:$vk)>;
+def : Pat<(srl (v8i16 (add v8i16:$vj, v8i16:$vk)), (v8i16 (vsplat_imm_eq_1))),
+ (VAVG_HU v8i16:$vj, v8i16:$vk)>;
+def : Pat<(srl (v4i32 (add v4i32:$vj, v4i32:$vk)), (v4i32 (vsplat_imm_eq_1))),
+ (VAVG_WU v4i32:$vj, v4i32:$vk)>;
+def : Pat<(srl (v2i64 (add v2i64:$vj, v2i64:$vk)), (v2i64 (vsplat_imm_eq_1))),
+ (VAVG_DU v2i64:$vj, v2i64:$vk)>;
+def : Pat<(sra (v16i8 (add (v16i8 (add v16i8:$vj, v16i8:$vk)),
+ (v16i8 (vsplat_imm_eq_1)))),
+ (v16i8 (vsplat_imm_eq_1))),
+ (VAVGR_B v16i8:$vj, v16i8:$vk)>;
+def : Pat<(sra (v8i16 (add (v8i16 (add v8i16:$vj, v8i16:$vk)),
+ (v8i16 (vsplat_imm_eq_1)))),
+ (v8i16 (vsplat_imm_eq_1))),
+ (VAVGR_H v8i16:$vj, v8i16:$vk)>;
+def : Pat<(sra (v4i32 (add (v4i32 (add v4i32:$vj, v4i32:$vk)),
+ (v4i32 (vsplat_imm_eq_1)))),
+ (v4i32 (vsplat_imm_eq_1))),
+ (VAVGR_W v4i32:$vj, v4i32:$vk)>;
+def : Pat<(sra (v2i64 (add (v2i64 (add v2i64:$vj, v2i64:$vk)),
+ (v2i64 (vsplat_imm_eq_1)))),
+ (v2i64 (vsplat_imm_eq_1))),
+ (VAVGR_D v2i64:$vj, v2i64:$vk)>;
+def : Pat<(srl (v16i8 (add (v16i8 (add v16i8:$vj, v16i8:$vk)),
+ (v16i8 (vsplat_imm_eq_1)))),
+ (v16i8 (vsplat_imm_eq_1))),
+ (VAVGR_BU v16i8:$vj, v16i8:$vk)>;
+def : Pat<(srl (v8i16 (add (v8i16 (add v8i16:$vj, v8i16:$vk)),
+ (v8i16 (vsplat_imm_eq_1)))),
+ (v8i16 (vsplat_imm_eq_1))),
+ (VAVGR_HU v8i16:$vj, v8i16:$vk)>;
+def : Pat<(srl (v4i32 (add (v4i32 (add v4i32:$vj, v4i32:$vk)),
+ (v4i32 (vsplat_imm_eq_1)))),
+ (v4i32 (vsplat_imm_eq_1))),
+ (VAVGR_WU v4i32:$vj, v4i32:$vk)>;
+def : Pat<(srl (v2i64 (add (v2i64 (add v2i64:$vj, v2i64:$vk)),
+ (v2i64 (vsplat_imm_eq_1)))),
+ (v2i64 (vsplat_imm_eq_1))),
+ (VAVGR_DU v2i64:$vj, v2i64:$vk)>;
+
// VABSD_{B/H/W/D}[U]
defm : PatVrVr<abds, "VABSD">;
defm : PatVrVrU<abdu, "VABSD">;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
index 2a5a8fa05d646..5c5c19935080b 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
@@ -1,14 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: xvavg_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -25,8 +24,7 @@ define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -43,8 +41,7 @@ define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -57,14 +54,22 @@ entry:
}
define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavg_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavg_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -79,8 +84,7 @@ define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -97,8 +101,7 @@ define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -115,8 +118,7 @@ define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -129,14 +131,22 @@ entry:
}
define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavg_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavg_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -151,9 +161,7 @@ define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -171,9 +179,7 @@ define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -191,9 +197,7 @@ define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -207,15 +211,23 @@ entry:
}
define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavgr_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavgr_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -231,9 +243,7 @@ define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -251,9 +261,7 @@ define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -271,9 +279,7 @@ define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -287,15 +293,23 @@ entry:
}
define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavgr_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavgr_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
index 20b8898436cc4..334af22edee59 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
@@ -1,14 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vavg_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -25,8 +24,7 @@ define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -43,8 +41,7 @@ define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -57,14 +54,22 @@ entry:
}
define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavg_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavg_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vsrai.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavg_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavg.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
@@ -79,8 +84,7 @@ define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -97,8 +101,7 @@ define void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -115,8 +118,7 @@ define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -129,14 +131,22 @@ entry:
}
define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavg_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavg_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vsrli.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-...
[truncated]
We could make these:
AVGFLOORS -> avg
AVGFLOORU -> avg.u
AVGCEILS -> avgr
AVGCEILU -> avgr.u
Thanks for catching this. More tests should be added, so I'd prefer to do that in a separate commit.
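For reference, a hedged sketch (not part of the patch) of the rounding ("avgr") IR shape behind the mapping above: it adds 1 before the shift. The unsigned byte case below is what the new VAVGR_BU pattern matches, and is roughly the average the generic AVGCEILU node describes, modulo overflow of the intermediate add at the element width:

```llvm
; Unsigned rounding average: (a + b + 1) >> 1 via a logical shift.
; Matched by the new VAVGR_BU pattern (see the vavgr_bu test above).
define <16 x i8> @vavgr_bu_sketch(<16 x i8> %a, <16 x i8> %b) {
  %add  = add <16 x i8> %a, %b
  %add1 = add <16 x i8> %add, splat (i8 1)
  %shr  = lshr <16 x i8> %add1, splat (i8 1)
  ret <16 x i8> %shr
}
```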
Force-pushed from 1b4a585 to 505f45a
Force-pushed from 97ee081 to 6a0219d
LGTM
Force-pushed from 6a0219d to f644f90
Suggested-by: tangaac <tangyan01@loongson.cn> Link: #161079 (comment)
NOTE: For simplicity and convenience, v2i64/v4i64 types on LA32 are not optimized. If hoping to implement this in the future, special handling for bitcast and build_vector is needed.
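For illustration, the case that note refers to is the same floor-average shape on <2 x i64>: with this patch LA64 selects vavg.d, while LA32 keeps the vadd.d + vsrai.d sequence (see the LA32 check lines in the vavg_d test above). A minimal sketch:

```llvm
; Floor average on <2 x i64>. LA64 selects vavg.d; LA32 still lowers this
; to vadd.d + vsrai.d because the splat-of-1 constant for i64 elements is
; built via build_vector/bitcast there, which the patterns do not handle.
define <2 x i64> @vavg_d_sketch(<2 x i64> %a, <2 x i64> %b) {
  %add = add <2 x i64> %a, %b
  %shr = ashr <2 x i64> %add, splat (i64 1)
  ret <2 x i64> %shr
}
```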