-
Notifications
You must be signed in to change notification settings - Fork 10.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoongArch] Add support for getNumberOfRegisters() #88372
Conversation
The `TTI` hooks are used during vectorization to calculate register pressure. The default implementation reports a definitely wrong number of registers (8 registers for every register class). This patch also defines LoongArch's own register classes.
@llvm/pr-subscribers-backend-loongarch @llvm/pr-subscribers-llvm-transforms Author: wanglei (wangleiat) Changes: The `TTI` hooks are used during vectorization to calculate register pressure. This patch also defines LoongArch's own register classes. Full diff: https://github.com/llvm/llvm-project/pull/88372.diff 3 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index d47dded9ea6ecf..add1c60d89d21c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -40,4 +40,45 @@ TypeSize LoongArchTTIImpl::getRegisterBitWidth(
llvm_unreachable("Unsupported register kind");
}
+unsigned LoongArchTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ switch (ClassID) {
+ case LoongArchRegisterClass::GPRRC:
+ // 30 = 32 GPRs - r0 (zero register) - r21 (non-allocatable)
+ return 30;
+ case LoongArchRegisterClass::FPRRC:
+ return ST->hasBasicF() ? 32 : 0;
+ case LoongArchRegisterClass::VRRC:
+ return ST->hasExtLSX() ? 32 : 0;
+ }
+ llvm_unreachable("unknown register class");
+}
+
+unsigned LoongArchTTIImpl::getRegisterClassForType(bool Vector,
+ Type *Ty) const {
+ if (Vector)
+ return LoongArchRegisterClass::VRRC;
+ if (!Ty)
+ return LoongArchRegisterClass::GPRRC;
+
+ Type *ScalarTy = Ty->getScalarType();
+ if ((ScalarTy->isFloatTy() && ST->hasBasicF()) ||
+ (ScalarTy->isDoubleTy() && ST->hasBasicD())) {
+ return LoongArchRegisterClass::FPRRC;
+ }
+
+ return LoongArchRegisterClass::GPRRC;
+}
+
+const char *LoongArchTTIImpl::getRegisterClassName(unsigned ClassID) const {
+ switch (ClassID) {
+ case LoongArchRegisterClass::GPRRC:
+ return "LoongArch::GPRRC";
+ case LoongArchRegisterClass::FPRRC:
+ return "LoongArch::FPRRC";
+ case LoongArchRegisterClass::VRRC:
+ return "LoongArch::VRRC";
+ }
+ llvm_unreachable("unknown register class");
+}
+
// TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index d296c9ed576fbd..34c18163bbdb6e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -28,6 +28,7 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
typedef TargetTransformInfo TTI;
friend BaseT;
+ enum LoongArchRegisterClass { GPRRC, FPRRC, VRRC };
const LoongArchSubtarget *ST;
const LoongArchTargetLowering *TLI;
@@ -40,6 +41,9 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
TLI(ST->getTargetLowering()) {}
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+ const char *getRegisterClassName(unsigned ClassID) const;
// TODO: Implement more hooks to provide TTI machinery for LoongArch.
};
diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/reg-usage.ll
new file mode 100644
index 00000000000000..f45a2f0f5b7e8f
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/reg-usage.ll
@@ -0,0 +1,44 @@
+; REQUIRES: asserts
+; RUN: opt --passes=loop-vectorize --mtriple loongarch64-linux-gnu \
+; RUN: --mattr=+lsx -debug-only=loop-vectorize --force-vector-width=1 \
+; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR
+; RUN: opt --passes=loop-vectorize --mtriple loongarch64-linux-gnu \
+; RUN: --mattr=+lsx -debug-only=loop-vectorize --force-vector-width=4 \
+; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-VECTOR
+
+define void @bar(ptr %A, i32 signext %n) {
+; CHECK-LABEL: bar
+; CHECK-SCALAR: LV(REG): Found max usage: 2 item
+; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: LoongArch::GPRRC, 2 registers
+; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: LoongArch::FPRRC, 1 registers
+; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
+; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: LoongArch::GPRRC, 1 registers
+; CHECK-SCALAR-NEXT: LV: The target has 30 registers of LoongArch::GPRRC register class
+; CHECK-SCALAR-NEXT: LV: The target has 32 registers of LoongArch::FPRRC register class
+; CHECK-VECTOR: LV(REG): Found max usage: 1 item
+; CHECK-VECTOR-NEXT: LV(REG): RegisterClass: LoongArch::VRRC, 3 registers
+; CHECK-VECTOR-NEXT: LV(REG): Found invariant usage: 1 item
+; CHECK-VECTOR-NEXT: LV(REG): RegisterClass: LoongArch::GPRRC, 1 registers
+; CHECK-VECTOR-NEXT: LV: The target has 32 registers of LoongArch::VRRC register class
+
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext nneg i32 %n to i64
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %0 = trunc i64 %indvars.iv to i32
+ %conv = sitofp i32 %0 to float
+ %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
+ store float %conv, ptr %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
|
The `TTI` hooks are used during vectorization to calculate register pressure. The default implementation reports a wrong number of registers (8 registers for every register class). This patch also defines LoongArch's own register classes.