-
Notifications
You must be signed in to change notification settings - Fork 10.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Use vXi1 for k
constraint in inline asm
#77733
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) ChangesFixes #77172 Full diff: https://github.com/llvm/llvm-project/pull/77733.diff 5 Files Affected:
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index b89017de0bcf14..beff0ad9da2709 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2399,9 +2399,9 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
Tmp = Builder.CreatePtrToInt(
Tmp, llvm::IntegerType::get(CTX, (unsigned)TmpSize));
Tmp = Builder.CreateTrunc(Tmp, TruncTy);
- } else if (TruncTy->isIntegerTy()) {
+ } else if (Tmp->getType()->isIntegerTy() && TruncTy->isIntegerTy()) {
Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
- } else if (TruncTy->isVectorTy()) {
+ } else if (Tmp->getType()->isVectorTy() || TruncTy->isVectorTy()) {
Tmp = Builder.CreateBitCast(Tmp, TruncTy);
}
}
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index d053f41ab168f5..2291c991fb1107 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -40,6 +40,11 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
}
+ if (Constraint == "k") {
+ llvm::Type *Int1Ty = llvm::Type::getInt1Ty(CGF.getLLVMContext());
+ return llvm::FixedVectorType::get(Int1Ty, Ty->getScalarSizeInBits());
+ }
+
// No operation needed
return Ty;
}
diff --git a/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c b/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c
index b4939bfc2ca831..74b6719bf9cfdd 100644
--- a/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c
+++ b/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c
@@ -41,7 +41,7 @@ __m512i mask_Yk_i64(long long msk, __m512i x, __m512i y){
}
char k_wise_op_i8(char msk_src1,char msk_src2){
-//CHECK: i8 asm "kandb\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i8 %{{.*}}, i8 %{{.*}})
+//CHECK: <8 x i1> asm "kandb\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<8 x i1> %{{.*}}, <8 x i1> %{{.*}})
char msk_dst;
asm ("kandb\t%2, %1, %0"
: "=k" (msk_dst)
@@ -50,7 +50,7 @@ char k_wise_op_i8(char msk_src1,char msk_src2){
}
short k_wise_op_i16(short msk_src1, short msk_src2){
-//CHECK: i16 asm "kandw\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i16 %{{.*}}, i16 %{{.*}})
+//CHECK: <16 x i1> asm "kandw\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<16 x i1> %{{.*}}, <16 x i1> %{{.*}})
short msk_dst;
asm ("kandw\t%2, %1, %0"
: "=k" (msk_dst)
@@ -59,7 +59,7 @@ short k_wise_op_i16(short msk_src1, short msk_src2){
}
int k_wise_op_i32(int msk_src1, int msk_src2){
-//CHECK: i32 asm "kandd\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i32 %{{.*}}, i32 %{{.*}})
+//CHECK: <32 x i1> asm "kandd\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<32 x i1> %{{.*}}, <32 x i1> %{{.*}})
int msk_dst;
asm ("kandd\t%2, %1, %0"
: "=k" (msk_dst)
@@ -68,7 +68,7 @@ int k_wise_op_i32(int msk_src1, int msk_src2){
}
long long k_wise_op_i64(long long msk_src1, long long msk_src2){
-//CHECK: i64 asm "kandq\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i64 %{{.*}}, i64 %{{.*}})
+//CHECK: <64 x i1> asm "kandq\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<64 x i1> %{{.*}}, <64 x i1> %{{.*}})
long long msk_dst;
asm ("kandq\t%2, %1, %0"
: "=k" (msk_dst)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8c4f091c793dcb..39d648f1872d3d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57046,17 +57046,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// in the normal allocation?
case 'k':
if (Subtarget.hasAVX512()) {
- if (VT == MVT::i1)
+ if (VT == MVT::v1i1 || VT == MVT::i1)
return std::make_pair(0U, &X86::VK1RegClass);
- if (VT == MVT::i8)
+ if (VT == MVT::v8i1 || VT == MVT::i8)
return std::make_pair(0U, &X86::VK8RegClass);
- if (VT == MVT::i16)
+ if (VT == MVT::v16i1 || VT == MVT::i16)
return std::make_pair(0U, &X86::VK16RegClass);
}
if (Subtarget.hasBWI()) {
- if (VT == MVT::i32)
+ if (VT == MVT::v32i1 || VT == MVT::i32)
return std::make_pair(0U, &X86::VK32RegClass);
- if (VT == MVT::i64)
+ if (VT == MVT::v64i1 || VT == MVT::i64)
return std::make_pair(0U, &X86::VK64RegClass);
}
break;
@@ -57304,17 +57304,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'k':
// This register class doesn't allocate k0 for masked vector operation.
if (Subtarget.hasAVX512()) {
- if (VT == MVT::i1)
+ if (VT == MVT::v1i1 || VT == MVT::i1)
return std::make_pair(0U, &X86::VK1WMRegClass);
- if (VT == MVT::i8)
+ if (VT == MVT::v8i1 || VT == MVT::i8)
return std::make_pair(0U, &X86::VK8WMRegClass);
- if (VT == MVT::i16)
+ if (VT == MVT::v16i1 || VT == MVT::i16)
return std::make_pair(0U, &X86::VK16WMRegClass);
}
if (Subtarget.hasBWI()) {
- if (VT == MVT::i32)
+ if (VT == MVT::v32i1 || VT == MVT::i32)
return std::make_pair(0U, &X86::VK32WMRegClass);
- if (VT == MVT::i64)
+ if (VT == MVT::v64i1 || VT == MVT::i64)
return std::make_pair(0U, &X86::VK64WMRegClass);
}
break;
@@ -57467,15 +57467,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.second = nullptr;
}
} else if (isVKClass(*Class)) {
- if (VT == MVT::i1)
+ if (VT == MVT::v1i1 || VT == MVT::i1)
Res.second = &X86::VK1RegClass;
- else if (VT == MVT::i8)
+ else if (VT == MVT::v8i1 || VT == MVT::i8)
Res.second = &X86::VK8RegClass;
- else if (VT == MVT::i16)
+ else if (VT == MVT::v16i1 || VT == MVT::i16)
Res.second = &X86::VK16RegClass;
- else if (VT == MVT::i32)
+ else if (VT == MVT::v32i1 || VT == MVT::i32)
Res.second = &X86::VK32RegClass;
- else if (VT == MVT::i64)
+ else if (VT == MVT::v64i1 || VT == MVT::i64)
Res.second = &X86::VK64RegClass;
else {
// Type mismatch and not a clobber: Return an error;
diff --git a/llvm/test/CodeGen/X86/pr41678.ll b/llvm/test/CodeGen/X86/pr41678.ll
index 283b2b421604ef..36411e42510f4c 100644
--- a/llvm/test/CodeGen/X86/pr41678.ll
+++ b/llvm/test/CodeGen/X86/pr41678.ll
@@ -20,3 +20,22 @@ entry:
store i16 %0, ptr %b, align 2
ret void
}
+
+define void @b() {
+; CHECK-LABEL: b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subl $2, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 6
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: # kill: def $k0 killed $k6
+; CHECK-NEXT: kmovw %k6, (%esp)
+; CHECK-NEXT: addl $2, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 4
+; CHECK-NEXT: retl
+entry:
+ %b = alloca <16 x i1>, align 2
+ %0 = call <16 x i1> asm "", "={k6},~{dirflag},~{fpsr},~{flags}"() #1
+ store <16 x i1> %0, ptr %b, align 2
+ ret void
+}
|
@llvm/pr-subscribers-clang Author: Phoebe Wang (phoebewang) ChangesFixes #77172 Full diff: https://github.com/llvm/llvm-project/pull/77733.diff 5 Files Affected:
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index b89017de0bcf14..beff0ad9da2709 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2399,9 +2399,9 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
Tmp = Builder.CreatePtrToInt(
Tmp, llvm::IntegerType::get(CTX, (unsigned)TmpSize));
Tmp = Builder.CreateTrunc(Tmp, TruncTy);
- } else if (TruncTy->isIntegerTy()) {
+ } else if (Tmp->getType()->isIntegerTy() && TruncTy->isIntegerTy()) {
Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
- } else if (TruncTy->isVectorTy()) {
+ } else if (Tmp->getType()->isVectorTy() || TruncTy->isVectorTy()) {
Tmp = Builder.CreateBitCast(Tmp, TruncTy);
}
}
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index d053f41ab168f5..2291c991fb1107 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -40,6 +40,11 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
}
+ if (Constraint == "k") {
+ llvm::Type *Int1Ty = llvm::Type::getInt1Ty(CGF.getLLVMContext());
+ return llvm::FixedVectorType::get(Int1Ty, Ty->getScalarSizeInBits());
+ }
+
// No operation needed
return Ty;
}
diff --git a/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c b/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c
index b4939bfc2ca831..74b6719bf9cfdd 100644
--- a/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c
+++ b/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c
@@ -41,7 +41,7 @@ __m512i mask_Yk_i64(long long msk, __m512i x, __m512i y){
}
char k_wise_op_i8(char msk_src1,char msk_src2){
-//CHECK: i8 asm "kandb\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i8 %{{.*}}, i8 %{{.*}})
+//CHECK: <8 x i1> asm "kandb\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<8 x i1> %{{.*}}, <8 x i1> %{{.*}})
char msk_dst;
asm ("kandb\t%2, %1, %0"
: "=k" (msk_dst)
@@ -50,7 +50,7 @@ char k_wise_op_i8(char msk_src1,char msk_src2){
}
short k_wise_op_i16(short msk_src1, short msk_src2){
-//CHECK: i16 asm "kandw\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i16 %{{.*}}, i16 %{{.*}})
+//CHECK: <16 x i1> asm "kandw\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<16 x i1> %{{.*}}, <16 x i1> %{{.*}})
short msk_dst;
asm ("kandw\t%2, %1, %0"
: "=k" (msk_dst)
@@ -59,7 +59,7 @@ short k_wise_op_i16(short msk_src1, short msk_src2){
}
int k_wise_op_i32(int msk_src1, int msk_src2){
-//CHECK: i32 asm "kandd\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i32 %{{.*}}, i32 %{{.*}})
+//CHECK: <32 x i1> asm "kandd\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<32 x i1> %{{.*}}, <32 x i1> %{{.*}})
int msk_dst;
asm ("kandd\t%2, %1, %0"
: "=k" (msk_dst)
@@ -68,7 +68,7 @@ int k_wise_op_i32(int msk_src1, int msk_src2){
}
long long k_wise_op_i64(long long msk_src1, long long msk_src2){
-//CHECK: i64 asm "kandq\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i64 %{{.*}}, i64 %{{.*}})
+//CHECK: <64 x i1> asm "kandq\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<64 x i1> %{{.*}}, <64 x i1> %{{.*}})
long long msk_dst;
asm ("kandq\t%2, %1, %0"
: "=k" (msk_dst)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8c4f091c793dcb..39d648f1872d3d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57046,17 +57046,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// in the normal allocation?
case 'k':
if (Subtarget.hasAVX512()) {
- if (VT == MVT::i1)
+ if (VT == MVT::v1i1 || VT == MVT::i1)
return std::make_pair(0U, &X86::VK1RegClass);
- if (VT == MVT::i8)
+ if (VT == MVT::v8i1 || VT == MVT::i8)
return std::make_pair(0U, &X86::VK8RegClass);
- if (VT == MVT::i16)
+ if (VT == MVT::v16i1 || VT == MVT::i16)
return std::make_pair(0U, &X86::VK16RegClass);
}
if (Subtarget.hasBWI()) {
- if (VT == MVT::i32)
+ if (VT == MVT::v32i1 || VT == MVT::i32)
return std::make_pair(0U, &X86::VK32RegClass);
- if (VT == MVT::i64)
+ if (VT == MVT::v64i1 || VT == MVT::i64)
return std::make_pair(0U, &X86::VK64RegClass);
}
break;
@@ -57304,17 +57304,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'k':
// This register class doesn't allocate k0 for masked vector operation.
if (Subtarget.hasAVX512()) {
- if (VT == MVT::i1)
+ if (VT == MVT::v1i1 || VT == MVT::i1)
return std::make_pair(0U, &X86::VK1WMRegClass);
- if (VT == MVT::i8)
+ if (VT == MVT::v8i1 || VT == MVT::i8)
return std::make_pair(0U, &X86::VK8WMRegClass);
- if (VT == MVT::i16)
+ if (VT == MVT::v16i1 || VT == MVT::i16)
return std::make_pair(0U, &X86::VK16WMRegClass);
}
if (Subtarget.hasBWI()) {
- if (VT == MVT::i32)
+ if (VT == MVT::v32i1 || VT == MVT::i32)
return std::make_pair(0U, &X86::VK32WMRegClass);
- if (VT == MVT::i64)
+ if (VT == MVT::v64i1 || VT == MVT::i64)
return std::make_pair(0U, &X86::VK64WMRegClass);
}
break;
@@ -57467,15 +57467,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.second = nullptr;
}
} else if (isVKClass(*Class)) {
- if (VT == MVT::i1)
+ if (VT == MVT::v1i1 || VT == MVT::i1)
Res.second = &X86::VK1RegClass;
- else if (VT == MVT::i8)
+ else if (VT == MVT::v8i1 || VT == MVT::i8)
Res.second = &X86::VK8RegClass;
- else if (VT == MVT::i16)
+ else if (VT == MVT::v16i1 || VT == MVT::i16)
Res.second = &X86::VK16RegClass;
- else if (VT == MVT::i32)
+ else if (VT == MVT::v32i1 || VT == MVT::i32)
Res.second = &X86::VK32RegClass;
- else if (VT == MVT::i64)
+ else if (VT == MVT::v64i1 || VT == MVT::i64)
Res.second = &X86::VK64RegClass;
else {
// Type mismatch and not a clobber: Return an error;
diff --git a/llvm/test/CodeGen/X86/pr41678.ll b/llvm/test/CodeGen/X86/pr41678.ll
index 283b2b421604ef..36411e42510f4c 100644
--- a/llvm/test/CodeGen/X86/pr41678.ll
+++ b/llvm/test/CodeGen/X86/pr41678.ll
@@ -20,3 +20,22 @@ entry:
store i16 %0, ptr %b, align 2
ret void
}
+
+define void @b() {
+; CHECK-LABEL: b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subl $2, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 6
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: # kill: def $k0 killed $k6
+; CHECK-NEXT: kmovw %k6, (%esp)
+; CHECK-NEXT: addl $2, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 4
+; CHECK-NEXT: retl
+entry:
+ %b = alloca <16 x i1>, align 2
+ %0 = call <16 x i1> asm "", "={k6},~{dirflag},~{fpsr},~{flags}"() #1
+ store <16 x i1> %0, ptr %b, align 2
+ ret void
+}
|
k
constraint in inline asmk
constraint in inline asm
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we add test coverage for the gpr <-> mask transfers?
Is the concern about existing BC files using gpr? It's covered by existing test case, e.g., function |
Why not return |
You mean in two |
Is it represented by i64 x 1 in 32-bit mode after this patch? |
No, it's |
if (VT == MVT::v1i1 || VT == MVT::i1) | ||
return std::make_pair(0U, &X86::VK1RegClass); | ||
if (VT == MVT::i8) | ||
if (VT == MVT::v8i1 || VT == MVT::i8) | ||
return std::make_pair(0U, &X86::VK8RegClass); | ||
if (VT == MVT::i16) | ||
if (VT == MVT::v16i1 || VT == MVT::i16) | ||
return std::make_pair(0U, &X86::VK16RegClass); | ||
} | ||
if (Subtarget.hasBWI()) { | ||
if (VT == MVT::i32) | ||
if (VT == MVT::v32i1 || VT == MVT::i32) | ||
return std::make_pair(0U, &X86::VK32RegClass); | ||
if (VT == MVT::i64) | ||
if (VT == MVT::v64i1 || VT == MVT::i64) | ||
return std::make_pair(0U, &X86::VK64RegClass); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i1, i8, i16 is kept for what? backward compatibility of IR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Never mind. I see your comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Thanks @KanRobert ! |
Fixes #77172