diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index b89017de0bcf1..beff0ad9da270 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2399,9 +2399,9 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
         Tmp = Builder.CreatePtrToInt(
             Tmp, llvm::IntegerType::get(CTX, (unsigned)TmpSize));
         Tmp = Builder.CreateTrunc(Tmp, TruncTy);
-      } else if (TruncTy->isIntegerTy()) {
+      } else if (Tmp->getType()->isIntegerTy() && TruncTy->isIntegerTy()) {
         Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
-      } else if (TruncTy->isVectorTy()) {
+      } else if (Tmp->getType()->isVectorTy() || TruncTy->isVectorTy()) {
         Tmp = Builder.CreateBitCast(Tmp, TruncTy);
       }
     }
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index d053f41ab168f..2291c991fb110 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -40,6 +40,11 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
     return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
   }
 
+  if (Constraint == "k") {
+    llvm::Type *Int1Ty = llvm::Type::getInt1Ty(CGF.getLLVMContext());
+    return llvm::FixedVectorType::get(Int1Ty, Ty->getScalarSizeInBits());
+  }
+
   // No operation needed
   return Ty;
 }
diff --git a/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c b/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c
index b4939bfc2ca83..74b6719bf9cfd 100644
--- a/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c
+++ b/clang/test/CodeGen/X86/avx512-kconstraints-att_inline_asm.c
@@ -41,7 +41,7 @@ __m512i mask_Yk_i64(long long msk, __m512i x, __m512i y){
 }
 
 char k_wise_op_i8(char msk_src1,char msk_src2){
-//CHECK: i8 asm "kandb\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i8 %{{.*}}, i8 %{{.*}})
+//CHECK: <8 x i1> asm "kandb\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<8 x i1> %{{.*}}, <8 x i1> %{{.*}})
 char msk_dst;
 asm ("kandb\t%2, %1, %0"
        : "=k" (msk_dst)
@@ -50,7 +50,7 @@ char k_wise_op_i8(char msk_src1,char msk_src2){
 }
 
 short k_wise_op_i16(short msk_src1, short msk_src2){
-//CHECK: i16 asm "kandw\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i16 %{{.*}}, i16 %{{.*}})
+//CHECK: <16 x i1> asm "kandw\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<16 x i1> %{{.*}}, <16 x i1> %{{.*}})
 short msk_dst;
 asm ("kandw\t%2, %1, %0"
        : "=k" (msk_dst)
@@ -59,7 +59,7 @@ short k_wise_op_i16(short msk_src1, short msk_src2){
 }
 
 int k_wise_op_i32(int msk_src1, int msk_src2){
-//CHECK: i32 asm "kandd\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i32 %{{.*}}, i32 %{{.*}})
+//CHECK: <32 x i1> asm "kandd\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<32 x i1> %{{.*}}, <32 x i1> %{{.*}})
 int msk_dst;
 asm ("kandd\t%2, %1, %0"
        : "=k" (msk_dst)
@@ -68,7 +68,7 @@ int k_wise_op_i32(int msk_src1, int msk_src2){
 }
 
 long long k_wise_op_i64(long long msk_src1, long long msk_src2){
-//CHECK: i64 asm "kandq\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(i64 %{{.*}}, i64 %{{.*}})
+//CHECK: <64 x i1> asm "kandq\09$2, $1, $0", "=k,k,k,~{dirflag},~{fpsr},~{flags}"(<64 x i1> %{{.*}}, <64 x i1> %{{.*}})
 long long msk_dst;
 asm ("kandq\t%2, %1, %0"
        : "=k" (msk_dst)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e19128ec77565..ff2014d8fa7b1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57085,17 +57085,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       // in the normal allocation?
    case 'k':
      if (Subtarget.hasAVX512()) {
-        if (VT == MVT::i1)
+        if (VT == MVT::v1i1 || VT == MVT::i1)
          return std::make_pair(0U, &X86::VK1RegClass);
-        if (VT == MVT::i8)
+        if (VT == MVT::v8i1 || VT == MVT::i8)
          return std::make_pair(0U, &X86::VK8RegClass);
-        if (VT == MVT::i16)
+        if (VT == MVT::v16i1 || VT == MVT::i16)
          return std::make_pair(0U, &X86::VK16RegClass);
      }
      if (Subtarget.hasBWI()) {
-        if (VT == MVT::i32)
+        if (VT == MVT::v32i1 || VT == MVT::i32)
          return std::make_pair(0U, &X86::VK32RegClass);
-        if (VT == MVT::i64)
+        if (VT == MVT::v64i1 || VT == MVT::i64)
          return std::make_pair(0U, &X86::VK64RegClass);
      }
      break;
@@ -57343,17 +57343,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
    case 'k':
      // This register class doesn't allocate k0 for masked vector operation.
      if (Subtarget.hasAVX512()) {
-        if (VT == MVT::i1)
+        if (VT == MVT::v1i1 || VT == MVT::i1)
          return std::make_pair(0U, &X86::VK1WMRegClass);
-        if (VT == MVT::i8)
+        if (VT == MVT::v8i1 || VT == MVT::i8)
          return std::make_pair(0U, &X86::VK8WMRegClass);
-        if (VT == MVT::i16)
+        if (VT == MVT::v16i1 || VT == MVT::i16)
          return std::make_pair(0U, &X86::VK16WMRegClass);
      }
      if (Subtarget.hasBWI()) {
-        if (VT == MVT::i32)
+        if (VT == MVT::v32i1 || VT == MVT::i32)
          return std::make_pair(0U, &X86::VK32WMRegClass);
-        if (VT == MVT::i64)
+        if (VT == MVT::v64i1 || VT == MVT::i64)
          return std::make_pair(0U, &X86::VK64WMRegClass);
      }
      break;
@@ -57506,15 +57506,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
        Res.second = nullptr;
      }
    } else if (isVKClass(*Class)) {
-      if (VT == MVT::i1)
+      if (VT == MVT::v1i1 || VT == MVT::i1)
        Res.second = &X86::VK1RegClass;
-      else if (VT == MVT::i8)
+      else if (VT == MVT::v8i1 || VT == MVT::i8)
        Res.second = &X86::VK8RegClass;
-      else if (VT == MVT::i16)
+      else if (VT == MVT::v16i1 || VT == MVT::i16)
        Res.second = &X86::VK16RegClass;
-      else if (VT == MVT::i32)
+      else if (VT == MVT::v32i1 || VT == MVT::i32)
        Res.second = &X86::VK32RegClass;
-      else if (VT == MVT::i64)
+      else if (VT == MVT::v64i1 || VT == MVT::i64)
        Res.second = &X86::VK64RegClass;
      else {
        // Type mismatch and not a clobber: Return an error;
diff --git a/llvm/test/CodeGen/X86/pr41678.ll b/llvm/test/CodeGen/X86/pr41678.ll
index 283b2b421604e..36411e42510f4 100644
--- a/llvm/test/CodeGen/X86/pr41678.ll
+++ b/llvm/test/CodeGen/X86/pr41678.ll
@@ -20,3 +20,22 @@ entry:
   store i16 %0, ptr %b, align 2
   ret void
 }
+
+define void @b() {
+; CHECK-LABEL: b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subl $2, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 6
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    # kill: def $k0 killed $k6
+; CHECK-NEXT:    kmovw %k6, (%esp)
+; CHECK-NEXT:    addl $2, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    retl
+entry:
+  %b = alloca <16 x i1>, align 2
+  %0 = call <16 x i1> asm "", "={k6},~{dirflag},~{fpsr},~{flags}"() #1
+  store <16 x i1> %0, ptr %b, align 2
+  ret void
+}
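
Note: the clang tests above show the user-visible effect of this patch: an operand bound to the "k" constraint is now passed through the inline-asm call as <N x i1> instead of iN, and the backend accepts the matching vNi1 value types for the VK register classes. For reference, a minimal C sketch of the kind of source this covers, assuming an AVX-512 target (the helper name is illustrative, not part of the patch):

  #include <stdint.h>

  /* ANDs two 16-bit masks through the AVX-512 k-registers, mirroring the
     k_wise_op_i16 test above. Built with e.g. -mavx512f, clang now emits
     the asm call with <16 x i1> operands and selects the VK16 class. */
  static inline uint16_t mask_and16(uint16_t a, uint16_t b) {
    uint16_t r;
    __asm__("kandw\t%2, %1, %0" : "=k"(r) : "k"(a), "k"(b));
    return r;
  }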