57 changes: 57 additions & 0 deletions clang/include/clang/Sema/SemaHLSL.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,60 @@ class AttributeCommonInfo;
class IdentifierInfo;
class ParsedAttr;
class Scope;
class VarDecl;

using llvm::dxil::ResourceClass;

// FIXME: This can be hidden (as static function in SemaHLSL.cpp) once we no
// longer need to create builtin buffer types in HLSLExternalSemaSource.
bool CreateHLSLAttributedResourceType(
Sema &S, QualType Wrapped, ArrayRef<const Attr *> AttrList,
QualType &ResType, HLSLAttributedResourceLocInfo *LocInfo = nullptr);

enum class BindingType : uint8_t { NotAssigned, Explicit, Implicit };

// DeclBindingInfo struct stores information about required/assigned resource
// binding on a declaration for a specific resource class.
struct DeclBindingInfo {
  // Declaration this binding record belongs to.
  const VarDecl *Decl;
  // Resource class this record describes for Decl.
  ResourceClass ResClass;
  // Binding attribute associated with this record; nullptr when none has
  // been assigned.
  const HLSLResourceBindingAttr *Attr;
  // How the binding was established; NotAssigned when no binding has been
  // recorded.
  BindingType BindType;

  // Creates a record for the given declaration and resource class. Unless
  // the caller says otherwise, the record starts out without a binding
  // (BindingType::NotAssigned and a null attribute).
  DeclBindingInfo(const VarDecl *Decl, ResourceClass ResClass,
                  BindingType BindType = BindingType::NotAssigned,
                  const HLSLResourceBindingAttr *Attr = nullptr)
      : Decl(Decl), ResClass(ResClass), Attr(Attr), BindType(BindType) {}

  // Records the binding attribute A together with the way it was
  // established (BT). Must only be called while the record has no binding
  // yet; this precondition is checked by the assert below.
  //
  // The attribute is only stored, never modified, so it is accepted as a
  // pointer-to-const, consistent with the Attr member and the constructor.
  // Callers holding a non-const pointer are unaffected.
  void setBindingAttribute(const HLSLResourceBindingAttr *A, BindingType BT) {
    assert(Attr == nullptr && BindType == BindingType::NotAssigned &&
           "binding attribute already assigned");
    Attr = A;
    BindType = BT;
  }
};

// ResourceBindings class stores information about all resource bindings
// in a shader. It is used for binding diagnostics and implicit binding
// assignments.
class ResourceBindings {
public:
// Registers binding info for the (VD, ResClass) pair and returns a pointer
// to the record. The definition is not in this header; presumably the new
// record is appended to BindingsList -- confirm against SemaHLSL.cpp.
DeclBindingInfo *addDeclBindingInfo(const VarDecl *VD,
ResourceClass ResClass);
// Looks up the binding info for the (VD, ResClass) pair. Presumably returns
// nullptr when no such record exists -- confirm against the definition.
DeclBindingInfo *getDeclBindingInfo(const VarDecl *VD,
ResourceClass ResClass);
// Reports whether binding info has been recorded for VD. Presumably true
// if VD has a record for any resource class -- confirm against the
// definition.
bool hasBindingInfoForDecl(const VarDecl *VD) const;

// NOTE(review): if the pointers returned above refer to elements of
// BindingsList, a later addition that grows the vector would invalidate
// them -- verify that callers do not hold them across additions.

private:
// List of all resource bindings required by the shader.
// A global declaration can have multiple bindings for different
// resource classes. They are all stored sequentially in this list.
// The DeclToBindingListIndex hashtable maps a declaration to the
// index of the first binding info in the list.
llvm::SmallVector<DeclBindingInfo> BindingsList;
llvm::DenseMap<const VarDecl *, unsigned> DeclToBindingListIndex;
};

class SemaHLSL : public SemaBase {
public:
SemaHLSL(Sema &S);
Expand All @@ -55,6 +102,7 @@ class SemaHLSL : public SemaBase {
mergeParamModifierAttr(Decl *D, const AttributeCommonInfo &AL,
HLSLParamModifierAttr::Spelling Spelling);
void ActOnTopLevelFunction(FunctionDecl *FD);
void ActOnVariableDeclarator(VarDecl *VD);
void CheckEntryPoint(FunctionDecl *FD);
void CheckSemanticAnnotation(FunctionDecl *EntryPoint, const Decl *Param,
const HLSLAnnotationAttr *AnnotationAttr);
Expand Down Expand Up @@ -102,6 +150,15 @@ class SemaHLSL : public SemaBase {
llvm::DenseMap<const HLSLAttributedResourceType *,
HLSLAttributedResourceLocInfo>
LocsForHLSLAttributedResources;

// List of all resource bindings
ResourceBindings Bindings;

private:
void collectResourcesOnVarDecl(VarDecl *D);
void collectResourcesOnUserRecordDecl(const VarDecl *VD,
const RecordType *RT);
void processExplicitBindingsOnDecl(VarDecl *D);
};

} // namespace clang
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ enum PredefinedTypeIDs {
///
/// Type IDs for non-predefined types will start at
/// NUM_PREDEF_TYPE_IDs.
const unsigned NUM_PREDEF_TYPE_IDS = 505;
const unsigned NUM_PREDEF_TYPE_IDS = 506;

// Ensure we do not overrun the predefined types we reserved
// in the enum PredefinedTypeIDs above.
Expand Down
9 changes: 6 additions & 3 deletions clang/lib/AST/ByteCode/Integral.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,14 @@ template <unsigned Bits, bool Signed> class Integral final {
APSInt toAPSInt() const {
return APSInt(APInt(Bits, static_cast<uint64_t>(V), Signed), !Signed);
}
APSInt toAPSInt(unsigned NumBits) const {
APSInt toAPSInt(unsigned BitWidth) const { return APSInt(toAPInt(BitWidth)); }
APInt toAPInt(unsigned BitWidth) const {
if constexpr (Signed)
return APSInt(toAPSInt().sextOrTrunc(NumBits), !Signed);
return APInt(Bits, static_cast<uint64_t>(V), Signed)
.sextOrTrunc(BitWidth);
else
return APSInt(toAPSInt().zextOrTrunc(NumBits), !Signed);
return APInt(Bits, static_cast<uint64_t>(V), Signed)
.zextOrTrunc(BitWidth);
}
APValue toAPValue(const ASTContext &) const { return APValue(toAPSInt()); }

Expand Down
6 changes: 2 additions & 4 deletions clang/lib/AST/ByteCode/IntegralAP.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ template <bool Signed> class IntegralAP final {

IntegralAP(APInt V) : V(V) {}
/// Arbitrary value for uninitialized variables.
IntegralAP() : IntegralAP(-1, 3) {}
IntegralAP() : IntegralAP(Signed ? -1 : 7, 3) {}

IntegralAP operator-() const { return IntegralAP(-V); }
IntegralAP operator-(const IntegralAP &Other) const {
Expand Down Expand Up @@ -112,9 +112,7 @@ template <bool Signed> class IntegralAP final {

template <unsigned Bits, bool InputSigned>
static IntegralAP from(Integral<Bits, InputSigned> I, unsigned BitWidth) {
APInt Copy = APInt(BitWidth, static_cast<uint64_t>(I), InputSigned);

return IntegralAP<Signed>(Copy);
return IntegralAP<Signed>(I.toAPInt(BitWidth));
}

static IntegralAP zero(int32_t BitWidth) {
Expand Down
1 change: 0 additions & 1 deletion clang/lib/AST/ByteCode/Interp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,6 @@ bool Free(InterpState &S, CodePtr OpPC, bool DeleteIsArrayForm,
return nullptr;
};

AllocType->dump();
if (const FunctionDecl *VirtualDelete =
getVirtualOperatorDelete(AllocType);
VirtualDelete &&
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2525,6 +2525,7 @@ bool Type::isSveVLSBuiltinType() const {
case BuiltinType::SveBool:
case BuiltinType::SveBoolx2:
case BuiltinType::SveBoolx4:
case BuiltinType::SveMFloat8:
return true;
default:
return false;
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/CodeGen/CGVTT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,9 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT,
cast<llvm::StructType>(VTable->getValueType())
->getElementType(AddressPoint.VTableIndex));
unsigned Offset = ComponentSize * AddressPoint.AddressPointIndex;
llvm::ConstantRange InRange(llvm::APInt(32, -Offset, true),
llvm::APInt(32, VTableSize - Offset, true));
llvm::ConstantRange InRange(
llvm::APInt(32, (int)-Offset, true),
llvm::APInt(32, (int)(VTableSize - Offset), true));
llvm::Constant *Init = llvm::ConstantExpr::getGetElementPtr(
VTable->getValueType(), VTable, Idxs, /*InBounds=*/true, InRange);

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CoverageMappingGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2066,7 +2066,7 @@ struct CounterCoverageMappingBuilder
GapRegionCounter = OutCount;
}

if (!S->isConsteval() && !llvm::EnableSingleByteCoverage)
if (!llvm::EnableSingleByteCoverage)
// Create Branch Region around condition.
createBranchRegion(S->getCond(), ThenCount,
subtractCounters(ParentCount, ThenCount));
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/CodeGen/ItaniumCXXABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2099,8 +2099,9 @@ ItaniumCXXABI::getVTableAddressPoint(BaseSubobject Base,
unsigned VTableSize =
ComponentSize * Layout.getVTableSize(AddressPoint.VTableIndex);
unsigned Offset = ComponentSize * AddressPoint.AddressPointIndex;
llvm::ConstantRange InRange(llvm::APInt(32, -Offset, true),
llvm::APInt(32, VTableSize - Offset, true));
llvm::ConstantRange InRange(
llvm::APInt(32, (int)-Offset, true),
llvm::APInt(32, (int)(VTableSize - Offset), true));
return llvm::ConstantExpr::getGetElementPtr(
VTable->getValueType(), VTable, Indices, /*InBounds=*/true, InRange);
}
Expand Down
6 changes: 3 additions & 3 deletions clang/lib/Parse/ParseInit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,9 +436,9 @@ ExprResult Parser::createEmbedExpr() {
ASTContext &Context = Actions.getASTContext();
SourceLocation StartLoc = ConsumeAnnotationToken();
if (Data->BinaryData.size() == 1) {
Res = IntegerLiteral::Create(Context,
llvm::APInt(CHAR_BIT, Data->BinaryData.back()),
Context.UnsignedCharTy, StartLoc);
Res = IntegerLiteral::Create(
Context, llvm::APInt(CHAR_BIT, (unsigned char)Data->BinaryData.back()),
Context.UnsignedCharTy, StartLoc);
} else {
auto CreateStringLiteralFromStringRef = [&](StringRef Str, QualType Ty) {
llvm::APSInt ArraySize =
Expand Down
11 changes: 7 additions & 4 deletions clang/lib/Parse/ParseStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1518,10 +1518,13 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
SourceLocation ConstevalLoc;

if (Tok.is(tok::kw_constexpr)) {
Diag(Tok, getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_constexpr_if
: diag::ext_constexpr_if);
IsConstexpr = true;
ConsumeToken();
// C23 supports constexpr keyword, but only for object definitions.
if (getLangOpts().CPlusPlus) {
Diag(Tok, getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_constexpr_if
: diag::ext_constexpr_if);
IsConstexpr = true;
ConsumeToken();
}
} else {
if (Tok.is(tok::exclaim)) {
NotLocation = ConsumeToken();
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7883,6 +7883,9 @@ NamedDecl *Sema::ActOnVariableDeclarator(
// Handle attributes prior to checking for duplicates in MergeVarDecl
ProcessDeclAttributes(S, NewVD, D);

if (getLangOpts().HLSL)
HLSL().ActOnVariableDeclarator(NewVD);

// FIXME: This is probably the wrong location to be doing this and we should
// probably be doing this for more attributes (especially for function
// pointer attributes such as format, warn_unused_result, etc.). Ideally
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/Sema/SemaExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3598,9 +3598,10 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) {
Lit, Tok.getLocation());
}

ExprResult Sema::ActOnIntegerConstant(SourceLocation Loc, uint64_t Val) {
ExprResult Sema::ActOnIntegerConstant(SourceLocation Loc, int64_t Val) {
unsigned IntSize = Context.getTargetInfo().getIntWidth();
return IntegerLiteral::Create(Context, llvm::APInt(IntSize, Val),
return IntegerLiteral::Create(Context,
llvm::APInt(IntSize, Val, /*isSigned=*/true),
Context.IntTy, Loc);
}

Expand Down
376 changes: 263 additions & 113 deletions clang/lib/Sema/SemaHLSL.cpp

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion clang/lib/Sema/SemaOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5697,7 +5697,9 @@ StmtResult SemaOpenMP::ActOnOpenMPCanonicalLoop(Stmt *AStmt) {
llvm_unreachable("unhandled unary increment operator");
}
Step = IntegerLiteral::Create(
Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), Direction), LogicalTy, {});
Ctx,
llvm::APInt(Ctx.getIntWidth(LogicalTy), Direction, /*isSigned=*/true),
LogicalTy, {});
} else if (auto *IncBin = dyn_cast<BinaryOperator>(Inc)) {
if (IncBin->getOpcode() == BO_AddAssign) {
Step = IncBin->getRHS();
Expand Down
3 changes: 3 additions & 0 deletions clang/test/AST/ast-dump-aarch64-sve-types.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@
// CHECK: TypedefDecl {{.*}} implicit __SVBfloat16_t '__SVBfloat16_t'
// CHECK-NEXT: -BuiltinType {{.*}} '__SVBfloat16_t'

// CHECK: TypedefDecl {{.*}} implicit __SVMfloat8_t '__SVMfloat8_t'
// CHECK-NEXT: -BuiltinType {{.*}} '__SVMfloat8_t'

// CHECK: TypedefDecl {{.*}} implicit __SVBool_t '__SVBool_t'
// CHECK-NEXT: -BuiltinType {{.*}} '__SVBool_t'

Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/aarch64-sve.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// CHECK: %f16 = alloca <vscale x 8 x half>, align 16
// CHECK: %f32 = alloca <vscale x 4 x float>, align 16
// CHECK: %f64 = alloca <vscale x 2 x double>, align 16
// CHECK: %mf8 = alloca <vscale x 16 x i8>, align 16
// CHECK: %bf16 = alloca <vscale x 8 x bfloat>, align 16
// CHECK: %b8 = alloca <vscale x 16 x i1>, align 2

Expand All @@ -33,6 +34,7 @@ void test_locals(void) {
__SVFloat32_t f32;
__SVFloat64_t f64;

__SVMfloat8_t mf8;
__SVBfloat16_t bf16;

__SVBool_t b8;
Expand Down
42 changes: 42 additions & 0 deletions clang/test/CodeGen/sanitize-coverage-gated-callbacks.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// RUN: %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=trace-pc-guard -mllvm -sanitizer-coverage-gated-trace-callbacks=1 -o - | FileCheck %s --check-prefixes=CHECK,GATED
// RUN: %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=trace-pc-guard -mllvm -sanitizer-coverage-gated-trace-callbacks=0 -o - | FileCheck %s --check-prefixes=CHECK,PLAIN
// RUN: not %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=trace-pc -mllvm -sanitizer-coverage-gated-trace-callbacks=1 -o /dev/null 2>&1 | FileCheck %s --check-prefixes=INCOMPATIBLE
// RUN: not %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=inline-8bit-counters -mllvm -sanitizer-coverage-gated-trace-callbacks=1 -o /dev/null 2>&1 | FileCheck %s --check-prefixes=INCOMPATIBLE
// RUN: not %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=inline-bool-flag -mllvm -sanitizer-coverage-gated-trace-callbacks=1 -o /dev/null 2>&1 | FileCheck %s --check-prefixes=INCOMPATIBLE

// Verify that we do not emit the __sancov_gate section for "plain" trace-pc-guard
// GATED: section "__DATA,__sancov_gate"
// PLAIN-NOT: section "__DATA,__sancov_gate"

// Produce an error for all incompatible sanitizer coverage modes.
// INCOMPATIBLE: error: 'sanitizer-coverage-gated-trace-callbacks' is only supported with trace-pc-guard

int x[10];

// CHECK: define{{.*}} void @foo
void foo(int n, int m) {
// COM: Verify that we're emitting the call to __sanitizer_cov_trace_pc_guard upon
// COM: checking the value of __sancov_should_track.
// GATED: [[VAL:%.*]] = load i64, {{.*}}@__sancov_should_track
// GATED-NOT: [[VAL:%.*]] = load i64, i64* @__sancov_should_track
// GATED-NEXT: [[CMP:%.*]] = icmp ne i64 [[VAL]], 0
// GATED-NEXT: br i1 [[CMP]], label %[[L_TRUE:.*]], label %[[L_FALSE:.*]], !prof [[WEIGHTS:!.+]]
// GATED: [[L_TRUE]]:
// GATED-NEXT: call void @__sanitizer_cov_trace_pc_guard
// GATED: br i1 [[CMP]], label %[[L_TRUE_2:.*]], label %[[L_FALSE_2:.*]]
// GATED: [[L_TRUE_2]]:
// GATED-NEXT: call void @__sanitizer_cov_trace_pc_guard
// GATED: [[WEIGHTS]] = !{!"branch_weights", i32 1, i32 100000}

// COM: With the non-gated instrumentation, we should not emit the
// COM: __sancov_should_track global.
// PLAIN-NOT: __sancov_should_track
// But we should still be emitting the calls to the callback.
// PLAIN: call void @__sanitizer_cov_trace_pc_guard
if (n) {
x[n] = 42;
if (m) {
x[m] = 41;
}
}
}
4 changes: 4 additions & 0 deletions clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ void f(__SVFloat16_t, __SVFloat16_t);
void f(__SVFloat32_t, __SVFloat32_t);
void f(__SVFloat64_t, __SVFloat64_t);
void f(__SVBfloat16_t, __SVBfloat16_t);
void f(__SVMfloat8_t, __SVMfloat8_t);
void f(__SVBool_t, __SVBool_t);
void f(__SVCount_t, __SVCount_t);

Expand Down Expand Up @@ -150,6 +151,7 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t);
// CHECK-NEXT: call void @_Z1fu13__SVFloat16_tS_(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer)
// CHECK-NEXT: call void @_Z1fu13__SVFloat32_tS_(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer)
// CHECK-NEXT: call void @_Z1fu13__SVFloat64_tS_(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer)
// CHECK-NEXT: call void @_Z1fu13__SVMfloat8_tS_(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer)
// CHECK-NEXT: call void @_Z1fu14__SVBfloat16_tS_(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer)
// CHECK-NEXT: call void @_Z1fu10__SVBool_tS_(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer)
// CHECK-NEXT: call void @_Z1fu11__SVCount_tS_(target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer)
Expand Down Expand Up @@ -664,6 +666,7 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t);
// COMPAT_17-NEXT: call void @_Z1fu13__SVFloat16_tu13__SVFloat16_t(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer)
// COMPAT_17-NEXT: call void @_Z1fu13__SVFloat32_tu13__SVFloat32_t(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer)
// COMPAT_17-NEXT: call void @_Z1fu13__SVFloat64_tu13__SVFloat64_t(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer)
// COMPAT_17-NEXT: call void @_Z1fu13__SVMfloat8_tu13__SVMfloat8_t(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer)
// COMPAT_17-NEXT: call void @_Z1fu14__SVBFloat16_tu14__SVBFloat16_t(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer)
// COMPAT_17-NEXT: call void @_Z1fu10__SVBool_tu10__SVBool_t(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer)
// COMPAT_17-NEXT: call void @_Z1fu11__SVCount_tu11__SVCount_t(target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer)
Expand Down Expand Up @@ -1100,6 +1103,7 @@ void foo() {
f(__SVFloat16_t(), __SVFloat16_t());
f(__SVFloat32_t(), __SVFloat32_t());
f(__SVFloat64_t(), __SVFloat64_t());
f(__SVMfloat8_t(), __SVMfloat8_t());
f(__SVBfloat16_t(), __SVBfloat16_t());
f(__SVBool_t(), __SVBool_t());
f(__SVCount_t(), __SVCount_t());
Expand Down
5 changes: 5 additions & 0 deletions clang/test/CodeGenCXX/aarch64-sve-typeinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ auto &f64 = typeid(__SVFloat64_t);

auto &bf16 = typeid(__SVBfloat16_t);

auto &mf8 = typeid(__SVMfloat8_t);

auto &b8 = typeid(__SVBool_t);
auto &c8 = typeid(__SVCount_t);

Expand Down Expand Up @@ -60,6 +62,9 @@ auto &c8 = typeid(__SVCount_t);
// CHECK-DAG: @_ZTSu14__SVBfloat16_t = {{.*}} c"u14__SVBfloat16_t\00"
// CHECK-DAG: @_ZTIu14__SVBfloat16_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu14__SVBfloat16_t

// CHECK-DAG: @_ZTSu13__SVMfloat8_t = {{.*}} c"u13__SVMfloat8_t\00"
// CHECK-DAG: @_ZTIu13__SVMfloat8_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu13__SVMfloat8_t

// CHECK-DAG: @_ZTSu10__SVBool_t = {{.*}} c"u10__SVBool_t\00"
// CHECK-DAG: @_ZTIu10__SVBool_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu10__SVBool_t

Expand Down
17 changes: 17 additions & 0 deletions clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// CHECK-NEXT: [[U16:%.*]] = alloca <vscale x 8 x i16>, align 16
// CHECK-NEXT: [[U32:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT: [[U64:%.*]] = alloca <vscale x 2 x i64>, align 16
// CHECK-NEXT: [[MF8:%.*]] = alloca <vscale x 16 x i8>, align 16
// CHECK-NEXT: [[F16:%.*]] = alloca <vscale x 8 x half>, align 16
// CHECK-NEXT: [[F32:%.*]] = alloca <vscale x 4 x float>, align 16
// CHECK-NEXT: [[F64:%.*]] = alloca <vscale x 2 x double>, align 16
Expand Down Expand Up @@ -64,6 +65,7 @@
// CHECK-NEXT: store <vscale x 8 x i16> zeroinitializer, ptr [[U16]], align 16
// CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[U32]], align 16
// CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[U64]], align 16
// CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[MF8]], align 16
// CHECK-NEXT: store <vscale x 8 x half> zeroinitializer, ptr [[F16]], align 16
// CHECK-NEXT: store <vscale x 4 x float> zeroinitializer, ptr [[F32]], align 16
// CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[F64]], align 16
Expand Down Expand Up @@ -119,6 +121,7 @@ void test_locals(void) {
__SVUint16_t u16{};
__SVUint32_t u32{};
__SVUint64_t u64{};
__SVMfloat8_t mf8{};
__SVFloat16_t f16{};
__SVFloat32_t f32{};
__SVFloat64_t f64{};
Expand Down Expand Up @@ -282,6 +285,20 @@ void test_copy_u64(__SVUint64_t a) {
__SVUint64_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_mf8u13__SVMfloat8_t
// CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x i8>, align 16
// CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x i8>, align 16
// CHECK-NEXT: store <vscale x 16 x i8> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: store <vscale x 16 x i8> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT: ret void
//
void test_copy_mf8(__SVMfloat8_t a) {
__SVMfloat8_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_f16u13__SVFloat16_t
// CHECK-SAME: (<vscale x 8 x half> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
Expand Down
3 changes: 3 additions & 0 deletions clang/test/CodeGenObjC/aarch64-sve-types.m
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,8 @@
// CHECK: error: cannot yet @encode type __SVBfloat16_t
const char bf16[] = @encode(__SVBfloat16_t);

// CHECK: error: cannot yet @encode type __SVMfloat8_t
const char mf8[] = @encode(__SVMfloat8_t);

// CHECK: error: cannot yet @encode type __SVBool_t
const char b8[] = @encode(__SVBool_t);
42 changes: 39 additions & 3 deletions clang/test/Driver/aarch64-v96a.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,50 @@
// RUN: %clang -target aarch64 -mlittle-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A %s
// RUN: %clang -target aarch64_be -mlittle-endian -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A %s
// RUN: %clang -target aarch64_be -mlittle-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A %s
// GENERICV96A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"
// GENERICV96A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+cmpbr"{{.*}} "-target-feature" "+fprcvt"{{.*}} "-target-feature" "+sve2p2"

// RUN: %clang -target aarch64_be -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
// RUN: %clang -target aarch64_be -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
// RUN: %clang -target aarch64 -mbig-endian -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
// RUN: %clang -target aarch64 -mbig-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
// RUN: %clang -target aarch64_be -mbig-endian -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
// RUN: %clang -target aarch64_be -mbig-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
// GENERICV96A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"
//
// GENERICV96A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+cmpbr"{{.*}} "-target-feature" "+fprcvt"{{.*}} "-target-feature" "+sve2p2"

// ===== Features supported on aarch64 =====

// RUN: %clang -target aarch64 -march=armv9.6a+f8f16mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F16MM %s
// RUN: %clang -target aarch64 -march=armv9.6-a+f8f16mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F16MM %s
// V96A-F8F16MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+f8f16mm"

// RUN: %clang -target aarch64 -march=armv9.6a+f8f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F32MM %s
// RUN: %clang -target aarch64 -march=armv9.6-a+f8f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F32MM %s
// V96A-F8F32MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+f8f32mm"

// RUN: %clang -target aarch64 -march=armv9.6a+lsfe -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSFE %s
// RUN: %clang -target aarch64 -march=armv9.6-a+lsfe -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSFE %s
// V96A-LSFE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+lsfe"

// RUN: %clang -target aarch64 -march=armv9.6a+sme2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SME2p2 %s
// RUN: %clang -target aarch64 -march=armv9.6-a+sme2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SME2p2 %s
// V96A-SME2p2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sme2p2"

// RUN: %clang -target aarch64 -march=armv9.6a+ssve-aes -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SSVE-AES %s
// RUN: %clang -target aarch64 -march=armv9.6-a+ssve-aes -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SSVE-AES %s
// V96A-SSVE-AES: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+ssve-aes"

// RUN: %clang -target aarch64 -march=armv9.6a+sve2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE2p2 %s
// RUN: %clang -target aarch64 -march=armv9.6-a+sve2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE2p2 %s
// V96A-SVE2p2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve2p2"

// RUN: %clang -target aarch64 -march=armv9.6a+sve-aes2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-AES2 %s
// RUN: %clang -target aarch64 -march=armv9.6-a+sve-aes2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-AES2 %s
// V96A-SVE-AES2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-aes2"

// RUN: %clang -target aarch64 -march=armv9.6a+sve-bfscale -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-BFSCALE %s
// RUN: %clang -target aarch64 -march=armv9.6-a+sve-bfscale -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-BFSCALE %s
// V96A-SVE-BFSCALE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-bfscale"

// RUN: %clang -target aarch64 -march=armv9.6a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s
// RUN: %clang -target aarch64 -march=armv9.6-a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s
// V96A-SVE-F16F32MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-f16f32mm"
1 change: 0 additions & 1 deletion clang/test/Driver/hip-include-path.hip
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// REQUIRES: libgcc
// UNSUPPORTED: system-windows

// RUN: %clang -c -### --target=x86_64-unknown-linux-gnu --cuda-gpu-arch=gfx900 \
Expand Down
2 changes: 0 additions & 2 deletions clang/test/Driver/hip-runtime-libs-msvc.hip
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// REQUIRES: system-windows

// RUN: touch %t.o

// Test HIP runtime lib args specified by --rocm-path.
Expand Down
11 changes: 11 additions & 0 deletions clang/test/Driver/print-supported-extensions-aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// CHECK-NEXT: bf16 FEAT_BF16 Enable BFloat16 Extension
// CHECK-NEXT: brbe FEAT_BRBE Enable Branch Record Buffer Extension
// CHECK-NEXT: bti FEAT_BTI Enable Branch Target Identification
// CHECK-NEXT: cmpbr FEAT_CMPBR Enable Armv9.6-A base compare and branch instructions
// CHECK-NEXT: fcma FEAT_FCMA Enable Armv8.3-A Floating-point complex number support
// CHECK-NEXT: cpa FEAT_CPA Enable Armv9.5-A Checked Pointer Arithmetic
// CHECK-NEXT: crc FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions
Expand All @@ -18,6 +19,8 @@
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
// CHECK-NEXT: f8f16mm FEAT_F8F16MM Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication
// CHECK-NEXT: f8f32mm FEAT_F8F32MM Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication
// CHECK-NEXT: faminmax FEAT_FAMINMAX Enable FAMIN and FAMAX instructions
// CHECK-NEXT: flagm FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions
// CHECK-NEXT: fp FEAT_FP Enable Armv8.0-A Floating Point Extensions
Expand All @@ -26,6 +29,7 @@
// CHECK-NEXT: fp8dot2 FEAT_FP8DOT2 Enable FP8 2-way dot instructions
// CHECK-NEXT: fp8dot4 FEAT_FP8DOT4 Enable FP8 4-way dot instructions
// CHECK-NEXT: fp8fma FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions
// CHECK-NEXT: fprcvt FEAT_FPRCVT Enable Armv9.6-A base convert instructions for SIMD&FP scalar register operands of different input and output sizes
// CHECK-NEXT: fp16 FEAT_FP16 Enable half-precision floating-point data processing
// CHECK-NEXT: gcs FEAT_GCS Enable Armv9.4-A Guarded Call Stack Extension
// CHECK-NEXT: hbc FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension
Expand All @@ -35,6 +39,7 @@
// CHECK-NEXT: ls64 FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension
// CHECK-NEXT: lse FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions
// CHECK-NEXT: lse128 FEAT_LSE128 Enable Armv9.4-A 128-bit Atomic instructions
// CHECK-NEXT: lsfe FEAT_LSFE Enable Armv9.6-A base Atomic floating-point in-memory instructions
// CHECK-NEXT: lut FEAT_LUT Enable Lookup Table instructions
// CHECK-NEXT: mops FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions
// CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension
Expand Down Expand Up @@ -64,20 +69,26 @@
// CHECK-NEXT: sme-lutv2 FEAT_SME_LUTv2 Enable Scalable Matrix Extension (SME) LUTv2 instructions
// CHECK-NEXT: sme2 FEAT_SME2 Enable Scalable Matrix Extension 2 (SME2) instructions
// CHECK-NEXT: sme2p1 FEAT_SME2p1 Enable Scalable Matrix Extension 2.1 instructions
// CHECK-NEXT: sme2p2 FEAT_SME2p2 Enable Armv9.6-A Scalable Matrix Extension 2.2 instructions
// CHECK-NEXT: profile FEAT_SPE Enable Statistical Profiling extension
// CHECK-NEXT: predres2 FEAT_SPECRES2 Enable Speculation Restriction Instruction
// CHECK-NEXT: ssbs FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: ssve-aes FEAT_SSVE_AES Enable Armv9.6-A SVE2 AES support in streaming SVE mode
// CHECK-NEXT: ssve-fp8dot2 FEAT_SSVE_FP8DOT2 Enable SVE2 FP8 2-way dot product instructions
// CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions
// CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions
// CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
// CHECK-NEXT: sve-aes2 FEAT_SVE_AES2 Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions
// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions
// CHECK-NEXT: sve-bfscale FEAT_SVE_BFSCALE Enable Armv9.6-A SVE BFloat16 scaling instructions
// CHECK-NEXT: sve-f16f32mm FEAT_SVE_F16F32MM Enable Armv9.6-A FP16 to FP32 Matrix Multiply
// CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
// CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions
// CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
// CHECK-NEXT: sve2-sha3 FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions
// CHECK-NEXT: sve2-sm4 FEAT_SVE_SM4 Enable SM4 SVE2 instructions
// CHECK-NEXT: sve2p1 FEAT_SVE2p1 Enable Scalable Vector Extension 2.1 instructions
// CHECK-NEXT: sve2p2 FEAT_SVE2p2 Enable Armv9.6-A Scalable Vector Extension 2.2 instructions
// CHECK-NEXT: the FEAT_THE Enable Armv8.9-A Translation Hardening Extension
// CHECK-NEXT: tlbiw FEAT_TLBIW Enable Armv9.5-A TLBI VMALL for Dirty State
// CHECK-NEXT: tme FEAT_TME Enable Transactional Memory Extension
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Modules/no-external-type-id.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export module b;
import a;
export int b();

// CHECK: <DECL_FUNCTION {{.*}} op8=4056
// CHECK: <DECL_FUNCTION {{.*}} op8=4064
// CHECK: <TYPE_FUNCTION_PROTO

//--- a.v1.cppm
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Sema/aarch64-neon-target.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ __attribute__((target("arch=armv8.3-a+fp16")))
void test_v83(float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64) {
vcaddq_rot90_f32(v4f32, v4f32);
vcmla_rot90_f16(v4f16, v4f16, v4f16);
vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1);
vcmlaq_rot270_f64(v2f64, v2f64, v2f64);
}

__attribute__((target("arch=armv8.5-a")))
Expand Down Expand Up @@ -95,7 +95,7 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t
// 8.3 - complex
vcaddq_rot90_f32(v4f32, v4f32); // expected-error {{always_inline function 'vcaddq_rot90_f32' requires target feature 'v8.3a'}}
vcmla_rot90_f16(v4f16, v4f16, v4f16); // expected-error {{always_inline function 'vcmla_rot90_f16' requires target feature 'v8.3a'}}
vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
vcmlaq_rot270_f64(v2f64, v2f64, v2f64); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
// 8.5 - frint
vrnd32xq_f32(v4f32); // expected-error {{always_inline function 'vrnd32xq_f32' requires target feature 'v8.5a'}}

Expand Down
3 changes: 3 additions & 0 deletions clang/test/Sema/aarch64-sve-types.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ void f(void) {
int size_bf16[sizeof(__SVBfloat16_t) == 0 ? 1 : -1]; // expected-error {{invalid application of 'sizeof' to sizeless type '__SVBfloat16_t'}}
int align_bf16[__alignof__(__SVBfloat16_t) == 16 ? 1 : -1]; // expected-error {{invalid application of '__alignof' to sizeless type '__SVBfloat16_t'}}

int size_mf8[sizeof(__SVMfloat8_t) == 0 ? 1 : -1]; // expected-error {{invalid application of 'sizeof' to sizeless type '__SVMfloat8_t'}}
int align_mf8[__alignof__(__SVMfloat8_t) == 16 ? 1 : -1]; // expected-error {{invalid application of '__alignof' to sizeless type '__SVMfloat8_t'}}

int size_b8[sizeof(__SVBool_t) == 0 ? 1 : -1]; // expected-error {{invalid application of 'sizeof' to sizeless type '__SVBool_t'}}
int align_b8[__alignof__(__SVBool_t) == 2 ? 1 : -1]; // expected-error {{invalid application of '__alignof' to sizeless type '__SVBool_t'}}
}
31 changes: 31 additions & 0 deletions clang/test/Sema/aarch64-vcmla-undef.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.3a -ffreestanding -fsyntax-only -verify -verify-ignore-unexpected=note %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>

// Checks that none of the float64 forms of the vcmla intrinsic family are
// declared by <arm_neon.h>: every call below must be diagnosed as a call to an
// undeclared function.
//
// Argument convention used throughout: the non-q forms take float64x1_t for
// the first two operands and the q forms take float64x2_t; the `_lane`
// variants index a float64x1_t vector and the `_laneq` variants index a
// float64x2_t vector.
void test(float64x1_t v1f64, float64x2_t v2f64) {
  // No rotation.
  vcmla_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_f64'}}
  vcmla_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_lane_f64'}}
  vcmla_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_laneq_f64'}}
  vcmlaq_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_lane_f64'}}
  vcmlaq_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_laneq_f64'}}

  // rot90 forms.
  vcmla_rot90_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot90_f64'}}
  vcmla_rot90_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot90_lane_f64'}}
  vcmla_rot90_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot90_laneq_f64'}}
  vcmlaq_rot90_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot90_lane_f64'}}
  vcmlaq_rot90_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot90_laneq_f64'}}

  // rot180 forms.
  vcmla_rot180_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot180_f64'}}
  vcmla_rot180_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot180_lane_f64'}}
  vcmla_rot180_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot180_laneq_f64'}}
  vcmlaq_rot180_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot180_lane_f64'}}
  vcmlaq_rot180_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot180_laneq_f64'}}

  // rot270 forms.
  vcmla_rot270_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot270_f64'}}
  vcmla_rot270_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot270_lane_f64'}}
  vcmla_rot270_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot270_laneq_f64'}}
  vcmlaq_rot270_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot270_lane_f64'}}
  vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot270_laneq_f64'}}
}
13 changes: 13 additions & 0 deletions clang/test/Sema/arm-mfp8.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %clang_cc1 -fsyntax-only -verify=sve -triple aarch64-arm-none-eabi \
// RUN: -target-feature -fp8 -target-feature +sve %s

// REQUIRES: aarch64-registered-target

#include <arm_sve.h>
// Checks that arithmetic mixing an svmfloat8_t operand with an svuint8_t
// operand is rejected: each of the four binary operators below must produce
// the conversion diagnostic spelled out in the directive on its own line.
void test_vector_sve(svmfloat8_t a, svuint8_t c) {
a + c; // sve-error {{cannot convert between vector type 'svuint8_t' (aka '__SVUint8_t') and vector type 'svmfloat8_t' (aka '__SVMfloat8_t') as implicit conversion would cause truncation}}
a - c; // sve-error {{cannot convert between vector type 'svuint8_t' (aka '__SVUint8_t') and vector type 'svmfloat8_t' (aka '__SVMfloat8_t') as implicit conversion would cause truncation}}
a * c; // sve-error {{cannot convert between vector type 'svuint8_t' (aka '__SVUint8_t') and vector type 'svmfloat8_t' (aka '__SVMfloat8_t') as implicit conversion would cause truncation}}
a / c; // sve-error {{cannot convert between vector type 'svuint8_t' (aka '__SVUint8_t') and vector type 'svmfloat8_t' (aka '__SVMfloat8_t') as implicit conversion would cause truncation}}
}

7 changes: 7 additions & 0 deletions clang/test/Sema/constexpr.c
Original file line number Diff line number Diff line change
Expand Up @@ -367,3 +367,10 @@ struct S10 {
constexpr struct S10 c = { 255 };
// FIXME-expected-error@-1 {{constexpr initializer evaluates to 255 which is not exactly representable in 'long long' bit-field with width 8}}
// See: GH#101299

// `if constexpr` must be rejected in this C test: the statement below has to
// produce the parse diagnostic given in the directive on the same line.
void constexprif() {
if constexpr (300) {} //expected-error {{expected '(' after 'if'}}
}
// `if consteval` must be rejected in this C test: the statement below has to
// produce the parse diagnostic given in the directive on the same line.
void constevalif() {
if consteval (300) {} //expected-error {{expected '(' after 'if'}}
}
8 changes: 4 additions & 4 deletions clang/test/SemaHLSL/resource_binding_attr_error_udt.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ struct Eg12{
MySRV s1;
MySRV s2;
};
// expected-warning@+3{{binding type 'u' only applies to types containing UAV resources}}
// expected-warning@+2{{binding type 'u' only applies to types containing UAV resources}}
// expected-error@+1{{binding type 'u' cannot be applied more than once}}
Eg12 e12 : register(u9) : register(u10);
Expand All @@ -115,12 +114,14 @@ struct Eg13{
MySRV s1;
MySRV s2;
};
// expected-warning@+4{{binding type 'u' only applies to types containing UAV resources}}
// expected-warning@+3{{binding type 'u' only applies to types containing UAV resources}}
// expected-warning@+2{{binding type 'u' only applies to types containing UAV resources}}
// expected-error@+2{{binding type 'u' cannot be applied more than once}}
// expected-error@+1{{binding type 'u' cannot be applied more than once}}
Eg13 e13 : register(u9) : register(u10) : register(u11);

// expected-error@+1{{binding type 't' cannot be applied more than once}}
Eg13 e13_2 : register(t11) : register(t12);

struct Eg14{
MyTemplatedUAV<int> r1;
};
Expand All @@ -132,4 +133,3 @@ struct Eg15 {
};
// expected no error
Eg15 e15 : register(c0);

2 changes: 2 additions & 0 deletions clang/test/SemaObjC/aarch64-sve-types.m
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,7 @@ @interface foo

@property(nullable) __SVBfloat16_t bf16; // expected-error {{cannot be applied to non-pointer type}}

@property(nullable) __SVMfloat8_t mf8; // expected-error {{cannot be applied to non-pointer type}}

@property(nullable) __SVBool_t b8; // expected-error {{cannot be applied to non-pointer type}}
@end
41 changes: 33 additions & 8 deletions clang/utils/TableGen/SveEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ using TypeSpec = std::string;

namespace {
class SVEType {
bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat;
bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat, MFloat;
bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp,
Svcount;
unsigned Bitwidth, ElementBitwidth, NumVectors;
Expand All @@ -61,10 +61,10 @@ class SVEType {

SVEType(StringRef TS, char CharMod, unsigned NumVectors = 1)
: Float(false), Signed(true), Immediate(false), Void(false),
Constant(false), Pointer(false), BFloat(false), DefaultType(false),
IsScalable(true), Predicate(false), PredicatePattern(false),
PrefetchOp(false), Svcount(false), Bitwidth(128), ElementBitwidth(~0U),
NumVectors(NumVectors) {
Constant(false), Pointer(false), BFloat(false), MFloat(false),
DefaultType(false), IsScalable(true), Predicate(false),
PredicatePattern(false), PrefetchOp(false), Svcount(false),
Bitwidth(128), ElementBitwidth(~0U), NumVectors(NumVectors) {
if (!TS.empty())
applyTypespec(TS);
applyModifier(CharMod);
Expand All @@ -82,11 +82,14 @@ class SVEType {
bool isVector() const { return NumVectors > 0; }
bool isScalableVector() const { return isVector() && IsScalable; }
bool isFixedLengthVector() const { return isVector() && !IsScalable; }
bool isChar() const { return ElementBitwidth == 8; }
bool isChar() const { return ElementBitwidth == 8 && !MFloat; }
bool isVoid() const { return Void && !Pointer; }
bool isDefault() const { return DefaultType; }
bool isFloat() const { return Float && !BFloat; }
bool isBFloat() const { return BFloat && !Float; }
bool isFloat() const { return Float && !BFloat && !MFloat; }
bool isBFloat() const { return BFloat && !Float && !MFloat; }
bool isMFloat() const {
return MFloat && !BFloat && !Float;
}
bool isFloatingPoint() const { return Float || BFloat; }
bool isInteger() const {
return !isFloatingPoint() && !Predicate && !Svcount;
Expand Down Expand Up @@ -454,6 +457,9 @@ std::string SVEType::builtin_str() const {
else if (isBFloat()) {
assert(ElementBitwidth == 16 && "Not a valid BFloat.");
S += "y";
} else if (isMFloat()) {
assert(ElementBitwidth == 8 && "Not a valid MFloat.");
S += "m";
}

if (!isFloatingPoint()) {
Expand Down Expand Up @@ -509,6 +515,8 @@ std::string SVEType::str() const {
S += "bool";
else if (isBFloat())
S += "bfloat";
else if (isMFloat())
S += "mfloat";
else
S += "int";

Expand Down Expand Up @@ -572,8 +580,16 @@ void SVEType::applyTypespec(StringRef TS) {
case 'b':
BFloat = true;
Float = false;
MFloat = false;
ElementBitwidth = 16;
break;
case 'm':
Signed = false;
MFloat = true;
Float = false;
BFloat = false;
ElementBitwidth = 8;
break;
default:
llvm_unreachable("Unhandled type code!");
}
Expand Down Expand Up @@ -1037,6 +1053,8 @@ std::string Intrinsic::replaceTemplatedArgs(std::string Name, TypeSpec TS,
TypeCode = 'b';
else if (T.isBFloat())
TypeCode = "bf";
else if (T.isMFloat())
TypeCode = "mfp";
else
TypeCode = 'f';
Ret.replace(Pos, NumChars, TypeCode + utostr(T.getElementSizeInBits()));
Expand Down Expand Up @@ -1130,6 +1148,11 @@ uint64_t SVEEmitter::encodeTypeFlags(const SVEType &T) {
return encodeEltType("EltTyBFloat16");
}

if (T.isMFloat()) {
assert(T.getElementSizeInBits() == 8 && "Not a valid MFloat.");
return encodeEltType("EltTyMFloat8");
}

if (T.isPredicateVector() || T.isSvcount()) {
switch (T.getElementSizeInBits()) {
case 8:
Expand Down Expand Up @@ -1305,6 +1328,8 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
OS << "#include <arm_bf16.h>\n";
OS << "#include <arm_vector_types.h>\n";

OS << "typedef __SVMfloat8_t svmfloat8_t;\n\n";

OS << "typedef __SVFloat32_t svfloat32_t;\n";
OS << "typedef __SVFloat64_t svfloat64_t;\n";
OS << "typedef __clang_svint8x2_t svint8x2_t;\n";
Expand Down
28 changes: 14 additions & 14 deletions compiler-rt/lib/fuzzer/FuzzerExtFunctionsWindows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ using namespace fuzzer;
#define STRINGIFY(A) STRINGIFY_(A)

#if LIBFUZZER_MSVC
#define GET_FUNCTION_ADDRESS(fn) &fn
#else
#define GET_FUNCTION_ADDRESS(fn) __builtin_function_start(fn)
#endif // LIBFUZZER_MSVC

// Copied from compiler-rt/lib/sanitizer_common/sanitizer_win_defs.h
#if defined(_M_IX86) || defined(__i386__)
#define WIN_SYM_PREFIX "_"
Expand All @@ -31,17 +36,9 @@ using namespace fuzzer;

// Declare external functions as having alternate names, so that we can
// determine if they are not defined.
#define EXTERNAL_FUNC(Name, Default) \
__pragma(comment(linker, "/alternatename:" WIN_SYM_PREFIX STRINGIFY( \
#define EXTERNAL_FUNC(Name, Default) \
__pragma(comment(linker, "/alternatename:" WIN_SYM_PREFIX STRINGIFY( \
Name) "=" WIN_SYM_PREFIX STRINGIFY(Default)))
#else
// Declare external functions as weak to allow them to default to a specified
// function if not defined explicitly. We must use weak symbols because clang's
// support for alternatename is not 100%, see
// https://bugs.llvm.org/show_bug.cgi?id=40218 for more details.
#define EXTERNAL_FUNC(Name, Default) \
__attribute__((weak, alias(STRINGIFY(Default))))
#endif // LIBFUZZER_MSVC

extern "C" {
#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \
Expand All @@ -57,20 +54,23 @@ extern "C" {
}

template <typename T>
static T *GetFnPtr(T *Fun, T *FunDef, const char *FnName, bool WarnIfMissing) {
static T *GetFnPtr(void *Fun, void *FunDef, const char *FnName,
bool WarnIfMissing) {
if (Fun == FunDef) {
if (WarnIfMissing)
Printf("WARNING: Failed to find function \"%s\".\n", FnName);
return nullptr;
}
return Fun;
return (T *)Fun;
}

namespace fuzzer {

ExternalFunctions::ExternalFunctions() {
#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \
this->NAME = GetFnPtr<decltype(::NAME)>(::NAME, ::NAME##Def, #NAME, WARN);
#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \
this->NAME = GetFnPtr<decltype(::NAME)>(GET_FUNCTION_ADDRESS(::NAME), \
GET_FUNCTION_ADDRESS(::NAME##Def), \
#NAME, WARN);

#include "FuzzerExtFunctions.def"

Expand Down
214 changes: 114 additions & 100 deletions compiler-rt/lib/lsan/lsan_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,26 +399,123 @@ static void ProcessThreadRegistry(Frontier *frontier) {
}

// Scans thread data (stacks and TLS) for heap pointers.
//
// Looks up the memory ranges of thread `os_id` with GetThreadRangesLocked()
// and, depending on the use_registers / use_stacks / use_tls flags, hands the
// saved registers, the stack, the extra stack ranges and the static and
// dynamic TLS blocks to ScanRangeForPointers() tagged as kReachable.
//
//   os_id        - id of the thread to scan.
//   sp           - stack pointer of the thread; 0 means "not known", in which
//                  case the beginning of the recorded stack range is used.
//   registers    - saved register values, scanned as one raw range.
//   extra_ranges - scratch vector supplied by the caller; must be empty on
//                  entry and is filled with the thread's extra stack ranges.
//   frontier     - forwarded to every scan call.
static void ProcessThread(tid_t os_id, uptr sp,
const InternalMmapVector<uptr> &registers,
InternalMmapVector<Range> &extra_ranges,
Frontier *frontier) {
// `extra_ranges` is owned by the caller, outside of this function and of the
// caller's loop, so that its mapped memory can be reused between threads.
CHECK(extra_ranges.empty());
LOG_THREADS("Processing thread %llu.\n", os_id);
uptr stack_begin, stack_end, tls_begin, tls_end, cache_begin, cache_end;
DTLS *dtls;
bool thread_found =
GetThreadRangesLocked(os_id, &stack_begin, &stack_end, &tls_begin,
&tls_end, &cache_begin, &cache_end, &dtls);
if (!thread_found) {
// If a thread can't be found in the thread registry, it's probably in the
// process of destruction. Log this event and move on.
LOG_THREADS("Thread %llu not found in registry.\n", os_id);
return;
}

// A zero SP means the caller could not provide one; fall back to the start
// of the recorded stack range so the whole stack gets scanned.
if (!sp)
sp = stack_begin;

if (flags()->use_registers) {
// Treat the register vector's storage as a plain memory range.
uptr registers_begin = reinterpret_cast<uptr>(registers.data());
uptr registers_end =
reinterpret_cast<uptr>(registers.data() + registers.size());
ScanRangeForPointers(registers_begin, registers_end, frontier, "REGISTERS",
kReachable);
}

if (flags()->use_stacks) {
LOG_THREADS("Stack at %p-%p (SP = %p).\n", (void *)stack_begin,
(void *)stack_end, (void *)sp);
if (sp < stack_begin || sp >= stack_end) {
// SP is outside the recorded stack range (e.g. the thread is running a
// signal handler on alternate stack, or swapcontext was used).
// Again, consider the entire stack range to be reachable.
LOG_THREADS("WARNING: stack pointer not in stack range.\n");
uptr page_size = GetPageSizeCached();
int skipped = 0;
// Advance past leading pages that are not accessible, one page at a time,
// so that the scan below only starts at readable memory.
while (stack_begin < stack_end &&
!IsAccessibleMemoryRange(stack_begin, 1)) {
skipped++;
stack_begin += page_size;
}
LOG_THREADS("Skipped %d guard page(s) to obtain stack %p-%p.\n", skipped,
(void *)stack_begin, (void *)stack_end);
} else {
// Shrink the stack range to ignore out-of-scope values.
stack_begin = sp;
}
ScanRangeForPointers(stack_begin, stack_end, frontier, "STACK", kReachable);
GetThreadExtraStackRangesLocked(os_id, &extra_ranges);
ScanExtraStackRanges(extra_ranges, frontier);
}

if (flags()->use_tls) {
if (tls_begin) {
LOG_THREADS("TLS at %p-%p.\n", (void *)tls_begin, (void *)tls_end);
// If the tls and cache ranges don't overlap, scan full tls range,
// otherwise, only scan the non-overlapping portions
if (cache_begin == cache_end || tls_end < cache_begin ||
tls_begin > cache_end) {
ScanRangeForPointers(tls_begin, tls_end, frontier, "TLS", kReachable);
} else {
if (tls_begin < cache_begin)
ScanRangeForPointers(tls_begin, cache_begin, frontier, "TLS",
kReachable);
if (tls_end > cache_end)
ScanRangeForPointers(cache_end, tls_end, frontier, "TLS", kReachable);
}
}
# if SANITIZER_ANDROID
// Captureless lambda converted to a plain function pointer (unary `+`);
// the frontier is passed through the opaque `arg` parameter.
auto *cb = +[](void *dtls_begin, void *dtls_end, uptr /*dso_idd*/,
void *arg) -> void {
ScanRangeForPointers(
reinterpret_cast<uptr>(dtls_begin), reinterpret_cast<uptr>(dtls_end),
reinterpret_cast<Frontier *>(arg), "DTLS", kReachable);
};

// FIXME: There might be a race-condition here (and in Bionic) if the
// thread is suspended in the middle of updating its DTLS. IOWs, we
// could scan already freed memory. (probably fine for now)
__libc_iterate_dynamic_tls(os_id, cb, frontier);
# else
if (dtls && !DTLSInDestruction(dtls)) {
ForEachDVT(dtls, [&](const DTLS::DTV &dtv, int id) {
uptr dtls_beg = dtv.beg;
uptr dtls_end = dtls_beg + dtv.size;
// Only non-empty blocks are logged and scanned.
if (dtls_beg < dtls_end) {
LOG_THREADS("DTLS %d at %p-%p.\n", id, (void *)dtls_beg,
(void *)dtls_end);
ScanRangeForPointers(dtls_beg, dtls_end, frontier, "DTLS",
kReachable);
}
});
} else {
// We are handling a thread with DTLS under destruction. Log about
// this and continue.
LOG_THREADS("Thread %llu has DTLS under destruction.\n", os_id);
}
# endif
}
}

static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
Frontier *frontier, tid_t caller_tid,
uptr caller_sp) {
InternalMmapVector<uptr> registers;
InternalMmapVector<Range> extra_ranges;
for (uptr i = 0; i < suspended_threads.ThreadCount(); i++) {
const tid_t os_id = static_cast<tid_t>(suspended_threads.GetThreadID(i));
LOG_THREADS("Processing thread %llu.\n", os_id);
uptr stack_begin, stack_end, tls_begin, tls_end, cache_begin, cache_end;
DTLS *dtls;
bool thread_found =
GetThreadRangesLocked(os_id, &stack_begin, &stack_end, &tls_begin,
&tls_end, &cache_begin, &cache_end, &dtls);
if (!thread_found) {
// If a thread can't be found in the thread registry, it's probably in the
// process of destruction. Log this event and move on.
LOG_THREADS("Thread %llu not found in registry.\n", os_id);
continue;
}
uptr sp;
registers.clear();
extra_ranges.clear();

const tid_t os_id = suspended_threads.GetThreadID(i);
uptr sp = 0;
PtraceRegistersStatus have_registers =
suspended_threads.GetRegistersAndSP(i, &registers, &sp);
if (have_registers != REGISTERS_AVAILABLE) {
Expand All @@ -427,96 +524,13 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
// GetRegistersAndSP failed with ESRCH.
if (have_registers == REGISTERS_UNAVAILABLE_FATAL)
continue;
sp = stack_begin;
sp = 0;
}

if (os_id == caller_tid)
sp = caller_sp;

if (flags()->use_registers && have_registers) {
uptr registers_begin = reinterpret_cast<uptr>(registers.data());
uptr registers_end =
reinterpret_cast<uptr>(registers.data() + registers.size());
ScanRangeForPointers(registers_begin, registers_end, frontier,
"REGISTERS", kReachable);
}

if (flags()->use_stacks) {
LOG_THREADS("Stack at %p-%p (SP = %p).\n", (void *)stack_begin,
(void *)stack_end, (void *)sp);
if (sp < stack_begin || sp >= stack_end) {
// SP is outside the recorded stack range (e.g. the thread is running a
// signal handler on alternate stack, or swapcontext was used).
// Again, consider the entire stack range to be reachable.
LOG_THREADS("WARNING: stack pointer not in stack range.\n");
uptr page_size = GetPageSizeCached();
int skipped = 0;
while (stack_begin < stack_end &&
!IsAccessibleMemoryRange(stack_begin, 1)) {
skipped++;
stack_begin += page_size;
}
LOG_THREADS("Skipped %d guard page(s) to obtain stack %p-%p.\n",
skipped, (void *)stack_begin, (void *)stack_end);
} else {
// Shrink the stack range to ignore out-of-scope values.
stack_begin = sp;
}
ScanRangeForPointers(stack_begin, stack_end, frontier, "STACK",
kReachable);
extra_ranges.clear();
GetThreadExtraStackRangesLocked(os_id, &extra_ranges);
ScanExtraStackRanges(extra_ranges, frontier);
}

if (flags()->use_tls) {
if (tls_begin) {
LOG_THREADS("TLS at %p-%p.\n", (void *)tls_begin, (void *)tls_end);
// If the tls and cache ranges don't overlap, scan full tls range,
// otherwise, only scan the non-overlapping portions
if (cache_begin == cache_end || tls_end < cache_begin ||
tls_begin > cache_end) {
ScanRangeForPointers(tls_begin, tls_end, frontier, "TLS", kReachable);
} else {
if (tls_begin < cache_begin)
ScanRangeForPointers(tls_begin, cache_begin, frontier, "TLS",
kReachable);
if (tls_end > cache_end)
ScanRangeForPointers(cache_end, tls_end, frontier, "TLS",
kReachable);
}
}
# if SANITIZER_ANDROID
auto *cb = +[](void *dtls_begin, void *dtls_end, uptr /*dso_idd*/,
void *arg) -> void {
ScanRangeForPointers(reinterpret_cast<uptr>(dtls_begin),
reinterpret_cast<uptr>(dtls_end),
reinterpret_cast<Frontier *>(arg), "DTLS",
kReachable);
};

// FIXME: There might be a race-condition here (and in Bionic) if the
// thread is suspended in the middle of updating its DTLS. IOWs, we
// could scan already freed memory. (probably fine for now)
__libc_iterate_dynamic_tls(os_id, cb, frontier);
# else
if (dtls && !DTLSInDestruction(dtls)) {
ForEachDVT(dtls, [&](const DTLS::DTV &dtv, int id) {
uptr dtls_beg = dtv.beg;
uptr dtls_end = dtls_beg + dtv.size;
if (dtls_beg < dtls_end) {
LOG_THREADS("DTLS %d at %p-%p.\n", id, (void *)dtls_beg,
(void *)dtls_end);
ScanRangeForPointers(dtls_beg, dtls_end, frontier, "DTLS",
kReachable);
}
});
} else {
// We are handling a thread with DTLS under destruction. Log about
// this and continue.
LOG_THREADS("Thread %llu has DTLS under destruction.\n", os_id);
}
# endif
}
ProcessThread(os_id, sp, registers, extra_ranges, frontier);
}

// Add pointers reachable from ThreadContexts
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/profile/InstrProfilingMerge.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
SrcCountersStart = (char *)SrcDataEnd;
SrcCountersEnd = SrcCountersStart +
Header->NumCounters * __llvm_profile_counter_entry_size();
SrcBitmapStart = SrcCountersEnd;
SrcBitmapStart = SrcCountersEnd + __llvm_profile_get_num_padding_bytes(
SrcCountersEnd - SrcCountersStart);
SrcNameStart = SrcBitmapStart + Header->NumBitmapBytes;
SrcValueProfDataStart =
SrcNameStart + getDistanceFromCounterToValueProf(Header);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ TEST(SanitizerCommon, IsAccessibleMemoryRange) {
EXPECT_TRUE(IsAccessibleMemoryRange(mem + 2 * page_size, page_size));
EXPECT_FALSE(IsAccessibleMemoryRange(mem, 3 * page_size));
EXPECT_FALSE(IsAccessibleMemoryRange(0x0, 2));

munmap((void *)mem, 3 * page_size);
}

} // namespace __sanitizer
Expand Down
3 changes: 0 additions & 3 deletions compiler-rt/test/ubsan/TestCases/Misc/Posix/ubsan_options.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
// RUN: %clangxx -fsanitize=integer -fsanitize-recover=integer %s -o %t
// RUN: not %run %t 2>&1 | FileCheck %s

// __ubsan_default_options() doesn't work on Darwin.
// XFAIL: darwin

#include <stdint.h>

extern "C" const char *__ubsan_default_options() {
Expand Down
14 changes: 1 addition & 13 deletions flang/lib/Optimizer/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3373,19 +3373,7 @@ struct AbsentOpConversion : public fir::FIROpConversion<fir::AbsentOp> {
matchAndRewrite(fir::AbsentOp absent, OpAdaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
mlir::Type ty = convertType(absent.getType());
mlir::Location loc = absent.getLoc();

if (mlir::isa<fir::BoxCharType>(absent.getType())) {
auto structTy = mlir::cast<mlir::LLVM::LLVMStructType>(ty);
assert(!structTy.isOpaque() && !structTy.getBody().empty());
auto undefStruct = rewriter.create<mlir::LLVM::UndefOp>(loc, ty);
auto nullField =
rewriter.create<mlir::LLVM::ZeroOp>(loc, structTy.getBody()[0]);
rewriter.replaceOpWithNewOp<mlir::LLVM::InsertValueOp>(
absent, undefStruct, nullField, 0);
} else {
rewriter.replaceOpWithNewOp<mlir::LLVM::ZeroOp>(absent, ty);
}
rewriter.replaceOpWithNewOp<mlir::LLVM::ZeroOp>(absent, ty);
return mlir::success();
}
};
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Fir/optional.fir
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func.func @foo3(%arg0: !fir.boxchar<1>) -> i1 {
// CHECK-LABEL: @bar3
func.func @bar3() -> i1 {
%0 = fir.absent !fir.boxchar<1>
// CHECK: call i1 @foo3(ptr null, i64 undef)
// CHECK: call i1 @foo3(ptr null, i64 0)
%1 = fir.call @foo3(%0) : (!fir.boxchar<1>) -> i1
return %1 : i1
}
Expand Down
8 changes: 5 additions & 3 deletions lldb/docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,11 @@ are welcome:
expected to work, with functionality improving rapidly. ARM and AArch64 support
is more experimental, with more known issues than the others.

RISC-V support is in active development, refer to the
`tracking issue <https://github.com/llvm/llvm-project/issues/55383>`_
for the current status.
Support for the following architectures is in active development. For their
current state, follow the links to their respective issues:

* `RISC-V <https://github.com/llvm/llvm-project/issues/55383>`_
* `LoongArch <https://github.com/llvm/llvm-project/issues/112693>`_

Get Involved
------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1267,7 +1267,7 @@ def run_vscode(dbg, args, options):
def main():
parser = optparse.OptionParser(
description=(
"A testing framework for the Visual Studio Code Debug " "Adaptor protocol"
"A testing framework for the Visual Studio Code Debug Adaptor protocol"
)
)

Expand Down
8 changes: 5 additions & 3 deletions lldb/source/Expression/DWARFExpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -860,10 +860,12 @@ llvm::Expected<Value> DWARFExpression::Evaluate(
// TODO: Implement a real typed stack, and store the genericness of the value
// there.
auto to_generic = [&](auto v) {
// TODO: Avoid implicit trunc?
// See https://github.com/llvm/llvm-project/issues/112510.
bool is_signed = std::is_signed<decltype(v)>::value;
return Scalar(llvm::APSInt(
llvm::APInt(8 * opcodes.GetAddressByteSize(), v, is_signed),
!is_signed));
return Scalar(llvm::APSInt(llvm::APInt(8 * opcodes.GetAddressByteSize(), v,
is_signed, /*implicitTrunc=*/true),
!is_signed));
};

// The default kind is a memory location. This is updated by any
Expand Down
1 change: 1 addition & 0 deletions lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5065,6 +5065,7 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type,
case clang::BuiltinType::SveUint64x2:
case clang::BuiltinType::SveUint64x3:
case clang::BuiltinType::SveUint64x4:
case clang::BuiltinType::SveMFloat8:
case clang::BuiltinType::SveFloat16:
case clang::BuiltinType::SveBFloat16:
case clang::BuiltinType::SveBFloat16x2:
Expand Down
3 changes: 3 additions & 0 deletions lldb/test/API/tools/lldb-dap/send-event/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
C_SOURCES := main.c

include Makefile.rules
67 changes: 67 additions & 0 deletions lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
Test lldb-dap send-event integration.
"""

import json

from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
import lldbdap_testcase


class TestDAP_sendEvent(lldbdap_testcase.DAPTestCaseBase):
    """Tests for the `lldb-dap send-event` command."""

    def test_send_event(self):
        """
        Test sending a custom event.

        Two `lldb-dap send-event` commands are registered as stop commands:
        one without a body and one with a JSON body. After the program stops
        at the breakpoint, both events must arrive with the expected payload.
        """
        program = self.getBuildArtifact("a.out")
        source = "main.c"
        custom_event_body = {
            "key": 321,
            "arr": [True],
        }
        self.build_and_launch(
            program,
            stopCommands=[
                "lldb-dap send-event my-custom-event-no-body",
                "lldb-dap send-event my-custom-event '{}'".format(
                    json.dumps(custom_event_body)
                ),
            ],
        )

        breakpoint_line = line_number(source, "// breakpoint")

        self.set_source_breakpoints(source, [breakpoint_line])
        self.continue_to_next_stop()

        # The event sent without a body must not carry a "body" key.
        custom_event = self.dap_server.wait_for_event(
            filter=["my-custom-event-no-body"]
        )
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(custom_event["event"], "my-custom-event-no-body")
        self.assertIsNone(custom_event.get("body", None))

        # The event sent with a JSON body must round-trip that body unchanged.
        custom_event = self.dap_server.wait_for_event(filter=["my-custom-event"])
        self.assertEqual(custom_event["event"], "my-custom-event")
        self.assertEqual(custom_event["body"], custom_event_body)

    def test_send_internal_event(self):
        """
        Test sending an internal event produces an error.
        """
        program = self.getBuildArtifact("a.out")
        source = "main.c"
        self.build_and_launch(program)

        breakpoint_line = line_number(source, "// breakpoint")

        self.set_source_breakpoints(source, [breakpoint_line])
        self.continue_to_next_stop()

        # "stopped" is an event name lldb-dap reserves for itself, so the
        # command must be refused with an explanatory message.
        resp = self.dap_server.request_evaluate(
            "`lldb-dap send-event stopped", context="repl"
        )
        self.assertRegex(
            resp["body"]["result"],
            r"Invalid use of lldb-dap send-event, event \"stopped\" should be handled by lldb-dap internally.",
        )
6 changes: 6 additions & 0 deletions lldb/test/API/tools/lldb-dap/send-event/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#include <stdio.h>

/* Minimal debuggee for the send-event test: prints one line and exits.
 * The trailing marker comment on the printf line is looked up by the test
 * through a text search to place its breakpoint, so it must stay on that
 * line and must not be duplicated elsewhere in this file. */
int main(int argc, char const *argv[]) {
printf("example\n"); // breakpoint 1
return 0;
}
16 changes: 3 additions & 13 deletions lldb/tools/debugserver/source/RNBRemote.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,6 @@ void append_hexified_string(std::ostream &ostrm, const std::string &string) {
}
}

extern void ASLLogCallback(void *baton, uint32_t flags, const char *format,
va_list args);

// from System.framework/Versions/B/PrivateHeaders/sys/codesign.h
extern "C" {
#define CS_OPS_STATUS 0 /* return status */
Expand Down Expand Up @@ -1773,8 +1770,6 @@ static std::string get_value(std::string &line) {

extern void FileLogCallback(void *baton, uint32_t flags, const char *format,
va_list args);
extern void ASLLogCallback(void *baton, uint32_t flags, const char *format,
va_list args);

rnb_err_t RNBRemote::HandlePacket_qRcmd(const char *p) {
const char *c = p + strlen("qRcmd,");
Expand Down Expand Up @@ -1809,8 +1804,8 @@ rnb_err_t RNBRemote::HandlePacket_qRcmd(const char *p) {
static_cast<uint32_t>(strtoul(value.c_str(), &end, 0));
if (errno == 0 && end && *end == '\0') {
DNBLogSetLogMask(logmask);
if (!DNBLogGetLogCallback())
DNBLogSetLogCallback(ASLLogCallback, NULL);
if (auto log_callback = OsLogger::GetLogFunction())
DNBLogSetLogCallback(log_callback, nullptr);
return SendPacket("OK");
}
errno = 0;
Expand Down Expand Up @@ -2177,13 +2172,8 @@ rnb_err_t set_logging(const char *p) {
// Enable DNB logging.
// Use the existing log callback if one was already configured.
if (!DNBLogGetLogCallback()) {
// Use the os_log()-based logger if available; otherwise,
// fallback to ASL.
auto log_callback = OsLogger::GetLogFunction();
if (log_callback)
if (auto log_callback = OsLogger::GetLogFunction())
DNBLogSetLogCallback(log_callback, nullptr);
else
DNBLogSetLogCallback(ASLLogCallback, nullptr);
}

// Update logging to use the configured log channel bitmask.
Expand Down
8 changes: 0 additions & 8 deletions lldb/tools/debugserver/source/libdebugserver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,13 +311,6 @@ RNBRunLoopMode RNBRunLoopInferiorExecuting(RNBRemoteSP &remote) {
return mode;
}

void ASLLogCallback(void *baton, uint32_t flags, const char *format,
va_list args) {
#if 0
vprintf(format, args);
#endif
}

extern "C" int debug_server_main(int fd) {
#if 1
g_isatty = 0;
Expand All @@ -327,7 +320,6 @@ extern "C" int debug_server_main(int fd) {
DNBLogSetDebug(1);
DNBLogSetVerbose(1);
DNBLogSetLogMask(-1);
DNBLogSetLogCallback(ASLLogCallback, NULL);
#endif

signal(SIGPIPE, signal_handler);
Expand Down
62 changes: 62 additions & 0 deletions lldb/tools/lldb-dap/DAP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,68 @@ bool ReplModeRequestHandler::DoExecute(lldb::SBDebugger debugger,
return true;
}

// Implements `lldb-dap send-event <name> <body>?`: sends a DAP event called
// <name> to the client, attaching <body> (parsed as JSON) when one is given.
//
// Returns true after the event has been sent. Returns false, with an error
// recorded on `result`, when the name is missing, the name belongs to an
// event lldb-dap handles itself, extra arguments follow the body, or the body
// is not valid JSON.
//
// See
// https://code.visualstudio.com/api/references/vscode-api#debug.onDidReceiveDebugSessionCustomEvent
bool SendEventRequestHandler::DoExecute(lldb::SBDebugger debugger,
                                        char **command,
                                        lldb::SBCommandReturnObject &result) {
  // Command format like: `send-event <name> <body>?`
  // The event name is mandatory and must not be the empty string.
  const bool has_name = command && command[0] && command[0][0] != '\0';
  if (!has_name) {
    result.SetError("Not enough arguments found, expected format `lldb-dap "
                    "send-event <name> <body>?`.");
    return false;
  }

  const llvm::StringRef event_name(command[0]);

  // Stateful events that lldb-dap must emit itself; scripts may not send them.
  const std::array reserved_events{"breakpoint", "capabilities", "continued",
                                   "exited",     "initialize",   "loadedSource",
                                   "module",     "process",      "stopped",
                                   "terminated", "thread"};
  const bool is_reserved =
      std::find(reserved_events.begin(), reserved_events.end(), event_name) !=
      reserved_events.end();
  if (is_reserved) {
    const std::string error_text =
        llvm::formatv("Invalid use of lldb-dap send-event, event \"{0}\" should "
                      "be handled by lldb-dap internally.",
                      event_name)
            .str();
    result.SetError(error_text.c_str());
    return false;
  }

  llvm::json::Object event(CreateEventObject(event_name));

  // An absent or empty second argument means "no body".
  const char *body_arg = command[1];
  const bool has_body = body_arg && body_arg[0] != '\0';
  if (has_body) {
    // Anything after the body is unexpected.
    if (command[2]) {
      result.SetError("Additional arguments found, expected `lldb-dap "
                      "send-event <name> <body>?`.");
      return false;
    }

    llvm::Expected<llvm::json::Value> parsed_body =
        llvm::json::parse(llvm::StringRef(body_arg));
    if (!parsed_body) {
      // Consume the error and surface its text to the user.
      const std::string error_text = "Failed to parse custom event body: " +
                                     llvm::toString(parsed_body.takeError());
      result.SetError(error_text.c_str());
      return false;
    }

    event.try_emplace("body", std::move(*parsed_body));
  }

  g_dap.SendJSON(llvm::json::Value(std::move(event)));
  result.SetStatus(lldb::eReturnStatusSuccessFinishNoResult);
  return true;
}

void DAP::SetFrameFormat(llvm::StringRef format) {
if (format.empty())
return;
Expand Down
5 changes: 5 additions & 0 deletions lldb/tools/lldb-dap/DAP.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ struct ReplModeRequestHandler : public lldb::SBCommandPluginInterface {
lldb::SBCommandReturnObject &result) override;
};

/// Command handler for `lldb-dap send-event <name> <body>?`, which sends a
/// DAP event with an optional JSON body to the client.
struct SendEventRequestHandler : public lldb::SBCommandPluginInterface {
/// \param command argument vector; command[0] is the event name and
///        command[1], when present and non-empty, is the JSON body.
/// \param result receives the success status or the error message.
/// \returns true if the event was sent, false otherwise.
bool DoExecute(lldb::SBDebugger debugger, char **command,
lldb::SBCommandReturnObject &result) override;
};

struct DAP {
std::string debug_adaptor_path;
InputStream input;
Expand Down
31 changes: 31 additions & 0 deletions lldb/tools/lldb-dap/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,37 @@ The initial repl-mode can be configured with the cli flag `--repl-mode=<mode>`
and may also be adjusted at runtime using the lldb command
`lldb-dap repl-mode <mode>`.

#### `lldb-dap send-event`

lldb-dap includes a command to trigger a Debug Adapter Protocol event
from a script.

The event may be a custom DAP event or a standard event, as long as the event
is not handled internally by `lldb-dap`.

This command has the format:

```
lldb-dap send-event <name> <body>?
```

For example you can use a launch configuration hook to trigger custom events like:

```json
{
"program": "exe",
"stopCommands": [
"lldb-dap send-event MyStopEvent",
"lldb-dap send-event MyStopEvent '{\"key\": 321}'"
]
}
```

[See the specification](https://microsoft.github.io/debug-adapter-protocol/specification#Base_Protocol_Event)
for more details on Debug Adapter Protocol events and the VS Code
[debug.onDidReceiveDebugSessionCustomEvent](https://code.visualstudio.com/api/references/vscode-api#debug.onDidReceiveDebugSessionCustomEvent)
API for handling a custom event from an extension.

## Contributing

`lldb-dap` and `lldb` are developed under the umbrella of the [LLVM project](https://llvm.org/).
Expand Down
2 changes: 2 additions & 0 deletions lldb/tools/lldb-dap/lldb-dap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1896,6 +1896,8 @@ void request_initialize(const llvm::json::Object &request) {
cmd.AddCommand(
"repl-mode", new ReplModeRequestHandler(),
"Get or set the repl behavior of lldb-dap evaluation requests.");
cmd.AddCommand("send-event", new SendEventRequestHandler(),
"Sends an DAP event to the client.");

g_dap.progress_event_thread = std::thread(ProgressEventThreadFunction);

Expand Down
6 changes: 6 additions & 0 deletions llvm/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ Changes to the ARM Backend
the required alignment space with a sequence of `0x0` bytes (the requested
fill value) rather than NOPs.

* The default behavior for frame pointers in leaf functions has been updated.
When the `-fno-omit-frame-pointer` option is specified, `FPKeepKindStr` is
set to `-mframe-pointer=all`, meaning the frame pointer (FP) is now retained
in leaf functions by default. To eliminate the frame pointer in leaf functions,
you must explicitly use the `-momit-leaf-frame-pointer` option.

Changes to the AVR Backend
--------------------------

Expand Down
4 changes: 3 additions & 1 deletion llvm/include/llvm/ADT/APFixedPoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,9 @@ class APFixedPoint {
}

APFixedPoint(uint64_t Val, const FixedPointSemantics &Sema)
: APFixedPoint(APInt(Sema.getWidth(), Val, Sema.isSigned()), Sema) {}
: APFixedPoint(APInt(Sema.getWidth(), Val, Sema.isSigned(),
/*implicitTrunc=*/true),
Sema) {}

// Zero initialization.
APFixedPoint(const FixedPointSemantics &Sema) : APFixedPoint(0, Sema) {}
Expand Down
6 changes: 0 additions & 6 deletions llvm/include/llvm/CodeGen/TargetFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,6 @@ class TargetFrameLowering {
return false;
}

/// Return true if the target wants to keep the frame pointer regardless of
/// the function attribute "frame-pointer".
virtual bool keepFramePointer(const MachineFunction &MF) const {
return false;
}

/// hasFP - Return true if the specified function should have a dedicated
/// frame pointer register. For most targets this is true only if the function
/// has variable sized allocas or if frame pointer elimination is disabled.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,6 @@ class ExecutorProcessControl {
}

/// Look up and SPS-deserialize a bootstrap map value.
///
///
template <typename T, typename SPSTagT>
Error getBootstrapMapValue(StringRef Key, std::optional<T> &Val) const {
Val = std::nullopt;
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Transforms/Utils/Instrumentation.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ struct SanitizerCoverageOptions {
bool TraceLoads = false;
bool TraceStores = false;
bool CollectControlFlow = false;
bool GatedCallbacks = false;

SanitizerCoverageOptions() = default;
};
Expand Down
4 changes: 1 addition & 3 deletions llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,13 @@ struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
/// or a horizontal reduction was not matched or not possible.
bool vectorizeHorReduction(PHINode *P, Instruction *Root, BasicBlock *BB,
slpvectorizer::BoUpSLP &R,
TargetTransformInfo *TTI,
SmallVectorImpl<WeakTrackingVH> &PostponedInsts);

/// Make an attempt to vectorize reduction and then try to vectorize
/// postponed binary operations.
/// \returns true on any successful vectorization.
bool vectorizeRootInstruction(PHINode *P, Instruction *Root, BasicBlock *BB,
slpvectorizer::BoUpSLP &R,
TargetTransformInfo *TTI);
slpvectorizer::BoUpSLP &R);

/// Try to vectorize trees that start at insertvalue instructions.
bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Analysis/ConstantFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
APInt Offset = APInt(
BitWidth,
DL.getIndexedOffsetInType(
SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)));
SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)),
/*isSigned=*/true, /*implicitTrunc=*/true);

std::optional<ConstantRange> InRange = GEP->getInRange();
if (InRange)
Expand Down
6 changes: 2 additions & 4 deletions llvm/lib/Analysis/Loads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,8 @@ static bool isDereferenceableAndAlignedPointer(

auto IsKnownDeref = [&]() {
bool CheckForNonNull, CheckForFreed;
APInt KnownDerefBytes(Size.getBitWidth(),
V->getPointerDereferenceableBytes(DL, CheckForNonNull,
CheckForFreed));
if (!KnownDerefBytes.getBoolValue() || !KnownDerefBytes.uge(Size) ||
if (!Size.ule(V->getPointerDereferenceableBytes(DL, CheckForNonNull,
CheckForFreed)) ||
CheckForFreed)
return false;
if (CheckForNonNull &&
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Analysis/MemoryBuiltins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,8 @@ SizeOffsetAPInt ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
TypeSize ElemSize = DL.getTypeAllocSize(I.getAllocatedType());
if (ElemSize.isScalable() && Options.EvalMode != ObjectSizeOpts::Mode::Min)
return ObjectSizeOffsetVisitor::unknown();
if (!isUIntN(IntTyBits, ElemSize.getKnownMinValue()))
return ObjectSizeOffsetVisitor::unknown();
APInt Size(IntTyBits, ElemSize.getKnownMinValue());
if (!I.isArrayAllocation())
return SizeOffsetAPInt(align(Size, I.getAlign()), Zero);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6883,7 +6883,7 @@ const ConstantRange &ScalarEvolution::getRangeRef(
bool CanBeNull, CanBeFreed;
uint64_t DerefBytes =
V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
if (DerefBytes > 1) {
if (DerefBytes > 1 && isUIntN(BitWidth, DerefBytes)) {
// The highest address the object can start is DerefBytes bytes before
// the end (unsigned max value). If this value is not a multiple of the
// alignment, the last possible start value is the next lowest multiple
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Bitcode/Reader/BitcodeReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,8 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
} else {
int64_t Start = BitcodeReader::decodeSignRotatedValue(Record[OpNum++]);
int64_t End = BitcodeReader::decodeSignRotatedValue(Record[OpNum++]);
return ConstantRange(APInt(BitWidth, Start), APInt(BitWidth, End));
return ConstantRange(APInt(BitWidth, Start, true),
APInt(BitWidth, End, true));
}
}

Expand Down
135 changes: 63 additions & 72 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1641,7 +1641,11 @@ SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
assert((EltVT.getSizeInBits() >= 64 ||
(uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
// TODO: Avoid implicit trunc?
// See https://github.com/llvm/llvm-project/issues/112510.
return getConstant(APInt(EltVT.getSizeInBits(), Val, /*isSigned=*/false,
/*implicitTrunc=*/true),
DL, VT, isT, isO);
}

SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4333,7 +4333,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
GTI.getSequentialElementStride(DAG.getDataLayout());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
APInt ElementMul(IdxSize, ElementSize.getKnownMinValue(),
/*isSigned=*/false, /*implicitTrunc=*/true);
bool ElementScalable = ElementSize.isScalable();

// If this is a scalar constant or a splat vector of constants,
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2200,7 +2200,10 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
int64_t DesiredMaskS) const {
const APInt &ActualMask = RHS->getAPIntValue();
const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
// TODO: Avoid implicit trunc?
// See https://github.com/llvm/llvm-project/issues/112510.
const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS,
/*isSigned=*/false, /*implicitTrunc=*/true);

// If the actual mask exactly matches, success!
if (ActualMask == DesiredMask)
Expand Down Expand Up @@ -2229,7 +2232,10 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
int64_t DesiredMaskS) const {
const APInt &ActualMask = RHS->getAPIntValue();
const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
// TODO: Avoid implicit trunc?
// See https://github.com/llvm/llvm-project/issues/112510.
const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS,
/*isSigned=*/false, /*implicitTrunc=*/true);

// If the actual mask exactly matches, success!
if (ActualMask == DesiredMask)
Expand Down
33 changes: 31 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6813,7 +6813,9 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,

PAmts.push_back(DAG.getConstant(P, DL, SVT));
KAmts.push_back(
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
/*implicitTrunc=*/true),
DL, ShSVT));
QAmts.push_back(DAG.getConstant(Q, DL, SVT));
return true;
};
Expand Down Expand Up @@ -7084,7 +7086,9 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
PAmts.push_back(DAG.getConstant(P, DL, SVT));
AAmts.push_back(DAG.getConstant(A, DL, SVT));
KAmts.push_back(
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
/*implicitTrunc=*/true),
DL, ShSVT));
QAmts.push_back(DAG.getConstant(Q, DL, SVT));
return true;
};
Expand Down Expand Up @@ -8813,6 +8817,31 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
IsOrdered ? OrderedOp : UnorderedOp);
}
}

if (FPTestMask == fcNormal) {
// TODO: Handle unordered
ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

if (isCondCodeLegalOrCustom(IsFiniteOp,
OperandVT.getScalarType().getSimpleVT()) &&
isCondCodeLegalOrCustom(IsNormalOp,
OperandVT.getScalarType().getSimpleVT()) &&
isFAbsFree(OperandVT)) {
// isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
SDValue Inf =
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
SDValue SmallestNormal = DAG.getConstantFP(
APFloat::getSmallestNormalized(Semantics), DL, OperandVT);

SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
SDValue IsNormal =
DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
}
}
}

// Some checks may be represented as inversion of simpler check, for example
Expand Down
8 changes: 0 additions & 8 deletions llvm/lib/CodeGen/TargetOptionsImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ using namespace llvm;
/// DisableFramePointerElim - This returns true if frame pointer elimination
/// optimization should be disabled for the given machine function.
bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
// Check to see if the target want to forcibly keep frame pointer.
if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF))
return true;

const Function &F = MF.getFunction();

if (!F.hasFnAttribute("frame-pointer"))
Expand All @@ -41,10 +37,6 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
}

bool TargetOptions::FramePointerIsReserved(const MachineFunction &MF) const {
// Check to see if the target want to forcibly keep frame pointer.
if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF))
return true;

const Function &F = MF.getFunction();

if (!F.hasFnAttribute("frame-pointer"))
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ GenericValue MCJIT::runFunction(Function *F, ArrayRef<GenericValue> ArgValues) {
return rv;
}
case Type::VoidTyID:
rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
rv.IntVal = APInt(32, ((int (*)())(intptr_t)FPtr)(), true);
return rv;
case Type::FloatTyID:
rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/IR/Constants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,10 @@ Constant *ConstantInt::get(Type *Ty, uint64_t V, bool isSigned) {
}

ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V, bool isSigned) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
// TODO: Avoid implicit trunc?
// See https://github.com/llvm/llvm-project/issues/112510.
return get(Ty->getContext(),
APInt(Ty->getBitWidth(), V, isSigned, /*implicitTrunc=*/true));
}

Constant *ConstantInt::get(Type *Ty, const APInt& V) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Support/APInt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2227,7 +2227,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
while (*Prefix) {
Str.push_back(*Prefix);
++Prefix;
};
}

// We insert the digits backward, then reverse them to get the right order.
unsigned StartDig = Str.size();
Expand Down
37 changes: 35 additions & 2 deletions llvm/lib/Target/AArch64/AArch64Features.td
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,39 @@ def FeatureTLBIW : ExtensionWithMArch<"tlbiw", "TLBIW", "FEAT_TLBIW",
// Armv9.6 Architecture Extensions
//===----------------------------------------------------------------------===//

def FeatureCMPBR : ExtensionWithMArch<"cmpbr", "CMPBR", "FEAT_CMPBR",
"Enable Armv9.6-A base compare and branch instructions">;

def FeatureF8F32MM: ExtensionWithMArch<"f8f32mm", "F8F32MM", "FEAT_F8F32MM",
"Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication">;

def FeatureF8F16MM: ExtensionWithMArch<"f8f16mm", "F8F16MM", "FEAT_F8F16MM",
"Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication">;

def FeatureFPRCVT: ExtensionWithMArch<"fprcvt", "FPRCVT", "FEAT_FPRCVT",
"Enable Armv9.6-A base convert instructions for SIMD&FP scalar register operands of"
" different input and output sizes">;

def FeatureLSFE : ExtensionWithMArch<"lsfe", "LSFE", "FEAT_LSFE",
"Enable Armv9.6-A base Atomic floating-point in-memory instructions">;

def FeatureSME2p2: ExtensionWithMArch<"sme2p2", "SME2p2", "FEAT_SME2p2",
"Enable Armv9.6-A Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>;

def FeatureSSVE_AES : ExtensionWithMArch<"ssve-aes", "SSVE_AES", "FEAT_SSVE_AES",
"Enable Armv9.6-A SVE2 AES support in streaming SVE mode">;

def FeatureSVE2p2 : ExtensionWithMArch<"sve2p2", "SVE2p2", "FEAT_SVE2p2",
"Enable Armv9.6-A Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>;

def FeatureSVEAES2: ExtensionWithMArch<"sve-aes2", "SVE_AES2", "FEAT_SVE_AES2",
"Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions">;

def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SVE_BFSCALE",
"Enable Armv9.6-A SVE BFloat16 scaling instructions">;

def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM",
"Enable Armv9.6-A FP16 to FP32 Matrix Multiply instructions">;

//===----------------------------------------------------------------------===//
// Other Features
Expand Down Expand Up @@ -833,8 +866,8 @@ def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a",
[HasV9_4aOps, FeatureCPA],
!listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA, FeatureLUT, FeatureFAMINMAX])>;
def HasV9_6aOps : Architecture64<9, 6, "a", "v9.6a",
[HasV9_5aOps],
!listconcat(HasV9_5aOps.DefaultExts, [])>;
[HasV9_5aOps, FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2],
!listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2])>;
def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
[ //v8.1
FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2,
Expand Down
32 changes: 16 additions & 16 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2400,10 +2400,11 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
}
case AArch64ISD::BICi: {
// Compute the bit cleared value.
uint64_t Mask =
~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
APInt Mask =
~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
.trunc(Known.getBitWidth());
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), Mask));
Known &= KnownBits::makeConstant(Mask);
break;
}
case AArch64ISD::VLSHR: {
Expand Down Expand Up @@ -12839,7 +12840,8 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
// Benefit from APInt to handle overflow when calculating expected element.
unsigned NumElts = VT.getVectorNumElements();
unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, /*isSigned=*/false,
/*implicitTrunc=*/true);
// The following shuffle indices must be the successive elements after the
// first real element.
bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](int Elt) {
Expand Down Expand Up @@ -14306,9 +14308,9 @@ static SDValue NormalizeBuildVector(SDValue Op,
// (with operands cast to integers), then the only possibilities
// are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
Lane = DAG.getConstant(
CstLane->getAPIntValue().trunc(EltTy.getSizeInBits()).getZExtValue(),
dl, MVT::i32);
} else if (Lane.getNode()->isUndef()) {
Lane = DAG.getUNDEF(MVT::i32);
} else {
Expand Down Expand Up @@ -23713,7 +23715,7 @@ static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N,
EVT NewIndexVT = IndexVT.changeVectorElementType(MVT::i32);
// Stride does not scale explicitly by 'Scale', because it happens in
// the gather/scatter addressing mode.
Index = DAG.getStepVector(SDLoc(N), NewIndexVT, APInt(32, Stride));
Index = DAG.getStepVector(SDLoc(N), NewIndexVT, APInt(32, Stride, true));
return true;
}

Expand Down Expand Up @@ -28727,7 +28729,7 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
unsigned BitsPerElt = VTOp1.getVectorElementType().getSizeInBits();
unsigned IndexLen = MinSVESize / BitsPerElt;
unsigned ElementsPerVectorReg = VTOp1.getVectorNumElements();
uint64_t MaxOffset = APInt(BitsPerElt, -1, false).getZExtValue();
uint64_t MaxOffset = maxUIntN(BitsPerElt);
EVT MaskEltType = VTOp1.getVectorElementType().changeTypeToInteger();
EVT MaskType = EVT::getVectorVT(*DAG.getContext(), MaskEltType, IndexLen);
bool MinMaxEqual = (MinSVESize == MaxSVESize);
Expand Down Expand Up @@ -29085,16 +29087,14 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
KnownBits KnownOp0 =
TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
// Op0 &= ~(ConstantOperandVal(1) << ConstantOperandVal(2))
uint64_t BitsToClear = Op->getConstantOperandVal(1)
<< Op->getConstantOperandVal(2);
APInt BitsToClear =
(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
.trunc(KnownOp0.getBitWidth());
APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
if (APInt(Known.getBitWidth(), BitsToClear)
.isSubsetOf(AlreadyZeroedBitsToClear))
if (BitsToClear.isSubsetOf(AlreadyZeroedBitsToClear))
return TLO.CombineTo(Op, Op0);

Known = KnownOp0 &
KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear));

Known = KnownOp0 & KnownBits::makeConstant(~BitsToClear);
return false;
}
case ISD::INTRINSIC_WO_CHAIN: {
Expand Down
35 changes: 34 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -213,12 +213,35 @@ def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8
def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;

def HasCMPBR : Predicate<"Subtarget->hasCMPBR()">,
AssemblerPredicateWithAll<(all_of FeatureCMPBR), "cmpbr">;
def HasF8F32MM : Predicate<"Subtarget->hasF8F32MM()">,
AssemblerPredicateWithAll<(all_of FeatureF8F32MM), "f8f32mm">;
def HasF8F16MM : Predicate<"Subtarget->hasF8F16MM()">,
AssemblerPredicateWithAll<(all_of FeatureF8F16MM), "f8f16mm">;
def HasFPRCVT : Predicate<"Subtarget->hasFPRCVT()">,
AssemblerPredicateWithAll<(all_of FeatureFPRCVT), "fprcvt">;
def HasLSFE : Predicate<"Subtarget->hasLSFE()">,
AssemblerPredicateWithAll<(all_of FeatureLSFE), "lsfe">;
def HasSME2p2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p2), "sme2p2">;
def HasSVEAES2 : Predicate<"Subtarget->hasSVEAES2()">,
AssemblerPredicateWithAll<(all_of FeatureSVEAES2), "sve-aes2">;
def HasSVEBFSCALE : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">,
AssemblerPredicateWithAll<(all_of FeatureSVEBFSCALE), "sve-bfscale">;
def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">,
AssemblerPredicateWithAll<(all_of FeatureSVE_F16F32MM), "sve-f16f32mm">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
def HasSVEorSME
: Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
"sve or sme">;
def HasSVEorSME2p2
: Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||"
"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p2),
"sve or sme2p2">;
def HasSVE2orSME
: Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
Expand All @@ -227,6 +250,10 @@ def HasSVE2orSME2
: Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
"sve2 or sme2">;
def HasSVE2orSSVE_AES
: Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||"
"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSSVE_AES), "sve2 or ssve-aes">;
def HasSVE2p1_or_HasSME
: Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
Expand All @@ -236,7 +263,13 @@ def HasSVE2p1_or_HasSME2
def HasSVE2p1_or_HasSME2p1
: Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;

def HasSVE2p2orSME2p2
: Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2())">,
AssemblerPredicateWithAll<(any_of FeatureSME2p2, FeatureSVE2p2), "sme2p2 or sve2p2">;
def HasSVE2p1orSSVE_AES
: Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()) ||"
"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
AssemblerPredicateWithAll<(any_of FeatureSVE2p1, FeatureSSVE_AES), "sve2p1 or ssve-aes">;
def HasSMEF16F16orSMEF8F16
: Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,10 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
/// Reports whether vector bandwidth should be maximized for register kind
/// \p K.
///
/// \param K the vector register kind being queried; must not be RGK_Scalar
///          (asserted).
/// \returns true for fixed-width vectors when NEON is available, and for
///          scalable vectors when SVE or streaming SVE is available;
///          false otherwise.
bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
    TargetTransformInfo::RegisterKind K) const {
  assert(K != TargetTransformInfo::RGK_Scalar);
  // A single return covers both vector kinds. An earlier fixed-width-only
  // return preceding this one would make the scalable-vector case
  // unreachable.
  return ((K == TargetTransformInfo::RGK_FixedWidthVector &&
           ST->isNeonAvailable()) ||
          (K == TargetTransformInfo::RGK_ScalableVector &&
           ST->isSVEorStreamingSVEAvailable()));
}

/// Calculate the cost of materializing a 64-bit value. This helper
Expand Down
42 changes: 4 additions & 38 deletions llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,44 +146,10 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
// count easily.
// A tail call isn't considered a call for MachineFrameInfo's purposes.
if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
if (MRI.isPhysRegUsed(Reg)) {
HighestVGPRReg = Reg;
break;
}
}

if (ST.hasMAIInsts()) {
MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
if (MRI.isPhysRegUsed(Reg)) {
HighestAGPRReg = Reg;
break;
}
}
Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
? 0
: TRI.getHWRegIndex(HighestAGPRReg) + 1;
}

MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
if (MRI.isPhysRegUsed(Reg)) {
HighestSGPRReg = Reg;
break;
}
}

// We found the maximum register index. They start at 0, so add one to get
// the number of registers.
Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
? 0
: TRI.getHWRegIndex(HighestVGPRReg) + 1;
Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
? 0
: TRI.getHWRegIndex(HighestSGPRReg) + 1;

Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
if (ST.hasMAIInsts())
Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
return Info;
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3441,7 +3441,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
: AMDGPU::V_MOV_B32_e32
: Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
: AMDGPU::S_MOV_B32;
APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)));
APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)),
/*isSigned=*/true, /*implicitTrunc=*/true);

if (RI.isAGPR(*MRI, DstReg)) {
if (Is64Bit || !isInlineConstant(Imm))
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3851,3 +3851,12 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
}
return 0;
}

// Walks the registers of RC from last to first and stops at the first one MRI
// reports as used. Hardware register indices start at 0, so the result is that
// register's hardware index plus one. Returns 0 when no register of RC is
// used.
unsigned
SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
                                   const TargetRegisterClass &RC) const {
  for (MCPhysReg Candidate : reverse(RC.getRegisters())) {
    if (!MRI.isPhysRegUsed(Candidate))
      continue;
    return getHWRegIndex(Candidate) + 1;
  }
  return 0;
}
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
// No check if the subreg is supported by the current RC is made.
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
unsigned SubReg) const;

// \returns the number of registers of the given class \p RC used in a
// function, as recorded in \p MRI.
// Does not go inside function calls, so registers used only by callees are
// not counted.
unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
const TargetRegisterClass &RC) const;
};

namespace AMDGPU {
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,12 +213,12 @@ static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII,
// that SCC is not live as S_NOT_B32 clobbers it. It's probably not worth
// it, as the reasonable values are already covered by s_movk_i32.
ModifiedImm = ~SrcImm;
if (TII->isInlineConstant(APInt(32, ModifiedImm)))
if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
return AMDGPU::V_NOT_B32_e32;
}

ModifiedImm = reverseBits<int32_t>(SrcImm);
if (TII->isInlineConstant(APInt(32, ModifiedImm)))
if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;

return 0;
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,12 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_iOS_SaveList;

if (PushPopSplit == ARMSubtarget::SplitR7)
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList
: CSR_ATPCS_SplitPush_SaveList;

if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
return CSR_AAPCS_SplitPush_R11_SaveList;

return CSR_AAPCS_SaveList;
}

Expand Down
Loading