diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index 342b91a0c437b4..f2ee7a7e549a89 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -216,7 +216,8 @@ TableGen provides "bang operators" that have a wide variety of uses:
                : !interleave !isa         !le          !listconcat  !listsplat
                : !lt         !mul         !ne          !not         !or
                : !setdagop   !shl         !size        !sra         !srl
-               : !strconcat  !sub         !subst       !tail        !xor
+               : !strconcat  !sub         !subst       !substr      !tail
+               : !xor
 
 The ``!cond`` operator has a slightly different syntax compared to other
 bang operators, so it is defined separately:
@@ -1723,6 +1724,14 @@ and non-0 as true.
     record if the *target* record name equals the *value* record name; otherwise it
     produces the *value*.
 
+``!substr(``\ *string*\ ``,`` *start*\ [``,`` *length*]\ ``)``
+    This operator extracts a substring of the given *string*. The starting
+    position of the substring is specified by *start*, which can range
+    between 0 and the length of the string, inclusive. The length of the
+    substring is specified by *length*, which must not be negative; if it is
+    omitted or exceeds the number of characters remaining, the rest of the
+    string is extracted. The *start* and *length* arguments must be integers.
+
 ``!tail(``\ *a*\ ``)``
     This operator produces a new list with all the elements of the list *a*
     except for the zeroth one. (See also ``!head``.)
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 3010b4dad09a8f..a0c5b2778547dc 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -829,7 +829,7 @@ class BinOpInit : public OpInit, public FoldingSetNode {
 /// !op (X, Y, Z) - Combine two inits.
class TernOpInit : public OpInit, public FoldingSetNode {
 public:
-  enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG };
+  enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR };
 
 private:
   Init *LHS, *MHS, *RHS;
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index cbdce04494f37f..9c0464d4e1bf6c 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -1325,6 +1325,31 @@ Init *TernOpInit::Fold(Record *CurRec) const {
     }
     break;
   }
+
+  case SUBSTR: {
+    // Fold only when the operands are a StringInit and two IntInits;
+    // otherwise the operation is left unfolded.
+    StringInit *LHSs = dyn_cast<StringInit>(LHS);
+    IntInit *MHSi = dyn_cast<IntInit>(MHS);
+    IntInit *RHSi = dyn_cast<IntInit>(RHS);
+    if (LHSs && MHSi && RHSi) {
+      int64_t StringSize = LHSs->getValue().size();
+      int64_t Start = MHSi->getValue();
+      int64_t Length = RHSi->getValue();
+      // Out-of-range arguments are reported, but folding continues and a
+      // string is still returned.
+      if (Start < 0 || Start > StringSize)
+        PrintError(CurRec->getLoc(),
+                   Twine("!substr start position is out of range 0...") +
+                       std::to_string(StringSize) + ": " +
+                       std::to_string(Start));
+      if (Length < 0)
+        PrintError(CurRec->getLoc(), "!substr length must be nonnegative");
+      return StringInit::get(LHSs->getValue().substr(Start, Length),
+                             LHSs->getFormat());
+    }
+    break;
+  }
   }
 
   return const_cast<TernOpInit *>(this);
@@ -1364,11 +1389,12 @@ std::string TernOpInit::getAsString() const {
   std::string Result;
   bool UnquotedLHS = false;
   switch (getOpcode()) {
-  case SUBST: Result = "!subst"; break;
-  case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
+  case DAG: Result = "!dag"; break;
   case FILTER: Result = "!filter"; UnquotedLHS = true; break;
+  case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
   case IF: Result = "!if"; break;
-  case DAG: Result = "!dag"; break;
+  case SUBST: Result = "!subst"; break;
+  case SUBSTR: Result = "!substr"; break;
   }
   return (Result + "(" +
           (UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) +
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index df0df96f40eb79..a45ef6dc10c16b 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -589,6 +589,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
     .Case("listsplat", tgtok::XListSplat)
     .Case("strconcat", tgtok::XStrConcat)
     .Case("interleave", tgtok::XInterleave)
+    .Case("substr", tgtok::XSubstr)
     .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
     .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
     .Default(tgtok::Error);
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 1856bef3ea9bd2..ee568849ca887e 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -53,9 +53,9 @@ namespace tgtok {
 
     // Bang operators.
     XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL,
-    XListConcat, XListSplat, XStrConcat, XInterleave, XCast, XSubst, XForEach,
-    XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA,
-    XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
+    XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XCast,
+    XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
+    XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
 
     // Boolean literals.
TrueVal, FalseVal,
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 2671d29a72721e..ebb66ccffc29af 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -25,6 +25,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
+#include <limits>
 
 using namespace llvm;
 
@@ -1496,6 +1497,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
     return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
   }
 
+  case tgtok::XSubstr:
+    return ParseOperationSubstr(CurRec, ItemType);
+
   case tgtok::XCond:
     return ParseOperationCond(CurRec, ItemType);
 
@@ -1655,6 +1659,99 @@ RecTy *TGParser::ParseOperatorType() {
   return Type;
 }
 
+/// Parse the !substr operation. Return null on error.
+///
+/// Substr ::= !substr(string, start-int [, length-int]) => string
+///
+/// All three operands are parsed, and the closing ')' consumed, before any
+/// type checking is done. When the length operand is omitted, the largest
+/// int64_t value is supplied so that the rest of the string is extracted.
+Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) {
+  TernOpInit::TernaryOp Code = TernOpInit::SUBSTR;
+  RecTy *Type = StringRecTy::get();
+
+  Lex.Lex(); // eat the operation
+
+  if (!consume(tgtok::l_paren)) {
+    TokError("expected '(' after !substr operator");
+    return nullptr;
+  }
+
+  Init *LHS = ParseValue(CurRec);
+  if (!LHS)
+    return nullptr;
+
+  if (!consume(tgtok::comma)) {
+    TokError("expected ',' in !substr operator");
+    return nullptr;
+  }
+
+  SMLoc MHSLoc = Lex.getLoc();
+  Init *MHS = ParseValue(CurRec);
+  if (!MHS)
+    return nullptr;
+
+  SMLoc RHSLoc = Lex.getLoc();
+  Init *RHS;
+  if (consume(tgtok::comma)) {
+    RHSLoc = Lex.getLoc();
+    RHS = ParseValue(CurRec);
+    if (!RHS)
+      return nullptr;
+  } else {
+    // No length given: use the maximum so the rest of the string is taken.
+    RHS = IntInit::get(std::numeric_limits<int64_t>::max());
+  }
+
+  if (!consume(tgtok::r_paren)) {
+    TokError("expected ')' in !substr operator");
+    return nullptr;
+  }
+
+  if (ItemType && !Type->typeIsConvertibleTo(ItemType)) {
+    Error(RHSLoc, Twine("expected value of type '") +
+                      ItemType->getAsString() + "', got '" +
+                      Type->getAsString() + "'");
+    return nullptr;
+  }
+
+  TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
+  if (!LHSt && !isa<UnsetInit>(LHS)) {
+    TokError("could not determine type of the string in !substr");
+    return nullptr;
+  }
+  if (LHSt && !isa<StringRecTy>(LHSt->getType())) {
+    TokError(Twine("expected string, got type '") +
+             LHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
+  if (!MHSt && !isa<UnsetInit>(MHS)) {
+    TokError("could not determine type of the start position in !substr");
+    return nullptr;
+  }
+  if (MHSt && !isa<IntRecTy>(MHSt->getType())) {
+    Error(MHSLoc, Twine("expected int, got type '") +
+                      MHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  // RHS is never null here: it was either parsed or defaulted above.
+  TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
+  if (!RHSt && !isa<UnsetInit>(RHS)) {
+    TokError("could not determine type of the length in !substr");
+    return nullptr;
+  }
+  if (RHSt && !isa<IntRecTy>(RHSt->getType())) {
+    TokError(Twine("expected int, got type '") +
+             RHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
+}
+
 /// Parse the !foreach and !filter operations. Return null on error.
 ///
 /// ForEach ::= !foreach(ID, list-or-dag, expr) => list
@@ -2206,7 +2303,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
   case tgtok::XFoldl:
   case tgtok::XForEach:
   case tgtok::XFilter:
-  case tgtok::XSubst: {  // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+  case tgtok::XSubst:
+  case tgtok::XSubstr: {  // Value ::= !ternop '(' Value ',' Value ',' Value ')'
     return ParseOperation(CurRec, ItemType);
   }
   }
diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h
index bdeb4d35382b21..3ed78a23067ff2 100644
--- a/llvm/lib/TableGen/TGParser.h
+++ b/llvm/lib/TableGen/TGParser.h
@@ -254,6 +254,7 @@ class TGParser {
                       TypedInit *FirstItem = nullptr);
   RecTy *ParseType();
   Init *ParseOperation(Record *CurRec, RecTy *ItemType);
+  Init *ParseOperationSubstr(Record *CurRec, RecTy *ItemType);
   Init *ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType);
   Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
   RecTy *ParseOperatorType();
diff --git a/llvm/test/TableGen/substr.td 
b/llvm/test/TableGen/substr.td
new file mode 100644
index 00000000000000..5efe4ce69215e2
--- /dev/null
+++ b/llvm/test/TableGen/substr.td
@@ -0,0 +1,84 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
+
+// Tests for the !substr bang operator. The default run checks the folded
+// strings; the -DERROR1 run checks the diagnostics for bad arguments.
+
+defvar claim = "This is the end of the world!";
+
+// CHECK: def Rec1
+// CHECK:   fullNoLength = "This is the end of the world!";
+// CHECK:   fullLength = "This is the end of the world!";
+// CHECK:   thisIsTheEnd = "This is the end";
+// CHECK:   DoorsSong = "the end";
+// CHECK:   finalNoLength = "end of the world!";
+// CHECK:   finalLength = "end of the world!";
+
+def Rec1 {
+  string fullNoLength = !substr(claim, 0);
+  string fullLength = !substr(claim, 0, 999);
+  string thisIsTheEnd = !substr(claim, 0, 15);
+  string DoorsSong = !substr(claim, 8, 7);
+  string finalNoLength = !substr(claim, 12);
+  string finalLength = !substr(claim, 12, !sub(!size(claim), 12));
+}
+
+// CHECK: def Rec2 {
+// CHECK:   lastName = "Flintstone";
+
+def Rec2 {
+  string firstName = "Fred";
+  string name = firstName # " " # "Flintstone";
+  string lastName = !substr(name, !add(!size(firstName), 1));
+}
+
+// CHECK: def Rec3 {
+// CHECK:   test1 = "";
+// CHECK:   test2 = "";
+// CHECK:   test3 = "";
+// CHECK:   test4 = "h";
+// CHECK:   test5 = "hello";
+// CHECK:   test6 = "";
+
+def Rec3 {
+  string test1 = !substr("", 0, 0);
+  string test2 = !substr("", 0, 9);
+  string test3 = !substr("hello", 0, 0);
+  string test4 = !substr("hello", 0, 1);
+  string test5 = !substr("hello", 0, 99);
+  string test6 = !substr("hello", 5, 99);
+}
+
+// CHECK: def Rec4
+// CHECK:   message = "This is the end of the world!";
+// CHECK:   messagePrefix = "This is th...";
+// CHECK:   warning = "Bad message: 'This is th...'";
+
+class C<string msg> {
+  string message = msg;
+  string messagePrefix = !substr(message, 0, 10) # "...";
+}
+
+def Rec4 : C<claim> {
+  string warning = "Bad message: '" # messagePrefix # "'";
+}
+
+#ifdef ERROR1
+
+// ERROR1: expected string, got type 'int'
+// ERROR1: expected int, got type 'bits<3>'
+// ERROR1: expected int, got type 'string'
+// ERROR1: !substr start position is out of range 0...29: 30
+// ERROR1: !substr length must be nonnegative
+
+def Rec8 {
+  string claim1 = !substr(42, 0, 3);
+  string claim2 = !substr(claim, 0b101);
+  string claim3 = !substr(claim, 0, "oops");
+}
+
+def Rec9 {
+  string claim1 = !substr(claim, !add(!size(claim), 1));
+  string claim2 = !substr(claim, 0, -13);
+}
+#endif