Skip to content

Commit

Permalink
new version of calcLlkFirstSet which can cope with nullables
Browse files Browse the repository at this point in the history
  • Loading branch information
Rochus Keller committed Oct 23, 2019
1 parent 1555727 commit 7978780
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 36 deletions.
12 changes: 8 additions & 4 deletions CocoGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,21 +241,25 @@ void CocoGen::handlePredicate(QTextStream& out,EbnfSyntax::Node* pred, EbnfSynta
if( ll > 0 )
{
EbnfAnalyzer::LlkNodes llkNodes;
// EbnfAnalyzer::calcLlkFirstSet( ll, 0, llkNodes,sequence, d_tbl );
EbnfAnalyzer::calcLlkFirstSet2( ll, llkNodes,sequence, d_tbl );
EbnfAnalyzer::calcLlkFirstSet( ll, llkNodes,sequence, d_tbl );
//EbnfAnalyzer::calcLlkFirstSet2( ll, llkNodes,sequence, d_tbl );
out << "IF( ";
for( int i = 0; i < llkNodes.size(); i++ )
{
if( i != 0 )
out << "&& ";
if( llkNodes[i].size() > 1 )
out << "( ";
QStringList names;
EbnfSyntax::NodeRefSet::const_iterator j;
for( j = llkNodes[i].begin(); j != llkNodes[i].end(); ++j )
names << tokenName( (*j).d_node->d_tok.d_val.toStr() );
names.sort(Qt::CaseInsensitive);
for( int j = 0; j < names.size(); j++ )
{
if( j != llkNodes[i].begin() )
if( j != 0 )
out << "|| ";
out << "peek(" << i+1 << ") == _" << tokenName( (*j).d_node->d_tok.d_val.toStr() ) << " ";
out << "peek(" << i+1 << ") == _" << names[j] << " ";
}
if( llkNodes[i].size() > 1 )
out << ") ";
Expand Down
143 changes: 114 additions & 29 deletions EbnfAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,67 +81,139 @@ static inline int countNotEmpty( const EbnfAnalyzer::LlkNodes& l )
return res;
}

void EbnfAnalyzer::calcLlkFirstSet(quint16 k, quint16 curBin, LlkNodes& res, const EbnfSyntax::Node* node, FirstFollowSet* tbl)
// Debug-trace helper: builds the indentation prefix that is printed in front
// of the qDebug()/qCritical() messages of the recursive first-set functions.
// Returns the string " | " repeated qAbs(level) times (empty for level 0).
static QByteArray ws( int level )
{
// The absolute value is used, so a negative level indents like its positive counterpart.
QByteArray res;
for( int i = 0; i < qAbs(level); i++ )
res += " | ";
return res;
}

// Work item for the Sequence case of calcLlkFirstSetImp: one sub-node of the
// sequence together with the inclusive range of lookahead bins
// (d_from .. d_to) in which that sub-node has to be visited.
struct _SubNodeBin
{
    const EbnfSyntax::Node* d_node; // sub-node to visit
    quint16 d_from;                 // first bin to visit (inclusive)
    quint16 d_to;                   // last bin to visit (inclusive)

    // from/to are taken as int and converted to quint16 on storage.
    _SubNodeBin( const EbnfSyntax::Node* n, int from, int to )
        : d_node( n ),
          d_from( from ),
          d_to( to )
    {
    }
};

quint16 EbnfAnalyzer::calcLlkFirstSetImp(quint16 k, quint16 curBin, LlkNodes& res, const EbnfSyntax::Node* node, FirstFollowSet* tbl, int level)
{
// Gehe entlang der Blätter des Baums und sammle alle Terminals ein gruppiert in Distanzboxen.
// TODO: funktioniert noch nicht für alle möglichen Fälle!
// Wird eigentlich nur mit node=Sequence aufgerufen, da ja Predicates nur dort vorkommen

// returns maximum number of symbols covered by this node

if( node == 0 || node->doIgnore() )
return;
return 0;

#ifdef _DEBUG
qDebug() << ws(level).constData() << "visit bin" << curBin << "l/c" <<
node->d_tok.d_lineNr << node->d_tok.d_colNr << node->toString()
<< "level" << level << "k" << k << "res len" << res.size(); // TEST
#endif

if( level > k * 3 )
{
#ifdef _DEBUG
qCritical() << ws(level).constData() << "calcLlkFirstSet level depth limit hit";
#endif
return 0;
}

switch( node->d_type )
{
case EbnfSyntax::Node::Terminal:
resize( res, curBin );
res[curBin].insert( node ); // hier ist dieser node gemeint, nicht Follow(node)!
break;
#ifdef _DEBUG
qDebug() << ws(level).constData() << "insert" << curBin <<
node->d_tok.d_val.toBa() << node->d_tok.d_lineNr << node->d_tok.d_colNr; // TEST
#endif
return 1;

case EbnfSyntax::Node::Nonterminal:
if( node->d_def && node->d_def->d_node )
return calcLlkFirstSet( k, curBin, res, node->d_def->d_node, tbl );
return calcLlkFirstSetImp( k, curBin, res, node->d_def->d_node, tbl,level+1 );
else
{
// same as Terminal
resize( res, curBin );
res[curBin].insert( node );
#ifdef _DEBUG
qDebug() << ws(level).constData() << "insert" << curBin <<
node->d_tok.d_val.toBa() << node->d_tok.d_lineNr << node->d_tok.d_colNr; // TEST
#endif
return 1;
}
break;

case EbnfSyntax::Node::Sequence:
#if 0
foreach( EbnfSyntax::Node* sub, node->d_subs )
{
if( sub->doIgnore() )
continue;
calcLlkFirstSet( k, curBin++, res, sub, tbl );
calcLlkFirstSet( k, curBin++, res, sub, tbl,level+1 );
if( res.size() >= k )
// if( countNotEmpty(res) >= k ) // not needed
break;
}
// TODO: repetitions and options
#else
{
QList<_SubNodeBin> toVisit;
int i = curBin;
int nullable = 0;
foreach( EbnfSyntax::Node* sub, node->d_subs )
{
if( !sub->doIgnore() )
{
toVisit << _SubNodeBin(sub,i-nullable,i);
i++;
if( sub->isNullable() )
nullable++;
}
}
int max = 0;
for( int i = 0; i < toVisit.size(); i++ )
{
for( int bin = toVisit[i].d_from; bin <= toVisit[i].d_to && bin < k; bin++ )
{
const quint16 tmp = calcLlkFirstSetImp( k, bin, res, toVisit[i].d_node, tbl, level+1 );
if( tmp > 1 )
{
for( int j = i+1; j < toVisit.size(); j++ )
toVisit[j].d_to += tmp - 1;
}
if( tmp > 0 && bin > max )
max = bin;
}
}
return max - curBin + 1;
}
// TODO: repetition
#endif
break;
case EbnfSyntax::Node::Alternative:
foreach( EbnfSyntax::Node* sub, node->d_subs )
{
if( sub->doIgnore() )
continue;
calcLlkFirstSet( k, curBin, res, sub, tbl );
quint16 count = 0;
foreach( EbnfSyntax::Node* sub, node->d_subs )
{
if( sub->doIgnore() )
continue;
const quint16 tmp = calcLlkFirstSetImp( k, curBin, res, sub, tbl,level+1 );
if( tmp > count )
count = tmp;
}
return count;
}
break;

default:
break;
}
}

static QByteArray ws( int level )
{
QByteArray res;
for( int i = 0; i < qAbs(level); i++ )
res += " | ";
return res;
return 0;
}

void EbnfAnalyzer::calcLlkFirstSet2Imp(quint16 k, int curBin, int level, LlkNodes& res,
const EbnfSyntax::Node* node, FirstFollowSet* tbl, QSet<const EbnfSyntax::Node*>& visited)
const EbnfSyntax::Node* node, FirstFollowSet* tbl, CheckSet& visited)
{
if( node == 0 || node->doIgnore() )
return;
Expand All @@ -150,21 +222,27 @@ void EbnfAnalyzer::calcLlkFirstSet2Imp(quint16 k, int curBin, int level, LlkNode
else
visited.insert(node);

qDebug() << ws(level).constData() << "visit" <<
node->d_tok.d_lineNr << node->d_tok.d_colNr << node->toString() << node->d_tok.toString(); // TEST
#ifdef _DEBUG
qDebug() << ws(level).constData() << "visit bin" << curBin << "l/c" <<
node->d_tok.d_lineNr << node->d_tok.d_colNr << node->toString()
<< "level" << level << "k" << k << "res len" << res.size(); // TEST
#endif

// TODO: ev. separate Funktion um von einem Node zum nächsten zu wandern und im Falle von usedBy und Alternative
// mehrere parallele Nodes zurückzugeben. Offen ist die Ermittlung des curBin.

if( level >= 0 )
{
// CheckSet visited; // don't check visited downwards
switch( node->d_type )
{
case EbnfSyntax::Node::Terminal:
resize( res, curBin );
res[curBin].insert( node ); // hier ist dieser node gemeint, nicht Follow(node)!
#ifdef _DEBUG
qDebug() << ws(level).constData() << "insert" << curBin <<
node->d_tok.d_val.data() << node->d_tok.d_lineNr << node->d_tok.d_colNr; // TEST
#endif
break;
case EbnfSyntax::Node::Nonterminal:
if( node->d_def && node->d_def->d_node )
Expand All @@ -174,12 +252,14 @@ void EbnfAnalyzer::calcLlkFirstSet2Imp(quint16 k, int curBin, int level, LlkNode
// same as Terminal
resize( res, curBin );
res[curBin].insert( node );
#ifdef _DEBUG
qDebug() << ws(level).constData() << "insert" << curBin <<
node->d_tok.d_val.data() << node->d_tok.d_lineNr << node->d_tok.d_colNr; // TEST
#endif
}
break;
case EbnfSyntax::Node::Sequence:
#if 0
#if 1
foreach( EbnfSyntax::Node* sub, node->d_subs )
{
if( sub->doIgnore() )
Expand All @@ -200,17 +280,16 @@ void EbnfAnalyzer::calcLlkFirstSet2Imp(quint16 k, int curBin, int level, LlkNode
}
for( int i = curBin; i < k && i < toVisit.size(); i++ )
{
QSet<const EbnfSyntax::Node*> check = visited;
for( int j = i; j < toVisit.size(); j++ )
{
calcLlkFirstSet2Imp( k, i, level + 1, res, toVisit[j], tbl, check );
visited.clear();
calcLlkFirstSet2Imp( k, i, level + 1, res, toVisit[j], tbl, visited ); // TODO: visited too strong
if( !toVisit[j]->isNullable() )
break;
}
if( res.size() >= k )
break;
}
visited += toVisit.toSet();
}
// TODO: repetition
#else
Expand Down Expand Up @@ -248,6 +327,7 @@ void EbnfAnalyzer::calcLlkFirstSet2Imp(quint16 k, int curBin, int level, LlkNode
break;
}
}
// Vermutlich unnötig, auch nach oben zu suchen
if( level <= 0 && res.size() < k )
{
// Vorsicht, es kann sein dass wir beim Weg nach oben wieder dort vorbeikommen, wo man angefangen haben
Expand Down Expand Up @@ -381,6 +461,11 @@ EbnfSyntax::NodeRefSet EbnfAnalyzer::intersectAll(const EbnfAnalyzer::LlkNodes&
return EbnfSyntax::NodeRefSet();
}

// Public entry point of the first-set calculation.
// Delegates to calcLlkFirstSetImp, starting at bin 0 and recursion level 0;
// the results are written to 'res'. The quint16 value returned by the
// implementation is not used here.
void EbnfAnalyzer::calcLlkFirstSet(quint16 k, EbnfAnalyzer::LlkNodes& res, const EbnfSyntax::Node* node, FirstFollowSet* tbl)
{
    const quint16 firstBin = 0; // collection starts in the first lookahead bin
    const int topLevel = 0;     // outermost recursion level
    calcLlkFirstSetImp( k, firstBin, res, node, tbl, topLevel );
}

void EbnfAnalyzer::calcLlkFirstSet2(quint16 k, EbnfAnalyzer::LlkNodes& res, const EbnfSyntax::Node* node, FirstFollowSet* tbl)
{
QSet<const EbnfSyntax::Node*> visited;
Expand Down
7 changes: 5 additions & 2 deletions EbnfAnalyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ class EbnfAnalyzer
static QStringList collectAllTerminalProductions( EbnfSyntax* );

typedef QList<EbnfSyntax::NodeRefSet> LlkNodes;
static void calcLlkFirstSet(quint16 k, quint16 curBin, LlkNodes&, const EbnfSyntax::Node* node, FirstFollowSet* );
static EbnfSyntax::NodeRefSet intersectAll( const LlkNodes& lhs, const LlkNodes& rhs );
static void calcLlkFirstSet(quint16 k, LlkNodes&, const EbnfSyntax::Node* node, FirstFollowSet* );
static void calcLlkFirstSet2(quint16 k, LlkNodes&, const EbnfSyntax::Node* node, FirstFollowSet* );

static void checkForAmbiguity( FirstFollowSet*, EbnfErrors*);
Expand All @@ -47,8 +47,11 @@ class EbnfAnalyzer
static void findAmbiguousAlternatives( EbnfSyntax::Node*, FirstFollowSet*, EbnfErrors* );
static void findAmbiguousOptionals( EbnfSyntax::Node*, FirstFollowSet*, EbnfErrors* );
static void reportAmbig(EbnfSyntax::Node* seq, int ambigIdx, const EbnfSyntax::NodeRefSet& diff, const EbnfSyntax::NodeSet& ambigSet2, FirstFollowSet*, EbnfErrors* );
typedef QSet<const EbnfSyntax::Node*> CheckSet;
static void calcLlkFirstSet2Imp(quint16 k, int curBin, int level, LlkNodes&, const EbnfSyntax::Node* node,
FirstFollowSet*, QSet<const EbnfSyntax::Node*>& visited );
FirstFollowSet*, CheckSet& visited );
static quint16 calcLlkFirstSetImp(quint16 k, quint16 curBin, LlkNodes&,
const EbnfSyntax::Node* node, FirstFollowSet*, int level );
static bool findPath( EbnfSyntax::ConstNodeList& path, const EbnfSyntax::Node* to );
};

Expand Down
3 changes: 2 additions & 1 deletion LlgenGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ void LlgenGen::handlePredicate(QTextStream& out, EbnfSyntax::Node* pred, EbnfSyn
return;

EbnfAnalyzer::LlkNodes llkNodes;
EbnfAnalyzer::calcLlkFirstSet2( ll, llkNodes,sequence, tbl );
//EbnfAnalyzer::calcLlkFirstSet2( ll, llkNodes,sequence, tbl );
EbnfAnalyzer::calcLlkFirstSet( ll, llkNodes,sequence, tbl );
out << "%if( ";
for( int i = 0; i < llkNodes.size(); i++ )
{
Expand Down

0 comments on commit 7978780

Please sign in to comment.