Skip to content

Commit

Permalink
finish dfa optimizer,add antlr style labels and var names
Browse files Browse the repository at this point in the history
  • Loading branch information
mesut146 committed Apr 6, 2021
1 parent 69a33e9 commit 7041c25
Show file tree
Hide file tree
Showing 26 changed files with 207 additions and 12,040 deletions.
2 changes: 1 addition & 1 deletion grammar/cmd
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
dir=$(dirname $0)
cc="${dir}/javacc-7.0.5.jar"
grammar="${dir}/parserx.jj"
out="${dir}/../src/main/java/grammar"
out="${dir}/../src/main/java/mesut/parserx/grammar"
java -cp $cc javacc -OUTPUT_DIRECTORY=${out} -FORCE_LA_CHECK=true $grammar
32 changes: 18 additions & 14 deletions grammar/parserx.jj
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ TOKEN:

TOKEN:
{
<IDENT: <CHAR> ( <CHAR> | ["0"-"9","_"])* >
<IDENT: (<CHAR> | "_") ( <CHAR> | ["0"-"9","_"])* >
| <#CHAR: ["a"-"z","A"-"Z"]>
| <#DIGIT: ["0"-"9"]>
| <#HEX_DIGIT: ["a"-"f","A"-"f","0"-"9"]>
Expand Down Expand Up @@ -191,7 +191,7 @@ void ruleDecl(Tree tree):
Node rhs;
}
{
name=name() declSeparator() rhs = rhs() ";"
name = name() declSeparator() rhs = rhs() ";"
{
decl.name=name;
decl.rhs=rhs;
Expand Down Expand Up @@ -223,9 +223,10 @@ Node rhs():
Node orContent():
{
Node node = new EmptyNode();
Token label;
}
{
(node = sequence())?
(node = sequence())? ["#" label = <IDENT>{node.label = label.image;}]
{
return node;
}
Expand All @@ -246,7 +247,7 @@ Node regex():
Node rule;
}
{
rule = simple() [LOOKAHEAD(regexType() | repetition()) (rule = regexType(rule) | rule = repetition(rule))]
rule = named() [LOOKAHEAD(regexType()) (rule = regexType(rule))]
{ return rule; }
}

Expand All @@ -258,6 +259,19 @@ Node regexType(Node node):{}
}
}

// Parses an optionally named element:  [ name "=" ] simple
// When the "name =" prefix is present, the parsed name is stored on the
// resulting node's `name` field; otherwise the node is returned unchanged.
Node named():
{
    String name = null;
    Node node;
}
{
    // Two tokens of lookahead are needed here: name() begins with <IDENT>, and
    // simple() may also begin with <IDENT> (a plain reference -- simple() is not
    // visible in this hunk, TODO confirm). With the default one-token lookahead the
    // optional prefix would be entered for every identifier and then fail on
    // the missing "=". Only commit to the prefix when <IDENT> "=" follows.
    (LOOKAHEAD(2) name = name() "=")? node = simple()
    {
        if(name != null) node.name = name;
        return node;
    }
}

Node simple():
{
Node rule;
Expand Down Expand Up @@ -359,16 +373,6 @@ String name():
token = <IDENT>{return token.image;}
}

// Parses a repetition suffix of the form  "{" <INTEGER> "}"  and wraps the
// given node in a Repetition carrying the parsed count.
//   node: the already-parsed element the suffix applies to.
//   returns: a new Repetition(node, count).
Node repetition(Node node):
{
Token num;
}
{
"{" num = <INTEGER> "}"
{
// num.toString() is presumably the token image (the digit text) -- verify
// against the generated Token class. Integer.parseInt throws
// NumberFormatException if that text is not a valid int.
return new Repetition(node, Integer.parseInt(num.toString()));
}
}

EmptyNode empty():
{}
Expand Down
24 changes: 18 additions & 6 deletions src/main/java/mesut/parserx/dfa/Minimization.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,43 +125,55 @@ public static int numOfStates(NFA nfa) {
}

static List<StateSet> group(NFA dfa) {
StateSet acc = new StateSet();
List<StateSet> list = new ArrayList<>();
StateSet noacc = new StateSet();
Map<String, StateSet> names = new HashMap<>();
for (int s = dfa.initial; s <= dfa.lastState; s++) {
if (dfa.isDead(s)) continue;
if (dfa.isAccepting(s)) {
acc.addState(s);
if (names.containsKey(dfa.names[s])) {
//group same token states
names.get(dfa.names[s]).addState(s);
}
else {
//each final state represents a different token so they can't be merged
StateSet acc = new StateSet();
acc.addState(s);
list.add(acc);
names.put(dfa.names[s], acc);
}
}
else {
noacc.addState(s);
}
}
List<StateSet> list = new ArrayList<>();
list.add(acc);
list.add(noacc);
return list;
}

public static NFA optimize(NFA dfa) {
List<StateSet> P = group(dfa);
List<StateSet> done = new ArrayList<>();
List<StateSet> all = new ArrayList<>(P);
while (!P.isEmpty()) {
StateSet set = P.get(0);
List<Integer> list = new ArrayList<>(set.states);
//get a pair
boolean changed = false;
main:
//if any state pair is distinguishable then split
for (int i = 0; i < list.size(); i++) {
for (int j = i + 1; j < list.size(); j++) {
int q1 = list.get(i);
int q2 = list.get(j);
if (dist(q1, q2, P, dfa)) {
if (dist(q1, q2, all, dfa)) {
//split
StateSet s = new StateSet();
s.addState(q2);
set.remove(q2);
P.add(s);
System.out.println(q1 + " " + q2);
all.add(s);
//System.out.println(q1 + " " + q2);
changed = true;
break main;
}
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/mesut/parserx/dfa/NFABuilder.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package mesut.parserx.dfa;

import mesut.parserx.gen.PrepareLexer;
import mesut.parserx.nodes.*;

public class NFABuilder {
Expand All @@ -15,6 +16,7 @@ public static NFA build(Tree tree) {
}

public NFA build() {
tree = new PrepareLexer(tree).prepare();
CharClass.makeDistinctRanges(tree);
nfa = new NFA(100);
nfa.tree = tree;
Expand Down
29 changes: 29 additions & 0 deletions src/main/java/mesut/parserx/gen/PrepareLexer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package mesut.parserx.gen;

import mesut.parserx.nodes.*;

/**
 * Transformer that replaces name references with the regex content of the
 * token they refer to.
 *
 * <p>Every {@link NameNode} visited is looked up in the tree's tokens and
 * substituted by that token's {@code regex} node.
 */
public class PrepareLexer extends SimpleTransformer {

    Tree tree;

    /**
     * @param tree the grammar tree whose token declarations will be processed
     */
    public PrepareLexer(Tree tree) {
        this.tree = tree;
    }

    /**
     * Runs {@code transformToken} on every token declaration of the tree.
     *
     * @return the same tree instance that was passed to the constructor
     */
    public Tree prepare() {
        for (TokenDecl decl : tree.tokens) {
            transformToken(decl);
        }
        return tree;
    }

    /**
     * Resolves a name reference to the regex of the token with that name.
     *
     * @param node   the reference being visited
     * @param parent the node containing the reference (unused here)
     * @return the {@code regex} node of the referenced token declaration
     * @throws RuntimeException if no token with the referenced name exists
     */
    @Override
    public Node transformName(NameNode node, Node parent) {
        TokenDecl referenced = tree.getToken(node.name);
        if (referenced != null) {
            // The referenced declaration's regex node itself is returned (no copy).
            return referenced.regex;
        }
        throw new RuntimeException("invalid token: " + node.name);
    }
}
25 changes: 6 additions & 19 deletions src/main/java/mesut/parserx/gen/PrepareTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

public class PrepareTree extends SimpleTransformer {
Tree tree;
boolean inToken;

public PrepareTree(Tree tree) {
this.tree = tree;
Expand All @@ -13,11 +12,6 @@ public PrepareTree(Tree tree) {
//check rule , token, string references
public static void checkReferences(Tree tree) {
PrepareTree prepareTree = new PrepareTree(tree);
prepareTree.inToken = true;
for (TokenDecl decl : tree.tokens) {
prepareTree.transformToken(decl);
}
prepareTree.inToken = false;
for (RuleDecl rule : tree.rules) {
prepareTree.transformRule(rule);
}
Expand All @@ -44,30 +38,23 @@ public Node transformName(NameNode node, Node parent) {

@Override
public Node transformName(NameNode node, Node parent) {
if (node.isToken) {
if (tree.getToken(node.name) == null) {
throw new RuntimeException("invalid token: " + node.name);
}
if (tree.hasRule(node.name)) {
node.isToken = false;
}
else {
if (tree.hasRule(node.name)) {
node.isToken = false;
if (tree.getToken(node.name) == null) {
throw new RuntimeException("invalid reference: " + node.name + " in " + parent);
}
else {
if (tree.getToken(node.name) == null) {
throw new RuntimeException("invalid reference: " + node.name + " in " + parent);
}
else {
node.isToken = true;
}
node.isToken = true;
}
}

return node;
}

@Override
public Node transformString(StringNode node, Node parent) {
if (inToken) return node;
String val = node.value;
TokenDecl decl = tree.getTokenByValue(val);
if (decl == null) {
Expand Down
Loading

0 comments on commit 7041c25

Please sign in to comment.