Skip to content

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 2 commits
  • 1 file changed
  • 0 commit comments
  • 1 contributor
Commits on May 19, 2013
@tmciver Changed mistaken access level modifier of parseS() method from public to
private.
3e67b07
Commits on May 21, 2013
@tmciver Cleaned up follow sets. Removed unneeded set followOfR and added new set
followOfTMinusFirstOfT which is used to decide if the epsilon production of
T is to be used.  Also expanded on first and follow sets in the doc
comment.
a114a78
Showing with 34 additions and 18 deletions.
  1. +34 −18 src/com/timmciver/bytegrep/parser/DefaultParser.java
View
52 src/com/timmciver/bytegrep/parser/DefaultParser.java
@@ -52,7 +52,21 @@
* | ?T // zero or one
* | epsilon
*
- * [byte-literal] // as defined above
+ * [byte-literal] // as defined above
+ *
+ * The following gives information about the first and follow sets.
+ *
+ * first(R) = {'(', '0'}
+ * first(T) = first(R) + {'|', '*', '+', '?'}
+ * = {'(', '0', '|', '*', '+', '?'}
+ * follow(R) = {')', '$'} + first(T)
+ * = {'(', ')', '0', '|', '*', '+', '?', '$'}
+ * follow(T) = follow(R)
+ *
+ * The epsilon production for T is chosen when the next character read is in
+ * follow(T) but not in first(T). This set is as follows:
+ *
+ * follow(T) - first(T) = {')', '$'}
*
* @author tim
*/
@@ -61,17 +75,17 @@
private final static Logger logger = Logger.getLogger(DefaultParser.class.getName());
private Set<Character> firstOfR;
- private Set<Character> followOfR;
+ private Set<Character> followOfTMinusFirstOfT;
public DefaultParser() {
// initialize firstOfR
firstOfR = Collections.unmodifiableSet(
new HashSet<>(Arrays.asList('0', '(')));
- // and followOfR
- followOfR = Collections.unmodifiableSet(
- new HashSet<>(Arrays.asList(')')));
+ // follow(T) - first(T) = {')', '$'}
+ followOfTMinusFirstOfT = Collections.unmodifiableSet(
+ new HashSet<>(Arrays.asList(')', '$')));
}
@Override
@@ -83,7 +97,7 @@ public RegularExpression parse(String s) throws IOException {
return parseS(reader);
}
- public RegularExpression parseS(PushbackReader reader) throws IOException {
+ private RegularExpression parseS(PushbackReader reader) throws IOException {
RegularExpression re = parseR(reader);
@@ -114,6 +128,7 @@ private RegularExpression parseR(PushbackReader reader) throws IOException {
re = parseGrouping(reader);
break;
default:
+ // next character was not in first(R)
logger.log(Level.SEVERE, "Read unexpected character: " + next);
throw new MalformedInputException("Read unexpected character: " + next);
}
@@ -132,8 +147,8 @@ private RegularExpression parseT(RegularExpression inRegex, PushbackReader reade
// check for end-of-input
if (next == -1) {
- // end of input; choose epsilon production; we're done
- return inRegex;
+ // end of input
+ nextChar = '$';
}
RegularExpression outRegex = null;
@@ -163,22 +178,23 @@ private RegularExpression parseT(RegularExpression inRegex, PushbackReader reade
// zero or one
outRegex = new ZeroOrOne(inRegex);
logger.log(Level.INFO, "Parsed zero or one regular expression: " + outRegex);
- } else {
- // it's an error if nextChar is not in followOfR
- if (!followOfR.contains(nextChar)) {
- throw new MalformedInputException("Read unexpected character: " + nextChar);
- }
+ } else if (followOfTMinusFirstOfT.contains(nextChar)) {
+ // epsilon production
- // push back the read character so that it can be read by parseR
- reader.unread(next);
+ // here we must push back any character from followOfTMinusFirstOfT
+ // (except $) so that it may be properly read later.
+ if (nextChar != '$') {
+ reader.unread(next);
+ }
- // since the next character is in follow(R), we must choose the
- // epsilon production of T
return inRegex;
+ } else {
+ // should not get here
+ throw new MalformedInputException("Read unexpected character: " + nextChar + ", val = " + next);
}
// The T productions are right recursive (except for the epsilon
- // transition which has already been accoutned for).
+ // transition which has already been accounted for).
return parseT(outRegex, reader);
}

No commit comments for this range

Something went wrong with that request. Please try again.