Permalink
Browse files

Merge branch 'array-match'

  • Loading branch information...
2 parents 3063609 + d9d1219 commit 4767d5d3289d6d67383e299acd5314fe12d1fd02 @tmciver committed May 24, 2013
@@ -1,10 +1,11 @@
package com.timmciver.bytegrep;
-import java.io.InputStream;
+import java.util.List;
/**
- *
+ * A regular expression that matches if either of its two sub
+ * expressions match.
* @author tim
*/
public class AlternationExpression extends RegularExpression {
@@ -18,8 +19,13 @@ public AlternationExpression(RegularExpression expr1, RegularExpression expr2) {
}
@Override
- public boolean internalMatch(InputStream in) {
- return expr1.match(in) || expr2.match(in);
+ public boolean match(byte[] data, int offset, List<Byte> matchedBytes) {
+
+ if (expr1.match(data, offset, matchedBytes)) {
+ return true;
+ }
+
+ return expr2.match(data, offset, matchedBytes);
}
public RegularExpression getFirstExpression() {
@@ -1,13 +1,10 @@
package com.timmciver.bytegrep;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.logging.Level;
-import java.util.logging.Logger;
+import java.util.List;
/**
- *
+ * A regular expression to match a single byte.
* @author tim
*/
public class LiteralByte extends RegularExpression {
@@ -23,16 +20,19 @@ public LiteralByte(int val) {
}
@Override
- public boolean internalMatch(InputStream in) {
- byte nextByte;
- try {
- nextByte = (byte)in.read();
- } catch (IOException ex) {
- Logger.getLogger(LiteralByte.class.getName()).log(Level.SEVERE, null, ex);
+ public boolean match(byte[] data, int offset, List<Byte> matchedBytes) {
+
+ if (offset >= data.length) {
return false;
}
- return (nextByte == literal) ? true : false;
+ boolean matched = data[offset] == literal;
+
+ if (matched) {
+ matchedBytes.add(literal);
+ }
+
+ return matched;
}
public byte getLiteralByte() {
@@ -1,50 +1,25 @@
package com.timmciver.bytegrep;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.logging.Level;
-import java.util.logging.Logger;
+import java.util.List;
/**
- *
+ * Parent class for all regular expressions.
* @author tim
*/
public abstract class RegularExpression {
/**
- * Returns true if this RegularExpression matches the bytes in the
- * given InputStream, false otherwise.
- * @param in the InputStream to read bytes from
- * @return true if there's a match; false otherwise.
+ * Attempts to match this RegularExpression against the given data, starting at the given offset.
+ * @param data input byte array
+ * @param offset the byte offset into the data array at which matching
+ * should begin
+ * @param matchedBytes a list of the bytes matched so far. Each
+ * RegularExpression implementation should add the bytes that are matched to
+ * this list.
+ * @return true if the RegularExpression matched the input, false otherwise.
*/
- public final boolean match(InputStream in) {
- // mark the stream
- in.mark(Integer.MAX_VALUE);
-
- // call internalMatch
- boolean matched = internalMatch(in);
-
- // if it failed, reset
- if (!matched) {
- try {
- in.reset();
- } catch (IOException ex) {
- Logger.getLogger(RegularExpression.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
-
- return matched;
- }
-
- /**
- * Implemented by subclasses. Subclasses should not call mark() or reset()
- * in the InputStream (this has already been taken care of in the match()
- * method.
- * @param in the InputStream to read bytes from
- * @return true if there's a match; false otherwise.
- */
- protected abstract boolean internalMatch(InputStream in);
+ public abstract boolean match(byte[] data, int offset, List<Byte> matchedBytes);
@Override
public abstract boolean equals(Object o);
@@ -1,10 +1,12 @@
package com.timmciver.bytegrep;
-import java.io.InputStream;
+import java.util.List;
/**
- *
+ * A regular expression that matches if the given regular expression
+ * matches a number of times between minMatches and maxMatches
+ * inclusive.
* @author tim
*/
public class RepetitionExpression extends RegularExpression {
@@ -35,13 +37,14 @@ public RepetitionExpression(RegularExpression expr, int minMatches, int maxMatch
}
@Override
- protected boolean internalMatch(InputStream in) {
+ public boolean match(byte[] data, int offset, List<Byte> matchedBytes) {
// a) If the first one doesn't match and minMatches is equal to zero, it's
// a match. b) If the first one is a match and maxMatches is equal to
// one, then it's a match. c) If minMatches is greater than zero, then
// it's not a match.
- boolean matched = expr.match(in);
+ int numBeforeBytes = matchedBytes.size();
+ boolean matched = expr.match(data, offset, matchedBytes);
if (!matched && minMatches == 0) {
return true;
} else if (matched && maxMatches == 1) {
@@ -51,8 +54,10 @@ protected boolean internalMatch(InputStream in) {
}
// consume input while there's a match
- while (matched) {
- matched = expr.match(in);
+ boolean stillMatches = true;
+ while (stillMatches) {
+ int newOffset = offset + matchedBytes.size() - numBeforeBytes;
+ stillMatches = expr.match(data, newOffset, matchedBytes);
}
// it's a match no matter what now
@@ -1,10 +1,10 @@
package com.timmciver.bytegrep;
-import java.io.InputStream;
+import java.util.List;
/**
- *
+ * Matches a sequence of two regular expressions.
* @author tim
*/
public class SequenceExpression extends RegularExpression {
@@ -18,8 +18,16 @@ public SequenceExpression(RegularExpression expr1, RegularExpression expr2) {
}
@Override
- protected boolean internalMatch(InputStream in) {
- return expr1.match(in) && expr2.match(in);
+ public boolean match(byte[] data, int offset, List<Byte> matchedBytes) {
+ int numMatchedBytes = matchedBytes.size();
+
+ if (!expr1.match(data, offset, matchedBytes)) {
+ return false;
+ }
+
+ int newOffset = offset + matchedBytes.size() - numMatchedBytes;
+
+ return expr2.match(data, newOffset, matchedBytes);
}
public RegularExpression getFirstExpression() {
@@ -4,10 +4,12 @@
import com.timmciver.bytegrep.RegularExpression;
import com.timmciver.bytegrep.parser.DefaultParser;
import com.timmciver.bytegrep.parser.Parser;
-import java.io.BufferedInputStream;
+import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
/**
*
@@ -38,23 +40,28 @@ public static void main(String[] args) throws IOException {
System.exit(1);
}
- // create a FileInputStream from the given file path
- InputStream in = new BufferedInputStream(new FileInputStream(filePath));
+ // read data from the file
+ File file = new File(filePath);
+ byte[] data = new byte[(int)file.length()];
+ InputStream in = new FileInputStream(file);
+ in.read(data);
// try matching at every byte
- long index = -1;
- boolean matched;
- int byteVal;
- do {
- ++index;
- matched = re.match(in);
- byteVal = in.read();
- //System.out.println("Read byte: " + byteVal);
- } while (!matched && byteVal != -1);
+ //int numMatched = 0;
+ //int byteVal;
+ int offset;
+ boolean matched = false;
+ List<Byte> matchedBytes = new ArrayList<>();
+ for (offset = 0; offset < data.length; ++offset) {
+ if ((matched = re.match(data, offset, matchedBytes))) {
+ break;
+ }
+ matchedBytes.clear();
+ }
// tell user if we found a match or not
if (matched) {
- System.out.println("Found match at byte offset " + Long.toHexString(index));
+ System.out.println("Found match at byte offset " + offset);
} else {
System.out.println("No match found.");
}
Oops, something went wrong.

0 comments on commit 4767d5d

Please sign in to comment.