Skip to content

Commit

Permalink
Optimizes some of the character patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
renggli committed Dec 27, 2014
1 parent c327d20 commit 7dbc8f1
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 24 deletions.
Expand Up @@ -2,7 +2,6 @@

import org.petitparser.parser.Parser;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
Expand All @@ -25,7 +24,7 @@ static CharacterPredicate any() {
*/
static CharacterPredicate anyOf(String string) {
  // Every character of the input becomes its own range whose start and stop are that character.
  // Only the two-argument constructor is used, and the mapping is applied exactly once.
  List<CharacterRange> ranges = string.chars()
      .mapToObj(value -> new CharacterRange((char) value, (char) value))
      .collect(Collectors.toList());
  // The collected ranges are handed to CharacterRange.toCharacterPredicate to build the result.
  return CharacterRange.toCharacterPredicate(ranges);
}
Expand All @@ -42,7 +41,7 @@ static CharacterPredicate none() {
*/
static CharacterPredicate noneOf(String string) {
  // Every character of the input becomes its own range whose start and stop are that character.
  // Only the two-argument constructor is used, and the mapping is applied exactly once.
  List<CharacterRange> ranges = string.chars()
      .mapToObj(value -> new CharacterRange((char) value, (char) value))
      .collect(Collectors.toList());
  // Build the predicate for the listed characters, then return the result of calling not() on it.
  return CharacterRange.toCharacterPredicate(ranges).not();
}
Expand All @@ -55,12 +54,35 @@ static CharacterPredicate of(char character) {
}

/**
 * Returns a character predicate that matches any character between {@code start} and {@code
 * stop}, both bounds included.
 */
static CharacterPredicate range(char start, char stop) {
  return value -> value >= start && stop >= value;
}

/**
 * Returns a character predicate that matches any character inside one of the inclusive ranges
 * {@code starts[i]} to {@code stops[i]}.
 *
 * <p>The ranges must be listed in ascending order and must not overlap. Both arrays are copied, so
 * later modifications by the caller do not affect the returned predicate.
 *
 * @param starts the first character of each range
 * @param stops the last character of each range, parallel to {@code starts}
 * @return a predicate accepting every character covered by one of the ranges
 * @throws IllegalArgumentException if the arrays differ in length, if a range starts after its own
 *     stop, or if a range starts at or before the stop of the preceding range
 */
static CharacterPredicate ranges(char[] starts, char[] stops) {
  if (starts.length != stops.length) {
    throw new IllegalArgumentException("Invalid ranges.");
  }
  // Snapshot the inputs before validating them: the lambda below keeps these arrays for its whole
  // lifetime, and the binary search is only correct while the validated ordering still holds.
  final char[] lower = starts.clone();
  final char[] upper = stops.clone();
  for (int i = 0; i < lower.length; i++) {
    if (lower[i] > upper[i]) {
      throw new IllegalArgumentException("Invalid range: " + lower[i] + "-" + upper[i]);
    }
    if (i + 1 < lower.length && lower[i + 1] <= upper[i]) {
      throw new IllegalArgumentException("Invalid sequence.");
    }
  }
  return value -> {
    int index = Arrays.binarySearch(lower, value);
    // index >= 0: value is exactly the start of a range.
    // index == -1: value sorts before the first start, so no range can contain it.
    // otherwise: -index - 2 is the last range starting below value; compare against its stop.
    return index >= 0 || (index < -1 && value <= upper[-index - 2]);
  };
}

/**
* Returns a character predicate that matches the provided pattern.
*/
Expand All @@ -70,7 +92,7 @@ static CharacterPredicate pattern(String pattern) {

class PatternParser {
static final Parser PATTERN_SIMPLE = CharacterParser.any()
.map((Character value) -> new CharacterRange(value));
.map((Character value) -> new CharacterRange(value, value));
static final Parser PATTERN_RANGE = CharacterParser.any()
.seq(CharacterParser.of('-'))
.seq(CharacterParser.any())
Expand Down Expand Up @@ -156,5 +178,4 @@ public CharacterPredicate or(CharacterPredicate... others) {
return new AltCharacterPredicate(array);
}
}

}
Expand Up @@ -38,32 +38,28 @@ public int compare(CharacterRange first, CharacterRange second) {
}
}

// 3. build the corresponding predicates
List<CharacterPredicate> predicates = new ArrayList<>();
for (CharacterRange range : mergedRanges) {
if (range.stop - range.start > 1) {
predicates.add(CharacterPredicate.range(range.start, range.stop));
} else {
for (char value = range.start; value <= range.stop; value++) {
predicates.add(CharacterPredicate.of(value));
}
// 3. build the best resulting predicates
if (mergedRanges.isEmpty()) {
return CharacterPredicate.none();
} else if (mergedRanges.size() == 1) {
return mergedRanges.get(0).start == mergedRanges.get(0).stop
? CharacterPredicate.of(mergedRanges.get(0).start)
: CharacterPredicate.range(mergedRanges.get(0).start, mergedRanges.get(0).stop);
} else {
char[] starts = new char[mergedRanges.size()];
char[] stops = new char[mergedRanges.size()];
for (int i = 0; i < mergedRanges.size(); i++) {
starts[i] = mergedRanges.get(i).start;
stops[i] = mergedRanges.get(i).stop;
}
return CharacterPredicate.ranges(starts, stops);
}

// 4. when necessary build a composite predicate
return predicates.size() == 1
? predicates.get(0)
: new CharacterPredicate.AltCharacterPredicate(
predicates.toArray(new CharacterPredicate[predicates.size()]));
}

private final char start;
private final char stop;

/**
 * Creates a range from a single character by delegating to the two-argument constructor with
 * {@code start} passed as both arguments.
 */
CharacterRange(char start) {
this(start, start);
}

CharacterRange(char start, char stop) {
this.start = start;
this.stop = stop;
Expand Down

0 comments on commit 7dbc8f1

Please sign in to comment.