Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
Choose a base branch
Nothing to show
...
Choose a head branch
Nothing to show
  • 10 commits
  • 15 files changed
  • 0 commit comments
  • 1 contributor
Showing with 331 additions and 131 deletions.
  1. +12 −11 src/dfa.cc
  2. +8 −3 src/dfa.h
  3. +1 −1 src/expr.cc
  4. +12 −23 src/expr.h
  5. +6 −4 src/exprutil.cc
  6. +2 −0 src/exprutil.h
  7. +2 −4 src/generator.cc
  8. +3 −2 src/generator.h
  9. +23 −20 src/lexer.cc
  10. +1 −1 src/lexer.h
  11. +155 −16 src/recon.cc
  12. +39 −15 src/regen.cc
  13. +38 −7 src/regen.h
  14. +24 −19 src/regex.cc
  15. +5 −5 src/regex.h
View
@@ -539,7 +539,6 @@ JITCompiler::JITCompiler(const DFA &dfa, std::size_t state_code_size = 64):
cmp(arg3, NULL);
je("finalize");
mov(ptr[arg3+sizeof(uint8_t*)], tmp2);
- jmp("finalize");
L("finalize");
#ifdef XBYAK32
pop(ebx);
@@ -560,17 +559,19 @@ JITCompiler::JITCompiler(const DFA &dfa, std::size_t state_code_size = 64):
L(labelbuf);
states_addr[i] = getCurr();
if (dfa.IsAcceptState(i)) {
- inLocalLabel();
- cmp(arg3, NULL);
- je(".ret");
mov(tmp2, arg1);
- if (dfa.flag().shortest_match()) jmp(".ret");
- jmp(".cont");
- L(".ret");
- mov(reg_a, i);
- jmp("return");
- L(".cont");
- outLocalLabel();
+ if (!dfa.flag().full_match()) {
+ inLocalLabel();
+ cmp(arg3, NULL);
+ je(".ret");
+ if (dfa.flag().shortest_match()) jmp(".ret");
+ jmp("@f");
+ L(".ret");
+ mov(reg_a, i);
+ jmp("return");
+ L("@@");
+ outLocalLabel();
+ }
}
// can transition without table lookup ?
const DFA::AlterTrans &at = dfa[i].alter_transition;
View
@@ -35,18 +35,23 @@ class JITCompiler: public Xbyak::CodeGenerator {
class DFA {
#ifdef REGEN_ENABLE_XBYAK
class Jitter: public Xbyak::CodeGenerator {
- friend class DFA;
struct Transition {
void* t[256];
Transition(void* fill = NULL) { std::fill(t, t+256, fill); }
void* &operator[](std::size_t index) { return t[index]; }
};
+ struct CodeInfo {
+ CodeInfo(): data_addr(NULL), code_addr(NULL) {}
+ void** data_addr;
+ void** code_addr;
+ };
public:
Jitter(std::size_t code_size = 4096): CodeGenerator(code_size), state_num_(0) {}
- private:
std::deque<Transition> data_segment_;
+ std::deque<Xbyak::CodeGenerator*> code_segments_;
+ std::deque<CodeInfo> code_info_;
std::size_t state_num_;
-};
+ };
#endif
public:
typedef uint32_t state_t;
View
@@ -62,7 +62,7 @@ Expr::Connect(std::set<StateExpr*> &src, std::set<StateExpr*> &dst, bool reverse
Expr* Expr::Shuffle(Expr *lhs, Expr *rhs, ExprPool *p)
{
- Expr *e;
+ Expr *e = NULL;
std::vector<Expr *> ls, rs;
ExprPool tmp_pool;
lhs->Serialize(ls, &tmp_pool); rhs->Serialize(rs, &tmp_pool);
View
@@ -7,24 +7,12 @@ namespace regen {
class Expr;
class StateExpr;
-class Literal;
-class CharClass;
-class Dot;
-class BegLine;
-class EndLine;
-class None;
-class Epsilon;
-class Operator;
-class EOP;
+class Literal; class CharClass; class Dot; class BegLine; class EndLine;
+class None; class Epsilon; class Operator; class EOP;
class BinaryExpr;
-class Concat;
-class Union;
-class Intersection;
-class XOR;
+class Concat; class Union; class Intersection; class XOR;
class UnaryExpr;
-class Qmark;
-class Plus;
-class Star;
+class Qmark; class Plus; class Star;
struct ExprPool;
class ExprVisitor {
@@ -127,28 +115,29 @@ class Expr {
struct ExprPool {
public:
- ExprPool() {}
- ~ExprPool() { for(std::vector<Expr*>::iterator i = pool.begin(); i != pool.end(); ++i) delete *i; }
+ ExprPool(): p_(NULL) {}
+ ~ExprPool() { for(std::vector<Expr*>::iterator i = pool.begin(); i != pool.end(); ++i) delete *i; delete p_; }
template<class T> T* alloc()
- { pool.push_back(NULL); pool.back() = new T(); return (T*)pool.back(); }
+ { p_ = new T(); pool.push_back(p_); p_ = NULL; return (T*)pool.back(); }
template<class T, class P1> T* alloc(P1 p1)
- { pool.push_back(NULL); pool.back() = new T(p1); return (T*)pool.back(); }
+ { p_ = new T(p1); pool.push_back(p_); p_ = NULL; return (T*)pool.back(); }
template<class T, class P1, class P2> T* alloc(P1 p1, P2 p2)
- { pool.push_back(NULL); pool.back() = new T(p1, p2); return (T*)pool.back(); }
+ { p_ = new T(p1, p2); pool.push_back(p_); p_ = NULL; return (T*)pool.back(); }
template<class T, class P1, class P2, class P3> T* alloc(P1 p1, P2 p2, P3 p3)
- { pool.push_back(NULL); pool.back() = new T(p1, p2, p3); return (T*)pool.back(); }
+ { p_ = new T(p1, p2, p3); pool.push_back(p_); p_ = NULL; return (T*)pool.back(); }
void drain(ExprPool &p) { drain(&p); }
void drain(ExprPool *p)
{
pool.reserve(pool.size()+p->pool.size());
pool.insert(pool.end(), p->pool.begin(), p->pool.end());
- p->pool.erase(p->pool.begin(), p->pool.end());
+ p->pool.clear();
}
private:
std::vector<Expr*> pool;
+ Expr *p_;
};
class StateExpr: public Expr {
View
@@ -83,7 +83,8 @@ void PrintRegexVisitor::Print(Expr* e)
void PrintRegexVisitor::Visit(BinaryExpr *e)
{
- if (e->lhs()->type() == Expr::kUnion) {
+ if (Expr::SuperTypeOf(e->lhs()) == Expr::kBinaryExpr
+ && e->lhs()->type() != Expr::kConcat) {
printf("(");
e->lhs()->Accept(this);
printf(")");
@@ -93,7 +94,8 @@ void PrintRegexVisitor::Visit(BinaryExpr *e)
PrintExprVisitor::Print(e);
- if (e->rhs()->type() == Expr::kUnion) {
+ if (Expr::SuperTypeOf(e->rhs()) == Expr::kBinaryExpr
+ && e->rhs()->type() != Expr::kConcat) {
printf("(");
e->rhs()->Accept(this);
printf(")");
@@ -104,8 +106,8 @@ void PrintRegexVisitor::Visit(BinaryExpr *e)
void PrintRegexVisitor::Visit(UnaryExpr *e)
{
- switch (e->lhs()->type()) {
- case Expr::kConcat: case Expr::kUnion:
+ switch (Expr::SuperTypeOf(e->lhs())) {
+ case Expr::kBinaryExpr:
printf("(");
e->lhs()->Accept(this);
printf(")");
View
@@ -29,6 +29,8 @@ class PrintExprVisitor: public ExprVisitor {
void Visit(None * e) { printf("[:None:]"); }
void Visit(Concat* e) {}
void Visit(Union* e) { printf("|"); }
+ void Visit(Intersection* e) { printf("&"); }
+ void Visit(XOR* e) { printf("&&"); }
void Visit(Qmark* e) { printf("?"); }
void Visit(Plus* e) { printf("+"); }
void Visit(Star* e) { printf("*"); }
View
@@ -19,14 +19,13 @@ unsigned char* normalize(unsigned int c, unsigned char *buf)
return buf;
}
-void DotGenerate(const Regex &regex)
+void DotGenerate(const DFA &dfa)
{
static const char* const normal = "circle";
static const char* const accept = "doublecircle";
static const char* const thema = "fillcolor=lightsteelblue1, style=filled, color = navyblue ";
unsigned char buf[10];
puts("digraph DFA {\n rankdir=\"LR\"");
- const DFA &dfa = regex.dfa();
for (std::size_t i = 0; i < dfa.size(); i++) {
printf(" q%"PRIuS" [shape=%s, %s]\n", i, (dfa.IsAcceptState(i) ? accept : normal), thema);
}
@@ -53,11 +52,10 @@ void DotGenerate(const Regex &regex)
puts("}");
}
-void CGenerate(const Regex& regex)
+void CGenerate(const DFA &dfa)
{
puts("typedef unsigned char UCHAR;");
puts("typedef unsigned char *UCHARP;");
- const DFA &dfa = regex.dfa();
for (std::size_t i = 0; i < dfa.size(); i++) {
const DFA::Transition &transition = dfa.GetTransition(i);
printf("void s%"PRIuS"(UCHARP beg, UCHARP buf, UCHARP end)\n{\n", i);
View
@@ -2,13 +2,14 @@
#define REGEN_GENERATOR_H_
#include "regex.h"
+#include "dfa.h"
namespace regen {
namespace Generator {
-void DotGenerate(const Regex &regex);
-void CGenerate(const Regex &regex);
+void DotGenerate(const DFA &dfa);
+void CGenerate(const DFA &dfa);
} // namespace Generator
View
@@ -5,29 +5,30 @@ namespace regen {
Lexer::Type Lexer::Consume()
{
if (ptr_ >= end_) {
+ literal_ = '\0';
token_ = kEOP;
return token_;
}
switch (literal_ = *ptr_++) {
- // Regen Extension
- case '@': token_ = kRecursive; break;
- case '#': token_ = kPermutation; break;
- case '!': token_ = kComplement; break;
+ case '@': token_ = flag_.recursion_ext() ? kRecursion : kLiteral; break;
+ case '#': token_ = flag_.permutation_ext() ? kPermutation : kLiteral; break;
+ case '!': token_ = flag_.complement_ext() ? kComplement : kLiteral; break;
case '&': {
- if (*ptr_ == '&') {
+ if (*ptr_ == '&' && flag_.xor_ext()) {
ptr_++;
token_ = kXOR;
- } else {
+ } else if (flag_.intersection_ext()) {
token_ = kIntersection;
+ } else {
+ token_ = kLiteral;
}
break;
}
- // Normal Symbols
case '.': token_ = kDot; break;
case '[': token_ = kCharClass; break;
case '|': {
- if (*ptr_ == '|') {
+ if (*ptr_ == '|' && flag_.shuffle_ext()) {
ptr_++;
token_ = kShuffle;
} else {
@@ -65,16 +66,18 @@ Lexer::Type Lexer::Consume()
Lexer::Type Lexer::lex_metachar()
{
- if (*ptr_ == '_' && '0' <= *(ptr_+1) && *(ptr_+1) <= '9') ptr_++;
- if ('0' <= *ptr_ && *ptr_ <= '9') {
- weakref_ = (*(ptr_-1) == '_');
- backref_ = 0;
- do {
- backref_ *= 10;
- backref_ += *ptr_++ - '0';
- } while ('0' <= *ptr_ && *ptr_ <= '9');
- backref_--;
- return kBackRef;
+ if (flag_.weakbackref_ext()) {
+ if (*ptr_ == '_' && '0' <= *(ptr_+1) && *(ptr_+1) <= '9') ptr_++;
+ if ('0' <= *ptr_ && *ptr_ <= '9') {
+ weakref_ = (*(ptr_-1) == '_');
+ backref_ = 0;
+ do {
+ backref_ *= 10;
+ backref_ += *ptr_++ - '0';
+ } while ('0' <= *ptr_ && *ptr_ <= '9');
+ backref_--;
+ return kBackRef;
+ }
}
Type token;
@@ -234,7 +237,7 @@ bool Lexer::Concatenated()
switch (token_) {
case kLiteral: case kCharClass: case kDot:
case kEndLine: case kBegLine: case kNone:
- case kLpar: case kComplement: case kRecursive:
+ case kLpar: case kComplement: case kRecursion:
case kByteRange: case kBackRef:
return true;
default:
@@ -257,7 +260,7 @@ const char* Lexer::TokenToString(Lexer::Type token)
{
static const char* str[] = {
"kLiteral", "kCharClass", "kDot", "kBegLine", "kEndLine",
- "kRecursive", "kByteRange", "kEOP", "kConcat", "kUnion",
+ "kRecursion", "kByteRange", "kEOP", "kConcat", "kUnion",
"kIntersection", "kQmark", "kStar", "kPlus", "kRepetition",
"kRpar", "kLpar", "kEpsilon", "kNone", "kComplement"
};
View
@@ -12,7 +12,7 @@ class Lexer {
public:
enum Type {
kLiteral=0, kCharClass, kDot, kBegLine,
- kEndLine, kRecursive, kByteRange, kBackRef, kEOP,
+ kEndLine, kRecursion, kByteRange, kBackRef, kEOP,
kConcat, kUnion, kIntersection, kXOR, kShuffle,
kQmark, kStar, kPlus, kRepetition,
kRpar, kLpar, kEpsilon, kNone, kComplement, kPermutation
Oops, something went wrong.

No commit comments for this range