Skip to content

Commit

Permalink
Implement caching of lexers
Browse files Browse the repository at this point in the history
  • Loading branch information
sorear committed Sep 7, 2010
1 parent 83c3bc3 commit 4926d3c
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 31 deletions.
60 changes: 52 additions & 8 deletions lib/Cursor.cs
Expand Up @@ -789,6 +789,14 @@ public class LexerState {
}
}

// Holder for lexers and protoregex candidate lists that have already been
// built, so that later requests can reuse them instead of rebuilding.
public class LexerCache {
    // Lexers keyed by the LAD[] they were built from.  No comparer is
    // supplied, so the dictionary's default comparer is used; for arrays
    // that means the same array instance must be presented to get a hit.
    public Dictionary<LAD[], Lexer> nfas = new Dictionary<LAD[], Lexer>();

    // Protoregex name -> resolved candidate objects.
    public Dictionary<string, DynObject[]> protorx_fns = new Dictionary<string, DynObject[]>();

    // Protoregex name -> lexer built for that protoregex.
    public Dictionary<string, Lexer> protorx_nfa = new Dictionary<string, Lexer>();
}

public class Lexer {
public LAD[] alts;
public NFA pad = new NFA();
Expand All @@ -797,8 +805,18 @@ public class Lexer {
public static bool LtmTrace =
Environment.GetEnvironmentVariable("NIECZA_LTM_TRACE") != null;

public Lexer(IP6 cursorObj, string tag, LAD[] alts) {
pad.cursor_class = cursorObj.GetMO();
// Returns the lexer stored in the cursor's metaobject cache under the given
// LAD array, constructing and storing a new one (tagged with `title`) when
// the cache has no entry for that array yet.
//
//   cursor - object whose metaobject owns the cache and is handed to the
//            Lexer constructor
//   lads   - alternatives to lex over; also the cache key
//   title  - tag passed to the Lexer constructor on a miss
public static Lexer GetLexer(IP6 cursor, LAD[] lads, string title) {
    DynMetaObject mo = cursor.GetMO();
    LexerCache cache = mo.GetLexerCache();

    Lexer lexer;
    if (!cache.nfas.TryGetValue(lads, out lexer)) {
        // Miss: build once and remember it for subsequent calls.
        lexer = new Lexer(mo, title, lads);
        cache.nfas[lads] = lexer;
    }
    return lexer;
}

public Lexer(DynMetaObject cmo, string tag, LAD[] alts) {
pad.cursor_class = cmo;
this.alts = alts;
this.tag = tag;
int root = pad.AddNode();
Expand Down Expand Up @@ -883,11 +901,23 @@ public class Lexer {
}

public static IP6[] RunProtoregex(IP6 cursor, string name) {
DynObject[] candidates = ResolveProtoregex(cursor.GetMO(), name);
LAD[] branches = new LAD[candidates.Length];
for (int i = 0; i < candidates.Length; i++)
branches[i] = ((SubInfo) candidates[i].slots["info"]).ltm;
Lexer l = new Lexer(cursor, name, branches);
DynMetaObject kl = cursor.GetMO();
LexerCache lc = kl.GetLexerCache();
DynObject[] candidates = ResolveProtoregex(kl, name);
Lexer l;
if (!lc.protorx_nfa.TryGetValue(name, out l)) {
if (LtmTrace)
Console.WriteLine("+ Protoregex lexer MISS on {0}.{1}",
kl.name, name);
LAD[] branches = new LAD[candidates.Length];
for (int i = 0; i < candidates.Length; i++)
branches[i] = ((SubInfo) candidates[i].slots["info"]).ltm;
lc.protorx_nfa[name] = l = new Lexer(cursor.GetMO(), name, branches);
} else {
if (LtmTrace)
Console.WriteLine("+ Protoregex lexer HIT on {0}.{1}",
kl.name, name);
}
Cursor c = (Cursor)cursor;
int[] brnum = l.Run(c.backing, c.pos);
IP6[] ret = new IP6[brnum.Length];
Expand All @@ -898,6 +928,18 @@ public class Lexer {

public static DynObject[] ResolveProtoregex(DynMetaObject cursor_class,
string name) {
DynObject[] ret;
if (cursor_class.GetLexerCache().
protorx_fns.TryGetValue(name, out ret)) {
if (LtmTrace)
Console.WriteLine("+ Protoregex method list HIT on {0}.{1}",
cursor_class.name, name);
return ret;
}
if (LtmTrace)
Console.WriteLine("+ Protoregex method list MISS on {0}.{1}",
cursor_class.name, name);

IP6 proto = cursor_class.Can(name);

List<DynObject> raword = new List<DynObject>();
Expand All @@ -919,6 +961,8 @@ public class Lexer {
if (unshadowed.Contains(o))
useord.Add(o);

return useord.ToArray();
ret = useord.ToArray();
cursor_class.GetLexerCache().protorx_fns[name] = ret;
return ret;
}
}
7 changes: 7 additions & 0 deletions lib/Kernel.cs
Expand Up @@ -279,6 +279,13 @@ public class DynMetaObject {
public IP6 typeObject;
public string name;

public LexerCache lexcache;
// Returns this metaobject's LexerCache, creating it on first request and
// storing it in `lexcache` so later calls get the same instance.
public LexerCache GetLexerCache() {
    return lexcache ?? (lexcache = new LexerCache());
}

public delegate Frame InvokeHandler(IP6 th, Frame c,
Variable[] pos, Dictionary<string, Variable> named);
public delegate Frame FetchHandler(IP6 th, Frame c);
Expand Down
4 changes: 4 additions & 0 deletions src/CgOp.pm
Expand Up @@ -397,6 +397,10 @@ use warnings;
CgOp::Primitive->new(op => [ 'cast', $_[0] ], zyg => [ $_[1] ]);
}

# Build a 'const' primitive op wrapping a single child op.
#   $value_op - the op placed as the sole entry of the node's zyg list
# Returns the new CgOp::Primitive node.
sub const {
    my ($value_op) = @_;
    return CgOp::Primitive->new(
        op  => ['const'],
        zyg => [$value_op],
    );
}

# Emit a static call to Kernel.NewROScalar with the given op as its argument.
#   $inner - op forwarded to rawscall as the call's only argument
# Returns whatever rawscall returns.
sub newscalar {
    my ($inner) = @_;
    return rawscall('Kernel.NewROScalar', $inner);
}
Expand Down
14 changes: 14 additions & 0 deletions src/CodeGen.pm
Expand Up @@ -92,6 +92,7 @@ use 5.010;
'System.IO.File.ReadAllText' => [m => 'System.String'],
'Lexer.RunProtoregex' => [m => 'IP6[]'],
'Lexer.GetLexer' => [m => 'Lexer'],
'Kernel.Die' => [c => 'Void'],
'Kernel.CoTake' => [c => 'Variable'],
'Kernel.Take' => [c => 'Variable'],
Expand Down Expand Up @@ -209,6 +210,7 @@ use 5.010;
has minlets => (isa => 'Int', is => 'ro', default => 0);
has body => (isa => 'Body', is => 'ro');
has bodies => (isa => 'ArrayRef', is => 'ro', default => sub { [] });
has consttab => (isa => 'ArrayRef', is => 'ro', default => sub { [] });

has savedstks => (isa => 'HashRef', is => 'ro', default => sub { +{} });

Expand Down Expand Up @@ -328,6 +330,15 @@ use 5.010;
$self->_emit(_lexn($self->minlets + $i) . " = $v");
}

# Replace the value taken via _popn(1) with a reference to a freshly named
# constant, and record that constant's declaration in consttab.
#
# The name is "K_<csname>_<index>", where <index> is the number of entries
# already in consttab.  The recorded entry has the form
# "<type> <name> = <value>".
# NOTE(review): presumably _popn(1) yields (expression text, type) for the
# top stack item -- confirm against its definition.
sub const {
    my $self = shift;
    my ($val, $type) = $self->_popn(1);

    my $table = $self->consttab;
    my $name  = join '_', 'K', $self->csname, scalar @$table;

    push @$table, "$type $name = $val";
    $self->_push($type, $name);
}

sub has_let {
my ($self, $which) = @_;
my $i = @{ $self->letstack } - 1;
Expand Down Expand Up @@ -775,6 +786,9 @@ use 5.010;
join (", ", map { ($_ // 0) } @{ $self->lineinfo }), "};\n";
print ::NIECZA_OUT " " x 4, "private static SubInfo ${name}_info = ",
"new SubInfo(${name}_lines, ${name}, null, null, null);\n";
for (@{ $self->consttab }) {
print ::NIECZA_OUT " " x 4, "private static $_;\n";
}
}

sub BUILD {
Expand Down
8 changes: 5 additions & 3 deletions src/RxOp.pm
Expand Up @@ -413,9 +413,11 @@ use CgOp;

my @code;
push @code, CgOp::rawcall(CgOp::rxframe, "LTMPushAlts",
CgOp::rawnew('Lexer',
CgOp::rawcall(CgOp::rxframe,'MakeCursor'), CgOp::clr_string(''),
CgOp::rawnewarr('LAD', map { $_->lad } @{ $self->zyg })),
CgOp::rawscall('Lexer.GetLexer',
CgOp::rawcall(CgOp::rxframe, 'MakeCursor'),
CgOp::const(CgOp::rawnewarr('LAD',
map { $_->lad } @{ $self->zyg })),
CgOp::clr_string('')),
CgOp::rawnewarr('Int32', map { CgOp::labelid($_) } @ls));
push @code, CgOp::rawccall(CgOp::rxframe, 'Backtrack');
for (my $i = 0; $i < @ls; $i++) {
Expand Down
37 changes: 17 additions & 20 deletions t/JsonTinyG.pl
@@ -1,45 +1,42 @@
grammar JSONGrammar {
rule TOP { [<object> |<array> ] { say "parsed {$/.pos} chars" } }
rule object {'{' <pairlist> '}' }
rule pairlist { [ <pair> [ \, <pair> ]* ]? }
rule TOP {
[
| <?before '{'> :: <object>
| <?before '['> :: <array>
]
{ say "parsed {$/.pos} chars" }
}
rule object { '{' :: <pairlist> '}' }
rule pairlist { [ <pair> [ "," <pair> ]* ]? }
rule pair { <string> ':' <value> }
rule array {'[' [ <value> [ \, <value> ]* ]? ']' }
rule array { '[' :: [ <value> [ "," <value> ]* ]? ']' }

proto token value {*}
token value:sym<number> {
'-'?
[ 0 | <[1..9]> <[0..9]>* ]
[ \. <[0..9]>+ ]?
[ <[eE]> [\+|\-]? <[0..9]>+ ]?
[ 0 || <[1..9]> <[0..9]>* ]
[ "." <[0..9]>+ ]?
[ <[eE]> ["+"|"-"]? <[0..9]>+ ]?
}
token value:sym<true> { <sym> }
token value:sym<false> { <sym> }
token value:sym<null> { <sym> }
token value:sym<object> { <object> }
token value:sym<array> { <array> }
token value:sym<string> { <string> }
token value:sym<string> { <?before '"'> :: <string> }

token string {
\" [ <str> | \\ <str_escape> ]* \"
'"' [ <str> || \\ <str_escape> ]* '"'
}

token str {
[
<!before \t>
<!before \n>
<!before \\>
<!before \">
.
]+
# <-["\\\t\n]>+
}
token str { <-[\t\n\\\"]>+ }

token xdigit {
<[0..9 a..f A..F]>
}

token str_escape {
<["\\/bfnrt]> | u <xdigit> <xdigit> <xdigit> <xdigit>
<["\\/bfnrt]> || u <xdigit> <xdigit> <xdigit> <xdigit>
}
}

Expand Down

0 comments on commit 4926d3c

Please sign in to comment.