Permalink
Browse files

[nrx] Kernel-level code for initial nrx

  • Loading branch information...
1 parent cd6e7c1 commit 76cd7d9098e590d41569ee0ec79af3583fd2f8a2 @sorear committed Sep 4, 2010
Showing with 66 additions and 194 deletions.
  1. +66 −89 lib/Cursor.cs
  2. +0 −105 lib/SAFE.setting
View
@@ -2,30 +2,6 @@
using System;
using System.Collections.Generic;
using System.Text;
-// this exists to allow O(1) addition, since additions (esp. in the presence
-// of backtracking) dominate lookups
-
-public class Matched {
- public Matched next;
- public string name;
- public Variable val; // or null for a list-mode sentinel
-
- public Matched(Matched next, string name, Variable val) {
- this.next = next;
- this.name = name;
- this.val = val;
- }
-}
-//
-//public class Match {
-// public string backing;
-// public int from;
-// public int to;
-// public Dictionary<string,Variable> captures;
-//}
-//
-//public class Xact {
-//
// extends Frame for a time/space tradeoff
// we keep the cursor in exploded form to avoid creating lots and lots of
@@ -51,7 +27,12 @@ public struct State {
public PSN<State> bt;
+ // when this is set, one value has already been given, so End hands back
+ // the Cursor directly instead of wrapping it in another List
+ public bool return_one;
+
// our backing string: orig_s as a string, orig as a char[] in a cheap to index form
+ public string orig_s;
public char[] orig;
// cache of orig.Length
public int end;
@@ -144,21 +125,38 @@ public struct State {
return bt.obj.reps.obj;
}
- /*
+ public Cursor MakeCursor() {
+ return new Cursor(bt.obj.klasses.obj, bt.obj.xact, orig_s, orig, bt.obj.pos);
+ }
+
public static DynMetaObject ListMO;
public static DynMetaObject LLArrayMO;
public static DynMetaObject RegexBacktrackIteratorMO;
- public static Variable FalseV;
public Frame End(Frame th) {
if (return_one) {
th.caller.resultSlot = MakeCursor();
} else {
- DynObject lst = new DynObject(ListMO);
DynObject obs = new DynObject(LLArrayMO);
+ List<Variable> ks = new List<Variable>();
+ ks.Add(Kernel.NewROScalar(MakeCursor()));
+ obs.slots["value"] = ks;
+ DynObject it = new DynObject(RegexBacktrackIteratorMO);
+ it.slots["value"] = Kernel.NewRWScalar(Kernel.AnyP);
+ it.slots["next"] = Kernel.NewRWScalar(Kernel.AnyP);
+ it.slots["valid"] = Kernel.NewRWScalar(Kernel.AnyP);
+ it.slots["rxframe"] = Kernel.BoxAny(this, Kernel.AnyP);
DynObject its = new DynObject(LLArrayMO);
- DynObject it = new RegexBacktrackIteratorMO;
+ List<Variable> iss = new List<Variable>();
+ iss.Add(Kernel.NewROScalar(it));
+ its.slots["value"] = iss;
+ DynObject lst = new DynObject(ListMO);
+ lst.slots["items"] = Kernel.NewROScalar(obs);
+ lst.slots["rest"] = Kernel.NewROScalar(its);
+ lst.slots["flat"] = Kernel.NewROScalar(Kernel.AnyP);
+ th.caller.resultSlot = Kernel.NewROScalar(lst);
+ }
+ return th.caller;
}
- */
}
public sealed class XAct {
@@ -169,89 +167,68 @@ public sealed class XAct {
public XAct(string tag, XAct next) { this.tag = tag; this.next = next; }
}
-public class Cursor {
- // XXX It's a bit wrong that we ref the string both from the cursor and
- // from $*ORIG.
- public Matched captures;
- public string backing;
- public XAct xact;
- public int pos;
-
+// This is used to carry match states in and out of subrules. Within subrules,
+// match states are represented much more ephemerally in the state of RxFrame.
+public class Cursor : IP6 {
public static bool Trace =
Environment.GetEnvironmentVariable("NIECZA_RX_TRACE") != null;
- public Cursor(Matched captures, string backing, int pos) {
- this.captures = captures;
+ public DynMetaObject klass;
+ public XAct xact;
+ public string backing;
+ public char[] backing_ca;
+ public int pos;
+
+ public Cursor(DynMetaObject klass, XAct xact, string backing, char[] backing_ca, int pos) {
+ this.klass = klass;
+ this.xact = xact;
this.backing = backing;
+ this.backing_ca = backing_ca;
this.pos = pos;
}
- public Cursor(string backing) : this(null, backing, 0) { }
+ public override DynMetaObject GetMO() { return klass; }
- public Cursor At(int npos) {
- return new Cursor(captures, backing, npos);
+ public override Frame GetAttribute(Frame caller, string name) {
+ return Fail(caller, "Cursors cannot have attributes");
}
- public Cursor Exact(string what) {
- if (backing.Length - what.Length >= pos &&
- backing.Substring(pos, what.Length) == what) {
- if (Trace)
- Console.WriteLine("* matched {0} at {1}", what, pos);
- return At(pos + what.Length);
- } else {
- if (Trace)
- Console.WriteLine("! no match {0} at {1}", what, pos);
- return null;
- }
+ public override bool IsDefined() {
+ return true;
}
- public Cursor AnyChar() {
- if (backing.Length - 1 >= pos) {
- if (Trace)
- Console.WriteLine("* matched any char at {0}", pos);
- return At(pos + 1);
- } else {
- if (Trace)
- Console.WriteLine("! no match any char at {0}", pos);
- return null;
- }
+ public Cursor At(int npos) {
+ return new Cursor(klass, xact, backing, backing_ca, npos);
}
- public Cursor CClass(CC cc) {
- if (backing.Length - 1 >= pos && cc.Accepts(backing[pos])) {
- if (Trace)
- Console.WriteLine("* matched cc {0} at {1}", cc, pos);
- return At(pos + 1);
- } else {
- if (Trace)
- Console.WriteLine("! no match cc {0} at {1}", cc, pos);
- return null;
- }
- }
+ public Variable SimpleWS() {
+ int l = backing_ca.Length;
+ int p = pos;
- public Cursor SetCaps(Matched caps) {
- return new Cursor(caps, backing, pos);
- }
+ DynObject obs = new DynObject(RxFrame.LLArrayMO);
+ List<Variable> ks = new List<Variable>();
+ obs.slots["value"] = ks;
- public Cursor Bind(string name, Variable what) {
- return SetCaps(new Matched(captures, name, what));
- }
+ DynObject its = new DynObject(RxFrame.LLArrayMO);
+ its.slots["value"] = new List<Variable>();
+
+ DynObject lst = new DynObject(RxFrame.ListMO);
+ lst.slots["items"] = Kernel.NewROScalar(obs);
+ lst.slots["rest"] = Kernel.NewROScalar(its);
+ lst.slots["flat"] = Kernel.NewROScalar(Kernel.AnyP);
- public Cursor SimpleWS() {
- int l = backing.Length;
- int p = pos;
if (p != 0 && p != l && CC.Word.Accepts(backing[p]) &&
CC.Word.Accepts(backing[p-1])) {
if (Trace)
Console.WriteLine("! no match <ws> at {0}", pos);
- return null;
+ } else {
+ while (p != l && Char.IsWhiteSpace(backing, p)) { p++; }
+ if (Trace)
+ Console.WriteLine("* match <ws> at {0} to {1}", pos, p);
+ ks.Add(Kernel.NewROScalar(At(p)));
}
- while (p != l && Char.IsWhiteSpace(backing, p)) { p++; }
- if (Trace)
- Console.WriteLine("* match <ws> at {0} to {1}", pos, p);
-
- return At(p);
+ return Kernel.NewROScalar(lst);
}
}
View
@@ -869,8 +869,6 @@ sub take($p) { # should be \|$p
Q:CgOp { (rawsccall Kernel.Take (l $p)) }
}
-# these are immutable, though we may wind up reusing them in some cases by
-# uniqueness rules (TBD)
my class Cursor {
method new($str) { Q:CgOp { (box (@ (l self)) (rawnew Cursor (unbox String
(@ (l $str))))) } }
@@ -893,77 +891,6 @@ my class Cursor {
}
}
-sub _rxstr($C, $str, $k) {
- #say "_rxstr : " ~ ($C.str ~ (" @ " ~ ($C.from ~ (" ? " ~ $str))));
- Q:CgOp {
- (letn rt (rawcall (unbox Cursor (@ (l $C))) Exact
- (unbox String (@ (l $str))))
- [ternary
- (!= (l rt) (null Cursor))
- (subcall (@ (l $k)) (box (@ (l $C)) (l rt)))
- (null Variable)])
- };
-}
-
-sub _rxdot($C, $k) {
- Q:CgOp {
- (letn rt (rawcall (unbox Cursor (@ (l $C))) AnyChar)
- [ternary
- (!= (l rt) (null Cursor))
- (subcall (@ (l $k)) (box (@ (l $C)) (l rt)))
- (null Variable)])
- };
-}
-
-sub _rxcc($C, $cc, $k) {
- Q:CgOp {
- (letn rt (rawcall (unbox Cursor (@ {$C})) CClass
- (unwrap CC (@ {$cc})))
- [ternary
- (!= (l rt) (null Cursor))
- (subcall (@ {$k}) (box (@ {$C}) (l rt)))
- (null Variable)])
- };
-}
-
-sub _rxcut($C, $f, $k) {
- my @l := gather $f($C, &take);
- @l && $k(@l.shift);
-}
-
-sub _rxbefore($C, $f, $k) {
- my @l := gather $f($C, &take);
- @l && $k($C);
-}
-
-sub _rxnotbefore($C, $f, $k) {
- my @l := gather $f($C, &take);
- @l || $k($C);
-}
-
-sub _rxalt($C, $lad, $k, *@alts) {
- sub lbody($ix) { @alts[$ix]($C, $k) }
-
- Q:CgOp {
- (letn csr (unbox Cursor (@ (l $C)))
- lexer (rawnew Lexer (@ (l $C)) (clr_string "")
- (unwrap 'LAD[]' (@ (l $lad))))
- fates (rawcall (l lexer) Run (getfield backing (l csr))
- (getfield pos (l csr)))
- i (int 0)
- nfate (getfield Length (l fates))
- (whileloop 0 0 (< (l i) (l nfate)) (prog
- (sink (subcall (@ (l &lbody))
- (box Num (cast Double (getindex (l i) (l fates))))))
- (l i (+ (l i) (int 1)))))
- (null Variable))
- };
-}
-
-sub _rxcall(@list, $k) {
- $k(@list.shift) while @list;
-}
-
PRE-INIT {
ClassHOW.HOW.add-method("add-multiregex",
anon method add-multiregex($name, $rx) {
@@ -975,38 +902,6 @@ PRE-INIT {
});
}
-sub _rxproto($C, $k, $name) {
- sub lbody($fn) { $fn($C).for($k) }
- Q:CgOp {
- (letn branches (rawscall Lexer.RunProtoregex
- (@ (l $C)) (unbox String (@ (l $name))))
- ix (int 0)
- max (getfield Length (l branches))
- (whileloop 0 0 (< (l ix) (l max))
- (sink (subcall (@ (l &lbody))
- (newscalar (getindex (l ix) (l branches))))))
- (null Variable))
- };
-}
-
-# A call to a subrule could return a cursor of a different type, or with
-# unwanted subcaptures that need to be cleared for <.foo>
-sub _rxbind($C, @names, $fun, $k) {
- $fun($C, -> $C2 {
- my $C3 = Q:CgOp {
- (box (@ (l $C)) (rawcall (unbox Cursor (@ (l $C2)))
- SetCaps (getfield captures (unbox Cursor (@ (l $C))))))
- };
- for @names -> $n { #OK
- $C3 = Q:CgOp {
- (box (@ (l $C3)) (rawcall (unbox Cursor (@ (l $C3)))
- Bind (unbox String (@ (l $n))) (l $C3)))
- };
- }
- $k($C3);
- });
-}
-
my class Regex is Sub {
method ACCEPTS($str) {
my $i = 0;

0 comments on commit 76cd7d9

Please sign in to comment.