Support for reading in FOL valuation from a file

svn/trunk@5212
nltk · Aug 24, 2007 · 7ad2d47 · 7ad2d47
1 parent 2ddf81c
commit 7ad2d47
Show file tree

Hide file tree

Showing 4 changed files with 103 additions and 3 deletions.
diff --git a/nltk/data.py b/nltk/data.py
@@ -159,6 +159,7 @@ def load(resource_url, format='auto', cache=True, verbose=False):
       - C{'cfg'}
       - C{'pcfg'}
       - C{'fcfg'}
+      - C{'val'}
       - C{'raw'}
 
     If no format is specified, C{load()} will attempt to determine a
@@ -200,6 +201,7 @@ def load(resource_url, format='auto', cache=True, verbose=False):
         if resource_url.endswith('.cfg'): format = 'cfg'
         if resource_url.endswith('.pcfg'): format = 'pcfg'
         if resource_url.endswith('.fcfg'): format = 'fcfg'
+        if resource_url.endswith('.val'): format = 'val'
 
     # Load the resource.
     if format == 'pickle':
@@ -213,6 +215,8 @@ def load(resource_url, format='auto', cache=True, verbose=False):
     elif format == 'fcfg':
         # NB parse_fcfg returns a FeatGramLex -- a tuple (grammar, lexicon)
         resource_val = cfg.parse_fcfg(_open(resource_url).read())
+    elif format == 'val':
+        resource_val = sem.parse_val(_open(resource_url).read())
     elif format == 'raw':
         resource_val = _open(resource_url).read()
     else:

diff --git a/nltk/sem/__init__.py b/nltk/sem/__init__.py
@@ -12,7 +12,7 @@
 
 """
 
-from syn2sem import *
+from util import *
 from evaluate import *
 from logic import *
 
diff --git a/nltk/sem/syn2sem.py → nltk/sem/util.py b/nltk/sem/syn2sem.py → nltk/sem/util.py
@@ -13,6 +13,7 @@
 """
 
 import evaluate
+import re
 from nltk import tokenize, Tree
 
 
@@ -108,6 +109,51 @@ def text_evaluate(inputs, grammar, model, assignment, semtrace=0):
 """
 
 
+# Separator between a symbol and its value: one or more '=' followed by
+# '>' (so '=>' and '==>' both match), with optional surrounding whitespace.
+_VAL_SPLIT_RE = re.compile(r'\s*=+>\s*')
+# Separator between the elements of a set or tuple: a comma with optional
+# surrounding whitespace.
+_ELEMENT_SPLIT_RE = re.compile(r'\s*,\s*')
+# A parenthesised, non-empty tuple expression containing no ')'; the
+# captured group includes the enclosing parentheses.
+_TUPLES_RE = re.compile(r"""\s*         
+                                              (\([^)]+\))  # tuple-expression
+                                              \s*""", re.VERBOSE)
+
+def parse_valuation_line(s):
+    """
+    Parse a line in a valuation file.
+
+    Lines are expected to have one of the forms::
+
+      noosa => n
+      girl => {g1, g2}
+      chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}
+
+    @param s: a single line of a valuation file
+    @type s: C{str}
+    @return: a pair C{(symbol, value)}; C{value} is a string, or -- when
+        the right-hand side is wrapped in braces -- a set of strings or
+        a set of tuples of strings
+    @rtype: C{tuple}
+    @raise ValueError: if the line has no '=>' separator, or if a set
+        value is not closed by '}'
+    """
+    pieces = _VAL_SPLIT_RE.split(s.strip())
+    # Without a separator there is no value; fail with a ValueError
+    # rather than an IndexError from pieces[1].
+    if len(pieces) < 2:
+        raise ValueError("no '=>' separator in valuation line: %r" % s)
+    symbol, value = pieces[0], pieces[1]
+    # A value wrapped in braces denotes a set; anything else stays a string.
+    if value.startswith('{'):
+        # Refuse to strip a "closing brace" that is not actually there.
+        if not value.endswith('}'):
+            raise ValueError("set value is missing its closing '}': %r" % s)
+        body = value[1:-1].strip()
+        tuple_strings = _TUPLES_RE.findall(body)
+        # are the set elements tuples?
+        if tuple_strings:
+            # Each match looks like '(a, b, ...)': drop the parentheses
+            # and any padding inside them, then split on commas.
+            set_elements = [tuple(_ELEMENT_SPLIT_RE.split(ts[1:-1].strip()))
+                            for ts in tuple_strings]
+        elif body:
+            set_elements = _ELEMENT_SPLIT_RE.split(body)
+        else:
+            # '{}' is the empty set, not a set holding the empty string.
+            set_elements = []
+        value = set(set_elements)
+    return symbol, value
+
+def parse_valuation(s):
+    """
+    Convert a valuation file into a valuation.
+
+    Blank lines and lines starting with '#' are skipped; every other
+    line is passed to L{parse_valuation_line}, and the resulting
+    statements are read into a new valuation.
+
+    @param s: the contents of a valuation file
+    @type s: C{str}
+    @return: a valuation populated from the parsed statements
+    @rtype: L{evaluate.Valuation}
+    @raise ValueError: if a line cannot be parsed; the message gives the
+        1-based line number and the offending line
+    """
+    statements = []
+    for linenum, line in enumerate(s.splitlines()):
+        line = line.strip()
+        if line.startswith('#') or line == '':
+            continue
+        try:
+            statements.append(parse_valuation_line(line))
+        except (ValueError, IndexError):
+            # Malformed lines may surface as IndexError (e.g. no '=>'
+            # separator) as well as ValueError; report both uniformly.
+            # enumerate() counts from 0, so add 1 for a human-readable
+            # line number.
+            raise ValueError('Unable to parse line %s: %s'
+                             % (linenum + 1, line))
+    val = evaluate.Valuation()
+    val.read(statements)
+    return val
+
+
 def demo_model0():
     global m0, g0
     val = evaluate.Valuation()
@@ -234,5 +280,5 @@ def demo():
                 print '%d:  %s' % (n, semrep)
                 n += 1
 
-if __name__ == "__main__":
-    demo()
+#if __name__ == "__main__":
+    #demo()
diff --git a/nltk/test/semantics.doctest b/nltk/test/semantics.doctest
@@ -117,6 +117,56 @@ doesn't work since cf not called on 'foo'
     >>> love1.tuples() == love2.tuples()
     True
 
+Parse a valuation from a string.
+
+    >>> v = """
+    ... john => b1
+    ... mary => g1
+    ... suzie => g2
+    ... fido => d1
+    ... tess => d2
+    ... noosa => n
+    ... girl => {g1, g2}
+    ... boy => {b1, b2}
+    ... dog => {d1, d2}
+    ... bark => {d1, d2}
+    ... walk => {b1, g2, d1}
+    ... chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}
+    ... see => {(b1, g1), (b2, d2), (g1, b1),(d2, b1), (g2, n)}
+    ... in => {(b1, n), (b2, n), (d2, n)}
+    ... with => {(b1, g1), (g1, b1), (d1, b1), (b1, d1)}
+    ... """
+    >>> val = parse_valuation(v)
+    >>> print val
+    {'bark': {'d2': True, 'd1': True},
+     'boy': {'b1': True, 'b2': True},
+     'chase': {'d1': {'g1': True},
+               'd2': {'g2': True},
+               'g1': {'b1': True, 'b2': True}},
+     'dog': {'d2': True, 'd1': True},
+     'fido': 'd1',
+     'girl': {'g2': True, 'g1': True},
+     'in': {'n': {'d2': True, 'b1': True, 'b2': True}},
+     'john': 'b1',
+     'mary': 'g1',
+     'noosa': 'n',
+     'see': {'b1': {'d2': True, 'g1': True},
+             'd2': {'b2': True},
+             'g1': {'b1': True},
+             'n': {'g2': True}},
+     'suzie': 'g2',
+     'tess': 'd2',
+     'walk': {'d1': True, 'b1': True, 'g2': True},
+     'with': {'b1': {'g1': True, 'd1': True},
+              'd1': {'b1': True},
+              'g1': {'b1': True}}}
+
+
+Load a valuation from a file.
+
+    >>> val = nltk.data.load('grammars/valuation1.val')
+    >>> val.domain
+
 Tests for function argument application in a Model
 
     >>> val = Valuation()