Skip to content

Commit

Permalink
align parser methods with updated API, incorporate read_cfg into Cont…
Browse files Browse the repository at this point in the history
…extFreeGrammar.read, and same for read_pcfg and read_fcfg
  • Loading branch information
stevenbird committed Apr 22, 2014
1 parent a580803 commit 52f017f
Show file tree
Hide file tree
Showing 17 changed files with 146 additions and 137 deletions.
12 changes: 6 additions & 6 deletions nltk/data.py
Expand Up @@ -661,9 +661,9 @@ def retrieve(resource_url, filename=None, verbose=True):
'pickle': "A serialized python object, stored using the pickle module.",
'json': "A serialized python object, stored using the json module.",
'yaml': "A serialized python object, stored using the yaml module.",
'cfg': "A context free grammar, parsed by nltk.read_cfg().",
'pcfg': "A probabilistic CFG, parsed by nltk.read_pcfg().",
'fcfg': "A feature CFG, parsed by nltk.read_fcfg().",
'cfg': "A context free grammar.",
'pcfg': "A probabilistic CFG.",
'fcfg': "A feature CFG.",
'fol': "A list of first order logic expressions, parsed by "
"nltk.sem.parse_fol() using nltk.sem.logic.LogicParser.",
'logic': "A list of first order logic expressions, parsed by "
Expand Down Expand Up @@ -803,13 +803,13 @@ def load(resource_url, format='auto', cache=True, verbose=False,
if format == 'text':
resource_val = string_data
elif format == 'cfg':
resource_val = nltk.grammar.read_cfg(
resource_val = nltk.grammar.ContextFreeGrammar.read(
string_data, encoding=encoding)
elif format == 'pcfg':
resource_val = nltk.grammar.read_pcfg(
resource_val = nltk.grammar.WeightedGrammar.read(
string_data, encoding=encoding)
elif format == 'fcfg':
resource_val = nltk.grammar.read_fcfg(
resource_val = nltk.grammar.FeatureGrammar.read(
string_data, logic_parser=logic_parser,
fstruct_reader=fstruct_reader, encoding=encoding)
elif format == 'fol':
Expand Down
178 changes: 87 additions & 91 deletions nltk/grammar.py
Expand Up @@ -507,6 +507,16 @@ def _calculate_leftcorners(self):
for left in lefts:
lc.update(self._immediate_leftcorner_words.get(left, set()))

@classmethod
def read(cls, input, encoding=None):
    """
    Return the ``ContextFreeGrammar`` corresponding to the input string(s).

    :param input: a grammar, either in the form of a string or as a
        list of strings.
    :param encoding: the encoding of the input, if it is a byte string.
    :raise ValueError: if the input cannot be parsed as a grammar.
    """
    start, productions = read_grammar(input, standard_nonterm_parser,
                                      encoding=encoding)
    # Instantiate via ``cls`` (not a hardcoded class name) so that
    # subclasses inherit a working alternate constructor.
    return cls(start, productions)

def start(self):
"""
Expand Down Expand Up @@ -757,6 +767,35 @@ def _calculate_indexes(self):
if is_terminal(token):
self._lexical_index.setdefault(token, set()).add(prod)

@classmethod
def read(cls, input, features=None, logic_parser=None, fstruct_reader=None,
         encoding=None):
    """
    Return a feature structure based ``FeatureGrammar``.

    :param input: a grammar, either in the form of a string or else
        as a list of strings.
    :param features: a tuple of features (default: SLASH, TYPE)
    :param logic_parser: a parser for lambda-expressions,
        by default, ``LogicParser()``
    :param fstruct_reader: a feature structure parser
        (only if features and logic_parser is None)
    :param encoding: the encoding of the input, if it is a byte string.
    :raise Exception: if both ``logic_parser`` and ``fstruct_reader`` are given.
    """
    if features is None:
        features = (SLASH, TYPE)

    if fstruct_reader is None:
        fstruct_reader = FeatStructReader(features, FeatStructNonterminal,
                                          logic_parser=logic_parser)
    elif logic_parser is not None:
        # A supplied fstruct_reader already embeds its own logic parser;
        # accepting both would silently ignore one of them.
        raise Exception('\'logic_parser\' and \'fstruct_reader\' must '
                        'not both be set')

    start, productions = read_grammar(input, fstruct_reader.read_partial,
                                      encoding=encoding)
    # Instantiate via ``cls`` so subclasses inherit a working constructor.
    return cls(start, productions)


def productions(self, lhs=None, rhs=None, empty=False):
"""
Return the grammar productions, filtered by the left-hand side
Expand Down Expand Up @@ -865,6 +904,19 @@ def __init__(self, productions):
"""
self._productions = productions

@classmethod
def read(cls, input):
    """
    Return the ``DependencyGrammar`` corresponding to the input string.

    Blank lines and lines beginning with ``#`` are skipped.

    :param input: a grammar, as a string with one production per line.
    :raise ValueError: if a line cannot be parsed as a dependency
        production, or if no productions are found at all.
    """
    productions = []
    for linenum, line in enumerate(input.split('\n')):
        line = line.strip()
        if line.startswith('#') or line == '':
            continue
        try:
            productions += _read_dependency_production(line)
        except ValueError:
            # Report 1-based line numbers, consistent with read_grammar().
            raise ValueError('Unable to parse line %s: %s' % (linenum + 1, line))
    if len(productions) == 0:
        raise ValueError('No productions found!')
    # Instantiate via ``cls`` so subclasses inherit a working constructor.
    return cls(productions)

def contains(self, head, mod):
"""
:param head: A head word.
Expand Down Expand Up @@ -1031,6 +1083,20 @@ def __init__(self, start, productions, calculate_leftcorners=True):
raise ValueError("Productions for %r do not sum to 1" % lhs)


@classmethod
def read(cls, input, encoding=None):
    """
    Return a probabilistic ``WeightedGrammar`` corresponding to the
    input string(s).

    :param input: a grammar, either in the form of a string or else
        as a list of strings.
    :param encoding: the encoding of the input, if it is a byte string.
    :raise ValueError: if the input cannot be parsed as a grammar.
    """
    start, productions = read_grammar(input, standard_nonterm_parser,
                                      probabilistic=True, encoding=encoding)
    # Instantiate via ``cls`` so subclasses inherit a working constructor.
    return cls(start, productions)


#################################################################
# Inducing Grammars
#################################################################
Expand Down Expand Up @@ -1069,82 +1135,27 @@ def induce_pcfg(start, productions):


#################################################################
# Parsing Grammars
# Helper functions for reading productions
#################################################################

# Parsing CFGs

def read_cfg_production(input):
def _read_cfg_production(input):
"""
Return a list of context-free ``Productions``.
"""
return read_production(input, standard_nonterm_parser)

def read_cfg(input, encoding=None):
"""
Return the ``ContextFreeGrammar`` corresponding to the input string(s).
return _read_production(input, standard_nonterm_parser)

:param input: a grammar, either in the form of a string or
as a list of strings.
"""
start, productions = read_grammar(input, standard_nonterm_parser,
encoding=encoding)
return ContextFreeGrammar(start, productions)

# Parsing Probabilistic CFGs

def read_pcfg_production(input):
def _read_pcfg_production(input):
"""
Return a list of PCFG ``WeightedProductions``.
"""
return read_production(input, standard_nonterm_parser, probabilistic=True)
return _read_production(input, standard_nonterm_parser, probabilistic=True)

def read_pcfg(input, encoding=None):
"""
Return a probabilistic ``WeightedGrammar`` corresponding to the
input string(s).
:param input: a grammar, either in the form of a string or else
as a list of strings.
"""
start, productions = read_grammar(input, standard_nonterm_parser,
probabilistic=True, encoding=encoding)
return WeightedGrammar(start, productions)

# Parsing Feature-based CFGs

def read_fcfg_production(input, fstruct_reader):
def _read_fcfg_production(input, fstruct_reader):
"""
Return a list of feature-based ``Productions``.
"""
return read_production(input, fstruct_reader)
return _read_production(input, fstruct_reader)

def read_fcfg(input, features=None, logic_parser=None, fstruct_reader=None,
encoding=None):
"""
Return a feature structure based ``FeatureGrammar``.
:param input: a grammar, either in the form of a string or else
as a list of strings.
:param features: a tuple of features (default: SLASH, TYPE)
:param logic_parser: a parser for lambda-expressions,
by default, ``LogicParser()``
:param fstruct_reader: a feature structure parser
(only if features and logic_parser is None)
"""
if features is None:
features = (SLASH, TYPE)

if fstruct_reader is None:
fstruct_reader = FeatStructReader(features, FeatStructNonterminal,
logic_parser=logic_parser)
elif logic_parser is not None:
raise Exception('\'logic_parser\' and \'fstruct_reader\' must '
'not both be set')

start, productions = read_grammar(input, fstruct_reader.read_partial,
encoding=encoding)
return FeatureGrammar(start, productions)

# Parsing generic grammars

Expand All @@ -1153,7 +1164,7 @@ def read_fcfg(input, features=None, logic_parser=None, fstruct_reader=None,
_TERMINAL_RE = re.compile(r'( "[^"]+" | \'[^\']+\' ) \s*', re.VERBOSE)
_DISJUNCTION_RE = re.compile(r'\| \s*', re.VERBOSE)

def read_production(line, nonterm_parser, probabilistic=False):
def _read_production(line, nonterm_parser, probabilistic=False):
"""
Parse a grammar rule, given as a string, and return
a list of productions.
Expand Down Expand Up @@ -1208,6 +1219,10 @@ def read_production(line, nonterm_parser, probabilistic=False):
return [Production(lhs, rhs) for rhs in rhsides]


#################################################################
# Reading Phrase Structure Grammars
#################################################################

def read_grammar(input, nonterm_parser, probabilistic=False, encoding=None):
"""
Return a pair consisting of a starting category and a list of
Expand Down Expand Up @@ -1251,7 +1266,7 @@ def read_grammar(input, nonterm_parser, probabilistic=False, encoding=None):
raise ValueError('Bad directive')
else:
# expand out the disjunctions on the RHS
productions += read_production(line, nonterm_parser, probabilistic)
productions += _read_production(line, nonterm_parser, probabilistic)
except ValueError as e:
raise ValueError('Unable to parse line %s: %s\n%s' %
(linenum+1, line, e))
Expand All @@ -1272,7 +1287,7 @@ def standard_nonterm_parser(string, pos):


#################################################################
# Parsing Dependency Grammars
# Reading Dependency Grammars
#################################################################

_READ_DG_RE = re.compile(r'''^\s* # leading whitespace
Expand All @@ -1288,19 +1303,7 @@ def standard_nonterm_parser(string, pos):
re.VERBOSE)
_SPLIT_DG_RE = re.compile(r'''('[^']'|[-=]+>|"[^"]+"|'[^']+'|\|)''')

def read_dependency_grammar(s):
productions = []
for linenum, line in enumerate(s.split('\n')):
line = line.strip()
if line.startswith('#') or line=='': continue
try: productions += read_dependency_production(line)
except ValueError:
raise ValueError('Unable to parse line %s: %s' % (linenum, line))
if len(productions) == 0:
raise ValueError('No productions found!')
return DependencyGrammar(productions)

def read_dependency_production(s):
def _read_dependency_production(s):
if not _READ_DG_RE.match(s):
raise ValueError('Bad production string')
pieces = _SPLIT_DG_RE.split(s)
Expand All @@ -1324,7 +1327,7 @@ def cfg_demo():
A demonstration showing how ``ContextFreeGrammars`` can be created and used.
"""

from nltk import nonterminals, Production, read_cfg
from nltk import nonterminals, Production, ContextFreeGrammar

# Create some nonterminals
S, NP, VP, PP = nonterminals('S, NP, VP, PP')
Expand All @@ -1338,7 +1341,7 @@ def cfg_demo():
print(Production(S, [NP]))

# Create some Grammar Productions
grammar = read_cfg("""
grammar = ContextFreeGrammar.read("""
S -> NP VP
PP -> P NP
NP -> Det N | NP PP
Expand All @@ -1356,7 +1359,7 @@ def cfg_demo():
print(repr(grammar.productions()).replace(',', ',\n'+' '*25))
print()

toy_pcfg1 = read_pcfg("""
toy_pcfg1 = WeightedGrammar.read("""
S -> NP VP [1.0]
NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
Det -> 'the' [0.8] | 'my' [0.2]
Expand All @@ -1367,7 +1370,7 @@ def cfg_demo():
P -> 'with' [0.61] | 'under' [0.39]
""")

toy_pcfg2 = read_pcfg("""
toy_pcfg2 = WeightedGrammar.read("""
S -> NP VP [1.0]
VP -> V NP [.59]
VP -> V [.40]
Expand Down Expand Up @@ -1461,7 +1464,7 @@ def dg_demo():
A demonstration showing the creation and inspection of a
``DependencyGrammar``.
"""
grammar = read_dependency_grammar("""
grammar = DependencyGrammar.read("""
'scratch' -> 'cats' | 'walls'
'walls' -> 'the'
'cats' -> 'the'
Expand Down Expand Up @@ -1506,12 +1509,5 @@ def demo():
__all__ = ['Nonterminal', 'nonterminals',
'Production', 'DependencyProduction', 'WeightedProduction',
'ContextFreeGrammar', 'WeightedGrammar', 'DependencyGrammar',
'StatisticalDependencyGrammar',
'induce_pcfg', 'read_cfg', 'read_cfg_production',
'read_pcfg', 'read_pcfg_production',
'read_fcfg', 'read_fcfg_production',
'read_grammar', 'read_production',
'read_dependency_grammar', 'read_dependency_production',
'demo', 'cfg_demo', 'pcfg_demo', 'dg_demo', 'sdg_demo',
'toy_pcfg1', 'toy_pcfg2']
'StatisticalDependencyGrammar', 'induce_pcfg', 'read_grammar']

4 changes: 2 additions & 2 deletions nltk/parse/chart.py
Expand Up @@ -1547,8 +1547,8 @@ def parse(self, tokens, tree_class=Tree):
########################################################################

def demo_grammar():
from nltk.grammar import read_cfg
return read_cfg("""
from nltk.grammar import ContextFreeGrammar
return ContextFreeGrammar.read("""
S -> NP VP
PP -> "with" NP
NP -> NP PP
Expand Down
4 changes: 2 additions & 2 deletions nltk/parse/generate.py
Expand Up @@ -64,11 +64,11 @@ def _generate_one(grammar, item, depth):
"""

def demo(N=23):
from nltk.grammar import read_cfg
from nltk.grammar import ContextFreeGrammar

print('Generating the first %d sentences for demo grammar:' % (N,))
print(demo_grammar)
grammar = read_cfg(demo_grammar)
grammar = ContextFreeGrammar.read(demo_grammar)
for n, sent in enumerate(generate(grammar, n=N), 1):
print('%3d. %s' % (n, ' '.join(sent)))

Expand Down
5 changes: 3 additions & 2 deletions nltk/parse/nonprojectivedependencyparser.py
Expand Up @@ -11,7 +11,6 @@
import math

from nltk.compat import xrange
from nltk.grammar import read_dependency_grammar

from nltk.parse.dependencygraph import DependencyGraph, conll_data2

Expand Down Expand Up @@ -620,7 +619,9 @@ def nonprojective_conll_parse_demo():
print(parse_graph)

def rule_based_demo():
grammar = read_dependency_grammar("""
from nltk.grammar import DependencyGrammar

grammar = DependencyGrammar.read("""
'taught' -> 'play' | 'man'
'man' -> 'the' | 'in'
'in' -> 'corner'
Expand Down

0 comments on commit 52f017f

Please sign in to comment.