Update README files and Stanford Dependencies manual for 3.5.2

stanfordnlp · Apr 18, 2015 · 65c92e7 · 65c92e7
1 parent 9110507
commit 65c92e7
Show file tree

Hide file tree

Showing 7 changed files with 113 additions and 42 deletions.
diff --git a/doc/corenlp/README.txt b/doc/corenlp/README.txt
@@ -1,7 +1,7 @@
 Stanford CoreNLP - Stanford's Suite of NLP Tools
 ------------------------------------------------
 
-Copyright © 2009-2014 The Board of Trustees of
+Copyright © 2009-2015 The Board of Trustees of
 The Leland Stanford Junior University. All Rights Reserved.
 
 DOCUMENTATION
@@ -14,7 +14,7 @@ LICENSE
 
 //
 // StanfordCoreNLP -- a suite of NLP tools
-// Copyright © 2009-2014 The Board of Trustees of
+// Copyright © 2009-2015 The Board of Trustees of
 // The Leland Stanford Junior University. All Rights Reserved.
 //
 // This program is free software; you can redistribute it and/or
@@ -42,6 +42,9 @@ LICENSE
 CHANGES
 ---------------------------------
 
+2015-04-20    3.5.2     Switch to Universal Dependencies, add Chinese
+                        coreference system
+
 2015-01-29    3.5.1     NER, dependency parser, SPIED improvements; 
                         general bugfixes 
 

diff --git a/doc/lexparser/README.txt b/doc/lexparser/README.txt
@@ -1,17 +1,18 @@
-Stanford Lexicalized Parser v3.5.1 - 2015-01-29
+Stanford Lexicalized Parser v3.5.2 - 2015-04-20
 -----------------------------------------------
 
-Copyright (c) 2002-2012 The Board of Trustees of The Leland Stanford Junior
+Copyright (c) 2002-2015 The Board of Trustees of The Leland Stanford Junior
 University. All Rights Reserved.
 
 Original core parser code by Dan Klein.  Support code, additional
 modules, languages, features, internationalization, compaction, typed
 dependencies, etc. by Christopher Manning, Roger Levy, Teg Grenager,
 Galen Andrew, Marie-Catherine de Marneffe, Jenny Finkel, Spence Green,
 Bill MacCartney, Anna Rafferty, Huihsin Tseng, Pi-Chuan Chang,
-Wolfgang Maier, Richard Eckart, Richard Socher, and John Bauer.
+Wolfgang Maier, Richard Eckart, Richard Socher, John Bauer,
+Sebastian Schuster, and Jon Gauthier.
 
-This release prepared by John Bauer.
+This release was prepared by Jason Bolton.
 
 This package contains 4 parsers: a high-accuracy unlexicalized PCFG; a
 lexicalized dependency parser; a factored model, where the estimates
@@ -41,7 +42,7 @@ QUICKSTART
 UNIX COMMAND-LINE USAGE
 
 On a Unix system you should be able to parse the English test file with the
-following command:
+following command:	
 
     ./lexparser.sh data/testsent.txt
 
@@ -173,10 +174,27 @@ evaluation metrics:
 See the usage instructions and javadocs in the requisite classes located in
 edu.stanford.nlp.parser.metrics.
 
+UNIVERSAL DEPENDENCIES vs. STANFORD DEPENDENCIES
+
+Since v3.5.2 the default dependency representation is the new Universal Dependencies
+representation. Universal Dependencies were developed with the goal of being a
+cross-linguistically valid representation. Note that some constructs such as prepositional
+phrases are now analyzed differently and that the set of relations was updated. Please 
+look at the Universal Dependencies documentation for more information:
+
+      http://universaldependencies.github.io/docs/
+
+The parser also still supports the original Stanford Dependencies representation 
+as described in the StanfordDependenciesManual.pdf. Use the flag
+
+     -originalDependencies
+
+to obtain original Stanford Dependencies.
+
 LICENSE
 
 // StanfordLexicalizedParser -- a probabilistic lexicalized NL CFG parser
-// Copyright (c) 2002-2012 The Board of Trustees of
+// Copyright (c) 2002-2015 The Board of Trustees of
 // The Leland Stanford Junior University. All Rights Reserved.
 //
 // This program is free software; you can redistribute it and/or
@@ -206,6 +224,8 @@ LICENSE
 CHANGES
 ---------------------------------
 
+2015-04-20    3.5.2     Switch to Universal Dependencies
+
 2015-01-29    3.5.1     Dependency parser improvements; general 
                         bugfixes 
 

diff --git a/doc/lexparser/README_dependencies.txt b/doc/lexparser/README_dependencies.txt
@@ -1,14 +1,37 @@
-STANFORD DEPENDENCIES.  Stanford Parser v3.5.1
+UNIVERSAL/STANFORD DEPENDENCIES.  Stanford Parser v3.5.2
 -----------------------------------------------------------
 
+IMPORTANT: Starting with version 3.5.2 the default dependencies
+representation output by the Stanford Parser is the new Universal
+Dependencies Representation. Universal Dependencies were developed
+with the goal of being a cross-linguistically valid representation.
+Note that some constructs such as prepositional phrases are now 
+analyzed differently and that the set of relations was updated. The
+online documentation of English Universal Dependencies at
+
+    http://universaldependencies.github.io/docs/#language-en
+
+should be consulted for the current set of dependency relations.
+
+
+The parser and converter also still support the original 
+Stanford Dependencies as described in the Stanford Dependencies 
+manual. Use the flag
+
+    -originalDependencies
+
+to obtain the original Stanford Dependencies.
+
+
 The manual for the English version of the Stanford Dependencies
 representation:
 
     StanfordDependenciesManual.pdf
 
-should be consulted for the current set of dependency representations
-and the correct commands for generating Stanford Dependencies together
-with any of the Stanford Parser, another parser, or a treebank.
+should be consulted for the set of dependency relations in the original
+Stanford Dependencies representation and the correct commands for 
+generating Stanford Dependencies together with any of the Stanford Parser, 
+another parser, or a treebank.
 
 A typed dependencies representation is also available for Chinese.  For
 the moment the documentation consists of the code, and a brief
@@ -19,24 +42,24 @@ D. Manning. 2009.  Discriminative Reordering with Chinese Grammatical
 Relations Features.  Third Workshop on Syntax and Structure in Statistical
 Translation. http://nlp.stanford.edu/pubs/ssst09-chang.pdf
 
-
 --------------------------------------
-UNIVERSAL DEPENDENCIES
+DEPENDENCIES SCHEMES
 
-We are at present involved in a group developing a cross-linguistically
-valid Universal Dependencies representation and treebanks:
+For an overview of the original English Universal Dependencies schemes, please look
+at:
 
-      http://universaldependencies.github.io/docs/
+  Marie-Catherine de Marneffe, Timothy Dozat, Natalia Silveira, Katri Haverinen,
+  Filip Ginter, Joakim Nivre and Christopher D. Manning. 2014. Universal Stanford
+  dependencies: A cross-linguistic typology. 9th International Conference on
+  Language Resources and Evaluation (LREC 2014).
+  http://nlp.stanford.edu/pubs/USD_LREC14_paper_camera_ready.pdf
 
-It is our intention to transition Stanford NLP tools to Universal
-Dependencies, but there is not yet any model support for Universal
-Dependencies in this release.  (However, there is no reason that you
-yourself cannot train a POS tagger and a neural-net dependency parser
-for Universal Dependencies.
+Please note, though, that some of the relations discussed in this paper
+were subsequently updated; please refer to the online documentation at
+
+    http://universaldependencies.github.io/docs/
 
-
---------------------------------------
-ORIGINAL DEPENDENCIES SCHEME
+for up-to-date documentation of the set of relations.
 
 For an overview of the original typed dependencies scheme, please look
 at:
@@ -54,9 +77,18 @@ For more discussion of the design principles, please see:
   workshop on Cross-Framework and Cross-Domain Parser Evaluation, pp. 1-8.
   http://nlp.stanford.edu/~manning/papers/dependencies-coling08.pdf
 
-These papers can be cited as references for the English Stanford
-Dependencies.
+These papers can be cited as references for the original English Stanford
+Dependencies and English Universal Dependencies.
+
+--------------------------------------
+CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v3.5.2
+
+Switch to Universal Dependencies as the default representation.
+Please see the Universal Dependencies documentation at
+
+      http://universaldependencies.github.io/docs/
 
+for more information on the new relations.
 
 --------------------------------------
 CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v3.5.1

diff --git a/doc/lexparser/StanfordDependenciesManual.pdf b/doc/lexparser/StanfordDependenciesManual.pdf
diff --git a/doc/lexparser/StanfordDependenciesManual.tex b/doc/lexparser/StanfordDependenciesManual.tex
@@ -33,12 +33,20 @@
 % Revised for the Stanford Parser v.\ 3.2 in June 2013
 % Revised for the Stanford Parser v.\ 3.3 in November 2013
 % Revised for the Stanford Parser v.\ 3.3 in December 2013
-Revised for the Stanford Parser v.\ 3.5.1 in February 2015
+%Revised for the Stanford Parser v.\ 3.5.1 in February 2015
+Revised for the Stanford Parser v.\ 3.5.2 in April 2015
 }
 
 \begin{document}
 \maketitle
 
+Please note that this manual describes the original Stanford 
+Dependencies representation. As of version 3.5.2 the default representation
+output by the Stanford Parser and Stanford CoreNLP is the new Universal Dependencies (UD)
+representation. Take a look at the Universal Dependencies documentation 
+at \textsf{http://universaldependencies.github.io/docs/} for a description of UD 
+relations.
+
 \section{Introduction}
 
 The Stanford typed dependencies representation was designed to provide
@@ -1034,11 +1042,14 @@ \subsubsection*{$\star$ \textbf{edu.stanford.nlp.parser.lexparser.LexicalizedPar
 option will also give you the context-free phrase structure grammar
 representation of the sentences.
 
+\noindent Note that as of version 3.5.2 the default representation is the new Universal Dependencies representation. 
+Use the \texttt{-originalDependencies} option to obtain original Stanford Dependencies.
+
 \bigskip
 
 \noindent Command line example: \\
 {\hangindent=0.4in
-\raggedright\texttt{java -mx200m edu.stanford.nlp.parser.lexparser.LexicalizedParser -retainTmpSubcategories -outputFormat "penn,typedDependencies" -outputFormatOptions "basicDependencies" englishPCFG.ser.gz file.txt}\\}
+\raggedright\texttt{java -mx200m edu.stanford.nlp.parser.lexparser.LexicalizedParser -retainTmpSubcategories -originalDependencies -outputFormat "penn,typedDependencies" -outputFormatOptions "basicDependencies" englishPCFG.ser.gz file.txt}\\}
 
 \begin{quote}
 \hspace*{-0.375in}%
@@ -1047,6 +1058,8 @@ \subsubsection*{$\star$ \textbf{edu.stanford.nlp.parser.lexparser.LexicalizedPar
 \hspace*{1.5in}"edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz",\\
 \hspace*{1.5in}"-maxLength", "80", "-retainTmpSubcategories"); \\
 TreebankLanguagePack tlp = new PennTreebankLanguagePack(); \\
+// Uncomment the following line to obtain original Stanford Dependencies\\
+// tlp.setGenerateOriginalDependencies(true); \\
 GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();}
 
 \bigskip
@@ -1064,15 +1077,17 @@ \subsubsection*{$\star$ \textbf{edu.stanford.nlp.trees.EnglishGrammaticalStructu
 
 \paragraph{Command-line usage.} Use the \texttt{-treeFile} option as shown in the command line example below. The options to get the different types of representation are as follows:
 \begin{tabbing}
-\hspace{.3cm} \= \texttt{-collapsedTree} \= \hspace{.3cm} \= \kill
+\hspace{.3cm} \= \texttt{-originalDependencies} \= \hspace{.3cm} \= \kill
 \> \texttt{-basic} \>\> basic dependencies\\
 \> \texttt{-collapsed} \>\> collapsed dependencies (not necessarily a tree structure)\\
 \> \texttt{-CCprocessed} \>\> collapsed dependencies with propagation of conjunct \\
 \> \> \> dependencies (not necessarily a tree structure)\\
 \> \texttt{-collapsedTree} \>\> collapsed dependencies that preserve a tree structure\\
 \> \texttt{-nonCollapsed} \>\> non-collapsed dependencies: basic dependencies as well as\\
 \> 						 \>\> the extra ones which do not preserve a tree structure\\
-\> \texttt{-conllx} \>\> dependencies printed out in CoNLL X (CoNLL 2006) format
+\> \texttt{-conllx} \>\> dependencies printed out in CoNLL X (CoNLL 2006) format \\
+\> \texttt{-originalDependencies} \>\> output the original Stanford Dependencies instead of \\
+\>                                            \>\> the new Universal Dependencies.
 \end{tabbing}
 
 \noindent If you want the non-collapsed version of the dependencies where the basic ones are separated from the extra ones, add the flag \texttt{-extraSep}. This will print the basic dependencies, a separator (======) and the extra dependencies.
@@ -1137,6 +1152,8 @@ \subsubsection*{$\star$ \textbf{edu.stanford.nlp.trees.EnglishGrammaticalStructu
 \texttt{
 // One time setup \\
 TreebankLanguagePack tlp = new PennTreebankLanguagePack(); \\
+// Uncomment the following line to obtain original Stanford Dependencies\\
+// tlp.setGenerateOriginalDependencies(true); \\
 GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); \\
 // For each Tree \\
 Tree parseTree; // assumed to come from a treebank or parser \\
@@ -1233,10 +1250,9 @@ \section{Further references for Stanford Dependencies}\label{refs}
 team of collaborators has led to a new synthesis spanning
 tokenization, morphological features, parts of speech, and
 dependencies, known as Universal Dependencies: 
-\url{http://universaldependencies.github.io/docs/}.  Our medium-term
-plan is to transition our tools from the current Stanford Dependencies
-to Universal Dependencies.
-
+\url{http://universaldependencies.github.io/docs/}.  Since version 3.5.2 
+the default representation output by our parser is the Universal Dependencies
+representation.
 
 \section{Recent changes}
 

diff --git a/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java b/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java
@@ -46,17 +46,17 @@ public void testDependencyParserEnglishSD() {
   }
 
   // Lower because we're evaluating on PTB + extraDevTest, not just PTB
-  private static final double EnglishUdLas = 84.9873;
+  private static final double EnglishUsdLas = 84.9873;
 
   /**
    * Test that the NN dependency parser performance doesn't change.
    */
-  public void testDependencyParserEnglishUD() {
+  public void testDependencyParserEnglishUSD() {
     DependencyParser parser = new DependencyParser();
-    parser.loadModelFile("/u/nlp/data/depparser/nn/distrib-2015-04-16/english_UD.gz");
+    parser.loadModelFile("/u/nlp/data/depparser/nn/distrib-2015-04-16/english_USD.gz");
     double las = parser.testCoNLL("/u/nlp/data/depparser/nn/data/dependency_treebanks/USD/dev.conll", null);
-    assertEquals(String.format("English UD LAS should be %.2f but was %.2f",
-        EnglishUdLas, las), EnglishUdLas, las, 1e-4);
+    assertEquals(String.format("English USD LAS should be %.2f but was %.2f",
+        EnglishUsdLas, las), EnglishUsdLas, las, 1e-4);
   }
 
   private static final double EnglishConll2008Las = 90.97206578058122;

diff --git a/src/edu/stanford/nlp/parser/nndep/DependencyParser.java b/src/edu/stanford/nlp/parser/nndep/DependencyParser.java
@@ -75,7 +75,7 @@
  * @author Jon Gauthier
  */
 public class DependencyParser {
-  public static final String DEFAULT_MODEL = "edu/stanford/nlp/models/parser/nndep/english_UD.gz";
+  public static final String DEFAULT_MODEL = "edu/stanford/nlp/models/parser/nndep/english_USD.gz";
 
   /**
    * Words, parts of speech, and dependency relation labels which were
@@ -1278,4 +1278,4 @@ public static void main(String[] args) {
       parser.parseTextFile(input, output);
     }
   }
-}
+}