Skip to content

Commit fe2a967

Browse files
sonalgupta authored and
Stanford NLP committed
Merged with master
1 parent d5b8086 commit fe2a967

File tree

430 files changed

+148094
-36100
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

430 files changed

+148094
-36100
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,6 @@ You can find more explanation and documentation on [the Stanford CoreNLP homepag
1212
The most recent models associated with the code in the HEAD of this repository can be found [here](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar).
1313

1414
For information about making contributions to Stanford CoreNLP, see the file `CONTRIBUTING.md`.
15+
16+
Questions about CoreNLP can either be posted on StackOverflow with the tag [stanford-nlp](http://stackoverflow.com/questions/tagged/stanford-nlp),
17+
or on the [mailing lists](http://nlp.stanford.edu/software/corenlp.shtml#Mail).

build.gradle

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@ apply plugin: 'java'
66
apply plugin: 'eclipse'
77
apply plugin: 'application'
88

9+
// Gradle java plugin
910
sourceCompatibility = 1.8
1011
targetCompatibility = 1.8
12+
compileJava.options.encoding = 'UTF-8'
13+
1114
version = '3.4.1'
1215

1316
// Gradle application plugin

build.xml

Lines changed: 42 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,48 @@
316316
<zipfileset file="${source.path}/edu/stanford/nlp/time/suservlet/prototype.js"/>
317317
</war>
318318
</target>
319+
320+
<target name="openie.war" depends="compile,jar"
321+
description="build the openie webapp">
322+
<war destfile="openie.war"
323+
webxml="${source.path}/edu/stanford/nlp/naturalli/demo/web.xml">
324+
<lib dir=".">
325+
<include name="javanlp-core.jar"/>
326+
</lib>
327+
<zipfileset prefix="WEB-INF/data"
328+
file="/u/nlp/data/pos-tagger/distrib/english-left3words-distsim.tagger"/>
329+
<zipfileset prefix="WEB-INF/data"
330+
file="${source.path}/edu/stanford/nlp/time/rules/*"/>
331+
<zipfileset prefix="WEB-INF/data"
332+
file="${source.path}/edu/stanford/nlp/time/holidays/*"/>
333+
<zipfileset prefix="WEB-INF/data"
334+
file="/u/nlp/data/ner/goodClassifiers/english.all.3class.distsim.crf.ser.gz"/>
335+
<zipfileset prefix="WEB-INF/data"
336+
file="/u/nlp/data/ner/goodClassifiers/english.conll.4class.distsim.crf.ser.gz"/>
337+
<zipfileset prefix="WEB-INF/data"
338+
file="/u/nlp/data/ner/goodClassifiers/english.muc.7class.distsim.crf.ser.gz"/>
339+
<zipfileset prefix="WEB-INF/data"
340+
file="/u/nlp/data/depparser/nn/distrib/english_SD.gz"/>
341+
<zipfileset prefix="WEB-INF/data"
342+
file="/u/nlp/data/lexparser/englishPCFG.ser.gz"/>
343+
<zipfileset prefix="WEB-INF/data"
344+
file="/home/gabor/workspace/naturalli/etc/clauseSplitterModel.ser.gz"/>
345+
<zipfileset prefix="WEB-INF/data"
346+
file="/home/gabor/workspace/naturalli/etc/pp.tab.gz"/>
347+
<zipfileset prefix="WEB-INF/data"
348+
file="/home/gabor/workspace/naturalli/etc/obj.tab.gz"/>
349+
<zipfileset prefix="WEB-INF/data"
350+
file="/home/gabor/workspace/naturalli/etc/privative.tab.gz"/>
351+
<zipfileset prefix="WEB-INF/data"
352+
file="/home/gabor/workspace/naturalli/etc/subj_obj_pp.tab.gz"/>
353+
<zipfileset prefix="WEB-INF/data"
354+
file="/home/gabor/workspace/naturalli/etc/subj_pp_obj.tab.gz"/>
355+
<zipfileset prefix="WEB-INF/data"
356+
file="/home/gabor/workspace/naturalli/etc/subj_pp_pp.tab.gz"/>
357+
<zipfileset prefix="WEB-INF/data"
358+
file="/home/gabor/workspace/naturalli/etc/subj_pp.tab.gz"/>
359+
</war>
360+
</target>
319361

320362
<target name="parser.war" depends="compile,jar"
321363
description="build the parser webapp">
@@ -409,59 +451,6 @@
409451
</war>
410452
</target>
411453

412-
<target name="spied.war" depends="compile,jar"
413-
description="build the spied webapp">
414-
<war destfile="spied.war"
415-
webxml="${source.path}/edu/stanford/nlp/patterns/demo/web.xml">
416-
<lib dir=".">
417-
<include name="javanlp-core.jar"/>
418-
</lib>
419-
<lib dir="${basedir}/lib">
420-
<include name="commons-lang3-3.1.jar"/>
421-
<include name="xom-1.2.10.jar"/>
422-
<include name="xml-apis.jar"/>
423-
<include name="joda-time.jar"/>
424-
<include name="jollyday-0.4.7.jar"/>
425-
<include name="javax.json.jar"/>
426-
</lib>
427-
<lib dir="/u/nlp/data/StanfordCoreNLPModels">
428-
<include name="stanford-corenlp-models-current.jar"/>
429-
</lib>
430-
<zipfileset file="${source.path}/edu/stanford/nlp/patterns/demo/frontend/*"/>
431-
<zipfileset file="${source.path}/edu/stanford/nlp/patterns/*.properties"/>
432-
<!--<zipfileset prefix="WEB-INF/data"-->
433-
<!--file="/u/nlp/data/pos-tagger/distrib/english-left3words-distsim.tagger"/>-->
434-
<!--<zipfileset prefix="WEB-INF/data"-->
435-
<!--file="${source.path}/edu/stanford/nlp/time/rules/*"/>-->
436-
<!--<zipfileset prefix="WEB-INF/data"-->
437-
<!--file="${source.path}/edu/stanford/nlp/time/holidays/*"/>-->
438-
<!--<zipfileset prefix="WEB-INF/data"-->
439-
<!--file="/u/nlp/data/ner/goodClassifiers/english.all.3class.distsim.crf.ser.gz"/>-->
440-
<!--<zipfileset prefix="WEB-INF/data"-->
441-
<!--file="/u/nlp/data/ner/goodClassifiers/english.conll.4class.distsim.crf.ser.gz"/>-->
442-
<!--<zipfileset prefix="WEB-INF/data"-->
443-
<!--file="/u/nlp/data/ner/goodClassifiers/english.muc.7class.distsim.crf.ser.gz"/>-->
444-
<!--<zipfileset prefix="WEB-INF/data"-->
445-
<!--file="/u/nlp/data/depparser/nn/distrib/english_SD.gz"/>-->
446-
<!--<zipfileset prefix="WEB-INF/data"-->
447-
<!--file="/u/nlp/data/lexparser/englishPCFG.ser.gz"/>-->
448-
<!--<zipfileset prefix="WEB-INF/data"-->
449-
<!--file="/home/gabor/workspace/naturalli/etc/clauseSplitterModel.ser.gz"/>-->
450-
<!--<zipfileset prefix="WEB-INF/data"-->
451-
<!--file="/home/gabor/workspace/naturalli/etc/pp.tab.gz"/>-->
452-
<!--<zipfileset prefix="WEB-INF/data"-->
453-
<!--file="/home/gabor/workspace/naturalli/etc/obj.tab.gz"/>-->
454-
<!--<zipfileset prefix="WEB-INF/data"-->
455-
<!--file="/home/gabor/workspace/naturalli/etc/privative.tab.gz"/>-->
456-
<!--<zipfileset prefix="WEB-INF/data"-->
457-
<!--file="/home/gabor/workspace/naturalli/etc/subj_obj_pp.tab.gz"/>-->
458-
<!--<zipfileset prefix="WEB-INF/data"-->
459-
<!--file="/home/gabor/workspace/naturalli/etc/subj_pp_obj.tab.gz"/>-->
460-
<!--<zipfileset prefix="WEB-INF/data"-->
461-
<!--file="/home/gabor/workspace/naturalli/etc/subj_pp_pp.tab.gz"/>-->
462-
<!--<zipfileset prefix="WEB-INF/data"-->
463-
</war>
464-
</target>
465454
<property environment="env" />
466455

467456
<condition property="version1.7">

data/edu/stanford/nlp/process/ptblexer.gold

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -885,7 +885,7 @@ origins
885885
''
886886
Libyan
887887
ruler
888-
Muammar
888+
Mu`ammar
889889
al-Qaddafi
890890
referred
891891
to

data/edu/stanford/nlp/upos/ENUniversalPOS.tsurgeon

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,35 @@
55
% Author: Sebastian Schuster
66
% Author: Christopher Manning
77
%
8+
% The original Penn Treebank WSJ contains 45 POS tags (but almost certainly # for British pound currency is a bad idea!)
9+
% {#=173, $=9,039, ''=8,658, ,=60,489, -LRB-=1,672, -RRB-=1,689, .=48,733, :=6,087, CC=29,462, CD=44,937, DT=101,190,
10+
% EX=1,077, FW=268, IN=121,903, JJ=75,266, JJR=4,042, JJS=2,396, LS=64, MD=11,997, NN=163,935, NNP=114,053,
11+
% NNPS=3,087, NNS=73,964, PDT=441, POS=10,801, PRP=21,357, PRP$=10,241, RB=38,197, RBR=2,175, RBS=555, RP=3,275,
12+
% SYM=70, TO=27,449, UH=117, VB=32,565, VBD=37,493, VBG=18,239, VBN=24,865, VBP=15,377, VBZ=26,436, WDT=5,323,
13+
% WP=2,887, WP$=219, WRB=2,625, ``=8,878}
814
%
9-
% Context-sensitive mappings
15+
% The Web Treebank corpus adds 6 tags, but doesn't have #, yielding 50 POS tags:
16+
% ADD, AFX, GW, HYPH, NFP, XX
17+
%
18+
% OntoNotes 4.0 has 53 tags. It doesn't have # but adds: -LSB-, -RSB- [both mistakes!], ADD, AFX, CODE, HYPH, NFP,
19+
% X [mistake!], XX.
1020
%
1121
%
22+
% ------------------------------
23+
% Context-sensitive mappings
24+
%
1225
% TO -> PART (in CONJP phrases)
13-
CONJP < TO=target < VB
26+
@CONJP < TO=target < VB
1427

1528
relabel target PART
1629

1730
% TO -> PART
18-
VP < VP < (/^TO$/=target <... {/.*/})
31+
@VP < @VP < (/^TO$/=target <... {/.*/})
32+
33+
relabel target PART
34+
35+
% TO -> PART
36+
@VP <: (/^TO$/=target <... {/.*/})
1937

2038
relabel target PART
2139

@@ -24,13 +42,22 @@ TO=target <... {/.*/}
2442

2543
relabel target ADP
2644

27-
% delete the next two rules, third one should also cover them
28-
%
29-
% VB -> AUX (passive, case 1)
30-
%VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|get|got|getting|gets|gotten)$/ ) < (VP|ADJP [ < VBN|VBD | < (VP|ADJP < VBN|VBD) < CC ] )
45+
% Don't do this, we are now treating these as copular constructions
46+
% VB.* -> AUX (for passives where main verb is part of an ADJP)
47+
%@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|get|got|getting|gets|gotten)$/ ) < (@ADJP [ < VBN|VBD | < (@VP|ADJP < VBN|VBD) < CC ] )
3148
%
3249
%relabel target AUX
3350
%
51+
% VB.* -> AUX (for cases with fronted main VPs)
52+
@SINV < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ ) $-- (@VP < VBD|VBN))
53+
54+
relabel target AUX
55+
56+
% VB.* -> AUX (another, rarer case of fronted VPs)
57+
@SINV < (@VP < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ )) $-- (@VP < VBD|VBN))
58+
59+
relabel target AUX
60+
3461
% VB.* -> AUX (passive, case 2)
3562
%SQ|SINV < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ $++ (VP < VBD|VBN))
3663
%
@@ -42,7 +69,7 @@ VP < VP < (/^VB.*$/=target <... {/.*/})
4269
relabel target AUX
4370

4471
% VB -> AUX (active, case 2)
45-
SQ|SINV < (/^VB/=target $++ /^(?:VP|ADJP)/ <... {/.*/})
72+
@SQ|SINV < (/^VB/=target $++ /^(?:VP)/ <... {/.*/})
4673

4774
relabel target AUX
4875

@@ -91,7 +118,7 @@ RB=target <... {/.*/}
91118

92119
relabel target ADV
93120

94-
%
121+
% ------------------------------
95122
% 1 to 1 mappings
96123
%
97124
%
@@ -110,10 +137,10 @@ DT=target <... {/.*/}
110137

111138
relabel target DET
112139

113-
% EX -> DET
140+
% EX -> PRON
114141
EX=target <... {/.*/}
115142

116-
relabel target DET
143+
relabel target PRON
117144

118145
% FW -> X
119146
FW=target <... {/.*/}
@@ -280,7 +307,7 @@ AFX=target <... {/.*/}
280307

281308
relabel target X
282309

283-
%GW -> X
310+
% GW -> X
284311
GW=target <... {/.*/}
285312

286313
relabel target X

doc/classify/README.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Stanford Classifier v3.5.1 - 2015-01-29
1+
Stanford Classifier v3.5.2 - 2015-04-20
22
-------------------------------------------------
33

44
Copyright (c) 2003-2012 The Board of Trustees of
@@ -15,7 +15,7 @@ This package contains a maximum entropy classifier.
1515

1616
For more information about the classifier, point a web browser at the included javadoc directory, starting at the Package page for the edu.stanford.nlp.classify package, and looking also at the ColumnDataClassifier class documentation therein.
1717

18-
This software requires Java 5 (JDK 1.6.0+). (You must have installed it
18+
This software requires Java 8 (JDK 1.8.0+). (You must have installed it
1919
separately. Check the command "java -version".)
2020

2121

@@ -76,6 +76,8 @@ LICENSE
7676
CHANGES
7777
-------------------------
7878

79+
2015-04-20 3.5.2 Update for compatibility
80+
7981
2015-01-29 3.5.1 New input/output options, support for GloVe
8082
word vectors
8183

doc/corenlp/CoreNLP-to-HTML.xsl

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,11 @@ xmlns:d="http://nlp.stanford.edu/CoreNLP/v1">
6666

6767
<xsl:template match="root/document/sentences/sentence">
6868
<xsl:param name="position" select="'0'"/>
69-
<i><b>Sentence #<xsl:value-of select="$position"/></b></i>
69+
<p><i><b>Sentence #<xsl:value-of select="$position"/></b></i>
70+
<xsl:if test="@sentiment">
71+
<xsl:text> Sentiment: </xsl:text><xsl:value-of select="@sentiment"/>
72+
</xsl:if>
73+
</p>
7074

7175
<p>
7276
<i>Tokens</i><br/>
@@ -88,16 +92,7 @@ xmlns:d="http://nlp.stanford.edu/CoreNLP/v1">
8892
</p>
8993

9094
<p>
91-
<i>Collapsed dependencies</i>
92-
<ul>
93-
<xsl:for-each select="dependencies[@type='collapsed-dependencies']">
94-
<xsl:apply-templates select="dep"/>
95-
</xsl:for-each>
96-
</ul>
97-
</p>
98-
99-
<p>
100-
<i>Collapsed dependencies with CC processed</i>
95+
<i>Enhanced dependencies</i>
10196
<ul>
10297
<xsl:for-each select="dependencies[@type='collapsed-ccprocessed-dependencies']">
10398
<xsl:apply-templates select="dep"/>
@@ -118,6 +113,7 @@ xmlns:d="http://nlp.stanford.edu/CoreNLP/v1">
118113
<th>NER</th>
119114
<th>Normalized NER</th>
120115
<th>Speaker</th>
116+
<th>Sentiment</th>
121117
</tr>
122118
<xsl:for-each select="token">
123119
<tr>
@@ -130,6 +126,7 @@ xmlns:d="http://nlp.stanford.edu/CoreNLP/v1">
130126
<td><xsl:value-of select="NER"/></td>
131127
<td><xsl:value-of select="NormalizedNER"/></td>
132128
<td><xsl:value-of select="Speaker"/></td>
129+
<td><xsl:value-of select="sentiment"/></td>
133130
</tr>
134131
</xsl:for-each>
135132
</table>

doc/corenlp/README.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Stanford CoreNLP - Stanford's Suite of NLP Tools
22
------------------------------------------------
33

4-
Copyright © 2009-2014 The Board of Trustees of
4+
Copyright © 2009-2015 The Board of Trustees of
55
The Leland Stanford Junior University. All Rights Reserved.
66

77
DOCUMENTATION
@@ -14,7 +14,7 @@ LICENSE
1414

1515
//
1616
// StanfordCoreNLP -- a suite of NLP tools
17-
// Copyright © 2009-2014 The Board of Trustees of
17+
// Copyright © 2009-2015 The Board of Trustees of
1818
// The Leland Stanford Junior University. All Rights Reserved.
1919
//
2020
// This program is free software; you can redistribute it and/or
@@ -42,6 +42,9 @@ LICENSE
4242
CHANGES
4343
---------------------------------
4444

45+
2015-04-20 3.5.2 Switch to Universal dependencies, add Chinese
46+
coreference system to CoreNLP
47+
4548
2015-01-29 3.5.1 NER, dependency parser, SPIED improvements;
4649
general bugfixes
4750

doc/corenlp/corenlp.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
# Simple uses for xml and plain text output to files are:
55
# ./corenlp.sh -file filename
66
# ./corenlp.sh -file filename -outputFormat text
7+
# Split into sentences, run POS tagger and NER, write CoNLL-style TSV file:
8+
# ./corenlp.sh -annotators tokenize,ssplit,pos,lemma,ner -outputFormat conll -file input.txt
79
# You can also start a simple shell where you can enter sentences to be processed:
810
# ./corenlp.sh
911

1012
OS=`uname`
11-
# Macs (BSD) don't support readlink -e
12-
if [ "$OS" == "Darwin" ]; then
13+
# Some machines (older OS X, BSD, Windows environments) don't support readlink -e
14+
if hash readlink 2>/dev/null; then
1315
scriptdir=`dirname $0`
1416
else
1517
scriptpath=$(readlink -e "$0") || scriptpath=$0

doc/corenlp/pom-full.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<modelVersion>4.0.0</modelVersion>
33
<groupId>edu.stanford.nlp</groupId>
44
<artifactId>stanford-corenlp</artifactId>
5-
<version>3.5.1</version>
5+
<version>3.5.2</version>
66
<packaging>jar</packaging>
77
<name>Stanford CoreNLP</name>
88
<description>Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.</description>
@@ -14,8 +14,8 @@
1414
</license>
1515
</licenses>
1616
<scm>
17-
<url>http://nlp.stanford.edu/software/stanford-corenlp-2015-01-29.zip</url>
18-
<connection>http://nlp.stanford.edu/software/stanford-corenlp-2015-01-29.zip</connection>
17+
<url>http://nlp.stanford.edu/software/stanford-corenlp-2015-04-21.zip</url>
18+
<connection>http://nlp.stanford.edu/software/stanford-corenlp-2015-04-21.zip</connection>
1919
</scm>
2020
<developers>
2121
<developer>
@@ -78,7 +78,7 @@
7878
<configuration>
7979
<artifacts>
8080
<artifact>
81-
<file>${project.basedir}/stanford-corenlp-3.5.1-models.jar</file>
81+
<file>${project.basedir}/stanford-corenlp-3.5.2-models.jar</file>
8282
<type>jar</type>
8383
<classifier>models</classifier>
8484
</artifact>

0 commit comments

Comments
 (0)