Merge branch 'develop' of https://github.com/nltk/nltk into develop

nltk · Feb 27, 2016 · 3d92820 · 3d92820
2 parents 43696a1 + f05bea2
commit 3d92820
Show file tree

Hide file tree

Showing 8 changed files with 34 additions and 14 deletions.
diff --git a/nltk/corpus/reader/plaintext.py b/nltk/corpus/reader/plaintext.py
@@ -71,7 +71,12 @@ def raw(self, fileids=None):
         """
         if fileids is None: fileids = self._fileids
         elif isinstance(fileids, string_types): fileids = [fileids]
-        return concat([self.open(f).read() for f in fileids])
+        raw_texts = []
+        for f in fileids:
+            _fin = self.open(f)
+            raw_texts.append(_fin.read())
+            _fin.close() 
+        return concat(raw_texts)
 
     def words(self, fileids=None):
         """

diff --git a/nltk/corpus/reader/util.py b/nltk/corpus/reader/util.py
@@ -334,6 +334,9 @@ def iterate_from(self, start_tok):
 
         # If we reach this point, then we should know our length.
         assert self._len is not None
+        # Enforce closing of the stream once we have reached the end of the file.
+        # We should have reached EOF once we're out of the while loop.
+        self.close()
 
     # Use concat for these, so we can use a ConcatenatedCorpusView
     # when possible.

diff --git a/nltk/data.py b/nltk/data.py
@@ -974,11 +974,17 @@ def __init__(self, filename):
         zipfile.ZipFile.__init__(self, filename)
         assert self.filename == filename
         self.close()
+        # After closing a ZipFile object, the _fileRefCnt needs to be cleared
+        # for Python 2 and 3 compatible code.
+        self._fileRefCnt = 0
 
     def read(self, name):
         assert self.fp is None
         self.fp = open(self.filename, 'rb')
         value = zipfile.ZipFile.read(self, name)
+        # The _fileRefCnt needs to be set for Python 2 and 3 compatible code.
+        # Since we only opened one file here, we add 1.
+        self._fileRefCnt += 1
         self.close()
         return value
 

diff --git a/nltk/parse/malt.py b/nltk/parse/malt.py
@@ -2,6 +2,7 @@
 # Natural Language Toolkit: Interface to MaltParser
 #
 # Author: Dan Garrette <dhgarrette@gmail.com>
+# Contributor: Liling Tan, Mustufain, osamamukhtar11
 #
 # Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
@@ -12,6 +13,7 @@
 from nltk.six import text_type
 
 import os
+import sys
 import tempfile
 import subprocess
 import inspect
@@ -66,7 +68,7 @@ def find_maltparser(parser_dirname):
     # Checks that the found directory contains all the necessary .jar files
     malt_dependencies = ['','','']
     _malt_jars = set(find_jars_within_path(_malt_dir))
-    _jars = set(jar.rpartition('/')[2] for jar in _malt_jars)
+    _jars = set(os.path.split(jar)[1] for jar in _malt_jars)
     malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])
 
     assert malt_dependencies.issubset(_jars)
@@ -215,8 +217,10 @@ def generate_malt_command(self, inputfilename, outputfilename=None, mode=None):
         """
 
         cmd = ['java']
-        cmd+= self.additional_java_args # Adds additional java arguments.
-        cmd+= ['-cp', ':'.join(self.malt_jars)] # Adds classpaths for jars
+        cmd+= self.additional_java_args # Adds additional java arguments
+        # Joins classpaths with ";" on Windows and with ":" on Linux/Mac
+        classpaths_separator = ';' if sys.platform.startswith('win') else ':'
+        cmd+= ['-cp', classpaths_separator.join(self.malt_jars)] # Adds classpaths for jars
         cmd+= ['org.maltparser.Malt'] # Adds the main function.
 
         # Adds the model file.

diff --git a/nltk/sentiment/util.py b/nltk/sentiment/util.py
@@ -407,7 +407,7 @@ def demo_tweets(trainer, n_instances=None, output=None):
     :param output: the output file where results have to be reported.
     """
     from nltk.tokenize import TweetTokenizer
-    from sentiment_analyzer import SentimentAnalyzer
+    from nltk.sentiment import SentimentAnalyzer
     from nltk.corpus import twitter_samples, stopwords
 
     # Different customizations for the TweetTokenizer
@@ -484,7 +484,7 @@ def demo_movie_reviews(trainer, n_instances=None, output=None):
     :param output: the output file where results have to be reported.
     """
     from nltk.corpus import movie_reviews
-    from sentiment_analyzer import SentimentAnalyzer
+    from nltk.sentiment import SentimentAnalyzer
 
     if n_instances is not None:
         n_instances = int(n_instances/2)
@@ -536,7 +536,7 @@ def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=Non
         and negative.
     :param output: the output file where results have to be reported.
     """
-    from sentiment_analyzer import SentimentAnalyzer
+    from nltk.sentiment import SentimentAnalyzer
     from nltk.corpus import subjectivity
 
     if n_instances is not None:
@@ -650,7 +650,7 @@ def demo_vader_instance(text):
 
     :param text: a text whose polarity has to be evaluated.
     """
-    from vader import SentimentIntensityAnalyzer
+    from nltk.sentiment import SentimentIntensityAnalyzer
     vader_analyzer = SentimentIntensityAnalyzer()
     print(vader_analyzer.polarity_scores(text))
 
@@ -663,7 +663,7 @@ def demo_vader_tweets(n_instances=None, output=None):
     """
     from collections import defaultdict
     from nltk.corpus import twitter_samples
-    from vader import SentimentIntensityAnalyzer
+    from nltk.sentiment import SentimentIntensityAnalyzer
     from nltk.metrics import (accuracy as eval_accuracy, precision as eval_precision,
         recall as eval_recall, f_measure as eval_f_measure)
 

diff --git a/nltk/tokenize/mwe.py b/nltk/tokenize/mwe.py
@@ -70,8 +70,9 @@ def add_mwe(self, mwe):
         >>> tokenizer.add_mwe(('a', 'b'))
         >>> tokenizer.add_mwe(('a', 'b', 'c'))
         >>> tokenizer.add_mwe(('a', 'x'))
-        >>> tokenizer._mwes.as_dict()
-        {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
+        >>> expected = {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
+        >>> tokenizer._mwes.as_dict() == expected
+        True
 
         """
         self._mwes.insert(mwe)

diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py
@@ -491,7 +491,7 @@ def method3(self, p_n, *args, **kwargs):
         incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
         for i, p_i in enumerate(p_n):
             if p_i == 0:
-                p_n[i] = 1 / 2**incvnt
+                p_n[i] = 1 / (2**incvnt * p_i.denominator)
                 incvnt+=1
         return p_n
 

diff --git a/nltk/util.py b/nltk/util.py
@@ -1332,8 +1332,9 @@ def as_dict(self, d=None):
 
         >>> from nltk.util import Trie
         >>> trie = Trie(["abc", "def"])
-        >>> trie.as_dict()
-        {'a': {'b': {'c': {True: None}}}, 'd': {'e': {'f': {True: None}}}}
+        >>> expected = {'a': {'b': {'c': {True: None}}}, 'd': {'e': {'f': {True: None}}}}
+        >>> trie.as_dict() == expected
+        True
 
         """
         def _default_to_regular(d):