From 2ec6001b6127ef78fa582c688b7330cae2c2de74 Mon Sep 17 00:00:00 2001
From: Pierpaolo Pantone <24alsecondo@gmail.com>
Date: Fri, 19 Feb 2016 11:42:43 +0100
Subject: [PATCH 01/14] Fix imports in sentiment module

---
 nltk/sentiment/util.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nltk/sentiment/util.py b/nltk/sentiment/util.py
index 009348a83d..37e1bc5b82 100644
--- a/nltk/sentiment/util.py
+++ b/nltk/sentiment/util.py
@@ -407,7 +407,7 @@ def demo_tweets(trainer, n_instances=None, output=None):
     :param output: the output file where results have to be reported.
     """
     from nltk.tokenize import TweetTokenizer
-    from sentiment_analyzer import SentimentAnalyzer
+    from nltk.sentiment import SentimentAnalyzer
     from nltk.corpus import twitter_samples, stopwords
 
     # Different customizations for the TweetTokenizer
@@ -484,7 +484,7 @@ def demo_movie_reviews(trainer, n_instances=None, output=None):
     :param output: the output file where results have to be reported.
     """
     from nltk.corpus import movie_reviews
-    from sentiment_analyzer import SentimentAnalyzer
+    from nltk.sentiment import SentimentAnalyzer
 
     if n_instances is not None:
         n_instances = int(n_instances/2)
@@ -536,7 +536,7 @@ def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=Non
         and negative.
     :param output: the output file where results have to be reported.
     """
-    from sentiment_analyzer import SentimentAnalyzer
+    from nltk.sentiment import SentimentAnalyzer
    from nltk.corpus import subjectivity
 
     if n_instances is not None:

From 46e924f213739b7ac9429e022798419709efd8c7 Mon Sep 17 00:00:00 2001
From: Pierpaolo Pantone <24alsecondo@gmail.com>
Date: Fri, 19 Feb 2016 11:50:40 +0100
Subject: [PATCH 02/14] Fix vader imports in sentiment module

---
 nltk/sentiment/util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nltk/sentiment/util.py b/nltk/sentiment/util.py
index 37e1bc5b82..119e28245f 100644
--- a/nltk/sentiment/util.py
+++ b/nltk/sentiment/util.py
@@ -650,7 +650,7 @@ def demo_vader_instance(text):
 
     :param text: a text whose polarity has to be evaluated.
     """
-    from vader import SentimentIntensityAnalyzer
+    from nltk.sentiment import SentimentIntensityAnalyzer
     vader_analyzer = SentimentIntensityAnalyzer()
     print(vader_analyzer.polarity_scores(text))
 
@@ -663,7 +663,7 @@ def demo_vader_tweets(n_instances=None, output=None):
     """
     from collections import defaultdict
     from nltk.corpus import twitter_samples
-    from vader import SentimentIntensityAnalyzer
+    from nltk.sentiment import SentimentIntensityAnalyzer
     from nltk.metrics import (accuracy as eval_accuracy, precision as eval_precision,
         recall as eval_recall, f_measure as eval_f_measure)
 

From 2085bf8b4c834ad9456cf1debc70ce2e48306ea7 Mon Sep 17 00:00:00 2001
From: alvations
Date: Fri, 19 Feb 2016 16:41:34 +0100
Subject: [PATCH 03/14] Use os.path.split to handle file paths on all OSes

Currently, `jar.rpartition('/')[2]` assumes '/' as the path separator, so it
only works on Linux/Mac and breaks on Windows.
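For illustration only (not part of the patch), a quick check with a
hypothetical Windows-style jar path; `ntpath` is used to emulate what
`os.path.split` resolves to on Windows, so the snippet runs on any platform:

    >>> import ntpath
    >>> jar = r'C:\maltparser-1.8.1\lib\liblinear-1.8.jar'  # hypothetical path
    >>> jar.rpartition('/')[2] == jar  # no '/' in the path, nothing is stripped
    True
    >>> ntpath.split(jar)[1]  # what os.path.split does on Windows
    'liblinear-1.8.jar'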
---
 nltk/parse/malt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nltk/parse/malt.py b/nltk/parse/malt.py
index 41141fecf4..5cce984ae3 100644
--- a/nltk/parse/malt.py
+++ b/nltk/parse/malt.py
@@ -66,7 +66,7 @@ def find_maltparser(parser_dirname):
     # Checks that that the found directory contains all the necessary .jar
     malt_dependencies = ['','','']
     _malt_jars = set(find_jars_within_path(_malt_dir))
-    _jars = set(jar.rpartition('/')[2] for jar in _malt_jars)
+    _jars = set(os.path.split(jar)[1] for jar in _malt_jars)
     malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])
 
     assert malt_dependencies.issubset(_jars)

From 22585dfdc748dbcde74713dd3da9c93909acd792 Mon Sep 17 00:00:00 2001
From: alvations
Date: Mon, 22 Feb 2016 19:26:28 +0100
Subject: [PATCH 04/14] Use OS-specific classpath separator

See http://stackoverflow.com/questions/4528438/classpath-does-not-work-under-linux
---
 nltk/parse/malt.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/nltk/parse/malt.py b/nltk/parse/malt.py
index 5cce984ae3..5d440a203f 100644
--- a/nltk/parse/malt.py
+++ b/nltk/parse/malt.py
@@ -12,6 +12,7 @@
 from nltk.six import text_type
 
 import os
+import sys
 import tempfile
 import subprocess
 import inspect
@@ -215,8 +216,10 @@ def generate_malt_command(self, inputfilename, outputfilename=None, mode=None):
         """
 
         cmd = ['java']
-        cmd+= self.additional_java_args # Adds additional java arguments.
-        cmd+= ['-cp', ':'.join(self.malt_jars)] # Adds classpaths for jars
+        cmd+= self.additional_java_args # Adds additional java arguments
+        # Joins classpaths with ";" on Windows and with ":" on Linux/Mac
+        classpaths_separator = ';' if sys.platform.startswith('win') else ':'
+        cmd+= ['-cp', classpaths_separator.join(self.malt_jars)] # Adds classpaths for jars
         cmd+= ['org.maltparser.Malt'] # Adds the main function.
 
         # Adds the model file.

From 046bfb000a68d8348524095cf3d3689692c65f7b Mon Sep 17 00:00:00 2001
From: alvations
Date: Mon, 22 Feb 2016 19:36:49 +0100
Subject: [PATCH 05/14] Updated contributor info.

---
 nltk/parse/malt.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nltk/parse/malt.py b/nltk/parse/malt.py
index 5d440a203f..8f3d1c971f 100644
--- a/nltk/parse/malt.py
+++ b/nltk/parse/malt.py
@@ -2,6 +2,7 @@
 # Natural Language Toolkit: Interface to MaltParser
 #
 # Author: Dan Garrette
+# Contributor: Liling Tan, Mustufain, osamamukhtar11
 #
 # Copyright (C) 2001-2015 NLTK Project
 # URL:

From 1bdab4acf682e4824d86cf136938bd4338c9ed6b Mon Sep 17 00:00:00 2001
From: alvations
Date: Wed, 24 Feb 2016 15:34:00 +0100
Subject: [PATCH 06/14] Fix implementation error in Method 3 smoothing

The smoothing should only replace the numerator of the precision (the match
count), not the n-gram precision as a whole, so `p_i.denominator` has to be
kept, i.e. `(1/2**k) / p_i.denominator`, which simplifies to
`1 / (2**k * p_i.denominator)`.
---
 nltk/translate/bleu_score.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py
index 356ee82cdd..294774745b 100644
--- a/nltk/translate/bleu_score.py
+++ b/nltk/translate/bleu_score.py
@@ -491,7 +491,7 @@ def method3(self, p_n, *args, **kwargs):
         incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
         for i, p_i in enumerate(p_n):
             if p_i == 0:
-                p_n[i] = 1 / 2**incvnt
+                p_n[i] = 1 / (2**incvnt * p_i.denominator)
                 incvnt+=1
         return p_n
 

From ca52a97f171ce482658b7602bf6881f2e6a73342 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 25 Feb 2016 08:44:07 +0100
Subject: [PATCH 07/14] Fixing the ZipFile issue in Python 3.5

---
 nltk/data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nltk/data.py b/nltk/data.py
index bf2497eb3e..d208a63293 100644
--- a/nltk/data.py
+++ b/nltk/data.py
@@ -978,8 +978,8 @@ def __init__(self, filename):
     def read(self, name):
         assert self.fp is None
         self.fp = open(self.filename, 'rb')
-        value = zipfile.ZipFile.read(self, name)
-        self.close()
+        with self.open(name) as zfin:
+            value = zfin.read()
         return value
 
     def write(self, *args, **kwargs):

From b73a8e3d889fe4a42b022161edb77c055dbe4655 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 25 Feb 2016 10:54:15 +0100
Subject: [PATCH 08/14] Enforce read-mode for ZipFile

---
 nltk/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nltk/data.py b/nltk/data.py
index d208a63293..aab65d2511 100644
--- a/nltk/data.py
+++ b/nltk/data.py
@@ -978,7 +978,7 @@ def __init__(self, filename):
     def read(self, name):
         assert self.fp is None
         self.fp = open(self.filename, 'rb')
-        with self.open(name) as zfin:
+        with self.open(name, 'r') as zfin:
             value = zfin.read()
         return value
 

From 21532d5992366a8f0ce1e4eede71195b2529b004 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 25 Feb 2016 11:41:49 +0100
Subject: [PATCH 09/14] Reverting to old data.py

---
 nltk/data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nltk/data.py b/nltk/data.py
index aab65d2511..bf2497eb3e 100644
--- a/nltk/data.py
+++ b/nltk/data.py
@@ -978,8 +978,8 @@ def __init__(self, filename):
     def read(self, name):
         assert self.fp is None
         self.fp = open(self.filename, 'rb')
-        with self.open(name, 'r') as zfin:
-            value = zfin.read()
+        value = zipfile.ZipFile.read(self, name)
+        self.close()
         return value
 
     def write(self, *args, **kwargs):

From b111ed721b6158a62f9f68b2a2d59b4ab00e02f9 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 25 Feb 2016 12:06:29 +0100
Subject: [PATCH 10/14] Added _fileRefCnt support to be Python 3.5+ compatible

---
 nltk/data.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/nltk/data.py b/nltk/data.py
index bf2497eb3e..2e4d7e6d36 100644
--- a/nltk/data.py
+++ b/nltk/data.py
@@ -974,11 +974,17 @@ def __init__(self, filename):
         zipfile.ZipFile.__init__(self, filename)
         assert self.filename == filename
         self.close()
+        # After closing a ZipFile object, _fileRefCnt needs to be cleared
+        # for the code to work on both Python 2 and 3.
+        self._fileRefCnt = 0
 
     def read(self, name):
         assert self.fp is None
         self.fp = open(self.filename, 'rb')
         value = zipfile.ZipFile.read(self, name)
+        # Ensure that _fileRefCnt is set, for compatibility with both Python 2 and 3.
+        # Since we only opened one file here, we add 1.
+        self._fileRefCnt += 1
         self.close()
         return value
 

From 6d7eeec50d9db67cb72ce9122cbfb1503cd135f1 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 25 Feb 2016 13:47:12 +0100
Subject: [PATCH 11/14] Resolves unclosed-file warning for .raw()

Resolves the unclosed-file warnings raised when PlaintextCorpusReader.raw()
is called.
---
 nltk/corpus/reader/plaintext.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/nltk/corpus/reader/plaintext.py b/nltk/corpus/reader/plaintext.py
index b12669f517..ad2b5328cc 100644
--- a/nltk/corpus/reader/plaintext.py
+++ b/nltk/corpus/reader/plaintext.py
@@ -71,7 +71,12 @@ def raw(self, fileids=None):
         """
         if fileids is None: fileids = self._fileids
         elif isinstance(fileids, string_types): fileids = [fileids]
-        return concat([self.open(f).read() for f in fileids])
+        raw_texts = []
+        for f in fileids:
+            _fin = self.open(f)
+            raw_texts.append(_fin.read())
+            _fin.close()
+        return concat(raw_texts)
 
     def words(self, fileids=None):
         """

From 8ff1c359a1114535442ce5367bbd04c5608efc71 Mon Sep 17 00:00:00 2001
From: alvations
Date: Thu, 25 Feb 2016 14:37:45 +0100
Subject: [PATCH 12/14] Enforce stream.close() after reaching EOF

Note that the stream will be reopened once it is accessed again.
---
 nltk/corpus/reader/util.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nltk/corpus/reader/util.py b/nltk/corpus/reader/util.py
index bb31cb9bdd..d51ad04e7f 100644
--- a/nltk/corpus/reader/util.py
+++ b/nltk/corpus/reader/util.py
@@ -334,6 +334,9 @@ def iterate_from(self, start_tok):
 
         # If we reach this point, then we should know our length.
         assert self._len is not None
+        # Enforce closing of the stream once we reach end of file.
+        # We should have reached EOF once we're out of the while loop.
+        self.close()
 
 # Use concat for these, so we can use a ConcatenatedCorpusView
 # when possible.

From a488bb6c900c89ea4d2ca154627fa92093626332 Mon Sep 17 00:00:00 2001
From: alvations
Date: Fri, 26 Feb 2016 14:22:49 +0100
Subject: [PATCH 13/14] Fixes doctest for Python 3

Dictionary ordering is not deterministic in Python 3, so the __repr__ output
can differ from run to run. The doctest should check for value equality
instead.

Fixes issue on https://nltk.ci.cloudbees.com/job/nltk/TOXENV=py34-jenkins,jdk=jdk8latestOnlineInstall/lastCompletedBuild/testReport/nltk.tokenize.mwe/MWETokenizer/add_mwe/
---
 nltk/tokenize/mwe.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nltk/tokenize/mwe.py b/nltk/tokenize/mwe.py
index 12b4797786..a09c5a6f9f 100644
--- a/nltk/tokenize/mwe.py
+++ b/nltk/tokenize/mwe.py
@@ -70,8 +70,9 @@ def add_mwe(self, mwe):
         >>> tokenizer.add_mwe(('a', 'b'))
         >>> tokenizer.add_mwe(('a', 'b', 'c'))
         >>> tokenizer.add_mwe(('a', 'x'))
-        >>> tokenizer._mwes.as_dict()
-        {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
+        >>> expected = {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
+        >>> tokenizer._mwes.as_dict() == expected
+        True
 
         """
         self._mwes.insert(mwe)

From 80b3b1dc6c146d3ee81a8bfc045005c9e308168c Mon Sep 17 00:00:00 2001
From: alvations
Date: Sat, 27 Feb 2016 00:27:50 +0100
Subject: [PATCH 14/14] Fixes doctest for Python 3

Dictionary ordering is not deterministic in Python 3, so the __repr__ output
can differ from run to run. The doctest should check for value equality
instead. Monotonic nested defaultdict/dictionary outputs should not be
affected, though.
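For illustration only (not part of the commit), the nested dict from the
MWETokenizer doctest above shows the difference: dict equality ignores key
order, while the repr depends on it, and key order can change between
interpreter runs on the Python 3.4 used by the CI because of hash
randomization.

    >>> d = {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
    >>> d == {'a': {'b': {True: None, 'c': {True: None}}, 'x': {True: None}}}
    True

So `repr(d)` may legitimately print 'x' before 'b' in one run and after it in
another, but the equality check always passes.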
Fixes issue on https://nltk.ci.cloudbees.com/job/nltk/TOXENV=py34-jenkins,jdk=jdk8latestOnlineInstall/lastCompletedBuild/testReport/nltk.util/Trie/as_dict/
(Same issue as https://github.com/nltk/nltk/commit/a488bb6c900c89ea4d2ca154627fa92093626332)

@stevenbird Sorry, I selected the wrong radio button in the previous commit
when fixing
https://github.com/nltk/nltk/commit/a488bb6c900c89ea4d2ca154627fa92093626332
---
 nltk/util.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nltk/util.py b/nltk/util.py
index 2af0fb05d7..d04544b8e4 100644
--- a/nltk/util.py
+++ b/nltk/util.py
@@ -1332,8 +1332,9 @@ def as_dict(self, d=None):
 
         >>> from nltk.util import Trie
        >>> trie = Trie(["abc", "def"])
-        >>> trie.as_dict()
-        {'a': {'b': {'c': {True: None}}}, 'd': {'e': {'f': {True: None}}}}
+        >>> expected = {'a': {'b': {'c': {True: None}}}, 'd': {'e': {'f': {True: None}}}}
+        >>> trie.as_dict() == expected
+        True
 
         """
         def _default_to_regular(d):