Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/nltk/nltk into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
stevenbird committed Feb 27, 2016
2 parents 43696a1 + f05bea2 commit 3d92820
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 14 deletions.
7 changes: 6 additions & 1 deletion nltk/corpus/reader/plaintext.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,12 @@ def raw(self, fileids=None):
"""
if fileids is None: fileids = self._fileids
elif isinstance(fileids, string_types): fileids = [fileids]
return concat([self.open(f).read() for f in fileids])
raw_texts = []
for f in fileids:
_fin = self.open(f)
raw_texts.append(_fin.read())
_fin.close()
return concat(raw_texts)

def words(self, fileids=None):
"""
Expand Down
3 changes: 3 additions & 0 deletions nltk/corpus/reader/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,9 @@ def iterate_from(self, start_tok):

# If we reach this point, then we should know our length.
assert self._len is not None
# Enforce closing of the stream once we have reached the end of the file.
# We should have reached EOF once we're out of the while loop.
self.close()

# Use concat for these, so we can use a ConcatenatedCorpusView
# when possible.
Expand Down
6 changes: 6 additions & 0 deletions nltk/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,11 +974,17 @@ def __init__(self, filename):
zipfile.ZipFile.__init__(self, filename)
assert self.filename == filename
self.close()
# After closing a ZipFile object, _fileRefCnt needs to be reset to 0
# to keep the code compatible with both Python 2 and Python 3.
self._fileRefCnt = 0

def read(self, name):
# Read the archive member `name`: reopen the file at self.filename just for
# this call, delegate to zipfile.ZipFile.read, then close the file again and
# return whatever zipfile.ZipFile.read returned.
# The file object is expected to be closed (None) on entry.
assert self.fp is None
self.fp = open(self.filename, 'rb')
value = zipfile.ZipFile.read(self, name)
# _fileRefCnt must be updated by hand for Python 2 and 3 compatible code.
# Since we only opened one file here, we add 1 before calling close().
# NOTE(review): presumably close() consults _fileRefCnt on Python 3 -- confirm.
self._fileRefCnt += 1
self.close()
return value

Expand Down
10 changes: 7 additions & 3 deletions nltk/parse/malt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Natural Language Toolkit: Interface to MaltParser
#
# Author: Dan Garrette <dhgarrette@gmail.com>
# Contributor: Liling Tan, Mustufain, osamamukhtar11
#
# Copyright (C) 2001-2015 NLTK Project
# URL: <http://nltk.org/>
Expand All @@ -12,6 +13,7 @@
from nltk.six import text_type

import os
import sys
import tempfile
import subprocess
import inspect
Expand Down Expand Up @@ -66,7 +68,7 @@ def find_maltparser(parser_dirname):
# Checks that the found directory contains all the necessary .jar files
malt_dependencies = ['','','']
_malt_jars = set(find_jars_within_path(_malt_dir))
_jars = set(jar.rpartition('/')[2] for jar in _malt_jars)
_jars = set(os.path.split(jar)[1] for jar in _malt_jars)
malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])

assert malt_dependencies.issubset(_jars)
Expand Down Expand Up @@ -215,8 +217,10 @@ def generate_malt_command(self, inputfilename, outputfilename=None, mode=None):
"""

cmd = ['java']
cmd+= self.additional_java_args # Adds additional java arguments.
cmd+= ['-cp', ':'.join(self.malt_jars)] # Adds classpaths for jars
cmd+= self.additional_java_args # Adds additional java arguments
# Joins classpaths with ";" on Windows and with ":" on Linux/Mac
classpaths_separator = ';' if sys.platform.startswith('win') else ':'
cmd+= ['-cp', classpaths_separator.join(self.malt_jars)] # Adds classpaths for jars
cmd+= ['org.maltparser.Malt'] # Adds the main function.

# Adds the model file.
Expand Down
10 changes: 5 additions & 5 deletions nltk/sentiment/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def demo_tweets(trainer, n_instances=None, output=None):
:param output: the output file where results have to be reported.
"""
from nltk.tokenize import TweetTokenizer
from sentiment_analyzer import SentimentAnalyzer
from nltk.sentiment import SentimentAnalyzer
from nltk.corpus import twitter_samples, stopwords

# Different customizations for the TweetTokenizer
Expand Down Expand Up @@ -484,7 +484,7 @@ def demo_movie_reviews(trainer, n_instances=None, output=None):
:param output: the output file where results have to be reported.
"""
from nltk.corpus import movie_reviews
from sentiment_analyzer import SentimentAnalyzer
from nltk.sentiment import SentimentAnalyzer

if n_instances is not None:
n_instances = int(n_instances/2)
Expand Down Expand Up @@ -536,7 +536,7 @@ def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=Non
and negative.
:param output: the output file where results have to be reported.
"""
from sentiment_analyzer import SentimentAnalyzer
from nltk.sentiment import SentimentAnalyzer
from nltk.corpus import subjectivity

if n_instances is not None:
Expand Down Expand Up @@ -650,7 +650,7 @@ def demo_vader_instance(text):
:param text: a text whose polarity has to be evaluated.
"""
from vader import SentimentIntensityAnalyzer
from nltk.sentiment import SentimentIntensityAnalyzer
vader_analyzer = SentimentIntensityAnalyzer()
print(vader_analyzer.polarity_scores(text))

Expand All @@ -663,7 +663,7 @@ def demo_vader_tweets(n_instances=None, output=None):
"""
from collections import defaultdict
from nltk.corpus import twitter_samples
from vader import SentimentIntensityAnalyzer
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.metrics import (accuracy as eval_accuracy, precision as eval_precision,
recall as eval_recall, f_measure as eval_f_measure)

Expand Down
5 changes: 3 additions & 2 deletions nltk/tokenize/mwe.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ def add_mwe(self, mwe):
>>> tokenizer.add_mwe(('a', 'b'))
>>> tokenizer.add_mwe(('a', 'b', 'c'))
>>> tokenizer.add_mwe(('a', 'x'))
>>> tokenizer._mwes.as_dict()
{'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
>>> expected = {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
>>> tokenizer._mwes.as_dict() == expected
True
"""
self._mwes.insert(mwe)
Expand Down
2 changes: 1 addition & 1 deletion nltk/translate/bleu_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ def method3(self, p_n, *args, **kwargs):
incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
for i, p_i in enumerate(p_n):
if p_i == 0:
p_n[i] = 1 / 2**incvnt
p_n[i] = 1 / (2**incvnt * p_i.denominator)
incvnt+=1
return p_n

Expand Down
5 changes: 3 additions & 2 deletions nltk/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1332,8 +1332,9 @@ def as_dict(self, d=None):
>>> from nltk.util import Trie
>>> trie = Trie(["abc", "def"])
>>> trie.as_dict()
{'a': {'b': {'c': {True: None}}}, 'd': {'e': {'f': {True: None}}}}
>>> expected = {'a': {'b': {'c': {True: None}}}, 'd': {'e': {'f': {True: None}}}}
>>> trie.as_dict() == expected
True
"""
def _default_to_regular(d):
Expand Down

0 comments on commit 3d92820

Please sign in to comment.