nltk · BatMrE · Aug 29, 2021 · Oct 26, 2021 · dimazest · Aug 31, 2021
diff --git a/AUTHORS.md b/AUTHORS.md
@@ -280,6 +280,7 @@
 - Hiroki Teranishi <https://github.com/chantera>
 - Ruben Cartuyvels <https://github.com/rubencart>
 - Dalton Pearson <https://github.com/daltonpearson>
+- Abdul Rafey Khan <https://github.com/BatMrE>
 
 ## Others whose work we've taken and included in NLTK, but who didn't directly contribute it:
 

diff --git a/nltk/parse/corenlp.py b/nltk/parse/corenlp.py
@@ -326,7 +326,7 @@ def tokenize(self, text, properties=None):
             for token in sentence["tokens"]:
                 yield token["originalText"] or token["word"]
 
-    def tag_sents(self, sentences):
+    def tag_sents(self, sentences, properties=None):
         """
         Tag multiple sentences.
 
@@ -339,9 +339,11 @@ def tag_sents(self, sentences):
         """
         # Converting list(list(str)) -> list(str)
         sentences = (" ".join(words) for words in sentences)
-        return [sentences[0] for sentences in self.raw_tag_sents(sentences)]
+        if properties is None:
+            properties = {"tokenize.whitespace": "true"}
+        return [sentences[0] for sentences in self.raw_tag_sents(sentences, properties)]
 
-    def tag(self, sentence):
+    def tag(self, sentence, properties=None):
         """
         Tag a list of tokens.
 
@@ -360,9 +362,9 @@ def tag(self, sentence):
         ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'),
         ('unladen', 'JJ'), ('swallow', 'VB'), ('?', '.')]
         """
-        return self.tag_sents([sentence])[0]
+        return self.tag_sents([sentence], properties)[0]
 
-    def raw_tag_sents(self, sentences):
+    def raw_tag_sents(self, sentences, properties=None):
         """
         Tag multiple sentences.
 
@@ -377,8 +379,13 @@ def raw_tag_sents(self, sentences):
             "annotators": "tokenize,ssplit,",
         }
 
+        default_properties.update(properties or {})
+
         # Supports only 'pos' or 'ner' tags.
-        assert self.tagtype in ["pos", "ner"]
+        assert self.tagtype in [
+            "pos",
+            "ner",
+        ], "CoreNLP tagger supports only 'pos' or 'ner' tags."
         default_properties["annotators"] += self.tagtype
         for sentence in sentences:
             tagged_data = self.api_call(sentence, properties=default_properties)