nltk · Sion1225 · May 21, 2024 · May 21, 2024 · May 22, 2024 · May 22, 2024
diff --git a/AUTHORS.md b/AUTHORS.md
@@ -302,6 +302,7 @@
 - Akihiro Yamazaki <https://github.com/zakkie>
 - Ron Urbach <https://github.com/sharpblade4>
 - Vivek Kalyan <https://github.com/vivekkalyan>
+- Siwon Seo <https://github.com/Sion1225>
 
 ## Others whose work we've taken and included in NLTK, but who didn't directly contribute it:
 

diff --git a/nltk/stem/wordnet.py b/nltk/stem/wordnet.py
@@ -1,11 +1,13 @@
 # Natural Language Toolkit: WordNet stemmer interface
 #
-# Copyright (C) 2001-2023 NLTK Project
+# Copyright (C) 2001-2024 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <https://www.nltk.org/>
 # For license information, see LICENSE.TXT
 
+from typing import List
+
 from nltk.corpus import wordnet as wn
 
 
@@ -15,19 +17,6 @@ class WordNetLemmatizer:
 
     Lemmatize using WordNet's built-in morphy function.
     Returns the input word unchanged if it cannot be found in WordNet.
-
-        >>> from nltk.stem import WordNetLemmatizer
-        >>> wnl = WordNetLemmatizer()
-        >>> print(wnl.lemmatize('dogs'))
-        dog
-        >>> print(wnl.lemmatize('churches'))
-        church
-        >>> print(wnl.lemmatize('aardwolves'))
-        aardwolf
-        >>> print(wnl.lemmatize('abaci'))
-        abacus
-        >>> print(wnl.lemmatize('hardrock'))
-        hardrock
     """
 
     def lemmatize(self, word: str, pos: str = "n") -> str:
@@ -41,9 +30,54 @@ def lemmatize(self, word: str, pos: str = "n") -> str:
             for satellite adjectives.
         :type pos: str
         :return: The lemma of `word`, for the given `pos`.
+
+            >>> from nltk.stem import WordNetLemmatizer
+            >>> wnl = WordNetLemmatizer()
+            >>> print(wnl.lemmatize('dogs'))
+            dog
+            >>> print(wnl.lemmatize('churches'))
+            church
+            >>> print(wnl.lemmatize('aardwolves'))
+            aardwolf
+            >>> print(wnl.lemmatize('abaci'))
+            abacus
+            >>> print(wnl.lemmatize('hardrock'))
+            hardrock
         """
         lemmas = wn._morphy(word, pos)
         return min(lemmas, key=len) if lemmas else word
 
+    def lemmatize_text(self, text: str) -> List[str]:
+        """
+        Tokenize input text, estimate the part-of-speech tag of each word,
+        and return the list of the lemmas of these words.
+
+        Returns each input word unchanged when it cannot be found in WordNet.
+
+        :param text: The input text to lemmatize.
+        :type text: str
+        :return: A list with the estimated lemma of each `word` in the input text.
+
+            >>> from nltk.stem import WordNetLemmatizer
+            >>> wntl = WordNetLemmatizer().lemmatize_text
+            >>> print(wntl('Proverbs are short sentences drawn from long experience.'))
+            ['Proverbs', 'be', 'short', 'sentence', 'draw', 'from', 'long', 'experience', '.']
+            >>> print(wntl('proverbs are short sentences drawn from long experience.'))
+            ['proverb', 'be', 'short', 'sentence', 'draw', 'from', 'long', 'experience', '.']
+        """
+        from nltk.tag import pos_tag
+        from nltk.tokenize import word_tokenize
+
+        return [
+            # Lemmatize each POS-tagged word (the tag itself is not returned):
+            self.lemmatize(word, self.tag2pos(tag))
+            # Tokenize the input text and POS tag each word:
+            for word, tag in pos_tag(word_tokenize(text))
+        ]
+
+    @staticmethod
+    def tag2pos(tag: str) -> str:  # first letter of tag -> WordNet pos; else "n"
+        return {"N": "n", "V": "v", "J": "a", "R": "r"}.get(tag[:1], "n")  # "" -> "n"
+
     def __repr__(self):
         return "<WordNetLemmatizer>"