In [2]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet

# Download necessary datasets
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
lemmatizer = WordNetLemmatizer()

words = ["running", "happier", "flies", "unbelievable", "replaying"]

for word in words:
    lemma = lemmatizer.lemmatize(word, pos='v')  # 'v' for verbs
    print(f"Word: {word} -> Lemma: {lemma}")


Word: running -> Lemma: run
Word: happier -> Lemma: happier
Word: flies -> Lemma: fly
Word: unbelievable -> Lemma: unbelievable
Word: replaying -> Lemma: replay


In [4]:
import re

prefixes = ["un", "re", "pre", "mis", "dis", "non", "in", "im", "ir", "il"]
suffixes = ["ing", "ed", "s", "es", "er", "est", "ly", "ment", "ness", "able"]

def analyze_morphology(word):
    root = word
    prefix_found, suffix_found = "", ""

    # Check for prefixes
    for prefix in prefixes:
        if word.startswith(prefix):
            prefix_found = prefix
            root = word[len(prefix):]
            break

    # Check for suffixes
    for suffix in suffixes:
        if word.endswith(suffix):
            suffix_found = suffix
            root = root[:-len(suffix)]
            break

    return {"Word": word, "Prefix": prefix_found, "Root": root, "Suffix": suffix_found}

words = ["unbelievable", "replaying", "happiness", "disapprove", "running"]

for word in words:
    print(analyze_morphology(word))


{'Word': 'unbelievable', 'Prefix': 'un', 'Root': 'believ', 'Suffix': 'able'}
{'Word': 'replaying', 'Prefix': 're', 'Root': 'play', 'Suffix': 'ing'}
{'Word': 'happiness', 'Prefix': '', 'Root': 'happines', 'Suffix': 's'}
{'Word': 'disapprove', 'Prefix': 'dis', 'Root': 'approve', 'Suffix': ''}
{'Word': 'running', 'Prefix': '', 'Root': 'runn', 'Suffix': 'ing'}


In [5]:
import spacy

nlp = spacy.load("en_core_web_sm")

word = "replaying"
doc = nlp(word)

for token in doc:
    print(f"Word: {token.text}")
    print(f"Lemma: {token.lemma_}")
    print(f"Morphology: {token.morph}")


Word: replaying
Lemma: replay
Morphology: Aspect=Prog|Tense=Pres|VerbForm=Part
