In [30]:
import nltk
from nltk.corpus import wordnet
from nltk.tree import ParentedTree
from nltk import word_tokenize, pos_tag, RegexpParser
from nltk.tree import Tree

sentence = input("Enter a sentence: ")

# Tokenize the sentence and extract the part of speech tags
tokens = word_tokenize(sentence)
pos_tags = pos_tag(tokens)

words = nltk.word_tokenize(sentence)

# Identify the part-of-speech tags for the words
pos_tags = nltk.pos_tag(words)

# Define the grammar for the parser
grammar = r"""
    NP: {<DT|PP\$>?<JJ>*<NN>}   # chunk determiner/possessive, adjectives and noun
        {<NNP>+}                # chunk sequences of proper nouns
        {<PRP>}
    VP: {<VB.*>+}               # chunk verbs and their modals
        {<JJ>*<RB>?<VB.*>+}     # chunk adverbs modifying the verb
    PP: {<IN><NP>}              # chunk prepositions followed by noun phrases
"""

# Create the parser and parse the sentence
parser = RegexpParser(grammar)
tree = parser.parse(pos_tags)

# Visualize the parse tree
tree.draw()

Enter a sentence: I saw her duck


In [31]:
for i in pos_tags:
    print(i)

('I', 'PRP')
('saw', 'VBD')
('her', 'PRP')
('duck', 'NN')


In [32]:
import spacy

nlp = spacy.load('en_core_web_sm')
text = sentence
doc = nlp(text)

# Getting dependency tags
for token in doc:
    print(token.text, '=>', token.dep_)

# Importing visualizer
from spacy import displacy

# Visualizing dependency tree
displacy.render(doc, jupyter=True)

I => nsubj
saw => ROOT
her => poss
duck => dobj


In [33]:
aw = []
ambiguous_words = []
sentences = []

for i, (word, pos) in enumerate(pos_tags):
    w=""
    if pos == 'NN' or pos == 'VB':
        # Check if the word is a homograph (similar words with a different meaning)
        
        if len(nltk.corpus.wordnet.synsets(word)) > 1:
            print("The word '{}' is ambiguous in the sentence '{}'.".format(
                word, sentence))
            aw.append(word)
            w=word

        # Identify the possible meanings for any verbs or nouns with multiple senses
        meanings = set()
        for synset in wordnet.synsets(word):
            for lemma in synset.lemmas():
                meanings.add(lemma.name())
        if len(meanings) > 1:
            print(f"Ambiguous word: {word}")
            for j, meaning in enumerate(meanings):
                ambiguous_words.append(meaning)
                print(f"  Meaning {j+1}: {meaning}") 
    # Generate all possible sentences by replacing ambiguous words
    for meanings in ambiguous_words:
            new_sentence = sentence.replace(word, meanings)
            sentences.append(new_sentence) 

The word 'duck' is ambiguous in the sentence 'I saw her duck'.
Ambiguous word: duck
  Meaning 1: dip
  Meaning 2: fudge
  Meaning 3: douse
  Meaning 4: duck
  Meaning 5: parry
  Meaning 6: evade
  Meaning 7: sidestep
  Meaning 8: hedge
  Meaning 9: circumvent
  Meaning 10: skirt
  Meaning 11: duck's_egg
  Meaning 12: dodge
  Meaning 13: put_off
  Meaning 14: elude


In [34]:
for i in sentences:
    print(i)

I saw her dip
I saw her fudge
I saw her douse
I saw her duck
I saw her parry
I saw her evade
I saw her sidestep
I saw her hedge
I saw her circumvent
I saw her skirt
I saw her duck's_egg
I saw her dodge
I saw her put_off
I saw her elude


In [35]:
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag

# grammar = ('''
#     NP: {<NNP><VBZ|VBD><DT><JJ>*<NN><.>}
#     ''')

# grammar = r"""
#     NP: {<DT>?<JJ>*<NN.*>+}       # noun phrase
#     VP: {<VB.*><NP|PP>*}          # verb phrase
#     PP: {<IN><NP>}                # prepositional phrase
#     ADJ: {<JJ.*>}                 # adjective
#     ADV: {<RB.*>}                 # adverb
#     CONJ: {<CC>}                  # conjunction
#     PUNC: {<\.|,|:|''|\(``|\)|\$>}  # punctuation
#     # Define sentence patterns
#     S: {<NP><VP>}                 # simple sentence
    
# """

grammar = r"""
    NP: {<DT>?<JJ>*<NN|NNS|NNP|NNPS>+}       # noun phrase (requires at least one noun)
    VP: {<VB|VBD|VBG|VBN|VBP|VBZ><NP>+}     # verb phrase (requires at least one noun phrase)
    PP: {<IN><NN|NNS|NNP|NNPS>}             # prepositional phrase (requires a noun)
    ADJ: {<JJ|JJR|JJS>}                     # adjective (matches only common adjective types)
    ADV: {<RB|RBR|RBS>}                     # adverb (matches only common adverb types)
    CONJ: {<CC>}                            # conjunction (matches only coordinating conjunctions)
    PUNC: {<\.|,|:|;|\?|!>}                 # punctuation (matches common punctuation symbols)
"""



# sentences = [
#     "Mary had a little lamb.",
#     "John has a cute black pup.",
#     "I ate five apples.",
#     "I saw her duck",
# ]

def has_noun_phrase(sentence):
    parsed = chunkParser.parse(pos_tag(word_tokenize(sentence)))
    for subtree in parsed:
        if type(subtree) == nltk.Tree and subtree.label() == 'NP':
            return True
    return False

chunkParser = nltk.RegexpParser(grammar)
for sentence in sentences:
    if(has_noun_phrase(sentence)):
        print(sentence)
    # print(has_noun_phrase(sentence))

I saw her dip
I saw her fudge
I saw her douse
I saw her duck
I saw her parry
I saw her evade
I saw her sidestep
I saw her hedge
I saw her circumvent
I saw her skirt
I saw her duck's_egg
I saw her dodge
I saw her put_off
I saw her elude


In [36]:
import nltk

def disambiguate(sentence):
    # Tokenize the sentence into individual words
    tokens = nltk.word_tokenize(sentence)
    
    # Part-of-speech tag each word
    pos_tags = nltk.pos_tag(tokens)
    
    # Identify the possible meanings of the sentence based on the parts of speech
    for i, (word, pos) in enumerate(pos_tags):
        if pos == 'NN' or pos == 'VB':
            # Check if the word is a homograph
            if len(nltk.corpus.wordnet.synsets(word)) > 1:
                # If the word is a homograph, prompt the user to clarify the intended meaning
                print("The word '{}' is ambiguous in the sentence '{}'.".format(word, sentence))
                clarification = input("Please clarify the intended meaning: ")
                
                # Replace the ambiguous word with the user's clarification
                tokens[i] = clarification
    
    # Reconstruct the sentence from the tokens
    corrected_sentence = ' '.join(tokens)
    # Tokenize the sentence and extract the part of speech tags
    tokens2 = word_tokenize(corrected_sentence)
    pos_tags2 = pos_tag(tokens2)

    words2 = nltk.word_tokenize(corrected_sentence)

    # Identify the part-of-speech tags for the words
    pos_tags2 = nltk.pos_tag(words2)

    # Define the grammar for the parser
    grammar2 = r"""
        NP: {<DT|PP\$>?<JJ>*<NN>}   # chunk determiner/possessive, adjectives and noun
            {<NNP>+}                # chunk sequences of proper nouns
            {<PRP>}
        VP: {<VB.*>+}               # chunk verbs and their modals
            {<JJ>*<RB>?<VB.*>+}     # chunk adverbs modifying the verb
        PP: {<IN><NP>}              # chunk prepositions followed by noun phrases
    """

    # Create the parser and parse the sentence
    parser2 = RegexpParser(grammar2)
    tree2 = parser2.parse(pos_tags2)

    # Visualize the parse tree
    tree2.draw()
    return corrected_sentence

disambiguate(text)


The word 'duck' is ambiguous in the sentence 'I saw her duck'.
Please clarify the intended meaning: duck belonging to her


'I saw her duck belonging to her'