In [1]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.wsd import lesk
from nltk.tokenize import word_tokenize

# Ensure necessary NLTK data is downloaded.
# nltk.download() skips packages that are already up to date, so re-running
# this is cheap.
nltk.download('wordnet')    # WordNet synsets/glosses consulted by lesk()
nltk.download('omw-1.4')    # Open Multilingual WordNet data
nltk.download('punkt')      # Punkt tokenizer models (older NLTK releases)
# Recent NLTK releases load the 'punkt_tab' resource inside word_tokenize();
# without it tokenization fails with a LookupError on a fresh install.
nltk.download('punkt_tab')

def word_sense_disambiguation(sentence, ambiguous_word, pos=None):
    """
    Perform Word Sense Disambiguation using the Lesk Algorithm.

    The sentence is tokenized with ``word_tokenize`` and the resulting tokens
    are handed to ``lesk`` as the context for ``ambiguous_word``.

    Args:
        sentence (str): The sentence containing the ambiguous word.
        ambiguous_word (str): The word to disambiguate.
        pos (str, optional): WordNet part-of-speech tag (e.g. ``'n'`` or
            ``'v'``) forwarded to ``lesk`` to restrict the candidate synsets.
            Defaults to ``None``, which keeps the original behaviour of
            considering every part of speech.

    Returns:
        str: A report with the word, the selected synset name, its definition
        and its examples, or a "No sense found" message when ``lesk`` returns
        no synset.
    """
    context_tokens = word_tokenize(sentence)
    sense = lesk(context_tokens, ambiguous_word, pos=pos)

    # Guard clause: lesk() gives back nothing when there is no candidate synset.
    if not sense:
        return f"No sense found for '{ambiguous_word}' in this context."

    return (
        f"Word: {ambiguous_word}\n"
        f"Sense: {sense.name()}\n"
        f"Definition: {sense.definition()}\n"
        f"Examples: {sense.examples()}"
    )

def display_all_word_senses(sentence):
    """
    Print the Lesk-selected WordNet sense for every token of a sentence.

    Each token is disambiguated against the full token list of the sentence.
    For every token the synset name and definition are printed, followed by
    the examples when the synset has any; tokens for which Lesk returns no
    synset get a "no valid sense" line instead.

    Args:
        sentence (str): The input sentence.
    """
    tokens = word_tokenize(sentence)
    divider = "-" * 40

    print(f"\nInput Sentence: {sentence}\n")
    print("Disambiguated Word Senses:\n")

    for token in tokens:
        chosen = lesk(tokens, token)
        print(f"Word: {token}")

        # Tokens without a synset only get the fallback line.
        if not chosen:
            print(" - No valid sense found in WordNet.")
            print(divider)
            continue

        print(f" - Sense: {chosen.name()}")
        print(f" - Definition: {chosen.definition()}")
        usage_examples = chosen.examples()
        if usage_examples:
            print(f" - Examples: {usage_examples}")
        print(divider)

# Example Usage
if __name__ == "__main__":
    # The same target word in a financial and in a riverside context
    financial_report = word_sense_disambiguation(
        "I went to the bank to deposit money.", "bank"
    )
    print(financial_report)
    print()
    riverside_report = word_sense_disambiguation(
        "The river overflowed near the bank.", "bank"
    )
    print(riverside_report)

    # Per-token senses for two complete sentences
    for example_sentence in (
        "The patient is suffering from severe depression and anxiety.",
        "The bat flew out of the cave and startled the group.",
    ):
        display_all_word_senses(example_sentence)


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\rosha\AppData\Roaming\nltk_data...
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\rosha\AppData\Roaming\nltk_data...
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rosha\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


Word: bank
Sense: savings_bank.n.02
Definition: a container (usually with a slot in the top) for keeping money at home
Examples: ['the coin bank was empty']

Word: bank
Sense: savings_bank.n.02
Definition: a container (usually with a slot in the top) for keeping money at home
Examples: ['the coin bank was empty']

Input Sentence: The patient is suffering from severe depression and anxiety.

Disambiguated Word Senses:

Word: The
 - No valid sense found in WordNet.
----------------------------------------
Word: patient
 - Sense: affected_role.n.01
 - Definition: the semantic role of an entity that is not the agent but is directly involved in or affected by the happening denoted by the verb in the clause
----------------------------------------
Word: is
 - Sense: exist.v.01
 - Definition: have an existence, be extant
 - Examples: ['Is there a God?']
----------------------------------------
Word: suffering
 - Sense: suffering.n.02
 - Definition: misery resulting from affliction
-----------

# Objective

    The primary objective of this project is to identify the correct sense (meaning) of a polysemous word (a word with multiple meanings) based on its context within a sentence. This is achieved using the Lesk algorithm, a classic approach in Natural Language Processing (NLP) that leverages dictionary definitions (WordNet glosses) for disambiguation.

# Technologies & Tools Used

    Python 3.x

    NLTK (Natural Language Toolkit):

        WordNet (wordnet)

        Lesk algorithm (nltk.wsd.lesk)

        Tokenization (punkt)

# Methodology

    Input: A sentence containing an ambiguous word.

    Tokenization: The sentence is tokenized into words using NLTK's word_tokenize.

    Disambiguation:

        The Lesk algorithm is applied to compare the gloss of each synset of the ambiguous word with the context (surrounding words).

        The sense (synset) with the highest overlap is selected.

    Output: The selected sense (with name, definition, and examples) is displayed.

# Model

In [12]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.wsd import lesk
from nltk.tokenize import word_tokenize

In [13]:
# Ensure necessary NLTK data is downloaded.
# nltk.download() skips packages that are already up to date, so re-running
# this cell is cheap.
nltk.download('wordnet')    # WordNet synsets/glosses consulted by lesk()
nltk.download('omw-1.4')    # Open Multilingual WordNet data
nltk.download('punkt')      # Punkt tokenizer models (older NLTK releases)
# Recent NLTK releases load the 'punkt_tab' resource inside word_tokenize();
# without it tokenization fails with a LookupError on a fresh install.
nltk.download('punkt_tab')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\rosha\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\rosha\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rosha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [14]:
def word_sense_disambiguation(sentence, ambiguous_word, pos=None):
    """
    Perform Word Sense Disambiguation using the Lesk Algorithm.

    The sentence is tokenized with ``word_tokenize`` and the resulting tokens
    are handed to ``lesk`` as the context for ``ambiguous_word``.

    Args:
        sentence (str): The sentence containing the ambiguous word.
        ambiguous_word (str): The word to disambiguate.
        pos (str, optional): WordNet part-of-speech tag (e.g. ``'n'`` or
            ``'v'``) forwarded to ``lesk`` to restrict the candidate synsets.
            Defaults to ``None``, which keeps the original behaviour of
            considering every part of speech.

    Returns:
        str: A report with the word, the selected synset name, its definition
        and its examples, or a "No sense found" message when ``lesk`` returns
        no synset.
    """
    context_tokens = word_tokenize(sentence)
    sense = lesk(context_tokens, ambiguous_word, pos=pos)

    # Guard clause: lesk() gives back nothing when there is no candidate synset.
    if not sense:
        return f"No sense found for '{ambiguous_word}' in this context."

    return (
        f"Word: {ambiguous_word}\n"
        f"Sense: {sense.name()}\n"
        f"Definition: {sense.definition()}\n"
        f"Examples: {sense.examples()}"
    )


In [15]:
def display_all_word_senses(sentence):
    """
    Print the Lesk-selected WordNet sense for every token of a sentence.

    Each token is disambiguated against the full token list of the sentence.
    For every token the synset name and definition are printed, followed by
    the examples when the synset has any; tokens for which Lesk returns no
    synset get a "no valid sense" line instead.

    Args:
        sentence (str): The input sentence.
    """
    tokens = word_tokenize(sentence)
    divider = "-" * 40

    print(f"\nInput Sentence: {sentence}\n")
    print("Disambiguated Word Senses:\n")

    for token in tokens:
        chosen = lesk(tokens, token)
        print(f"Word: {token}")

        # Tokens without a synset only get the fallback line.
        if not chosen:
            print(" - No valid sense found in WordNet.")
            print(divider)
            continue

        print(f" - Sense: {chosen.name()}")
        print(f" - Definition: {chosen.definition()}")
        usage_examples = chosen.examples()
        if usage_examples:
            print(f" - Examples: {usage_examples}")
        print(divider)

In [16]:
# Example Usage
if __name__ == "__main__":
    # The same target word in a financial and in a riverside context
    financial_report = word_sense_disambiguation(
        "I went to the bank to deposit money.", "bank"
    )
    print(financial_report)
    print()
    riverside_report = word_sense_disambiguation(
        "The river overflowed near the bank.", "bank"
    )
    print(riverside_report)


Word: bank
Sense: savings_bank.n.02
Definition: a container (usually with a slot in the top) for keeping money at home
Examples: ['the coin bank was empty']

Word: bank
Sense: savings_bank.n.02
Definition: a container (usually with a slot in the top) for keeping money at home
Examples: ['the coin bank was empty']


# Another Example: Patient Mental Health

In [17]:
if __name__ == "__main__":
    # Show the sense Lesk picks for every token of a clinical sentence.
    clinical_sentence = "The patient is suffering from severe depression and anxiety."
    display_all_word_senses(clinical_sentence)


Input Sentence: The patient is suffering from severe depression and anxiety.

Disambiguated Word Senses:

Word: The
 - No valid sense found in WordNet.
----------------------------------------
Word: patient
 - Sense: affected_role.n.01
 - Definition: the semantic role of an entity that is not the agent but is directly involved in or affected by the happening denoted by the verb in the clause
----------------------------------------
Word: is
 - Sense: exist.v.01
 - Definition: have an existence, be extant
 - Examples: ['Is there a God?']
----------------------------------------
Word: suffering
 - Sense: suffering.n.02
 - Definition: misery resulting from affliction
----------------------------------------
Word: from
 - No valid sense found in WordNet.
----------------------------------------
Word: severe
 - Sense: severe.s.04
 - Definition: unsparing and uncompromising in discipline or judgment; - H.G.Wells
 - Examples: ['a parent severe to the pitch of hostility', 'a hefty six-footer 

In [18]:
if __name__ == "__main__":
    # Show the sense Lesk picks for every token of a sentence about an animal.
    animal_sentence = "The bat flew out of the cave and startled the group."
    display_all_word_senses(animal_sentence)


Input Sentence: The bat flew out of the cave and startled the group.

Disambiguated Word Senses:

Word: The
 - No valid sense found in WordNet.
----------------------------------------
Word: bat
 - Sense: cricket_bat.n.01
 - Definition: the club used in playing cricket
 - Examples: ['a cricket bat has a narrow handle and a broad flat end for hitting']
----------------------------------------
Word: flew
 - Sense: vanish.v.05
 - Definition: decrease rapidly and disappear
 - Examples: ['the money vanished in las Vegas', 'all my stock assets have vaporized']
----------------------------------------
Word: out
 - Sense: out.s.04
 - Definition: out of power; especially having been unsuccessful in an election
 - Examples: ['now the Democrats are out']
----------------------------------------
Word: of
 - No valid sense found in WordNet.
----------------------------------------
Word: the
 - No valid sense found in WordNet.
----------------------------------------
Word: cave
 - Sense: cave.v.01
