In [None]:
# Lab Assignment 6: Word Sense Disambiguation using Lesk Algorithm
# •	Implement the Lesk Algorithm for word sense disambiguation.
# •	Take an ambiguous word (e.g., "bank") and disambiguate its meaning based on context.
# •	Use WordNet for retrieving word definitions and related synsets.

In [None]:
# Step 1: Install nltk and download WordNet data
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt_tab')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize

In [None]:
# Lesk Algorithm implementation
def lesk_algorithm(context_sentence, ambiguous_word):
    max_overlap = 0
    best_sense = None
    context = set(word_tokenize(context_sentence.lower()))

    for sense in wn.synsets(ambiguous_word):
        # Get definition and examples for the sense
        signature = set(word_tokenize(sense.definition()))
        for example in sense.examples():
            signature.update(word_tokenize(example))

        # Calculate overlap
        overlap = len(context.intersection(signature))

        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense

    return best_sense

In [None]:
# Test sentences with the ambiguous word "bank"
sentences = [
    "He deposited cash in the bank",
    "They sat on the river bank and had a picnic"
]

ambiguous_word = "bank"

for sentence in sentences:
    best_sense = lesk_algorithm(sentence, ambiguous_word)
    print(f"Sentence: {sentence}")
    if best_sense:
        print(f"Predicted Sense: {best_sense.name()}")
        print(f"Definition: {best_sense.definition()}\n")
    else:
        print("No suitable sense found.\n")

Sentence: He deposited cash in the bank
Predicted Sense: bank.n.06
Definition: the funds held by a gambling house or the dealer in some gambling games

Sentence: They sat on the river bank and had a picnic
Predicted Sense: bank.n.01
Definition: sloping land (especially the slope beside a body of water)



In [None]:
# Test sentences with the ambiguous word "amazon"
sentences = [
    "Amazon is the largest service provider",
    "Amazon is the longest river in the world"
]

ambiguous_word = "amazon"

for sentence in sentences:
    best_sense = lesk_algorithm(sentence, ambiguous_word)
    print(f"Sentence: {sentence}")
    if best_sense:
        print(f"Predicted Sense: {best_sense.name()}")
        print(f"Definition: {best_sense.definition()}\n")
    else:
        print("No suitable sense found.\n")

Sentence: Amazon is the largest service provider
Predicted Sense: amazon.n.02
Definition: (Greek mythology) one of a nation of women warriors of Scythia (who burned off the right breast in order to use a bow and arrow more effectively)

Sentence: Amazon is the longest river in the world
Predicted Sense: amazon.n.03
Definition: a major South American river; arises in the Andes and flows eastward into the South Atlantic; the world's 2nd longest river (4000 miles)

