### Download the necessary nltk packages

In [1]:
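# Optional workaround: uncomment only if nltk.download() fails with an SSL certificate error.
# It disables certificate verification for connections that use the default HTTPS context,
# so do not leave it enabled once the download has succeeded.
# import ssl
# ssl._create_default_https_context = ssl._create_unverified_context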

In [2]:
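# Run once to fetch the data used below. word_tokenize additionally needs the
# Punkt tokenizer models ('punkt', or 'punkt_tab' on recent NLTK releases).
# import nltk
# nltk.download('wordnet')
# nltk.download('punkt')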

# Part 1

In [3]:
# from nltk.corpus import wordnet as wn
from nltk.wsd import lesk   # WordNet is already used inside the Lesk implementation, so no separate import is needed here
from nltk.tokenize import word_tokenize

In [4]:
def lesk_algorithm(sentence, ambiguous_word):
    """Disambiguate ``ambiguous_word`` in ``sentence`` using NLTK's built-in Lesk.

    The sentence is lower-cased and tokenized, and the resulting tokens are
    handed to ``lesk`` as the context of the word.

    Args:
        sentence: Text that contains the word to disambiguate.
        ambiguous_word: The word whose sense should be resolved.

    Returns:
        A ``(sense, definition)`` tuple for the sense returned by ``lesk``,
        or ``(None, "No suitable sense found.")`` when ``lesk`` returns ``None``.
    """
    context_tokens = word_tokenize(sentence.lower())
    chosen = lesk(context_tokens, ambiguous_word)

    # Guard clause: nothing was selected for this word.
    if chosen is None:
        return None, "No suitable sense found."
    return chosen, chosen.definition()

In [5]:
def get_result(sentence, ambiguous_word):
    """Disambiguate a word with ``lesk_algorithm`` and print the outcome.

    Args:
        sentence: Text that contains the word to disambiguate.
        ambiguous_word: The word whose sense should be reported.

    Prints the word, the chosen sense and its definition, or a single
    "No suitable sense found." line when no sense is returned. Returns ``None``.
    """
    sense, gloss = lesk_algorithm(sentence, ambiguous_word)

    # Handle the "nothing found" case first so the report below stays flat.
    if not sense:
        print("No suitable sense found.")
        return

    print(f"Ambiguous word: {ambiguous_word}")
    print(f"Best sense: {sense}")
    print(f"Definition: {gloss}")

In [6]:
# Example 1: "bank" used in a financial context (depositing money).
sentence = "He went to the bank to deposit his money."
ambiguous_word = "bank"
get_result(sentence, ambiguous_word)

Ambiguous word: bank
Best sense: Synset('savings_bank.n.02')
Definition: a container (usually with a slot in the top) for keeping money at home


In [7]:
# Example 2: "bank" used in a riverside context.
sentence = "The river bank was full of dead fishes."
ambiguous_word = "bank"
get_result(sentence, ambiguous_word)

Ambiguous word: bank
Best sense: Synset('bank.v.07')
Definition: cover with ashes so to control the rate of burning


### The results are not very accurate: in the second example "bank" is resolved to a verb sense about controlling a fire. Part 2 uses a different approach to improve the accuracy.

# Part 2

In [8]:
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize

In [9]:
def get_context(sentence, ambiguous_word):
    """Collect the context words surrounding ``ambiguous_word`` in ``sentence``.

    Args:
        sentence: Text that contains the word to disambiguate.
        ambiguous_word: The target word; its lower-cased form is left out.

    Returns:
        The set of distinct lower-cased tokens of ``sentence``, excluding the
        lower-cased target word. No other filtering is applied.
    """
    words = word_tokenize(sentence.lower())
    target = ambiguous_word.lower()
    return {word for word in words if word != target}

def lesk_algorithm(sentence, ambiguous_word):
    """Pick the WordNet sense whose definition shares the most words with the sentence.

    Every synset of ``ambiguous_word`` is scored by the number of distinct
    context words (from ``get_context``) that also appear in its lower-cased,
    tokenized definition.

    Args:
        sentence: Text that contains the word to disambiguate.
        ambiguous_word: The word whose sense should be resolved.

    Returns:
        A ``(sense, definition)`` tuple for the highest-scoring synset; on a tie
        the synset listed first wins. ``(None, "No suitable sense found.")`` is
        returned when no synset shares at least one word with the context.
    """
    context = get_context(sentence, ambiguous_word)

    def gloss_overlap(candidate):
        # Count the context words that also occur in the candidate's definition.
        gloss_words = set(word_tokenize(candidate.definition().lower()))
        return len(context & gloss_words)

    scored = [(gloss_overlap(candidate), candidate) for candidate in wn.synsets(ambiguous_word)]

    # max() keeps the earliest entry among equal scores, which matches a
    # left-to-right scan that only replaces the best on a strictly higher score.
    top_score, top_sense = max(scored, key=lambda pair: pair[0], default=(0, None))

    if top_score > 0:
        return top_sense, top_sense.definition()
    return None, "No suitable sense found."

In [10]:
def get_result(sentence, ambiguous_word):
    """Disambiguate a word with ``lesk_algorithm`` and print the outcome.

    Behaves the same as the Part 1 helper of the same name. ``lesk_algorithm``
    is looked up when this function is called, so the most recently executed
    definition of it is the one that gets used.

    Args:
        sentence: Text that contains the word to disambiguate.
        ambiguous_word: The word whose sense should be reported.

    Prints the word, the chosen sense and its definition, or a single
    "No suitable sense found." line when no sense is returned. Returns ``None``.
    """
    sense, gloss = lesk_algorithm(sentence, ambiguous_word)

    # Handle the "nothing found" case first so the report below stays flat.
    if not sense:
        print("No suitable sense found.")
        return

    print(f"Ambiguous word: {ambiguous_word}")
    print(f"Best sense: {sense}")
    print(f"Definition: {gloss}")

In [11]:
# Example 1 again: the same financial-context sentence as in Part 1,
# run after lesk_algorithm has been redefined above.
sentence = "He went to the bank to deposit his money."
ambiguous_word = "bank"
get_result(sentence, ambiguous_word)

Ambiguous word: bank
Best sense: Synset('depository_financial_institution.n.01')
Definition: a financial institution that accepts deposits and channels the money into lending activities


In [12]:
# Example 2 again: the same riverside sentence as in Part 1,
# run after lesk_algorithm has been redefined above.
sentence = "The river bank was full of dead fishes."
ambiguous_word = "bank"
get_result(sentence, ambiguous_word)

Ambiguous word: bank
Best sense: Synset('bank.n.01')
Definition: sloping land (especially the slope beside a body of water)


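### We can see that the approach in Part 2 gives more accurate results for both example sentences. It picks the sense whose WordNet definition shares the most words with the sentence context. Note that the context is not filtered, so common words and punctuation also count towards the overlap.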

In [13]:
!jupyter nbconvert --to html LeaskAlgo.ipynb

[NbConvertApp] Converting notebook LeaskAlgo.ipynb to html
[NbConvertApp] Writing 292275 bytes to LeaskAlgo.html
