In [37]:
from collections import Counter

import nltk
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

## Preprocessing input for Lesk's Algorithm

In [38]:
def pre_proccess(sentence):
    """Tokenize a sentence, drop English stop words, and lemmatize the rest.

    Args:
        sentence: Raw text (a context sentence or a WordNet gloss).

    Returns:
        A list with one lemmatized token per kept input token, in the
        original order. Stop words are matched case-insensitively; tokens
        that are kept are handed to the lemmatizer with their original
        casing. No other filtering is applied, so any non-word tokens the
        tokenizer emits are kept as well.
    """
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words("english"))

    # The stop-word list is lowercase, so compare on the lowercased token;
    # otherwise capitalised stop words (e.g. a sentence-initial "The")
    # would leak into the result.
    return [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(sentence)
        if token.lower() not in stop_words
    ]

## Build the context bag: gloss words from every sense of each context word

In [39]:
def lesk_context_bag(context_sentence, word):
    """Collect the pre-processed gloss words of every sense of each context word.

    Args:
        context_sentence: Iterable of context tokens (typically the output
            of ``pre_proccess``). It is NOT modified.
        word: The ambiguous target word; every occurrence of it is left out
            of the context. It does not have to be present.

    Returns:
        A flat list (duplicates kept) of the tokens obtained by running
        ``pre_proccess`` on the definition of each WordNet synset of each
        remaining context word.
    """
    context_bag_list = []
    for w in context_sentence:
        # Skip the target word instead of calling list.remove(): that call
        # mutated the caller's list, raised ValueError when the word was
        # absent, and only dropped its first occurrence.
        if w == word:
            continue
        for syn in wn.synsets(w):
            context_bag_list.extend(pre_proccess(str(syn.definition())))
    return context_bag_list

# lesk_context_bag(sent, word)

## Get Lesk score for each Sense of the Ambiguous word

In [40]:
def compute_lesk_score(context_sentence, word):
    """Score every WordNet sense of ``word`` by gloss/context overlap.

    Args:
        context_sentence: Context tokens, forwarded to ``lesk_context_bag``.
        word: The ambiguous word whose senses are scored.

    Returns:
        A dict mapping each synset name of ``word`` to its overlap count:
        for every whitespace-separated word of the sense's definition, the
        number of times that exact string occurs in the context bag, summed
        over the definition.
    """
    # Tally the bag once so each gloss word is a constant-time lookup rather
    # than a full scan of the bag; the resulting counts are the same.
    bag_counts = Counter(lesk_context_bag(context_sentence, word))

    # NOTE(review): the gloss is split on whitespace only, while the bag is
    # built with pre_proccess; gloss words with attached punctuation or in
    # inflected form therefore may not match. Confirm whether that is intended.
    return {
        sense.name(): sum(bag_counts[w_gloss] for w_gloss in sense.definition().split())
        for sense in wn.synsets(word)
    }

## Get the most apt sense of the ambiguous word, using the Lesk score as the metric

In [41]:
def lesk(context_sentence, word):
    """Pick the sense of ``word`` with the highest Lesk overlap score.

    Args:
        context_sentence: Context tokens, forwarded to ``compute_lesk_score``.
        word: The ambiguous word to disambiguate.

    Returns:
        The synset name with the largest score; on ties the sense listed
        first wins. ``None`` if there are no senses or no score is above zero.
    """
    scores = compute_lesk_score(context_sentence, word)
    # max() keeps the first maximal key, matching a strict ">" running maximum.
    best = max(scores, key=scores.get, default=None)
    if best is None or scores[best] <= 0:
        return None
    return best

# Walker Algorithm

In [99]:
def walker(sent, word, method = "path_similarity"):
    """Pick the sense of ``word`` most similar, in total, to the context senses.

    For each synset of ``word`` the chosen similarity measure is summed over
    every synset of every context word; the synset with the largest total wins.

    Args:
        sent: Iterable of context tokens. Occurrences of ``word`` itself are
            skipped so a sense is never scored against the target's own senses.
        word: The ambiguous word to disambiguate.
        method: Name of the synset similarity method to use: one of
            ``"path_similarity"``, ``"lch_similarity"``, ``"wup_similarity"``.

    Returns:
        The best-scoring synset object (first one wins on ties), or ``None``
        when ``word`` has no synsets or no sense scores above zero.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    supported = ("path_similarity", "lch_similarity", "wup_similarity")
    if method not in supported:
        raise ValueError(
            "Unsupported similarity method %r; expected one of %s" % (method, ", ".join(supported))
        )

    # The context synsets do not depend on the candidate sense, so look them
    # up once instead of once per sense.
    context_senses = [
        context_sense
        for w in sent
        if w != word
        for context_sense in wn.synsets(w)
    ]

    max_ = 0
    predicted = None
    for sense in wn.synsets(word):
        similarity = getattr(sense, method)
        score = 0
        for context_sense in context_senses:
            try:
                value = similarity(context_sense)
            except Exception:
                # Best effort: a pair the measure cannot compare contributes
                # nothing. Catching Exception rather than everything keeps
                # KeyboardInterrupt/SystemExit working.
                continue
            # A measure may return None when it has no value for the pair.
            if value is not None:
                score += value
        if max_ < score:
            max_ = score
            predicted = sense
    return predicted

## Get Input

In [100]:
# To read the inputs interactively instead of using the example below:
# sent = input("Enter Sentence")
# ambi_word = input("Enter Ambiguous word")

# Alternative example sentence:
# "Chris gets 5 percent interest on his money in the bank"
ambiguous_word = "bank"
sentence = "Soham drives his bicycle on the bank of the road"

# Test

In [101]:
# Lemmatize the target word and pre-process the sentence the same way.
word = WordNetLemmatizer().lemmatize(ambiguous_word)
# print(word)
sent = pre_proccess(sentence)
# print(sent)

# `lesk` returns a synset name, or None when no sense scores above zero;
# in the None case wn.synset below is called with None.
prediction = lesk(sent, word)
w = wn.synset(prediction)
# Last expression of the cell: displayed as the cell output.
w.name(), w.definition()
# compute_lesk_score(sent, word)

('bank.n.07',
 'a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force')

## Test for Walker's algorithm
The algorithm works better with tagged data.

In [107]:
# Disambiguate the same word with `walker`, using the "wup_similarity" measure.
# `walker` returns None when no sense scores above zero, in which case the
# next line fails on the None value.
w = walker(sent, word, method="wup_similarity")
w.name(), w.definition()

('bank.v.05', 'be in the banking business')