In [None]:
! pip install nltk



In [None]:
! pip install transformers torch nltk



In [None]:
import nltk
# Fetch the NLTK data packages this notebook relies on. Each call is left as its
# own statement; the final call is the cell's last expression, so its return
# value is what the cell displays.

# WordNet corpus: the sense inventory accessed through nltk.corpus.wordnet.
nltk.download('wordnet')
# Open Multilingual WordNet data -- presumably required by the WordNet reader in
# recent NLTK releases; TODO confirm it is still needed here.
nltk.download('omw-1.4')
# Punkt tokenizer data -- presumably what nltk.tokenize.word_tokenize loads.
nltk.download('punkt')
# NOTE(review): looks like the newer packaging of the Punkt data that recent
# NLTK versions look up instead of 'punkt' -- confirm against the installed version.
nltk.download('punkt_tab')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
import torch
from transformers import BertTokenizer, BertModel
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load the BERT tokenizer and encoder from one shared checkpoint name so the
# two can never drift apart.
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model = BertModel.from_pretrained(BERT_CHECKPOINT)

def embed_text(text):
    """Return the BERT [CLS] vector for ``text`` as a NumPy array.

    The text is tokenized with the module-level ``tokenizer`` (truncated to at
    most 128 tokens) and run through the module-level ``model`` with gradient
    tracking disabled. The result is the last-layer hidden state at position 0
    of the first (and only) sequence in the batch.
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, max_length=128)
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # Index [0, 0] selects batch item 0, token position 0 (the CLS slot).
    cls_vector = hidden_states[0, 0]
    return cls_vector.numpy()

def get_sense_embeddings(senses):
    """Embed every sense as "<definition> <examples joined by spaces>".

    ``senses`` is an iterable of objects exposing ``definition()`` and
    ``examples()``. Each sense's text is passed to ``embed_text`` and the
    resulting vectors are stacked, in input order, into one NumPy array.
    An empty ``senses`` yields an empty array.
    """
    def sense_text(sense):
        # Definition first, then all usage examples, separated by single spaces.
        return ' '.join([sense.definition(), ' '.join(sense.examples())])

    return np.array([embed_text(sense_text(sense)) for sense in senses])

def disambiguate(sentence, word):
    """Choose the WordNet sense of ``word`` that best matches ``sentence``.

    The sentence and every candidate sense (definition plus examples) are
    embedded with ``embed_text`` / ``get_sense_embeddings``, and the sense with
    the highest cosine similarity to the sentence embedding wins.

    Args:
        sentence: The context sentence. It is embedded as a whole; the function
            does not check that ``word`` actually occurs in it.
        word: The word to look up with ``wn.synsets``. All returned synsets,
            regardless of part of speech, are candidates.

    Returns:
        A dict with keys ``"word"``, ``"sense_name"``, ``"definition"``,
        ``"examples"`` and ``"similarity_score"`` for the best sense, or a
        message string when WordNet has no synsets for ``word``.
    """
    # The sentence is deliberately not tokenized here: the previous
    # word_tokenize() call produced a value that was never used.
    senses = wn.synsets(word)
    if not senses:
        return f"No senses found for the word '{word}'."

    # One vector for the context, one row per candidate sense.
    context_emb = embed_text(sentence)
    sense_embs = get_sense_embeddings(senses)

    # cosine_similarity returns a (1, n_senses) matrix; take its only row.
    sims = cosine_similarity([context_emb], sense_embs)[0]

    best_idx = sims.argmax()
    best_sense = senses[best_idx]

    return {
        "word": word,
        "sense_name": best_sense.name(),
        "definition": best_sense.definition(),
        "examples": best_sense.examples(),
        "similarity_score": sims[best_idx]
    }

# Interactive loop: repeatedly read a sentence and a target word, then print the
# best-matching WordNet sense. Typing 'exit' at the sentence prompt stops the loop.
if __name__ == "__main__":
    print("Advanced WSD with BERT embeddings (type 'exit' to quit)")
    while True:
        try:
            # Strip surrounding whitespace so inputs such as "exit " or " bank"
            # behave the same as their trimmed forms.
            sentence = input("\nEnter a sentence: ").strip()
            if sentence.lower() == 'exit':
                break
            if not sentence:
                # Nothing to use as context; ask again instead of embedding "".
                continue
            word = input("Enter the ambiguous word to disambiguate: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or a user interrupt at a prompt ends the session
            # cleanly rather than with a traceback.
            break

        output = disambiguate(sentence, word)
        if isinstance(output, str):
            # disambiguate returns a plain message when no senses were found.
            print(output)
        else:
            print(f"\nWord: {output['word']}")
            print(f"Best sense: {output['sense_name']}")
            print(f"Definition: {output['definition']}")
            print(f"Examples: {output['examples']}")
            print(f"Similarity Score: {output['similarity_score']:.4f}")


Advanced WSD with BERT embeddings (type 'exit' to quit)

Enter a sentence: He went to the bank to withdraw money.
Enter the ambiguous word to disambiguate: bank

Word: bank
Best sense: savings_bank.n.02
Definition: a container (usually with a slot in the top) for keeping money at home
Examples: ['the coin bank was empty']
Similarity Score: 0.8633

Enter a sentence: She went to the **bat** to buy some equipment for the game.
Enter the ambiguous word to disambiguate: bat

Word: bat
Best sense: cream.v.02
Definition: beat thoroughly and conclusively in a competition or fight
Examples: ['We licked the other team on Sunday!']
Similarity Score: 0.8449

Enter a sentence: quit
Enter the ambiguous word to disambiguate: exit

Word: exit
Best sense: exit.v.02
Definition: lose the lead
Examples: []
Similarity Score: 0.9217

Enter a sentence: exit
