In [None]:

import nltk
from transformers import BertTokenizer, AdamW, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset
import torch
from sklearn.metrics import accuracy_score
nltk.download('punkt_tab')  # This line downloads the necessary data
from nltk.corpus import wordnet as wn
from nltk.corpus import stopwords
from nltk.wsd import lesk
from sklearn.metrics import accuracy_score
from transformers import pipeline
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
from nltk.tag import pos_tag


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [None]:
'''
@author: jcheung

Developed for Python 2. Automatically converted to Python 3; may result in bugs.
'''
import xml.etree.cElementTree as ET
import codecs

class WSDInstance:
    def __init__(self, my_id, lemma, context, index, pos=None):
        self.id = my_id
        self.lemma = lemma
        self.context = context
        self.index = index
        self.pos = pos  # Store POS information
    def __str__(self):
        return '%s\t%s\t%s\t%d\t%s' % (self.id, self.lemma, ' '.join(self.context), self.index, self.pos)

def load_instances(f):
    '''
    Load two lists of cases to perform WSD on. The structure that is returned is a dict, where
    the keys are the ids, and the values are instances of WSDInstance.
    '''
    tree = ET.parse(f)
    root = tree.getroot()

    dev_instances = {}
    test_instances = {}

    for text in root:
        if text.attrib['id'].startswith('d001'):
            instances = dev_instances
        else:
            instances = test_instances
        for sentence in text:
            # print(sentence)
            # construct sentence context
            context = [to_ascii(el.attrib['lemma']) for el in sentence]
            # print(context)
            for i, el in enumerate(sentence):
                if el.tag == 'instance':
                    my_id = el.attrib['id']
                    lemma = to_ascii(el.attrib['lemma'])
                    pos = pos_to_wordnet(el.attrib['pos'])
                    instances[my_id] = WSDInstance(my_id, lemma, context, i, pos)
    return dev_instances, test_instances

def load_key(f):
    '''
    Load the solutions as dicts.
    Key is the id
    Value is the list of correct sense keys.
    '''
    dev_key = {}
    test_key = {}
    for line in open(f):
        if len(line) <= 1: continue
        #print (line)
        doc, my_id, sense_key = line.strip().split(' ', 2)
        if doc == 'd001':
            dev_key[my_id] = sense_key.split()
        else:
            test_key[my_id] = sense_key.split()
    return dev_key, test_key

def to_ascii(s):
    # remove all non-ascii characters
    return codecs.encode(s, 'ascii', 'ignore').decode()

def pos_to_wordnet(tag):
    """
    Convert POS tags from input data to WordNet POS tags.
    """
    if tag.startswith('NN'):  # Noun tags
        return wn.NOUN
    elif tag.startswith('VB'):  # Verb tags
        return wn.VERB
    elif tag.startswith('JJ'):  # Adjective tags
        return wn.ADJ
    elif tag.startswith('RB'):  # Adverb tags
        return wn.ADV
    else:
        return None  # Tags not used in WordNet synsets

data_f = 'multilingual-all-words.en.xml'
key_f = 'wordnet.en.key'
dev_instances, test_instances = load_instances(data_f)
dev_key, test_key = load_key(key_f)

# IMPORTANT: keys contain fewer entries than the instances; need to remove them
dev_instances = {k:v for (k,v) in dev_instances.items() if k in dev_key}
test_instances = {k:v for (k,v) in test_instances.items() if k in test_key}

# read to use here
print(len(dev_instances)) # number of dev instances
print(len(test_instances)) # number of test instances


194
1450


# Best Preprocessing Technique

In [None]:

# === Preprocessing Variants === #
def synset_to_sense_key(synset):
    return synset.lemmas()[0].key()

def preprocess_baseline(context):
    """
    Baseline preprocessing: lemmatization + stopword removal.
    """
    stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()
    return [
        lemmatizer.lemmatize(word.lower())
        for word in word_tokenize(" ".join(context))
        if word.lower() not in stop_words and word.isalpha()
    ]

def preprocess_pos_filtered(context):
    """
    POS-filtered preprocessing: only include nouns, verbs, adjectives, and adverbs.
    """
    lemmatizer = WordNetLemmatizer()
    pos_tags = pos_tag(context)
    filtered_context = [
        lemmatizer.lemmatize(word.lower())
        for word, tag in pos_tags
        if tag.startswith(("N", "V", "J", "R")) and word.isalpha()
    ]
    return filtered_context

def preprocess_extended_context(context, lemma):
    """
    Extended context preprocessing: include synonyms and hypernyms of context words.
    """
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words("english"))
    context = [
        lemmatizer.lemmatize(word.lower())
        for word in word_tokenize(" ".join(context))
        if word.lower() not in stop_words and word.isalpha()
    ]

    synonyms, hypernyms = set(), set()
    for word in context:
        for synset in wn.synsets(word):
            synonyms.update([lemma.name() for lemma in synset.lemmas()])
            hypernyms.update([hypernym.name().split(".")[0] for hypernym in synset.hypernyms()])

    return list(context) + list(synonyms) + list(hypernyms)

# Adjusted Lesk Algorithm to handle multiple preprocessing variants
def run_lesk_with_preprocessing(preprocessing_function, instances, gold_key):
    """
    Run Lesk with the given preprocessing function.
    """
    predictions = {}
    for my_id, instance in instances.items():
        # Handle preprocessing functions with multiple arguments
        if preprocessing_function == preprocess_extended_context:
            context = preprocessing_function(instance.context, instance.lemma)
        else:
            context = preprocessing_function(instance.context)

        lesk_result = lesk(context, instance.lemma, instance.pos)
        if lesk_result:
            predictions[my_id] = synset_to_sense_key(lesk_result)
    return predictions

def evaluate_lesk_preprocessing(preprocessing_function, instances, gold_key):
    """
    Evaluate Lesk with a specific preprocessing function.
    """
    predictions = run_lesk_with_preprocessing(preprocessing_function, instances, gold_key)
    gold_labels = [gold_key[k][0] for k in predictions.keys() if k in gold_key]
    pred_labels = [predictions[k] for k in predictions.keys() if k in gold_key]
    return accuracy_score(gold_labels, pred_labels)

# === Run Experiments === #
baseline_lesk_accuracy = evaluate_lesk_preprocessing(preprocess_baseline, dev_instances, dev_key)
pos_filtered_lesk_accuracy = evaluate_lesk_preprocessing(preprocess_pos_filtered, dev_instances, dev_key)
extended_context_lesk_accuracy = evaluate_lesk_preprocessing(preprocess_extended_context, dev_instances, dev_key)

print(f"Baseline Lesk Accuracy: {baseline_lesk_accuracy * 100:.2f}%")
print(f"POS-Filtered Lesk Accuracy: {pos_filtered_lesk_accuracy * 100:.2f}%")
print(f"Extended Context Lesk Accuracy: {extended_context_lesk_accuracy * 100:.2f}%")

Baseline Lesk Accuracy: 25.26%
POS-Filtered Lesk Accuracy: 25.77%
Extended Context Lesk Accuracy: 25.77%


# MFS and Lesk

In [None]:
def preprocess_context(context, lemma):
    processed_context = preprocess_extended_context(context, lemma)
    return processed_context

def most_frequent_sense(lemma):
    synsets = wn.synsets(lemma)
    return synsets[0] if synsets else None


def evaluate(predictions, gold_key):
    gold_labels = [gold_key[k][0] for k in predictions.keys() if k in gold_key]
    pred_labels = [predictions[k] for k in predictions.keys() if k in gold_key]
    return accuracy_score(gold_labels, pred_labels)

def run_mfs_baseline(instances, gold_key):
    predictions = {}
    for my_id, instance in instances.items():
        mfs = most_frequent_sense(instance.lemma)
        if mfs:
            predictions[my_id] = synset_to_sense_key(mfs)
    return predictions


def run_lesk_algorithm(instances, gold_key):
    predictions = {}
    for my_id, instance in instances.items():
        context = preprocess_context(instance.context, instance.lemma)
        lesk_result = lesk(context, instance.lemma, instance.pos)
        if lesk_result:
            predictions[my_id] = synset_to_sense_key(lesk_result)
    return predictions



# Evaluate Most Frequent Sense Baseline
mfs_predictions = run_mfs_baseline(test_instances, test_key)
# print first 5 entries  in mfs predictions
print(list(mfs_predictions.items())[:5])
print(list(test_key.items())[:5])
# print first entry in train key

mfs_accuracy = evaluate(mfs_predictions, test_key)
print(f'Most Frequent Sense Baseline Accuracy: {mfs_accuracy * 100:.2f}%')

# Evaluate Lesk's Algorithm
lesk_predictions = run_lesk_algorithm(test_instances, test_key)
lesk_accuracy = evaluate(lesk_predictions, test_key)
print(f'Lesk Algorithm Accuracy: {lesk_accuracy * 100:.2f}%')


[('d002.s001.t001', 'victory%1:11:00::'), ('d002.s001.t002', 'israel%1:15:00::'), ('d002.s002.t001', 'victory%1:11:00::'), ('d002.s002.t002', 'visit%1:04:02::'), ('d002.s002.t005', 'team%1:14:00::')]
[('d002.s001.t001', ['victory%1:11:00::']), ('d002.s001.t002', ['israel%1:15:00::']), ('d002.s002.t001', ['victory%1:11:00::']), ('d002.s002.t002', ['visit%1:04:02::']), ('d002.s002.t005', ['team%1:14:00::'])]
Most Frequent Sense Baseline Accuracy: 49.17%
Lesk Algorithm Accuracy: 29.72%


# Extended LESK

In [None]:
def run_extended_lesk_algorithm(instances, gold_key):
    predictions = {}
    for my_id, instance in instances.items():
        context = preprocess_context(instance.context, instance.lemma)
        max_overlap = 0
        best_synset = None

        for synset in wn.synsets(instance.lemma, pos=instance.pos):
            gloss = synset.definition().split()
            examples = [word for ex in synset.examples() for word in ex.split()]
            hypernyms = [hypernym.name().split(".")[0] for hypernym in synset.hypernyms()]
            related_words = gloss + examples + hypernyms

            overlap = len(set(context) & set(related_words))
            if overlap > max_overlap:
                max_overlap = overlap
                best_synset = synset

        if best_synset:
            predictions[my_id] = synset_to_sense_key(best_synset)

    return predictions

  # Extended Lesk
lesk_predictions = run_extended_lesk_algorithm(test_instances, test_key)
lesk_accuracy = evaluate(lesk_predictions, test_key)
print(f"Extended Lesk Algorithm Accuracy: {lesk_accuracy * 100:.2f}%")


Extended Lesk Algorithm Accuracy: 35.15%


# Word2vec and Hyperparameter Tuning on DEV set

In [None]:
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# === Training Word2Vec Model === #
# Use your dataset's contexts to train Word2Vec
sentences = [instance.context for instance in dev_instances.values()]
word2vec_model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)

def get_word_embedding(word, model):
    """
    Get the Word2Vec embedding for a word.
    """
    if word in model.wv:
        return model.wv[word]
    return np.zeros(model.vector_size)  # Return zero vector if word is not in vocabulary

def get_context_embedding(context, model):
    """
    Get the average Word2Vec embedding for a context.
    """
    embeddings = [get_word_embedding(word, model) for word in context if word in model.wv]
    if embeddings:
        return np.mean(embeddings, axis=0)
    return np.zeros(model.vector_size)  # Return zero vector if no words in vocabulary

# === Word2Vec-Based Prediction === #
def word2vec_wsd(instance, model):
    """
    Predict the word sense for a given instance using Word2Vec.
    """
    context_embedding = get_context_embedding(instance.context, model)
    sense_scores = {}

    # Compute similarity between context and each sense definition
    for synset in wn.synsets(instance.lemma):
        definition_embedding = get_context_embedding(synset.definition().split(), model)
        similarity = cosine_similarity([context_embedding], [definition_embedding])[0][0]
        sense_scores[synset.lemmas()[0].key()] = similarity

    # Return the sense with the highest similarity
    return max(sense_scores, key=sense_scores.get) if sense_scores else None

# === Evaluate Word2Vec Method === #
def evaluate_word2vec_wsd(instances, gold_key, model):
    predictions = {}
    for my_id, instance in instances.items():
        predicted_sense = word2vec_wsd(instance, model)
        if predicted_sense:
            predictions[my_id] = predicted_sense

    # Evaluate accuracy
    gold_labels = [gold_key[k][0] for k in predictions.keys() if k in gold_key]
    pred_labels = [predictions[k] for k in predictions.keys() if k in gold_key]
    return accuracy_score(gold_labels, pred_labels)

# Evaluate on the test set
word2vec_accuracy = evaluate_word2vec_wsd(test_instances, test_key, word2vec_model)
print(f"Word2Vec WSD Accuracy: {word2vec_accuracy * 100:.2f}%")

Word2Vec WSD Accuracy: 27.59%


In [None]:
word2vec_model2 = Word2Vec(sentences, vector_size=100, window=10, min_count=1, workers=4)
word2vec_accuracy = evaluate_word2vec_wsd(test_instances, test_key, word2vec_model2)
print(f"Word2Vec WSD Accuracy: {word2vec_accuracy * 100:.2f}%")

Word2Vec WSD Accuracy: 27.79%


# BERT

In [None]:
from transformers import BertTokenizer, BertModel
import torch
from scipy.spatial.distance import cosine
from sklearn.metrics import accuracy_score
from nltk.corpus import wordnet as wn

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load pretrained BERT model and tokenizer, move model to GPU
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)

def get_embedding(text, model, tokenizer):
    """
    Get the embedding for a given text using a pretrained BERT model.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze().cpu()

def bert_wsd(instance, model, tokenizer):
    """
    Perform WSD using BERT embeddings.
    """
    context = ' '.join(instance.context)  # Combine the context words into a single string
    context_embedding = get_embedding(context, model, tokenizer)
    sense_scores = {}

    # Use WordNet synsets to get senses and their definitions
    for synset in wn.synsets(instance.lemma):
        definition = synset.definition()
        sense_embedding = get_embedding(definition, model, tokenizer)
        similarity = 1 - cosine(context_embedding, sense_embedding)
        sense_scores[synset.lemmas()[0].key()] = similarity

    return max(sense_scores, key=sense_scores.get) if sense_scores else None

def evaluate_bert_wsd(instances, gold_key, model, tokenizer):
    """
    Evaluate the BERT-based WSD method on the dataset.
    """
    predictions = {}
    for my_id, instance in instances.items():
        predicted_sense = bert_wsd(instance, model, tokenizer)
        if predicted_sense:
            predictions[my_id] = predicted_sense

    # Debug: Print sample predictions
    for my_id in list(predictions.keys())[5:10]:
        print(f"Instance ID: {my_id}")
        print(f"Predicted: {predictions[my_id]}")
        print(f"Gold: {gold_key.get(my_id, None)}")
        print()

    # Evaluate accuracy
    gold_labels = [gold_key[k][0] for k in predictions.keys() if k in gold_key]
    pred_labels = [predictions[k] for k in predictions.keys() if k in gold_key]
    return accuracy_score(gold_labels, pred_labels)

# Evaluate the BERT method on the test instances
bert_accuracy = evaluate_bert_wsd(test_instances, test_key, bert_model, bert_tokenizer)
print(f"BERT WSD Accuracy on Test Set (GPU): {bert_accuracy * 100:.2f}%")

Using device: cuda
Instance ID: d002.s001.t001
Predicted: victory%1:11:00::
Gold: ['victory%1:11:00::']

Instance ID: d002.s001.t002
Predicted: israel%1:15:00::
Gold: ['israel%1:15:00::']

Instance ID: d002.s002.t001
Predicted: victory%1:11:00::
Gold: ['victory%1:11:00::']

Instance ID: d002.s002.t002
Predicted: visit%1:14:00::
Gold: ['visit%1:04:02::']

Instance ID: d002.s002.t005
Predicted: team%1:14:01::
Gold: ['team%1:14:00::']

BERT WSD Accuracy on Test Set (GPU): 35.86%


In [None]:
from transformers import BertTokenizer, BertModel
import torch
from nltk.corpus import wordnet as wn
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cosine

# Check if GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device used: {device}")

# Initialize BERT tokenizer and model
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)

def compute_context_embedding(context, target_word):
    """
    Compute contextualized embedding for a target word using BERT.
    """
    # Prepare tokenized input from context
    inputs = bert_tokenizer(' '.join(context), return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)

    with torch.no_grad():
        outputs = bert_model(**inputs)
        embeddings = outputs.last_hidden_state

    # Identify the index of the target word in the tokenized input
    target_token_ids = bert_tokenizer.encode(target_word, add_special_tokens=False)
    target_index = inputs['input_ids'][0].tolist().index(target_token_ids[0])

    # Return the flattened contextualized embedding for the target word
    return embeddings[0, target_index, :].view(-1).cpu().numpy()

def compute_synset_embeddings(lemma):
    """
    Generate embeddings for all synsets of a lemma using their definitions.
    """
    embeddings = {}
    for synset in wn.synsets(lemma):
        key = synset.lemmas()[0].key()
        definition_embedding = compute_definition_embedding(synset.definition())
        embeddings[key] = definition_embedding
    return embeddings

def compute_definition_embedding(text):
    """
    Compute the embedding for a text (definition) using BERT.
    """
    inputs = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128).to(device)
    with torch.no_grad():
        outputs = bert_model(**inputs)
        return outputs.last_hidden_state.mean(dim=1).view(-1).cpu().numpy()

def predict_word_sense(context, target_word):
    """
    Predict the sense of a target word based on the closest match to its contextualized embedding.
    """
    # Compute the embedding for the target word in its context
    target_embedding = compute_context_embedding(context, target_word)

    # Compute the embeddings for the senses of the target word
    sense_embeddings = compute_synset_embeddings(target_word)

    # Compute cosine similarity and find the closest match
    similarity_scores = {
        sense_key: 1 - cosine(target_embedding, embedding)
        for sense_key, embedding in sense_embeddings.items()
    }
    return max(similarity_scores, key=similarity_scores.get, default=None)

def evaluate_model(instances, gold_key):
    """
    Evaluate the WSD model on test instances.
    """
    predictions = {}
    for instance_id, instance in instances.items():
        context = instance.context
        target_word = instance.lemma
        predicted_sense = predict_word_sense(context, target_word)
        predictions[instance_id] = predicted_sense

    gold_labels = [gold_key[k][0] for k in predictions if k in gold_key]
    predicted_labels = [predictions[k] for k in predictions if k in gold_key]
    return accuracy_score(gold_labels, predicted_labels)

# Perform evaluation
wsd_accuracy = evaluate_model(test_instances, test_key)
print(f"Word Sense Disambiguation Model Accuracy: {wsd_accuracy * 100:.2f}%")
