In [1]:
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_squared_error
import numpy as np

# Sample documents
documents = [
    "The cat sat on the mat.",
    "Dogs are loyal and friendly.",
    "Cats and dogs can be good pets.",
    "The dog chased the ball in the park.",
    "I love my pet cat.",
    "Pets make people happy."
]

# 1. TF-IDF Vectorization
vectorizer = TfidfVectorizer(stop_words='english')
tfidf = vectorizer.fit_transform(documents)

# 2. NMF for Topic Modeling
nmf_model = NMF(n_components=2, random_state=42)
W = nmf_model.fit_transform(tfidf)  # Document-topic matrix
H = nmf_model.components_           # Topic-word matrix

# 3. Display Topics
feature_names = vectorizer.get_feature_names_out()
n_top_words = 5
print("Topics discovered using NMF:\n")
for topic_idx, topic in enumerate(H):
    top_words = [feature_names[i] for i in topic.argsort()[:-n_top_words - 1:-1]]
    print(f"Topic {topic_idx + 1}: {', '.join(top_words)}")

# 4. Reconstruction Error (approximation quality)
reconstructed = np.dot(W, H)
error = mean_squared_error(tfidf.toarray(), reconstructed)
print(f"\nReconstruction Error (MSE): {error:.6f}")

Topics discovered using NMF:

Topic 1: dogs, pets, cats, good, friendly
Topic 2: cat, sat, love, pet, mat

Reconstruction Error (MSE): 0.031970


In [2]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.wsd import lesk
from nltk.tokenize import word_tokenize

nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')

# Example sentence with ambiguous word: "bank"
sentence = "He sat on the bank of the river and watched the water flow."
tokens = word_tokenize(sentence)

# Apply Lesk Algorithm
sense = lesk(tokens, 'bank')
print("\nWord Sense Disambiguation using Lesk Algorithm:")
print(f"Sentence: {sentence}")
print(f"Disambiguated Sense: {sense}")
print(f"Definition: {sense.definition() if sense else 'No sense found'}")

[nltk_data] Downloading package wordnet to C:\Users\attar/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\attar/nltk_data...
[nltk_data] Downloading package punkt to C:\Users\attar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!



Word Sense Disambiguation using Lesk Algorithm:
Sentence: He sat on the bank of the river and watched the water flow.
Disambiguated Sense: Synset('depository_financial_institution.n.01')
Definition: a financial institution that accepts deposits and channels the money into lending activities
