# WordNet Semantic Relationships Analysis

This notebook demonstrates how to use WordNet to identify various semantic relationships in text data:
- **Synonymy**: Words with similar meanings
- **Antonymy**: Words with opposite meanings
- **Hypernymy**: Links a word to a more general term (e.g., dog → animal; *animal* is a hypernym of *dog*)
- **Hyponymy**: Links a word to a more specific term (e.g., animal → dog; *dog* is a hyponym of *animal*)
- **Meronymy**: Part-whole relationships
- **Holonymy**: Whole-part relationships

In [1]:
# Install required packages
!pip install nltk -q

In [10]:
# Import libraries
import nltk
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
from collections import defaultdict
import pandas as pd

# Download required NLTK data
NLTK_RESOURCES = [
    'wordnet',
    'omw-1.4',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'stopwords',
]

# With quiet=True a failed download is only visible through the False return
# value, so collect the failures instead of unconditionally reporting success.
failed_resources = [
    resource for resource in NLTK_RESOURCES
    if not nltk.download(resource, quiet=True)
]

if failed_resources:
    print(f"Setup incomplete - could not download: {', '.join(failed_resources)}")
else:
    print("Setup complete!")

Setup complete!


## 2. Basic WordNet Operations

In [3]:
# Look up every synset (synonym set) WordNet lists for one word
word = "happy"
synsets = wn.synsets(word)

print(f"Synsets for '{word}':")
for rank, sense in enumerate(synsets, start=1):
    # One print per synset: heading line, examples line, then a blank separator
    print(f"{rank}. {sense.name()}: {sense.definition()}\n   Examples: {sense.examples()}\n")

Synsets for 'happy':
1. happy.a.01: enjoying or showing or marked by joy or pleasure
   Examples: ['a happy smile', 'spent many happy days on the beach', 'a happy marriage']

2. felicitous.s.02: marked by good fortune
   Examples: ['a felicitous life', 'a happy outcome']

3. glad.s.02: eagerly disposed to act or to be of service
   Examples: ['glad to help']

4. happy.s.04: well expressed and to the point
   Examples: ['a happy turn of phrase', 'a few well-chosen words']



## 3. Finding Synonyms

In [4]:
def get_synonyms(word, pos=None):
    """
    Get all synonyms for a given word.

    Collects the lemma names of every synset returned by ``wn.synsets`` for
    ``word``, replaces underscores with spaces, and leaves out the query word
    itself (compared case-insensitively).

    Args:
        word: Word to look up.
        pos: Optional part of speech (wn.NOUN, wn.VERB, wn.ADJ, wn.ADV).
            When omitted, synsets of every part of speech are used.

    Returns:
        Sorted list of unique synonym strings.
    """
    # Normalise the query the same way lemma names are normalised below, so an
    # underscore-joined query such as "ice_cream" is not reported as its own
    # synonym ("ice cream").
    query = word.replace('_', ' ').lower()

    synsets = wn.synsets(word, pos=pos) if pos else wn.synsets(word)

    synonyms = set()
    for synset in synsets:
        for lemma in synset.lemmas():
            synonym = lemma.name().replace('_', ' ')
            if synonym.lower() != query:
                synonyms.add(synonym)

    return sorted(synonyms)

# Example: list the synonyms WordNet knows for "happy"
word = "happy"
synonyms = get_synonyms(word)
print(f"Synonyms of '{word}':", synonyms, sep="\n")

Synonyms of 'happy':
['felicitous', 'glad', 'well-chosen']


## 4. Finding Antonyms

In [5]:
def get_antonyms(word, pos=None):
    """
    Get all antonyms for a given word.

    Walks every lemma of every synset returned by ``wn.synsets`` for ``word``
    and collects the names of the lemmas' antonyms, with underscores replaced
    by spaces.

    Args:
        word: Word to look up.
        pos: Optional part of speech (wn.NOUN, wn.VERB, wn.ADJ, wn.ADV).
            When omitted, synsets of every part of speech are used.

    Returns:
        Sorted list of unique antonym strings (empty when none are found).
    """
    synsets = wn.synsets(word, pos=pos) if pos else wn.synsets(word)

    antonyms = {
        antonym.name().replace('_', ' ')
        for synset in synsets
        for lemma in synset.lemmas()
        for antonym in lemma.antonyms()
    }

    return sorted(antonyms)

# Example: list the antonyms WordNet knows for "happy"
word = "happy"
antonyms = get_antonyms(word)
print(f"Antonyms of '{word}':", antonyms, sep="\n")

Antonyms of 'happy':
['unhappy']


## 5. Finding Hypernyms (General Terms)

In [6]:
def get_hypernyms(word, pos=None):
    """
    Collect the lemma names of the direct hypernyms (more general terms)
    of every synset of a word.

    pos: optional part of speech (wn.NOUN, wn.VERB, wn.ADJ, wn.ADV); when it
         is falsy the lookup is made without a part-of-speech filter.

    Returns a sorted list of unique names with underscores turned into spaces.
    """
    if pos:
        senses = wn.synsets(word, pos=pos)
    else:
        senses = wn.synsets(word)

    # Only one level up is visited: the direct hypernyms of each sense.
    general_terms = {
        lemma.name().replace('_', ' ')
        for sense in senses
        for parent in sense.hypernyms()
        for lemma in parent.lemmas()
    }

    ordered = list(general_terms)
    ordered.sort()
    return ordered

# Example: direct hypernyms of the noun senses of "dog"
word = "dog"
hypernyms = get_hypernyms(word, pos=wn.NOUN)
print(f"Hypernyms of '{word}' (more general terms):", hypernyms, sep="\n")

Hypernyms of 'dog' (more general terms):
['blighter', 'bloke', 'canid', 'canine', 'catch', 'chap', 'cuss', 'disagreeable woman', 'domestic animal', 'domesticated animal', 'fella', 'feller', 'fellow', 'gent', 'lad', 'sausage', 'scoundrel', 'stop', 'support', 'unpleasant woman', 'villain']


## 6. Finding Hyponyms (Specific Terms)

In [7]:
def get_hyponyms(word, pos=None, max_results=20):
    """
    Get hyponyms (more specific terms) for a word.

    Collects the lemma names of the direct hyponyms of every synset of
    ``word``, with underscores replaced by spaces.

    Args:
        word: Word to look up.
        pos: Optional part of speech (wn.NOUN, wn.VERB, wn.ADJ, wn.ADV).
        max_results: Maximum number of names to return, taken from the start
            of the alphabetically sorted list. ``None`` or ``0`` means no
            limit.

    Returns:
        Sorted list of unique hyponym strings, truncated to ``max_results``.

    Raises:
        ValueError: If ``max_results`` is negative.
    """
    # A negative limit would otherwise be used as a slice bound and silently
    # drop entries from the END of the list instead of limiting its length.
    if max_results is not None and max_results < 0:
        raise ValueError(f"max_results must be non-negative, got {max_results}")

    synsets = wn.synsets(word, pos=pos) if pos else wn.synsets(word)

    hyponyms = {
        lemma.name().replace('_', ' ')
        for synset in synsets
        for hyponym in synset.hyponyms()
        for lemma in hyponym.lemmas()
    }

    result = sorted(hyponyms)
    # Falsy limit (None or 0) keeps the historical "return everything" meaning.
    return result[:max_results] if max_results else result

# Example: first 20 (alphabetically) direct hyponyms of the noun "animal"
word = "animal"
hyponyms = get_hyponyms(word, pos=wn.NOUN, max_results=20)
print(f"Hyponyms of '{word}' (more specific terms, limited to 20):", hyponyms, sep="\n")

Hyponyms of 'animal' (more specific terms, limited to 20):
['acrodont', 'adult', 'biped', 'captive', 'chordate', 'conceptus', 'creepy-crawly', 'critter', 'darter', 'domestic animal', 'domesticated animal', 'ectotherm', 'embryo', 'feeder', 'female', 'fertilized egg', 'fictional animal', 'game', 'giant', 'herbivore']


## 7. Comprehensive Semantic Analysis Function

In [8]:
def analyze_word_semantics(word, pos=None):
    """
    Comprehensive semantic analysis of a word.

    Prints a report (synset count, first synset definition, synonyms,
    antonyms, hypernyms, hyponyms) and returns the collected data.

    Args:
        word: Word to analyse.
        pos: Optional part of speech (e.g. wn.NOUN). It is applied to the
            synset count, hypernyms and hyponyms. Synonyms and antonyms are
            looked up without it, so they may include other parts of speech.

    Returns:
        dict with keys 'word', 'synsets_count', 'synonyms', 'antonyms',
        'hypernyms' and 'hyponyms'. Hyponyms are requested with
        max_results=10; the other lists are returned in full even though the
        printed report shows at most 10 synonyms and hypernyms.
    """
    def _preview(items, limit=None):
        # Render a list for the report: 'None found' when empty, otherwise a
        # comma-separated string with '...' appended if it was cut at `limit`.
        if not items:
            return 'None found'
        if limit is None or len(items) <= limit:
            return ', '.join(items)
        return ', '.join(items[:limit]) + '...'

    print(f"\n{'='*60}")
    print(f"Semantic Analysis for: '{word}'")
    print(f"{'='*60}\n")

    # Synsets
    synsets = wn.synsets(word, pos=pos) if pos else wn.synsets(word)
    print(f"Number of synsets: {len(synsets)}\n")

    if synsets:
        print("First synset definition:")
        print(f"  {synsets[0].definition()}\n")

    # Synonyms
    synonyms = get_synonyms(word)
    print(f"Synonyms ({len(synonyms)}):")
    print(f"  {_preview(synonyms, limit=10)}\n")

    # Antonyms
    antonyms = get_antonyms(word)
    print(f"Antonyms ({len(antonyms)}):")
    print(f"  {_preview(antonyms)}\n")

    # Hypernyms
    hypernyms = get_hypernyms(word, pos=pos)
    print(f"Hypernyms - More general ({len(hypernyms)}):")
    print(f"  {_preview(hypernyms, limit=10)}\n")

    # Hyponyms (already capped by the max_results argument)
    hyponyms = get_hyponyms(word, pos=pos, max_results=10)
    print("Hyponyms - More specific (showing first 10):")
    print(f"  {_preview(hyponyms)}\n")

    return {
        'word': word,
        'synsets_count': len(synsets),
        'synonyms': synonyms,
        'antonyms': antonyms,
        'hypernyms': hypernyms,
        'hyponyms': hyponyms
    }

# Example usage: analyse "car" with the part of speech set to noun; the
# returned summary dict is kept in `result`.
result = analyze_word_semantics("car", pos=wn.NOUN)


Semantic Analysis for: 'car'

Number of synsets: 5

First synset definition:
  a motor vehicle with four wheels; usually propelled by an internal combustion engine

Synonyms (10):
  auto, automobile, cable car, elevator car, gondola, machine, motorcar, railcar, railroad car, railway car

Antonyms (0):
  None found

Hypernyms - More general (4):
  automotive vehicle, compartment, motor vehicle, wheeled vehicle

Hyponyms - More specific (showing first 10):
  Model T, S.U.V., SUV, Stanley Steamer, ambulance, baggage car, beach waggon, beach wagon, bus, cab



## 8. Analyzing Text Data

In [12]:
def analyze_text_semantics(text, top_n=10):
    """
    Analyze semantic relationships in a text.

    The text is lower-cased and tokenized with ``word_tokenize``; English
    stopwords, tokens of two characters or fewer and tokens without any
    letter or digit are dropped. The remaining words are de-duplicated in
    order of first appearance and looked up in WordNet.

    Args:
        text: Input text.
        top_n: Maximum number of words to report. Words without a WordNet
            entry are skipped and do not count towards the limit.
            ``None`` means no limit.

    Returns:
        List of dicts, one per reported word, with keys 'word', 'synonyms'
        (first 5), 'antonyms' (all) and 'hypernyms' (first 3).
    """
    # Tokenize and clean
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words('english'))

    # Filter out stopwords, very short tokens and pure punctuation. The
    # alphanumeric check also rejects multi-character punctuation tokens such
    # as "...", which a substring test against string.punctuation lets through.
    words = [
        token for token in tokens
        if len(token) > 2
        and token not in stop_words
        and any(ch.isalnum() for ch in token)
    ]

    # Remove duplicates while preserving order
    unique_words = list(dict.fromkeys(words))

    results = []

    for word in unique_words:
        # Apply the limit to collected results so that words unknown to
        # WordNet do not use up slots.
        if top_n is not None and len(results) >= top_n:
            break
        if not wn.synsets(word):
            continue
        results.append({
            'word': word,
            'synonyms': get_synonyms(word)[:5],
            'antonyms': get_antonyms(word),
            'hypernyms': get_hypernyms(word)[:3]
        })

    return results

# Example text
sample_text = """
The quick brown fox jumps over the lazy dog.
The weather is beautiful today with bright sunshine.
The large elephant walked slowly through the dense forest.
"""

print("Analyzing text for semantic relationships...\n")
results = analyze_text_semantics(sample_text, top_n=8)

# (label, result key) pairs, printed in this order for every word
relation_fields = (
    ("Synonyms", "synonyms"),
    ("Antonyms", "antonyms"),
    ("Hypernyms", "hypernyms"),
)

for entry in results:
    print(f"\nWord: {entry['word']}")
    for label, key in relation_fields:
        terms = entry[key]
        print(f"  {label}: {', '.join(terms) if terms else 'None'}")

Analyzing text for semantic relationships...


Word: quick
  Synonyms: agile, fast, flying, immediate, nimble
  Antonyms: None
  Hypernyms: area, region

Word: brown
  Synonyms: Brown University, John Brown, Robert Brown, browned, brownish
  Antonyms: None
  Hypernyms: chromatic color, chromatic colour, color

Word: fox
  Synonyms: Charles James Fox, George Fox, bedevil, befuddle, confound
  Antonyms: None
  Hypernyms: Algonquian, Algonquian language, Algonquin

Word: jumps
  Synonyms: alternate, bound, chute, climb up, derail
  Antonyms: None
  Hypernyms: actuation, alter, appear

Word: lazy
  Synonyms: faineant, indolent, otiose, slothful, work-shy
  Antonyms: None
  Hypernyms: None

Word: dog
  Synonyms: Canis familiaris, andiron, blackguard, bounder, cad
  Antonyms: None
  Hypernyms: blighter, bloke, canid

Word: weather
  Synonyms: atmospheric condition, brave, brave out, conditions, endure
  Antonyms: None
  Hypernyms: angle, atmospheric phenomenon, crumble

Word: beautiful
  Syn

## 9. Creating a Semantic Relationships DataFrame

In [19]:
def create_semantic_dataframe(words_list):
    """
    Create a pandas DataFrame with semantic relationships for a list of words.

    Words for which ``wn.synsets`` returns nothing are skipped.

    Args:
        words_list: Iterable of words to look up.

    Returns:
        DataFrame with one row per word found in WordNet and the columns
        'Word', 'Definition' (of the first synset), 'Synonyms' (first 5),
        'Antonyms' (all), 'Hypernyms' (first 3) and 'Hyponyms' (requested
        with max_results=3). List-valued fields are joined with ', '.
        The columns are present even when no word was found.
    """
    columns = ['Word', 'Definition', 'Synonyms', 'Antonyms', 'Hypernyms', 'Hyponyms']
    data = []

    for word in words_list:
        synsets = wn.synsets(word)
        if not synsets:
            continue
        data.append({
            'Word': word,
            'Definition': synsets[0].definition(),
            'Synonyms': ', '.join(get_synonyms(word)[:5]),
            'Antonyms': ', '.join(get_antonyms(word)),
            'Hypernyms': ', '.join(get_hypernyms(word)[:3]),
            'Hyponyms': ', '.join(get_hyponyms(word, max_results=3))
        })

    # Passing the columns explicitly keeps the schema stable for empty input;
    # a DataFrame built from an empty list alone would have no columns.
    return pd.DataFrame(data, columns=columns)

# Example
words = ['happy', 'sad', 'dog', 'car', 'beautiful', 'run', 'eat']
df = create_semantic_dataframe(words)

# index=False: the default integer index carries no information and would
# otherwise be written to the CSV as an unnamed first column.
df.to_csv('wordnet.csv', index=False)

print("Semantic Relationships DataFrame:")
display(df)

Semantic Relationships DataFrame:


Unnamed: 0,Word,Definition,Synonyms,Antonyms,Hypernyms,Hyponyms
0,happy,enjoying or showing or marked by joy or pleasure,"felicitous, glad, well-chosen",unhappy,,
1,sad,experiencing or showing sorrow or unhappiness;...,"deplorable, distressing, lamentable, pitiful, ...",glad,,
2,dog,a member of the genus Canis (probably descende...,"Canis familiaris, andiron, blackguard, bounder...",,"blighter, bloke, canid","Belgian griffon, Brussels griffon, Great Pyrenees"
3,car,a motor vehicle with four wheels; usually prop...,"auto, automobile, cable car, elevator car, gon...",,"automotive vehicle, compartment, motor vehicle","Model T, S.U.V., SUV"
4,beautiful,delighting the senses or exciting intellectual...,,ugly,,
5,run,a score in baseball made by a runner touching ...,"be given, black market, bleed, break away, bunk","idle, malfunction","accompany, accomplish, action","MOT, MOT test, Ministry of Transportation test"
6,eat,take in solid food,"consume, corrode, deplete, eat on, eat up",,"consume, damage, drop","banquet, binge, bolt"


## 10. Finding Semantic Similarity

In [15]:
def semantic_similarity(word1, word2):
    """
    Score how similar two words are according to WordNet.

    Every synset of word1 is compared with every synset of word2 using
    path_similarity, and the best score is returned. Pairs for which
    path_similarity yields a falsy value (e.g. None) are ignored.

    Returns 0.0 when either word has no synsets or no pair produces a score.
    """
    senses_a = wn.synsets(word1)
    senses_b = wn.synsets(word2)

    if not (senses_a and senses_b):
        return 0.0

    # Keep only usable scores; the leading 0.0 is the floor returned when
    # nothing is comparable.
    usable_scores = [
        score
        for score in (a.path_similarity(b) for a in senses_a for b in senses_b)
        if score
    ]
    return max([0.0] + usable_scores)

# Example comparisons: each tuple is one pair of words to score
word_pairs = [
    ('dog', 'cat'),
    ('dog', 'car'),
    ('happy', 'joyful'),
    ('big', 'small'),
    ('car', 'automobile'),
]

print("Semantic Similarity Scores (0-1, higher = more similar):\n")
for first, second in word_pairs:
    # Words are padded to 12 characters so the scores line up in a column
    print(f"{first:12} <-> {second:12} : {semantic_similarity(first, second):.3f}")

Semantic Similarity Scores (0-1, higher = more similar):

dog          <-> cat          : 0.200
dog          <-> car          : 0.143
happy        <-> joyful       : 0.333
big          <-> small        : 0.333
car          <-> automobile   : 1.000


## 11. Advanced: Hypernym Tree

In [16]:
def get_hypernym_tree(word, max_depth=5):
    """
    Build the chain of increasingly general synsets above a word.

    Starts at the first synset of the word and, at every step, moves to the
    first hypernym of the current synset. The walk stops when a synset has
    no hypernyms or when max_depth names have been collected.

    Returns a list of synset names ordered from specific to general,
    or None when the word has no synsets.
    """
    senses = wn.synsets(word)
    if not senses:
        return None

    chain = []
    current = senses[0]  # Only the first synset is explored

    while len(chain) < max_depth:
        parents = current.hypernyms()
        chain.append(current.name())
        if not parents:
            break
        current = parents[0]  # Follow the first hypernym only

    return chain

# Example
word = "dog"
tree = get_hypernym_tree(word)

# Tree connector (box-drawing corner followed by a horizontal line), written
# with escapes so the source stays ASCII-only and survives encoding
# round-trips of the notebook file.
branch_marker = "\u2514\u2500"

print(f"Hypernym tree for '{word}' (from specific to general):\n")
if tree:
    for i, synset_name in enumerate(tree):
        indent = "  " * i
        synset = wn.synset(synset_name)
        # The root entry (i == 0) gets no connector
        marker = branch_marker if i > 0 else ''
        print(f"{indent}{marker} {synset_name}: {synset.definition()}")

Hypernym tree for 'dog' (from specific to general):

 dog.n.01: a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
  └─ domestic_animal.n.01: any of various animals that have been tamed and made fit for a human environment
    └─ animal.n.01: a living organism characterized by voluntary movement
      └─ organism.n.01: a living thing that has (or can develop) the ability to act or function independently
        └─ living_thing.n.01: a living (or once living) entity


## 12. Interactive Word Explorer

In [17]:
def explore_word(word):
    """
    Interactive exploration of a word's semantic relationships.

    Prints, in order: a banner, up to three of the word's synsets (name,
    definition and first example if any), the report produced by
    analyze_word_semantics, and the hypernym chain returned by
    get_hypernym_tree with max_depth=6.

    Args:
        word: Word to explore.

    Returns:
        None. If WordNet has no synsets for the word, a message is printed
        and nothing else is shown.
    """
    # Decorative glyphs are written with escapes so the source stays
    # ASCII-only and survives encoding round-trips of the notebook file.
    books_icon = "\U0001F4DA"       # "books" emoji
    tree_icon = "\U0001F333"        # "deciduous tree" emoji
    branch_marker = "\u2514\u2500"  # box-drawing corner + horizontal line
    root_marker = "\u25CF"          # filled circle

    print(f"\n{'#'*70}")
    print(f"# Exploring: {word.upper()}")
    print(f"{'#'*70}\n")

    # Get all synsets
    synsets = wn.synsets(word)

    if not synsets:
        print(f"No WordNet entries found for '{word}'")
        return

    print(f"{books_icon} Found {len(synsets)} different meanings\n")

    for i, synset in enumerate(synsets[:3], 1):  # Show first 3 meanings
        print(f"\n{i}. {synset.name()}")
        print(f"   Definition: {synset.definition()}")
        examples = synset.examples()
        if examples:
            print(f"   Example: {examples[0]}")

    print("\n" + "-"*70)

    # Semantic relationships
    analyze_word_semantics(word)

    # Hypernym tree
    print("\n" + "-"*70)
    print(f"\n{tree_icon} Hypernym Tree (Generalization Path):\n")
    tree = get_hypernym_tree(word, max_depth=6)
    if tree:
        for i, synset_name in enumerate(tree):
            indent = "  " * i
            synset = wn.synset(synset_name)
            marker = branch_marker if i > 0 else root_marker
            # Synset names look like "dog.n.01"; the middle field is the part of speech
            print(f"{indent}{marker} {synset.lemmas()[0].name()} ({synset_name.split('.')[1]})")

# Try it out! Prints the exploration report for "computer".
explore_word("computer")


######################################################################
# Exploring: COMPUTER
######################################################################

📚 Found 2 different meanings


1. computer.n.01
   Definition: a machine for performing calculations automatically

2. calculator.n.01
   Definition: an expert at calculation (or at operating calculating machines)

----------------------------------------------------------------------

Semantic Analysis for: 'computer'

Number of synsets: 2

First synset definition:
  a machine for performing calculations automatically

Synonyms (9):
  calculator, computing device, computing machine, data processor, electronic computer, estimator, figurer, information processing system, reckoner

Antonyms (0):
  None found

Hypernyms - More general (2):
  expert, machine

Hyponyms - More specific (showing first 10):
  Turing machine, actuary, adder, analog computer, analogue computer, client, digital computer, guest, home computer, host