In [1]:
import nltk
nltk.download('wordnet')
nltk.download('omw')

[nltk_data] Downloading package wordnet to /home/malyvsen/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw to /home/malyvsen/nltk_data...
[nltk_data]   Package omw is already up-to-date!


True

In [2]:
from nltk.corpus import wordnet
from nltk import ngrams

In [3]:
def synset_popularity(synset):
    return sum(lemma.count() for lemma in synset.lemmas())

synset_popularity(wordnet.synsets('cat')[0])

18

In [4]:
def contains(parent, child):
    return parent in child.closure(lambda x: x.hypernyms())

contains(wordnet.synsets('animal')[0], wordnet.synsets('human')[0])

True

In [5]:
def synset_word(synset):
    return synset.lemmas()[0].name().replace('_', ' ')

synset_word(wordnet.synsets('month')[0])

'calendar month'

In [6]:
class Node:
    def __init__(self, synset, children=None):
        self.synset = synset
        self.children = [] if children is None else children
    
    def __str__(self, level=0):
        indent = ' ' * (level - 1) * 2
        if level > 0:
            indent += '|-'
        word = synset_word(self.synset)
        return indent + word + '\n' + ''.join(child.__str__(level + 1) for child in self.children)

In [7]:
def build_tree(root, synset=None):
    update_root = synset is not None and synset_popularity(synset) > 0
    if update_root:
        new_node = Node(synset)
        root.children.append(new_node)
        root = new_node
    if synset is None:
        synset = root.synset
    for subset in synset.hyponyms():
        build_tree(root, subset)
    if update_root:
        root.children.sort(key=lambda child: synset_popularity(child.synset), reverse=True)
    return root

print(build_tree(Node(wordnet.synsets('alcohol')[0])))

alcohol
|-beer
|-liquor
  |-whiskey
    |-bourbon
  |-brandy
    |-applejack
  |-bitters
  |-bathtub gin
|-daiquiri
|-martini
|-Scotch and soda
|-hot toddy
|-wine
  |-champagne
  |-sherry
  |-port
  |-vermouth
  |-vintage



In [8]:
root_noun = wordnet.synsets('cat')[0].root_hypernyms()[0]
root_noun

Synset('entity.n.01')

In [9]:
noun_tree = build_tree(Node(root_noun))
with open('./english_tree.txt', 'w') as file:
    file.write(str(noun_tree))