In [2]:
import pandas

# Read the sample terms, one per line.  The context manager guarantees the
# file handle is closed as soon as the lines have been read.
with open('terms_sample_100.txt', 'r') as text_file:
    terms = text_file.readlines()

# Drop the trailing newline (and any other trailing whitespace) from each term.
cleaned_terms = [x.rstrip() for x in terms]


#print (cleaned_terms)

Check whether the terms from the list are present in WordNet:

In [3]:
from nltk.corpus import wordnet as wn
# All lemma names known to WordNet, kept in a set for fast membership tests.
wn_lemmas = set(wn.all_lemma_names())

# Tally the raw terms that match a WordNet lemma name exactly;
# whatever is left over counts as missing.
count_present = sum(1 for term in cleaned_terms if term in wn_lemmas)
count_missing = len(cleaned_terms) - count_present

print ("In wordNet: ", count_present, "not in wordnet: ", count_missing )

In wordNet:  60 not in wordnet:  40


In [4]:

# Repeat the lookup after lower-casing every term, to see whether
# capitalisation alone explains the misses.
lowercased_terms = [term.lower() for term in cleaned_terms]

lower_present = sum(1 for term in lowercased_terms if term in wn_lemmas)
lower_missing = len(lowercased_terms) - lower_present

print ("In wordNet: ", lower_present, "not in wordnet: ", lower_missing )


In wordNet:  60 not in wordnet:  40


Many terms are missing, so we need to lemmatize the words in the list. The code below is mostly a copy-paste from [here](https://rustyonrampage.github.io/text-mining/2017/11/23/stemming-and-lemmatization-with-python-and-nltk.html).

In [5]:
from nltk.corpus import wordnet as wn
from nltk.stem.wordnet import WordNetLemmatizer
from collections import Counter 

# Single lemmatizer instance, reused for every term lemmatized in this cell.
wnl = WordNetLemmatizer()

# Filled by the loop at the end of this cell: one lemma per entry of lowercased_terms.
lemmatized_terms = []

def get_pos(word):
    """Guess the WordNet part-of-speech tag to use when lemmatizing ``word``.

    Counts the synsets returned by ``wn.synsets(word)`` per part of speech and
    returns the tag that has the most synsets.  Satellite-adjective synsets
    (tag ``"s"``) are counted as adjectives (``"a"``) so that adjective senses
    are not under-counted.

    Args:
        word: the term to look up.

    Returns:
        One of ``"n"``, ``"v"``, ``"a"`` or ``"r"``.  Ties are resolved in that
        order, so a word without any synsets yields ``"n"``.
    """
    # Seed every tag with zero: the insertion order fixes the tie-break order.
    pos_counts = Counter({"n": 0, "v": 0, "a": 0, "r": 0})

    for synset in wn.synsets(word):
        pos = synset.pos()
        if pos == "s":
            # Satellite adjectives are still adjectives for lemmatization purposes.
            pos = "a"
        if pos in pos_counts:
            pos_counts[pos] += 1

    # max() returns the first key holding the highest count, which gives the
    # documented n, v, a, r tie-break.
    return max(pos_counts, key=pos_counts.get)

# The terms are already lower-cased (they come from lowercased_terms), so each
# one is lemmatized directly with the part of speech guessed by get_pos.
for word in lowercased_terms:
    lemmatized_terms.append(wnl.lemmatize(word, get_pos(word)))
 



Now let us search WordNet using the lemmatized version of the list (with the help of [this](https://stackoverflow.com/questions/48805672/printing-synsets-of-words-in-a-list-in-python)):

In [6]:
# Rebuild the lemma-name lookup set and start both tallies from zero for the lemmatized run.
wn_lemmas = set(wn.all_lemma_names())
count_present, count_missing = 0, 0

def get_synsets(word_term):
    """Collect WordNet information about ``word_term``.

    Args:
        word_term: the (lemmatized) term to look up with ``wn.synsets``.

    Returns:
        A 6-tuple ``(definition, examples, synonyms, antonyms, hyponyms,
        hypernyms)``:

        * ``definition`` - definition of the first synset, or ``None`` when
          the term has no synsets.
        * ``examples`` - example sentences of the first synset.
        * ``synonyms`` - set of lemma names over all synsets of the term.
        * ``antonyms`` - set with the name of the first antonym of every
          lemma that has one, over all synsets.
        * ``hyponyms`` / ``hypernyms`` - lists of Synset objects from the
          hyponym / hypernym closure of every synset of the term, without
          duplicates, in first-seen order.
    """
    syns = wn.synsets(word_term)
    if not syns:
        # Unknown term: return empty results instead of failing on syns[0].
        return None, [], set(), set(), [], []

    first_sense = syns[0]
    definition = first_sense.definition()
    examples = first_sense.examples()

    synonyms = set()
    antonyms = set()
    # Dicts are used as insertion-ordered sets so that closures of different
    # senses are merged without duplicates and without losing their order.
    hyponyms = {}
    hypernyms = {}

    for syn in syns:
        # Accumulate over every sense; assigning here instead would keep only
        # the closure of the last synset.
        hyponyms.update(dict.fromkeys(syn.closure(lambda s: s.hyponyms())))
        hypernyms.update(dict.fromkeys(syn.closure(lambda s: s.hypernyms())))

        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
            lemma_antonyms = lemma.antonyms()
            if lemma_antonyms:
                antonyms.add(lemma_antonyms[0].name())

    return definition, examples, synonyms, antonyms, list(hyponyms), list(hypernyms)

# Report the WordNet details of every lemmatized term that WordNet knows,
# and keep a tally of hits and misses along the way.
for term in lemmatized_terms:
    if term not in wn_lemmas:
        # Nothing to report for terms WordNet does not know.
        count_missing += 1
        continue

    print ("============", term, "==========")
    count_present += 1

    defs, examples, set_synonyms, set_antonyms, hyponyms, hypernyms = get_synsets(term)
    print ("Definition: ", defs)
    print ("Examples: ", examples )
    print ("Synonyms: ", set_synonyms )
    print ("Hyponyms: ", hyponyms)
    print ("Hypernyms: ", hypernyms)

print ("Lemmatized version is in wordNet: ", count_present, "Lemmatized version is still not in wordnet: ", count_missing )

Definition:  the male reproductive cell; the male gamete
Examples:  ['a sperm is mostly a nucleus surrounded by little other cellular material']
Synonyms:  {'spermatozoan', 'sperm', 'spermatozoon', 'sperm_cell'}
Hyponyms:  []
Hypernyms:  [Synset('gamete.n.01'), Synset('reproductive_cell.n.01'), Synset('cell.n.02'), Synset('living_thing.n.01'), Synset('whole.n.02'), Synset('object.n.01'), Synset('physical_entity.n.01'), Synset('entity.n.01')]
Definition:  the state of being unable to produce offspring; in a woman it is an inability to conceive; in a man it is an inability to impregnate
Examples:  []
Synonyms:  {'infertility', 'sterility'}
Hyponyms:  [Synset('barrenness.n.01'), Synset('cacogenesis.n.01'), Synset('dysgenesis.n.01'), Synset('impotence.n.02'), Synset('erectile_dysfunction.n.01')]
Hypernyms:  [Synset('physical_condition.n.01'), Synset('condition.n.01'), Synset('state.n.02'), Synset('attribute.n.02'), Synset('abstraction.n.06'), Synset('entity.n.01')]
Definition:  any small c

As for hyponyms and hypernyms, we have to keep the Synset objects rather than just the words; see the discussion here:
https://stackoverflow.com/questions/45388056/how-to-remove-synset-and-pos-tag-numbers-on-hypernyms-and-hyponyms
Manual selection of the domain-relevant hyponyms/hypernyms is therefore needed.