## Import necessary packages.

In [1]:
import nltk
# These NLTK resources only need to be downloaded once; uncomment and run the lines below the first time (e.g. when running this notebook in Binder).
#nltk.download('wordnet')
#nltk.download('brown')
#nltk.download('punkt')
#nltk.download('averaged_perceptron_tagger')
import ontology_category as oc

## Initialize the ontology categorizer.

In [2]:
# Build the categorizer once; the cells below query this shared `svo` object.
svo = oc.init_svo()

## Demonstrate the different functions available through the ontology categorizer.

- **What is?** (`svo.what_is(term)`): long-form output showing which categories each WordNet sense of a term belongs to.
- **Is cat?** (`svo.is_cat(term, category)`): does a sense of the term belong to one specific category?

In [3]:
# One row per WordNet sense of 'precipitation', with a yes/no flag for each category column.
svo.what_is('precipitation')

Unnamed: 0,term,wordnet_ss_index,definition,pos,amount.2,quantity.0,quantity,action.3,process.5,process,phenomenon.0,phenomenon,event.0,property.1,attribute.1,property
0,precipitation,0.0,the quantity of water falling to earth at a sp...,n,yes,yes,yes,no,no,no,no,no,no,no,no,no
1,precipitation,1.0,the process of forming a chemical precipitate,n,no,no,no,yes,yes,yes,no,no,no,no,no,no
2,precipitation,2.0,the falling to earth of any form of water (rai...,n,no,no,no,no,yes,yes,yes,yes,no,no,no,no
3,precipitation,3.0,the act of casting down or falling headlong fr...,n,no,no,no,no,no,yes,no,no,yes,no,no,no
4,precipitation,4.0,an unexpected acceleration or hastening,n,no,no,no,no,no,yes,no,no,yes,no,no,no
5,precipitation,5.0,overly eager speed (and possible carelessness),n,no,no,no,no,no,no,no,no,no,yes,yes,yes


In [4]:
# Same senses as above, but reporting a single category column ('phenomenon').
svo.is_cat('precipitation','phenomenon')

Unnamed: 0,term,wordnet_ss_index,definition,pos,phenomenon
0,precipitation,0.0,the quantity of water falling to earth at a sp...,n,no
1,precipitation,1.0,the process of forming a chemical precipitate,n,no
2,precipitation,2.0,the falling to earth of any form of water (rai...,n,yes
3,precipitation,3.0,the act of casting down or falling headlong fr...,n,no
4,precipitation,4.0,an unexpected acceleration or hastening,n,no
5,precipitation,5.0,overly eager speed (and possible carelessness),n,no


In [5]:
# A term with many senses; in the recorded output several senses match no category at all.
svo.what_is('security')

Unnamed: 0,term,wordnet_ss_index,definition,pos,attribute.1,property,condition.0,state.1,state,object.0,phenomenon,action.0,act.1,event.0,process
0,security,0.0,the state of being free from danger or injury,n,yes,yes,yes,yes,yes,no,no,no,no,no,no
1,security,1.0,defense against financial failure; financial i...,n,no,no,no,no,no,no,no,no,no,no,no
2,security,2.0,freedom from anxiety or fear,n,yes,yes,no,yes,yes,no,no,no,no,no,no
3,security,3.0,a formal declaration that documents a fact of ...,n,no,no,no,no,no,no,no,no,no,no,no
4,security,4.0,property that your creditor can claim in case ...,n,no,no,no,no,no,no,no,no,no,no,no
5,security,5.0,a department responsible for the security of t...,n,no,no,no,no,no,no,no,no,no,no,no
6,security,6.0,a guarantee that an obligation will be met,n,no,no,no,no,no,no,no,no,no,no,no
7,security,7.0,an electrical device that sets off an alarm wh...,n,no,no,no,no,no,yes,yes,no,no,no,no
8,security,8.0,measures taken as a precaution against theft o...,n,no,no,no,no,no,no,no,yes,yes,yes,yes


In [6]:
# In the recorded output only the two "substance" senses are flagged (matter / phenomenon).
svo.what_is('food')

Unnamed: 0,term,wordnet_ss_index,definition,pos,matter.2,phenomenon
0,food,0.0,any substance that can be metabolized by an an...,n,yes,yes
1,food,1.0,any solid substance (as opposed to liquid) tha...,n,yes,yes
2,food,2.0,anything that provides mental stimulus for thi...,n,no,no


In [7]:
# 'coping' mixes a noun sense with verb senses; see the `pos` and `verb` columns in the output.
svo.what_is('coping')

Unnamed: 0,term,wordnet_ss_index,definition,pos,object.0,phenomenon,verb,act.5,process
0,coping,0.0,brick that is laid sideways at the top of a wall,n,yes,yes,no,no,no
1,coping,1.0,come to terms with,v,no,no,yes,yes,yes
2,coping,2.0,take by theft,v,no,no,yes,no,yes
3,coping,3.0,take into custody,v,no,no,yes,no,yes


In [8]:
# Senses of 'morbidity' split between a quantity (ratio) reading and state/property readings.
svo.what_is('morbidity')

Unnamed: 0,term,wordnet_ss_index,definition,pos,ratio.0,quantitative_relation.0,quantity,attribute.1,property,condition.0,state.1,state
0,morbidity,0.0,the relative incidence of a particular disease,n,yes,yes,yes,no,no,no,no,no
1,morbidity,1.0,an abnormally gloomy or unhealthy state of mind,n,no,no,no,yes,yes,yes,yes,yes
2,morbidity,2.0,the quality of being unhealthful and generally...,n,no,no,no,yes,yes,no,no,no


In [9]:
# 'cost' has noun and verb senses; the recorded output flags the verb senses under `verb` and `process`.
svo.what_is('cost')

Unnamed: 0,term,wordnet_ss_index,definition,pos,attribute.1,property,verb,process
0,cost,0.0,the total spent for goods or services includin...,n,no,no,no,no
1,cost,1.0,the property of having material worth (often i...,n,yes,yes,no,no
2,cost,2.0,value measured by what must be given or done o...,n,yes,yes,no,no
3,cost,3.0,be priced at,v,no,no,yes,yes
4,cost,4.0,"require to lose, suffer, or sacrifice",v,no,no,yes,yes


## Sandbox: how a basic algorithm grounds a given term using the terms in its definition

In [10]:
# Seed tables used to find nouns in term definitions and decide which phenomena,
# processes, properties, etc. they relate to.  Each table lists (word, sense_indices)
# pairs; get_synsets() resolves every index against nltk.corpus.wordnet.synsets(word).
# TODO: condense these tables.

phen_hyp = [
    ('process', [5]),
    ('event', [0]),
    ('phenomenon', [0]),
]
process_hyp = [
    ('act', [1]),
    ('action', [0, 1, 3, 4]),
]
object_hyp = [
    ('object', [0, 2, 3, 4]),
    ('system', [1, 4, 5]),
    ('body', [0, 3, 8]),
    ('matter', [2]),
    ('form', [2, 5]),
]
quantity_hyp = [
    ('quantity', [0, 2]),
    ('property', [1, 3]),
    ('amount', [0, 2]),
]
state_hyp = [
    ('attribute', [0, 1]),
    ('condition', [0, 1, 2]),
    ('state', [1, 4]),
]

def get_synsets(hyp_set):
    """Resolve (word, sense_indices) pairs into a flat list of WordNet synsets.

    Parameters
    ----------
    hyp_set : iterable of (str, iterable of int)
        Each pair names a word and the positions to pick from the list
        returned by ``nltk.corpus.wordnet.synsets(word)``.

    Returns
    -------
    list
        The selected synsets, in table order and then index order.

    Raises
    ------
    IndexError
        If an index is out of range for the senses found for its word.
    """
    synsets = []
    for word, indices in hyp_set:
        # Look the word up once per table entry rather than once per index.
        senses = nltk.corpus.wordnet.synsets(word)
        for i in indices:
            try:
                synsets.append(senses[i])
            except IndexError:
                # Same exception type as before, but say which entry is wrong.
                raise IndexError(
                    "no sense %r for word %r (%d senses found)"
                    % (i, word, len(senses))
                ) from None
    return synsets

# Resolve every seed table into its concrete list of WordNet synsets.
(process_synsets,
 phen_synsets,
 object_synsets,
 quantity_synsets,
 state_synsets) = (
    get_synsets(table)
    for table in (process_hyp, phen_hyp, object_hyp, quantity_hyp, state_hyp)
)

def check_hypernym(tree):
    """Flatten a nested list into a single flat list.

    Elements are visited depth-first, left to right.  Only ``list`` instances
    are descended into; every other element (tuples included) is kept as-is.
    Empty sub-lists contribute nothing.
    """
    def _leaves(nodes):
        for node in nodes:
            if isinstance(node, list):
                yield from _leaves(node)
            else:
                yield node

    return list(_leaves(tree))

def intersection(lst1, lst2):
    """Return the items of ``lst1`` that also occur in ``lst2``.

    The order and any duplicates of ``lst1`` are preserved.
    """
    shared = []
    for item in lst1:
        if item in lst2:
            shared.append(item)
    return shared

def categorize_noun(noun):
    """Label every noun sense of ``noun`` with the coarse categories it falls under.

    For each WordNet synset of ``noun`` whose part of speech is ``'n'``, the
    synset's hypernym tree is flattened and compared against the module-level
    category synset lists.  Non-noun synsets are skipped.

    Returns
    -------
    list
        One ``[labels, synset]`` pair per noun sense.  ``labels`` is a list
        drawn, in this order, from 'state', 'process', 'quantity', 'object'
        and 'phenomenon'; it is empty when no category matches.
    """
    senses = nltk.corpus.wordnet.synsets(noun)
    # Checked in this fixed order so every label list is ordered the same way.
    category_tables = (
        ('state', state_synsets),
        ('process', process_synsets),
        ('quantity', quantity_synsets),
        ('object', object_synsets),
        ('phenomenon', phen_synsets),
    )
    results = []
    for sense in senses:
        if sense.pos() != 'n':
            continue
        ancestors = check_hypernym(sense.tree(lambda node: node.hypernyms()))
        labels = [
            label
            for label, members in category_tables
            if intersection(ancestors, members) != []
        ]
        results.append([labels, sense])
    return results
                    
# Tokenize a phrase, pick out its nouns, and print their category tree.

def print_noun_tree(phrase, depth, max_depth=2):
    """Print the categories of every noun in ``phrase``, expanding definitions recursively.

    The phrase is tokenized and POS-tagged with NLTK; each token whose tag
    starts with 'NN' is passed to ``categorize_noun``.  One line is printed
    per noun sense (the noun and its category labels).  When a sense is
    labelled 'state' or 'phenomenon' and ``depth`` is below ``max_depth``, its
    definition is printed and expanded in turn, one level deeper.

    Parameters
    ----------
    phrase : str
        Text to scan for nouns.
    depth : int
        Current nesting level; every printed line is prefixed with this many tabs.
    max_depth : int, optional
        Definitions are only expanded while ``depth < max_depth``.  The
        default of 2 reproduces the previously hard-coded limit.

    Returns
    -------
    None
        Output is written with ``print``.
    """
    indent = '\t' * depth
    tagged = nltk.pos_tag(nltk.word_tokenize(phrase))
    nouns = [word for word, tag in tagged if tag.startswith('NN')]
    for noun in nouns:
        for labels, sense in categorize_noun(noun):
            print(indent, noun, labels)
            expandable = 'state' in labels or 'phenomenon' in labels
            if depth < max_depth and expandable:
                definition = sense.definition()
                print(indent, definition)
                print_noun_tree(definition, depth + 1, max_depth)

In [11]:
# Expand 'drought' starting at depth 0, so top-level lines get no tab indentation.
print_noun_tree('drought',0)

 drought ['state']
 a shortage of rainfall
	 shortage ['state', 'quantity']
	 the property of being an amount by which something is less than expected or required
		 property []
		 property ['state', 'quantity']
		 property ['object']
		 property ['state', 'quantity']
		 property ['object']
		 amount ['quantity']
		 amount ['state', 'quantity']
		 amount ['quantity']
		 amount ['quantity']
	 shortage ['state']
	 an acute insufficiency
		 insufficiency ['state']
		 insufficiency ['state']
		 insufficiency ['state', 'quantity']
	 rainfall ['phenomenon']
	 water falling in drops from vapor condensed in the atmosphere
		 water ['object']
		 water []
		 water ['object']
		 water ['object']
		 water ['object']
		 water ['object']
		 drops ['state', 'object']
		 drops ['quantity']
		 drops ['state', 'quantity']
		 drops ['object']
		 drops ['object']
		 drops ['phenomenon']
		 drops ['object']
		 drops ['object']
		 drops ['process', 'phenomenon']
		 vapor ['object']
		 vapor ['process', 'phe