In [192]:
import nltk
from nltk.corpus import wordnet as wn

import itertools
import random
import copy

In [241]:
def restem_fun(pos_tag):
    """
    Return a callable that re-inflects a word to match a Penn Treebank POS tag.

    WordNet hands back antonyms in their base form, so a substitute for e.g. a
    plural noun has to be pluralised again before it is put into the sentence.

    pos_tag is a Penn Treebank tag such as 'VBD', see
    https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html

    Tags handled: NNS/NNPS (pluralize), RBR/JJR (comparative), JJS (superlative)
    and VBD/VBN (past tense). Every other tag gets an identity function, so the
    word is returned unchanged.

    More inflections can be added from
    https://www.nodebox.net/code/index.php/Linguistics
    """
    # Imported here so that `pattern` is only required once a word is re-inflected.
    import pattern.en as en

    def to_past_tense(word):
        # en.conjugate can build many more verb forms than just the past tense.
        return en.conjugate(word, en.PAST)

    # (tags, inflector) pairs, checked in order; the tag groups do not overlap.
    inflectors = (
        (frozenset(['NNS', 'NNPS']), en.pluralize),
        (frozenset(['RBR', 'JJR']), en.comparative),
        (frozenset(['JJS']), en.superlative),  # RBS is skipped: WordNet leaves "most" unchanged
        (frozenset(['VBD', 'VBN']), to_past_tense),
    )

    for tags, inflect in inflectors:
        if pos_tag in tags:
            return inflect

    # No re-inflection known for this tag.
    return lambda word: word
        


In [242]:
def get_all_antonyms(word, pos=None):
    """
    Lazily yield the name of every WordNet antonym of `word`.

    Walks every synset of `word` (restricted to the WordNet part of speech
    `pos` when given), every lemma of each synset, and every antonym of each
    lemma. Names are yielded in that traversal order and duplicates are kept.
    """
    antonym_lemmas = (
        antonym
        for sense in wn.synsets(word, pos=pos)
        for lemma in sense.lemmas()
        for antonym in lemma.antonyms()
    )
    for antonym in antonym_lemmas:
        yield antonym.name()


In [243]:
# Penn Treebank POS tags (as produced by nltk.pos_tag) that are eligible for antonym
# substitution, grouped by the WordNet part of speech used to look the word up.
NOUN_POS_TAGS = frozenset(["NN", "NNS"])
# "JJR" (comparative adjective) was previously misspelt "JSR", so comparatives were never matched.
# VBN is included because a past participle is hard to tell apart from an adjective.
ADJ_POS_TAGS = frozenset(["JJ", "JJS", "JJR", "VBN"])
# VBD is left out on purpose, as it tends to be 'had' or 'were'.
# NOTE(review): "VBS" is not a Penn Treebank tag, so it never matches - possibly "VBZ" or "VBP"
# was intended; kept unchanged until that is confirmed.
VERB_POS_TAGS = frozenset(["VB", "VBS", "VBN", "VBG"])
ADVERB_POS_TAGS = frozenset(["RB", "RBS"])


def get_pos_sub_function(pos_tag_set, wordnet_tag):
    """
    Build an antonym finder for one part of speech.

    pos_tag_set -- the POS tags (as returned by nltk.pos_tag) to consider
    wordnet_tag -- the WordNet part of speech passed on to get_all_antonyms

    The returned generator function takes a sequence of words. For each word
    whose tag is in pos_tag_set and that has at least one antonym it yields
    (word, antonyms), or (position, antonyms) when called with index=True.
    The antonyms are a list, already re-inflected for the word's tag.
    """
    def find_substitutions(words, index = False):
        for position, (token, tag) in enumerate(nltk.pos_tag(words)):
            if tag not in pos_tag_set:
                continue

            reinflect = restem_fun(tag)
            candidates = [reinflect(antonym)
                          for antonym in get_all_antonyms(token, wordnet_tag)]
            if not candidates:
                continue  # nothing to substitute for this word

            key = position if index else token
            yield (key, candidates)

    return find_substitutions


# One antonym finder per part of speech. Each takes a sequence of words
# (plus an optional `index` flag) and yields (word-or-position, antonyms) pairs.
get_noun_subs = get_pos_sub_function(pos_tag_set=NOUN_POS_TAGS, wordnet_tag=wn.NOUN)
get_adj_subs = get_pos_sub_function(pos_tag_set=ADJ_POS_TAGS, wordnet_tag=wn.ADJ)
get_verb_subs = get_pos_sub_function(pos_tag_set=VERB_POS_TAGS, wordnet_tag=wn.VERB)
get_adverb_subs = get_pos_sub_function(pos_tag_set=ADVERB_POS_TAGS, wordnet_tag=wn.ADV)

In [244]:
# Scratch check: tokenize a sample sentence and display the POS tag nltk assigns to each token.
base_sent = "Mr. Jones had never thought"
base_words = nltk.tokenize.word_tokenize(base_sent)
nltk.pos_tag(base_words)


[('Mr.', 'NNP'),
 ('Jones', 'NNP'),
 ('had', 'VBD'),
 ('never', 'RB'),
 ('thought', 'VBN')]

In [245]:
# Scratch check of random.randint: per the Python docs both end points are inclusive.
random.randint(0,2)

0

In [246]:
def semantic_corruptions(sent):
    """
    Return `sent` with every substitutable word replaced by a random antonym.

    The sentence is tokenized, then the adjective, noun, adverb and verb
    finders are run over the tokens in that order. When two finders report the
    same position, the later one wins (so a VBN word ends up with its verb
    antonyms rather than its adjective antonyms). One antonym is drawn
    uniformly at random for each reported position.

    The tokens are joined back with single spaces, so punctuation ends up
    space-separated in the result.
    """
    tokens = nltk.tokenize.word_tokenize(sent)

    # position -> candidate antonyms; later finders overwrite earlier ones.
    replacements = {}
    for find_subs in (get_adj_subs, get_noun_subs, get_adverb_subs, get_verb_subs):
        for position, candidates in find_subs(tokens, index=True):
            replacements[position] = candidates

    for position, candidates in replacements.items():
        tokens[position] = random.choice(candidates)

    return " ".join(tokens)
    
    


In [247]:
# Demo. In the recorded output "most common" became "least uncommon" (both words were flipped),
# and the tokens were re-joined with single spaces ("( DT )", "English .").
semantic_corruptions("The article is the most common determiner (DT) in English.")

u'The article is the least uncommon determiner ( DT ) in English .'

In [256]:
# Demo on a short sentence. The antonym is drawn at random, so a re-run may give a different word.
semantic_corruptions("We may have a problem")

u'We may abstain a problem'

In [229]:
# Show the POS tag of every token in the demo sentence, to see which tokens are eligible for substitution.
nltk.pos_tag(nltk.tokenize.word_tokenize("The article is the most common determiner (DT) in English."))

[('The', 'DT'),
 ('article', 'NN'),
 ('is', 'VBZ'),
 ('the', 'DT'),
 ('most', 'RBS'),
 ('common', 'JJ'),
 ('determiner', 'NN'),
 ('(', ':'),
 ('DT', 'NNP'),
 (')', ':'),
 ('in', 'IN'),
 ('English', 'NNP'),
 ('.', '.')]

In [230]:
# Without a pos filter the antonyms come from every synset of the word; duplicates are not removed.
list(get_all_antonyms("most"))

[u'fewest', u'least', u'least']

In [231]:
# List every synset of "most" (the senses that get_all_antonyms walks through).
wn.synsets("most")

[Synset('most.a.01'),
 Synset('most.a.02'),
 Synset('most.r.01'),
 Synset('most.r.02'),
 Synset('about.r.07')]

In [71]:
def is_clear_adjective(word):
    """
    Return the single adjective synset of `word` if it is a "clear" adjective.

    To be clear:
      - every synset must be an adjective or a satellite adjective
        (i.e. the word has no noun, verb or adverb form)
      - there must be exactly one adjective synset (i.e. not two senses)

    Returns the adjective Synset when both conditions hold, otherwise an
    empty list (also when the word has no synsets at all).
    """
    synsets = wn.synsets(word)
    adjective_tags = (wn.ADJ, wn.ADJ_SAT)

    # Synset.pos is a method in the NLTK API used throughout this notebook, so it must be called.
    if not all(ss.pos() in adjective_tags for ss in synsets):
        return []  # Has mixed POS uses

    # A real list (not a lazy filter object), so len() and indexing also work on Python 3.
    adj_sets = [ss for ss in synsets if ss.pos() == wn.ADJ]
    if len(adj_sets) != 1:
        return []  # Has either no adjective senses or several

    return adj_sets[0]
        
    


In [104]:
# Inspect the part of speech of every synset of a sample word (Synset.pos is called as a method here).
word="children"
list(map(lambda ss: ss.pos(), wn.synsets(word)))

['n', 'n', 'n', 'n']

In [106]:
# Keep the second synset of `word` for a closer look.
ss= wn.synsets(word) [1]

In [109]:
# Lemmas that belong to the synset held in `ss`.
ss.lemmas()

[Lemma('child.n.02.child'), Lemma('child.n.02.kid')]

TypeError: tree() missing 1 required positional argument: 'rel'

In [39]:
# NOTE(review): `ll` is not defined in any visible cell - presumably a Lemma left over from a
# deleted or out-of-order cell; this cell would fail on a fresh kernel.
ll.antonyms()

[]

In [42]:
# All synsets of "hot", kept so that individual senses can be inspected.
sss=list(wn.synsets("hot"))

In [53]:
# Gloss of the first sense of "hot".
sss[0].definition()

'used of physical heat; having a high or higher than desirable temperature or giving off heat or feeling or causing a sensation of heat or burning'

In [50]:
# Select the third sense of "hot". NOTE: this reuses the name `ss`, which another cell binds to a synset of `word`.
ss=sss[2]

In [52]:
# Gloss of the sense currently held in `ss`.
ss.definition()

'extended meanings; especially of psychological heat; marked by intensity or vehemence especially of passion or enthusiasm'

In [54]:
# Leftover interactive lookup of the builtin any(); nothing else in the notebook depends on it.
help(any)

Help on built-in function any in module builtins:

any(...)
    any(iterable) -> bool
    
    Return True if bool(x) is True for any x in the iterable.
    If the iterable is empty, return False.

