In [2]:
from nltk import word_tokenize
from nltk.corpus import wordnet as wn

In [3]:
# Target word whose WordNet senses are looked up below via wn.synsets(my_word).
my_word = 'victim'

In [4]:
# For every WordNet sense of `my_word`, show the synset, its definition,
# its usage examples and its lemma names, then leave one blank line.
for ss in wn.synsets(my_word):
    print(ss, ss.definition(), ss.examples(), ss.lemma_names(), sep='\n', end='\n\n')

Synset('victim.n.01')
an unfortunate person who suffers from some adverse circumstance
[]
['victim']

Synset('victim.n.02')
a person who is tricked or swindled
[]
['victim', 'dupe']



In [5]:
# Synset used as the root for the hypernym/hyponym exploration below.
# NOTE: the synset name is hard-coded here rather than derived from `my_word`;
# keep the two in sync when changing the target word.
my_ss = wn.synset('victim.n.01')

In [6]:
def all_hypernyms(ss):
    """Return the transitive closure of ``ss`` under the hypernym relation.

    Delegates to ``ss.closure`` with a callable that maps a synset to its
    direct hypernyms; whatever ``closure`` returns is handed back unchanged.
    """
    def direct_hypernyms(synset):
        return synset.hypernyms()

    return ss.closure(direct_hypernyms)

In [7]:
def all_hyponyms(ss):
    """Return the transitive closure of ``ss`` under the hyponym relation.

    Delegates to ``ss.closure`` with a callable that maps a synset to its
    direct hyponyms; whatever ``closure`` returns is handed back unchanged.
    """
    def direct_hyponyms(synset):
        return synset.hyponyms()

    return ss.closure(direct_hyponyms)

In [8]:
# Inspect every WordNet sense of 'person': synset, definition, examples and
# lemma names, with one blank line after each sense.
for ss in wn.synsets('person'):
    print(ss, ss.definition(), ss.examples(), ss.lemma_names(), sep='\n', end='\n\n')

Synset('person.n.01')
a human being
['there was too much for one person to do']
['person', 'individual', 'someone', 'somebody', 'mortal', 'soul']

Synset('person.n.02')
a human body (usually including the clothing)
['a weapon was hidden on his person']
['person']

Synset('person.n.03')
a grammatical category used in the classification of pronouns, possessive determiners, and verb forms according to whether they indicate the speaker, the addressee, or a third party
['stop talking about yourself in the third person']
['person']



In [89]:
def is_a_person(ss):
    """Tell whether synset ``ss`` denotes a person.

    A synset counts as a person when any of the following holds:

    * its lexicographer file name (``ss.lexname()``) is ``'noun.person'``;
    * it is the ``person.n.01`` synset itself;
    * ``person.n.01`` appears among its (transitive) hypernyms.

    Returns:
        bool: True if one of the conditions above is met, False otherwise.
    """
    # Cheapest check first: no WordNet lookup or graph traversal needed.
    if ss.lexname() == 'noun.person':
        return True

    # Look the reference synset up once instead of once per comparison.
    person = wn.synset('person.n.01')
    if ss == person:
        return True

    # Membership on the iterable stops as soon as `person` is found, so the
    # whole hypernym closure no longer has to be materialised into a list.
    return person in all_hypernyms(ss)

In [7]:
# Walk every synset returned by all_hyponyms(my_ss) and show its synset,
# definition, examples and lemma names, one blank line after each.
for ss in all_hyponyms(my_ss):
    print(ss, ss.definition(), ss.examples(), ss.lemma_names(), sep='\n', end='\n\n')

Synset('casualty.n.01')
someone injured or killed or captured or missing in a military engagement
[]
['casualty']

Synset('casualty.n.02')
someone injured or killed in an accident
[]
['casualty', 'injured_party']

Synset('hunted_person.n.01')
a person who is hunted
[]
['hunted_person']

Synset('martyr.n.01')
one who suffers for the sake of principle
[]
['martyr', 'sufferer']

Synset('martyr.n.02')
one who voluntarily suffers death as the penalty for refusing to renounce their religion
[]
['martyr']

Synset('muggee.n.01')
a victim of a mugging
['the law seems to give more protection to the mugger than to the muggee']
['muggee']

Synset('murderee.n.01')
a victim who is murdered
[]
['murderee']

Synset('poor_devil.n.01')
someone you feel sorry for
[]
['poor_devil', 'wretch']

Synset('prey.n.01')
a person who is the aim of an attack (especially a victim of ridicule or exploitation) by some hostile person or influence
['he fell prey to muggers', 'everyone was fair game', 'the target of a ma

In [66]:
# Walk every synset returned by all_hypernyms(my_ss) and show its synset,
# definition, examples and lemma names, one blank line after each.
for ss in all_hypernyms(my_ss):
    print(ss, ss.definition(), ss.examples(), ss.lemma_names(), sep='\n', end='\n\n')

Synset('unfortunate.n.01')
a person who suffers misfortune
[]
['unfortunate', 'unfortunate_person']

Synset('person.n.01')
a human being
['there was too much for one person to do']
['person', 'individual', 'someone', 'somebody', 'mortal', 'soul']

Synset('causal_agent.n.01')
any entity that produces an effect or is responsible for events or results
[]
['causal_agent', 'cause', 'causal_agency']

Synset('organism.n.01')
a living thing that has (or can develop) the ability to act or function independently
[]
['organism', 'being']

Synset('physical_entity.n.01')
an entity that has physical existence
[]
['physical_entity']

Synset('living_thing.n.01')
a living (or once living) entity
[]
['living_thing', 'animate_thing']

Synset('entity.n.01')
that which is perceived or known or inferred to have its own distinct existence (living or nonliving)
[]
['entity']

Synset('whole.n.02')
an assemblage of parts that is regarded as a single entity
['how big is that part compared to the whole?', 'the te

In [107]:
# Build the vocabulary around `my_ss`: every token of the definition and of
# the usage examples, plus every lemma name, for `my_ss` itself and for each
# synset returned by all_hyponyms(my_ss).
#
# The root synset and its hyponyms are handled by one loop (the original
# duplicated the body), and the set is grown in place with `update` instead
# of being copied by `union` on every step. The resulting set is the same.
all_words = set()
for ss in [my_ss] + list(all_hyponyms(my_ss)):
    all_words.update(word_tokenize(ss.definition()))
    all_words.update(ss.lemma_names())  # lemma names are added as-is, not tokenized
    for ex in ss.examples():
        all_words.update(word_tokenize(ex))

In [108]:
# Number of distinct entries in the `all_words` set.
len(all_words)

133

In [109]:
# Display the whole `all_words` set for eyeballing (includes punctuation and
# short function words; nothing has been filtered at this point).
all_words

{'(',
 ')',
 '69-155',
 ';',
 'Arabic',
 'Christian',
 'Greek',
 'Palestinians',
 'Polycarp',
 'Saint_Polycarp',
 'Smyrna',
 'St._Polycarp',
 'a',
 'accident',
 'adverse',
 'aim',
 'an',
 'and',
 'anger',
 'another',
 'applied',
 'as',
 'attack',
 'back',
 'bag',
 'because',
 'being',
 'bishop',
 'bombers',
 'boss',
 'burned',
 'by',
 'captured',
 'casualty',
 'chased',
 'circa',
 'circumstance',
 'death',
 'engagement',
 'errors',
 'especially',
 'everyone',
 'exploitation',
 'fair',
 'fair_game',
 'faith',
 'feel',
 'fell',
 'film',
 'for',
 'forth',
 'from',
 'game',
 'give',
 'he',
 'him',
 'his',
 'holy',
 'hostile',
 'hunted',
 'hunted_person',
 'in',
 'influence',
 'injured',
 'injured_party',
 'is',
 'jumped',
 'killed',
 'law',
 'manhunt',
 'martyr',
 'martyrs',
 'military',
 'missing',
 'more',
 'muggee',
 'mugger',
 'muggers',
 'mugging',
 'murdered',
 'murderee',
 'of',
 'on',
 'one',
 'or',
 'others',
 'pagans',
 'penalty',
 'person',
 'poor_devil',
 'prey',
 'principle',


In [111]:
# Sort every token longer than two characters into part-of-speech buckets
# according to the POS tags of its WordNet senses. A token lands in every
# bucket for which it has at least one sense, so the buckets may overlap.
nouns, verbs, adjectives = set(), set(), set()
# 'a' (adjective) and 's' (satellite adjective) share one bucket; senses with
# any other tag are ignored.
pos_to_bucket = {'n': nouns, 'v': verbs, 'a': adjectives, 's': adjectives}
for w in all_words:
    if len(w) <= 2:
        continue
    for ss in wn.synsets(w):
        bucket = pos_to_bucket.get(ss.pos())
        if bucket is not None:
            bucket.add(w)

In [112]:
# Number of tokens that have at least one noun sense.
len(nouns)

79

In [127]:
# For each noun token, in sorted order, show every noun sense WordNet has for
# it: the token, the synset and the definition, then one blank line.
for w in sorted(nouns):
    for ss in wn.synsets(w):
        if ss.pos() != 'n':
            continue  # skip verb/adjective/adverb senses of the same token
        print(w, ss, ss.definition(), sep='\n', end='\n\n')

Arabic
Synset('arabic.n.01')
the Semitic language of the Arabs; spoken in a variety of dialects

Christian
Synset('christian.n.01')
a religious person who believes Jesus is the Christ and who is a member of a Christian denomination

Greek
Synset('greek.n.01')
the Hellenic branch of the Indo-European family of languages

Greek
Synset('greek.n.02')
a native or inhabitant of Greece

Palestinians
Synset('palestinian.n.01')
a descendant of the Arabs who inhabited Palestine

Polycarp
Synset('polycarp.n.01')
Greek bishop of Smyrna who refused to recant his Christian faith and was burned to death by pagans (circa 69-155)

Saint_Polycarp
Synset('polycarp.n.01')
Greek bishop of Smyrna who refused to recant his Christian faith and was burned to death by pagans (circa 69-155)

Smyrna
Synset('izmir.n.01')
a port city in western Turkey

St._Polycarp
Synset('polycarp.n.01')
Greek bishop of Smyrna who refused to recant his Christian faith and was burned to death by pagans (circa 69-155)

accident
Syns

In [None]:
# Hand-picked names of the noun synsets kept from the listing of noun senses.
# Entries are synset-name strings, not Synset objects.
# Each name appears exactly once: the original literal repeated 'prey.n.01'
# and 'pursued.n.01', which a set silently collapses anyway.
selected_noun_sss = {
    'accident.n.01',
    'accident.n.02',
    'anger.n.01',
    'anger.n.02',
    'attack.n.01',
    'bomber.n.01',
    'bomber.n.02',
    'casualty.n.01',
    'fatal_accident.n.01',
    'casualty.n.04',
    'pursued.n.01',
    'death.n.01',
    'death.n.02',
    'death.n.03',
    'death.n.05',
    'death.n.08',
    'battle.n.01',
    'exploitation.n.02',
    'prey.n.01',
    'religion.n.01',
    'religion.n.02',
    'hostile.n.01',
    'hunted_person.n.01',
    'casualty.n.02',
    'law.n.01',
    'law.n.02',
    'law.n.03',
    'jurisprudence.n.01',
    'police.n.01',
    'manhunt.n.01',
    'martyr.n.01',
    'martyr.n.02',
    'military.n.01',
    'muggee.n.01',
    'mugger.n.01',
    'mugging.n.01',
    'murderee.n.01',
    'heathen.n.01',
    'punishment.n.01',
    'penalty.n.02',
    'penalty.n.03',
    'person.n.01',
    'person.n.02',
    'poor_devil.n.01',
    'prey.n.02',
    'protection.n.01',
    'protective_covering.n.01',
    'security.n.02',
    'protection.n.04',
    'auspices.n.01',
    'protection.n.06',
    'protection.n.07',
    'punching_bag.n.01',
    'pursuer.n.01',
    'ridicule.n.01',
    'derision.n.02',
    'scapegoat.n.01',
    'suicide.n.01',
    'unfortunate.n.01',
    'victim.n.01',
    'victim.n.02',
    'world_health_organization.n.01',
}

In [113]:
# Number of tokens that have at least one verb sense.
len(verbs)

45

In [114]:
# Number of tokens that have at least one adjective sense.
len(adjectives)

27

In [115]:
# Keywords are every token that fell into at least one part-of-speech bucket;
# a token present in several buckets is counted once.
keywords = nouns | verbs | adjectives

In [64]:
# Number of distinct keywords (smaller than the sum of the three bucket sizes
# whenever a token belongs to more than one bucket).
len(keywords)

102

In [65]:
# Display the whole `keywords` set for manual inspection.
keywords

{'Arabic',
 'Christian',
 'Greek',
 'Palestinians',
 'Polycarp',
 'Saint_Polycarp',
 'Smyrna',
 'St._Polycarp',
 'accident',
 'adverse',
 'aim',
 'anger',
 'another',
 'applied',
 'attack',
 'back',
 'bag',
 'being',
 'bishop',
 'bombers',
 'boss',
 'burned',
 'captured',
 'casualty',
 'chased',
 'circumstance',
 'death',
 'engagement',
 'errors',
 'exploitation',
 'fair',
 'fair_game',
 'faith',
 'feel',
 'fell',
 'film',
 'forth',
 'game',
 'give',
 'holy',
 'hostile',
 'hunted',
 'hunted_person',
 'influence',
 'injured',
 'injured_party',
 'jumped',
 'killed',
 'law',
 'manhunt',
 'martyr',
 'martyrs',
 'military',
 'missing',
 'more',
 'muggee',
 'mugger',
 'muggers',
 'mugging',
 'murdered',
 'murderee',
 'one',
 'pagans',
 'penalty',
 'person',
 'poor_devil',
 'prey',
 'principle',
 'protection',
 'punching',
 'punching_bag',
 'punished',
 'pursued',
 'pursuer',
 'quarry',
 'recant',
 'refused',
 'refusing',
 'religion',
 'renounce',
 'resigned',
 'ridicule',
 'sake',
 'scapegoa