In [1]:
from nltk import word_tokenize
from nltk.corpus import wordnet as wn

In [2]:
def all_hypernyms(ss):
    """Return the closure of `ss` obtained by repeatedly following `hypernyms()`.

    The value is whatever `ss.closure` produces for that relation.
    """
    def parents(synset):
        return synset.hypernyms()

    return ss.closure(parents)

In [3]:
def all_hyponyms(ss):
    """Return the closure of `ss` obtained by repeatedly following `hyponyms()`.

    The value is whatever `ss.closure` produces for that relation.
    """
    def children(synset):
        return synset.hyponyms()

    return ss.closure(children)

In [4]:
# List every WordNet sense of "person": synset, definition, examples and lemma
# names, followed by a blank line.
for sense in wn.synsets('person'):
    print(sense, sense.definition(), sense.examples(), sense.lemma_names(), sep='\n', end='\n\n')

Synset('person.n.01')
a human being
['there was too much for one person to do']
['person', 'individual', 'someone', 'somebody', 'mortal', 'soul']

Synset('person.n.02')
a human body (usually including the clothing)
['a weapon was hidden on his person']
['person']

Synset('person.n.03')
a grammatical category used in the classification of pronouns, possessive determiners, and verb forms according to whether they indicate the speaker, the addressee, or a third party
['stop talking about yourself in the third person']
['person']



In [5]:
def is_a_person(ss):
    """Tell whether synset `ss` denotes a person.

    A synset qualifies when any of the following holds:

    - its lexicographer file name is 'noun.person';
    - it is the synset 'person.n.01' itself;
    - 'person.n.01' appears among its transitive hypernyms.

    Returns a bool.
    """
    if ss.lexname() == 'noun.person':
        return True
    # Look the reference synset up once instead of once per comparison.
    person = wn.synset('person.n.01')
    if ss == person:
        return True
    # Walk the hypernym closure lazily so the search stops at the first match
    # instead of materialising the whole ancestor list for every synset.
    return any(ancestor == person for ancestor in all_hypernyms(ss))

# Persona

In [6]:
# Materialise the matches in a list: a generator expression is exhausted by its
# first consumer, so re-running a later cell on its own would silently iterate
# over nothing.
person_sss = [ss for ss in wn.all_synsets() if is_a_person(ss)]

In [7]:
# Distinct Spanish lemma names of every synset in person_sss.
spa_person_lemmas = {
    spa_lemma
    for person_ss in person_sss
    for spa_lemma in person_ss.lemma_names(lang='spa')
}

In [8]:
# Number of distinct Spanish lemmas collected for person synsets.
len(spa_person_lemmas)

6068

# Victimario

In [9]:
# English renderings of "victimario"; each entry is annotated with the
# dictionary it was taken from.
victimario_words = [
    'victimizer',  # Google Translate
    'killer',  # Wordreference
    'murderer',  # Wordreference
    'assassin',  # Wordreference
    'aggressor',  # Linguee
]
# Keep the individual numbered names bound alongside the list.
(
    victimario_word_1,
    victimario_word_2,
    victimario_word_3,
    victimario_word_4,
    victimario_word_5,
) = victimario_words

In [10]:
# For each candidate English word, print every WordNet sense it has: the word,
# the synset, its definition, examples and lemma names, then a blank line.
for candidate in victimario_words:
    for sense in wn.synsets(candidate):
        print(
            candidate,
            sense,
            sense.definition(),
            sense.examples(),
            sense.lemma_names(),
            sep='\n',
            end='\n\n',
        )

victimizer
Synset('victimizer.n.01')
a person who victimizes others
['I thought we were partners, not victim and victimizer']
['victimizer', 'victimiser']

killer
Synset('killer.n.01')
someone who causes the death of a person or animal
[]
['killer', 'slayer']

killer
Synset('cause_of_death.n.01')
the causal agent resulting in death
['heart disease is the biggest killer in the United States']
['cause_of_death', 'killer']

killer
Synset('killer.n.03')
a difficulty that is hard to deal with
['that exam was a real killer']
['killer']

killer
Synset('killer_whale.n.01')
predatory black-and-white toothed whale with large dorsal fin; common in cold seas
[]
['killer_whale', 'killer', 'orca', 'grampus', 'sea_wolf', 'Orcinus_orca']

murderer
Synset('murderer.n.01')
a criminal who commits homicide (who performs the unlawful premeditated killing of another human being)
[]
['murderer', 'liquidator', 'manslayer']

assassin
Synset('assassin.n.01')
a murderer (especially one who kills a prominent poli

In [11]:
# Hand-picked WordNet synset identifiers for the "victimario" concept.
victimario_sss_str = set((
    'victimizer.n.01',
    'killer.n.01',
    'murderer.n.01',
    'assassin.n.01',
    'attacker.n.01',
))

In [12]:
# Print each selected synset id next to its full list of transitive hypernyms,
# followed by a blank line.
for synset_id in victimario_sss_str:
    ancestors = list(all_hypernyms(wn.synset(synset_id)))
    print(synset_id, ancestors, end='\n\n')

killer.n.01 [Synset('person.n.01'), Synset('causal_agent.n.01'), Synset('organism.n.01'), Synset('physical_entity.n.01'), Synset('living_thing.n.01'), Synset('entity.n.01'), Synset('whole.n.02'), Synset('object.n.01')]

victimizer.n.01 [Synset('bad_person.n.01'), Synset('person.n.01'), Synset('causal_agent.n.01'), Synset('organism.n.01'), Synset('physical_entity.n.01'), Synset('living_thing.n.01'), Synset('entity.n.01'), Synset('whole.n.02'), Synset('object.n.01')]

attacker.n.01 [Synset('wrongdoer.n.01'), Synset('bad_person.n.01'), Synset('person.n.01'), Synset('causal_agent.n.01'), Synset('organism.n.01'), Synset('physical_entity.n.01'), Synset('living_thing.n.01'), Synset('entity.n.01'), Synset('whole.n.02'), Synset('object.n.01')]

murderer.n.01 [Synset('criminal.n.01'), Synset('killer.n.01'), Synset('principal.n.05'), Synset('person.n.01'), Synset('wrongdoer.n.01'), Synset('causal_agent.n.01'), Synset('organism.n.01'), Synset('bad_person.n.01'), Synset('physical_entity.n.01'), Syn

In [13]:
# Root synsets for "victimario". 'murderer.n.01' and 'assassin.n.01' are not
# listed: the original notes tag both with 'killer.n.01', presumably because
# they sit below it in the hierarchy -- TODO confirm.
victimario_sss = set(map(wn.synset, (
    'victimizer.n.01',  # original note: 'bad_person.n.01'
    'killer.n.01',  # original note: 'killer.n.01'
    'attacker.n.01',  # original note: 'bad_person.n.01'
)))

In [14]:
# Root synsets plus every transitive hyponym of each root.
victimario_all_sss = victimario_sss | {
    descendant
    for root in victimario_sss
    for descendant in all_hyponyms(root)
}

In [15]:
# Spanish lemma names of every synset in victimario_all_sss, de-duplicated.
# NOTE(review): the following cell rebinds victimario_spa_lemmas to a fresh
# empty set, so this value is discarded when the notebook runs top to bottom.
victimario_spa_lemmas = set(lemma for ss in victimario_all_sss for lemma in ss.lemma_names(lang='spa'))

In [16]:
# Keep the Spanish lemmas only for synsets that have at least as many Spanish
# lemmas as English ones; for the others, print both lists instead of adding.
victimario_spa_lemmas = set()
for synset in victimario_all_sss:
    english = synset.lemma_names()
    spanish = synset.lemma_names(lang='spa')
    if len(english) > len(spanish):
        print(english, spanish, sep='\n', end='\n\n')
        continue
    victimario_spa_lemmas.update(spanish)

['marauder', 'predator', 'vulture', 'piranha']
['buitre']

['stoner', 'lapidator']
['lapidador']

['attacker', 'aggressor', 'assailant', 'assaulter']
['atacante']

['exterminator', 'terminator', 'eradicator']
[]

['serial_killer', 'serial_murderer']
[]

['hangman']
[]

['iconoclast']
[]

['infanticide']
[]

['white_slaver']
[]

['mass_murderer']
[]

['slasher']
[]

['executioner', 'public_executioner']
[]

['night_rider', 'nightrider']
[]

['victimizer', 'victimiser']
[]

['suicide', 'felo-de-se']
['suicida']

['patricide']
[]

['stabber']
[]

['moss-trooper']
[]

['regicide']
[]

['matricide']
[]

['headsman', 'headman']
[]

['fratricide']
[]

['killer', 'slayer']
['asesino']

['hatchet_man', 'iceman']
['sicario']

['parricide']
[]

['bullyboy']
[]

['butcher']
[]

['murderess']
[]

['shedder', 'spiller']
['derramador_de_sangre']

['tough_guy', 'plug-ugly']
[]

['skinhead']
[]

['garroter', 'garrotter', 'strangler', 'throttler', 'choker']
['estrangulador']

['assassin', 'assassinator'

In [17]:
# Hand-written Spanish terms added on top of the lemmas collected from WordNet.
# Repeated entries are harmless because the literal is a set.
victimario_spa_lemmas = victimario_spa_lemmas.union({
    'animal', 'bestia', 'salvaje', 'bárbaro', 'bruto',
    'exterminador', 'extirpador',
    'lapidador',
    'pistolero', 'armado', 'sicario', 'matón', 'francotirador', 'tirador',
    'fratricida', 'fratricidio',
    'navajero', 'acuchillador', 'apuñalador',
    'electrocución',
    # The comma after 'bandolero' is required: without it Python joins the two
    # adjacent literals into the single bogus entry 'bandoleroviolador'.
    'bandolero',
    'violador', 'violación',
    'regicida', 'regicidio',
    'verdugo',
    'matricida', 'matricidio',
    'emboscador', 'emboscada',
    'asesino',
    'patricida', 'patricidio',
    'asesino', 'bandido',
    'matón', 'abusón', 'camorrero', 'vándalo', 'gamberro', 'rufián', 'criminal', 'alborotador', 'agitador',
    'verdugo', 'ejecutor',
    'victimario',
    'esclavista',
    'carnicero',
    'saqueador', 'merodeador',
    'predador', 'depredador',
    'atacante', 'agresor', 'asaltante', 'asaltador',
    'esbirro',
    'infanticida', 'infanticidio',
    'traficante',
    'estrangulador',
    'parricida', 'parricidio',
})

# Víctima

In [18]:
# English headword whose WordNet senses are looked up in the next cell.
my_word = 'victim'

In [19]:
# Print every WordNet sense of my_word: synset, definition, examples and lemma
# names, followed by a blank line.
for sense in wn.synsets(my_word):
    print(sense, sense.definition(), sense.examples(), sense.lemma_names(), sep='\n', end='\n\n')

Synset('victim.n.01')
an unfortunate person who suffers from some adverse circumstance
[]
['victim']

Synset('victim.n.02')
a person who is tricked or swindled
[]
['victim', 'dupe']



In [20]:
# Synset used as the root of the victim vocabulary: noun sense 1 of "victim".
my_ss = wn.synset('victim.n.01')

In [21]:
# Set of root synsets for "víctima"; it holds only my_ss.
victima_sss = {my_ss}

In [22]:
# Root synsets plus every transitive hyponym of each root.
victima_all_sss = victima_sss | {
    descendant
    for root in victima_sss
    for descendant in all_hyponyms(root)
}

In [23]:
# Spanish lemma names of every synset in victima_all_sss, de-duplicated.
# NOTE(review): the following cell rebinds victima_spa_lemmas to a fresh empty
# set, so this value is discarded when the notebook runs top to bottom.
victima_spa_lemmas = set(lemma for ss in victima_all_sss for lemma in ss.lemma_names(lang='spa'))

In [24]:
# Keep the Spanish lemmas only for synsets that have at least as many Spanish
# lemmas as English ones; for the others, print both lists instead of adding.
victima_spa_lemmas = set()
for synset in victima_all_sss:
    english = synset.lemma_names()
    spanish = synset.lemma_names(lang='spa')
    if len(english) > len(spanish):
        print(english, spanish, sep='\n', end='\n\n')
        continue
    victima_spa_lemmas.update(spanish)

['punching_bag']
[]

['poor_devil', 'wretch']
[]

['martyr', 'sufferer']
[]

['pursued', 'chased']
[]

['prey', 'quarry', 'target', 'fair_game']
[]

['shaheed']
[]

['murderee']
[]

['Polycarp', 'Saint_Polycarp', 'St._Polycarp']
[]

['casualty']
[]



In [25]:
# Hand-written Spanish terms merged into the lemmas collected from WordNet.
victima_manual_terms = {
    'perseguido',
    'acosado',
    'cazado',
    'asesinado',
    'herido',
    'baja',
    'víctima',
    'pérdida',
    'pobre diablo',
    'desdichado',
    'desgraciado',
    'mártir',
    'sufridor',
    'presa',
}
victima_spa_lemmas = victima_spa_lemmas | victima_manual_terms

In [26]:
# Print each transitive hypernym of my_ss: synset, definition, examples and
# lemma names, followed by a blank line.
for ancestor in all_hypernyms(my_ss):
    print(ancestor, ancestor.definition(), ancestor.examples(), ancestor.lemma_names(), sep='\n', end='\n\n')

Synset('unfortunate.n.01')
a person who suffers misfortune
[]
['unfortunate', 'unfortunate_person']

Synset('person.n.01')
a human being
['there was too much for one person to do']
['person', 'individual', 'someone', 'somebody', 'mortal', 'soul']

Synset('causal_agent.n.01')
any entity that produces an effect or is responsible for events or results
[]
['causal_agent', 'cause', 'causal_agency']

Synset('organism.n.01')
a living thing that has (or can develop) the ability to act or function independently
[]
['organism', 'being']

Synset('physical_entity.n.01')
an entity that has physical existence
[]
['physical_entity']

Synset('living_thing.n.01')
a living (or once living) entity
[]
['living_thing', 'animate_thing']

Synset('entity.n.01')
that which is perceived or known or inferred to have its own distinct existence (living or nonliving)
[]
['entity']

Synset('whole.n.02')
an assemblage of parts that is regarded as a single entity
['how big is that part compared to the whole?', 'the te

# Words related to Víctima

In [27]:
# Print each transitive hyponym of my_ss: synset, definition, examples and
# lemma names, followed by a blank line.
for descendant in all_hyponyms(my_ss):
    print(descendant, descendant.definition(), descendant.examples(), descendant.lemma_names(), sep='\n', end='\n\n')

Synset('casualty.n.01')
someone injured or killed or captured or missing in a military engagement
[]
['casualty']

Synset('casualty.n.02')
someone injured or killed in an accident
[]
['casualty', 'injured_party']

Synset('hunted_person.n.01')
a person who is hunted
[]
['hunted_person']

Synset('martyr.n.01')
one who suffers for the sake of principle
[]
['martyr', 'sufferer']

Synset('martyr.n.02')
one who voluntarily suffers death as the penalty for refusing to renounce their religion
[]
['martyr']

Synset('muggee.n.01')
a victim of a mugging
['the law seems to give more protection to the mugger than to the muggee']
['muggee']

Synset('murderee.n.01')
a victim who is murdered
[]
['murderee']

Synset('poor_devil.n.01')
someone you feel sorry for
[]
['poor_devil', 'wretch']

Synset('prey.n.01')
a person who is the aim of an attack (especially a victim of ridicule or exploitation) by some hostile person or influence
['he fell prey to muggers', 'everyone was fair game', 'the target of a ma

In [28]:
# Vocabulary drawn from my_ss and all of its transitive hyponyms: the tokens of
# each definition and example sentence, plus the lemma names as given.
all_words = set()
for synset in [my_ss, *all_hyponyms(my_ss)]:
    all_words.update(word_tokenize(synset.definition()))
    all_words.update(synset.lemma_names())
    for example in synset.examples():
        all_words.update(word_tokenize(example))

In [29]:
# Size of the collected vocabulary.
len(all_words)

133

In [30]:
# Display the collected vocabulary.
all_words

{'(',
 ')',
 '69-155',
 ';',
 'Arabic',
 'Christian',
 'Greek',
 'Palestinians',
 'Polycarp',
 'Saint_Polycarp',
 'Smyrna',
 'St._Polycarp',
 'a',
 'accident',
 'adverse',
 'aim',
 'an',
 'and',
 'anger',
 'another',
 'applied',
 'as',
 'attack',
 'back',
 'bag',
 'because',
 'being',
 'bishop',
 'bombers',
 'boss',
 'burned',
 'by',
 'captured',
 'casualty',
 'chased',
 'circa',
 'circumstance',
 'death',
 'engagement',
 'errors',
 'especially',
 'everyone',
 'exploitation',
 'fair',
 'fair_game',
 'faith',
 'feel',
 'fell',
 'film',
 'for',
 'forth',
 'from',
 'game',
 'give',
 'he',
 'him',
 'his',
 'holy',
 'hostile',
 'hunted',
 'hunted_person',
 'in',
 'influence',
 'injured',
 'injured_party',
 'is',
 'jumped',
 'killed',
 'law',
 'manhunt',
 'martyr',
 'martyrs',
 'military',
 'missing',
 'more',
 'muggee',
 'mugger',
 'muggers',
 'mugging',
 'murdered',
 'murderee',
 'of',
 'on',
 'one',
 'or',
 'others',
 'pagans',
 'penalty',
 'person',
 'poor_devil',
 'prey',
 'principle',


In [31]:
# Sort every word longer than two characters into part-of-speech buckets,
# according to the POS tags of the WordNet synsets found for it. A word lands
# in every bucket it has a sense for; tags 'a' and 's' both count as adjective.
nouns, verbs, adjectives = set(), set(), set()
bucket_by_pos = {'n': nouns, 'v': verbs, 'a': adjectives, 's': adjectives}
for word in all_words:
    if len(word) > 2:
        for sense in wn.synsets(word):
            bucket = bucket_by_pos.get(sense.pos())
            if bucket is not None:
                bucket.add(word)

In [32]:
# Number of words that have at least one noun synset.
len(nouns)

79

In [33]:
# For each noun, in sorted order, print the word, synset and definition of
# every noun sense, followed by a blank line.
for noun in sorted(nouns):
    noun_senses = (sense for sense in wn.synsets(noun) if sense.pos() == 'n')
    for sense in noun_senses:
        print(noun, sense, sense.definition(), sep='\n', end='\n\n')

Arabic
Synset('arabic.n.01')
the Semitic language of the Arabs; spoken in a variety of dialects

Christian
Synset('christian.n.01')
a religious person who believes Jesus is the Christ and who is a member of a Christian denomination

Greek
Synset('greek.n.01')
the Hellenic branch of the Indo-European family of languages

Greek
Synset('greek.n.02')
a native or inhabitant of Greece

Palestinians
Synset('palestinian.n.01')
a descendant of the Arabs who inhabited Palestine

Polycarp
Synset('polycarp.n.01')
Greek bishop of Smyrna who refused to recant his Christian faith and was burned to death by pagans (circa 69-155)

Saint_Polycarp
Synset('polycarp.n.01')
Greek bishop of Smyrna who refused to recant his Christian faith and was burned to death by pagans (circa 69-155)

Smyrna
Synset('izmir.n.01')
a port city in western Turkey

St._Polycarp
Synset('polycarp.n.01')
Greek bishop of Smyrna who refused to recant his Christian faith and was burned to death by pagans (circa 69-155)

accident
Syns

In [34]:
# Hand-picked identifiers of the noun synsets kept for the victim vocabulary.
# A few identifiers appear twice ('pursued.n.01', 'prey.n.01'); the set literal
# collapses them.
selected_noun_sss = {
    'accident.n.01',
    'accident.n.02',
    'anger.n.01',
    'anger.n.02',
    'attack.n.01',
    'bomber.n.01',
    'bomber.n.02',
    'casualty.n.01',
    'fatal_accident.n.01',
    'casualty.n.04',
    'pursued.n.01',
    'death.n.01',
    'death.n.02',
    'death.n.03',
    'death.n.05',
    'death.n.08',
    'battle.n.01',
    'exploitation.n.02',
    'prey.n.01',
    'religion.n.01',
    'religion.n.02',
    'hostile.n.01',
    'hunted_person.n.01',
    'casualty.n.02',
    'law.n.01',
    'law.n.02',
    'law.n.03',
    'jurisprudence.n.01',
    'police.n.01',
    'manhunt.n.01',
    'martyr.n.01',
    'martyr.n.02',
    'military.n.01',
    'muggee.n.01',
    'mugger.n.01',
    'mugging.n.01',
    'murderee.n.01',
    'heathen.n.01',
    'punishment.n.01',
    'penalty.n.02',
    'penalty.n.03',
    'person.n.01',
    'person.n.02',
    'poor_devil.n.01',
    'prey.n.01',
    'prey.n.02',
    'protection.n.01',
    'protective_covering.n.01',
    'security.n.02',
    'protection.n.04',
    'auspices.n.01',
    'protection.n.06',
    'protection.n.07',
    'punching_bag.n.01',
    'pursued.n.01',
    'pursuer.n.01',
    'ridicule.n.01',
    'derision.n.02',
    'scapegoat.n.01',
    'suicide.n.01',
    'unfortunate.n.01',
    'victim.n.01',
    'victim.n.02',
    'world_health_organization.n.01',
}

In [35]:
# Number of words that have at least one verb synset.
len(verbs)

45

In [36]:
# For each verb, in sorted order, print the word, synset and definition of
# every verb sense, followed by a blank line.
for verb in sorted(verbs):
    verb_senses = (sense for sense in wn.synsets(verb) if sense.pos() == 'v')
    for sense in verb_senses:
        print(verb, sense, sense.definition(), sep='\n', end='\n\n')

aim
Synset('aim.v.01')
point or cause to go (blows, weapons, or objects such as photographic equipment) towards

aim
Synset('aim.v.02')
propose or intend

aim
Synset('drive.v.11')
move into a desired direction of discourse

aim
Synset('calculate.v.05')
specifically design a product, event, or activity for a certain public

aim
Synset('target.v.01')
intend (something) to move towards a certain goal

aim
Synset('aim.v.06')
direct (a remark) toward an intended goal

aim
Synset('draw_a_bead_on.v.02')
have an ambitious plan or a lofty goal

anger
Synset('anger.v.01')
make angry

anger
Synset('anger.v.02')
become angry

applied
Synset('use.v.01')
put into service; make work or employ for a particular purpose or for its inherent or natural purpose

applied
Synset('apply.v.02')
be pertinent or relevant or applicable

applied
Synset('apply.v.03')
ask (for something)

applied
Synset('put_on.v.07')
apply to a surface

applied
Synset('lend_oneself.v.01')
be applicable to; as to an analysis

applie

In [37]:
# Hand-picked identifiers of the verb synsets kept for the victim vocabulary.
selected_verb_sss = {
    'aim.v.01',
    'anger.v.01',
    'anger.v.02',
    'attack.v.01',
    'attack.v.02',
    'attack.v.03',
    'assail.v.01',
    'attack.v.06',
    'bag.v.01',
    'pocket.v.02',
    'burn.v.01',
    'get.v.11',
    'appropriate.v.02',
    'capture.v.06',
    'chase.v.01',
    'fall.v.06',
    'fall.v.07',
    'fall.v.09',
    'fall.v.13',
    'fall.v.16',
    'fall.v.23',
    'sacrifice.v.01',
    'collapse.v.01',
    'give.v.41',
    'hunt.v.01',
    'hound.v.01',
    'hunt.v.03',
    'hunt.v.07',
    'injure.v.01',
    'hurt.v.04',
    'jump.v.03',
    'kill.v.01',
    'kill.v.02',
    'stamp_out.v.01',
    'kill.v.04',
    'kill.v.05',
    'kill.v.09',
    'kill.v.10',
    'martyr.v.01',
    'martyr.v.02',
    'miss.v.02',
    'mug.v.01',
    'murder.v.01',
    'raven.v.02',
    'punch.v.01',
    'punch.v.02',
    'punish.v.01',
    'pursue.v.02',
    'quest_for.v.01',
    'abjure.v.01',
    'refuse.v.01',
    'refuse.v.02',
    'deny.v.04',
    'reject.v.06',
    'abdicate.v.01',
    'disown.v.02',
    'resign.v.04',
    'ridicule.v.01',
    'suffer.v.01',
    'suffer.v.02',
    'suffer.v.03',
    'digest.v.03',
    'suffer.v.05',
    'suffer.v.06',
    'hurt.v.06',
    'suffer.v.08',
    'suffer.v.10',
    'suffer.v.11',
}

In [38]:
# Number of words that have at least one adjective synset (POS tag 'a' or 's').
len(adjectives)

27

In [39]:
# For each adjective, in sorted order, print the word, synset and definition of
# every sense tagged 'a' or 's', followed by a blank line.
for adjective in sorted(adjectives):
    adjective_senses = (sense for sense in wn.synsets(adjective) if sense.pos() in ('a', 's'))
    for sense in adjective_senses:
        print(adjective, sense, sense.definition(), sep='\n', end='\n\n')

Arabic
Synset('arabic.a.01')
relating to or characteristic of Arabs

Christian
Synset('christian.a.01')
relating to or characteristic of Christianity

Christian
Synset('christian.a.02')
following the teachings or manifesting the qualities or spirit of Jesus Christ

Greek
Synset('greek.a.01')
of or relating to or characteristic of Greece or the Greeks or the Greek language

adverse
Synset('adverse.s.01')
contrary to your interests or welfare

adverse
Synset('adverse.s.02')
in an opposing direction

another
Synset('another.s.01')
any of various alternatives; some other

applied
Synset('applied.a.01')
concerned with concrete problems or data rather than with fundamental principles

back
Synset('back.a.01')
related to or located at the back

back
Synset('back.s.02')
located at or near the back of an animal

back
Synset('back.s.03')
of an earlier date

boss
Synset('boss.s.01')
exceptionally good

burned
Synset('burned.s.01')
treated by heating to a high temperature but below the melting or 

In [40]:
# Hand-picked identifiers of the adjective synsets kept for the victim vocabulary.
selected_adjective_sss = {
    'adverse.s.01',
    'burned.s.02',
    'barbarous.s.01',
    'crippled.s.01',
    'game.s.02',
    'hostile.a.01',
    'hostile.a.02',
    'hostile.s.04',
    'hunted.s.01',
    'injured.a.01',
    'military.a.01',
    'military.a.02',
    'military.a.03',
    'missing.s.01',
    'murdered.s.01',
    'punished.a.01',
    'pursued.a.01',
    'regretful.a.01',
    'deplorable.s.01',
    'blue.s.08',
    'unfortunate.a.01',
    'unfortunate.s.03',
    'exploited.s.02',
}

In [41]:
# Keep the Spanish lemmas only for selected noun synsets that have at least as
# many Spanish lemmas as English ones; for the others, print both lists.
spa_nouns = set()
for synset_id in selected_noun_sss:
    synset = wn.synset(synset_id)
    english = synset.lemma_names()
    spanish = synset.lemma_names(lang='spa')
    if len(english) > len(spanish):
        print(english, spanish, sep='\n', end='\n\n')
        continue
    spa_nouns.update(spanish)

['fatal_accident', 'casualty']
[]

['bomber']
[]

['mugger']
[]

['prey', 'quarry']
['presa']

['heathen', 'pagan', 'gentile', 'infidel']
['infiel']

['martyr', 'sufferer']
[]

['penalty']
[]

['protective_covering', 'protective_cover', 'protection']
['protección']

['bomber']
[]

['military', 'armed_forces', 'armed_services', 'military_machine', 'war_machine']
[]

['casualty']
[]

['protection', 'shelter']
[]

['punching_bag']
[]

['suicide', 'self-destruction', 'self-annihilation']
['suicidio']

['unfortunate', 'unfortunate_person']
[]

['poor_devil', 'wretch']
[]

['police', 'police_force', 'constabulary', 'law']
['guardia', 'policía']

['anger', 'choler', 'ire']
['cólera', 'ira']

['death', 'last']
['muerte']

['punishment', 'penalty', 'penalization', 'penalisation']
['castigo']

['derision', 'ridicule']
[]

['exploitation', 'victimization', 'victimisation', 'using']
['explotación', 'explotación_social']

['pursued', 'chased']
[]

['prey', 'quarry', 'target', 'fair_game']
[]

['Wor

In [42]:
# Hand-written Spanish nouns added on top of the lemmas collected from WordNet.
# Repeated entries are harmless because the literal is a set.
spa_nouns = spa_nouns.union({
    'explotación', 'explotación social', 'victimización',
    'asesinado',
    'ridículo', 'mofa', 'escarnio', 'menosprecio',
    'pobre diablo', 'desdichado', 'desgraciado',
    'castigo',
    'herido', 'baja', 'víctima', 'pérdida',
    'militar', 'fuerzas armadas', 'servicios armados', 'maquinaria militar', 'máquina de guerra',
    'accidente fatal',
    'perseguidor', 'acosador', 'cazador',
    'suicidio',
    'bombardero', 'cazabombardero', 'terrorista', 'dinamitero',
    # The comma after 'albergue' is required: without it Python joins it with
    # the next literal into the single bogus entry 'alberguemuerte'.
    'protección', 'refugio', 'albergue',
    'muerte',
    'ataque', 'asalto',
    'OMS', 'organización mundial de la salud',
    'atracador',
    'trapo', 'felpudo',
    'infiel', 'pagano',
    'perseguido', 'acosado', 'cazado',
    'ciencias del derecho', 'jurisprudencia', 'derecho', 'filosofía legal',
    'presa',
    'cacería', 'caza', 'persecución',
    'protección',
    'policía', 'fuerzas policiales', 'ley', 'guardia', 'cuerpo de policía',
    'cólera', 'ira', 'rabia',
    'muerte',
    'desafortunado',
    'castigo', 'penalización',
    'mártir', 'sufridor',
    'blanco',
    'enemigo',
    'muerto',
})

In [43]:
# Keep the Spanish lemmas only for selected verb synsets that have at least as
# many Spanish lemmas as English ones; for the others, print both lists.
spa_verbs = set()
for synset_id in selected_verb_sss:
    synset = wn.synset(synset_id)
    english = synset.lemma_names()
    spanish = synset.lemma_names(lang='spa')
    if len(english) > len(spanish):
        print(english, spanish, sep='\n', end='\n\n')
        continue
    spa_verbs.update(spanish)

['appropriate', 'capture', 'seize', 'conquer']
['tomar']

['punch']
[]

['fall', 'fall_down']
[]

['hunt', 'run', 'hunt_down', 'track_down']
['cazar']

['suffer', 'meet']
[]

['get', 'catch', 'capture']
['capturar']

['kill']
[]

['martyr', 'martyrize', 'martyrise']
['martirizar']

['kill']
[]

['martyr']
[]

['raven', 'prey', 'predate']
[]

['resign', 'reconcile', 'submit']
[]

['abdicate', 'renounce']
[]

['hunt']
[]

['digest', 'endure', 'stick_out', 'stomach', 'bear', 'stand', 'tolerate', 'support', 'brook', 'abide', 'suffer', 'put_up']
['aguantar', 'apechar', 'digerir', 'sobrellevar', 'soportar', 'sufrir', 'tolerar']

['collapse', 'fall_in', 'cave_in', 'give', 'give_way', 'break', 'founder']
['caer', 'ceder', 'colapsar', 'derrumbarse', 'irse_a_pique', 'romperse']

['refuse', 'reject', 'pass_up', 'turn_down', 'decline']
['declinar', 'denegar', 'rechazar']

['hurt', 'injure']
['perjudicar']

['suffer', 'lose']
[]

['assail', 'assault', 'set_on', 'attack']
['asaltar', 'atacar']

['hu

In [44]:
# Hand-written Spanish verbs added on top of the lemmas collected from WordNet.
# Several entries repeat across rows; the set literal collapses them.
spa_verbs = spa_verbs.union({
    'dar un puñetazo', 'golpear',
    'aguantar', 'apechar', 'digerir', 'sobrellevar', 'soportar', 'sufrir', 'tolerar',
    'matar', 'asesinar',
    'declinar', 'denegar', 'rechazar', 'rehusar', 'negar',
    'ridiculizar', 'humillar', 'vilipendiar', 'escarnecer',
    'perseguir', 'acosar',
    'cazar',
    'matar', 'asesinar',
    'rastrear',
    'caer',
    'resignar', 'aceptar', 'rendir', 'entregar',
    'capturar',
    'cazar',
    'sufrir', 'padecer', 'perder',
    'martirizar', 'sacrificar',
    'abdicar', 'renunciar',
    'negar', 'rehusar',
    'asaltar', 'atacar',
    'apropiar', 'capturar', 'conquistar', 'apoderar', 'incautar', 'embargar',
    'castigar', 'penalizar',
    'enfurecer', 'enfadar', 'enojar',
    'atacar', 'asaltar', 'atracar', 'disparar',
    'abjurar', 'renegar',
    'doler', 'sufrir',
    'herir', 'perjudicar',
    'pegar', 'bloquear',
    'caer', 'ceder', 'colapsar', 'derrumbar', 'romper',
    'asediar', 'acosar', 'abusar',
    'morir',
})

In [45]:
# Keep the Spanish lemmas only for selected adjective synsets that have at
# least as many Spanish lemmas as English ones; for the others, print both lists.
spa_adjectives = set()
for synset_id in selected_adjective_sss:
    synset = wn.synset(synset_id)
    english = synset.lemma_names()
    spanish = synset.lemma_names(lang='spa')
    if len(english) > len(spanish):
        print(english, spanish, sep='\n', end='\n\n')
        continue
    spa_adjectives.update(spanish)

['pursued']
[]

['missing']
[]

['hostile', 'uncongenial', 'unfriendly']
[]

['crippled', 'halt', 'halting', 'lame', 'gimpy', 'game']
[]

['punished']
[]

['murdered']
[]

['unfortunate']
[]

['game', 'gamy', 'gamey', 'gritty', 'mettlesome', 'spirited', 'spunky']
[]

['deplorable', 'distressing', 'lamentable', 'pitiful', 'sad', 'sorry']
[]

['barbarous', 'brutal', 'cruel', 'fell', 'roughshod', 'savage', 'vicious']
[]

['adverse', 'inauspicious', 'untoward']
[]

['exploited', 'ill-used', 'put-upon', 'used', 'victimized', 'victimised']
[]

['hunted']
[]

['regretful', 'sorry', 'bad']
[]

['unfortunate']
[]

['injured']
[]

['burned', 'burnt', 'burned-over', 'burned-out', 'burnt-out']
[]

['blue', 'dark', 'dingy', 'disconsolate', 'dismal', 'gloomy', 'grim', 'sorry', 'drab', 'drear', 'dreary']
[]



In [46]:
# Hand-written Spanish adjectives added on top of the lemmas collected from
# WordNet. Repeated entries are harmless because the literal is a set.
spa_adjectives = spa_adjectives.union({
    'perseguido',
    'desafortunado', 'desgraciado', 'desdichado',
    'cazado',
    'hostil',
    'explotado', 'victimizado', 'maltratado', 'abusado',
    'deplorable', 'angustiado', 'alarmante', 'inquietante', 'lamentable', 'despreciable', 'detestable', 'triste',
    # 'deplorable' below was misspelled 'deporable', which would have been
    # written to the adjectives file as a non-word.
    'oscuro', 'negro', 'sucio', 'triste', 'desconsolado', 'descorazonado', 'deprimente', 'deplorable', 'funesto',
    'desalentador', 'lúgubre', 'gris', 'sombrío', 'pobre',
    'desaparecido', 'perdido',
    'adverso', 'desfavorable',
    'quemado',
    'arrepentido',
    'herido',
    'castigado',
    'asesinado', 'matado',
    'bárbaro', 'brutal', 'cruel', 'letal', 'mortal', 'atropellado', 'pisoteado', 'salvaje', 'violento',
    'feroz', 'despiadado', 'sanguinario', 'mezquino', 'malicioso'
})

# Saving data

In [47]:
def clean(xs):
    """Return a list holding each string of `xs` with underscores turned into spaces."""
    spaced = []
    for item in xs:
        spaced.append(item.replace('_', ' '))
    return spaced

In [48]:
def to_file(xs, filename):
    """Write every string of `xs` to `filename`, one per line.

    The file is created or truncated. It is always encoded as UTF-8 so that
    accented characters are written the same way regardless of the platform's
    default encoding.

    Args:
        xs: iterable of strings; each is written followed by a newline.
        filename: path of the file to write.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(x + '\n' for x in xs)

In [49]:
# Sort before writing: the source is a set of strings, whose iteration order is
# not stable across interpreter runs, so an unsorted dump would reorder the
# file on every run.
to_file(sorted(clean(spa_person_lemmas)), 'data/personas.txt')

In [50]:
# Sort before writing: the source is a set of strings, whose iteration order is
# not stable across interpreter runs, so an unsorted dump would reorder the
# file on every run.
to_file(sorted(clean(victimario_spa_lemmas)), 'data/victimarios.txt')

In [51]:
# Sort before writing: the source is a set of strings, whose iteration order is
# not stable across interpreter runs, so an unsorted dump would reorder the
# file on every run.
to_file(sorted(clean(victima_spa_lemmas)), 'data/victimas.txt')

In [52]:
# Sort before writing: the source is a set of strings, whose iteration order is
# not stable across interpreter runs, so an unsorted dump would reorder the
# file on every run.
to_file(sorted(clean(spa_nouns)), 'data/nombres.txt')

In [53]:
# Sort before writing: the source is a set of strings, whose iteration order is
# not stable across interpreter runs, so an unsorted dump would reorder the
# file on every run.
to_file(sorted(clean(spa_verbs)), 'data/verbos.txt')

In [54]:
# Sort before writing: the source is a set of strings, whose iteration order is
# not stable across interpreter runs, so an unsorted dump would reorder the
# file on every run.
to_file(sorted(clean(spa_adjectives)), 'data/adjetivos.txt')