In [171]:
import numpy as np
import pandas as pd
import csv
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\marlo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

### 1. Load National Research Council

In [172]:
file_path = "NRC-Emotion-Lexicon/NRC-Emotion-Lexicon/NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
nrc_lexicon = {}
with open(file_path, newline='') as csvfile:
    text = csv.reader(csvfile, delimiter='\t', quotechar='|')
    for row in text:
        if int(row[2]) == 1:
            if row[0] not in nrc_lexicon.keys():
                nrc_lexicon[row[0]] = []
            nrc_lexicon[row[0]].append(row[1])

In [173]:
# Vista primors 5 elementos del diccionario
print(len(nrc_lexicon.keys()))
for k,v in list(nrc_lexicon.items())[:5]:
    print(k, v)

6453
abacus ['trust']
abandon ['fear', 'negative', 'sadness']
abandoned ['anger', 'fear', 'negative', 'sadness']
abandonment ['anger', 'fear', 'negative', 'sadness', 'surprise']
abba ['positive']


### 2. Extender el léxico NRC utilizando WordNet

In [174]:
wordnet_to_penn = {
 'n': 'NN', # sustantivo
 'v': 'VB', # verbo
 'a': 'JJ', # adjetivo
 's': 'JJ', # adjetivo superlativo
 'r': 'RB', # adverbio
 'c': 'CC' # conjunción
}

penn_to_wordnet = {
 'CC': 'c', # Coordinating conjunction
 'CD': 'c', # Cardinal number
 'DT': 'c', # Determiner
 'EX': 'c', # Existential there
 'FW': 'x', # Foreign word
 'IN': 'c', # Preposition or subordinating conjunction
 'JJ': 'a', # Adjective
 'JJR': 'a', # Adjective, comparative
 'JJS': 'a', # Adjective, superlative
 'LS': 'c', # List item marker
 'MD': 'v', # Modal
 'NN': 'n', # Noun, singular or mass
 'NNS': 'n', # Noun, plural
 'NNP': 'n', # Proper noun, singular
 'NNPS': 'n', # Proper noun, plural
  'PDT': 'c', # Predeterminer
 'POS': 'c', # Possessive ending
 'PRP': 'n', # Personal pronoun
 'PRP$': 'n', # Possessive pronoun
 'RB': 'r', # Adverb
 'RBR': 'r', # Adverb, comparative
 'RBS': 'r', # Adverb, superlative
 'RP': 'r', # Particle
 'SYM': 'x', # Symbol
 'TO': 'c', # to
 'UH': 'x', # Interjection
 'VB': 'v', # Verb, base form
 'VBD': 'v', # Verb, past tense
 'VBG': 'v', # Verb, gerund or present participle
 'VBN': 'v', # Verb, past participle
 'VBP': 'v', # Verb, non-3rd person singular present
 'VBZ': 'v', # Verb, 3rd person singular present
 'WDT': 'c', # Wh-determiner
 'WP': 'n', # Wh-pronoun
 'WP$': 'n', # Possessive wh-pronoun
 'WRB': 'r', # Wh-adverb
 'X': 'x' # Any word not categorized by the other tags
 }

In [175]:
def getLemmaPos(word):
    lemmatizer = WordNetLemmatizer()
    pos_tag = wn.synsets(word)[0].pos()
    lemma = lemmatizer.lemmatize(word, pos=pos_tag)
    return lemma, pos_tag


# getLemmaPos('st')
wn.synsets('st')

[]

In [176]:
def getSynonyms(word):
    synonyms = wn.synonyms(word)
    synonyms = [sub_array[0] for sub_array in synonyms if sub_array]
    sims = []
    for s in synonyms:
        for s1 in wn.synsets(s):
            [sims.append(str(lem.name())) for lem in s1.lemmas()]
    return list(set([getLemmaPos(word) for word in set(sims)]))

getSynonyms('joy')

[('ravish', 'v'),
 ('delectation', 'n'),
 ('gladden', 'v'),
 ('enjoy', 'v'),
 ('exuberate', 'v'),
 ('triumph', 'n'),
 ('joyfulness', 'n'),
 ('transport', 'n'),
 ('jubilate', 'v'),
 ('wallow', 'n'),
 ('revel', 'n'),
 ('exult', 'v'),
 ('pleasure', 'n'),
 ('enthrall', 'v'),
 ('enthral', 'v'),
 ('please', 'v'),
 ('enrapture', 'v'),
 ('rejoice', 'v'),
 ('joy', 'n'),
 ('joyousness', 'n'),
 ('enchant', 'v'),
 ('delight', 'n')]

In [177]:
def getHypernyms(word):
    synsets = wn.synsets(word)
    hypernyms = []
    for synset in synsets:
        for hypernym in synset.hypernyms():
            hypernyms.append((hypernym.name().split('.')[0], hypernym.pos()))
    return list(set(hypernyms))

getHypernyms('joy')

[('positive_stimulus', 'n'), ('feel', 'v'), ('emotion', 'n')]

In [178]:
def getHyponyms(word):
    synsets = wn.synsets(word)
    hyponyms = []
    for synset in synsets:
        for hyponym in synset.hyponyms():
            hyponyms.append((hyponym.name().split('.')[0], hyponym.pos()))
    return list(set(hyponyms))

getHyponyms('joy')

[('exult', 'v'),
 ('elation', 'n'),
 ('gladden', 'v'),
 ('overjoy', 'v'),
 ('exultation', 'n'),
 ('exuberance', 'n'),
 ('exhilaration', 'n'),
 ('cheer', 'v')]

In [179]:
def getDerivedWords(word):
    synsets = wn.synsets(word)
    derived_words = []
    for synset in synsets:
        derived_words.append((word, synset.pos()))
        for lemma in synset.lemmas():
            for related_lemma in lemma.derivationally_related_forms():
                derived_words.append((related_lemma.name(), synset.pos()))

    return list(set(derived_words))
getDerivedWords('joy')


[('please', 'n'),
 ('joy', 'v'),
 ('rejoicing', 'v'),
 ('joyous', 'n'),
 ('joyful', 'n'),
 ('delight', 'n'),
 ('joy', 'n')]

In [185]:
extended_lexicon = {}

for word in list(nrc_lexicon.keys()):
    emotions = nrc_lexicon[word]
    for symnonym in getSynonyms(word):
        if symnonym not in extended_lexicon.keys():
            extended_lexicon[symnonym] = emotions
    for hypernym in getHypernyms(word):
        if hypernym not in extended_lexicon.keys():
            extended_lexicon[hypernym] = emotions

    for hyponym in getHyponyms(word):
        if hyponym not in extended_lexicon.keys():
            extended_lexicon[hyponym] = emotions

    for derived_words in getDerivedWords(word):
        if derived_words not in extended_lexicon.keys():
            extended_lexicon[derived_words] = emotions

extended_lexicon

{('tablet', 'n'): ['trust'],
 ('calculator', 'n'): ['trust'],
 ('abacus', 'n'): ['trust'],
 ('vehemence', 'n'): ['fear', 'negative', 'sadness'],
 ('vacate', 'v'): ['fear', 'negative', 'sadness'],
 ('cede', 'v'): ['fear', 'negative', 'sadness'],
 ('forsake', 'v'): ['fear', 'negative', 'sadness'],
 ('empty', 'n'): ['fear', 'negative', 'sadness'],
 ('void', 'n'): ['fear', 'negative', 'sadness'],
 ('unconstraint', 'n'): ['fear', 'negative', 'sadness'],
 ('allow', 'v'): ['fear', 'negative', 'sadness'],
 ('fierceness', 'n'): ['fear', 'negative', 'sadness'],
 ('stop', 'n'): ['fear', 'negative', 'sadness'],
 ('part_with', 'v'): ['fear', 'negative', 'sadness'],
 ('throw_in_the_towel', 'v'): ['fear', 'negative', 'sadness'],
 ('lay_off', 'v'): ['fear', 'negative', 'sadness'],
 ('throw_in', 'v'): ['fear', 'negative', 'sadness'],
 ('furiousness', 'n'): ['fear', 'negative', 'sadness'],
 ('hollow', 'n'): ['fear', 'negative', 'sadness'],
 ('cease', 'n'): ['fear', 'negative', 'sadness'],
 ('surrender',