# Topic Top N
### Once we have our topics, we want to look at the Top N words for each topic. We also want to expand these words by including their synonyms, hyponyms, hypernyms, meronyms, holonyms & entailments.

In [1]:
import gensim
import json
import nltk

from nltk.corpus import wordnet as wn



In [2]:
# Load the previously trained LDA model from disk.
lda = gensim.models.ldamodel.LdaModel.load('../../data/topic-model/lda/exp4.model')

# Build a dictionary keyed by 1-based topic label ('Topic 1', 'Topic 2', ...,
# one entry per topic in the model). Each value is the list of that topic's
# top N words, with the scores returned by show_topic() discarded.
# Top N words = 30
topic_dict = {
    'Topic ' + str(topic_id): [
        term for term, _weight in lda.show_topic(topic_id - 1, topn=30)
    ]
    for topic_id in range(1, lda.num_topics + 1)
}

In [3]:
def wordnet(list):
    '''
    Expand a collection of tokens with related WordNet lemma names.

    For every synset WordNet returns for a token, this gathers the lemma names
    of the synset itself (synonyms) and of its hypernyms, hyponyms, part
    meronyms, part holonyms and entailments. A token for which WordNet returns
    no synsets contributes nothing to the result.

    Parameters:
        list: iterable of tokens to look up.

    Returns:
        A set of lemma names; using a set means each name appears only once.
    '''
    expanded = set()
    for token in list:
        for synset in wn.synsets(token):
            # Synonyms: lemmas that belong to the synset itself.
            expanded.update(lemma.name() for lemma in synset.lemmas())

            # Every other relation yields neighbouring synsets, so they can
            # all be handled by the same loop.
            relations = (
                synset.hypernyms,
                synset.hyponyms,
                synset.part_meronyms,
                synset.part_holonyms,
                synset.entailments,
            )
            for related_synsets in relations:
                for neighbour in related_synsets():
                    expanded.update(neighbour.lemma_names())

    return expanded

In [4]:
# Expand the Top N words of every topic with their WordNet relatives.
# wordnet() returns a set, and the iteration order of a set of strings can
# differ from one interpreter run to the next. Sorting the names gives each
# topic a stable, reproducible list.
expanded_topic_dict = {}
for key, value in topic_dict.items():
    expanded_topic_dict[key] = sorted(wordnet(value))

In [5]:
# Save the expanded topic dictionary in JSON format.
# The context manager closes the file as soon as the dump finishes, so the
# data is flushed to disk deterministically instead of relying on garbage
# collection of an anonymous file handle.
with open("../../data/topic-model/topN/exp4.json", 'w') as json_file:
    json.dump(expanded_topic_dict, json_file)