# Topic Top N
### Once we have our topics, we want to look at the Top N words for each topic. We also want to expand these words by including their synonyms, hyponyms, hypernyms, meronyms, holonyms & entailments.

In [1]:
import gensim

# Load LDA model
lda = gensim.models.ldamodel.LdaModel.load('../../data/topic-model/models/exp3.model')

# This creates and returns a dictionary with the topics (1-15) as the keys. The values will be the top N words of each topic.
# Top N words = 30
topic_dict = {'Topic ' + str((i+1)): [token for token, score in lda.show_topic(i, topn=30)] for i in range(0, lda.num_topics)}

In [2]:
print (topic_dict)

{'Topic 1': ['heat', 'minute', 'cook', 'stir', 'boil', 'stirring', 'water', 'medium', 'bring', 'simmer', 'large', 'saucepan', 'skillet', 'sauce', 'remove', 'reduce', 'cover', 'tender', 'mixture', 'pan', 'salt', 'butter', 'pasta', 'drain', 'serve', 'cup', 'low', 'pot', 'occasionally', 'oil'], 'Topic 2': ['mix', 'ingredient', 'well', 'refrigerate', 'enjoy', 'serve', 'everything', 'glaze', 'chilled', 'ham', 'eat', 'bowl', 'better', 'thoroughly', 'usually', 'chip', 'instead', 'recipe', 'feel', 'dessert', 'dinner', 'either', 'ive', 'throughout', 'proceed', 'used', 'grab', 'longer', 'fork', 'extremely'], 'Topic 3': ['water', 'tortilla', 'time', 'cup', 'surface', 'hot', 'lid', 'piece', 'jar', 'knead', 'white', 'hand', 'size', 'keep', 'fold', 'whip', 'salsa', 'need', 'recipe', 'cabbage', 'bag', 'point', 'cocoa', 'sit', 'much', 'tightly', 'liquid', 'leave', 'start', 'used'], 'Topic 4': ['pan', 'cream', 'beat', 'cake', 'cool', 'chocolate', 'mixture', 'egg', 'bowl', 'sugar', 'butter', 'set', 'van

In [5]:
import nltk
from nltk.corpus import wordnet as wn

def wordnet(list):
    '''
    This function takes in a list of tokens and gets the synonyms, hyponyms, hypernyms, meronyms, holonyms & entailments for each token.
    Duplicates are avoided by using set().
    Returns them as a set.
    '''
    wordnet = set()
    for token in list:
        for synset in wn.synsets(token):
            for lemma in synset.lemmas():
                wordnet.add(lemma.name())
            for hypernym in synset.hypernyms():
                for lemma in hypernym.lemma_names():
                    wordnet.add(lemma)
            for hyponym in synset.hyponyms():
                for lemma in hyponym.lemma_names():
                    wordnet.add(lemma)
            for meronym in synset.part_meronyms():
                for lemma in meronym.lemma_names():
                    wordnet.add(lemma)
            for holonym in synset.part_holonyms():
                for lemma in holonym.lemma_names():
                    wordnet.add(lemma)
            for entailment in synset.entailments():
                for lemma in entailment.lemma_names():
                    wordnet.add(lemma)

    return wordnet 

In [7]:
# Expand the Top N words
expanded_topic_dict = {}
for key,value in topic_dict.items():
    expanded_topic_dict[key] = list(wordnet(value))
print (expanded_topic_dict)

{'Topic 1': ['low_gear', 'mammal_genus', 'cover_version', 'abbreviate', 'alphabet_soup', 'linkboy', 'belittle', 'expectant', 'shake_up', 'on_occasion', 'plank_over', 'aether', 'oil_colour', 'even_up', 'food', 'invalid', 'delocalize', 'carry_off', 'overlap', 'flowage', 'heat', 'estrus', 'moment', 'enliven', 'officiate', 'trophy', 'Dutch_oven', 'miscellany', 'metal', 'reservoir', 'piss', 'batter', 'Bercy_butter', 'stakes', 'chocolate_sauce', 'emotionalism', 'desulphurize', 'cream', 'foster', 'steam_boiler', 'plant_organ', 'almond_oil', 'limewater', 'disturbance', 'terminate', 'create_from_raw_material', 'assure', 'ace', 'frost', 'transmission', 'grease', 'railcar', 'incubate', 'matte', 'oil_color', 'bring_down', 'silicate', 'locomote', 'demineralize', 'fright', 'gift', 'helper', 'sizzle', 'culmination', 'irrigate', 'adjoin', 'counterbalance', 'piping', 'secrete', 'clear_away', 'disembowel', 'take_up', 'subtract', 'coffeepot', 'obscure', 'stockpot', 'common_salt', 'falsify', 'garment', 'i

In [9]:
# Save the expanded topic dictionary in JSON format
import json

# Serialize data into file:
json.dump(expanded_topic_dict, open("../../data/topic-model/topN/exp3.json", 'w'))