In [1]:
# Heavily cribbed from https://towardsdatascience.com/topic-modeling-and-latent-dirichlet-allocation-in-python-9bf156893c24
# and https://towardsdatascience.com/topic-modelling-in-python-with-nltk-and-gensim-4ef03213cd21

# Import necessary items from the Classical Languages Toolkit and elsewhere for cleaning the data.
import gensim
from cltk.tokenize.word import nltk_tokenize_words
from cltk.tokenize.sentence import TokenizeSentence
tokenizer = TokenizeSentence('latin')
from cltk.stop.latin import CorpusStoplist
from cltk.stop.latin import STOPS_LIST
from cltk.stem.lemma import LemmaReplacer
from cltk.stem.latin.j_v import JVReplacer

jv_replacer = JVReplacer()
tokenizer = TokenizeSentence('latin')
lemmatizer = LemmaReplacer('latin')

In [2]:
# A function to prepare the raw text and turn it into tokens.

def prepare_text(text):
    # tokenize the individual words
    tokens = nltk_tokenize_words(text)
    # remove any tokens smaller than 4 characters in length
    tokens = [token for token in tokens if len(token) > 4]
    # replace j and v with i and u.
    tokens = [jv_replacer.replace(token) for token in tokens]
    # make all words lower-case
    tokens = [token.lower() for token in tokens]
    # remove punctuation
    tokens = [token for token in tokens if token not in ['.', ',','!','?','"',':', ';','[',']']]
    # remove line numbers
    tokens = [x for x in tokens if not (x.isdigit() or x[0] == '-' and x[1:].isdigit())]
    # turn all of the tokens into their dictionary forms
    tokens = [''.join(lemmatizer.lemmatize(token)) for token in tokens]
    # remove common words
    tokens = [token for token in tokens if token not in STOPS_LIST]
    return tokens

In [3]:
# We know from experience that the data include certain words that are generic, so we remove them here.
add_stops = ['noster','nos','ille','quoque','primus']
for stop in add_stops:
    STOPS_LIST.append(stop)

In [24]:
# Open the text files and process them.
import random
import os, glob
text_data = []
folder_path = '/home/sjhuskey/Dropbox/What-is-digital-latin/texts'
for filename in glob.glob(os.path.join(folder_path, '*.txt')):
    with open(filename, 'r') as f:
        text = f.read()
        tokens = prepare_text(text)
        if random.random() > .8: # What does this do?
            text_data.append(tokens)

In [25]:
text_data

[['pasco',
  'musam',
  'damonis',
  'alphesiboei',
  'immemor',
  'herba',
  'miro',
  'juvencus1',
  'certo2',
  'qui1',
  'stupefacio',
  'carmen1',
  'lynces',
  'muto1',
  're-queo',
  'flumen',
  'curro',
  'damonis',
  'musam',
  'dico2',
  'alphesiboei',
  'magnus',
  'supero',
  'timaui',
  'illyrici',
  'lego2',
  'aequor',
  'umquam',
  'liceo1',
  'dico2',
  'facio',
  'liceo1',
  'totus1',
  'fero',
  'orbo',
  'sophocleo',
  'carmen1',
  'digno',
  'coturno',
  'principium',
  'desino',
  'accipio',
  'jubeo',
  'carmen1',
  'coepio',
  'tempus',
  'circos',
  'victrix',
  'hedera',
  'serpo',
  'laurus',
  'frigidum',
  'caelo',
  'nox',
  'decedo',
  'umbro',
  'tener',
  'pecus1',
  'gratus',
  'herba',
  'incumbo',
  'teres',
  'damon',
  'coepio',
  'oliva',
  'nasco',
  'prae',
  'venio',
  'lucifer',
  'almus',
  'conjunx',
  'indignus',
  'nysae',
  'decipio',
  'amor',
  'queror',
  'divus',
  'testis1',
  'proficio',
  'extremus',
  'morior',
  'alloquor',
  'in

In [26]:
# Do a basic count of the words in the texts, just to get a rough idea of the data.
from collections import Counter
text_list = []
for i in text_data:
    for word in i:
        text_list.append(word)
Counter(text_list).most_common(20)

[('carmen1', 50),
 ('duco', 46),
 ('incipio', 25),
 ('venio', 24),
 ('daphnim', 24),
 ('verro', 21),
 ('maenalios', 20),
 ('tibia', 20),
 ('dico2', 19),
 ('domus', 19),
 ('omne', 18),
 ('silva', 18),
 ('canto', 16),
 ('fero', 15),
 ('amor', 15),
 ('video', 13),
 ('pasco', 12),
 ('saepis', 11),
 ('herba', 10),
 ('lego2', 10)]

In [27]:
# Make a dictionary out of the data
from gensim import corpora
dictionary = corpora.Dictionary(text_data)

In [28]:
# Turn the dictionary into a "bag of words" corpus.
corpus = [dictionary.doc2bow(text) for text in text_data]

In [29]:
# Display the "bag of words" corpus.
corpus

[[(0, 2),
  (1, 1),
  (2, 1),
  (3, 1),
  (4, 1),
  (5, 1),
  (6, 1),
  (7, 1),
  (8, 1),
  (9, 1),
  (10, 2),
  (11, 1),
  (12, 3),
  (13, 1),
  (14, 3),
  (15, 2),
  (16, 3),
  (17, 1),
  (18, 1),
  (19, 1),
  (20, 2),
  (21, 1),
  (22, 1),
  (23, 1),
  (24, 1),
  (25, 1),
  (26, 1),
  (27, 1),
  (28, 1),
  (29, 1),
  (30, 1),
  (31, 1),
  (32, 2),
  (33, 1),
  (34, 1),
  (35, 1),
  (36, 1),
  (37, 1),
  (38, 1),
  (39, 16),
  (40, 1),
  (41, 2),
  (42, 1),
  (43, 1),
  (44, 2),
  (45, 3),
  (46, 1),
  (47, 2),
  (48, 1),
  (49, 1),
  (50, 1),
  (51, 1),
  (52, 1),
  (53, 2),
  (54, 1),
  (55, 1),
  (56, 1),
  (57, 1),
  (58, 2),
  (59, 3),
  (60, 2),
  (61, 1),
  (62, 1),
  (63, 1),
  (64, 2),
  (65, 2),
  (66, 3),
  (67, 1),
  (68, 12),
  (69, 1),
  (70, 2),
  (71, 1),
  (72, 1),
  (73, 1),
  (74, 1),
  (75, 1),
  (76, 3),
  (77, 1),
  (78, 1),
  (79, 2),
  (80, 2),
  (81, 1),
  (82, 1),
  (83, 2),
  (84, 1),
  (85, 9),
  (86, 20),
  (87, 1),
  (88, 2),
  (89, 1),
  (90, 1),
  (91,

In [30]:
# Save the dictionary
import pickle
pickle.dump(corpus, open('corpus.pkl', 'wb'))
dictionary.save('dictionary.gensim')

In [31]:
# Creat TF-IDF model object
from gensim import corpora, models

tfidf = models.TfidfModel(corpus)
corpus_tfidf = tfidf[corpus]

from pprint import pprint

for doc in corpus_tfidf:
    pprint(doc)
    break

[(0, 0.034401548797712705),
 (1, 0.030853797126571693),
 (2, 0.030853797126571693),
 (3, 0.030853797126571693),
 (4, 0.030853797126571693),
 (5, 0.030853797126571693),
 (6, 0.030853797126571693),
 (7, 0.030853797126571693),
 (8, 0.030853797126571693),
 (9, 0.030853797126571693),
 (10, 0.061707594253143386),
 (11, 0.030853797126571693),
 (12, 0.09256139137971507),
 (13, 0.030853797126571693),
 (14, 0.09256139137971507),
 (15, 0.034401548797712705),
 (16, 0.02254140183739283),
 (17, 0.030853797126571693),
 (18, 0.030853797126571693),
 (19, 0.017200774398856353),
 (20, 0.034401548797712705),
 (21, 0.007513800612464278),
 (22, 0.030853797126571693),
 (23, 0.007513800612464278),
 (24, 0.017200774398856353),
 (25, 0.017200774398856353),
 (26, 0.017200774398856353),
 (27, 0.030853797126571693),
 (28, 0.017200774398856353),
 (29, 0.030853797126571693),
 (30, 0.030853797126571693),
 (31, 0.030853797126571693),
 (32, 0.034401548797712705),
 (33, 0.017200774398856353),
 (34, 0.030853797126571693)

In [32]:
# Train the lda model using gensim.models.LdaMulticore and save it to ‘lda_model’
lda_model = gensim.models.LdaMulticore(corpus, num_topics=4, id2word=dictionary, passes=2, workers=2)

lda_model.save('model1.gensim')

for idx, topic in lda_model.print_topics(-1):
    print('Topic: {} \nWords: {}'.format(idx, topic))

Topic: 0 
Words: 0.007*"video" + 0.007*"carmen1" + 0.006*"silva" + 0.006*"omne" + 0.006*"specto" + 0.006*"corydon" + 0.005*"fero" + 0.005*"harena" + 0.005*"cerno" + 0.005*"venio"
Topic: 1 
Words: 0.007*"duco" + 0.006*"carmen1" + 0.005*"daphnim" + 0.005*"incipio" + 0.005*"domus" + 0.004*"venio" + 0.004*"tibia" + 0.004*"maenalios" + 0.003*"verro" + 0.003*"fero"
Topic: 2 
Words: 0.015*"carmen1" + 0.013*"duco" + 0.008*"dico2" + 0.008*"incipio" + 0.007*"venio" + 0.006*"silva" + 0.006*"omne" + 0.006*"amor" + 0.006*"canto" + 0.005*"verro"
Topic: 3 
Words: 0.021*"carmen1" + 0.021*"duco" + 0.013*"daphnim" + 0.011*"maenalios" + 0.011*"tibia" + 0.010*"verro" + 0.010*"incipio" + 0.010*"domus" + 0.009*"venio" + 0.006*"omne"


  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)


In [33]:
# Run LDA using TF-IDF
lda_model_tfidf = gensim.models.LdaMulticore(corpus_tfidf, num_topics=4, id2word=dictionary, passes=2, workers=2)

lda_model_tfidf.save('model2.gensim')

for idx, topic in lda_model_tfidf.print_topics(-1):
    print('Topic: {} Word: {}'.format(idx, topic))

Topic: 0 Word: 0.001*"daphnim" + 0.001*"maenalios" + 0.001*"duco" + 0.001*"verro" + 0.001*"tibia" + 0.001*"incipio" + 0.001*"domus" + 0.001*"carmen1" + 0.001*"mater" + 0.001*"necto"
Topic: 1 Word: 0.003*"duco" + 0.003*"daphnim" + 0.003*"verro" + 0.002*"tibia" + 0.002*"maenalios" + 0.002*"domus" + 0.002*"incipio" + 0.002*"carmen1" + 0.002*"lycida" + 0.002*"gallus1"
Topic: 2 Word: 0.002*"corydon" + 0.002*"specto" + 0.002*"cerno" + 0.002*"harena" + 0.002*"alexi" + 0.002*"spectaculum" + 0.001*"quivis" + 0.001*"ardeo" + 0.001*"thestylis" + 0.001*"flos"
Topic: 3 Word: 0.001*"gallus1" + 0.001*"duco" + 0.001*"verro" + 0.001*"maenalios" + 0.001*"daphnim" + 0.001*"tibia" + 0.001*"domus" + 0.001*"carmen1" + 0.001*"inquam" + 0.001*"mater"


  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
  score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)


In [34]:
# Test an unseen document against the lda models. This is one of Boccaccio's bucolic poems, so it should have a high score.

unseen_document1 = open('/home/sjhuskey/Dropbox/What-is-digital-latin/test_texts/boccaccio1.txt')
unseen_text1 = unseen_document1.read()
bow_vector = dictionary.doc2bow(prepare_text(unseen_text1))

for index, score in sorted(lda_model[bow_vector], key=lambda tup: -1*tup[1]):
    print("LDA Score: {}\t Topic: {}".format(score, lda_model.print_topic(index, 5)))
    
for index, score in sorted(lda_model_tfidf[bow_vector], key=lambda tup: -1*tup[1]):
    print("\nTF-IDF Score: {}\t \nTopic: {}".format(score, lda_model_tfidf.print_topic(index, 5)))

LDA Score: 0.7038761377334595	 Topic: 0.015*"carmen1" + 0.013*"duco" + 0.008*"dico2" + 0.008*"incipio" + 0.007*"venio"
LDA Score: 0.2369382679462433	 Topic: 0.021*"carmen1" + 0.021*"duco" + 0.013*"daphnim" + 0.011*"maenalios" + 0.011*"tibia"
LDA Score: 0.05832018330693245	 Topic: 0.007*"video" + 0.007*"carmen1" + 0.006*"silva" + 0.006*"omne" + 0.006*"specto"

TF-IDF Score: 0.67042475938797	 
Topic: 0.003*"duco" + 0.003*"daphnim" + 0.003*"verro" + 0.002*"tibia" + 0.002*"maenalios"

TF-IDF Score: 0.3277927041053772	 
Topic: 0.002*"corydon" + 0.002*"specto" + 0.002*"cerno" + 0.002*"harena" + 0.002*"alexi"


In [35]:
# Test an unseen document against the lda models. This is another one of Boccaccio's bucolic poems, so it should have a high score.

unseen_document2 = open('/home/sjhuskey/Dropbox/What-is-digital-latin/test_texts/boccaccio2.txt')
unseen_text2 = unseen_document2.read()
bow_vector = dictionary.doc2bow(prepare_text(unseen_text2))

for index, score in sorted(lda_model[bow_vector], key=lambda tup: -1*tup[1]):
    print("LDA Score: {}\t Topic: {}".format(score, lda_model.print_topic(index, 5)))
    
for index, score in sorted(lda_model_tfidf[bow_vector], key=lambda tup: -1*tup[1]):
    print("\nTF-IDF Score: {}\t \nTopic: {}".format(score, lda_model_tfidf.print_topic(index, 5)))

LDA Score: 0.6970363259315491	 Topic: 0.015*"carmen1" + 0.013*"duco" + 0.008*"dico2" + 0.008*"incipio" + 0.007*"venio"
LDA Score: 0.16693300008773804	 Topic: 0.021*"carmen1" + 0.021*"duco" + 0.013*"daphnim" + 0.011*"maenalios" + 0.011*"tibia"
LDA Score: 0.13524051010608673	 Topic: 0.007*"video" + 0.007*"carmen1" + 0.006*"silva" + 0.006*"omne" + 0.006*"specto"

TF-IDF Score: 0.5389984250068665	 
Topic: 0.003*"duco" + 0.003*"daphnim" + 0.003*"verro" + 0.002*"tibia" + 0.002*"maenalios"

TF-IDF Score: 0.45936939120292664	 
Topic: 0.002*"corydon" + 0.002*"specto" + 0.002*"cerno" + 0.002*"harena" + 0.002*"alexi"


In [36]:
# Test another unseen document against the lda models. This is a letter by Boccaccio, so it shouldn't have a high score.

unseen_document3 = open('/home/sjhuskey/Dropbox/What-is-digital-latin/test_texts/boccaccioEp1.txt')
unseen_text3 = unseen_document3.read()
bow_vector = dictionary.doc2bow(prepare_text(unseen_text3))

for index, score in sorted(lda_model[bow_vector], key=lambda tup: -1*tup[1]):
    print("LDA Score: {}\t Topic: {}".format(score, lda_model.print_topic(index, 5)))
    
for index, score in sorted(lda_model_tfidf[bow_vector], key=lambda tup: -1*tup[1]):
    print("\nTF-IDF Score: {}\t \nTopic: {}".format(score, lda_model_tfidf.print_topic(index, 5)))

LDA Score: 0.5438342094421387	 Topic: 0.015*"carmen1" + 0.013*"duco" + 0.008*"dico2" + 0.008*"incipio" + 0.007*"venio"
LDA Score: 0.3289404511451721	 Topic: 0.021*"carmen1" + 0.021*"duco" + 0.013*"daphnim" + 0.011*"maenalios" + 0.011*"tibia"
LDA Score: 0.12276345491409302	 Topic: 0.007*"video" + 0.007*"carmen1" + 0.006*"silva" + 0.006*"omne" + 0.006*"specto"

TF-IDF Score: 0.6371163725852966	 
Topic: 0.003*"duco" + 0.003*"daphnim" + 0.003*"verro" + 0.002*"tibia" + 0.002*"maenalios"

TF-IDF Score: 0.35368505120277405	 
Topic: 0.002*"corydon" + 0.002*"specto" + 0.002*"cerno" + 0.002*"harena" + 0.002*"alexi"


In [37]:
# Test another unseen document against the lda models. This is a theological text.

unseen_document4 = open('/home/sjhuskey/Dropbox/What-is-digital-latin/test_texts/lactantius.txt')
unseen_text4 = unseen_document4.read()
bow_vector = dictionary.doc2bow(prepare_text(unseen_text4))

for index, score in sorted(lda_model[bow_vector], key=lambda tup: -1*tup[1]):
    print("LDA Score: {}\t Topic: {}".format(score, lda_model.print_topic(index, 5)))
    
for index, score in sorted(lda_model_tfidf[bow_vector], key=lambda tup: -1*tup[1]):
    print("\nTF-IDF Score: {}\t \nTopic: {}".format(score, lda_model_tfidf.print_topic(index, 5)))

LDA Score: 0.5804693102836609	 Topic: 0.015*"carmen1" + 0.013*"duco" + 0.008*"dico2" + 0.008*"incipio" + 0.007*"venio"
LDA Score: 0.31151944398880005	 Topic: 0.021*"carmen1" + 0.021*"duco" + 0.013*"daphnim" + 0.011*"maenalios" + 0.011*"tibia"
LDA Score: 0.10793621838092804	 Topic: 0.007*"video" + 0.007*"carmen1" + 0.006*"silva" + 0.006*"omne" + 0.006*"specto"

TF-IDF Score: 0.6940121650695801	 
Topic: 0.003*"duco" + 0.003*"daphnim" + 0.003*"verro" + 0.002*"tibia" + 0.002*"maenalios"

TF-IDF Score: 0.3058326244354248	 
Topic: 0.002*"corydon" + 0.002*"specto" + 0.002*"cerno" + 0.002*"harena" + 0.002*"alexi"


In [38]:
# Test a short passage against the lda models. This is passage from Caesar's commentary on the civil war.

unseen_text5 = '''Litteris C. Caesaris consulibus redditis aegre ab his impetratum est summa tribunorum plebis contentione, ut in senatu recitarentur; ut vero ex litteris ad senatum referretur, impetrari non potuit. Referunt consules de re publica [in civitate]. [Incitat] L. Lentulus consul senatu rei publicae se non defuturum pollicetur, si audacter ac fortiter sententias dicere velint; sin Caesarem respiciant atque eius gratiam sequantur, ut superioribus fecerint temporibus, se sibi consilium capturum neque senatus auctoritati obtemperaturum: habere se quoque ad Caesaris gratiam atque amicitiam receptum.'''
bow_vector = dictionary.doc2bow(prepare_text(unseen_text5))

for index, score in sorted(lda_model[bow_vector], key=lambda tup: -1*tup[1]):
    print("LDA Score: {}\t Topic: {}".format(score, lda_model.print_topic(index, 5)))
    
for index, score in sorted(lda_model_tfidf[bow_vector], key=lambda tup: -1*tup[1]):
    print("\nTF-IDF Score: {}\t \nTopic: {}".format(score, lda_model_tfidf.print_topic(index, 5)))

LDA Score: 0.6353273391723633	 Topic: 0.015*"carmen1" + 0.013*"duco" + 0.008*"dico2" + 0.008*"incipio" + 0.007*"venio"
LDA Score: 0.32750403881073	 Topic: 0.021*"carmen1" + 0.021*"duco" + 0.013*"daphnim" + 0.011*"maenalios" + 0.011*"tibia"
LDA Score: 0.02126004360616207	 Topic: 0.007*"video" + 0.007*"carmen1" + 0.006*"silva" + 0.006*"omne" + 0.006*"specto"
LDA Score: 0.015908580273389816	 Topic: 0.007*"duco" + 0.006*"carmen1" + 0.005*"daphnim" + 0.005*"incipio" + 0.005*"domus"

TF-IDF Score: 0.4888608455657959	 
Topic: 0.003*"duco" + 0.003*"daphnim" + 0.003*"verro" + 0.002*"tibia" + 0.002*"maenalios"

TF-IDF Score: 0.47795218229293823	 
Topic: 0.002*"corydon" + 0.002*"specto" + 0.002*"cerno" + 0.002*"harena" + 0.002*"alexi"

TF-IDF Score: 0.01660105772316456	 
Topic: 0.001*"gallus1" + 0.001*"duco" + 0.001*"verro" + 0.001*"maenalios" + 0.001*"daphnim"

TF-IDF Score: 0.01658589206635952	 
Topic: 0.001*"daphnim" + 0.001*"maenalios" + 0.001*"duco" + 0.001*"verro" + 0.001*"tibia"


In [39]:
# Visualize topics.
dictionary = gensim.corpora.Dictionary.load('dictionary.gensim')
corpus = pickle.load(open('corpus.pkl', 'rb'))
lda = gensim.models.ldamodel.LdaModel.load('model1.gensim')

import pyLDAvis.gensim
lda_display = pyLDAvis.gensim.prepare(lda, corpus, dictionary, sort_topics=False)
pyLDAvis.display(lda_display)

# Saliency: a measure of how much the term tells you about the topic.

# Relevance: a weighted average of the probability of the word given the topic and the word given the topic normalized by the probability of the topic.

# The size of the bubble measures the importance of the topics, relative to the data.

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))


In [40]:
# Visualize topics.
dictionary = gensim.corpora.Dictionary.load('dictionary.gensim')
corpus = pickle.load(open('corpus.pkl', 'rb'))
lda = gensim.models.ldamodel.LdaModel.load('model2.gensim')

import pyLDAvis.gensim
lda_display = pyLDAvis.gensim.prepare(lda, corpus, dictionary, sort_topics=False)
pyLDAvis.display(lda_display)

# Saliency: a measure of how much the term tells you about the topic.

# Relevance: a weighted average of the probability of the word given the topic and the word given the topic normalized by the probability of the topic.

# The size of the bubble measures the importance of the topics, relative to the data.

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))
