# LDA Model Training

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

## Data

In [2]:
# Load the peak table; the motifs of each row arrive as one comma-separated
# string in the `motif_string` column (see the preview below).
data_file_path = 'DATA.csv'
data = pd.read_csv(data_file_path)
data

Unnamed: 0,PeakID,Distance to TSS,motif_string
0,seq_40800,-81,"motif_149,motif_162,motif_166,motif_166,motif_..."
1,seq_10314,-228,motif_144
2,seq_18866,9616,"motif_104,motif_126,motif_126,motif_147,motif_..."
3,seq_45348,5770,"motif_120,motif_121,motif_128,motif_128,motif_..."
4,seq_2616,-12540,"motif_100,motif_103,motif_126,motif_126,motif_..."
...,...,...,...
60095,seq_5507,-127293,"motif_137,motif_137,motif_13,motif_174,motif_1..."
60096,seq_11070,37721,"motif_132,motif_132,motif_132,motif_142,motif_..."
60097,seq_17945,88589,"motif_161,motif_162,motif_162,motif_174,motif_..."
60098,seq_29137,93216,"motif_104,motif_105,motif_117,motif_117,motif_..."


In [3]:
# Show the rows that carry no motifs before they are discarded.
missing_motifs = data['motif_string'].isna()
print(data[missing_motifs])

# Drop those rows in place, then tokenise each remaining motif string into a
# Python list stored in a new `motif_list` column.
data.dropna(subset=['motif_string'], inplace=True)
data['motif_list'] = data['motif_string'].apply(
    lambda motif_string: motif_string.split(',')
)
data

          PeakID  Distance to TSS motif_string
18840  seq_57232             -180          NaN
23892  seq_20216             -147          NaN
29667  seq_31231              483          NaN
46832   seq_8465             -341          NaN
47219  seq_46451             4737          NaN


Unnamed: 0,PeakID,Distance to TSS,motif_string,motif_list
0,seq_40800,-81,"motif_149,motif_162,motif_166,motif_166,motif_...","[motif_149, motif_162, motif_166, motif_166, m..."
1,seq_10314,-228,motif_144,[motif_144]
2,seq_18866,9616,"motif_104,motif_126,motif_126,motif_147,motif_...","[motif_104, motif_126, motif_126, motif_147, m..."
3,seq_45348,5770,"motif_120,motif_121,motif_128,motif_128,motif_...","[motif_120, motif_121, motif_128, motif_128, m..."
4,seq_2616,-12540,"motif_100,motif_103,motif_126,motif_126,motif_...","[motif_100, motif_103, motif_126, motif_126, m..."
...,...,...,...,...
60095,seq_5507,-127293,"motif_137,motif_137,motif_13,motif_174,motif_1...","[motif_137, motif_137, motif_13, motif_174, mo..."
60096,seq_11070,37721,"motif_132,motif_132,motif_132,motif_142,motif_...","[motif_132, motif_132, motif_132, motif_142, m..."
60097,seq_17945,88589,"motif_161,motif_162,motif_162,motif_174,motif_...","[motif_161, motif_162, motif_162, motif_174, m..."
60098,seq_29137,93216,"motif_104,motif_105,motif_117,motif_117,motif_...","[motif_104, motif_105, motif_117, motif_117, m..."


In [4]:
from gensim.corpora import Dictionary

# Each "document" is the list of motif tokens belonging to one row of `data`.
docs = data['motif_list'].values
dictionary = Dictionary(docs)

print('Motif-Index to Motif-Name Mapping:')
# Preview only: stop right after the entry whose id is 10 has been printed.
for motif_id, motif_name in dictionary.items():
    print(f'{motif_id} - {motif_name}')
    if motif_id == 10:
        break
    

Motif-Index to Motif-Name Mapping:
0 - motif_149
1 - motif_162
2 - motif_166
3 - motif_174
4 - motif_199
5 - motif_230
6 - motif_248
7 - motif_295
8 - motif_60
9 - motif_66
10 - motif_98


In [5]:
# Convert every token list into gensim's bag-of-words form: (motif_id, count) pairs.
corpus = list(map(dictionary.doc2bow, docs))
print('BOW (Sequence-0):')
# Token list and its bag-of-words encoding for the first document, side by side.
docs[0], corpus[0]

BOW (Sequence-0):


(['motif_149',
  'motif_162',
  'motif_166',
  'motif_166',
  'motif_174',
  'motif_199',
  'motif_230',
  'motif_248',
  'motif_295',
  'motif_295',
  'motif_60',
  'motif_60',
  'motif_66',
  'motif_98'],
 [(0, 1),
  (1, 1),
  (2, 2),
  (3, 1),
  (4, 1),
  (5, 1),
  (6, 1),
  (7, 2),
  (8, 2),
  (9, 1),
  (10, 1)])

## Training

In [6]:
# #setup logging for training metrics
# import logging
# logging.basicConfig(filename='test_output/model_callbacks.log', filemode='w',
#                     format="%(asctime)s:%(levelname)s:%(message)s",
#                     level=logging.NOTSET)

# from gensim.models.callbacks import Callback,PerplexityMetric, ConvergenceMetric, CoherenceMetric
# perplexity_logger = PerplexityMetric(corpus=corpus, logger='shell')
# convergence_logger = ConvergenceMetric(logger='shell')
# # coherence_cv_logger = CoherenceMetric(corpus=corpus, coherence = 'c_v', texts = docs)

# %%time
# from gensim.models import LdaModel,LdaMulticore

# #HYPERPARAMETERS
# #passes = epochs
# temp = dictionary[0]
# id2word = dictionary.id2token
# lda = LdaModel(corpus, id2word=id2word, alpha='auto',eval_every = 1,\
#                eta='auto',num_topics=3, iterations=5, passes = 10,
#               callbacks=[perplexity_logger,convergence_logger])

# lda.print_topics()

# %%time
# from gensim.models import LdaModel,LdaMulticore

# id2word = dictionary.id2token
# lda = LdaMulticore(corpus, id2word=id2word,eval_every = 1,\
#                eta='auto',num_topics=3, iterations=500, passes = 100)

# import pyLDAvis.gensim

# pyLDAvis.enable_notebook()
# pyLDAvis.gensim.prepare(lda, corpus, dictionary)

In [8]:
%%time
import logging
from gensim.models.callbacks import Callback,PerplexityMetric, ConvergenceMetric, CoherenceMetric

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)
convergence_logger = ConvergenceMetric(logger='shell')
from gensim.models import LdaModel,LdaMulticore

temp = dictionary[0]
id2word = dictionary.id2token
lda = LdaModel(corpus, id2word=id2word, alpha='auto',chunksize=10000,
               eta='auto',num_topics=3, iterations=1000, passes = 5,
              minimum_probability=0.0,callbacks=[convergence_logger])

2021-02-22 02:13:16,962 : INFO : using autotuned alpha, starting with [0.33333334, 0.33333334, 0.33333334]
2021-02-22 02:13:16,963 : INFO : using serial LDA version on this node
2021-02-22 02:13:16,965 : INFO : running online (multi-pass) LDA training, 3 topics, 5 passes over the supplied corpus of 60095 documents, updating model once every 10000 documents, evaluating perplexity every 60095 documents, iterating 1000x with a convergence threshold of 0.001000
2021-02-22 02:13:16,969 : INFO : PROGRESS: pass 0, at document #10000/60095
2021-02-22 02:13:16,969 : DEBUG : performing inference on a chunk of 10000 documents
2021-02-22 02:13:36,610 : DEBUG : 9886/10000 documents converged within 1000 iterations
2021-02-22 02:13:36,636 : INFO : optimized alpha [0.14514804, 0.1484222, 0.045556366]
2021-02-22 02:13:36,637 : DEBUG : updating topics
2021-02-22 02:13:36,638 : INFO : merging changes from 10000 documents into a model of 60095 documents
2021-02-22 02:13:36,639 : INFO : topic #0 (0.145): 

2021-02-22 02:13:58,762 : INFO : PROGRESS: pass 0, at document #60095/60095
2021-02-22 02:13:58,763 : DEBUG : performing inference on a chunk of 95 documents
2021-02-22 02:13:58,793 : DEBUG : 95/95 documents converged within 1000 iterations
2021-02-22 02:13:58,794 : INFO : optimized alpha [0.07235851, 0.08545494, 0.056860935]
2021-02-22 02:13:58,795 : DEBUG : updating topics
2021-02-22 02:13:58,795 : INFO : merging changes from 95 documents into a model of 60095 documents
2021-02-22 02:13:58,797 : INFO : topic #0 (0.072): 0.027*"motif_60" + 0.024*"motif_10" + 0.024*"motif_11" + 0.024*"motif_12" + 0.023*"motif_219" + 0.023*"motif_9" + 0.021*"motif_6" + 0.019*"motif_5" + 0.019*"motif_8" + 0.018*"motif_62"
2021-02-22 02:13:58,797 : INFO : topic #1 (0.085): 0.060*"motif_60" + 0.046*"motif_184" + 0.036*"motif_218" + 0.036*"motif_37" + 0.033*"motif_23" + 0.030*"motif_42" + 0.027*"motif_166" + 0.026*"motif_13" + 0.026*"motif_40" + 0.024*"motif_19"
2021-02-22 02:13:58,798 : INFO : topic #2 (0.

2021-02-22 02:14:16,774 : INFO : topic #0 (0.063): 0.031*"motif_219" + 0.026*"motif_60" + 0.026*"motif_11" + 0.025*"motif_12" + 0.024*"motif_10" + 0.022*"motif_6" + 0.022*"motif_9" + 0.021*"motif_264" + 0.020*"motif_5" + 0.020*"motif_8"
2021-02-22 02:14:16,774 : INFO : topic #1 (0.095): 0.062*"motif_60" + 0.046*"motif_184" + 0.038*"motif_37" + 0.036*"motif_218" + 0.035*"motif_23" + 0.034*"motif_42" + 0.028*"motif_40" + 0.027*"motif_13" + 0.027*"motif_166" + 0.025*"motif_19"
2021-02-22 02:14:16,775 : INFO : topic #2 (0.076): 0.020*"motif_62" + 0.019*"motif_60" + 0.019*"motif_54" + 0.018*"motif_50" + 0.018*"motif_49" + 0.017*"motif_48" + 0.017*"motif_41" + 0.015*"motif_17" + 0.014*"motif_93" + 0.014*"motif_86"
2021-02-22 02:14:16,776 : INFO : topic diff=0.075821, rho=0.353344
2021-02-22 02:14:16,778 : DEBUG : bound: at document #0
2021-02-22 02:14:16,817 : INFO : -5.024 per-word bound, 32.5 perplexity estimate based on a held-out corpus of 95 documents with 2924 words
2021-02-22 02:14:16

2021-02-22 02:14:31,473 : INFO : topic #2 (0.108): 0.021*"motif_62" + 0.020*"motif_60" + 0.018*"motif_54" + 0.017*"motif_50" + 0.016*"motif_49" + 0.016*"motif_48" + 0.015*"motif_41" + 0.014*"motif_126" + 0.014*"motif_17" + 0.013*"motif_121"
2021-02-22 02:14:31,473 : INFO : topic diff=0.075316, rho=0.333158
2021-02-22 02:14:31,475 : INFO : PROGRESS: pass 2, at document #60000/60095
2021-02-22 02:14:31,476 : DEBUG : performing inference on a chunk of 10000 documents
2021-02-22 02:14:34,013 : DEBUG : 10000/10000 documents converged within 1000 iterations
2021-02-22 02:14:34,064 : INFO : optimized alpha [0.057604365, 0.112797245, 0.11429829]
2021-02-22 02:14:34,065 : DEBUG : updating topics
2021-02-22 02:14:34,066 : INFO : merging changes from 10000 documents into a model of 60095 documents
2021-02-22 02:14:34,067 : INFO : topic #0 (0.058): 0.040*"motif_219" + 0.032*"motif_11" + 0.032*"motif_12" + 0.031*"motif_10" + 0.027*"motif_264" + 0.027*"motif_6" + 0.027*"motif_9" + 0.026*"motif_5" + 

2021-02-22 02:14:43,764 : INFO : PROGRESS: pass 3, at document #50000/60095
2021-02-22 02:14:43,764 : DEBUG : performing inference on a chunk of 10000 documents
2021-02-22 02:14:46,165 : DEBUG : 10000/10000 documents converged within 1000 iterations
2021-02-22 02:14:46,191 : INFO : optimized alpha [0.055419255, 0.13056453, 0.15670875]
2021-02-22 02:14:46,192 : DEBUG : updating topics
2021-02-22 02:14:46,193 : INFO : merging changes from 10000 documents into a model of 60095 documents
2021-02-22 02:14:46,194 : INFO : topic #0 (0.055): 0.047*"motif_219" + 0.037*"motif_11" + 0.037*"motif_12" + 0.035*"motif_10" + 0.032*"motif_264" + 0.032*"motif_6" + 0.032*"motif_9" + 0.030*"motif_5" + 0.028*"motif_8" + 0.024*"motif_4"
2021-02-22 02:14:46,195 : INFO : topic #1 (0.131): 0.065*"motif_60" + 0.049*"motif_184" + 0.040*"motif_37" + 0.038*"motif_218" + 0.037*"motif_23" + 0.035*"motif_42" + 0.029*"motif_40" + 0.029*"motif_13" + 0.028*"motif_166" + 0.026*"motif_19"
2021-02-22 02:14:46,196 : INFO : 

2021-02-22 02:14:57,525 : DEBUG : updating topics
2021-02-22 02:14:57,526 : INFO : merging changes from 10000 documents into a model of 60095 documents
2021-02-22 02:14:57,527 : INFO : topic #0 (0.056): 0.050*"motif_219" + 0.040*"motif_11" + 0.040*"motif_12" + 0.039*"motif_10" + 0.035*"motif_9" + 0.035*"motif_6" + 0.034*"motif_264" + 0.032*"motif_5" + 0.031*"motif_8" + 0.026*"motif_4"
2021-02-22 02:14:57,528 : INFO : topic #1 (0.149): 0.065*"motif_60" + 0.050*"motif_184" + 0.040*"motif_37" + 0.039*"motif_218" + 0.037*"motif_23" + 0.035*"motif_42" + 0.029*"motif_40" + 0.029*"motif_13" + 0.028*"motif_166" + 0.026*"motif_19"
2021-02-22 02:14:57,528 : INFO : topic #2 (0.196): 0.022*"motif_62" + 0.022*"motif_60" + 0.017*"motif_126" + 0.016*"motif_54" + 0.016*"motif_84" + 0.015*"motif_50" + 0.015*"motif_49" + 0.014*"motif_48" + 0.014*"motif_41" + 0.013*"motif_17"
2021-02-22 02:14:57,529 : INFO : topic diff=0.055364, rho=0.301381
2021-02-22 02:14:57,531 : INFO : PROGRESS: pass 4, at document 

CPU times: user 1min 44s, sys: 331 ms, total: 1min 45s
Wall time: 1min 44s


In [26]:
def getTopicDistribution(predictions,ntopics):
    """Return the fraction of documents whose most probable topic is each topic.

    Parameters
    ----------
    predictions : iterable of sequences of (topic_id, probability)
        One entry per document. Any iterable is accepted; it is walked exactly
        once and does not need to support ``len()``.
    ntopics : int
        Number of topics; the result has one key for every id in
        ``range(ntopics)``.

    Returns
    -------
    dict
        ``{topic_id: share_of_documents}``. Topics that never win map to 0.
        When ``predictions`` is empty every share is ``0.0`` (instead of the
        division by zero the naive ratio would hit).

    Raises
    ------
    KeyError
        If a document's winning topic id is outside ``range(ntopics)``.
    ValueError
        If a document has an empty topic list.
    """
    topic_counts = {topic_id: 0 for topic_id in range(ntopics)}
    ndocs = 0
    for pred in predictions:
        # max() returns the first of several equally probable topics, which is
        # the same winner a stable descending sort would put in front.
        top_topic = max(pred, key=lambda pair: pair[1])[0]
        topic_counts[top_topic] += 1
        ndocs += 1
    if ndocs == 0:
        return {topic_id: 0.0 for topic_id in topic_counts}
    return {topic_id: count / ndocs for topic_id, count in topic_counts.items()}

def likelihoodMetric(predictions,ntopics):
    """Print and return a base-10 log-likelihood score for the predictions.

    For each document the per-topic probabilities are weighted by the share of
    documents won by that topic (from ``getTopicDistribution``) and summed;
    the ``log10`` of these per-document values is accumulated over all
    documents.

    Parameters
    ----------
    predictions : iterable of sequences of (topic_id, probability)
        Walked twice: once by ``getTopicDistribution`` and once here.
    ntopics : int
        Number of topics, forwarded to ``getTopicDistribution``.

    Returns
    -------
    The accumulated log10 value (``0`` when there are no documents).
    """
    topic_share = getTopicDistribution(predictions, ntopics)
    total_log_likelihood = 0
    for doc_topics in tqdm(predictions):
        doc_likelihood = 0
        for topic_id, topic_prob in doc_topics:
            doc_likelihood += topic_prob * topic_share[topic_id]
        total_log_likelihood += np.log10(doc_likelihood)
    print(total_log_likelihood)
    return total_log_likelihood

In [42]:
from gensim.models.coherencemodel import CoherenceModel

def coherenceMetric_cv(model,dictionary,docs,processes=30):
    """Print and return the ``c_v`` coherence of ``model``.

    Parameters
    ----------
    model
        Trained topic model handed to gensim's ``CoherenceModel``.
    dictionary
        Token/id mapping passed as ``dictionary=``.
    docs
        Tokenised documents passed as ``texts=``.
    processes : int, optional
        Worker-process count forwarded to ``CoherenceModel``. Defaults to 30,
        the value that used to be hard-coded; lower it on smaller machines.

    Returns
    -------
    The value returned by ``CoherenceModel.get_coherence()``.
    """
    cm = CoherenceModel(model=model, dictionary=dictionary,
                        texts=docs, coherence='c_v', processes=processes)
    coherence = cm.get_coherence()
    print(coherence)
    return coherence
# coherenceMetric_cv(lda,dictionary ,docs)

In [45]:
from gensim.models.coherencemodel import CoherenceModel

def coherenceMetric_umass(model,dictionary,corpus,processes=30):
    """Print and return the ``u_mass`` coherence of ``model``.

    Parameters
    ----------
    model
        Trained topic model handed to gensim's ``CoherenceModel``.
    dictionary
        Not referenced in the body; kept so existing three-argument calls
        continue to work.
    corpus
        Bag-of-words corpus passed as ``corpus=``.
    processes : int, optional
        Worker-process count forwarded to ``CoherenceModel``. Defaults to 30,
        the value that used to be hard-coded; lower it on smaller machines.

    Returns
    -------
    The value returned by ``CoherenceModel.get_coherence()``.
    """
    cm = CoherenceModel(model=model, corpus=corpus,
                        coherence='u_mass', processes=processes)
    coherence = cm.get_coherence()
    print(coherence)
    return coherence
# coherenceMetric_umass(lda,dictionary ,corpus)

-1.555826726271897


-1.555826726271897

In [44]:
from gensim.models.coherencemodel import CoherenceModel

def coherenceMetric_uci(model,dictionary,docs,processes=30,window_size=2000):
    """Print and return the ``c_uci`` coherence of ``model``.

    Parameters
    ----------
    model
        Trained topic model handed to gensim's ``CoherenceModel``.
    dictionary
        Token/id mapping passed as ``dictionary=``.
    docs
        Tokenised documents passed as ``texts=``.
    processes : int, optional
        Worker-process count forwarded to ``CoherenceModel``. Defaults to 30,
        the value that used to be hard-coded; lower it on smaller machines.
    window_size : int, optional
        Forwarded to ``CoherenceModel`` as ``window_size``. Defaults to 2000,
        the value that used to be hard-coded.

    Returns
    -------
    The value returned by ``CoherenceModel.get_coherence()``.
    """
    cm = CoherenceModel(model=model, dictionary=dictionary,
                        texts=docs, coherence='c_uci', processes=processes,
                        window_size=window_size)
    coherence = cm.get_coherence()
    print(coherence)
    return coherence
# coherenceMetric_uci(lda,dictionary ,docs)

0.3525572782664838


0.3525572782664838

In [46]:
def perplexityMetric(model,corpus):
    """Print and return ``model.log_perplexity(corpus)``.

    NOTE(review): the training log in this notebook reports this quantity as a
    "per-word bound" next to a separate "perplexity estimate", so the value
    returned here looks like the log-scale bound rather than the perplexity
    itself -- confirm before comparing it with perplexities from elsewhere.
    """
    score = model.log_perplexity(corpus)
    print(score)
    return score
# perplexityMetric(lda,corpus)

-5.1017669144749425


-5.1017669144749425

In [21]:
from collections import Counter

def findTopMotifs(predictions,ntopics,data,ntop=5,outdir=None):
    """Print and return the ``ntop`` most frequent motifs per predicted topic.

    Each document is assigned its most probable topic; the ``motif_string``
    values of all documents sharing a topic are concatenated and the motif
    occurrences counted.

    Parameters
    ----------
    predictions : iterable of sequences of (topic_id, probability)
        One entry per row of ``data``, in the same order.
    ntopics : int
        Only used in the name of the CSV file.
    data : pandas.DataFrame
        Must contain a ``motif_string`` column; it is copied, not modified.
    ntop : int, optional
        How many motifs to keep per topic.
    outdir : str or None, optional
        When given, the table is also written to
        ``{outdir}/top{ntop}_motifs_topics_{ntopics}.csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``pred_topic`` and ``top_motif`` (a list of (motif, count)
        pairs).
    """
    dominant_topics = [
        sorted(doc_topics, key=lambda pair: -pair[1])[0][0]
        for doc_topics in tqdm(predictions)
    ]

    labelled = data.copy()
    labelled['pred_topic'] = dominant_topics

    def _most_common_motifs(joined_motifs):
        # Count motif occurrences in the topic's concatenated motif string.
        return Counter(joined_motifs.split(',')).most_common(ntop)

    per_topic = (
        labelled[['motif_string', 'pred_topic']]
        .groupby('pred_topic')
        .agg(lambda motif_strings: ','.join(motif_strings))
    )
    per_topic['top_motif'] = per_topic['motif_string'].apply(_most_common_motifs)
    per_topic = per_topic.reset_index()[['pred_topic', 'top_motif']]

    if outdir is not None:
        per_topic.to_csv(f'{outdir}/top{ntop}_motifs_topics_{ntopics}.csv', index=False)
    print(per_topic)
    return per_topic
# findTopMotifs(lda.get_document_topics(corpus,minimum_probability=0.0),3,data)

In [22]:
def getAvgTssDist(predictions,ntopics,data,outdir=None):
    """Print and return the mean ``Distance to TSS`` per predicted topic.

    Parameters
    ----------
    predictions : iterable of sequences of (topic_id, probability)
        One entry per row of ``data``, in the same order.
    ntopics : int
        Only used in the name of the CSV file.
    data : pandas.DataFrame
        Must contain a ``Distance to TSS`` column; it is copied, not modified.
    outdir : str or None, optional
        When given, the table is also written to
        ``{outdir}/avg_tss_dist_topics_{ntopics}.csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``pred_topic`` and ``Distance to TSS`` (the group mean).
    """
    dominant_topics = [
        sorted(doc_topics, key=lambda pair: -pair[1])[0][0]
        for doc_topics in tqdm(predictions)
    ]

    labelled = data.copy()
    labelled['pred_topic'] = dominant_topics

    mean_dist = (
        labelled[['Distance to TSS', 'pred_topic']]
        .groupby('pred_topic')
        .mean()
        .reset_index()
    )

    if outdir is not None:
        mean_dist.to_csv(f'{outdir}/avg_tss_dist_topics_{ntopics}.csv', index=False)
    print(mean_dist)
    return mean_dist
# getAvgTssDist(lda.get_document_topics(corpus,minimum_probability=0.0),3,data)

In [None]:
%%time
import logging
logging.getLogger().setLevel(logging.CRITICAL)
from gensim.models import LdaModel,LdaMulticore

outdir='model_output'
eval_dict={'num_topics':[],'likelihood':[],'coherence_cv':[],\
          'coherence_umass':[],'coherence_uci':[],'perplexity':[]}
temp = dictionary[0]
id2word = dictionary.id2token
for ntopics in range(2,10):
    print('\n'+'='*40)
    print('Num of Topics = '+str(ntopics))
    model = LdaModel(corpus, id2word=id2word, alpha='auto',chunksize=10000,
                   eta='auto',num_topics=ntopics, iterations=1000, passes = 5,
                  minimum_probability=0.0)
    
    predictions=model.get_document_topics(corpus,minimum_probability=0.0)
    print('\nFinding likelihood...')
    likelihood=likelihoodMetric(predictions,ntopics)
    print('\nFinding coherence_cv...')
    coherence_cv=coherenceMetric_cv(model,dictionary,docs)
    print('\nFinding coherence_umass...')
    coherence_umass=coherenceMetric_umass(model,dictionary ,corpus)
    print('\nFinding coherence_uci...')
    coherence_uci=coherenceMetric_uci(model,dictionary,docs)
    print('\nFinding perplexity...')
    perplexity=perplexityMetric(model,corpus)
    print('\nFinding Top Motifs...')
    findTopMotifs(predictions,ntopics,data,outdir=outdir)
    print('\nFindng avg. distance from TSS per topic...')
    getAvgTssDist(predictions,ntopics,data,outdir=outdir)
    eval_dict['num_topics'].append(ntopics)
    eval_dict['likelihood'].append(likelihood)
    eval_dict['coherence_cv'].append(coherence_cv)
    eval_dict['coherence_umass'].append(coherence_umass)
    eval_dict['coherence_uci'].append(coherence_uci)
    eval_dict['perplexity'].append(perplexity)
eval_df=pd.DataFrame(eval_dict)
eval_df.to_csv(f'{outdir}/metrics.csv',index=False)
eval_df


Num of Topics = 2

Finding likelihood...


100%|██████████| 60095/60095 [00:18<00:00, 3163.12it/s]

-15064.758488495485

Finding coherence_cv...





0.5225679165253173

Finding coherence_umass...
-1.5896094769998963

Finding coherence_uci...
0.4109764283332333

Finding perplexity...


  0%|          | 280/60095 [00:00<00:21, 2790.77it/s]

-5.096255175798838

Finding Top Motifs...


100%|██████████| 60095/60095 [00:18<00:00, 3260.85it/s]
  0%|          | 229/60095 [00:00<00:26, 2288.14it/s]

   pred_topic                                          top_motif
0           0  [(motif_60, 29533), (motif_37, 26686), (motif_...
1           1  [(motif_60, 38974), (motif_184, 27939), (motif...

Findng avg. distance from TSS per topic...


100%|██████████| 60095/60095 [00:18<00:00, 3254.59it/s]


   pred_topic  Distance to TSS
0           0     10336.827239
1           1      9386.069989

Num of Topics = 3

Finding likelihood...


100%|██████████| 60095/60095 [00:18<00:00, 3291.50it/s]

-23482.054811504648

Finding coherence_cv...





0.5403626272996208

Finding coherence_umass...
-1.716956873492414

Finding coherence_uci...
0.4966598396084556

Finding perplexity...


  1%|          | 361/60095 [00:00<00:16, 3602.99it/s]

-4.997750812663312

Finding Top Motifs...


100%|██████████| 60095/60095 [00:20<00:00, 2866.28it/s]
  1%|          | 335/60095 [00:00<00:18, 3275.91it/s]

   pred_topic                                          top_motif
0           0  [(motif_11, 7736), (motif_12, 7563), (motif_10...
1           1  [(motif_60, 23369), (motif_62, 15830), (motif_...
2           2  [(motif_60, 39484), (motif_184, 26309), (motif...

Findng avg. distance from TSS per topic...


100%|██████████| 60095/60095 [00:19<00:00, 3105.29it/s]


   pred_topic  Distance to TSS
0           0      9413.750600
1           1     13333.580354
2           2      5699.994912

Num of Topics = 4

Finding likelihood...


100%|██████████| 60095/60095 [00:18<00:00, 3277.30it/s]

-34443.49925633707

Finding coherence_cv...





0.686070632588859

Finding coherence_umass...
-1.4632317921022289

Finding coherence_uci...
0.7455236200065576

Finding perplexity...


  1%|          | 364/60095 [00:00<00:16, 3631.39it/s]

-4.901239691344312

Finding Top Motifs...


100%|██████████| 60095/60095 [00:19<00:00, 3090.92it/s]
  1%|          | 351/60095 [00:00<00:17, 3506.51it/s]

   pred_topic                                          top_motif
0           0  [(motif_60, 18440), (motif_126, 10965), (motif...
1           1  [(motif_54, 7565), (motif_60, 6905), (motif_49...
2           2  [(motif_60, 35150), (motif_184, 24481), (motif...
3           3  [(motif_37, 20622), (motif_23, 19199), (motif_...

Findng avg. distance from TSS per topic...


100%|██████████| 60095/60095 [00:18<00:00, 3320.77it/s]


   pred_topic  Distance to TSS
0           0     14315.663081
1           1     11424.363071
2           2      8650.966309
3           3      3872.322116

Num of Topics = 5

Finding likelihood...


100%|██████████| 60095/60095 [00:18<00:00, 3186.56it/s]

-38028.7004338297

Finding coherence_cv...





0.6608678266683806

Finding coherence_umass...
-1.5466900928181788

Finding coherence_uci...
0.7610939054690984

Finding perplexity...


  1%|          | 341/60095 [00:00<00:17, 3398.24it/s]

-4.8402715340708875

Finding Top Motifs...


100%|██████████| 60095/60095 [00:17<00:00, 3419.08it/s]
  1%|          | 399/60095 [00:00<00:15, 3973.58it/s]

   pred_topic                                          top_motif
0           0  [(motif_60, 15227), (motif_219, 10130), (motif...
1           1  [(motif_11, 8250), (motif_12, 8061), (motif_10...
2           2  [(motif_54, 8345), (motif_62, 6723), (motif_12...
3           3  [(motif_60, 32811), (motif_184, 23555), (motif...
4           4  [(motif_37, 21641), (motif_23, 20165), (motif_...

Findng avg. distance from TSS per topic...


100%|██████████| 60095/60095 [00:19<00:00, 3091.68it/s]


   pred_topic  Distance to TSS
0           0     13088.673068
1           1     12077.198188
2           2     15453.950773
3           3      8666.348130
4           4      4496.180643

Num of Topics = 6

Finding likelihood...


100%|██████████| 60095/60095 [00:21<00:00, 2839.57it/s]


-43008.298729632115

Finding coherence_cv...
0.6852262566695272

Finding coherence_umass...
-1.4966415033929898

Finding coherence_uci...
0.8133070839086761

Finding perplexity...


  1%|          | 306/60095 [00:00<00:19, 3059.44it/s]

-4.786798035477909

Finding Top Motifs...


 82%|████████▏ | 49466/60095 [00:14<00:03, 3278.05it/s]