# partie 3 : modélisation thématique

Julien Velcin, Université Lyon 2 - Master Humanités Numériques

In [1]:
%matplotlib inline

from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from gensim import corpora, models
#from gensim.models import  Word2Vec
import gensim
import pandas as pd
import numpy as np
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt
from matplotlib import cm 
import os

# en utilisant gensim, on a besoin de préciser l'expression qui permet de "tokenizer" les textes
#tokenizer = RegexpTokenizer(r'\w+')

## Preprocessing

On va illustrer l'utilisation du modèle LDA

In [2]:
from gensim.utils import simple_preprocess
import pandas

# on charge les données dans un tableau
#df = pandas.read_csv("datasets/huma1.csv", sep="\t")
#df = pandas.read_csv("datasets/dataconf.csv", sep="\t")

# lecture des données pour un fichier texte simple
with open(os.path.join("datasets", "Frank Herbert - Dune.txt")) as f:
    lines = [line.strip() for line in f.readlines()]
doc_set = lines

# colonne qui contient les textes à analyser
#var_texte = 'text'
#var_texte = 'title'
#doc_set = df[var_texte].tolist()


# fonction qui génère les listes de mots (token) à partir des textes
def sent_to_words(sentences):
    for sentence in sentences:
        yield(simple_preprocess(str(sentence), deacc=True))  # deacc=True removes punctuations

# on construit le corpus
data_words = list(sent_to_words(doc_set))

In [3]:
# nombre total de documents
ndocs = len(data_words)
print(ndocs)

8608


## Dictionnaire et corpus

On ne va pas utiliser la librairie *scikit-learn* cette fois, mais passer directement par la librairie *gensim*.

In [4]:
from nltk.corpus import stopwords
stop_words = stopwords.words('english')

def remove_stopwords(texts):
    return [[word for word in simple_preprocess(str(doc)) if word not in stop_words] for doc in texts]

# on retire les mots-outils
data_words_nostops = remove_stopwords(data_words)

In [5]:
print(data_words_nostops[:1])

[['dune']]


In [6]:
# création du dictionnaire
dico = corpora.Dictionary(data_words_nostops)

# ce qui permet par ex. de filtrer le vocabulaire
dico.filter_extremes(no_below=10)

# Create Corpus
texts = data_words_nostops

# matrice Term Document Frequency
corpus = [dico.doc2bow(text) for text in texts]

Nous pouvons vérifier que l'entrée souhaitée est une liste de documents qui sont, chacun, représentés comme une liste de tuples (identifiant du mot dans le dictionnaire, TF).

In [7]:
print(corpus[:10])

[[(0, 1)], [], [], [], [], [(1, 1)], [(0, 1)], [], [], []]


In [8]:
len(dico)

1809

## Apprentissage du modèle

La plupart du temps, il faut fixer le nombre de thématiques souhaitées.

In [9]:
ntopics = 50
#ntopics = 20

On va utiliser le modèle LDA implémenté dans la librairie *gensim*.

In [10]:
# thanks to: https://miningthedetails.com/blog/python/lda/GensimLDA/
from gensim.models.ldamodel import LdaModel

#generate_lda = True
generate_lda = False

# generate LDA model
import logging

if generate_lda:
    print("generate new LDA model")
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    ldamodel = LdaModel(corpus, num_topics=ntopics, id2word = dico,
                                           passes=100, random_state=100, per_word_topics=True)
    print(ldamodel)


2021-01-15 12:18:15,481 : INFO : using symmetric alpha at 0.02
2021-01-15 12:18:15,492 : INFO : using symmetric eta at 0.02
2021-01-15 12:18:15,498 : INFO : using serial LDA version on this node


generate new LDA model


2021-01-15 12:18:15,557 : INFO : running online (multi-pass) LDA training, 50 topics, 100 passes over the supplied corpus of 8608 documents, updating model once every 2000 documents, evaluating perplexity every 8608 documents, iterating 50x with a convergence threshold of 0.001000
2021-01-15 12:18:15,567 : INFO : PROGRESS: pass 0, at document #2000/8608
2021-01-15 12:18:16,961 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:18:16,977 : INFO : topic #30 (0.020): 0.022*"halleck" + 0.020*"piter" + 0.020*"said" + 0.017*"water" + 0.014*"room" + 0.014*"life" + 0.012*"could" + 0.011*"kynes" + 0.010*"bene" + 0.010*"gesserit"
2021-01-15 12:18:16,980 : INFO : topic #15 (0.020): 0.052*"paul" + 0.021*"shield" + 0.017*"voice" + 0.011*"could" + 0.011*"said" + 0.010*"mood" + 0.010*"door" + 0.010*"place" + 0.009*"take" + 0.009*"son"
2021-01-15 12:18:16,985 : INFO : topic #49 (0.020): 0.041*"said" + 0.027*"duke" + 0.017*"room" + 0.013*"would" + 0.013*"paul" + 0

2021-01-15 12:18:22,008 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:18:22,019 : INFO : topic #1 (0.020): 0.053*"atreides" + 0.040*"leto" + 0.036*"duke" + 0.035*"said" + 0.034*"alia" + 0.033*"daughter" + 0.029*"traitor" + 0.026*"thus" + 0.025*"find" + 0.025*"long"
2021-01-15 12:18:22,020 : INFO : topic #26 (0.020): 0.065*"thufir" + 0.053*"whispered" + 0.052*"give" + 0.047*"silence" + 0.036*"hawat" + 0.031*"said" + 0.028*"paul" + 0.028*"tell" + 0.025*"ways" + 0.023*"beginning"
2021-01-15 12:18:22,022 : INFO : topic #3 (0.020): 0.087*"said" + 0.073*"lord" + 0.067*"know" + 0.067*"yes" + 0.032*"ah" + 0.031*"training" + 0.030*"baron" + 0.020*"woman" + 0.020*"believe" + 0.019*"perhaps"
2021-01-15 12:18:22,040 : INFO : topic #13 (0.020): 0.037*"say" + 0.036*"kwisatz" + 0.036*"haderach" + 0.034*"want" + 0.028*"figure" + 0.028*"ring" + 0.026*"per" + 0.023*"cent" + 0.020*"smuggler" + 0.020*"paul"
2021-01-15 12:18:22,078 : INFO : topic #38 (0.020): 0.0

2021-01-15 12:18:26,597 : INFO : topic #22 (0.020): 0.058*"lady" + 0.056*"told" + 0.056*"baron" + 0.054*"arrakis" + 0.050*"jessica" + 0.038*"filled" + 0.037*"never" + 0.032*"memory" + 0.031*"person" + 0.030*"become"
2021-01-15 12:18:26,598 : INFO : topic #24 (0.020): 0.078*"troop" + 0.051*"rabban" + 0.050*"slave" + 0.046*"child" + 0.029*"matter" + 0.025*"knife" + 0.021*"plant" + 0.021*"saw" + 0.020*"metal" + 0.019*"near"
2021-01-15 12:18:26,602 : INFO : topic #23 (0.020): 0.081*"name" + 0.047*"god" + 0.041*"world" + 0.039*"removed" + 0.037*"legend" + 0.034*"faced" + 0.031*"missionaria" + 0.031*"protectiva" + 0.027*"brought" + 0.025*"repeated"
2021-01-15 12:18:26,613 : INFO : topic #45 (0.020): 0.094*"head" + 0.043*"storm" + 0.039*"shook" + 0.034*"bring" + 0.029*"swallowed" + 0.027*"years" + 0.025*"jessica" + 0.025*"stare" + 0.025*"glance" + 0.024*"snapped"
2021-01-15 12:18:26,617 : INFO : topic diff=0.429453, rho=0.370015
2021-01-15 12:18:26,619 : INFO : PROGRESS: pass 2, at document #

2021-01-15 12:18:30,944 : INFO : topic #48 (0.020): 0.127*"harkonnen" + 0.074*"us" + 0.048*"death" + 0.047*"behind" + 0.043*"point" + 0.035*"said" + 0.033*"smile" + 0.032*"sire" + 0.024*"half" + 0.024*"needs"
2021-01-15 12:18:30,963 : INFO : topic #33 (0.020): 0.061*"held" + 0.058*"keep" + 0.051*"uncle" + 0.050*"sound" + 0.047*"said" + 0.038*"mouth" + 0.035*"help" + 0.035*"face" + 0.030*"strange" + 0.029*"sat"
2021-01-15 12:18:30,980 : INFO : topic diff=0.250745, rho=0.347021
2021-01-15 12:18:31,315 : INFO : -9.523 per-word bound, 735.6 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:18:31,316 : INFO : PROGRESS: pass 3, at document #8608/8608
2021-01-15 12:18:31,495 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:18:31,503 : INFO : topic #30 (0.020): 0.198*"water" + 0.123*"spice" + 0.096*"life" + 0.038*"fremen" + 0.034*"little" + 0.032*"open" + 0.028*"said" + 0.026*"moisture" + 0.021*"ten" + 0.020*"d

2021-01-15 12:18:35,068 : INFO : topic #30 (0.020): 0.199*"water" + 0.125*"spice" + 0.096*"life" + 0.038*"fremen" + 0.036*"little" + 0.032*"open" + 0.029*"said" + 0.028*"moisture" + 0.021*"ten" + 0.020*"desert"
2021-01-15 12:18:35,084 : INFO : topic diff=0.231322, rho=0.327842
2021-01-15 12:18:35,115 : INFO : PROGRESS: pass 5, at document #2000/8608
2021-01-15 12:18:35,766 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:18:35,774 : INFO : topic #34 (0.020): 0.091*"glanced" + 0.075*"good" + 0.067*"day" + 0.058*"back" + 0.042*"paul" + 0.040*"blood" + 0.030*"love" + 0.029*"leave" + 0.027*"jessica" + 0.025*"said"
2021-01-15 12:18:35,775 : INFO : topic #22 (0.020): 0.108*"arrakis" + 0.075*"never" + 0.070*"lady" + 0.059*"told" + 0.050*"person" + 0.045*"jessica" + 0.042*"known" + 0.042*"filled" + 0.031*"houses" + 0.030*"become"
2021-01-15 12:18:35,776 : INFO : topic #49 (0.020): 0.064*"high" + 0.057*"small" + 0.037*"presently" + 0.036*"read" + 0.033*"

2021-01-15 12:18:39,235 : INFO : PROGRESS: pass 6, at document #4000/8608
2021-01-15 12:18:39,818 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:18:39,824 : INFO : topic #34 (0.020): 0.098*"glanced" + 0.080*"good" + 0.066*"day" + 0.056*"back" + 0.044*"paul" + 0.041*"leave" + 0.037*"blood" + 0.031*"jessica" + 0.029*"group" + 0.027*"said"
2021-01-15 12:18:39,825 : INFO : topic #39 (0.020): 0.165*"dib" + 0.165*"muad" + 0.057*"done" + 0.053*"princess" + 0.044*"irulan" + 0.041*"words" + 0.040*"future" + 0.031*"lasgun" + 0.028*"paul" + 0.028*"carry"
2021-01-15 12:18:39,826 : INFO : topic #4 (0.020): 0.116*"another" + 0.085*"saw" + 0.042*"someone" + 0.040*"killed" + 0.039*"watched" + 0.034*"thing" + 0.034*"came" + 0.027*"felt" + 0.026*"wide" + 0.024*"least"
2021-01-15 12:18:39,831 : INFO : topic #32 (0.020): 0.097*"house" + 0.066*"almost" + 0.063*"blue" + 0.062*"call" + 0.035*"grew" + 0.033*"eh" + 0.030*"creature" + 0.029*"says" + 0.026*"secundus" + 

2021-01-15 12:18:43,774 : INFO : topic #27 (0.020): 0.110*"would" + 0.062*"could" + 0.056*"might" + 0.047*"far" + 0.043*"beyond" + 0.042*"silent" + 0.033*"ever" + 0.033*"still" + 0.029*"remained" + 0.028*"kind"
2021-01-15 12:18:43,776 : INFO : topic #0 (0.020): 0.204*"fremen" + 0.057*"wish" + 0.057*"use" + 0.040*"said" + 0.038*"al" + 0.032*"desert" + 0.028*"one" + 0.028*"live" + 0.028*"remember" + 0.027*"see"
2021-01-15 12:18:43,781 : INFO : topic #5 (0.020): 0.243*"mother" + 0.089*"reverend" + 0.048*"attention" + 0.044*"noted" + 0.041*"power" + 0.030*"jessica" + 0.026*"paul" + 0.025*"way" + 0.024*"food" + 0.019*"within"
2021-01-15 12:18:43,785 : INFO : topic #40 (0.020): 0.138*"sand" + 0.048*"dust" + 0.046*"surface" + 0.042*"across" + 0.032*"wind" + 0.032*"dunes" + 0.028*"toward" + 0.028*"system" + 0.027*"movement" + 0.021*"see"
2021-01-15 12:18:43,814 : INFO : topic diff=0.093879, rho=0.285087
2021-01-15 12:18:43,822 : INFO : PROGRESS: pass 7, at document #8000/8608
2021-01-15 12:18:

2021-01-15 12:18:47,536 : INFO : topic #30 (0.020): 0.196*"water" + 0.127*"spice" + 0.087*"life" + 0.044*"little" + 0.043*"said" + 0.028*"moisture" + 0.024*"fremen" + 0.022*"inner" + 0.021*"open" + 0.018*"ten"
2021-01-15 12:18:47,538 : INFO : topic #11 (0.020): 0.141*"eyes" + 0.076*"paul" + 0.065*"stared" + 0.040*"night" + 0.039*"feet" + 0.037*"rocks" + 0.037*"usul" + 0.029*"cave" + 0.025*"shadows" + 0.024*"opened"
2021-01-15 12:18:47,560 : INFO : topic diff=0.067222, rho=0.274163
2021-01-15 12:18:47,834 : INFO : -9.419 per-word bound, 684.4 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:18:47,838 : INFO : PROGRESS: pass 8, at document #8608/8608
2021-01-15 12:18:48,008 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:18:48,016 : INFO : topic #14 (0.020): 0.132*"turned" + 0.060*"away" + 0.047*"paul" + 0.041*"beneath" + 0.036*"back" + 0.035*"lay" + 0.033*"robe" + 0.031*"special" + 0.030*"chamber" + 0.

2021-01-15 12:18:51,177 : INFO : topic #37 (0.020): 0.114*"hand" + 0.086*"looked" + 0.078*"right" + 0.069*"paul" + 0.068*"left" + 0.055*"without" + 0.033*"though" + 0.029*"wondered" + 0.025*"saw" + 0.022*"away"
2021-01-15 12:18:51,178 : INFO : topic diff=0.123478, rho=0.264406
2021-01-15 12:18:51,203 : INFO : PROGRESS: pass 10, at document #2000/8608
2021-01-15 12:18:51,764 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:18:51,777 : INFO : topic #46 (0.020): 0.086*"think" + 0.078*"father" + 0.072*"thought" + 0.055*"let" + 0.054*"paul" + 0.053*"look" + 0.048*"religious" + 0.035*"put" + 0.031*"mind" + 0.023*"feeling"
2021-01-15 12:18:51,779 : INFO : topic #39 (0.020): 0.193*"dib" + 0.193*"muad" + 0.052*"princess" + 0.052*"done" + 0.041*"irulan" + 0.040*"future" + 0.032*"arrakis" + 0.030*"paul" + 0.030*"words" + 0.025*"carry"
2021-01-15 12:18:51,780 : INFO : topic #15 (0.020): 0.138*"shield" + 0.059*"field" + 0.055*"touched" + 0.055*"within" + 0.0

2021-01-15 12:18:54,817 : INFO : PROGRESS: pass 11, at document #4000/8608
2021-01-15 12:18:55,396 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:18:55,403 : INFO : topic #0 (0.020): 0.331*"fremen" + 0.054*"use" + 0.045*"wish" + 0.032*"al" + 0.029*"one" + 0.028*"said" + 0.026*"remember" + 0.026*"live" + 0.026*"desert" + 0.025*"see"
2021-01-15 12:18:55,403 : INFO : topic #25 (0.020): 0.163*"emperor" + 0.127*"sardaukar" + 0.060*"enough" + 0.050*"floor" + 0.048*"understand" + 0.045*"caught" + 0.039*"battle" + 0.039*"imperial" + 0.029*"said" + 0.022*"military"
2021-01-15 12:18:55,404 : INFO : topic #33 (0.020): 0.087*"held" + 0.084*"face" + 0.062*"keep" + 0.049*"sound" + 0.046*"mouth" + 0.042*"help" + 0.037*"strange" + 0.031*"said" + 0.030*"watch" + 0.030*"uncle"
2021-01-15 12:18:55,411 : INFO : topic #40 (0.020): 0.118*"sand" + 0.057*"dust" + 0.045*"surface" + 0.038*"across" + 0.035*"system" + 0.031*"dunes" + 0.031*"toward" + 0.029*"wind" + 0.028

2021-01-15 12:18:58,880 : INFO : topic #9 (0.020): 0.125*"guild" + 0.069*"yet" + 0.049*"slowly" + 0.047*"human" + 0.042*"imperium" + 0.040*"mean" + 0.039*"report" + 0.037*"simple" + 0.035*"drink" + 0.035*"ancient"
2021-01-15 12:18:58,881 : INFO : topic #13 (0.020): 0.117*"say" + 0.074*"want" + 0.038*"suddenly" + 0.038*"three" + 0.036*"ring" + 0.033*"figure" + 0.033*"sudden" + 0.031*"haderach" + 0.031*"kwisatz" + 0.030*"said"
2021-01-15 12:18:58,904 : INFO : topic #26 (0.020): 0.248*"hawat" + 0.079*"give" + 0.068*"whispered" + 0.066*"tell" + 0.059*"silence" + 0.058*"said" + 0.048*"thufir" + 0.039*"paul" + 0.029*"anger" + 0.023*"beginning"
2021-01-15 12:18:58,922 : INFO : topic #27 (0.020): 0.146*"would" + 0.056*"might" + 0.056*"could" + 0.053*"still" + 0.041*"far" + 0.041*"come" + 0.041*"beyond" + 0.036*"silent" + 0.029*"ever" + 0.025*"remained"
2021-01-15 12:18:58,925 : INFO : topic diff=0.060126, rho=0.240396
2021-01-15 12:18:58,926 : INFO : PROGRESS: pass 12, at document #8000/8608
2

2021-01-15 12:19:04,126 : INFO : topic #39 (0.020): 0.194*"dib" + 0.194*"muad" + 0.053*"done" + 0.051*"future" + 0.050*"princess" + 0.042*"irulan" + 0.032*"words" + 0.032*"paul" + 0.025*"carry" + 0.022*"arrakis"
2021-01-15 12:19:04,152 : INFO : topic #47 (0.020): 0.143*"place" + 0.096*"planet" + 0.066*"even" + 0.065*"much" + 0.041*"waiting" + 0.041*"arrakis" + 0.040*"followed" + 0.035*"caladan" + 0.035*"show" + 0.032*"test"
2021-01-15 12:19:04,160 : INFO : topic diff=0.045007, rho=0.233737
2021-01-15 12:19:04,434 : INFO : -9.394 per-word bound, 672.7 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:19:04,435 : INFO : PROGRESS: pass 13, at document #8608/8608
2021-01-15 12:19:04,785 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:19:04,791 : INFO : topic #47 (0.020): 0.145*"planet" + 0.123*"place" + 0.056*"even" + 0.054*"arrakis" + 0.049*"much" + 0.040*"caladan" + 0.038*"test" + 0.033*"self" + 0.030*"f

2021-01-15 12:19:08,379 : INFO : topic #24 (0.020): 0.077*"rabban" + 0.071*"child" + 0.062*"knife" + 0.058*"troop" + 0.047*"slave" + 0.040*"plant" + 0.038*"matter" + 0.029*"hangings" + 0.027*"near" + 0.026*"metal"
2021-01-15 12:19:08,403 : INFO : topic diff=0.097784, rho=0.227602
2021-01-15 12:19:08,429 : INFO : PROGRESS: pass 15, at document #2000/8608
2021-01-15 12:19:09,124 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:19:09,134 : INFO : topic #48 (0.020): 0.147*"harkonnen" + 0.135*"us" + 0.051*"sire" + 0.048*"point" + 0.042*"behind" + 0.041*"death" + 0.035*"half" + 0.029*"six" + 0.027*"one" + 0.024*"smile"
2021-01-15 12:19:09,135 : INFO : topic #14 (0.020): 0.140*"turned" + 0.058*"away" + 0.042*"paul" + 0.042*"beneath" + 0.038*"back" + 0.032*"question" + 0.031*"lay" + 0.030*"factory" + 0.028*"robe" + 0.027*"special"
2021-01-15 12:19:09,136 : INFO : topic #39 (0.020): 0.195*"muad" + 0.195*"dib" + 0.053*"princess" + 0.053*"done" + 0.041*"ir

2021-01-15 12:19:11,878 : INFO : PROGRESS: pass 16, at document #4000/8608
2021-01-15 12:19:12,384 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:19:12,391 : INFO : topic #32 (0.020): 0.110*"house" + 0.082*"almost" + 0.067*"call" + 0.059*"blue" + 0.037*"grew" + 0.034*"eh" + 0.032*"creature" + 0.032*"says" + 0.030*"turn" + 0.029*"salusa"
2021-01-15 12:19:12,392 : INFO : topic #4 (0.020): 0.131*"saw" + 0.117*"another" + 0.048*"felt" + 0.039*"thing" + 0.039*"someone" + 0.037*"killed" + 0.036*"watched" + 0.028*"guards" + 0.026*"came" + 0.025*"wide"
2021-01-15 12:19:12,394 : INFO : topic #6 (0.020): 0.089*"poison" + 0.071*"get" + 0.051*"hold" + 0.040*"message" + 0.034*"melange" + 0.033*"reason" + 0.032*"orders" + 0.031*"said" + 0.028*"choam" + 0.026*"safe"
2021-01-15 12:19:12,396 : INFO : topic #11 (0.020): 0.149*"eyes" + 0.075*"stared" + 0.074*"paul" + 0.041*"night" + 0.036*"feet" + 0.028*"opened" + 0.027*"weapon" + 0.027*"rocks" + 0.027*"man" + 0

2021-01-15 12:19:15,704 : INFO : topic #42 (0.020): 0.160*"voice" + 0.086*"heard" + 0.070*"said" + 0.058*"cannot" + 0.055*"something" + 0.048*"jessica" + 0.039*"paul" + 0.033*"true" + 0.031*"tone" + 0.026*"drug"
2021-01-15 12:19:15,717 : INFO : topic #4 (0.020): 0.139*"saw" + 0.116*"another" + 0.057*"felt" + 0.039*"someone" + 0.039*"thing" + 0.033*"killed" + 0.032*"watched" + 0.030*"jessica" + 0.026*"came" + 0.025*"wide"
2021-01-15 12:19:15,730 : INFO : topic #39 (0.020): 0.170*"dib" + 0.170*"muad" + 0.055*"done" + 0.053*"princess" + 0.049*"future" + 0.045*"irulan" + 0.034*"words" + 0.029*"carry" + 0.027*"violence" + 0.027*"paul"
2021-01-15 12:19:15,747 : INFO : topic #34 (0.020): 0.099*"glanced" + 0.080*"good" + 0.073*"day" + 0.069*"back" + 0.065*"paul" + 0.045*"said" + 0.043*"leave" + 0.038*"blood" + 0.029*"love" + 0.028*"jessica"
2021-01-15 12:19:15,748 : INFO : topic diff=0.048947, rho=0.211743
2021-01-15 12:19:15,750 : INFO : PROGRESS: pass 17, at document #8000/8608
2021-01-15 12

2021-01-15 12:19:19,117 : INFO : topic #24 (0.020): 0.081*"child" + 0.077*"rabban" + 0.073*"troop" + 0.068*"knife" + 0.052*"slave" + 0.038*"hangings" + 0.032*"matter" + 0.028*"eye" + 0.026*"near" + 0.025*"metal"
2021-01-15 12:19:19,118 : INFO : topic #25 (0.020): 0.198*"emperor" + 0.127*"sardaukar" + 0.058*"floor" + 0.058*"enough" + 0.047*"understand" + 0.040*"battle" + 0.037*"caught" + 0.034*"said" + 0.031*"imperial" + 0.027*"majesty"
2021-01-15 12:19:19,141 : INFO : topic diff=0.037323, rho=0.207150
2021-01-15 12:19:19,410 : INFO : -9.381 per-word bound, 666.7 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:19:19,411 : INFO : PROGRESS: pass 18, at document #8608/8608
2021-01-15 12:19:19,550 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:19:19,565 : INFO : topic #4 (0.020): 0.135*"saw" + 0.108*"another" + 0.054*"felt" + 0.040*"thing" + 0.038*"killed" + 0.037*"someone" + 0.034*"watched" + 0.027*"cam

2021-01-15 12:19:22,517 : INFO : topic #10 (0.020): 0.188*"kynes" + 0.142*"man" + 0.064*"used" + 0.056*"said" + 0.052*"every" + 0.035*"kill" + 0.024*"ritual" + 0.024*"liet" + 0.020*"thousand" + 0.020*"everything"
2021-01-15 12:19:22,541 : INFO : topic diff=0.084577, rho=0.202844
2021-01-15 12:19:22,543 : INFO : PROGRESS: pass 20, at document #2000/8608
2021-01-15 12:19:23,048 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:19:23,056 : INFO : topic #45 (0.020): 0.109*"head" + 0.063*"years" + 0.054*"storm" + 0.044*"shook" + 0.038*"soon" + 0.038*"stare" + 0.036*"snapped" + 0.033*"talk" + 0.031*"bring" + 0.029*"four"
2021-01-15 12:19:23,057 : INFO : topic #27 (0.020): 0.158*"would" + 0.061*"might" + 0.053*"could" + 0.053*"come" + 0.048*"still" + 0.043*"beyond" + 0.042*"far" + 0.031*"silent" + 0.028*"ever" + 0.026*"kind"
2021-01-15 12:19:23,059 : INFO : topic #39 (0.020): 0.195*"dib" + 0.195*"muad" + 0.053*"princess" + 0.053*"done" + 0.042*"future" 

2021-01-15 12:19:25,769 : INFO : PROGRESS: pass 21, at document #4000/8608
2021-01-15 12:19:26,293 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:19:26,300 : INFO : topic #37 (0.020): 0.125*"hand" + 0.099*"looked" + 0.083*"right" + 0.069*"left" + 0.064*"without" + 0.063*"paul" + 0.034*"wondered" + 0.034*"though" + 0.023*"away" + 0.021*"felt"
2021-01-15 12:19:26,302 : INFO : topic #6 (0.020): 0.092*"poison" + 0.072*"get" + 0.053*"hold" + 0.041*"message" + 0.035*"melange" + 0.034*"reason" + 0.032*"orders" + 0.031*"said" + 0.029*"choam" + 0.026*"safe"
2021-01-15 12:19:26,303 : INFO : topic #25 (0.020): 0.168*"emperor" + 0.124*"sardaukar" + 0.059*"enough" + 0.051*"floor" + 0.049*"understand" + 0.047*"caught" + 0.040*"battle" + 0.037*"imperial" + 0.029*"said" + 0.023*"majesty"
2021-01-15 12:19:26,308 : INFO : topic #20 (0.020): 0.148*"first" + 0.080*"need" + 0.057*"like" + 0.046*"arrakeen" + 0.043*"returned" + 0.043*"entire" + 0.029*"pattern" + 0.0

2021-01-15 12:19:29,617 : INFO : topic #26 (0.020): 0.255*"hawat" + 0.079*"give" + 0.068*"tell" + 0.068*"whispered" + 0.060*"silence" + 0.059*"said" + 0.049*"thufir" + 0.036*"paul" + 0.029*"anger" + 0.024*"beginning"
2021-01-15 12:19:29,618 : INFO : topic #40 (0.020): 0.137*"sand" + 0.056*"across" + 0.051*"dust" + 0.045*"surface" + 0.035*"movement" + 0.033*"wind" + 0.033*"dunes" + 0.031*"system" + 0.030*"toward" + 0.023*"bible"
2021-01-15 12:19:29,620 : INFO : topic #48 (0.020): 0.154*"us" + 0.139*"harkonnen" + 0.050*"behind" + 0.042*"point" + 0.040*"sire" + 0.034*"half" + 0.034*"death" + 0.028*"one" + 0.026*"smile" + 0.026*"said"
2021-01-15 12:19:29,636 : INFO : topic #15 (0.020): 0.117*"shield" + 0.085*"within" + 0.055*"touched" + 0.043*"fighting" + 0.041*"field" + 0.039*"paul" + 0.039*"force" + 0.033*"slow" + 0.027*"shields" + 0.026*"motion"
2021-01-15 12:19:29,637 : INFO : topic diff=0.042328, rho=0.191376
2021-01-15 12:19:29,646 : INFO : PROGRESS: pass 22, at document #8000/8608
2

2021-01-15 12:19:33,022 : INFO : topic #4 (0.020): 0.159*"saw" + 0.114*"another" + 0.069*"felt" + 0.044*"thing" + 0.039*"someone" + 0.033*"jessica" + 0.033*"killed" + 0.032*"watched" + 0.030*"paul" + 0.025*"guards"
2021-01-15 12:19:33,039 : INFO : topic #44 (0.020): 0.052*"side" + 0.037*"stillsuit" + 0.035*"awareness" + 0.031*"body" + 0.030*"eyes" + 0.029*"lifted" + 0.027*"space" + 0.023*"studied" + 0.021*"front" + 0.021*"felt"
2021-01-15 12:19:33,045 : INFO : topic diff=0.032796, rho=0.187965
2021-01-15 12:19:33,270 : INFO : -9.375 per-word bound, 664.1 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:19:33,271 : INFO : PROGRESS: pass 23, at document #8608/8608
2021-01-15 12:19:33,446 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:19:33,460 : INFO : topic #24 (0.020): 0.076*"rabban" + 0.071*"child" + 0.066*"knife" + 0.059*"troop" + 0.046*"slave" + 0.038*"plant" + 0.038*"matter" + 0.029*"hangings" + 

2021-01-15 12:19:36,243 : INFO : topic #1 (0.020): 0.185*"duke" + 0.081*"leto" + 0.076*"atreides" + 0.069*"son" + 0.055*"alia" + 0.054*"said" + 0.036*"daughter" + 0.033*"long" + 0.031*"thus" + 0.029*"one"
2021-01-15 12:19:36,246 : INFO : topic diff=0.075502, rho=0.184730
2021-01-15 12:19:36,250 : INFO : PROGRESS: pass 25, at document #2000/8608
2021-01-15 12:19:36,760 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:19:36,766 : INFO : topic #38 (0.020): 0.088*"desert" + 0.077*"took" + 0.062*"deep" + 0.055*"hands" + 0.049*"worm" + 0.048*"open" + 0.032*"second" + 0.030*"last" + 0.026*"paul" + 0.025*"going"
2021-01-15 12:19:36,767 : INFO : topic #39 (0.020): 0.196*"muad" + 0.196*"dib" + 0.053*"princess" + 0.053*"done" + 0.043*"future" + 0.042*"irulan" + 0.033*"arrakis" + 0.029*"words" + 0.027*"paul" + 0.026*"carry"
2021-01-15 12:19:36,768 : INFO : topic #6 (0.020): 0.093*"poison" + 0.064*"get" + 0.049*"hold" + 0.043*"message" + 0.040*"melange" + 0.

2021-01-15 12:19:39,607 : INFO : PROGRESS: pass 26, at document #4000/8608
2021-01-15 12:19:40,096 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:19:40,109 : INFO : topic #26 (0.020): 0.285*"hawat" + 0.073*"give" + 0.067*"tell" + 0.062*"said" + 0.055*"thufir" + 0.055*"whispered" + 0.053*"silence" + 0.035*"paul" + 0.026*"anger" + 0.023*"beginning"
2021-01-15 12:19:40,110 : INFO : topic #4 (0.020): 0.167*"saw" + 0.115*"another" + 0.065*"felt" + 0.041*"thing" + 0.038*"someone" + 0.035*"killed" + 0.035*"watched" + 0.032*"jessica" + 0.027*"paul" + 0.027*"guards"
2021-01-15 12:19:40,115 : INFO : topic #39 (0.020): 0.180*"muad" + 0.180*"dib" + 0.056*"done" + 0.054*"princess" + 0.044*"irulan" + 0.043*"future" + 0.031*"words" + 0.030*"arrakis" + 0.028*"carry" + 0.027*"lasgun"
2021-01-15 12:19:40,118 : INFO : topic #49 (0.020): 0.087*"high" + 0.049*"small" + 0.040*"presently" + 0.035*"family" + 0.034*"read" + 0.031*"obvious" + 0.031*"hope" + 0.030*"onto

2021-01-15 12:19:43,475 : INFO : topic #1 (0.020): 0.237*"duke" + 0.099*"leto" + 0.070*"son" + 0.068*"atreides" + 0.060*"said" + 0.031*"alia" + 0.030*"long" + 0.030*"daughter" + 0.025*"one" + 0.021*"thus"
2021-01-15 12:19:43,491 : INFO : topic #39 (0.020): 0.175*"dib" + 0.175*"muad" + 0.055*"done" + 0.053*"princess" + 0.049*"future" + 0.045*"irulan" + 0.034*"words" + 0.028*"carry" + 0.027*"arrakis" + 0.026*"violence"
2021-01-15 12:19:43,500 : INFO : topic #46 (0.020): 0.175*"thought" + 0.069*"think" + 0.067*"father" + 0.054*"paul" + 0.048*"let" + 0.047*"jessica" + 0.043*"look" + 0.035*"put" + 0.034*"mind" + 0.031*"could"
2021-01-15 12:19:43,520 : INFO : topic #44 (0.020): 0.048*"side" + 0.038*"stillsuit" + 0.037*"body" + 0.036*"awareness" + 0.030*"lifted" + 0.029*"space" + 0.029*"eyes" + 0.023*"felt" + 0.021*"front" + 0.021*"jessica"
2021-01-15 12:19:43,526 : INFO : topic diff=0.038019, rho=0.175943
2021-01-15 12:19:43,534 : INFO : PROGRESS: pass 27, at document #8000/8608
2021-01-15 1

2021-01-15 12:19:46,756 : INFO : topic #49 (0.020): 0.079*"high" + 0.054*"small" + 0.051*"presently" + 0.038*"family" + 0.034*"onto" + 0.034*"rest" + 0.033*"dear" + 0.032*"obvious" + 0.030*"sharp" + 0.029*"hope"
2021-01-15 12:19:46,778 : INFO : topic #11 (0.020): 0.146*"eyes" + 0.078*"paul" + 0.070*"stared" + 0.041*"night" + 0.039*"feet" + 0.037*"rocks" + 0.036*"usul" + 0.030*"cave" + 0.029*"man" + 0.027*"opened"
2021-01-15 12:19:46,805 : INFO : topic diff=0.030048, rho=0.173281
2021-01-15 12:19:47,084 : INFO : -9.371 per-word bound, 662.4 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:19:47,085 : INFO : PROGRESS: pass 28, at document #8608/8608
2021-01-15 12:19:47,255 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:19:47,271 : INFO : topic #19 (0.020): 0.190*"time" + 0.073*"nothing" + 0.056*"fear" + 0.051*"past" + 0.033*"real" + 0.031*"common" + 0.031*"spread" + 0.030*"effect" + 0.025*"looking" + 0

2021-01-15 12:19:49,948 : INFO : topic #11 (0.020): 0.147*"eyes" + 0.073*"paul" + 0.067*"stared" + 0.038*"feet" + 0.038*"night" + 0.037*"usul" + 0.035*"rocks" + 0.033*"cave" + 0.031*"weapon" + 0.028*"man"
2021-01-15 12:19:49,957 : INFO : topic diff=0.068740, rho=0.170737
2021-01-15 12:19:49,958 : INFO : PROGRESS: pass 30, at document #2000/8608
2021-01-15 12:19:50,492 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:19:50,504 : INFO : topic #18 (0.020): 0.091*"knew" + 0.079*"could" + 0.068*"new" + 0.060*"training" + 0.059*"sietch" + 0.042*"trained" + 0.039*"school" + 0.037*"gave" + 0.034*"fact" + 0.029*"short"
2021-01-15 12:19:50,521 : INFO : topic #22 (0.020): 0.232*"arrakis" + 0.108*"never" + 0.066*"told" + 0.061*"lady" + 0.047*"person" + 0.042*"filled" + 0.037*"known" + 0.029*"houses" + 0.026*"become" + 0.024*"accepted"
2021-01-15 12:19:50,523 : INFO : topic #36 (0.020): 0.058*"beside" + 0.054*"lips" + 0.051*"step" + 0.047*"times" + 0.045*"fo

2021-01-15 12:19:53,431 : INFO : PROGRESS: pass 31, at document #4000/8608
2021-01-15 12:19:53,935 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:19:53,948 : INFO : topic #3 (0.020): 0.149*"know" + 0.121*"said" + 0.083*"lord" + 0.072*"woman" + 0.071*"old" + 0.063*"yes" + 0.042*"ah" + 0.034*"perhaps" + 0.030*"young" + 0.029*"jessica"
2021-01-15 12:19:53,952 : INFO : topic #29 (0.020): 0.283*"see" + 0.126*"feyd" + 0.110*"rautha" + 0.050*"maker" + 0.049*"boy" + 0.033*"part" + 0.031*"went" + 0.028*"found" + 0.021*"kept" + 0.021*"truly"
2021-01-15 12:19:53,974 : INFO : topic #43 (0.020): 0.308*"baron" + 0.084*"piter" + 0.077*"said" + 0.067*"take" + 0.051*"mentat" + 0.037*"change" + 0.029*"stop" + 0.024*"women" + 0.021*"ship" + 0.017*"send"
2021-01-15 12:19:53,976 : INFO : topic #6 (0.020): 0.094*"poison" + 0.072*"get" + 0.053*"hold" + 0.042*"message" + 0.036*"melange" + 0.034*"reason" + 0.033*"said" + 0.032*"orders" + 0.029*"choam" + 0.026*"safe"
2

2021-01-15 12:19:57,297 : INFO : topic #32 (0.020): 0.107*"house" + 0.077*"almost" + 0.069*"call" + 0.055*"blue" + 0.037*"grew" + 0.032*"eh" + 0.032*"says" + 0.031*"creature" + 0.029*"turn" + 0.028*"secundus"
2021-01-15 12:19:57,299 : INFO : topic #29 (0.020): 0.277*"see" + 0.139*"feyd" + 0.125*"rautha" + 0.050*"boy" + 0.044*"maker" + 0.032*"part" + 0.029*"found" + 0.029*"went" + 0.024*"upward" + 0.019*"truly"
2021-01-15 12:19:57,314 : INFO : topic #20 (0.020): 0.145*"first" + 0.083*"need" + 0.064*"like" + 0.047*"returned" + 0.041*"arrakeen" + 0.040*"entire" + 0.028*"pattern" + 0.026*"except" + 0.024*"time" + 0.022*"planetologist"
2021-01-15 12:19:57,325 : INFO : topic #47 (0.020): 0.144*"place" + 0.107*"planet" + 0.064*"much" + 0.061*"even" + 0.051*"arrakis" + 0.044*"caladan" + 0.036*"followed" + 0.032*"show" + 0.031*"full" + 0.030*"test"
2021-01-15 12:19:57,329 : INFO : topic diff=0.034923, rho=0.163728
2021-01-15 12:19:57,342 : INFO : PROGRESS: pass 32, at document #8000/8608
2021-0

2021-01-15 12:20:00,561 : INFO : topic #19 (0.020): 0.190*"time" + 0.069*"nothing" + 0.058*"fear" + 0.055*"past" + 0.031*"spread" + 0.029*"real" + 0.028*"gone" + 0.026*"looking" + 0.025*"south" + 0.025*"sky"
2021-01-15 12:20:00,569 : INFO : topic #10 (0.020): 0.179*"kynes" + 0.161*"man" + 0.072*"said" + 0.056*"every" + 0.043*"used" + 0.033*"kill" + 0.025*"everything" + 0.023*"liet" + 0.020*"way" + 0.020*"purpose"
2021-01-15 12:20:00,583 : INFO : topic diff=0.027516, rho=0.161576
2021-01-15 12:20:00,883 : INFO : -9.369 per-word bound, 661.0 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:20:00,884 : INFO : PROGRESS: pass 33, at document #8608/8608
2021-01-15 12:20:01,058 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:20:01,075 : INFO : topic #32 (0.020): 0.104*"house" + 0.080*"almost" + 0.069*"call" + 0.044*"blue" + 0.037*"salusa" + 0.037*"secundus" + 0.035*"grew" + 0.034*"creature" + 0.031*"says" + 

2021-01-15 12:20:03,815 : INFO : topic #39 (0.020): 0.205*"muad" + 0.205*"dib" + 0.049*"princess" + 0.047*"future" + 0.046*"done" + 0.037*"irulan" + 0.031*"arrakis" + 0.030*"words" + 0.026*"paul" + 0.024*"carry"
2021-01-15 12:20:03,817 : INFO : topic diff=0.063366, rho=0.159508
2021-01-15 12:20:03,833 : INFO : PROGRESS: pass 35, at document #2000/8608
2021-01-15 12:20:04,335 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:20:04,346 : INFO : topic #40 (0.020): 0.131*"sand" + 0.057*"dust" + 0.048*"surface" + 0.044*"across" + 0.041*"system" + 0.035*"dunes" + 0.034*"wind" + 0.032*"bible" + 0.031*"toward" + 0.029*"movement"
2021-01-15 12:20:04,347 : INFO : topic #39 (0.020): 0.196*"muad" + 0.196*"dib" + 0.053*"done" + 0.053*"princess" + 0.044*"future" + 0.042*"irulan" + 0.030*"words" + 0.030*"arrakis" + 0.026*"carry" + 0.024*"paul"
2021-01-15 12:20:04,348 : INFO : topic #26 (0.020): 0.257*"hawat" + 0.074*"give" + 0.073*"tell" + 0.062*"whispered" + 0

2021-01-15 12:20:07,493 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:20:07,505 : INFO : topic #20 (0.020): 0.149*"first" + 0.081*"need" + 0.060*"like" + 0.045*"arrakeen" + 0.044*"returned" + 0.043*"entire" + 0.029*"pattern" + 0.025*"except" + 0.024*"old" + 0.024*"time"
2021-01-15 12:20:07,505 : INFO : topic #13 (0.020): 0.126*"say" + 0.072*"want" + 0.055*"three" + 0.037*"kwisatz" + 0.037*"haderach" + 0.035*"suddenly" + 0.034*"figure" + 0.033*"said" + 0.031*"ring" + 0.030*"sudden"
2021-01-15 12:20:07,512 : INFO : topic #9 (0.020): 0.131*"guild" + 0.082*"yet" + 0.047*"human" + 0.045*"imperium" + 0.040*"report" + 0.040*"mean" + 0.037*"slowly" + 0.034*"ancient" + 0.030*"simple" + 0.030*"manner"
2021-01-15 12:20:07,514 : INFO : topic #3 (0.020): 0.148*"know" + 0.120*"said" + 0.082*"lord" + 0.079*"old" + 0.072*"woman" + 0.063*"yes" + 0.042*"ah" + 0.033*"perhaps" + 0.030*"young" + 0.028*"sure"
2021-01-15 12:20:07,516 : INFO : topic #29 (0.020): 0.3

2021-01-15 12:20:10,569 : INFO : topic #37 (0.020): 0.131*"hand" + 0.098*"looked" + 0.088*"right" + 0.070*"left" + 0.065*"without" + 0.060*"paul" + 0.037*"wondered" + 0.034*"though" + 0.025*"away" + 0.020*"knife"
2021-01-15 12:20:10,571 : INFO : topic #3 (0.020): 0.145*"know" + 0.123*"said" + 0.076*"old" + 0.075*"woman" + 0.072*"lord" + 0.060*"yes" + 0.048*"ah" + 0.033*"young" + 0.032*"perhaps" + 0.030*"sure"
2021-01-15 12:20:10,573 : INFO : topic #32 (0.020): 0.107*"house" + 0.077*"almost" + 0.069*"call" + 0.055*"blue" + 0.037*"grew" + 0.032*"eh" + 0.032*"says" + 0.031*"creature" + 0.029*"turn" + 0.028*"secundus"
2021-01-15 12:20:10,577 : INFO : topic #16 (0.020): 0.076*"shall" + 0.070*"well" + 0.050*"arm" + 0.044*"upon" + 0.039*"certain" + 0.036*"control" + 0.034*"set" + 0.033*"better" + 0.031*"said" + 0.028*"try"
2021-01-15 12:20:10,591 : INFO : topic diff=0.032307, rho=0.153748
2021-01-15 12:20:10,617 : INFO : PROGRESS: pass 37, at document #8000/8608
2021-01-15 12:20:11,090 : INFO

2021-01-15 12:20:13,753 : INFO : topic #37 (0.020): 0.130*"hand" + 0.099*"looked" + 0.087*"right" + 0.074*"left" + 0.068*"paul" + 0.064*"without" + 0.037*"wondered" + 0.032*"though" + 0.024*"away" + 0.022*"knife"
2021-01-15 12:20:13,756 : INFO : topic #16 (0.020): 0.075*"shall" + 0.070*"well" + 0.051*"arm" + 0.043*"upon" + 0.038*"certain" + 0.038*"control" + 0.033*"set" + 0.032*"said" + 0.032*"better" + 0.031*"try"
2021-01-15 12:20:13,759 : INFO : topic diff=0.025764, rho=0.151962
2021-01-15 12:20:14,010 : INFO : -9.365 per-word bound, 659.4 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:20:14,011 : INFO : PROGRESS: pass 38, at document #8608/8608
2021-01-15 12:20:14,194 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:20:14,211 : INFO : topic #30 (0.020): 0.217*"water" + 0.135*"spice" + 0.111*"life" + 0.048*"little" + 0.035*"moisture" + 0.031*"said" + 0.022*"open" + 0.021*"ten" + 0.020*"inner" + 0.0

2021-01-15 12:20:16,844 : INFO : topic #6 (0.020): 0.107*"poison" + 0.064*"get" + 0.051*"hold" + 0.042*"message" + 0.042*"melange" + 0.033*"said" + 0.032*"reason" + 0.029*"produce" + 0.028*"assassins" + 0.027*"orders"
2021-01-15 12:20:16,853 : INFO : topic diff=0.059347, rho=0.150238
2021-01-15 12:20:16,855 : INFO : PROGRESS: pass 40, at document #2000/8608
2021-01-15 12:20:17,369 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:20:17,387 : INFO : topic #33 (0.020): 0.156*"face" + 0.082*"held" + 0.058*"keep" + 0.047*"sound" + 0.039*"mouth" + 0.035*"uncle" + 0.033*"help" + 0.031*"watch" + 0.031*"strange" + 0.029*"arms"
2021-01-15 12:20:17,388 : INFO : topic #32 (0.020): 0.111*"house" + 0.079*"almost" + 0.069*"call" + 0.046*"blue" + 0.035*"says" + 0.035*"grew" + 0.034*"secundus" + 0.034*"salusa" + 0.031*"turn" + 0.030*"creature"
2021-01-15 12:20:17,393 : INFO : topic #4 (0.020): 0.165*"saw" + 0.111*"another" + 0.080*"felt" + 0.040*"thing" + 0.038*

2021-01-15 12:20:20,588 : INFO : PROGRESS: pass 41, at document #4000/8608
2021-01-15 12:20:21,256 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:20:21,283 : INFO : topic #16 (0.020): 0.073*"shall" + 0.070*"well" + 0.046*"arm" + 0.046*"upon" + 0.043*"certain" + 0.038*"control" + 0.035*"set" + 0.031*"said" + 0.030*"better" + 0.029*"try"
2021-01-15 12:20:21,287 : INFO : topic #25 (0.020): 0.175*"emperor" + 0.124*"sardaukar" + 0.058*"enough" + 0.052*"floor" + 0.049*"understand" + 0.046*"caught" + 0.041*"battle" + 0.038*"imperial" + 0.029*"said" + 0.024*"majesty"
2021-01-15 12:20:21,296 : INFO : topic #27 (0.020): 0.163*"would" + 0.082*"come" + 0.061*"might" + 0.052*"still" + 0.043*"beyond" + 0.042*"could" + 0.041*"far" + 0.033*"silent" + 0.030*"ever" + 0.026*"kind"
2021-01-15 12:20:21,305 : INFO : topic #7 (0.020): 0.269*"asked" + 0.095*"idaho" + 0.068*"paul" + 0.063*"jessica" + 0.055*"basin" + 0.052*"green" + 0.037*"duncan" + 0.031*"wild" + 0.02

2021-01-15 12:20:24,605 : INFO : topic #24 (0.020): 0.072*"troop" + 0.070*"rabban" + 0.068*"knife" + 0.065*"child" + 0.050*"slave" + 0.039*"matter" + 0.030*"plant" + 0.027*"eye" + 0.026*"near" + 0.026*"metal"
2021-01-15 12:20:24,623 : INFO : topic #49 (0.020): 0.080*"high" + 0.052*"small" + 0.045*"presently" + 0.037*"family" + 0.033*"rest" + 0.032*"onto" + 0.031*"hope" + 0.031*"obvious" + 0.031*"dear" + 0.028*"sharp"
2021-01-15 12:20:24,637 : INFO : topic #36 (0.020): 0.066*"beside" + 0.058*"step" + 0.052*"lips" + 0.047*"forced" + 0.042*"times" + 0.034*"hood" + 0.034*"knowledge" + 0.029*"secret" + 0.028*"cold" + 0.026*"said"
2021-01-15 12:20:24,639 : INFO : topic #2 (0.020): 0.151*"gurney" + 0.116*"halleck" + 0.069*"go" + 0.066*"said" + 0.054*"called" + 0.048*"paul" + 0.045*"made" + 0.031*"man" + 0.026*"darkness" + 0.025*"instant"
2021-01-15 12:20:24,640 : INFO : topic diff=0.030367, rho=0.145396
2021-01-15 12:20:24,643 : INFO : PROGRESS: pass 42, at document #8000/8608
2021-01-15 12:2

2021-01-15 12:20:27,807 : INFO : topic #19 (0.020): 0.190*"time" + 0.069*"nothing" + 0.058*"fear" + 0.055*"past" + 0.031*"spread" + 0.029*"real" + 0.028*"gone" + 0.026*"looking" + 0.025*"south" + 0.025*"coming"
2021-01-15 12:20:27,815 : INFO : topic #20 (0.020): 0.138*"first" + 0.092*"need" + 0.067*"like" + 0.047*"returned" + 0.042*"entire" + 0.040*"arrakeen" + 0.028*"pattern" + 0.025*"except" + 0.025*"old" + 0.022*"time"
2021-01-15 12:20:27,831 : INFO : topic diff=0.024390, rho=0.143883
2021-01-15 12:20:28,066 : INFO : -9.361 per-word bound, 657.4 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:20:28,067 : INFO : PROGRESS: pass 43, at document #8608/8608
2021-01-15 12:20:28,237 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:20:28,245 : INFO : topic #44 (0.020): 0.050*"side" + 0.041*"body" + 0.041*"stillsuit" + 0.040*"space" + 0.037*"awareness" + 0.029*"eyes" + 0.026*"lifted" + 0.020*"studied" + 0.0

2021-01-15 12:20:30,884 : INFO : topic #31 (0.020): 0.190*"men" + 0.058*"door" + 0.055*"two" + 0.037*"one" + 0.036*"air" + 0.029*"pressed" + 0.029*"guard" + 0.027*"ledge" + 0.027*"paul" + 0.026*"friend"
2021-01-15 12:20:30,929 : INFO : topic diff=0.055959, rho=0.142416
2021-01-15 12:20:30,930 : INFO : PROGRESS: pass 45, at document #2000/8608
2021-01-15 12:20:31,458 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:20:31,466 : INFO : topic #38 (0.020): 0.088*"desert" + 0.078*"took" + 0.062*"deep" + 0.057*"hands" + 0.048*"open" + 0.047*"worm" + 0.031*"second" + 0.030*"last" + 0.025*"forward" + 0.025*"going"
2021-01-15 12:20:31,467 : INFO : topic #23 (0.020): 0.107*"name" + 0.079*"god" + 0.077*"world" + 0.039*"protectiva" + 0.039*"missionaria" + 0.038*"legend" + 0.037*"brought" + 0.036*"came" + 0.034*"faced" + 0.033*"removed"
2021-01-15 12:20:31,470 : INFO : topic #24 (0.020): 0.068*"rabban" + 0.067*"child" + 0.067*"knife" + 0.055*"troop" + 0.042*"

2021-01-15 12:20:34,648 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:20:34,663 : INFO : topic #7 (0.020): 0.270*"asked" + 0.095*"idaho" + 0.068*"paul" + 0.062*"jessica" + 0.055*"basin" + 0.052*"green" + 0.037*"duncan" + 0.031*"wild" + 0.029*"subtle" + 0.025*"throat"
2021-01-15 12:20:34,665 : INFO : topic #11 (0.020): 0.157*"eyes" + 0.077*"stared" + 0.063*"paul" + 0.042*"night" + 0.039*"feet" + 0.031*"rocks" + 0.030*"man" + 0.030*"opened" + 0.029*"weapon" + 0.028*"usul"
2021-01-15 12:20:34,680 : INFO : topic #22 (0.020): 0.222*"arrakis" + 0.107*"never" + 0.068*"lady" + 0.068*"told" + 0.045*"person" + 0.040*"filled" + 0.037*"known" + 0.026*"houses" + 0.025*"become" + 0.024*"accepted"
2021-01-15 12:20:34,685 : INFO : topic #45 (0.020): 0.113*"head" + 0.058*"storm" + 0.055*"years" + 0.044*"shook" + 0.038*"soon" + 0.037*"snapped" + 0.036*"stare" + 0.034*"bring" + 0.030*"appeared" + 0.029*"talk"
2021-01-15 12:20:34,686 : INFO : topic #34 (0.020): 

2021-01-15 12:20:37,771 : INFO : topic #12 (0.020): 0.166*"stilgar" + 0.113*"gesserit" + 0.113*"bene" + 0.059*"count" + 0.057*"said" + 0.047*"nodded" + 0.042*"dead" + 0.033*"fenring" + 0.026*"way" + 0.025*"paul"
2021-01-15 12:20:37,780 : INFO : topic #1 (0.020): 0.237*"duke" + 0.098*"leto" + 0.070*"son" + 0.069*"atreides" + 0.060*"said" + 0.034*"alia" + 0.031*"long" + 0.030*"daughter" + 0.026*"one" + 0.022*"thus"
2021-01-15 12:20:37,782 : INFO : topic #45 (0.020): 0.122*"head" + 0.054*"storm" + 0.048*"years" + 0.044*"shook" + 0.038*"stare" + 0.037*"soon" + 0.037*"bring" + 0.034*"snapped" + 0.029*"appeared" + 0.027*"talk"
2021-01-15 12:20:37,786 : INFO : topic #23 (0.020): 0.103*"name" + 0.068*"god" + 0.066*"world" + 0.042*"brought" + 0.040*"legend" + 0.039*"came" + 0.038*"removed" + 0.038*"faced" + 0.037*"protectiva" + 0.037*"missionaria"
2021-01-15 12:20:37,804 : INFO : topic diff=0.028707, rho=0.138271
2021-01-15 12:20:37,818 : INFO : PROGRESS: pass 47, at document #8000/8608
2021-01

2021-01-15 12:20:40,948 : INFO : topic #25 (0.020): 0.196*"emperor" + 0.126*"sardaukar" + 0.058*"floor" + 0.057*"enough" + 0.048*"understand" + 0.042*"battle" + 0.040*"caught" + 0.034*"imperial" + 0.032*"said" + 0.026*"majesty"
2021-01-15 12:20:40,953 : INFO : topic #20 (0.020): 0.138*"first" + 0.091*"need" + 0.069*"like" + 0.047*"returned" + 0.042*"entire" + 0.040*"arrakeen" + 0.028*"pattern" + 0.025*"except" + 0.025*"old" + 0.022*"time"
2021-01-15 12:20:40,969 : INFO : topic diff=0.023080, rho=0.136968
2021-01-15 12:20:41,279 : INFO : -9.357 per-word bound, 655.9 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:20:41,280 : INFO : PROGRESS: pass 48, at document #8608/8608
2021-01-15 12:20:41,421 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:20:41,438 : INFO : topic #25 (0.020): 0.193*"emperor" + 0.114*"sardaukar" + 0.053*"floor" + 0.051*"enough" + 0.047*"battle" + 0.044*"understand" + 0.039*"caught

2021-01-15 12:20:43,935 : INFO : topic #48 (0.020): 0.150*"harkonnen" + 0.139*"us" + 0.050*"point" + 0.047*"behind" + 0.038*"death" + 0.038*"half" + 0.031*"sire" + 0.031*"one" + 0.026*"six" + 0.024*"said"
2021-01-15 12:20:43,989 : INFO : topic diff=0.052957, rho=0.135701
2021-01-15 12:20:43,999 : INFO : PROGRESS: pass 50, at document #2000/8608
2021-01-15 12:20:44,448 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:20:44,455 : INFO : topic #8 (0.020): 0.051*"rock" + 0.042*"light" + 0.034*"sand" + 0.032*"moved" + 0.025*"shai" + 0.025*"hulud" + 0.024*"white" + 0.024*"ahead" + 0.023*"across" + 0.022*"sun"
2021-01-15 12:20:44,456 : INFO : topic #20 (0.020): 0.154*"first" + 0.084*"need" + 0.060*"like" + 0.042*"entire" + 0.042*"arrakeen" + 0.041*"returned" + 0.028*"pattern" + 0.026*"old" + 0.025*"except" + 0.024*"time"
2021-01-15 12:20:44,457 : INFO : topic #17 (0.020): 0.086*"room" + 0.058*"yueh" + 0.045*"table" + 0.040*"paul" + 0.037*"around" + 0.0

2021-01-15 12:20:47,658 : INFO : PROGRESS: pass 51, at document #4000/8608
2021-01-15 12:20:48,119 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:20:48,130 : INFO : topic #22 (0.020): 0.222*"arrakis" + 0.107*"never" + 0.068*"told" + 0.067*"lady" + 0.046*"person" + 0.040*"filled" + 0.036*"known" + 0.026*"houses" + 0.025*"become" + 0.024*"accepted"
2021-01-15 12:20:48,135 : INFO : topic #21 (0.020): 0.111*"great" + 0.094*"many" + 0.071*"things" + 0.061*"one" + 0.050*"always" + 0.036*"already" + 0.032*"order" + 0.030*"rule" + 0.028*"said" + 0.021*"animal"
2021-01-15 12:20:48,136 : INFO : topic #9 (0.020): 0.131*"guild" + 0.083*"yet" + 0.047*"human" + 0.045*"imperium" + 0.040*"report" + 0.040*"mean" + 0.038*"slowly" + 0.034*"ancient" + 0.030*"simple" + 0.030*"manner"
2021-01-15 12:20:48,137 : INFO : topic #31 (0.020): 0.186*"men" + 0.070*"door" + 0.052*"two" + 0.039*"one" + 0.039*"air" + 0.034*"guard" + 0.027*"pressed" + 0.025*"equipment" + 0.023*

2021-01-15 12:20:51,795 : INFO : topic #7 (0.020): 0.262*"asked" + 0.086*"idaho" + 0.068*"basin" + 0.067*"paul" + 0.059*"jessica" + 0.055*"green" + 0.034*"wild" + 0.033*"duncan" + 0.029*"subtle" + 0.023*"throat"
2021-01-15 12:20:51,797 : INFO : topic #40 (0.020): 0.146*"sand" + 0.055*"dust" + 0.055*"across" + 0.048*"surface" + 0.036*"wind" + 0.036*"movement" + 0.035*"dunes" + 0.035*"system" + 0.030*"toward" + 0.027*"bible"
2021-01-15 12:20:51,801 : INFO : topic #22 (0.020): 0.208*"arrakis" + 0.105*"never" + 0.072*"told" + 0.063*"lady" + 0.044*"filled" + 0.042*"person" + 0.035*"known" + 0.028*"become" + 0.027*"memory" + 0.026*"houses"
2021-01-15 12:20:51,808 : INFO : topic #36 (0.020): 0.066*"beside" + 0.057*"step" + 0.052*"lips" + 0.047*"forced" + 0.042*"times" + 0.034*"hood" + 0.034*"knowledge" + 0.029*"secret" + 0.029*"cold" + 0.027*"back"
2021-01-15 12:20:51,813 : INFO : topic diff=0.027205, rho=0.132101
2021-01-15 12:20:51,824 : INFO : PROGRESS: pass 52, at document #8000/8608
2021

2021-01-15 12:20:54,775 : INFO : topic #27 (0.020): 0.159*"would" + 0.083*"come" + 0.069*"still" + 0.059*"might" + 0.045*"beyond" + 0.042*"could" + 0.041*"far" + 0.033*"silent" + 0.029*"ever" + 0.024*"kind"
2021-01-15 12:20:54,801 : INFO : topic #35 (0.020): 0.069*"word" + 0.058*"began" + 0.048*"blade" + 0.044*"command" + 0.041*"jihad" + 0.039*"mapes" + 0.036*"dry" + 0.035*"also" + 0.034*"crysknife" + 0.033*"tried"
2021-01-15 12:20:54,803 : INFO : topic #22 (0.020): 0.202*"arrakis" + 0.111*"never" + 0.078*"told" + 0.059*"lady" + 0.042*"person" + 0.042*"filled" + 0.033*"known" + 0.030*"become" + 0.027*"memory" + 0.026*"houses"
2021-01-15 12:20:54,810 : INFO : topic diff=0.022021, rho=0.130964
2021-01-15 12:20:55,035 : INFO : -9.354 per-word bound, 654.3 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:20:55,035 : INFO : PROGRESS: pass 53, at document #8608/8608
2021-01-15 12:20:55,192 : INFO : merging changes from 608 documents into a model o

2021-01-15 12:20:57,712 : INFO : topic #31 (0.020): 0.189*"men" + 0.059*"door" + 0.055*"two" + 0.038*"one" + 0.036*"air" + 0.029*"pressed" + 0.029*"guard" + 0.027*"ledge" + 0.026*"paul" + 0.026*"friend"
2021-01-15 12:20:57,716 : INFO : topic #33 (0.020): 0.162*"face" + 0.083*"held" + 0.059*"keep" + 0.051*"sound" + 0.040*"mouth" + 0.036*"uncle" + 0.034*"help" + 0.033*"watch" + 0.029*"strange" + 0.028*"arms"
2021-01-15 12:20:57,735 : INFO : topic diff=0.050500, rho=0.129855
2021-01-15 12:20:57,737 : INFO : PROGRESS: pass 55, at document #2000/8608
2021-01-15 12:20:58,204 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:20:58,212 : INFO : topic #17 (0.020): 0.086*"room" + 0.058*"yueh" + 0.044*"table" + 0.040*"paul" + 0.037*"around" + 0.034*"stood" + 0.030*"along" + 0.029*"hall" + 0.026*"door" + 0.025*"stopped"
2021-01-15 12:20:58,213 : INFO : topic #5 (0.020): 0.253*"mother" + 0.100*"reverend" + 0.053*"attention" + 0.050*"power" + 0.049*"noted" + 0

2021-01-15 12:21:00,738 : INFO : topic diff=0.023997, rho=0.127719
2021-01-15 12:21:00,752 : INFO : PROGRESS: pass 56, at document #4000/8608
2021-01-15 12:21:01,187 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:21:01,210 : INFO : topic #2 (0.020): 0.173*"gurney" + 0.115*"halleck" + 0.068*"go" + 0.065*"said" + 0.053*"called" + 0.051*"paul" + 0.044*"made" + 0.031*"man" + 0.024*"darkness" + 0.022*"instant"
2021-01-15 12:21:01,211 : INFO : topic #10 (0.020): 0.213*"kynes" + 0.151*"man" + 0.074*"said" + 0.051*"every" + 0.047*"used" + 0.028*"kill" + 0.022*"everything" + 0.022*"liet" + 0.021*"purpose" + 0.020*"way"
2021-01-15 12:21:01,217 : INFO : topic #38 (0.020): 0.086*"desert" + 0.081*"took" + 0.065*"hands" + 0.059*"deep" + 0.045*"open" + 0.043*"worm" + 0.031*"last" + 0.028*"second" + 0.026*"going" + 0.025*"breath"
2021-01-15 12:21:01,221 : INFO : topic #16 (0.020): 0.073*"shall" + 0.070*"well" + 0.048*"arm" + 0.045*"upon" + 0.043*"certain" + 0

2021-01-15 12:21:04,412 : INFO : topic #34 (0.020): 0.099*"paul" + 0.093*"glanced" + 0.085*"back" + 0.075*"good" + 0.069*"day" + 0.066*"said" + 0.039*"leave" + 0.036*"blood" + 0.029*"love" + 0.026*"stepped"
2021-01-15 12:21:04,413 : INFO : topic #21 (0.020): 0.106*"great" + 0.094*"many" + 0.070*"things" + 0.059*"one" + 0.051*"always" + 0.038*"already" + 0.031*"order" + 0.030*"rule" + 0.030*"said" + 0.021*"thoughts"
2021-01-15 12:21:04,438 : INFO : topic #7 (0.020): 0.262*"asked" + 0.085*"idaho" + 0.068*"basin" + 0.067*"paul" + 0.063*"jessica" + 0.054*"green" + 0.034*"wild" + 0.033*"duncan" + 0.029*"subtle" + 0.023*"throat"
2021-01-15 12:21:04,439 : INFO : topic #43 (0.020): 0.308*"baron" + 0.077*"said" + 0.073*"piter" + 0.067*"take" + 0.046*"mentat" + 0.038*"change" + 0.033*"stop" + 0.025*"women" + 0.022*"could" + 0.019*"ship"
2021-01-15 12:21:04,459 : INFO : topic diff=0.026078, rho=0.126690
2021-01-15 12:21:04,461 : INFO : PROGRESS: pass 57, at document #8000/8608
2021-01-15 12:21:04

2021-01-15 12:21:07,624 : INFO : topic #34 (0.020): 0.108*"paul" + 0.094*"glanced" + 0.082*"back" + 0.072*"good" + 0.071*"day" + 0.070*"said" + 0.039*"leave" + 0.034*"blood" + 0.030*"love" + 0.025*"stepped"
2021-01-15 12:21:07,644 : INFO : topic #25 (0.020): 0.195*"emperor" + 0.125*"sardaukar" + 0.057*"floor" + 0.057*"enough" + 0.048*"understand" + 0.042*"caught" + 0.042*"battle" + 0.034*"imperial" + 0.032*"said" + 0.026*"majesty"
2021-01-15 12:21:07,674 : INFO : topic diff=0.021237, rho=0.125685
2021-01-15 12:21:07,895 : INFO : -9.351 per-word bound, 653.0 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:21:07,897 : INFO : PROGRESS: pass 58, at document #8608/8608
2021-01-15 12:21:08,051 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:21:08,067 : INFO : topic #8 (0.020): 0.055*"rock" + 0.042*"light" + 0.036*"sand" + 0.033*"moved" + 0.028*"shai" + 0.028*"hulud" + 0.025*"across" + 0.023*"ahead" + 0.022

2021-01-15 12:21:10,574 : INFO : topic #18 (0.020): 0.112*"could" + 0.091*"knew" + 0.064*"new" + 0.063*"training" + 0.059*"sietch" + 0.041*"trained" + 0.036*"gave" + 0.035*"school" + 0.034*"fact" + 0.026*"short"
2021-01-15 12:21:10,575 : INFO : topic diff=0.048458, rho=0.124704
2021-01-15 12:21:10,601 : INFO : PROGRESS: pass 60, at document #2000/8608
2021-01-15 12:21:11,060 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:21:11,069 : INFO : topic #24 (0.020): 0.069*"rabban" + 0.067*"knife" + 0.067*"child" + 0.056*"troop" + 0.042*"slave" + 0.038*"matter" + 0.034*"plant" + 0.029*"metal" + 0.029*"eye" + 0.027*"near"
2021-01-15 12:21:11,070 : INFO : topic #48 (0.020): 0.147*"harkonnen" + 0.139*"us" + 0.047*"point" + 0.046*"sire" + 0.046*"behind" + 0.036*"death" + 0.035*"half" + 0.030*"one" + 0.026*"six" + 0.025*"said"
2021-01-15 12:21:11,073 : INFO : topic #36 (0.020): 0.060*"beside" + 0.053*"lips" + 0.052*"step" + 0.046*"times" + 0.045*"forced" + 

2021-01-15 12:21:14,298 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:21:14,311 : INFO : topic #36 (0.020): 0.063*"beside" + 0.056*"lips" + 0.049*"step" + 0.044*"forced" + 0.044*"times" + 0.033*"knowledge" + 0.032*"cold" + 0.031*"hood" + 0.031*"secret" + 0.028*"said"
2021-01-15 12:21:14,313 : INFO : topic #3 (0.020): 0.149*"know" + 0.109*"said" + 0.095*"old" + 0.080*"lord" + 0.073*"woman" + 0.063*"yes" + 0.043*"ah" + 0.033*"perhaps" + 0.032*"young" + 0.028*"sure"
2021-01-15 12:21:14,322 : INFO : topic #12 (0.020): 0.140*"stilgar" + 0.128*"gesserit" + 0.128*"bene" + 0.052*"said" + 0.049*"count" + 0.047*"nodded" + 0.040*"dead" + 0.031*"fenring" + 0.029*"paul" + 0.027*"way"
2021-01-15 12:21:14,324 : INFO : topic #0 (0.020): 0.379*"fremen" + 0.059*"use" + 0.048*"wish" + 0.036*"al" + 0.031*"desert" + 0.028*"live" + 0.025*"remember" + 0.022*"one" + 0.021*"shrugged" + 0.020*"clear"
2021-01-15 12:21:14,329 : INFO : topic #7 (0.020): 0.270*"asked" + 0

2021-01-15 12:21:17,348 : INFO : topic #49 (0.020): 0.080*"high" + 0.051*"small" + 0.044*"presently" + 0.036*"family" + 0.032*"rest" + 0.031*"onto" + 0.030*"hope" + 0.030*"obvious" + 0.030*"dear" + 0.028*"sharp"
2021-01-15 12:21:17,350 : INFO : topic #24 (0.020): 0.071*"rabban" + 0.070*"troop" + 0.067*"knife" + 0.066*"child" + 0.049*"slave" + 0.039*"matter" + 0.030*"plant" + 0.027*"eye" + 0.026*"near" + 0.026*"metal"
2021-01-15 12:21:17,367 : INFO : topic #1 (0.020): 0.237*"duke" + 0.098*"leto" + 0.071*"son" + 0.070*"atreides" + 0.060*"said" + 0.035*"alia" + 0.031*"long" + 0.030*"daughter" + 0.026*"one" + 0.022*"thus"
2021-01-15 12:21:17,370 : INFO : topic #4 (0.020): 0.179*"saw" + 0.111*"another" + 0.086*"felt" + 0.040*"paul" + 0.038*"jessica" + 0.038*"someone" + 0.036*"thing" + 0.032*"killed" + 0.031*"watched" + 0.024*"wide"
2021-01-15 12:21:17,377 : INFO : topic diff=0.024953, rho=0.121893
2021-01-15 12:21:17,378 : INFO : PROGRESS: pass 62, at document #8000/8608
2021-01-15 12:21:17

2021-01-15 12:21:20,663 : INFO : topic #35 (0.020): 0.069*"word" + 0.057*"began" + 0.048*"blade" + 0.044*"command" + 0.041*"jihad" + 0.040*"mapes" + 0.036*"dry" + 0.035*"also" + 0.034*"crysknife" + 0.033*"tried"
2021-01-15 12:21:20,664 : INFO : topic #28 (0.020): 0.172*"people" + 0.056*"thopter" + 0.042*"dune" + 0.042*"paul" + 0.036*"strength" + 0.034*"hidden" + 0.030*"remembered" + 0.030*"five" + 0.029*"wait" + 0.027*"cavern"
2021-01-15 12:21:20,677 : INFO : topic diff=0.020272, rho=0.120998
2021-01-15 12:21:20,942 : INFO : -9.349 per-word bound, 652.0 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:21:20,943 : INFO : PROGRESS: pass 63, at document #8608/8608
2021-01-15 12:21:21,111 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:21:21,123 : INFO : topic #7 (0.020): 0.264*"asked" + 0.073*"idaho" + 0.069*"paul" + 0.065*"basin" + 0.061*"jessica" + 0.055*"green" + 0.038*"wild" + 0.034*"subtle" + 0.029*

2021-01-15 12:21:23,765 : INFO : topic #5 (0.020): 0.247*"mother" + 0.098*"reverend" + 0.053*"noted" + 0.051*"attention" + 0.050*"power" + 0.028*"often" + 0.025*"within" + 0.025*"food" + 0.024*"jessica" + 0.023*"produced"
2021-01-15 12:21:23,779 : INFO : topic diff=0.046434, rho=0.120122
2021-01-15 12:21:23,785 : INFO : PROGRESS: pass 65, at document #2000/8608
2021-01-15 12:21:24,392 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:21:24,401 : INFO : topic #46 (0.020): 0.175*"thought" + 0.074*"think" + 0.066*"father" + 0.053*"paul" + 0.049*"let" + 0.045*"look" + 0.043*"jessica" + 0.038*"religious" + 0.036*"put" + 0.033*"seen"
2021-01-15 12:21:24,402 : INFO : topic #37 (0.020): 0.128*"hand" + 0.099*"looked" + 0.087*"right" + 0.075*"left" + 0.063*"without" + 0.062*"paul" + 0.033*"wondered" + 0.032*"though" + 0.024*"away" + 0.021*"knife"
2021-01-15 12:21:24,420 : INFO : topic #6 (0.020): 0.094*"poison" + 0.067*"get" + 0.051*"hold" + 0.043*"message

2021-01-15 12:21:30,747 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:21:30,790 : INFO : topic #28 (0.020): 0.164*"people" + 0.066*"thopter" + 0.045*"dune" + 0.038*"paul" + 0.036*"hidden" + 0.034*"strength" + 0.032*"five" + 0.028*"remembered" + 0.028*"wait" + 0.027*"dream"
2021-01-15 12:21:30,792 : INFO : topic #36 (0.020): 0.064*"beside" + 0.056*"lips" + 0.049*"step" + 0.044*"forced" + 0.044*"times" + 0.033*"knowledge" + 0.032*"cold" + 0.031*"hood" + 0.031*"secret" + 0.028*"said"
2021-01-15 12:21:30,810 : INFO : topic #16 (0.020): 0.073*"shall" + 0.069*"well" + 0.056*"arm" + 0.045*"upon" + 0.042*"certain" + 0.037*"control" + 0.034*"set" + 0.031*"said" + 0.030*"better" + 0.029*"try"
2021-01-15 12:21:30,850 : INFO : topic #27 (0.020): 0.161*"would" + 0.081*"come" + 0.063*"still" + 0.060*"might" + 0.044*"could" + 0.042*"beyond" + 0.041*"far" + 0.032*"silent" + 0.030*"ever" + 0.025*"kind"
2021-01-15 12:21:30,853 : INFO : topic #30 (0.020): 0.221

2021-01-15 12:21:34,205 : INFO : topic #25 (0.020): 0.172*"emperor" + 0.116*"sardaukar" + 0.059*"floor" + 0.058*"enough" + 0.049*"understand" + 0.047*"caught" + 0.041*"battle" + 0.036*"imperial" + 0.029*"said" + 0.023*"close"
2021-01-15 12:21:34,206 : INFO : topic #8 (0.020): 0.059*"rock" + 0.042*"light" + 0.036*"sand" + 0.033*"moved" + 0.026*"pack" + 0.025*"across" + 0.025*"ahead" + 0.023*"white" + 0.020*"lost" + 0.020*"hulud"
2021-01-15 12:21:34,236 : INFO : topic #16 (0.020): 0.075*"shall" + 0.068*"well" + 0.062*"arm" + 0.044*"upon" + 0.039*"certain" + 0.036*"control" + 0.034*"set" + 0.032*"better" + 0.031*"said" + 0.028*"try"
2021-01-15 12:21:34,237 : INFO : topic #21 (0.020): 0.106*"great" + 0.094*"many" + 0.069*"things" + 0.060*"one" + 0.050*"always" + 0.038*"already" + 0.031*"order" + 0.030*"rule" + 0.029*"said" + 0.021*"thoughts"
2021-01-15 12:21:34,238 : INFO : topic diff=0.024061, rho=0.117603
2021-01-15 12:21:34,243 : INFO : PROGRESS: pass 67, at document #8000/8608
2021-01-

2021-01-15 12:21:37,475 : INFO : topic #23 (0.020): 0.101*"name" + 0.069*"world" + 0.062*"god" + 0.043*"brought" + 0.042*"faced" + 0.041*"legend" + 0.040*"came" + 0.038*"removed" + 0.033*"protectiva" + 0.033*"missionaria"
2021-01-15 12:21:37,558 : INFO : topic #5 (0.020): 0.260*"mother" + 0.099*"reverend" + 0.055*"attention" + 0.050*"power" + 0.046*"noted" + 0.028*"jessica" + 0.025*"said" + 0.024*"within" + 0.022*"food" + 0.020*"often"
2021-01-15 12:21:37,559 : INFO : topic diff=0.019614, rho=0.116798
2021-01-15 12:21:37,836 : INFO : -9.347 per-word bound, 651.4 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:21:37,837 : INFO : PROGRESS: pass 68, at document #8608/8608
2021-01-15 12:21:38,024 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:21:38,057 : INFO : topic #41 (0.020): 0.148*"must" + 0.063*"said" + 0.057*"may" + 0.054*"make" + 0.053*"among" + 0.048*"harkonnens" + 0.037*"course" + 0.037*"relig

2021-01-15 12:21:41,710 : INFO : topic #37 (0.020): 0.127*"hand" + 0.096*"looked" + 0.086*"right" + 0.075*"left" + 0.064*"paul" + 0.061*"without" + 0.034*"wondered" + 0.031*"though" + 0.024*"away" + 0.023*"knife"
2021-01-15 12:21:41,730 : INFO : topic diff=0.044841, rho=0.116010
2021-01-15 12:21:41,746 : INFO : PROGRESS: pass 70, at document #2000/8608
2021-01-15 12:21:42,243 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:21:42,251 : INFO : topic #49 (0.020): 0.086*"high" + 0.051*"small" + 0.041*"presently" + 0.034*"family" + 0.031*"major" + 0.031*"obvious" + 0.029*"read" + 0.029*"rest" + 0.028*"onto" + 0.027*"higher"
2021-01-15 12:21:42,252 : INFO : topic #0 (0.020): 0.383*"fremen" + 0.060*"use" + 0.045*"wish" + 0.037*"al" + 0.033*"desert" + 0.028*"live" + 0.024*"remember" + 0.021*"one" + 0.021*"clear" + 0.020*"different"
2021-01-15 12:21:42,253 : INFO : topic #17 (0.020): 0.085*"room" + 0.058*"yueh" + 0.044*"table" + 0.040*"paul" + 0.038*"ar

2021-01-15 12:21:45,104 : INFO : PROGRESS: pass 71, at document #4000/8608
2021-01-15 12:21:45,590 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:21:45,597 : INFO : topic #16 (0.020): 0.073*"shall" + 0.068*"well" + 0.061*"arm" + 0.045*"upon" + 0.042*"certain" + 0.037*"control" + 0.034*"set" + 0.030*"said" + 0.029*"better" + 0.029*"try"
2021-01-15 12:21:45,599 : INFO : topic #48 (0.020): 0.150*"harkonnen" + 0.142*"us" + 0.046*"behind" + 0.044*"sire" + 0.044*"point" + 0.034*"death" + 0.034*"half" + 0.030*"one" + 0.026*"said" + 0.025*"smile"
2021-01-15 12:21:45,602 : INFO : topic #28 (0.020): 0.164*"people" + 0.065*"thopter" + 0.045*"dune" + 0.039*"paul" + 0.036*"hidden" + 0.034*"strength" + 0.032*"five" + 0.028*"remembered" + 0.028*"wait" + 0.027*"dream"
2021-01-15 12:21:45,608 : INFO : topic #18 (0.020): 0.126*"could" + 0.087*"knew" + 0.064*"training" + 0.063*"new" + 0.051*"sietch" + 0.038*"trained" + 0.035*"gave" + 0.034*"school" + 0.034*"fact

2021-01-15 12:21:50,220 : INFO : topic #27 (0.020): 0.160*"would" + 0.084*"come" + 0.064*"still" + 0.058*"might" + 0.045*"could" + 0.042*"beyond" + 0.041*"far" + 0.034*"silent" + 0.029*"ever" + 0.025*"kind"
2021-01-15 12:21:50,238 : INFO : topic #11 (0.020): 0.147*"eyes" + 0.073*"stared" + 0.063*"paul" + 0.043*"night" + 0.040*"feet" + 0.037*"rocks" + 0.032*"usul" + 0.031*"shadows" + 0.031*"man" + 0.031*"cave"
2021-01-15 12:21:50,241 : INFO : topic #20 (0.020): 0.143*"first" + 0.082*"need" + 0.076*"like" + 0.045*"returned" + 0.040*"arrakeen" + 0.040*"entire" + 0.028*"pattern" + 0.025*"except" + 0.025*"could" + 0.025*"old"
2021-01-15 12:21:50,247 : INFO : topic #21 (0.020): 0.106*"great" + 0.094*"many" + 0.069*"things" + 0.060*"one" + 0.050*"always" + 0.038*"already" + 0.031*"order" + 0.030*"rule" + 0.029*"said" + 0.021*"thoughts"
2021-01-15 12:21:50,252 : INFO : topic diff=0.023166, rho=0.113736
2021-01-15 12:21:50,263 : INFO : PROGRESS: pass 72, at document #8000/8608
2021-01-15 12:21:

2021-01-15 12:21:54,014 : INFO : topic #45 (0.020): 0.119*"head" + 0.061*"storm" + 0.051*"years" + 0.048*"shook" + 0.038*"stare" + 0.038*"soon" + 0.035*"snapped" + 0.034*"bring" + 0.029*"appeared" + 0.028*"talk"
2021-01-15 12:21:54,020 : INFO : topic #43 (0.020): 0.316*"baron" + 0.082*"said" + 0.068*"take" + 0.065*"piter" + 0.043*"mentat" + 0.038*"change" + 0.033*"stop" + 0.028*"women" + 0.024*"could" + 0.021*"ship"
2021-01-15 12:21:54,021 : INFO : topic diff=0.018997, rho=0.113008
2021-01-15 12:21:54,321 : INFO : -9.346 per-word bound, 650.8 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:21:54,323 : INFO : PROGRESS: pass 73, at document #8608/8608
2021-01-15 12:21:54,457 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:21:54,477 : INFO : topic #39 (0.020): 0.202*"dib" + 0.202*"muad" + 0.050*"princess" + 0.048*"done" + 0.047*"future" + 0.039*"irulan" + 0.031*"words" + 0.030*"arrakis" + 0.025*"carry" 

2021-01-15 12:21:58,022 : INFO : topic #18 (0.020): 0.124*"could" + 0.090*"knew" + 0.064*"new" + 0.062*"training" + 0.058*"sietch" + 0.041*"trained" + 0.036*"gave" + 0.034*"school" + 0.033*"fact" + 0.026*"short"
2021-01-15 12:21:58,053 : INFO : topic diff=0.043310, rho=0.112293
2021-01-15 12:21:58,087 : INFO : PROGRESS: pass 75, at document #2000/8608
2021-01-15 12:21:58,715 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:21:58,735 : INFO : topic #5 (0.020): 0.254*"mother" + 0.100*"reverend" + 0.053*"attention" + 0.051*"power" + 0.049*"noted" + 0.026*"often" + 0.025*"jessica" + 0.024*"within" + 0.023*"food" + 0.022*"said"
2021-01-15 12:21:58,739 : INFO : topic #0 (0.020): 0.383*"fremen" + 0.060*"use" + 0.045*"wish" + 0.037*"al" + 0.033*"desert" + 0.028*"live" + 0.024*"remember" + 0.021*"one" + 0.021*"clear" + 0.020*"different"
2021-01-15 12:21:58,755 : INFO : topic #1 (0.020): 0.236*"duke" + 0.091*"leto" + 0.072*"son" + 0.070*"atreides" + 0.060

2021-01-15 12:22:02,960 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:22:02,970 : INFO : topic #41 (0.020): 0.150*"must" + 0.063*"said" + 0.059*"may" + 0.053*"make" + 0.051*"harkonnens" + 0.049*"among" + 0.038*"us" + 0.038*"course" + 0.038*"thought" + 0.035*"man"
2021-01-15 12:22:02,971 : INFO : topic #28 (0.020): 0.164*"people" + 0.065*"thopter" + 0.045*"dune" + 0.039*"paul" + 0.036*"hidden" + 0.034*"strength" + 0.032*"five" + 0.028*"remembered" + 0.028*"wait" + 0.027*"dream"
2021-01-15 12:22:02,973 : INFO : topic #1 (0.020): 0.243*"duke" + 0.101*"leto" + 0.071*"atreides" + 0.067*"son" + 0.060*"said" + 0.038*"alia" + 0.030*"long" + 0.028*"daughter" + 0.026*"one" + 0.022*"thus"
2021-01-15 12:22:02,983 : INFO : topic #13 (0.020): 0.128*"say" + 0.071*"want" + 0.056*"three" + 0.037*"haderach" + 0.037*"kwisatz" + 0.035*"suddenly" + 0.034*"figure" + 0.032*"said" + 0.032*"ring" + 0.030*"sudden"
2021-01-15 12:22:02,987 : INFO : topic #45 (0.020): 0.

2021-01-15 12:22:07,077 : INFO : topic #16 (0.020): 0.075*"shall" + 0.068*"arm" + 0.066*"well" + 0.044*"upon" + 0.039*"certain" + 0.036*"control" + 0.034*"set" + 0.032*"better" + 0.030*"said" + 0.028*"try"
2021-01-15 12:22:07,079 : INFO : topic #46 (0.020): 0.188*"thought" + 0.072*"think" + 0.066*"father" + 0.052*"paul" + 0.049*"let" + 0.047*"jessica" + 0.044*"look" + 0.036*"put" + 0.032*"seen" + 0.031*"could"
2021-01-15 12:22:07,090 : INFO : topic #15 (0.020): 0.116*"within" + 0.116*"shield" + 0.053*"touched" + 0.041*"fighting" + 0.041*"field" + 0.039*"force" + 0.032*"slow" + 0.029*"paul" + 0.029*"shields" + 0.025*"motion"
2021-01-15 12:22:07,104 : INFO : topic #20 (0.020): 0.142*"first" + 0.081*"need" + 0.079*"like" + 0.045*"returned" + 0.040*"arrakeen" + 0.040*"entire" + 0.027*"pattern" + 0.025*"could" + 0.025*"except" + 0.025*"old"
2021-01-15 12:22:07,108 : INFO : topic diff=0.022346, rho=0.110227
2021-01-15 12:22:07,119 : INFO : PROGRESS: pass 77, at document #8000/8608
2021-01-15

2021-01-15 12:22:10,747 : INFO : topic #2 (0.020): 0.203*"gurney" + 0.104*"halleck" + 0.069*"said" + 0.065*"go" + 0.055*"paul" + 0.048*"called" + 0.042*"made" + 0.030*"man" + 0.023*"instant" + 0.022*"darkness"
2021-01-15 12:22:10,772 : INFO : topic #48 (0.020): 0.153*"us" + 0.146*"harkonnen" + 0.050*"behind" + 0.043*"point" + 0.038*"sire" + 0.037*"death" + 0.036*"half" + 0.029*"one" + 0.028*"said" + 0.027*"smile"
2021-01-15 12:22:10,773 : INFO : topic diff=0.018398, rho=0.109564
2021-01-15 12:22:11,062 : INFO : -9.345 per-word bound, 650.4 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:22:11,063 : INFO : PROGRESS: pass 78, at document #8608/8608
2021-01-15 12:22:11,220 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:22:11,226 : INFO : topic #49 (0.020): 0.090*"high" + 0.053*"small" + 0.040*"presently" + 0.032*"major" + 0.032*"family" + 0.030*"obvious" + 0.030*"form" + 0.029*"higher" + 0.029*"permitt

2021-01-15 12:22:14,216 : INFO : topic #27 (0.020): 0.157*"would" + 0.080*"come" + 0.064*"still" + 0.059*"might" + 0.047*"could" + 0.045*"beyond" + 0.044*"far" + 0.032*"silent" + 0.027*"ever" + 0.025*"kind"
2021-01-15 12:22:14,217 : INFO : topic diff=0.041922, rho=0.108912
2021-01-15 12:22:14,219 : INFO : PROGRESS: pass 80, at document #2000/8608
2021-01-15 12:22:14,787 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:22:14,808 : INFO : topic #30 (0.020): 0.214*"water" + 0.137*"spice" + 0.111*"life" + 0.049*"little" + 0.034*"moisture" + 0.030*"said" + 0.021*"open" + 0.021*"ten" + 0.019*"inner" + 0.017*"meters"
2021-01-15 12:22:14,817 : INFO : topic #11 (0.020): 0.155*"eyes" + 0.073*"stared" + 0.064*"paul" + 0.039*"night" + 0.039*"feet" + 0.033*"rocks" + 0.033*"usul" + 0.030*"cave" + 0.028*"opened" + 0.028*"weapon"
2021-01-15 12:22:14,824 : INFO : topic #18 (0.020): 0.126*"could" + 0.088*"knew" + 0.064*"new" + 0.064*"training" + 0.056*"sietch" + 

2021-01-15 12:22:17,792 : INFO : PROGRESS: pass 81, at document #4000/8608
2021-01-15 12:22:18,257 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:22:18,271 : INFO : topic #5 (0.020): 0.250*"mother" + 0.094*"reverend" + 0.056*"power" + 0.055*"attention" + 0.049*"noted" + 0.026*"jessica" + 0.024*"food" + 0.024*"often" + 0.023*"within" + 0.023*"said"
2021-01-15 12:22:18,272 : INFO : topic #36 (0.020): 0.064*"beside" + 0.056*"lips" + 0.049*"step" + 0.044*"forced" + 0.044*"times" + 0.033*"knowledge" + 0.032*"hood" + 0.031*"cold" + 0.031*"secret" + 0.028*"said"
2021-01-15 12:22:18,274 : INFO : topic #37 (0.020): 0.128*"hand" + 0.099*"looked" + 0.086*"right" + 0.072*"left" + 0.063*"without" + 0.061*"paul" + 0.035*"wondered" + 0.032*"though" + 0.025*"spoke" + 0.023*"away"
2021-01-15 12:22:18,275 : INFO : topic #41 (0.020): 0.150*"must" + 0.062*"said" + 0.059*"may" + 0.054*"make" + 0.051*"harkonnens" + 0.049*"among" + 0.038*"us" + 0.038*"course" + 0.03

2021-01-15 12:22:21,617 : INFO : topic #30 (0.020): 0.227*"water" + 0.131*"spice" + 0.105*"life" + 0.048*"little" + 0.035*"moisture" + 0.033*"said" + 0.021*"ten" + 0.020*"inner" + 0.020*"open" + 0.018*"indeed"
2021-01-15 12:22:21,626 : INFO : topic #17 (0.020): 0.077*"room" + 0.062*"yueh" + 0.042*"table" + 0.041*"around" + 0.039*"paul" + 0.033*"stood" + 0.030*"along" + 0.028*"hall" + 0.026*"door" + 0.026*"stopped"
2021-01-15 12:22:21,628 : INFO : topic #37 (0.020): 0.130*"hand" + 0.097*"looked" + 0.087*"right" + 0.071*"left" + 0.063*"without" + 0.061*"paul" + 0.036*"wondered" + 0.032*"though" + 0.025*"away" + 0.024*"spoke"
2021-01-15 12:22:21,636 : INFO : topic #2 (0.020): 0.161*"gurney" + 0.115*"halleck" + 0.069*"go" + 0.065*"said" + 0.055*"called" + 0.050*"paul" + 0.045*"made" + 0.030*"man" + 0.025*"darkness" + 0.024*"instant"
2021-01-15 12:22:21,637 : INFO : topic diff=0.021699, rho=0.107024
2021-01-15 12:22:21,647 : INFO : PROGRESS: pass 82, at document #8000/8608
2021-01-15 12:22:

2021-01-15 12:22:25,066 : INFO : topic #39 (0.020): 0.194*"dib" + 0.194*"muad" + 0.054*"done" + 0.052*"princess" + 0.049*"future" + 0.043*"irulan" + 0.033*"words" + 0.026*"carry" + 0.025*"arrakis" + 0.021*"violence"
2021-01-15 12:22:25,070 : INFO : topic #40 (0.020): 0.147*"sand" + 0.054*"dust" + 0.054*"across" + 0.047*"surface" + 0.039*"wind" + 0.035*"movement" + 0.035*"dunes" + 0.033*"system" + 0.029*"toward" + 0.025*"bible"
2021-01-15 12:22:25,104 : INFO : topic diff=0.017984, rho=0.106417
2021-01-15 12:22:25,400 : INFO : -9.344 per-word bound, 649.7 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:22:25,401 : INFO : PROGRESS: pass 83, at document #8608/8608
2021-01-15 12:22:25,552 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:22:25,562 : INFO : topic #35 (0.020): 0.068*"word" + 0.057*"began" + 0.054*"jihad" + 0.042*"blade" + 0.040*"command" + 0.040*"also" + 0.034*"mapes" + 0.033*"butlerian" + 0.

2021-01-15 12:22:28,221 : INFO : topic #33 (0.020): 0.163*"face" + 0.083*"held" + 0.059*"keep" + 0.051*"sound" + 0.040*"mouth" + 0.036*"uncle" + 0.035*"help" + 0.032*"watch" + 0.030*"strange" + 0.028*"arms"
2021-01-15 12:22:28,239 : INFO : topic diff=0.040700, rho=0.105819
2021-01-15 12:22:28,244 : INFO : PROGRESS: pass 85, at document #2000/8608
2021-01-15 12:22:28,752 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:22:28,772 : INFO : topic #36 (0.020): 0.061*"beside" + 0.053*"lips" + 0.052*"step" + 0.046*"times" + 0.045*"forced" + 0.034*"hood" + 0.033*"knowledge" + 0.032*"secret" + 0.030*"cold" + 0.028*"said"
2021-01-15 12:22:28,773 : INFO : topic #43 (0.020): 0.293*"baron" + 0.079*"said" + 0.072*"piter" + 0.068*"take" + 0.047*"mentat" + 0.041*"change" + 0.030*"stop" + 0.027*"women" + 0.025*"could" + 0.022*"ship"
2021-01-15 12:22:28,775 : INFO : topic #0 (0.020): 0.383*"fremen" + 0.060*"use" + 0.046*"wish" + 0.037*"al" + 0.033*"desert" + 0.02

2021-01-15 12:22:31,528 : INFO : PROGRESS: pass 86, at document #4000/8608
2021-01-15 12:22:32,016 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:22:32,026 : INFO : topic #17 (0.020): 0.081*"room" + 0.068*"yueh" + 0.046*"table" + 0.041*"around" + 0.038*"paul" + 0.033*"stood" + 0.029*"along" + 0.029*"door" + 0.028*"hall" + 0.025*"jessica"
2021-01-15 12:22:32,027 : INFO : topic #4 (0.020): 0.175*"saw" + 0.111*"another" + 0.083*"felt" + 0.041*"paul" + 0.039*"thing" + 0.037*"someone" + 0.035*"jessica" + 0.034*"killed" + 0.033*"watched" + 0.025*"guards"
2021-01-15 12:22:32,029 : INFO : topic #44 (0.020): 0.048*"side" + 0.041*"body" + 0.038*"stillsuit" + 0.035*"awareness" + 0.032*"space" + 0.029*"eyes" + 0.027*"lifted" + 0.022*"studied" + 0.021*"felt" + 0.020*"face"
2021-01-15 12:22:32,054 : INFO : topic #29 (0.020): 0.308*"see" + 0.132*"feyd" + 0.116*"rautha" + 0.051*"maker" + 0.047*"boy" + 0.033*"part" + 0.030*"went" + 0.025*"found" + 0.021*"kept"

2021-01-15 12:22:35,289 : INFO : topic #4 (0.020): 0.178*"saw" + 0.111*"another" + 0.086*"felt" + 0.042*"paul" + 0.038*"jessica" + 0.038*"thing" + 0.037*"someone" + 0.032*"killed" + 0.031*"watched" + 0.024*"wide"
2021-01-15 12:22:35,291 : INFO : topic #42 (0.020): 0.154*"voice" + 0.085*"said" + 0.081*"heard" + 0.065*"cannot" + 0.062*"jessica" + 0.054*"something" + 0.041*"paul" + 0.034*"true" + 0.031*"tone" + 0.025*"hear"
2021-01-15 12:22:35,298 : INFO : topic #47 (0.020): 0.141*"place" + 0.108*"planet" + 0.073*"even" + 0.062*"much" + 0.045*"caladan" + 0.043*"arrakis" + 0.034*"followed" + 0.032*"full" + 0.031*"show" + 0.031*"test"
2021-01-15 12:22:35,300 : INFO : topic #11 (0.020): 0.148*"eyes" + 0.074*"stared" + 0.061*"paul" + 0.043*"night" + 0.040*"feet" + 0.037*"rocks" + 0.032*"usul" + 0.031*"shadows" + 0.031*"cave" + 0.031*"man"
2021-01-15 12:22:35,317 : INFO : topic diff=0.021101, rho=0.104085
2021-01-15 12:22:35,351 : INFO : PROGRESS: pass 87, at document #8000/8608
2021-01-15 12:

2021-01-15 12:22:38,715 : INFO : topic #28 (0.020): 0.171*"people" + 0.057*"thopter" + 0.043*"dune" + 0.043*"paul" + 0.036*"strength" + 0.034*"hidden" + 0.030*"remembered" + 0.030*"five" + 0.028*"wait" + 0.027*"dream"
2021-01-15 12:22:38,717 : INFO : topic #34 (0.020): 0.110*"paul" + 0.093*"glanced" + 0.084*"back" + 0.074*"said" + 0.072*"good" + 0.070*"day" + 0.038*"leave" + 0.034*"blood" + 0.030*"love" + 0.025*"stepped"
2021-01-15 12:22:38,747 : INFO : topic diff=0.017398, rho=0.103526
2021-01-15 12:22:39,012 : INFO : -9.342 per-word bound, 648.8 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:22:39,013 : INFO : PROGRESS: pass 88, at document #8608/8608
2021-01-15 12:22:39,181 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:22:39,188 : INFO : topic #12 (0.020): 0.160*"stilgar" + 0.125*"gesserit" + 0.125*"bene" + 0.055*"said" + 0.054*"count" + 0.041*"nodded" + 0.037*"dead" + 0.035*"fenring" + 0.032*"

2021-01-15 12:22:42,288 : INFO : topic #45 (0.020): 0.110*"head" + 0.062*"years" + 0.061*"storm" + 0.044*"shook" + 0.037*"soon" + 0.036*"stare" + 0.034*"snapped" + 0.032*"bring" + 0.030*"talk" + 0.028*"appeared"
2021-01-15 12:22:42,292 : INFO : topic diff=0.039590, rho=0.102976
2021-01-15 12:22:42,294 : INFO : PROGRESS: pass 90, at document #2000/8608
2021-01-15 12:22:42,800 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:22:42,823 : INFO : topic #26 (0.020): 0.258*"hawat" + 0.075*"give" + 0.073*"tell" + 0.063*"whispered" + 0.062*"said" + 0.057*"silence" + 0.052*"thufir" + 0.037*"paul" + 0.028*"anger" + 0.025*"beginning"
2021-01-15 12:22:42,824 : INFO : topic #2 (0.020): 0.182*"gurney" + 0.115*"halleck" + 0.065*"go" + 0.065*"said" + 0.054*"called" + 0.052*"paul" + 0.043*"made" + 0.030*"man" + 0.022*"instant" + 0.021*"darkness"
2021-01-15 12:22:42,825 : INFO : topic #0 (0.020): 0.383*"fremen" + 0.060*"use" + 0.046*"wish" + 0.037*"al" + 0.033*"de

2021-01-15 12:22:45,862 : INFO : PROGRESS: pass 91, at document #4000/8608
2021-01-15 12:22:46,348 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:22:46,378 : INFO : topic #35 (0.020): 0.066*"word" + 0.055*"began" + 0.048*"mapes" + 0.044*"jihad" + 0.041*"blade" + 0.040*"command" + 0.037*"also" + 0.036*"dry" + 0.033*"tried" + 0.030*"crysknife"
2021-01-15 12:22:46,398 : INFO : topic #19 (0.020): 0.185*"time" + 0.074*"nothing" + 0.058*"fear" + 0.052*"past" + 0.031*"real" + 0.028*"spread" + 0.028*"gone" + 0.026*"coming" + 0.025*"looking" + 0.024*"common"
2021-01-15 12:22:46,401 : INFO : topic #2 (0.020): 0.175*"gurney" + 0.113*"halleck" + 0.067*"go" + 0.065*"said" + 0.053*"called" + 0.051*"paul" + 0.044*"made" + 0.030*"man" + 0.023*"darkness" + 0.022*"instant"
2021-01-15 12:22:46,424 : INFO : topic #18 (0.020): 0.129*"could" + 0.087*"knew" + 0.064*"training" + 0.063*"new" + 0.052*"sietch" + 0.038*"trained" + 0.036*"gave" + 0.034*"school" + 0.034*"f

2021-01-15 12:22:49,844 : INFO : topic #24 (0.020): 0.072*"rabban" + 0.070*"troop" + 0.067*"child" + 0.066*"knife" + 0.049*"slave" + 0.039*"matter" + 0.031*"plant" + 0.028*"eye" + 0.027*"metal" + 0.027*"near"
2021-01-15 12:22:49,876 : INFO : topic #30 (0.020): 0.227*"water" + 0.131*"spice" + 0.105*"life" + 0.048*"little" + 0.035*"moisture" + 0.033*"said" + 0.021*"ten" + 0.020*"inner" + 0.019*"open" + 0.018*"indeed"
2021-01-15 12:22:49,879 : INFO : topic #1 (0.020): 0.236*"duke" + 0.097*"leto" + 0.071*"son" + 0.070*"atreides" + 0.060*"said" + 0.036*"alia" + 0.031*"long" + 0.030*"daughter" + 0.027*"one" + 0.023*"thus"
2021-01-15 12:22:49,882 : INFO : topic #19 (0.020): 0.191*"time" + 0.071*"nothing" + 0.059*"fear" + 0.052*"past" + 0.030*"spread" + 0.030*"real" + 0.027*"gone" + 0.025*"looking" + 0.024*"coming" + 0.023*"sky"
2021-01-15 12:22:49,884 : INFO : topic diff=0.020506, rho=0.101376
2021-01-15 12:22:49,925 : INFO : PROGRESS: pass 92, at document #8000/8608
2021-01-15 12:22:50,667 :

2021-01-15 12:22:55,641 : INFO : topic #32 (0.020): 0.100*"house" + 0.088*"almost" + 0.068*"call" + 0.052*"blue" + 0.035*"grew" + 0.034*"eh" + 0.033*"turn" + 0.032*"creature" + 0.032*"says" + 0.030*"secundus"
2021-01-15 12:22:55,685 : INFO : topic #3 (0.020): 0.150*"know" + 0.110*"said" + 0.097*"old" + 0.075*"woman" + 0.072*"lord" + 0.063*"yes" + 0.045*"ah" + 0.034*"young" + 0.032*"perhaps" + 0.028*"sure"
2021-01-15 12:22:55,715 : INFO : topic diff=0.016909, rho=0.100859
2021-01-15 12:22:56,021 : INFO : -9.340 per-word bound, 648.3 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:22:56,021 : INFO : PROGRESS: pass 93, at document #8608/8608
2021-01-15 12:22:56,225 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:22:56,243 : INFO : topic #20 (0.020): 0.148*"first" + 0.084*"need" + 0.073*"like" + 0.043*"returned" + 0.042*"entire" + 0.037*"arrakeen" + 0.027*"pattern" + 0.025*"old" + 0.024*"except" + 0.023*

2021-01-15 12:23:00,234 : INFO : topic #32 (0.020): 0.103*"house" + 0.089*"almost" + 0.068*"call" + 0.046*"blue" + 0.035*"grew" + 0.034*"secundus" + 0.034*"salusa" + 0.032*"creature" + 0.032*"planet" + 0.032*"turn"
2021-01-15 12:23:00,236 : INFO : topic diff=0.038513, rho=0.100350
2021-01-15 12:23:00,238 : INFO : PROGRESS: pass 95, at document #2000/8608
2021-01-15 12:23:01,115 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:23:01,130 : INFO : topic #32 (0.020): 0.107*"house" + 0.089*"almost" + 0.068*"call" + 0.047*"blue" + 0.034*"grew" + 0.034*"says" + 0.034*"turn" + 0.033*"secundus" + 0.033*"salusa" + 0.031*"planet"
2021-01-15 12:23:01,169 : INFO : topic #27 (0.020): 0.162*"would" + 0.082*"come" + 0.063*"still" + 0.061*"might" + 0.048*"could" + 0.044*"beyond" + 0.042*"far" + 0.032*"silent" + 0.029*"ever" + 0.025*"kind"
2021-01-15 12:23:01,170 : INFO : topic #46 (0.020): 0.176*"thought" + 0.074*"think" + 0.066*"father" + 0.054*"paul" + 0.049*"

2021-01-15 12:23:07,019 : INFO : merging changes from 2000 documents into a model of 8608 documents
2021-01-15 12:23:07,024 : INFO : topic #26 (0.020): 0.274*"hawat" + 0.075*"give" + 0.069*"tell" + 0.062*"said" + 0.060*"whispered" + 0.056*"silence" + 0.054*"thufir" + 0.035*"paul" + 0.027*"anger" + 0.024*"beginning"
2021-01-15 12:23:07,029 : INFO : topic #3 (0.020): 0.150*"know" + 0.107*"said" + 0.096*"old" + 0.079*"lord" + 0.074*"woman" + 0.063*"yes" + 0.043*"ah" + 0.033*"perhaps" + 0.032*"young" + 0.028*"sure"
2021-01-15 12:23:07,030 : INFO : topic #4 (0.020): 0.175*"saw" + 0.111*"another" + 0.083*"felt" + 0.041*"paul" + 0.039*"thing" + 0.037*"someone" + 0.035*"jessica" + 0.034*"killed" + 0.033*"watched" + 0.025*"guards"
2021-01-15 12:23:07,042 : INFO : topic #28 (0.020): 0.164*"people" + 0.064*"thopter" + 0.045*"dune" + 0.039*"paul" + 0.036*"hidden" + 0.034*"strength" + 0.032*"five" + 0.028*"remembered" + 0.028*"wait" + 0.027*"dream"
2021-01-15 12:23:07,063 : INFO : topic #8 (0.020):

2021-01-15 12:23:10,616 : INFO : topic #10 (0.020): 0.201*"kynes" + 0.158*"man" + 0.073*"said" + 0.050*"every" + 0.045*"used" + 0.029*"kill" + 0.025*"liet" + 0.022*"everything" + 0.021*"way" + 0.020*"purpose"
2021-01-15 12:23:10,618 : INFO : topic #29 (0.020): 0.300*"see" + 0.140*"feyd" + 0.125*"rautha" + 0.048*"maker" + 0.048*"boy" + 0.032*"part" + 0.029*"went" + 0.025*"found" + 0.022*"upward" + 0.020*"kept"
2021-01-15 12:23:10,621 : INFO : topic #12 (0.020): 0.162*"stilgar" + 0.114*"gesserit" + 0.114*"bene" + 0.059*"said" + 0.057*"count" + 0.046*"nodded" + 0.041*"dead" + 0.033*"fenring" + 0.031*"way" + 0.029*"paul"
2021-01-15 12:23:10,623 : INFO : topic #24 (0.020): 0.072*"rabban" + 0.069*"troop" + 0.067*"child" + 0.066*"knife" + 0.049*"slave" + 0.039*"matter" + 0.031*"plant" + 0.028*"eye" + 0.027*"metal" + 0.027*"near"
2021-01-15 12:23:10,630 : INFO : topic diff=0.019922, rho=0.098868
2021-01-15 12:23:10,633 : INFO : PROGRESS: pass 97, at document #8000/8608
2021-01-15 12:23:11,415 

2021-01-15 12:23:15,655 : INFO : topic #18 (0.020): 0.139*"could" + 0.095*"knew" + 0.063*"new" + 0.058*"training" + 0.056*"sietch" + 0.039*"trained" + 0.038*"gave" + 0.033*"fact" + 0.028*"school" + 0.024*"one"
2021-01-15 12:23:15,657 : INFO : topic #14 (0.020): 0.155*"turned" + 0.064*"away" + 0.047*"beneath" + 0.045*"paul" + 0.040*"back" + 0.034*"robe" + 0.034*"lay" + 0.032*"question" + 0.030*"answer" + 0.027*"chamber"
2021-01-15 12:23:15,661 : INFO : topic diff=0.016567, rho=0.098388
2021-01-15 12:23:15,923 : INFO : -9.339 per-word bound, 647.7 perplexity estimate based on a held-out corpus of 608 documents with 5442 words
2021-01-15 12:23:15,924 : INFO : PROGRESS: pass 98, at document #8608/8608
2021-01-15 12:23:16,091 : INFO : merging changes from 608 documents into a model of 8608 documents
2021-01-15 12:23:16,100 : INFO : topic #41 (0.020): 0.149*"must" + 0.061*"said" + 0.058*"may" + 0.054*"make" + 0.054*"among" + 0.049*"harkonnens" + 0.038*"course" + 0.037*"us" + 0.036*"thought" 

2021-01-15 12:23:19,988 : INFO : topic #21 (0.020): 0.109*"great" + 0.090*"many" + 0.066*"things" + 0.065*"one" + 0.047*"always" + 0.036*"already" + 0.033*"rule" + 0.033*"order" + 0.028*"said" + 0.020*"animal"
2021-01-15 12:23:20,002 : INFO : topic diff=0.037524, rho=0.097915


LdaModel(num_terms=1809, num_topics=50, decay=0.5, chunksize=2000)


On peut sauvegarder le modèle dans un fichier pour le garder en mémoire.

In [19]:
temp_file = "models/model_dataconf_" + str(ntopics)

if generate_lda:
    ldamodel.save(temp_file)
else:
    ldamodel = LdaModel.load(temp_file)    

2021-01-15 12:24:24,168 : INFO : saving LdaState object under models/model_dataconf_50.state, separately None
2021-01-15 12:24:24,183 : INFO : saved models/model_dataconf_50.state
2021-01-15 12:24:24,190 : INFO : saving LdaModel object under models/model_dataconf_50, separately ['expElogbeta', 'sstats']
2021-01-15 12:24:24,202 : INFO : storing np array 'expElogbeta' to models/model_dataconf_50.expElogbeta.npy
2021-01-15 12:24:24,252 : INFO : not storing attribute id2word
2021-01-15 12:24:24,255 : INFO : not storing attribute dispatcher
2021-01-15 12:24:24,290 : INFO : not storing attribute state
2021-01-15 12:24:24,296 : INFO : saved models/model_dataconf_50


In [20]:
pwz = ldamodel.get_topics()

print("On peut récupérer la matrice stockant p(w/z):", pwz.shape)

# on peut aussi utiliser ldamodel.get_topic_terms(topicid, topn=n) pour obtenir
# les top n mots via leur identifiant, accompagnés de la proba p(w/z)

#ldamodel.get_topic_terms(1,topn=len(dico))

On peut récupérer la matrice stockant p(w/z): (50, 1809)


In [21]:
# show_topics permet d'afficher les mots directement
ldamodel.show_topics(num_topics=ntopics,formatted=False)

[(0,
  [('fremen', 0.39084002),
   ('use', 0.059228238),
   ('wish', 0.045204528),
   ('al', 0.038892522),
   ('desert', 0.033557408),
   ('live', 0.028252507),
   ('remember', 0.02324076),
   ('clear', 0.02114224),
   ('one', 0.020718865),
   ('different', 0.018741386)]),
 (1,
  [('duke', 0.2072339),
   ('leto', 0.08744753),
   ('atreides', 0.07339103),
   ('son', 0.07011205),
   ('said', 0.056533955),
   ('alia', 0.049839783),
   ('daughter', 0.033081464),
   ('long', 0.032318622),
   ('one', 0.028043028),
   ('thus', 0.027558316)]),
 (2,
  [('gurney', 0.19005096),
   ('halleck', 0.09838473),
   ('said', 0.06477322),
   ('go', 0.063096724),
   ('called', 0.05653797),
   ('paul', 0.0520262),
   ('made', 0.043887354),
   ('man', 0.029949298),
   ('instant', 0.023104072),
   ('darkness', 0.02247189)]),
 (3,
  [('know', 0.14811775),
   ('said', 0.106028765),
   ('old', 0.09817265),
   ('woman', 0.07485444),
   ('lord', 0.07103217),
   ('yes', 0.061214544),
   ('ah', 0.044197567),
   ('yo

On peut vérifier que les mots ont une probabilité d'appartenir à *plusieurs* thématiques

In [24]:
import numpy as np
tab = ldamodel.get_topics()
myword = dico.token2id['data']
np.where(tab[:,myword]>0.03)

(array([], dtype=int64),)

In [25]:
# Enable logging for gensim - optional
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.ERROR)

import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)

Si on souhaite obtenir p(z|d), il faut réexécuter le modèle sur les données (par ex., le corpus).

Les procédures suivantes fournissent plusieurs "vues" intéressantes sur le modèle. Elles viennent du site *machinelearningplus.com* :

https://www.machinelearningplus.com/nlp/topic-modeling-gensim-python/

Tout d'abord, on souhaite un tableau qui liste la thématique majoritaire pour chaque document, accompagnée par ses mots les plus probables.

In [26]:
def format_topics_sentences(ldamodel, corpus, texts):
    # Init output
    sent_topics_df = pd.DataFrame()

    # Get main topic in each document
    i=0
    for i, row in enumerate(ldamodel[corpus]):
        #print(row[0])            
        row = sorted(row[0], key=lambda x: (x[1]), reverse=True)
        #print(row)
        # Get the Dominant topic, Perc Contribution and Keywords for each document
        for j, (topic_num, prop_topic) in enumerate(row):
            if j == 0:  # => dominant topic
                wp = ldamodel.show_topic(topic_num)
                topic_keywords = ", ".join([word for word, prop in wp])
                sent_topics_df = sent_topics_df.append(pd.Series([int(topic_num), round(prop_topic,4), topic_keywords]), ignore_index=True)
            else:
                break
    sent_topics_df.columns = ['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords']

    # Add original text to the end of the output
    contents = pd.Series(texts)
    sent_topics_df = pd.concat([sent_topics_df, contents], axis=1)
    return(sent_topics_df)


df_topic_sents_keywords = format_topics_sentences(ldamodel=ldamodel, corpus=corpus, texts=doc_set)

# Format
df_dominant_topic = df_topic_sents_keywords.reset_index()
df_dominant_topic.columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text']

# Show
df_dominant_topic.head(10)

Unnamed: 0,Document_No,Dominant_Topic,Topic_Perc_Contrib,Keywords,Text
0,0,28.0,0.51,"people, thopter, dune, paul, hidden, strength,...",Dune
1,1,0.0,0.02,"fremen, use, wish, al, desert, live, remember,...",Frank Herbert
2,2,0.0,0.02,"fremen, use, wish, al, desert, live, remember,...",
3,3,0.0,0.02,"fremen, use, wish, al, desert, live, remember,...",Copyright 1965
4,4,0.0,0.02,"fremen, use, wish, al, desert, live, remember,...",
5,5,49.0,0.51,"high, small, presently, family, major, obvious...",Book 1
6,6,28.0,0.51,"people, thopter, dune, paul, hidden, strength,...",DUNE
7,7,0.0,0.02,"fremen, use, wish, al, desert, live, remember,...",
8,8,0.0,0.02,"fremen, use, wish, al, desert, live, remember,...",= = = = = =
9,9,0.0,0.02,"fremen, use, wish, al, desert, live, remember,...",


On peut vouloir obtenir les documents les plus "représentatifs" de chaque thématique (attention, au sens de p(z|d)).

In [27]:
sent_topics_sorteddf = pd.DataFrame()

sent_topics_outdf_grpd = df_topic_sents_keywords.groupby('Dominant_Topic')

for i, grp in sent_topics_outdf_grpd:
    sent_topics_sorteddf = pd.concat([sent_topics_sorteddf, 
                                             grp.sort_values(['Perc_Contribution'], ascending=[0]).head(1)], 
                                            axis=0)

# Reset Index    
sent_topics_sorteddf.reset_index(drop=True, inplace=True)

# Format
sent_topics_sorteddf.columns = ['Topic_Num', "Topic_Perc_Contrib", "Keywords", "Text"]

# Show
sent_topics_sorteddf

Unnamed: 0,Topic_Num,Topic_Perc_Contrib,Keywords,Text
0,0.0,0.755,"fremen, use, wish, al, desert, live, remember,...","""Lisan al-Gaib!"""
1,1.0,0.78,"duke, leto, atreides, son, said, alia, daughte...","Jessica spoke bitterly: ""Chips in the path of ..."
2,2.0,0.8367,"gurney, halleck, said, go, called, paul, made,...","And Paul: ""Gurney, man! Gurney, man!"""
3,3.0,0.755,"know, said, old, woman, lord, yes, ah, young, ...","""When we've rested,"" Jessica said, ""we should ..."
4,4.0,0.6733,"saw, another, felt, paul, thing, someone, jess...","Paul collapsing their tent, recovering it up t..."
5,5.0,0.804,"mother, reverend, noted, attention, power, oft...","""Reverend Mother!"" Chani said. ""What is wrong?"""
6,6.0,0.755,"poison, get, hold, message, melange, said, rea...","""I'm perfectly safe here,"" Paul said."
7,7.0,0.755,"asked, idaho, paul, basin, jessica, green, wil...",Paul cleared his throat.
8,8.0,0.9423,"rock, light, sand, moved, hulud, shai, across,...","Slowly, the filtered sun buried itself beneath..."
9,9.0,0.755,"guild, yet, imperium, human, slowly, report, m...","""I'm a soldier of the Imperium,"" Paul said, ""t..."


Pour finir, le "volume" estimé de documents (en réalité, de mots) couverts par les différentes thématiques.

In [28]:
# Number of Documents for Each Topic
topic_counts = df_topic_sents_keywords['Dominant_Topic'].value_counts()
topic_counts

0.0     587
3.0     439
46.0    350
12.0    312
42.0    311
17.0    297
41.0    278
43.0    277
44.0    270
1.0     263
8.0     261
7.0     258
37.0    235
10.0    231
2.0     228
26.0    208
34.0    196
11.0    177
29.0    173
30.0    160
5.0     155
31.0    152
25.0    146
38.0    141
39.0    137
4.0     135
14.0    132
27.0    125
22.0    120
45.0    115
16.0    115
21.0    112
40.0    112
47.0    110
18.0    106
48.0    103
19.0     99
9.0      99
13.0     98
24.0     88
28.0     83
35.0     79
20.0     79
15.0     78
33.0     77
6.0      71
36.0     65
49.0     62
32.0     56
23.0     47
Name: Dominant_Topic, dtype: int64