# Latent Dirichlet Allocation (LDA)

In [1]:
# Add the parent folder to the module search path
import sys

sys.path.insert(0, '..')

from util.data_loader import DataLoader
from util.metric_calculator import MetricCalculator

In [2]:
# Load the MovieLens data
data_loader = DataLoader(
    num_users=1000,
    num_test_items=5,
    data_path='../data/ml-10M100K/',
)
movielens = data_loader.load()

In [3]:
import gensim
import logging
from gensim.corpora.dictionary import Dictionary

# Work on a copy of the item-content table.
movie_content = movielens.item_content.copy()

# Some movies have no tags, but every movie has genres assigned.
# Tags and genres are concatenated into one token list per movie, and that
# combined list is the content information used to find similar movies to
# recommend.
# Movies without tags hold NaN in `tag`; fillna("") followed by list() turns
# those entries into empty lists before the concatenation.
tag_tokens = movie_content['tag'].fillna("").apply(list)
genre_tokens = movie_content['genre'].apply(list)
movie_content['tag_genre'] = (tag_tokens + genre_tokens).apply(
    lambda tokens: [str(token) for token in tokens]
)

# The tag + genre token lists are the documents LDA is trained on.
tag_genre_data = movie_content['tag_genre'].tolist()

# Show INFO-level log messages (timestamp : level : message) during the
# dictionary build and the training run.
logging.basicConfig(
    format='%(asctime)s : %(levelname)s : %(message)s',
    level=logging.INFO,
)

# Build the token dictionary, then convert every document to bag-of-words.
common_dictionary = Dictionary(tag_genre_data)
common_corpus = list(map(common_dictionary.doc2bow, tag_genre_data))

# Train LDA
lda_model = gensim.models.LdaModel(
    corpus=common_corpus,
    id2word=common_dictionary,
    num_topics=50,
    passes=30,
)

# Apply the trained model to the training corpus itself.
lda_topics = lda_model[common_corpus]




2022-12-10 20:28:31,096 : INFO : adding document #0 to Dictionary<0 unique tokens: []>
2022-12-10 20:28:31,343 : INFO : adding document #10000 to Dictionary<14749 unique tokens: ['3d', 'Adventure', 'Animation', 'Children', 'Comedy']...>
2022-12-10 20:28:31,366 : INFO : built Dictionary<15261 unique tokens: ['3d', 'Adventure', 'Animation', 'Children', 'Comedy']...> from 10681 documents (total 117144 corpus positions)
2022-12-10 20:28:31,370 : INFO : Dictionary lifecycle event {'msg': "built Dictionary<15261 unique tokens: ['3d', 'Adventure', 'Animation', 'Children', 'Comedy']...> from 10681 documents (total 117144 corpus positions)", 'datetime': '2022-12-10T20:28:31.368643', 'gensim': '4.2.0', 'python': '3.9.15 (main, Nov 24 2022, 14:39:17) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.19045-SP0', 'event': 'created'}
2022-12-10 20:28:31,538 : INFO : using symmetric alpha at 0.02
2022-12-10 20:28:31,539 : INFO : using symmetric eta at 0.02
2022-12-10 20:28:31,543 : INFO : us

2022-12-10 20:28:35,317 : INFO : topic #2 (0.020): 0.026*"martin scorsese" + 0.025*"surreal" + 0.016*"Comedy" + 0.015*"based on a comic" + 0.015*"los angeles" + 0.015*"wedding" + 0.014*"existentialism" + 0.014*"nudity (rear)" + 0.013*"visually appealing" + 0.013*"cult film"
2022-12-10 20:28:35,318 : INFO : topic #13 (0.020): 0.052*"drugs" + 0.034*"ummarti2006" + 0.029*"Drama" + 0.019*"dani2006" + 0.019*"sex" + 0.017*"soccer" + 0.017*"ewan mcgregor" + 0.016*"male nudity" + 0.015*"intimate" + 0.014*"gritty"
2022-12-10 20:28:35,321 : INFO : topic diff=0.235841, rho=0.447214
2022-12-10 20:28:35,554 : INFO : -21.344 per-word bound, 2662525.3 perplexity estimate based on a held-out corpus of 681 documents with 5199 words
2022-12-10 20:28:35,555 : INFO : PROGRESS: pass 0, at document #10681/10681
2022-12-10 20:28:35,688 : INFO : merging changes from 681 documents into a model of 10681 documents
2022-12-10 20:28:35,725 : INFO : topic #4 (0.020): 0.051*"espionage" + 0.049*"edward norton" + 0.03

2022-12-10 20:28:37,300 : INFO : topic diff=0.194125, rho=0.369094
2022-12-10 20:28:37,302 : INFO : PROGRESS: pass 1, at document #10000/10681
2022-12-10 20:28:37,633 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:28:37,666 : INFO : topic #26 (0.020): 0.203*"r" + 0.112*"movie to see" + 0.099*"clearplay" + 0.057*"Drama" + 0.039*"easily confused with other movie(s) (title)" + 0.035*"Action" + 0.031*"revenge" + 0.029*"Thriller" + 0.025*"Comedy" + 0.021*"video game adaptation"
2022-12-10 20:28:37,668 : INFO : topic #43 (0.020): 0.040*"Comedy" + 0.037*"dark comedy" + 0.031*"quirky" + 0.028*"black comedy" + 0.026*"baseball" + 0.023*"coen brothers" + 0.022*"oppl" + 0.021*"seen 2008" + 0.019*"light" + 0.019*"joaquin phoenix"
2022-12-10 20:28:37,669 : INFO : topic #18 (0.020): 0.197*"War" + 0.162*"Drama" + 0.049*"nudity (full frontal)" + 0.043*"Action" + 0.039*"not corv lib" + 0.031*"directorial debut" + 0.026*"george clooney" + 0.026*"hw drama" + 0.0

2022-12-10 20:28:41,908 : INFO : topic #49 (0.020): 0.089*"twist ending" + 0.058*"nicolas cage" + 0.055*"philip k. dick" + 0.044*"kevin spacey" + 0.042*"sexuality" + 0.024*"monster" + 0.021*"1970s" + 0.018*"poker" + 0.018*"jodie foster" + 0.013*"circus"
2022-12-10 20:28:41,910 : INFO : topic #27 (0.020): 0.096*"james bond" + 0.066*"007" + 0.061*"bond" + 0.025*"tolkien" + 0.024*"murder" + 0.021*"franchise" + 0.020*"Adventure" + 0.018*"Action" + 0.018*"ireland" + 0.016*"Thriller"
2022-12-10 20:28:41,911 : INFO : topic #37 (0.020): 0.254*"based on a book" + 0.129*"adapted from:book" + 0.050*"magic" + 0.037*"based on book" + 0.028*"Drama" + 0.024*"surrealism" + 0.022*"classic" + 0.014*"kate" + 0.011*"private detective" + 0.011*"dvd"
2022-12-10 20:28:41,913 : INFO : topic #5 (0.020): 0.111*"fantasy" + 0.056*"sci-fi" + 0.048*"adventure" + 0.046*"space" + 0.029*"dvd" + 0.029*"seen more than once" + 0.022*"sequel" + 0.018*"seen at the cinema" + 0.015*"college" + 0.014*"franchise"
2022-12-10 20

2022-12-10 20:28:44,921 : INFO : topic #49 (0.020): 0.097*"twist ending" + 0.060*"nicolas cage" + 0.060*"philip k. dick" + 0.046*"kevin spacey" + 0.043*"sexuality" + 0.025*"1970s" + 0.025*"monster" + 0.024*"jodie foster" + 0.017*"poker" + 0.014*"twist"
2022-12-10 20:28:44,922 : INFO : topic #48 (0.020): 0.069*"Drama" + 0.046*"criterion" + 0.028*"dvd-video" + 0.026*"tumey's dvds" + 0.025*"reflective" + 0.022*"lyrical" + 0.022*"deliberate" + 0.020*"poignant" + 0.020*"quirky" + 0.019*"bittersweet"
2022-12-10 20:28:44,925 : INFO : topic diff=0.115431, rho=0.327201
2022-12-10 20:28:44,928 : INFO : PROGRESS: pass 3, at document #8000/10681
2022-12-10 20:28:45,262 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:28:45,298 : INFO : topic #37 (0.020): 0.267*"based on a book" + 0.136*"adapted from:book" + 0.061*"magic" + 0.038*"based on book" + 0.030*"Drama" + 0.025*"surrealism" + 0.014*"kate" + 0.013*"classic" + 0.012*"dvd" + 0.011*"private detective"
2

2022-12-10 20:28:46,749 : INFO : topic diff=0.056810, rho=0.310978
2022-12-10 20:28:46,751 : INFO : PROGRESS: pass 4, at document #6000/10681
2022-12-10 20:28:47,040 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:28:47,079 : INFO : topic #35 (0.020): 0.052*"heist" + 0.052*"violence" + 0.034*"rape" + 0.030*"ensemble cast" + 0.029*"multiple storylines" + 0.028*"stanley kubrick" + 0.027*"football" + 0.023*"india" + 0.022*"robert de niro" + 0.021*"robert rodriguez"
2022-12-10 20:28:47,081 : INFO : topic #9 (0.020): 0.040*"sean penn" + 0.034*"pirates" + 0.029*"dogs" + 0.028*"infidelity" + 0.026*"blaxploitation" + 0.021*"classical studies" + 0.021*"epic" + 0.018*"jean-claude van damme" + 0.017*"gangster" + 0.017*"marx brothers"
2022-12-10 20:28:47,083 : INFO : topic #23 (0.020): 0.187*"Musical" + 0.078*"70mm" + 0.076*"Drama" + 0.069*"musical" + 0.062*"Comedy" + 0.035*"adultery" + 0.029*"oscar (best supporting actress)" + 0.019*"john travolta" + 0.0

2022-12-10 20:28:49,527 : INFO : topic #38 (0.020): 0.236*"Thriller" + 0.210*"Crime" + 0.185*"Drama" + 0.077*"Mystery" + 0.076*"Action" + 0.038*"to see" + 0.025*"Film-Noir" + 0.009*"death" + 0.008*"cannibalism" + 0.006*"christopher walken"
2022-12-10 20:28:49,528 : INFO : topic #34 (0.020): 0.078*"shakespeare" + 0.052*"pg13" + 0.050*"based on a play" + 0.045*"adapted from:play" + 0.043*"bill murray" + 0.036*"journalism" + 0.030*"paris" + 0.027*"france" + 0.025*"leonardo dicaprio" + 0.025*"whimsical"
2022-12-10 20:28:49,529 : INFO : topic #39 (0.020): 0.083*"politics" + 0.077*"Drama" + 0.069*"music" + 0.065*"biography" + 0.043*"library" + 0.033*"samurai" + 0.031*"biopic" + 0.029*"akira kurosawa" + 0.019*"japan" + 0.018*"biographical"
2022-12-10 20:28:49,530 : INFO : topic #48 (0.020): 0.075*"Drama" + 0.049*"criterion" + 0.026*"reflective" + 0.026*"dvd-video" + 0.026*"tumey's dvds" + 0.022*"lyrical" + 0.022*"poignant" + 0.022*"deliberate" + 0.020*"bittersweet" + 0.018*"black and white"
2

2022-12-10 20:28:52,292 : INFO : topic #29 (0.020): 0.119*"remake" + 0.111*"anime" + 0.064*"japan" + 0.044*"jane austen" + 0.040*"sean connery" + 0.039*"cars" + 0.033*"samuel l. jackson" + 0.029*"submarine" + 0.027*"keira knightley" + 0.022*"car chase"
2022-12-10 20:28:52,292 : INFO : topic #37 (0.020): 0.309*"based on a book" + 0.126*"adapted from:book" + 0.060*"magic" + 0.040*"based on book" + 0.033*"Drama" + 0.012*"shark" + 0.012*"dvd" + 0.011*"film theory & criticism" + 0.010*"witch" + 0.010*"surrealism"
2022-12-10 20:28:52,295 : INFO : topic diff=0.237666, rho=0.284665
2022-12-10 20:28:52,297 : INFO : PROGRESS: pass 6, at document #4000/10681
2022-12-10 20:28:52,611 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:28:52,791 : INFO : topic #27 (0.020): 0.113*"james bond" + 0.080*"007" + 0.076*"bond" + 0.027*"murder" + 0.024*"franchise" + 0.022*"Action" + 0.021*"assassin" + 0.021*"Adventure" + 0.020*"007 (series)" + 0.020*"killer as protagon

2022-12-10 20:28:55,114 : INFO : topic diff=0.117915, rho=0.284665
2022-12-10 20:28:55,116 : INFO : PROGRESS: pass 7, at document #2000/10681
2022-12-10 20:28:55,523 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:28:55,723 : INFO : topic #32 (0.020): 0.206*"time travel" + 0.070*"mel gibson" + 0.033*"bechdel test:fail" + 0.027*"post-apocalyptic" + 0.026*"medieval" + 0.022*"humphrey bogart" + 0.015*"freedom" + 0.015*"robert zemeckis" + 0.014*"time" + 0.013*"free to download"
2022-12-10 20:28:55,724 : INFO : topic #1 (0.020): 0.420*"Drama" + 0.260*"Romance" + 0.174*"Comedy" + 0.017*"bibliothek" + 0.007*"england" + 0.007*"relationships" + 0.006*"david lynch" + 0.005*"library vhs" + 0.005*"oscar (best foreign language film)" + 0.003*"elegant"
2022-12-10 20:28:55,725 : INFO : topic #35 (0.020): 0.047*"violence" + 0.045*"heist" + 0.037*"india" + 0.028*"ensemble cast" + 0.027*"rape" + 0.026*"inspirational" + 0.023*"to-rent" + 0.023*"computers" + 0.02

2022-12-10 20:28:58,922 : INFO : topic #4 (0.020): 0.082*"murder" + 0.057*"edward norton" + 0.048*"espionage" + 0.048*"steven spielberg" + 0.043*"tom cruise" + 0.041*"assassin" + 0.030*"jonossa" + 0.025*"killer-as-protagonist" + 0.021*"milla jovovich" + 0.019*"culture clash"
2022-12-10 20:28:58,924 : INFO : topic #11 (0.020): 0.068*"new york city" + 0.059*"christmas" + 0.045*"mafia" + 0.035*"road trip" + 0.034*"organized crime" + 0.031*"new york" + 0.031*"motorcycle" + 0.027*"al pacino" + 0.024*"archaeology" + 0.019*"claymation"
2022-12-10 20:28:58,925 : INFO : topic #29 (0.020): 0.147*"anime" + 0.143*"remake" + 0.074*"japan" + 0.046*"cars" + 0.039*"keira knightley" + 0.026*"sean connery" + 0.026*"samuel l. jackson" + 0.024*"michael caine" + 0.023*"jane austen" + 0.022*"car chase"
2022-12-10 20:28:58,926 : INFO : topic #34 (0.020): 0.117*"pg13" + 0.045*"based on a play" + 0.044*"journalism" + 0.042*"paris" + 0.042*"shakespeare" + 0.040*"bill murray" + 0.037*"france" + 0.035*"adapted fr

2022-12-10 20:29:01,374 : INFO : topic #29 (0.020): 0.160*"anime" + 0.128*"remake" + 0.084*"japan" + 0.051*"cars" + 0.031*"sean connery" + 0.027*"jane austen" + 0.027*"keira knightley" + 0.024*"samuel l. jackson" + 0.023*"car chase" + 0.022*"michael caine"
2022-12-10 20:29:01,376 : INFO : topic #0 (0.020): 0.085*"clint eastwood" + 0.065*"hayao miyazaki" + 0.050*"gene hackman" + 0.047*"spoof" + 0.040*"television" + 0.038*"western" + 0.037*"19th century" + 0.036*"blindfold" + 0.029*"Western" + 0.025*"mst3k"
2022-12-10 20:29:01,378 : INFO : topic diff=0.048672, rho=0.264069
2022-12-10 20:29:01,576 : INFO : -20.874 per-word bound, 1921108.8 perplexity estimate based on a held-out corpus of 681 documents with 5199 words
2022-12-10 20:29:01,577 : INFO : PROGRESS: pass 8, at document #10681/10681
2022-12-10 20:29:01,841 : INFO : merging changes from 681 documents into a model of 10681 documents
2022-12-10 20:29:01,977 : INFO : topic #5 (0.020): 0.104*"fantasy" + 0.059*"sci-fi" + 0.053*"space"

2022-12-10 20:29:04,116 : INFO : topic diff=0.058106, rho=0.255317
2022-12-10 20:29:04,118 : INFO : PROGRESS: pass 9, at document #10000/10681
2022-12-10 20:29:04,492 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:04,662 : INFO : topic #31 (0.020): 0.336*"Horror" + 0.150*"Sci-Fi" + 0.135*"Thriller" + 0.036*"Mystery" + 0.027*"horror" + 0.025*"boxing" + 0.023*"vhs" + 0.016*"not available from netflix" + 0.014*"tumey's dvds" + 0.013*"adam sandler"
2022-12-10 20:29:04,663 : INFO : topic #42 (0.020): 0.081*"serial killer" + 0.078*"psychology" + 0.073*"sven's to see list" + 0.066*"brad pitt" + 0.055*"crime" + 0.051*"prison" + 0.043*"morgan freeman" + 0.026*"mystery" + 0.021*"thriller" + 0.020*"gary oldman"
2022-12-10 20:29:04,665 : INFO : topic #4 (0.020): 0.073*"murder" + 0.069*"edward norton" + 0.060*"steven spielberg" + 0.053*"espionage" + 0.053*"tom cruise" + 0.035*"jonossa" + 0.024*"assassin" + 0.022*"john cusack" + 0.018*"dinosaurs" + 0.01

2022-12-10 20:29:07,734 : INFO : topic #22 (0.020): 0.077*"sci-fi" + 0.056*"jim carrey" + 0.053*"funny" + 0.048*"comedy" + 0.032*"will smith" + 0.030*"virtual reality" + 0.027*"tommy lee jones" + 0.025*"future" + 0.025*"not funny" + 0.023*"stupid"
2022-12-10 20:29:07,736 : INFO : topic #26 (0.020): 0.218*"r" + 0.112*"movie to see" + 0.103*"clearplay" + 0.064*"Drama" + 0.040*"easily confused with other movie(s) (title)" + 0.025*"to see" + 0.024*"russell crowe" + 0.024*"video game adaptation" + 0.021*"philip seymour hoffman" + 0.017*"revenge"
2022-12-10 20:29:07,737 : INFO : topic #38 (0.020): 0.239*"Thriller" + 0.229*"Crime" + 0.190*"Drama" + 0.083*"Mystery" + 0.076*"Action" + 0.031*"to see" + 0.025*"Film-Noir" + 0.006*"christopher walken" + 0.005*"cannibalism" + 0.005*"bibliothek"
2022-12-10 20:29:07,738 : INFO : topic #49 (0.020): 0.093*"twist ending" + 0.060*"nicolas cage" + 0.054*"philip k. dick" + 0.046*"sexuality" + 0.045*"kevin spacey" + 0.028*"1970s" + 0.025*"monster" + 0.021*"j

2022-12-10 20:29:10,486 : INFO : topic #16 (0.020): 0.187*"less than 300 ratings" + 0.119*"Drama" + 0.067*"robin williams" + 0.063*"teen" + 0.063*"Comedy" + 0.056*"high school" + 0.050*"gay" + 0.049*"netflix" + 0.029*"friendship" + 0.020*"mel brooks"
2022-12-10 20:29:10,487 : INFO : topic #49 (0.020): 0.097*"twist ending" + 0.061*"nicolas cage" + 0.056*"philip k. dick" + 0.048*"sexuality" + 0.047*"kevin spacey" + 0.028*"1970s" + 0.025*"monster" + 0.025*"jodie foster" + 0.018*"poker" + 0.014*"twist"
2022-12-10 20:29:10,488 : INFO : topic #20 (0.020): 0.047*"surreal" + 0.042*"satire" + 0.031*"hilarious" + 0.030*"seen more than once" + 0.025*"owned" + 0.021*"vietnam war" + 0.019*"vietnam" + 0.018*"own" + 0.017*"want to see again" + 0.017*"tumey's dvds"
2022-12-10 20:29:10,491 : INFO : topic diff=0.067788, rho=0.240143
2022-12-10 20:29:10,493 : INFO : PROGRESS: pass 11, at document #8000/10681
2022-12-10 20:29:10,872 : INFO : merging changes from 2000 documents into a model of 10681 docume

2022-12-10 20:29:13,537 : INFO : topic #10 (0.020): 0.092*"can't remember" + 0.087*"comedy" + 0.072*"aliens" + 0.051*"parody" + 0.034*"Comedy" + 0.030*"Sci-Fi" + 0.030*"keanu reeves" + 0.021*"Action" + 0.021*"star trek" + 0.020*"underrated"
2022-12-10 20:29:13,540 : INFO : topic diff=0.032541, rho=0.233504
2022-12-10 20:29:13,542 : INFO : PROGRESS: pass 12, at document #6000/10681
2022-12-10 20:29:13,868 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:14,032 : INFO : topic #4 (0.020): 0.078*"steven spielberg" + 0.072*"edward norton" + 0.067*"murder" + 0.062*"tom cruise" + 0.039*"espionage" + 0.028*"assassin" + 0.024*"drama" + 0.023*"john cusack" + 0.023*"dinosaurs" + 0.018*"killer-as-protagonist"
2022-12-10 20:29:14,033 : INFO : topic #6 (0.020): 0.049*"quentin tarantino" + 0.049*"documentary" + 0.038*"nonlinear" + 0.032*"tarantino" + 0.031*"angelina jolie" + 0.023*"interesting" + 0.023*"courtroom" + 0.022*"propaganda" + 0.019*"hugh grant" 

2022-12-10 20:29:16,428 : INFO : topic diff=0.179411, rho=0.227387
2022-12-10 20:29:16,431 : INFO : PROGRESS: pass 13, at document #4000/10681
2022-12-10 20:29:16,876 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:17,029 : INFO : topic #12 (0.020): 0.094*"racism" + 0.032*"australia" + 0.030*"civil war" + 0.030*"assassination" + 0.029*"historical" + 0.029*"cold war" + 0.025*"murder" + 0.024*"book" + 0.024*"denzel washington" + 0.022*"courtroom drama"
2022-12-10 20:29:17,030 : INFO : topic #41 (0.020): 0.536*"Comedy" + 0.057*"nudity (full frontal - notable)" + 0.041*"nudity (rear)" + 0.021*"imdb bottom 100" + 0.015*"suicide" + 0.014*"sexy" + 0.012*"sexual" + 0.010*"mathematics" + 0.008*"transgender" + 0.008*"torture"
2022-12-10 20:29:17,031 : INFO : topic #4 (0.020): 0.072*"steven spielberg" + 0.070*"murder" + 0.068*"edward norton" + 0.059*"tom cruise" + 0.033*"espionage" + 0.033*"assassin" + 0.025*"dinosaurs" + 0.025*"drama" + 0.025*"john c

2022-12-10 20:29:19,205 : INFO : topic #28 (0.020): 0.133*"disney" + 0.072*"animation" + 0.056*"pixar" + 0.035*"Animation" + 0.033*"children" + 0.033*"Children" + 0.026*"fairy tale" + 0.023*"disney animated feature" + 0.019*"pg" + 0.017*"coming of age"
2022-12-10 20:29:19,206 : INFO : topic #24 (0.020): 0.099*"world war ii" + 0.069*"Drama" + 0.058*"war" + 0.054*"oscar (best cinematography)" + 0.048*"War" + 0.038*"history" + 0.035*"holocaust" + 0.030*"oscar (best actor)" + 0.028*"afi 100 (cheers)" + 0.021*"nazis"
2022-12-10 20:29:19,207 : INFO : topic #34 (0.020): 0.075*"pg13" + 0.072*"shakespeare" + 0.048*"based on a play" + 0.042*"bill murray" + 0.040*"adapted from:play" + 0.038*"paris" + 0.037*"journalism" + 0.034*"france" + 0.033*"leonardo dicaprio" + 0.026*"whimsical"
2022-12-10 20:29:19,208 : INFO : topic #2 (0.020): 0.059*"stanley kubrick" + 0.052*"cult film" + 0.047*"martin scorsese" + 0.040*"los angeles" + 0.026*"marlon brando" + 0.025*"wedding" + 0.025*"kidnapping" + 0.020*"be

2022-12-10 20:29:20,786 : INFO : topic #17 (0.020): 0.054*"boring" + 0.047*"dvd-r" + 0.047*"afi 100 (laughs)" + 0.047*"matt damon" + 0.041*"1980s" + 0.036*"jude law" + 0.031*"dvd-ram" + 0.030*"clv" + 0.023*"want to own" + 0.022*"nudity (full frontal - brief)"
2022-12-10 20:29:20,788 : INFO : topic #16 (0.020): 0.267*"less than 300 ratings" + 0.137*"Drama" + 0.085*"netflix" + 0.058*"Comedy" + 0.044*"high school" + 0.042*"gay" + 0.035*"friendship" + 0.034*"robin williams" + 0.033*"teen" + 0.016*"homosexuality"
2022-12-10 20:29:20,788 : INFO : topic #7 (0.020): 0.120*"dystopia" + 0.096*"pg-13" + 0.062*"robots" + 0.056*"Sci-Fi" + 0.044*"post apocalyptic" + 0.041*"remade" + 0.035*"futuristmovies.com" + 0.012*"2" + 0.012*"space travel" + 0.012*"future dystopias"
2022-12-10 20:29:20,791 : INFO : topic diff=0.089976, rho=0.221727
2022-12-10 20:29:20,793 : INFO : PROGRESS: pass 15, at document #2000/10681
2022-12-10 20:29:21,102 : INFO : merging changes from 2000 documents into a model of 10681

2022-12-10 20:29:22,791 : INFO : topic diff=0.038168, rho=0.216470
2022-12-10 20:29:23,358 : INFO : -20.862 per-word bound, 1905299.6 perplexity estimate based on a held-out corpus of 681 documents with 5199 words
2022-12-10 20:29:23,359 : INFO : PROGRESS: pass 15, at document #10681/10681
2022-12-10 20:29:23,682 : INFO : merging changes from 681 documents into a model of 10681 documents
2022-12-10 20:29:23,844 : INFO : topic #43 (0.020): 0.076*"Comedy" + 0.069*"quirky" + 0.047*"oppl" + 0.043*"dark comedy" + 0.038*"humorous" + 0.034*"coen brothers" + 0.033*"irreverent" + 0.028*"black comedy" + 0.023*"satirical" + 0.022*"witty"
2022-12-10 20:29:23,846 : INFO : topic #18 (0.020): 0.232*"War" + 0.196*"Drama" + 0.080*"nudity (full frontal)" + 0.047*"not corv lib" + 0.042*"directorial debut" + 0.027*"george clooney" + 0.023*"hw drama" + 0.020*"nicole kidman" + 0.014*"john malkovich" + 0.014*"immortality"
2022-12-10 20:29:23,847 : INFO : topic #34 (0.020): 0.107*"pg13" + 0.047*"shakespeare" 

2022-12-10 20:29:26,042 : INFO : topic #20 (0.020): 0.047*"surreal" + 0.046*"satire" + 0.031*"hilarious" + 0.028*"owned" + 0.027*"seen more than once" + 0.021*"divx" + 0.019*"overrated" + 0.018*"depressing" + 0.016*"own" + 0.016*"tumey's dvds"
2022-12-10 20:29:26,043 : INFO : topic #7 (0.020): 0.128*"dystopia" + 0.096*"pg-13" + 0.054*"Sci-Fi" + 0.048*"robots" + 0.045*"remade" + 0.041*"futuristmovies.com" + 0.034*"post apocalyptic" + 0.014*"2" + 0.014*"space travel" + 0.012*"friends should see"
2022-12-10 20:29:26,060 : INFO : topic #32 (0.020): 0.203*"time travel" + 0.054*"mel gibson" + 0.031*"post-apocalyptic" + 0.028*"medieval" + 0.026*"bechdel test:fail" + 0.017*"time" + 0.015*"humphrey bogart" + 0.015*"bullshit history" + 0.015*"end of the world" + 0.014*"1960s"
2022-12-10 20:29:26,061 : INFO : topic #31 (0.020): 0.334*"Horror" + 0.150*"Sci-Fi" + 0.136*"Thriller" + 0.035*"Mystery" + 0.030*"horror" + 0.025*"boxing" + 0.023*"vhs" + 0.015*"not available from netflix" + 0.014*"tumey's 

2022-12-10 20:29:28,845 : INFO : topic #25 (0.020): 0.316*"Documentary" + 0.190*"betamax" + 0.073*"in netflix queue" + 0.021*"notable nudity" + 0.019*"IMAX" + 0.019*"dvd-video" + 0.015*"crappy sequel" + 0.014*"robert downey jr" + 0.012*"no dialogue" + 0.011*"disk"
2022-12-10 20:29:28,846 : INFO : topic #17 (0.020): 0.059*"boring" + 0.055*"afi 100 (laughs)" + 0.049*"dvd-r" + 0.044*"matt damon" + 0.042*"clv" + 0.031*"dvd-ram" + 0.031*"1980s" + 0.029*"jude law" + 0.024*"want to own" + 0.021*"nudity (full frontal - brief)"
2022-12-10 20:29:28,847 : INFO : topic #26 (0.020): 0.225*"r" + 0.115*"movie to see" + 0.107*"clearplay" + 0.065*"Drama" + 0.039*"easily confused with other movie(s) (title)" + 0.025*"to see" + 0.024*"russell crowe" + 0.024*"video game adaptation" + 0.021*"philip seymour hoffman" + 0.017*"revenge"
2022-12-10 20:29:28,847 : INFO : topic #23 (0.020): 0.215*"Musical" + 0.163*"70mm" + 0.102*"Drama" + 0.063*"musical" + 0.037*"adultery" + 0.017*"great acting" + 0.017*"adapted 

2022-12-10 20:29:31,363 : INFO : topic #22 (0.020): 0.058*"sci-fi" + 0.051*"funny" + 0.050*"jim carrey" + 0.039*"comedy" + 0.036*"will smith" + 0.031*"memory" + 0.029*"tommy lee jones" + 0.027*"stupid" + 0.026*"get" + 0.025*"virtual reality"
2022-12-10 20:29:31,363 : INFO : topic #6 (0.020): 0.050*"quentin tarantino" + 0.049*"documentary" + 0.037*"nonlinear" + 0.033*"tarantino" + 0.031*"angelina jolie" + 0.023*"interesting" + 0.022*"courtroom" + 0.022*"propaganda" + 0.019*"notable soundtrack" + 0.019*"hugh grant"
2022-12-10 20:29:31,366 : INFO : topic diff=0.055138, rho=0.202691
2022-12-10 20:29:31,368 : INFO : PROGRESS: pass 18, at document #8000/10681
2022-12-10 20:29:31,708 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:31,822 : INFO : topic #10 (0.020): 0.095*"comedy" + 0.090*"can't remember" + 0.065*"aliens" + 0.054*"parody" + 0.037*"Comedy" + 0.031*"keanu reeves" + 0.031*"Sci-Fi" + 0.019*"police" + 0.018*"underrated" + 0.018*"funny"


2022-12-10 20:29:33,935 : INFO : topic diff=0.026190, rho=0.198652
2022-12-10 20:29:33,938 : INFO : PROGRESS: pass 19, at document #6000/10681
2022-12-10 20:29:34,231 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:34,299 : INFO : topic #19 (0.020): 0.109*"action" + 0.074*"comic book" + 0.059*"superhero" + 0.048*"Action" + 0.045*"bruce willis" + 0.032*"super-hero" + 0.031*"stephen king" + 0.029*"based on a tv show" + 0.020*"eric's dvds" + 0.019*"adapted from:comic"
2022-12-10 20:29:34,301 : INFO : topic #4 (0.020): 0.078*"steven spielberg" + 0.071*"edward norton" + 0.066*"murder" + 0.061*"tom cruise" + 0.040*"espionage" + 0.029*"assassin" + 0.025*"drama" + 0.024*"john cusack" + 0.023*"dinosaurs" + 0.018*"killer-as-protagonist"
2022-12-10 20:29:34,301 : INFO : topic #48 (0.020): 0.089*"Drama" + 0.054*"criterion" + 0.029*"tumey's dvds" + 0.029*"dvd-video" + 0.024*"reflective" + 0.022*"bibliothek" + 0.021*"deliberate" + 0.021*"lyrical" + 0.021

2022-12-10 20:29:36,863 : INFO : topic #47 (0.020): 0.206*"Adventure" + 0.178*"Fantasy" + 0.160*"Children" + 0.136*"Comedy" + 0.067*"Animation" + 0.044*"Sci-Fi" + 0.012*"natalie portman" + 0.010*"vincent price" + 0.006*"good versus evil" + 0.006*"puppets"
2022-12-10 20:29:36,864 : INFO : topic #15 (0.020): 0.097*"romance" + 0.067*"tom hanks" + 0.056*"drama" + 0.055*"Romance" + 0.047*"oscar (best supporting actor)" + 0.047*"chick flick" + 0.042*"Comedy" + 0.031*"woody allen" + 0.026*"girlie movie" + 0.022*"dance"
2022-12-10 20:29:36,865 : INFO : topic #37 (0.020): 0.294*"based on a book" + 0.134*"adapted from:book" + 0.057*"Drama" + 0.055*"magic" + 0.043*"based on book" + 0.024*"surrealism" + 0.012*"dvd" + 0.011*"no rec?" + 0.010*"shark" + 0.009*"kate"
2022-12-10 20:29:36,865 : INFO : topic #16 (0.020): 0.194*"less than 300 ratings" + 0.120*"Drama" + 0.063*"robin williams" + 0.062*"teen" + 0.061*"high school" + 0.058*"netflix" + 0.054*"Comedy" + 0.048*"gay" + 0.030*"friendship" + 0.022*

2022-12-10 20:29:39,705 : INFO : topic #0 (0.020): 0.074*"clint eastwood" + 0.054*"gene hackman" + 0.050*"western" + 0.045*"spoof" + 0.044*"television" + 0.038*"hayao miyazaki" + 0.032*"blindfold" + 0.029*"president" + 0.028*"19th century" + 0.028*"Western"
2022-12-10 20:29:39,706 : INFO : topic #21 (0.020): 0.319*"nudity (topless)" + 0.200*"nudity (topless - brief)" + 0.115*"johnny depp" + 0.044*"jack nicholson" + 0.042*"Drama" + 0.012*"to see" + 0.009*"united states" + 0.009*"biographical view" + 0.008*"benicio del toro" + 0.007*"hunter s. thompson"
2022-12-10 20:29:39,709 : INFO : topic diff=0.146515, rho=0.191248
2022-12-10 20:29:39,711 : INFO : PROGRESS: pass 21, at document #4000/10681
2022-12-10 20:29:40,141 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:40,219 : INFO : topic #48 (0.020): 0.088*"Drama" + 0.053*"criterion" + 0.029*"tumey's dvds" + 0.027*"dvd-video" + 0.025*"reflective" + 0.022*"poignant" + 0.021*"deliberate" + 0.021*

2022-12-10 20:29:42,877 : INFO : topic #11 (0.020): 0.062*"new york city" + 0.059*"christmas" + 0.051*"mafia" + 0.033*"organized crime" + 0.032*"road trip" + 0.031*"new york" + 0.030*"al pacino" + 0.030*"motorcycle" + 0.022*"archaeology" + 0.021*"claymation"
2022-12-10 20:29:42,880 : INFO : topic diff=0.077607, rho=0.191248
2022-12-10 20:29:42,883 : INFO : PROGRESS: pass 22, at document #2000/10681
2022-12-10 20:29:43,305 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:43,454 : INFO : topic #22 (0.020): 0.065*"jim carrey" + 0.061*"sci-fi" + 0.051*"funny" + 0.042*"will smith" + 0.034*"tommy lee jones" + 0.032*"comedy" + 0.027*"memory" + 0.025*"future" + 0.025*"stupid" + 0.022*"get"
2022-12-10 20:29:43,456 : INFO : topic #14 (0.020): 0.220*"classic" + 0.068*"national film registry" + 0.050*"afi 100" + 0.037*"imdb top 250" + 0.036*"tumey's dvds" + 0.031*"hitchcock" + 0.028*"alfred hitchcock" + 0.024*"afi 100 (thrills)" + 0.022*"erlend's dvds" 

2022-12-10 20:29:45,802 : INFO : PROGRESS: pass 22, at document #10681/10681
2022-12-10 20:29:45,957 : INFO : merging changes from 681 documents into a model of 10681 documents
2022-12-10 20:29:46,079 : INFO : topic #2 (0.020): 0.048*"los angeles" + 0.042*"cult film" + 0.037*"martin scorsese" + 0.031*"kidnapping" + 0.029*"stanley kubrick" + 0.028*"wedding" + 0.025*"based on a comic" + 0.023*"beautiful" + 0.021*"steampunk" + 0.019*"marlon brando"
2022-12-10 20:29:46,081 : INFO : topic #11 (0.020): 0.062*"new york city" + 0.059*"christmas" + 0.051*"mafia" + 0.033*"organized crime" + 0.032*"road trip" + 0.031*"new york" + 0.031*"al pacino" + 0.030*"motorcycle" + 0.022*"archaeology" + 0.021*"claymation"
2022-12-10 20:29:46,082 : INFO : topic #49 (0.020): 0.100*"twist ending" + 0.060*"nicolas cage" + 0.046*"philip k. dick" + 0.039*"sexuality" + 0.038*"kevin spacey" + 0.031*"1970s" + 0.031*"monster" + 0.022*"poker" + 0.018*"jodie foster" + 0.017*"twist"
2022-12-10 20:29:46,083 : INFO : topic

2022-12-10 20:29:47,675 : INFO : topic #5 (0.020): 0.096*"fantasy" + 0.071*"sci-fi" + 0.049*"space" + 0.048*"adventure" + 0.035*"dvd" + 0.026*"seen more than once" + 0.025*"sequel" + 0.020*"action" + 0.018*"harrison ford" + 0.014*"seen at the cinema"
2022-12-10 20:29:47,676 : INFO : topic #18 (0.020): 0.209*"War" + 0.189*"Drama" + 0.065*"nudity (full frontal)" + 0.051*"not corv lib" + 0.051*"directorial debut" + 0.029*"hw drama" + 0.026*"george clooney" + 0.023*"nicole kidman" + 0.016*"genocide" + 0.016*"john malkovich"
2022-12-10 20:29:47,678 : INFO : topic #20 (0.020): 0.047*"surreal" + 0.046*"satire" + 0.031*"hilarious" + 0.028*"owned" + 0.027*"seen more than once" + 0.021*"divx" + 0.018*"overrated" + 0.018*"depressing" + 0.017*"own" + 0.016*"vietnam war"
2022-12-10 20:29:47,678 : INFO : topic #0 (0.020): 0.085*"clint eastwood" + 0.059*"hayao miyazaki" + 0.051*"gene hackman" + 0.047*"spoof" + 0.041*"western" + 0.041*"television" + 0.034*"blindfold" + 0.034*"19th century" + 0.029*"We

2022-12-10 20:29:49,277 : INFO : topic #12 (0.020): 0.077*"racism" + 0.032*"australia" + 0.030*"cold war" + 0.030*"historical" + 0.030*"civil war" + 0.027*"book" + 0.026*"assassination" + 0.024*"denzel washington" + 0.023*"murder" + 0.020*"cia"
2022-12-10 20:29:49,278 : INFO : topic #6 (0.020): 0.059*"documentary" + 0.058*"quentin tarantino" + 0.038*"tarantino" + 0.031*"nonlinear" + 0.026*"angelina jolie" + 0.023*"courtroom" + 0.022*"propaganda" + 0.021*"interesting" + 0.018*"overrated" + 0.018*"politics"
2022-12-10 20:29:49,280 : INFO : topic diff=0.040271, rho=0.181547
2022-12-10 20:29:49,282 : INFO : PROGRESS: pass 24, at document #10000/10681
2022-12-10 20:29:49,610 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:49,706 : INFO : topic #16 (0.020): 0.262*"less than 300 ratings" + 0.130*"Drama" + 0.068*"netflix" + 0.055*"Comedy" + 0.048*"high school" + 0.045*"gay" + 0.044*"robin williams" + 0.043*"teen" + 0.026*"friendship" + 0.019*"homos

2022-12-10 20:29:51,590 : INFO : topic diff=0.047328, rho=0.178627
2022-12-10 20:29:51,591 : INFO : PROGRESS: pass 25, at document #8000/10681
2022-12-10 20:29:51,863 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:51,927 : INFO : topic #18 (0.020): 0.204*"War" + 0.186*"Drama" + 0.062*"directorial debut" + 0.059*"nudity (full frontal)" + 0.047*"not corv lib" + 0.033*"hw drama" + 0.026*"nicole kidman" + 0.023*"george clooney" + 0.016*"john malkovich" + 0.013*"old"
2022-12-10 20:29:51,929 : INFO : topic #26 (0.020): 0.229*"r" + 0.118*"movie to see" + 0.110*"clearplay" + 0.066*"Drama" + 0.038*"easily confused with other movie(s) (title)" + 0.025*"to see" + 0.024*"russell crowe" + 0.024*"video game adaptation" + 0.021*"philip seymour hoffman" + 0.017*"revenge"
2022-12-10 20:29:51,929 : INFO : topic #30 (0.020): 0.074*"arnold schwarzenegger" + 0.035*"marriage" + 0.025*"1" + 0.020*"father-son relationship" + 0.020*"women" + 0.017*"sequel better t

2022-12-10 20:29:54,040 : INFO : topic #7 (0.020): 0.130*"dystopia" + 0.060*"Sci-Fi" + 0.057*"pg-13" + 0.053*"robots" + 0.050*"futuristmovies.com" + 0.048*"remade" + 0.039*"post apocalyptic" + 0.017*"2" + 0.014*"space travel" + 0.013*"friends should see"
2022-12-10 20:29:54,042 : INFO : topic #25 (0.020): 0.314*"Documentary" + 0.192*"betamax" + 0.064*"in netflix queue" + 0.023*"IMAX" + 0.021*"notable nudity" + 0.018*"dvd-video" + 0.016*"crappy sequel" + 0.015*"robert downey jr" + 0.012*"no dialogue" + 0.010*"short"
2022-12-10 20:29:54,042 : INFO : topic #44 (0.020): 0.166*"oscar (best picture)" + 0.066*"oscar (best directing)" + 0.064*"oscar (best actor)" + 0.052*"oscar (best actress)" + 0.047*"oscar (best supporting actress)" + 0.047*"ghosts" + 0.026*"peter jackson" + 0.026*"england" + 0.025*"overrated" + 0.025*"mental illness"
2022-12-10 20:29:54,043 : INFO : topic #29 (0.020): 0.149*"anime" + 0.128*"remake" + 0.068*"japan" + 0.047*"sean connery" + 0.045*"cars" + 0.030*"jane austen" 

2022-12-10 20:29:56,049 : INFO : topic #22 (0.020): 0.060*"sci-fi" + 0.058*"jim carrey" + 0.047*"funny" + 0.039*"will smith" + 0.031*"tommy lee jones" + 0.028*"virtual reality" + 0.028*"stupid" + 0.027*"memory" + 0.027*"comedy" + 0.024*"get"
2022-12-10 20:29:56,050 : INFO : topic #45 (0.020): 0.045*"atmospheric" + 0.041*"tense" + 0.041*"disturbing" + 0.032*"zombies" + 0.031*"stylized" + 0.019*"visceral" + 0.019*"menacing" + 0.018*"ominous" + 0.014*"bleak" + 0.014*"erlend's dvds"
2022-12-10 20:29:56,050 : INFO : topic #36 (0.020): 0.132*"true story" + 0.114*"based on a true story" + 0.036*"dustin hoffman" + 0.025*"australian" + 0.018*"bank robbery" + 0.015*"tumey's vhs" + 0.015*"emotion!" + 0.013*"ron howard" + 0.013*"william shatner" + 0.012*"russia"
2022-12-10 20:29:56,053 : INFO : topic diff=0.022301, rho=0.173186
2022-12-10 20:29:56,055 : INFO : PROGRESS: pass 27, at document #6000/10681
2022-12-10 20:29:56,352 : INFO : merging changes from 2000 documents into a model of 10681 docum

2022-12-10 20:29:58,259 : INFO : topic #43 (0.020): 0.075*"quirky" + 0.074*"Comedy" + 0.049*"dark comedy" + 0.042*"humorous" + 0.042*"coen brothers" + 0.033*"oppl" + 0.033*"irreverent" + 0.031*"black comedy" + 0.025*"witty" + 0.025*"satirical"
2022-12-10 20:29:58,262 : INFO : topic diff=0.128268, rho=0.170646
2022-12-10 20:29:58,264 : INFO : PROGRESS: pass 28, at document #4000/10681
2022-12-10 20:29:58,544 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:29:58,579 : INFO : topic #9 (0.020): 0.050*"pirates" + 0.043*"sean penn" + 0.031*"infidelity" + 0.028*"dogs" + 0.028*"marx brothers" + 0.023*"blaxploitation" + 0.022*"italy" + 0.020*"epic" + 0.019*"brian de palma" + 0.018*"audrey hepburn"
2022-12-10 20:29:58,580 : INFO : topic #26 (0.020): 0.243*"r" + 0.124*"clearplay" + 0.123*"movie to see" + 0.065*"Drama" + 0.034*"easily confused with other movie(s) (title)" + 0.027*"to see" + 0.025*"russell crowe" + 0.022*"video game adaptation" + 0.021*"ph

2022-12-10 20:30:00,008 : INFO : topic diff=0.068993, rho=0.170646
2022-12-10 20:30:00,010 : INFO : PROGRESS: pass 29, at document #2000/10681
2022-12-10 20:30:00,349 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:00,386 : INFO : topic #39 (0.020): 0.091*"politics" + 0.080*"Drama" + 0.075*"biography" + 0.061*"music" + 0.049*"library" + 0.037*"biopic" + 0.029*"samurai" + 0.028*"akira kurosawa" + 0.023*"japan" + 0.019*"history"
2022-12-10 20:30:00,387 : INFO : topic #33 (0.020): 0.060*"vampire" + 0.047*"vampires" + 0.045*"gothic" + 0.039*"christianity" + 0.038*"Horror" + 0.035*"religion" + 0.031*"erlend's dvds" + 0.029*"netwatch" + 0.028*"scary movies to see on halloween" + 0.023*"m. night shyamalan"
2022-12-10 20:30:00,388 : INFO : topic #5 (0.020): 0.081*"sci-fi" + 0.077*"fantasy" + 0.060*"space" + 0.043*"adventure" + 0.031*"dvd" + 0.026*"harrison ford" + 0.026*"seen more than once" + 0.022*"action" + 0.022*"sequel" + 0.014*"Adventure"
202

2022-12-10 20:30:02,097 : INFO : topic #8 (0.020): 0.091*"tim burton" + 0.085*"family" + 0.044*"watched 2007" + 0.038*"dark" + 0.023*"sandra bullock" + 0.020*"halloween" + 0.015*"twins" + 0.014*"letters" + 0.014*"dramatic" + 0.014*"foul language"
2022-12-10 20:30:02,098 : INFO : topic #21 (0.020): 0.339*"nudity (topless)" + 0.207*"nudity (topless - brief)" + 0.110*"johnny depp" + 0.043*"Drama" + 0.034*"jack nicholson" + 0.012*"to see" + 0.011*"united states" + 0.008*"biographical view" + 0.007*"adapted from:book series" + 0.007*"benicio del toro"
2022-12-10 20:30:02,100 : INFO : topic #13 (0.020): 0.107*"drugs" + 0.032*"gritty" + 0.030*"ummarti2006" + 0.020*"intimate" + 0.019*"male nudity" + 0.017*"ewan mcgregor" + 0.016*"addiction" + 0.016*"good dialogue" + 0.016*"kevin smith" + 0.015*"sex"
2022-12-10 20:30:02,100 : INFO : topic #28 (0.020): 0.085*"disney" + 0.076*"animation" + 0.069*"pixar" + 0.038*"Animation" + 0.032*"children" + 0.029*"Children" + 0.026*"fairy tale" + 0.025*"pg" + 

In [4]:
# LDA content-based recommendation.
# Import the project's recommender class; `src` is importable because the
# first cell prepends the parent folder ('..') to sys.path.
from src.lda_content import LDAContentRecommender
recommender = LDAContentRecommender()
# Run the recommender on the MovieLens dataset loaded earlier and keep the result.
# NOTE(review): the gensim log output that follows this cell suggests recommend()
# builds its own Dictionary and trains an LDA model again rather than reusing
# `lda_model` from the previous cell — confirm against src/lda_content.
recommend_result = recommender.recommend(movielens)

2022-12-10 20:30:02,815 : INFO : adding document #0 to Dictionary<0 unique tokens: []>
2022-12-10 20:30:02,954 : INFO : adding document #10000 to Dictionary<14749 unique tokens: ['3d', 'Adventure', 'Animation', 'Children', 'Comedy']...>
2022-12-10 20:30:02,962 : INFO : built Dictionary<15261 unique tokens: ['3d', 'Adventure', 'Animation', 'Children', 'Comedy']...> from 10681 documents (total 117144 corpus positions)
2022-12-10 20:30:02,962 : INFO : Dictionary lifecycle event {'msg': "built Dictionary<15261 unique tokens: ['3d', 'Adventure', 'Animation', 'Children', 'Comedy']...> from 10681 documents (total 117144 corpus positions)", 'datetime': '2022-12-10T20:30:02.962905', 'gensim': '4.2.0', 'python': '3.9.15 (main, Nov 24 2022, 14:39:17) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.19045-SP0', 'event': 'created'}
2022-12-10 20:30:03,193 : INFO : using symmetric alpha at 0.02
2022-12-10 20:30:03,193 : INFO : using symmetric eta at 0.02
2022-12-10 20:30:03,196 : INFO : us

2022-12-10 20:30:05,498 : INFO : topic #16 (0.020): 0.080*"nudity (rear)" + 0.053*"pg" + 0.041*"military" + 0.035*"claymation" + 0.031*"weird" + 0.030*"nudity (topless)" + 0.024*"aardman" + 0.019*"nudity (topless - brief)" + 0.019*"childhood" + 0.018*"penguins"
2022-12-10 20:30:05,501 : INFO : topic diff=0.241935, rho=0.447214
2022-12-10 20:30:05,700 : INFO : -21.496 per-word bound, 2957512.5 perplexity estimate based on a held-out corpus of 681 documents with 5199 words
2022-12-10 20:30:05,702 : INFO : PROGRESS: pass 0, at document #10681/10681
2022-12-10 20:30:05,815 : INFO : merging changes from 681 documents into a model of 10681 documents
2022-12-10 20:30:05,849 : INFO : topic #36 (0.020): 0.035*"russell crowe" + 0.031*"virus" + 0.029*"not available from netflix" + 0.028*"Comedy" + 0.026*"based on a book" + 0.025*"cult film" + 0.025*"peter jackson" + 0.024*"movielens quickpick" + 0.023*"1980s" + 0.021*"adam sandler"
2022-12-10 20:30:05,850 : INFO : topic #6 (0.020): 0.125*"Musical

2022-12-10 20:30:07,549 : INFO : topic #42 (0.020): 0.086*"martial arts" + 0.059*"robin williams" + 0.042*"jonossa" + 0.041*"seen 2006" + 0.031*"japan" + 0.029*"samurai" + 0.027*"Drama" + 0.025*"orson welles" + 0.024*"hugh grant" + 0.023*"divorce"
2022-12-10 20:30:07,551 : INFO : topic #41 (0.020): 0.141*"remake" + 0.058*"new york city" + 0.049*"christmas" + 0.044*"family" + 0.037*"new york" + 0.031*"Drama" + 0.027*"seen" + 0.024*"kevin spacey" + 0.023*"predictable" + 0.021*"friday night movie"
2022-12-10 20:30:07,552 : INFO : topic #5 (0.020): 0.162*"classic" + 0.082*"musical" + 0.027*"afi 100 (laughs)" + 0.024*"los angeles" + 0.018*"witch" + 0.018*"national film registry" + 0.016*"john travolta" + 0.015*"Musical" + 0.014*"adapted from b'way" + 0.013*"movie to see"
2022-12-10 20:30:07,553 : INFO : topic #49 (0.020): 0.081*"national film registry" + 0.026*"natalie portman" + 0.025*"tumey's dvds" + 0.022*"Film-Noir" + 0.020*"film noir" + 0.020*"black and white" + 0.019*"imdb top 250" + 

2022-12-10 20:30:09,204 : INFO : topic #45 (0.020): 0.235*"less than 300 ratings" + 0.109*"Drama" + 0.061*"nudity (topless - notable)" + 0.036*"Comedy" + 0.035*"tim burton" + 0.023*"library" + 0.018*"michael moore" + 0.015*"blindfold" + 0.014*"depressing" + 0.011*"golden raspberry (worst actor)"
2022-12-10 20:30:09,205 : INFO : topic #37 (0.020): 0.185*"War" + 0.109*"world war ii" + 0.103*"Drama" + 0.047*"history" + 0.047*"war" + 0.035*"jim carrey" + 0.025*"Action" + 0.019*"70mm" + 0.014*"nazis" + 0.013*"mental illness"
2022-12-10 20:30:09,206 : INFO : topic #38 (0.020): 0.142*"drugs" + 0.037*"samuel l. jackson" + 0.035*"peter sellers" + 0.030*"ewan mcgregor" + 0.024*"divx" + 0.022*"poverty" + 0.020*"addiction" + 0.019*"hulu" + 0.018*"michael caine" + 0.017*"Drama"
2022-12-10 20:30:09,208 : INFO : topic diff=0.119826, rho=0.346261
2022-12-10 20:30:09,209 : INFO : PROGRESS: pass 2, at document #10000/10681
2022-12-10 20:30:09,510 : INFO : merging changes from 2000 documents into a model

2022-12-10 20:30:10,861 : INFO : topic #11 (0.020): 0.127*"religion" + 0.115*"keanu reeves" + 0.020*"dark" + 0.018*"reality tv" + 0.018*"courtesan" + 0.018*"slash" + 0.017*"slavery" + 0.017*"facebook rec" + 0.014*"class issues" + 0.012*"product placement"
2022-12-10 20:30:10,863 : INFO : topic diff=0.113857, rho=0.327201
2022-12-10 20:30:10,865 : INFO : PROGRESS: pass 3, at document #8000/10681
2022-12-10 20:30:11,147 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:11,182 : INFO : topic #20 (0.020): 0.086*"biography" + 0.060*"Drama" + 0.043*"biopic" + 0.042*"ingmar bergman" + 0.035*"ensemble cast" + 0.031*"julia roberts" + 0.030*"suicide" + 0.028*"corvallis library" + 0.027*"multiple storylines" + 0.025*"medieval"
2022-12-10 20:30:11,183 : INFO : topic #47 (0.020): 0.059*"serial killer" + 0.046*"psychology" + 0.045*"brad pitt" + 0.041*"edward norton" + 0.033*"heist" + 0.031*"hayao miyazaki" + 0.031*"steven spielberg" + 0.029*"crime" + 0.029

2022-12-10 20:30:12,704 : INFO : topic diff=0.057320, rho=0.310978
2022-12-10 20:30:12,706 : INFO : PROGRESS: pass 4, at document #6000/10681
2022-12-10 20:30:13,031 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:13,065 : INFO : topic #40 (0.020): 0.236*"r" + 0.117*"clearplay" + 0.053*"prison" + 0.051*"movie to see" + 0.039*"Drama" + 0.033*"morgan freeman" + 0.025*"friendship" + 0.020*"john wayne" + 0.019*"mel brooks" + 0.017*"conspiracy"
2022-12-10 20:30:13,066 : INFO : topic #5 (0.020): 0.268*"classic" + 0.066*"musical" + 0.046*"afi 100 (laughs)" + 0.028*"Musical" + 0.027*"national film registry" + 0.020*"afi 100" + 0.019*"john travolta" + 0.012*"delights" + 0.012*"adapted from b'way" + 0.012*"breakthroughs"
2022-12-10 20:30:13,067 : INFO : topic #49 (0.020): 0.076*"national film registry" + 0.049*"imdb top 250" + 0.035*"black and white" + 0.032*"afi 100" + 0.030*"tumey's dvds" + 0.027*"afi 100 (thrills)" + 0.024*"afi 100 (movie quotes)"

2022-12-10 20:30:14,982 : INFO : topic #6 (0.020): 0.098*"Musical" + 0.090*"politics" + 0.067*"satire" + 0.052*"sean connery" + 0.048*"nicolas cage" + 0.030*"terrorism" + 0.030*"dvd-r" + 0.029*"police" + 0.022*"political" + 0.020*"good dialogue"
2022-12-10 20:30:14,982 : INFO : topic #28 (0.020): 0.083*"oscar (best cinematography)" + 0.066*"Drama" + 0.052*"oscar (best supporting actress)" + 0.047*"oscar (best actress)" + 0.041*"oscar (best actor)" + 0.039*"in netflix queue" + 0.027*"remade" + 0.019*"marlon brando" + 0.017*"70mm" + 0.016*"Romance"
2022-12-10 20:30:14,983 : INFO : topic #32 (0.020): 0.137*"johnny depp" + 0.064*"clint eastwood" + 0.045*"vhs" + 0.043*"western" + 0.041*"jackie chan" + 0.038*"spaghetti western" + 0.032*"sergio leone" + 0.031*"kung fu" + 0.027*"india" + 0.026*"david lynch"
2022-12-10 20:30:14,984 : INFO : topic #36 (0.020): 0.091*"cult film" + 0.049*"russell crowe" + 0.034*"1980s" + 0.033*"downbeat" + 0.032*"adam sandler" + 0.025*"peter jackson" + 0.021*"80s"

2022-12-10 20:30:16,941 : INFO : topic #37 (0.020): 0.179*"War" + 0.106*"world war ii" + 0.101*"Drama" + 0.061*"war" + 0.045*"jim carrey" + 0.044*"history" + 0.028*"Action" + 0.023*"nazis" + 0.018*"wwii" + 0.015*"mental illness"
2022-12-10 20:30:16,941 : INFO : topic #43 (0.020): 0.067*"surreal" + 0.056*"stanley kubrick" + 0.037*"satirical" + 0.035*"cynical" + 0.032*"narrated" + 0.029*"dreamlike" + 0.029*"irreverent" + 0.026*"quirky" + 0.025*"biting" + 0.023*"hallucinatory"
2022-12-10 20:30:16,944 : INFO : topic diff=0.233043, rho=0.284665
2022-12-10 20:30:16,946 : INFO : PROGRESS: pass 6, at document #4000/10681
2022-12-10 20:30:17,304 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:17,340 : INFO : topic #46 (0.020): 0.089*"action" + 0.064*"sci-fi" + 0.047*"fantasy" + 0.033*"adventure" + 0.028*"dvd" + 0.024*"seen at the cinema" + 0.023*"seen more than once" + 0.021*"Adventure" + 0.021*"space" + 0.020*"harrison ford"
2022-12-10 20:30:17,341

2022-12-10 20:30:19,350 : INFO : topic #0 (0.020): 0.115*"can't remember" + 0.110*"based on a tv show" + 0.087*"directorial debut" + 0.071*"Comedy" + 0.056*"ummarti2006" + 0.055*"keira knightley" + 0.028*"australia" + 0.026*"immigrants" + 0.026*"dani2006" + 0.025*"australian"
2022-12-10 20:30:19,353 : INFO : topic diff=0.116521, rho=0.284665
2022-12-10 20:30:19,356 : INFO : PROGRESS: pass 7, at document #2000/10681
2022-12-10 20:30:19,777 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:19,890 : INFO : topic #34 (0.020): 0.069*"ghosts" + 0.049*"bill murray" + 0.044*"television" + 0.028*"courtroom drama" + 0.027*"secret service" + 0.026*"courtroom" + 0.019*"roman polanski" + 0.018*"18th century" + 0.018*"opera" + 0.017*"smoking"
2022-12-10 20:30:19,892 : INFO : topic #13 (0.020): 0.056*"fairy tale" + 0.054*"jane austen" + 0.037*"assassination" + 0.035*"historical" + 0.033*"road trip" + 0.033*"cold war" + 0.029*"sequel" + 0.023*"gerard depardi

2022-12-10 20:30:22,009 : INFO : PROGRESS: pass 7, at document #10681/10681
2022-12-10 20:30:22,118 : INFO : merging changes from 681 documents into a model of 10681 documents
2022-12-10 20:30:22,160 : INFO : topic #21 (0.020): 0.074*"Sci-Fi" + 0.064*"aliens" + 0.044*"will smith" + 0.034*"space" + 0.033*"tommy lee jones" + 0.030*"Action" + 0.026*"ridley scott" + 0.025*"sci-fi" + 0.025*"futuristmovies.com" + 0.021*"monster"
2022-12-10 20:30:22,161 : INFO : topic #37 (0.020): 0.229*"War" + 0.125*"Drama" + 0.095*"world war ii" + 0.051*"history" + 0.048*"war" + 0.039*"Action" + 0.023*"jim carrey" + 0.022*"nazis" + 0.011*"mental illness" + 0.010*"wwii"
2022-12-10 20:30:22,162 : INFO : topic #26 (0.020): 0.043*"Drama" + 0.030*"james bond" + 0.021*"assassin" + 0.020*"poignant" + 0.020*"reflective" + 0.019*"atmospheric" + 0.019*"bittersweet" + 0.018*"bond" + 0.018*"007" + 0.017*"lyrical"
2022-12-10 20:30:22,163 : INFO : topic #46 (0.020): 0.077*"action" + 0.065*"fantasy" + 0.049*"sci-fi" + 0.0

2022-12-10 20:30:24,339 : INFO : topic #3 (0.020): 0.131*"oscar (best picture)" + 0.056*"oscar (best directing)" + 0.053*"oscar (best actor)" + 0.037*"al pacino" + 0.027*"oscar (best supporting actor)" + 0.024*"afi 100 (cheers)" + 0.022*"Drama" + 0.020*"great acting" + 0.019*"tumey's dvds" + 0.019*"civil war"
2022-12-10 20:30:24,340 : INFO : topic #15 (0.020): 0.109*"disney" + 0.100*"animation" + 0.087*"pixar" + 0.050*"pirates" + 0.040*"Animation" + 0.039*"children" + 0.030*"Children" + 0.028*"angelina jolie" + 0.024*"disney animated feature" + 0.022*"cartoon"
2022-12-10 20:30:24,342 : INFO : topic #14 (0.020): 0.060*"criterion" + 0.055*"disturbing" + 0.053*"Drama" + 0.051*"tense" + 0.046*"atmospheric" + 0.031*"tumey's dvds" + 0.030*"stylized" + 0.029*"bleak" + 0.024*"erlend's dvds" + 0.023*"menacing"
2022-12-10 20:30:24,344 : INFO : topic diff=0.050891, rho=0.264069
2022-12-10 20:30:24,646 : INFO : -20.979 per-word bound, 2066694.4 perplexity estimate based on a held-out corpus of 681

2022-12-10 20:30:27,079 : INFO : topic #27 (0.020): 0.319*"Horror" + 0.150*"Thriller" + 0.127*"Mystery" + 0.040*"Drama" + 0.030*"Fantasy" + 0.030*"easily confused with other movie(s) (title)" + 0.025*"Film-Noir" + 0.019*"eerie" + 0.015*"tumey's dvds" + 0.013*"franchise"
2022-12-10 20:30:27,082 : INFO : topic diff=0.056291, rho=0.255317
2022-12-10 20:30:27,084 : INFO : PROGRESS: pass 9, at document #10000/10681
2022-12-10 20:30:27,516 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:27,705 : INFO : topic #7 (0.020): 0.106*"romance" + 0.059*"Romance" + 0.043*"chick flick" + 0.040*"boring" + 0.027*"Comedy" + 0.025*"girlie movie" + 0.025*"love story" + 0.024*"baseball" + 0.024*"whimsical" + 0.023*"comedy"
2022-12-10 20:30:27,706 : INFO : topic #11 (0.020): 0.130*"religion" + 0.106*"keanu reeves" + 0.026*"product placement" + 0.022*"slavery" + 0.021*"facebook rec" + 0.021*"courtesan" + 0.018*"reality tv" + 0.017*"social message" + 0.016*"cross-dr

2022-12-10 20:30:29,845 : INFO : topic diff=0.069619, rho=0.247382
2022-12-10 20:30:29,846 : INFO : PROGRESS: pass 10, at document #8000/10681
2022-12-10 20:30:30,141 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:30,260 : INFO : topic #16 (0.020): 0.269*"nudity (topless)" + 0.184*"nudity (topless - brief)" + 0.067*"nudity (rear)" + 0.028*"pg" + 0.026*"claymation" + 0.022*"military" + 0.018*"childhood" + 0.018*"weird" + 0.016*"aardman" + 0.010*"shark"
2022-12-10 20:30:30,262 : INFO : topic #23 (0.020): 0.081*"high school" + 0.081*"teen" + 0.065*"cars" + 0.034*"dance" + 0.031*"marx brothers" + 0.029*"eddie murphy" + 0.024*"(s)vcd" + 0.023*"car chase" + 0.023*"Comedy" + 0.019*"sandra bullock"
2022-12-10 20:30:30,263 : INFO : topic #30 (0.020): 0.237*"Adventure" + 0.115*"Children" + 0.104*"Comedy" + 0.100*"Fantasy" + 0.077*"Action" + 0.070*"70mm" + 0.046*"Animation" + 0.045*"Drama" + 0.024*"Musical" + 0.009*"submarine"
2022-12-10 20:30:30,264

2022-12-10 20:30:32,537 : INFO : topic #28 (0.020): 0.087*"oscar (best cinematography)" + 0.073*"Drama" + 0.057*"oscar (best supporting actress)" + 0.049*"in netflix queue" + 0.044*"oscar (best actress)" + 0.029*"remade" + 0.019*"marlon brando" + 0.014*"exceptional acting" + 0.013*"china" + 0.013*"alcoholism"
2022-12-10 20:30:32,538 : INFO : topic #12 (0.020): 0.126*"comic book" + 0.101*"superhero" + 0.074*"dystopia" + 0.061*"holocaust" + 0.055*"super-hero" + 0.032*"adapted from:comic" + 0.024*"kidnapping" + 0.022*"batman" + 0.020*"alter ego" + 0.016*"Action"
2022-12-10 20:30:32,539 : INFO : topic #27 (0.020): 0.321*"Horror" + 0.154*"Thriller" + 0.127*"Mystery" + 0.039*"Drama" + 0.028*"Fantasy" + 0.027*"easily confused with other movie(s) (title)" + 0.026*"Film-Noir" + 0.017*"eerie" + 0.015*"slasher" + 0.015*"franchise"
2022-12-10 20:30:32,541 : INFO : topic #41 (0.020): 0.116*"remake" + 0.068*"christmas" + 0.063*"new york city" + 0.044*"family" + 0.036*"new york" + 0.036*"bibliothek" 

2022-12-10 20:30:34,372 : INFO : topic #15 (0.020): 0.166*"disney" + 0.091*"animation" + 0.076*"pixar" + 0.047*"Animation" + 0.043*"children" + 0.039*"Children" + 0.034*"disney animated feature" + 0.025*"angelina jolie" + 0.024*"pirates" + 0.020*"cartoon"
2022-12-10 20:30:34,373 : INFO : topic #33 (0.020): 0.112*"comedy" + 0.108*"Comedy" + 0.056*"funny" + 0.036*"parody" + 0.036*"dark comedy" + 0.033*"quirky" + 0.032*"seen more than once" + 0.029*"coen brothers" + 0.028*"hilarious" + 0.022*"black comedy"
2022-12-10 20:30:34,374 : INFO : topic #27 (0.020): 0.309*"Horror" + 0.157*"Thriller" + 0.127*"Mystery" + 0.040*"Drama" + 0.029*"Film-Noir" + 0.029*"easily confused with other movie(s) (title)" + 0.029*"Fantasy" + 0.016*"eerie" + 0.015*"franchise" + 0.015*"tumey's dvds"
2022-12-10 20:30:34,375 : INFO : topic #44 (0.020): 0.109*"mafia" + 0.087*"music" + 0.053*"martin scorsese" + 0.050*"organized crime" + 0.042*"rock and roll" + 0.031*"Musical" + 0.030*"wired 50 greatest soundtracks" + 0.

2022-12-10 20:30:37,199 : INFO : topic #42 (0.020): 0.084*"robin williams" + 0.077*"japan" + 0.067*"martial arts" + 0.037*"samurai" + 0.035*"akira kurosawa" + 0.033*"orson welles" + 0.032*"divorce" + 0.030*"Drama" + 0.025*"kurosawa" + 0.022*"jonossa"
2022-12-10 20:30:37,200 : INFO : topic #30 (0.020): 0.236*"Adventure" + 0.129*"Children" + 0.112*"Comedy" + 0.106*"Fantasy" + 0.078*"Action" + 0.049*"Animation" + 0.046*"Drama" + 0.044*"70mm" + 0.020*"Musical" + 0.012*"submarine"
2022-12-10 20:30:37,203 : INFO : topic diff=0.175800, rho=0.227387
2022-12-10 20:30:37,204 : INFO : PROGRESS: pass 13, at document #4000/10681
2022-12-10 20:30:37,494 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:37,533 : INFO : topic #4 (0.020): 0.072*"sports" + 0.062*"Drama" + 0.055*"boxing" + 0.045*"underrated" + 0.045*"british" + 0.039*"london" + 0.034*"sven's to see list" + 0.033*"hw drama" + 0.030*"sylvester stallone" + 0.028*"inspirational"
2022-12-10 20:30:37

2022-12-10 20:30:38,839 : INFO : topic diff=0.091201, rho=0.227387
2022-12-10 20:30:38,841 : INFO : PROGRESS: pass 14, at document #2000/10681
2022-12-10 20:30:39,137 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:39,169 : INFO : topic #22 (0.020): 0.172*"betamax" + 0.091*"dvd-video" + 0.082*"clv" + 0.038*"library on hold" + 0.034*"aviation" + 0.026*"terry gilliam" + 0.019*"dvd collection" + 0.017*"steven seagal" + 0.016*"gilliam" + 0.016*"family drama"
2022-12-10 20:30:39,171 : INFO : topic #40 (0.020): 0.270*"r" + 0.138*"clearplay" + 0.064*"movie to see" + 0.056*"prison" + 0.044*"Drama" + 0.035*"morgan freeman" + 0.028*"friendship" + 0.021*"conspiracy" + 0.015*"1970s" + 0.012*"mel brooks"
2022-12-10 20:30:39,172 : INFO : topic #9 (0.020): 0.264*"based on a book" + 0.112*"adapted from:book" + 0.053*"Drama" + 0.042*"stephen king" + 0.042*"jack nicholson" + 0.035*"based on book" + 0.027*"imdb bottom 100" + 0.022*"literary adaptation" + 0.02

2022-12-10 20:30:40,721 : INFO : topic #43 (0.020): 0.077*"surreal" + 0.040*"narrated" + 0.038*"satirical" + 0.032*"cynical" + 0.032*"irreverent" + 0.031*"quirky" + 0.030*"dreamlike" + 0.027*"biting" + 0.023*"hallucinatory" + 0.021*"stanley kubrick"
2022-12-10 20:30:40,722 : INFO : topic #35 (0.020): 0.355*"Comedy" + 0.314*"Drama" + 0.201*"Romance" + 0.013*"lesbian" + 0.008*"sean penn" + 0.008*"bibliothek" + 0.007*"philip seymour hoffman" + 0.005*"m. night shyamalan" + 0.005*"oscar (best foreign language film)" + 0.003*"library vhs"
2022-12-10 20:30:40,723 : INFO : topic #31 (0.020): 0.160*"zombies" + 0.097*"pg-13" + 0.056*"horror" + 0.028*"zombie" + 0.026*"infidelity" + 0.024*"movie to see" + 0.023*"betrayal" + 0.022*"books" + 0.022*"joaquin phoenix" + 0.021*"cult classic"
2022-12-10 20:30:40,726 : INFO : topic diff=0.089153, rho=0.221727
2022-12-10 20:30:40,728 : INFO : PROGRESS: pass 15, at document #2000/10681
2022-12-10 20:30:41,036 : INFO : merging changes from 2000 documents int

2022-12-10 20:30:42,319 : INFO : topic #37 (0.020): 0.194*"War" + 0.112*"Drama" + 0.102*"world war ii" + 0.057*"war" + 0.049*"history" + 0.036*"Action" + 0.031*"jim carrey" + 0.018*"africa" + 0.017*"nazis" + 0.014*"wwii"
2022-12-10 20:30:42,322 : INFO : topic diff=0.039867, rho=0.216470
2022-12-10 20:30:42,515 : INFO : -20.968 per-word bound, 2051264.9 perplexity estimate based on a held-out corpus of 681 documents with 5199 words
2022-12-10 20:30:42,516 : INFO : PROGRESS: pass 15, at document #10681/10681
2022-12-10 20:30:42,640 : INFO : merging changes from 681 documents into a model of 10681 documents
2022-12-10 20:30:42,683 : INFO : topic #40 (0.020): 0.310*"r" + 0.159*"clearplay" + 0.074*"movie to see" + 0.049*"Drama" + 0.032*"prison" + 0.029*"friendship" + 0.027*"morgan freeman" + 0.016*"conspiracy" + 0.013*"1970s" + 0.013*"don cheadle"
2022-12-10 20:30:42,685 : INFO : topic #39 (0.020): 0.150*"time travel" + 0.054*"adultery" + 0.046*"motorcycle" + 0.046*"post apocalyptic" + 0.04

2022-12-10 20:30:43,943 : INFO : PROGRESS: pass 16, at document #10000/10681
2022-12-10 20:30:44,234 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:44,270 : INFO : topic #40 (0.020): 0.282*"r" + 0.124*"clearplay" + 0.057*"movie to see" + 0.046*"Drama" + 0.039*"prison" + 0.029*"morgan freeman" + 0.025*"friendship" + 0.018*"john wayne" + 0.018*"conspiracy" + 0.015*"don cheadle"
2022-12-10 20:30:44,272 : INFO : topic #7 (0.020): 0.108*"romance" + 0.062*"Romance" + 0.045*"chick flick" + 0.041*"boring" + 0.026*"girlie movie" + 0.025*"baseball" + 0.024*"love story" + 0.024*"whimsical" + 0.024*"Comedy" + 0.019*"wedding"
2022-12-10 20:30:44,273 : INFO : topic #45 (0.020): 0.258*"less than 300 ratings" + 0.117*"Drama" + 0.059*"nudity (topless - notable)" + 0.038*"tim burton" + 0.031*"not corv lib" + 0.026*"library" + 0.017*"depressing" + 0.016*"seen 2007" + 0.016*"blindfold" + 0.013*"michael moore"
2022-12-10 20:30:44,275 : INFO : topic #41 (0.020)

2022-12-10 20:30:45,873 : INFO : topic #39 (0.020): 0.182*"time travel" + 0.054*"adultery" + 0.041*"motorcycle" + 0.038*"post apocalyptic" + 0.035*"post-apocalyptic" + 0.030*"jude law" + 0.024*"dystopia" + 0.019*"want to own" + 0.019*"ian mckellen" + 0.018*"hollywood"
2022-12-10 20:30:45,874 : INFO : topic #48 (0.020): 0.089*"bruce willis" + 0.081*"twist ending" + 0.063*"netflix" + 0.047*"coming of age" + 0.033*"journalism" + 0.031*"sexy" + 0.026*"Drama" + 0.025*"avi" + 0.022*"to see" + 0.022*"talking animals"
2022-12-10 20:30:45,875 : INFO : topic #8 (0.020): 0.142*"anime" + 0.090*"tom hanks" + 0.077*"true story" + 0.056*"based on a true story" + 0.027*"drama" + 0.026*"good" + 0.025*"japan" + 0.023*"interesting" + 0.021*"not funny" + 0.018*"archaeology"
2022-12-10 20:30:45,876 : INFO : topic #15 (0.020): 0.140*"disney" + 0.096*"animation" + 0.079*"pixar" + 0.046*"Animation" + 0.040*"children" + 0.037*"Children" + 0.033*"pirates" + 0.029*"disney animated feature" + 0.025*"angelina joli

2022-12-10 20:30:47,785 : INFO : topic #47 (0.020): 0.065*"serial killer" + 0.056*"psychology" + 0.053*"brad pitt" + 0.042*"edward norton" + 0.032*"steven spielberg" + 0.032*"heist" + 0.028*"matt damon" + 0.024*"crime" + 0.021*"hayao miyazaki" + 0.017*"scary"
2022-12-10 20:30:47,786 : INFO : topic #34 (0.020): 0.093*"ghosts" + 0.044*"bill murray" + 0.041*"television" + 0.037*"courtroom" + 0.033*"courtroom drama" + 0.023*"roman polanski" + 0.022*"court" + 0.018*"secret service" + 0.017*"2.5" + 0.016*"opera"
2022-12-10 20:30:47,789 : INFO : topic diff=0.054279, rho=0.202691
2022-12-10 20:30:47,792 : INFO : PROGRESS: pass 18, at document #8000/10681
2022-12-10 20:30:48,103 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:48,148 : INFO : topic #16 (0.020): 0.275*"nudity (topless)" + 0.186*"nudity (topless - brief)" + 0.067*"nudity (rear)" + 0.028*"pg" + 0.027*"claymation" + 0.022*"military" + 0.018*"childhood" + 0.017*"weird" + 0.017*"aardman" +

2022-12-10 20:30:49,686 : INFO : topic diff=0.027116, rho=0.198652
2022-12-10 20:30:49,688 : INFO : PROGRESS: pass 19, at document #6000/10681
2022-12-10 20:30:49,989 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:50,026 : INFO : topic #5 (0.020): 0.277*"classic" + 0.065*"musical" + 0.045*"afi 100 (laughs)" + 0.033*"Musical" + 0.029*"national film registry" + 0.021*"afi 100" + 0.018*"john travolta" + 0.017*"70mm" + 0.016*"adapted from b'way" + 0.015*"breakthroughs"
2022-12-10 20:30:50,027 : INFO : topic #36 (0.020): 0.084*"cult film" + 0.046*"peter jackson" + 0.044*"russell crowe" + 0.038*"downbeat" + 0.031*"adam sandler" + 0.029*"1980s" + 0.022*"80s" + 0.021*"new zealand" + 0.019*"not available from netflix" + 0.014*"must see!"
2022-12-10 20:30:50,028 : INFO : topic #11 (0.020): 0.129*"religion" + 0.115*"keanu reeves" + 0.035*"island" + 0.021*"slavery" + 0.018*"courtesan" + 0.017*"reality tv" + 0.016*"class issues" + 0.016*"slash" + 0.016

2022-12-10 20:30:51,871 : INFO : topic #9 (0.020): 0.245*"based on a book" + 0.112*"adapted from:book" + 0.054*"Drama" + 0.049*"stephen king" + 0.041*"jack nicholson" + 0.037*"based on book" + 0.026*"imdb bottom 100" + 0.024*"stupid" + 0.021*"literary adaptation" + 0.021*"los angeles"
2022-12-10 20:30:51,872 : INFO : topic #36 (0.020): 0.089*"cult film" + 0.047*"russell crowe" + 0.040*"downbeat" + 0.032*"adam sandler" + 0.032*"1980s" + 0.030*"peter jackson" + 0.020*"80s" + 0.020*"not available from netflix" + 0.020*"new zealand" + 0.016*"virus"
2022-12-10 20:30:51,873 : INFO : topic #15 (0.020): 0.162*"disney" + 0.091*"animation" + 0.076*"pixar" + 0.049*"Animation" + 0.043*"children" + 0.040*"Children" + 0.033*"disney animated feature" + 0.026*"angelina jolie" + 0.025*"pirates" + 0.020*"cartoon"
2022-12-10 20:30:51,874 : INFO : topic #42 (0.020): 0.092*"robin williams" + 0.089*"martial arts" + 0.070*"japan" + 0.039*"samurai" + 0.036*"akira kurosawa" + 0.029*"Drama" + 0.027*"orson welle

2022-12-10 20:30:53,770 : INFO : topic #15 (0.020): 0.167*"disney" + 0.094*"animation" + 0.074*"pixar" + 0.047*"Animation" + 0.043*"children" + 0.038*"Children" + 0.030*"disney animated feature" + 0.029*"pirates" + 0.024*"angelina jolie" + 0.021*"cartoon"
2022-12-10 20:30:53,771 : INFO : topic #23 (0.020): 0.084*"high school" + 0.061*"teen" + 0.051*"cars" + 0.035*"dance" + 0.028*"marx brothers" + 0.027*"sandra bullock" + 0.023*"Comedy" + 0.021*"kate winslet" + 0.020*"(s)vcd" + 0.020*"eddie murphy"
2022-12-10 20:30:53,773 : INFO : topic diff=0.143643, rho=0.191248
2022-12-10 20:30:53,776 : INFO : PROGRESS: pass 21, at document #4000/10681
2022-12-10 20:30:54,129 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:54,178 : INFO : topic #13 (0.020): 0.054*"fairy tale" + 0.044*"jane austen" + 0.041*"road trip" + 0.036*"sequel" + 0.030*"assassination" + 0.029*"cold war" + 0.027*"historical" + 0.022*"kids" + 0.021*"heartwarming" + 0.020*"gerard depar

2022-12-10 20:30:55,733 : INFO : PROGRESS: pass 22, at document #2000/10681
2022-12-10 20:30:56,075 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:30:56,112 : INFO : topic #2 (0.020): 0.278*"Documentary" + 0.121*"to see" + 0.055*"documentary" + 0.030*"mockumentary" + 0.029*"star trek" + 0.025*"kevin smith" + 0.022*"movie to see" + 0.020*"dogs" + 0.019*"sexuality" + 0.016*"owen wilson"
2022-12-10 20:30:56,114 : INFO : topic #34 (0.020): 0.073*"ghosts" + 0.050*"bill murray" + 0.043*"television" + 0.030*"courtroom" + 0.029*"courtroom drama" + 0.026*"secret service" + 0.020*"roman polanski" + 0.017*"18th century" + 0.017*"opera" + 0.016*"court"
2022-12-10 20:30:56,115 : INFO : topic #31 (0.020): 0.143*"zombies" + 0.076*"pg-13" + 0.074*"horror" + 0.031*"cult classic" + 0.027*"zombie" + 0.024*"sam raimi" + 0.023*"campy" + 0.021*"infidelity" + 0.021*"books" + 0.020*"joaquin phoenix"
2022-12-10 20:30:56,115 : INFO : topic #33 (0.020): 0.109*"Comedy" 

2022-12-10 20:30:57,781 : INFO : topic #22 (0.020): 0.209*"betamax" + 0.086*"dvd-video" + 0.053*"clv" + 0.051*"library on hold" + 0.036*"aviation" + 0.019*"family drama" + 0.019*"seen 2008" + 0.018*"terry gilliam" + 0.013*"gilliam" + 0.013*"dvd collection"
2022-12-10 20:30:57,782 : INFO : topic #20 (0.020): 0.100*"biography" + 0.071*"Drama" + 0.055*"death" + 0.045*"suicide" + 0.045*"biopic" + 0.038*"corvallis library" + 0.031*"ensemble cast" + 0.028*"julia roberts" + 0.023*"forest whitaker" + 0.023*"history"
2022-12-10 20:30:57,783 : INFO : topic #39 (0.020): 0.156*"time travel" + 0.055*"adultery" + 0.045*"motorcycle" + 0.045*"post apocalyptic" + 0.042*"post-apocalyptic" + 0.035*"jude law" + 0.024*"bechdel test:fail" + 0.023*"dystopia" + 0.019*"want to own" + 0.017*"old"
2022-12-10 20:30:57,784 : INFO : topic #42 (0.020): 0.092*"japan" + 0.088*"martial arts" + 0.063*"robin williams" + 0.038*"akira kurosawa" + 0.035*"samurai" + 0.035*"Drama" + 0.028*"seen 2006" + 0.027*"divorce" + 0.027

2022-12-10 20:30:59,363 : INFO : topic #34 (0.020): 0.075*"ghosts" + 0.058*"bill murray" + 0.040*"television" + 0.038*"courtroom" + 0.032*"courtroom drama" + 0.024*"roman polanski" + 0.021*"ben stiller" + 0.019*"las vegas" + 0.018*"court" + 0.016*"secret service"
2022-12-10 20:30:59,364 : INFO : topic #40 (0.020): 0.278*"r" + 0.124*"clearplay" + 0.058*"movie to see" + 0.047*"Drama" + 0.040*"prison" + 0.029*"morgan freeman" + 0.025*"friendship" + 0.018*"conspiracy" + 0.018*"john wayne" + 0.015*"1970s"
2022-12-10 20:30:59,366 : INFO : topic diff=0.033403, rho=0.184615
2022-12-10 20:30:59,543 : INFO : -20.964 per-word bound, 2045057.6 perplexity estimate based on a held-out corpus of 681 documents with 5199 words
2022-12-10 20:30:59,544 : INFO : PROGRESS: pass 23, at document #10681/10681
2022-12-10 20:30:59,645 : INFO : merging changes from 681 documents into a model of 10681 documents
2022-12-10 20:30:59,680 : INFO : topic #14 (0.020): 0.059*"criterion" + 0.053*"disturbing" + 0.051*"Dra

2022-12-10 20:31:00,983 : INFO : topic diff=0.039945, rho=0.181547
2022-12-10 20:31:00,985 : INFO : PROGRESS: pass 24, at document #10000/10681
2022-12-10 20:31:01,235 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:31:01,267 : INFO : topic #29 (0.020): 0.095*"nudity (full frontal - notable)" + 0.054*"Drama" + 0.052*"shakespeare" + 0.045*"based on a play" + 0.036*"christianity" + 0.034*"adapted from:play" + 0.029*"religion" + 0.021*"biblical" + 0.021*"disability" + 0.019*"fascism"
2022-12-10 20:31:01,268 : INFO : topic #26 (0.020): 0.045*"Drama" + 0.033*"james bond" + 0.021*"reflective" + 0.021*"poignant" + 0.020*"007" + 0.020*"atmospheric" + 0.019*"bittersweet" + 0.019*"lyrical" + 0.019*"bond" + 0.018*"deliberate"
2022-12-10 20:31:01,269 : INFO : topic #13 (0.020): 0.062*"fairy tale" + 0.048*"road trip" + 0.041*"sequel" + 0.040*"jane austen" + 0.026*"historical" + 0.025*"gerard depardieu" + 0.024*"swashbuckler" + 0.024*"assassination" + 0.024

2022-12-10 20:31:03,090 : INFO : topic #47 (0.020): 0.063*"serial killer" + 0.057*"psychology" + 0.050*"brad pitt" + 0.041*"edward norton" + 0.034*"heist" + 0.028*"matt damon" + 0.028*"steven spielberg" + 0.027*"hayao miyazaki" + 0.023*"crime" + 0.016*"scary"
2022-12-10 20:31:03,091 : INFO : topic #22 (0.020): 0.259*"betamax" + 0.101*"dvd-video" + 0.068*"clv" + 0.030*"aviation" + 0.020*"library on hold" + 0.019*"terry gilliam" + 0.017*"dvd collection" + 0.015*"seen 2008" + 0.014*"gilliam" + 0.012*"steven seagal"
2022-12-10 20:31:03,092 : INFO : topic #5 (0.020): 0.270*"classic" + 0.062*"musical" + 0.045*"afi 100 (laughs)" + 0.036*"Musical" + 0.030*"national film registry" + 0.019*"70mm" + 0.018*"afi 100" + 0.016*"john travolta" + 0.016*"adapted from b'way" + 0.015*"breakthroughs"
2022-12-10 20:31:03,093 : INFO : topic #8 (0.020): 0.138*"anime" + 0.090*"tom hanks" + 0.078*"true story" + 0.058*"based on a true story" + 0.027*"drama" + 0.025*"good" + 0.025*"japan" + 0.023*"interesting" + 

2022-12-10 20:31:04,875 : INFO : topic #18 (0.020): 0.082*"magic" + 0.079*"gay" + 0.062*"racism" + 0.044*"pg13" + 0.040*"Drama" + 0.027*"food" + 0.027*"social commentary" + 0.023*"homosexuality" + 0.020*"19th century" + 0.019*"denzel washington"
2022-12-10 20:31:04,876 : INFO : topic #19 (0.020): 0.062*"nudity (full frontal)" + 0.061*"drama" + 0.035*"Drama" + 0.034*"philip k. dick" + 0.028*"vietnam war" + 0.027*"rape" + 0.025*"vietnam" + 0.022*"very good" + 0.020*"to see" + 0.016*"slow"
2022-12-10 20:31:04,879 : INFO : topic diff=0.046241, rho=0.175844
2022-12-10 20:31:04,882 : INFO : PROGRESS: pass 26, at document #8000/10681
2022-12-10 20:31:05,164 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:31:05,205 : INFO : topic #2 (0.020): 0.287*"Documentary" + 0.104*"to see" + 0.064*"documentary" + 0.026*"mockumentary" + 0.025*"sexuality" + 0.023*"star trek" + 0.021*"dogs" + 0.021*"kevin smith" + 0.018*"movie to see" + 0.017*"in netflix queue"
2022

2022-12-10 20:31:06,644 : INFO : topic diff=0.023122, rho=0.173186
2022-12-10 20:31:06,650 : INFO : PROGRESS: pass 27, at document #6000/10681
2022-12-10 20:31:07,001 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:31:07,037 : INFO : topic #18 (0.020): 0.081*"magic" + 0.079*"gay" + 0.062*"racism" + 0.044*"pg13" + 0.040*"Drama" + 0.027*"food" + 0.027*"social commentary" + 0.023*"homosexuality" + 0.020*"19th century" + 0.019*"denzel washington"
2022-12-10 20:31:07,038 : INFO : topic #32 (0.020): 0.135*"johnny depp" + 0.067*"clint eastwood" + 0.059*"vhs" + 0.045*"jackie chan" + 0.037*"western" + 0.031*"kung fu" + 0.031*"leonardo dicaprio" + 0.030*"spaghetti western" + 0.029*"propaganda" + 0.029*"david lynch"
2022-12-10 20:31:07,039 : INFO : topic #26 (0.020): 0.042*"Drama" + 0.035*"james bond" + 0.024*"007" + 0.023*"bond" + 0.021*"mel gibson" + 0.021*"reflective" + 0.020*"atmospheric" + 0.019*"poignant" + 0.018*"lyrical" + 0.017*"bittersweet"
202

2022-12-10 20:31:09,001 : INFO : topic #7 (0.020): 0.113*"romance" + 0.065*"Romance" + 0.054*"chick flick" + 0.038*"boring" + 0.030*"girlie movie" + 0.027*"baseball" + 0.023*"Comedy" + 0.020*"love story" + 0.019*"whimsical" + 0.017*"wedding"
2022-12-10 20:31:09,002 : INFO : topic #5 (0.020): 0.292*"classic" + 0.062*"musical" + 0.045*"afi 100 (laughs)" + 0.034*"Musical" + 0.030*"national film registry" + 0.021*"afi 100" + 0.016*"70mm" + 0.015*"adapted from b'way" + 0.015*"john travolta" + 0.015*"breakthroughs"
2022-12-10 20:31:09,003 : INFO : topic #31 (0.020): 0.127*"zombies" + 0.074*"horror" + 0.072*"pg-13" + 0.039*"cult classic" + 0.025*"infidelity" + 0.025*"campy" + 0.024*"zombie" + 0.024*"joaquin phoenix" + 0.024*"sam raimi" + 0.020*"books"
2022-12-10 20:31:09,006 : INFO : topic diff=0.022572, rho=0.170646
2022-12-10 20:31:09,008 : INFO : PROGRESS: pass 28, at document #6000/10681
2022-12-10 20:31:09,254 : INFO : merging changes from 2000 documents into a model of 10681 documents
2

2022-12-10 20:31:10,473 : INFO : topic #17 (0.020): 0.330*"Sci-Fi" + 0.071*"Horror" + 0.068*"Action" + 0.044*"video game adaptation" + 0.027*"animals" + 0.025*"movie to see" + 0.022*"football" + 0.018*"futuristic" + 0.015*"milla jovovich" + 0.013*"g"
2022-12-10 20:31:10,474 : INFO : topic #34 (0.020): 0.074*"ghosts" + 0.051*"bill murray" + 0.044*"television" + 0.030*"courtroom" + 0.030*"courtroom drama" + 0.026*"secret service" + 0.021*"roman polanski" + 0.017*"18th century" + 0.017*"opera" + 0.017*"court"
2022-12-10 20:31:10,476 : INFO : topic diff=0.124178, rho=0.168215
2022-12-10 20:31:10,477 : INFO : PROGRESS: pass 29, at document #4000/10681
2022-12-10 20:31:10,741 : INFO : merging changes from 2000 documents into a model of 10681 documents
2022-12-10 20:31:10,773 : INFO : topic #31 (0.020): 0.127*"zombies" + 0.074*"horror" + 0.072*"pg-13" + 0.039*"cult classic" + 0.025*"infidelity" + 0.025*"campy" + 0.024*"zombie" + 0.024*"joaquin phoenix" + 0.024*"sam raimi" + 0.020*"books"
2022

2022-12-10 20:31:11,899 : INFO : LdaModel lifecycle event {'msg': 'trained LdaModel<num_terms=15261, num_topics=50, decay=0.5, chunksize=2000> in 68.59s', 'datetime': '2022-12-10T20:31:11.899611', 'gensim': '4.2.0', 'python': '3.9.15 (main, Nov 24 2022, 14:39:17) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.19045-SP0', 'event': 'created'}


In [5]:
# Evaluation: score the recommendation result against the test split.
metric_calculator = MetricCalculator()

# Flatten the rating columns into plain lists for the calculator.
true_ratings = movielens.test.rating.tolist()
pred_ratings = recommend_result.rating.tolist()

# Ranking metrics are computed with a cutoff of k=10.
metrics = metric_calculator.calc(
    true_ratings,
    pred_ratings,
    movielens.test_user2items,
    recommend_result.user2items,
    k=10,
)
print(metrics)

rmse=0.000, Precision@K=0.004, Recall@K=0.012
