<a href="https://colab.research.google.com/github/momokaakiyama/cinema/blob/main/chapter5/colab/LDA_content.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/oreilly-japan/RecommenderSystems/blob/main/chapter5/colab/LDA_content.ipynb)

# Latent Dirichlet Allocation (LDA)(内容ベースフィルタリング)

In [1]:
# Colab用のnotebookです。このnotebook1枚でデータのダウンロードから、レコメンドまで完結するようになっています。（予測評価は含めていません。）
# MovieLensデータがまだダウンロードされてなければこのセルを実行して、ダウンロードしてください
# MovieLensデータの分析は、data_download.ipynbをご参照ください

#1 データのダウンロードと解凍
!wget -nc --no-check-certificate https://files.grouplens.org/datasets/movielens/ml-10m.zip -P ../data
!unzip -n ../data/ml-10m.zip -d ../data/

--2026-02-08 01:37:03--  https://files.grouplens.org/datasets/movielens/ml-10m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.96.204
Connecting to files.grouplens.org (files.grouplens.org)|128.101.96.204|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 65566137 (63M) [application/zip]
Saving to: ‘../data/ml-10m.zip’


2026-02-08 01:37:07 (22.1 MB/s) - ‘../data/ml-10m.zip’ saved [65566137/65566137]

Archive:  ../data/ml-10m.zip
   creating: ../data/ml-10M100K/
  inflating: ../data/ml-10M100K/allbut.pl  
  inflating: ../data/ml-10M100K/movies.dat  
  inflating: ../data/ml-10M100K/ratings.dat  
  inflating: ../data/ml-10M100K/README.html  
  inflating: ../data/ml-10M100K/split_ratings.sh  
  inflating: ../data/ml-10M100K/tags.dat  


In [2]:
#2 Movielensのデータの読み込み（データ量が多いため、読み込みに時間がかかる場合があります）
import pandas as pd

# movieIDとタイトル名のみ使用
m_cols = ['movie_id', 'title', 'genre']
movies = pd.read_csv('../data/ml-10M100K/movies.dat', names=m_cols, sep='::' , encoding='latin-1', engine='python')

# genreをlist形式で保持する
movies['genre'] = movies.genre.apply(lambda x:x.split('|'))


# ユーザが付与した映画のタグ情報の読み込み
t_cols = ['user_id', 'movie_id', 'tag', 'timestamp']
user_tagged_movies = pd.read_csv('../data/ml-10M100K/tags.dat', names=t_cols, sep='::', engine='python')

# tagを小文字にする
user_tagged_movies['tag'] = user_tagged_movies['tag'].str.lower()


# tagを映画ごとにlist形式で保持する
movie_tags = user_tagged_movies.groupby('movie_id').agg({'tag':list})

# タグ情報を結合する
movies = movies.merge(movie_tags, on='movie_id', how='left')

# 評価値データの読み込み
r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv('../data/ml-10M100K/ratings.dat', names=r_cols, sep='::', engine='python')


# データ量が多いため、ユーザー数を1000に絞って、試していく
valid_user_ids = sorted(ratings.user_id.unique())[:1000]
ratings = ratings[ratings["user_id"].isin(valid_user_ids)]


# 映画のデータと評価のデータを結合する
movielens = ratings.merge(movies, on='movie_id')

print(f'unique_users={len(movielens.user_id.unique())}, unique_movies={len(movielens.movie_id.unique())}')

# 学習用とテスト用にデータを分割する
# 各ユーザの直近の５件の映画を評価用に使い、それ以外を学習用とする
# まずは、それぞれのユーザが評価した映画の順序を計算する
# 直近付与した映画から順番を付与していく(1始まり)

movielens['timestamp_rank'] = movielens.groupby(
    'user_id')['timestamp'].rank(ascending=False, method='first')
movielens_train = movielens[movielens['timestamp_rank'] > 5]
movielens_test = movielens[movielens['timestamp_rank']<= 5]

unique_users=1000, unique_movies=6736


In [3]:
#3 LDAの学習に必要なパラメタを定義
# 因子数(トピック数)
factors = 50
# エポック数
n_epochs = 30

In [4]:
movie_content = movies.copy()
# tagが付与されていない映画もあるが、genreはすべての映画に付与されている
# tagとgenreを結合したものを映画のコンテンツ情報として似ている映画を探して推薦していく
# tagがない映画に関しては、NaNになっているので、空のリストに変換してから処理をする
movie_content["tag_genre"] = movie_content["tag"].fillna("").apply(list) + movie_content["genre"].apply(list)
movie_content["tag_genre"] = movie_content["tag_genre"].apply(lambda x: list(map(str, x)))

#4 タグとジャンルをあわせたリストデータを作成
tag_genre_data = movie_content.tag_genre.tolist()

In [5]:
#5 ColabのPythonにgensimをインストールする
import sys
!{sys.executable} -m pip install gensim

Collecting gensim
  Downloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Downloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (27.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.9/27.9 MB[0m [31m65.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gensim
Successfully installed gensim-4.4.0


In [6]:
from gensim.corpora.dictionary import Dictionary

#6 LDAのインプットに使えるデータに変換する
common_dictionary = Dictionary(tag_genre_data)
common_corpus = [common_dictionary.doc2bow(text) for text in tag_genre_data]

In [7]:
import gensim

#7 LDAの学習（50個のトピックに分割する）
lda_model = gensim.models.LdaModel(
    common_corpus, id2word=common_dictionary, num_topics=factors, passes=n_epochs
)

In [8]:
# (参考)topic0の単語一覧（LDAが見つけた潜在トピックを構成する単語を確認）
for token_id, score in lda_model.get_topic_terms(0, topn=10):
    word = common_dictionary.id2token[token_id]
    print(f'word={word}, score={score}')

word=christmas, score=0.08566116541624069
word=hayao miyazaki, score=0.041716646403074265
word=paris, score=0.04064499959349632
word=Comedy, score=0.0354841984808445
word=movie to see, score=0.029465563595294952
word=adam sandler, score=0.0294365044683218
word=IMAX, score=0.028592832386493683
word=torture, score=0.023575806990265846
word=dvd, score=0.01721859723329544
word=feel-good, score=0.01699959859251976


In [9]:
# (参考)サムライという単語のトピック(各トピックの所属確率)
lda_model[common_dictionary.doc2bow(['samurai'])]

[(0, np.float32(0.010000044)),
 (1, np.float32(0.010000044)),
 (2, np.float32(0.010000044)),
 (3, np.float32(0.010000044)),
 (4, np.float32(0.010000044)),
 (5, np.float32(0.010000044)),
 (6, np.float32(0.010000044)),
 (7, np.float32(0.010000044)),
 (8, np.float32(0.010000044)),
 (9, np.float32(0.010000044)),
 (10, np.float32(0.010000044)),
 (11, np.float32(0.010000044)),
 (12, np.float32(0.010000044)),
 (13, np.float32(0.010000044)),
 (14, np.float32(0.010000044)),
 (15, np.float32(0.010000044)),
 (16, np.float32(0.010000044)),
 (17, np.float32(0.010000044)),
 (18, np.float32(0.010000044)),
 (19, np.float32(0.010000044)),
 (20, np.float32(0.010000044)),
 (21, np.float32(0.010000044)),
 (22, np.float32(0.010000044)),
 (23, np.float32(0.010000044)),
 (24, np.float32(0.010000044)),
 (25, np.float32(0.010000044)),
 (26, np.float32(0.010000044)),
 (27, np.float32(0.010000044)),
 (28, np.float32(0.010000044)),
 (29, np.float32(0.010000044)),
 (30, np.float32(0.010000044)),
 (31, np.float32(0

In [10]:
#8 各映画のトピックを格納
lda_topics = lda_model[common_corpus]

#9 各映画に最も確率の高いトピックを１つ取り出し格納していく
movie_topics = []
movie_topic_scores = []
for movie_index, lda_topic in enumerate(lda_topics):
    sorted_topic = sorted(lda_topics[movie_index], key=lambda x: -x[1])
    # 最も確率の高いトピック
    movie_topic, topic_score = sorted_topic[0]
    movie_topics.append(movie_topic)
    movie_topic_scores.append(topic_score)
movie_content["topic"] = movie_topics
movie_content["topic_score"] = movie_topic_scores
movie_content

Unnamed: 0,movie_id,title,genre,tag,tag_genre,topic,topic_score
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]","[pixar, pixar, pixar, animation, pixar, animat...","[pixar, pixar, pixar, animation, pixar, animat...",4,0.696643
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]","[for children, game, animals, joe johnston, ro...","[for children, game, animals, joe johnston, ro...",33,0.349239
2,3,Grumpier Old Men (1995),"[Comedy, Romance]","[funniest movies, comedinha de velhinhos engra...","[funniest movies, comedinha de velhinhos engra...",17,0.601216
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",[girl movie],"[girl movie, Comedy, Drama, Romance]",29,0.755000
4,5,Father of the Bride Part II (1995),[Comedy],"[steve martin, pregnancy, remake, steve martin...","[steve martin, pregnancy, remake, steve martin...",41,0.453622
...,...,...,...,...,...,...,...
10676,65088,Bedtime Stories (2008),"[Adventure, Children, Comedy]",,"[Adventure, Children, Comedy]",33,0.755000
10677,65091,Manhattan Melodrama (1934),"[Crime, Drama, Romance]",,"[Crime, Drama, Romance]",29,0.390923
10678,65126,Choke (2008),"[Comedy, Drama]","[chuck palahniuk, based on book]","[chuck palahniuk, based on book, Comedy, Drama]",36,0.390100
10679,65130,Revolutionary Road (2008),"[Drama, Romance]",[toplist08],"[toplist08, Drama, Romance]",29,0.673333


In [11]:
from collections import defaultdict
from collections import Counter

#10 各ユーザーのレコメンドリストを作成していく
# ユーザーが高く評価した映画が、どのトピックに所属していることが多いかをカウントする
# 一番多いトピックをユーザーの好きなトピックとみなして、そのトピックの映画をおすすめする

movielens_train_high_rating = movielens_train[movielens_train.rating >= 4]
user_evaluated_movies = movielens_train.groupby("user_id").agg({"movie_id": list})["movie_id"].to_dict()

movie_id2index = dict(zip(movie_content.movie_id.tolist(), range(len(movie_content))))
pred_user2items = defaultdict(list)
for user_id, data in movielens_train_high_rating.groupby("user_id"):
    # ユーザーが高く評価した映画
    evaluated_movie_ids = user_evaluated_movies[user_id]
    # 直近閲覧した映画を取得
    movie_ids = data.sort_values("timestamp")["movie_id"].tolist()[-10:]

    movie_indexes = [movie_id2index[id] for id in movie_ids]

    # 直近閲覧した映画のトピックを取得して、出現回数をカウントする
    topic_counter = Counter([movie_topics[i] for i in movie_indexes])
    # 一番出現回数が多かったトピックを取得
    frequent_topic = topic_counter.most_common(1)[0][0]
    # そのトピックの映画の中でもスコアが高いものをおすすめする
    topic_movies = (
        movie_content[movie_content.topic == frequent_topic]
        .sort_values("topic_score", ascending=False)
        .movie_id.tolist()
    )

    for movie_id in topic_movies:
        if movie_id not in evaluated_movie_ids:
            pred_user2items[user_id].append(movie_id)
        if len(pred_user2items[user_id]) == 10:
            break

In [12]:
#11 User 2 が高く評価した映画と、それらに割り当てられたトピック
user2_high_rated_movies = movielens_train_high_rating[movielens_train_high_rating.user_id==2]
display(user2_high_rated_movies.merge(movie_content[['movie_id', 'topic', 'topic_score']], on='movie_id', how='left'))

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genre,tag,timestamp_rank,topic,topic_score
0,2,110,5.0,868245777,Braveheart (1995),"[Action, Drama, War]","[bullshit history, medieval, bloodshed, hero, ...",8.0,24,0.638021
1,2,260,5.0,868244562,Star Wars: Episode IV - A New Hope (a.k.a. Sta...,"[Action, Adventure, Sci-Fi]","[desert, quotable, lucas, gfei own it, seen mo...",17.0,49,0.510658
2,2,590,5.0,868245608,Dances with Wolves (1990),"[Adventure, Drama, Western]","[afi 100, lame, native, biopic, american india...",11.0,24,0.782918
3,2,1210,4.0,868245644,Star Wars: Episode VI - Return of the Jedi (1983),"[Action, Adventure, Sci-Fi]","[desert, fantasy, sci-fi, space, lucas, gfei o...",10.0,49,0.552832


In [13]:
#12 上記の結果から、user_id=2はtopic47の映画が一番多く評価しているため、システムはtopic49に属する未評価の映画を推薦する。
#User2に推薦された映画と、それらに割り当てられたトピック
user2_recommended_movie_ids = pred_user2items[2]
user2_recommended_movies = movies[movies.movie_id.isin(user2_recommended_movie_ids)]
display(user2_recommended_movies.merge(movie_content[['movie_id', 'topic', 'topic_score']], on='movie_id', how='left').sort_values('topic_score', ascending=False))

Unnamed: 0,movie_id,title,genre,tag,topic,topic_score
8,6296,"Mighty Wind, A (2003)","[Comedy, Drama, Musical]","[folk music, christopher guest, mockumentary, ...",49,0.662262
5,1923,There's Something About Mary (1998),"[Comedy, Romance]","[dumbest movie ever, not a very inteligent com...",49,0.618537
1,1288,This Is Spinal Tap (1984),"[Comedy, Musical]","[directorial debut, music business, christophe...",49,0.528249
0,1196,Star Wars: Episode V - The Empire Strikes Back...,"[Action, Adventure, Sci-Fi]","[lucas, george lucas, george lucas, gfei own i...",49,0.487119
7,2628,Star Wars: Episode I - The Phantom Menace (1999),"[Action, Adventure, Sci-Fi]","[far future, lucas, fantasy, george lucas, luc...",49,0.462712
9,33493,Star Wars: Episode III - Revenge of the Sith (...,"[Action, Adventure, Fantasy, Sci-Fi]","[lucas, divx, space, space opera, space, on co...",49,0.461033
6,2193,Willow (1988),"[Action, Adventure, Fantasy]","[fantasy, middle earth, ron howard, val kilmer...",49,0.458228
3,1449,Waiting for Guffman (1996),[Comedy],"[christopher guest, hysterical, mockumentary, ...",49,0.438035
2,1296,"Room with a View, A (1986)","[Comedy, Drama, Romance]","[seen 2008, e. m. forster, e. m. forster, bibl...",49,0.429506
4,1894,Six Days Seven Nights (1998),"[Adventure, Comedy, Romance]","[leading man too old., mediocre, vbnbvn, harri...",49,0.429128


このように、推薦された映画の多くも`topic`49に属していることが確認できる。これが、コンテンツベースレコメンデーションの動作原理である。ユーザーが高く評価したコンテンツの主題（トピック）を特定し、その主題に合致する新しいコンテンツを推薦している。

In [14]:
#13 今回作成したuser_id=-1のデータにtopicをマージした
import pandas as pd

user_ratings_df = pd.read_csv('/content/myselfdata.csv')
display(user_ratings_df.merge(movie_content[['movie_id', 'topic', 'topic_score']], on='movie_id', how='left'))

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genre,tag,timestamp_rank,topic,topic_score
0,-1,4896,5.0,1770030000.0,Harry Potter and the Sorcerer's Stone (a.k.a. ...,"['Adventure', 'Children', 'Fantasy']","['based on a book', 'franchise', 'adventure', ...",1,49,0.266884
1,-1,5816,5.0,1770030000.0,Harry Potter and the Chamber of Secrets (2002),"['Adventure', 'Children', 'Fantasy']","['based on a book', 'franchise', 'harry potter...",2,49,0.317388
2,-1,8368,4.0,1770030000.0,Harry Potter and the Prisoner of Azkaban (2004),"['Adventure', 'Children', 'Fantasy']","['based on a book', 'franchise', 'magic', 'har...",3,48,0.256361
3,-1,40815,4.5,1770030000.0,Harry Potter and the Goblet of Fire (2005),"['Adventure', 'Fantasy', 'Thriller']","['based on a book', 'big budget', 'franchise',...",4,48,0.281685
4,-1,54001,0.5,1770030000.0,Harry Potter and the Order of the Phoenix (2007),"['Adventure', 'Drama', 'Fantasy', 'IMAX']","['franchise', 'too short', 'alan rickman', 'br...",5,29,0.207292
5,-1,5971,3.0,1770030000.0,My Neighbor Totoro (Tonari no Totoro) (1988),"['Animation', 'Children', 'Fantasy']","['anime', 'miyazaki', 'anime', 'hayao miyazaki...",6,27,0.417354
6,-1,5618,5.0,1770030000.0,Spirited Away (Sen to Chihiro no kamikakushi) ...,"['Adventure', 'Animation', 'Children', 'Fantasy']","['anime', ""holy christ it's fantastic in every...",7,27,0.355508
7,-1,1022,4.5,1770030000.0,Cinderella (1950),"['Animation', 'Children', 'Fantasy', 'Musical'...","['disney', 'disney', 'library vhs', 'fairy tal...",8,4,0.392563
8,-1,2125,3.0,1770030000.0,Ever After: A Cinderella Story (1998),"['Comedy', 'Drama', 'Fantasy', 'Romance']","['definitely for girls', 'lovely', 'xvgb', 'fu...",9,16,0.233516
9,-1,63239,5.0,1770030000.0,Cinderella (1997),"['Children', 'Fantasy', 'Musical', 'Romance']",,10,36,0.298633


In [15]:
#14 user=-1のレコメンドリストを作成していく
# ユーザーが高く評価した映画が、どのトピックに所属していることが多いかをカウントする
# 一番多いトピックをユーザーの好きなトピックとみなして、そのトピックの映画をおすすめする

user_ratings_df_high_rated = user_ratings_df[user_ratings_df['rating'] >= 4.0]

movie_id2index = dict(zip(movie_content.movie_id.tolist(), range(len(movie_content))))
movie_indexes = [movie_id2index[id] for id in user_ratings_df_high_rated['movie_id']]
# 直近閲覧した映画のトピックを取得して、出現回数をカウントする
topic_counter = Counter([movie_topics[i] for i in movie_indexes])
# 一番出現回数が多かったトピックを取得
frequent_topic = topic_counter.most_common(1)[0][0]
# そのトピックの映画の中でもスコアが高いものをおすすめする
topic_movies = (
    movie_content[movie_content.topic == frequent_topic]
    .sort_values("topic_score", ascending=False)
    .movie_id.tolist()
)

pred_user2items = defaultdict(list)
for movie_id in topic_movies:
    if movie_id not in evaluated_movie_ids:
        pred_user2items[-1].append(movie_id)

In [22]:
# 15　推薦された映画の詳細データを抽出
#user_id=-1はtopic4の映画を一番多く評価しているため、システムはtopic4に属する未評価の映画を推薦する。
recommended_movies = movie_content[
    movie_content['movie_id'].isin(pred_user2items[-1])
]

display(recommended_movies[['movie_id', 'title', 'genre', 'tag',  'topic','topic_score']])

Unnamed: 0,movie_id,title,genre,tag,topic,topic_score
47,48,Pocahontas (1995),"[Animation, Children, Musical, Romance]","[disney, eric goldberg, mike gabriel, ø§ø­ø³ø§...",4,0.672112
236,239,"Goofy Movie, A (1995)","[Animation, Children, Comedy, Romance]","[disney, katottava, disney animated feature, f...",4,0.507766
325,329,Star Trek: Generations (1994),"[Action, Adventure, Drama, Sci-Fi]","[far future, space, trekie, enterprise, futuri...",4,0.723285
360,364,"Lion King, The (1994)","[Adventure, Animation, Children, Drama, Musical]","[talking animals, disney, disney, educational,...",4,0.695704
415,419,"Beverly Hillbillies, The (1993)",[Comedy],"[hillbillies, tv show, television, oil slick, ...",4,0.303530
...,...,...,...,...,...,...
10489,62336,FLCL (2000),"[Animation, Comedy, Fantasy, Sci-Fi]",[anime],4,0.320041
10516,62764,Black Moon (1975),"[Fantasy, Mystery, Sci-Fi, War]",,4,0.216566
10523,62834,Babylon 5: The Legend of the Rangers: To Live ...,[Sci-Fi],,4,0.510000
10524,62836,Babylon 5: The Lost Tales - Voices in the Dark...,[Sci-Fi],[below r],4,0.510000


In [17]:
#(参考) 上記user_id=-1の学習データで、4以上の評価を付けた映画一覧
display(user_ratings_df[user_ratings_df['rating'] >= 4.0])

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genre,tag,timestamp_rank
0,-1,4896,5.0,1770030000.0,Harry Potter and the Sorcerer's Stone (a.k.a. ...,"['Adventure', 'Children', 'Fantasy']","['based on a book', 'franchise', 'adventure', ...",1
1,-1,5816,5.0,1770030000.0,Harry Potter and the Chamber of Secrets (2002),"['Adventure', 'Children', 'Fantasy']","['based on a book', 'franchise', 'harry potter...",2
2,-1,8368,4.0,1770030000.0,Harry Potter and the Prisoner of Azkaban (2004),"['Adventure', 'Children', 'Fantasy']","['based on a book', 'franchise', 'magic', 'har...",3
3,-1,40815,4.5,1770030000.0,Harry Potter and the Goblet of Fire (2005),"['Adventure', 'Fantasy', 'Thriller']","['based on a book', 'big budget', 'franchise',...",4
6,-1,5618,5.0,1770030000.0,Spirited Away (Sen to Chihiro no kamikakushi) ...,"['Adventure', 'Animation', 'Children', 'Fantasy']","['anime', ""holy christ it's fantastic in every...",7
7,-1,1022,4.5,1770030000.0,Cinderella (1950),"['Animation', 'Children', 'Fantasy', 'Musical'...","['disney', 'disney', 'library vhs', 'fairy tal...",8
9,-1,63239,5.0,1770030000.0,Cinderella (1997),"['Children', 'Fantasy', 'Musical', 'Romance']",,10
10,-1,594,4.0,1770030000.0,Snow White and the Seven Dwarfs (1937),"['Animation', 'Children', 'Drama', 'Fantasy', ...","['disney', 'classic', 'disney', 'national film...",11
14,-1,595,4.0,1770030000.0,Beauty and the Beast (1991),"['Animation', 'Children', 'Fantasy', 'Musical'...","['fairy tale', 'disney', 'disney', 'disney', '...",15
18,-1,916,5.0,1770030000.0,Roman Holiday (1953),"['Comedy', 'Romance']","[""good in it's day"", 'classic', 'audrey hepbur...",19


In [19]:
# (参考)topic4の単語一覧（LDAが見つけた潜在トピックを構成する単語を確認）
for token_id, score in lda_model.get_topic_terms(4, topn=20):
    word = common_dictionary.id2token[token_id]
    print(f'word={word}, score={score}')

word=Sci-Fi, score=0.1752476841211319
word=disney, score=0.09599000960588455
word=animation, score=0.07868126034736633
word=pixar, score=0.07776305824518204
word=based on a tv show, score=0.05630519986152649
word=Animation, score=0.051646966487169266
word=Children, score=0.021276479586958885
word=disney animated feature, score=0.020379772409796715
word=cartoon, score=0.017029492184519768
word=star trek, score=0.013752744533121586
word=computer animation, score=0.01218960527330637
word=weird, score=0.011850929819047451
word=cgi, score=0.010478068143129349
word=Adventure, score=0.009757015854120255
word=animated, score=0.009209433570504189
word=Fantasy, score=0.009056366048753262
word=erlend's dvds, score=0.008974751457571983
word=awful, score=0.008445936255156994
word=children, score=0.007884478196501732
word=father-son relationship, score=0.007571449503302574
