In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("../data/animes_data.csv")

In [3]:
df.columns

Index(['anime_id', 'title', 'main_picture', 'ja_title', 'synopsis', 'genres',
       'media', 'episodes', 'rating', 'members', 'start_date', 'season',
       'source', 'studios'],
      dtype='object')

In [4]:
df = df.drop(columns = ['title', 'main_picture', 'synopsis','episodes', 'rating', 'media', 'members','start_date', 'season',
       'source', 'studios'])

In [5]:
df.head(10)

Unnamed: 0,anime_id,ja_title,genres
0,1,カウボーイビバップ,"Action,Adult Cast,Award Winning,Sci-Fi,Space"
1,5,カウボーイビバップ 天国の扉,"Action,Adult Cast,Sci-Fi,Space"
2,6,トライガン,"Action,Adult Cast,Adventure,Sci-Fi,Shounen"
3,7,Witch Hunter ROBIN (ウイッチハンターロビン),"Action,Detective,Drama,Mystery,Supernatural"
4,15,アイシールド21,"Shounen,Sports,Team Sports"
5,16,ハチミツとクローバー,"Adult Cast,Comedy,Drama,Josei,Love Polygon,Rom..."
6,17,ハングリーハート Wild Striker,"Comedy,Shounen,Slice of Life,Sports,Team Sports"
7,18,頭文字〈イニシャル〉D FOURTH STAGE,"Action,Drama,Racing,Seinen"
8,19,モンスター,"Adult Cast,Drama,Mystery,Psychological,Seinen,..."
9,20,ナルト,"Action,Adventure,Fantasy,Martial Arts,Shounen"


In [6]:
genres = df['genres'].map(lambda x: x.split(',')).to_list()
genre_col = list()
for i in genres:
    genre_col.extend(i)
genre_col = list(set(genre_col))

#ジャンル名のカラムの長さを確認
print(len(genre_col))

76


In [7]:
#One-Hot
rows = []
for index, row in enumerate(genres):
    row_list = np.array([0] * len(genre_col))
    index_list = [genre_col.index(item) for item in row]
    row_list[index_list] = 1
    rows.append(list(row_list))
genre_df = pd.DataFrame(rows, columns = genre_col)
one_hot_data = pd.concat([df, genre_df], axis= 1) #横結合

In [8]:
genre_df.head()

Unnamed: 0,Action,School,Military,Performing Arts,Sci-Fi,Kids,Visual Arts,Slice of Life,Mecha,Ecchi,...,Samurai,Josei,Reincarnation,Team Sports,Shoujo,Combat Sports,Shounen,Comedy,Idols (Female),Drama
0,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0


In [9]:
#one-hot表現の部分で配列を作成
item_vectors = np.array(one_hot_data[genre_col])

#行ごとのベクトルノルム
norm = np.matrix(np.linalg.norm(item_vectors, axis=1))

#コサイン類似度の式を使って類似度行列を作成
sim_mat = np.array(np.dot(item_vectors, item_vectors.T)/np.dot(norm.T, norm))

In [10]:
itemindex = dict()
for num, item_id in enumerate(one_hot_data.anime_id):
    itemindex[item_id] = num

In [19]:
#ここから推薦システムに入力

In [11]:
#anime_idを指定してindexを検索、row_numに格納
row_num = itemindex[31490] #idを入れることでindexを取得

#類似度行列のrow_numの列でのトップ10を抽出
top10_index = np.argsort(sim_mat[row_num])[::-1][1:11]

top10_index

array([1369, 1027,  108, 1076,  107,  106,  105, 3534, 1220, 1254])

In [12]:
rec_id = list()
for search_index in top10_index:
    for anime_id, index in itemindex.items():
        if index == search_index:
            rec_id.append(anime_id)
rec_id

[5252, 2472, 139, 2889, 138, 137, 136, 34577, 3848, 4155]

In [13]:
#df.query("anime_id == [5252, 2472, 139, 2889, 138, 137, 136, 34577, 3848, 4155]")
df[df["anime_id"].isin(rec_id)]

Unnamed: 0,anime_id,ja_title,genres
105,136,HUNTER×HUNTER（ハンター×ハンター）,"Action,Adventure,Fantasy,Shounen"
106,137,HUNTER×HUNTER Original Video Animation,"Action,Adventure,Fantasy,Shounen"
107,138,HUNTER×HUNTER Greed Island,"Action,Adventure,Fantasy,Shounen"
108,139,HUNTER×HUNTER G・I Final,"Action,Adventure,Fantasy,Shounen"
1027,2472,劇場版NARUTO -ナルト- 疾風伝,"Action,Adventure,Fantasy,Shounen"
1076,2889,劇場版 BLEACH The DiamondDust Rebellion もう一つの氷輪丸,"Action,Adventure,Fantasy,Shounen"
1220,3848,ＯＮＥ ＰＩＥＣＥ（ワンピース） エピソードオブチョッパー＋冬に咲く、奇跡の桜,"Action,Adventure,Fantasy,Shounen"
1254,4155,ワンピース　フィルム　ストロングワールド,"Action,Adventure,Fantasy,Shounen"
1369,5252,ワンピース ロマンスドーンストーリー,"Action,Adventure,Fantasy,Shounen"
3534,34577,七つの大罪 戒めの復活,"Action,Adventure,Fantasy,Shounen"


In [14]:
df[df['ja_title'].str.contains('ワンピース')]

Unnamed: 0,anime_id,ja_title,genres
338,460,ワンピース ねじまき島の冒険,"Action,Adventure,Fantasy,Shounen"
339,461,ワンピース 珍獣島のチョッパー王国,"Action,Adventure,Fantasy,Shounen"
340,462,ワンピース デッドエンドの冒険,"Action,Adventure,Fantasy,Shounen"
341,463,ワンピース呪われた聖剣,"Action,Adventure,Fantasy,Shounen"
342,464,ワンピース オマツリ男爵と秘密の島,"Action,Adventure,Drama,Fantasy,Shounen"
343,465,ワンピース THE MOVIE カラクリ城のメカ巨兵,"Action,Adventure,Fantasy,Shounen"
344,466,ワンピース 倒せ！海賊ギャンザック,"Action,Adventure,Fantasy,Shounen"
666,1094,ワンピース ルフィ落下!秘境・海のヘソの大冒険,"Action,Adventure,Fantasy,Shounen"
714,1237,ワンピース 大海原にひらけ! でっかいでっカイ父の夢!,"Action,Adventure,Fantasy,Shounen"
715,1238,ワンピース守れ! 最後の大舞台,"Action,Adventure,Fantasy,Shounen"


In [15]:
#ランダム推薦
import random
a = [random.randint(0, 4973) for _ in range(0, 10)]

In [16]:
df.iloc[a]

Unnamed: 0,anime_id,ja_title,genres
154,195,おねがい☆ティーチャー,"Drama,Romance,School,Sci-Fi"
129,164,もののけ姫,"Action,Adventure,Award Winning,Fantasy"
673,1110,マーメイドメロディーぴちぴちピッチピュア,"Adventure,Comedy,Mahou Shoujo,Music,Romance,Sh..."
484,755,僕のセクシャルハラスメント,"Boys Love,Hentai,Workplace"
4192,39417,グランベルム,"Action,Fantasy,Mahou Shoujo,Mecha"
2752,24641,ばくあね 弟しぼっちゃうぞ! THE ANIMATION,Hentai
3072,31297,東京喰種 トーキョーグール【PINTO】,"Action,Fantasy,Gore,Horror,Psychological,Seinen"
2026,11889,アイドルマスター 765プロという物語,"Comedy,Drama,Idols (Female),Music"
2393,17901,All Alone With You,"Fantasy,Music"
1464,6203,ささめきこと,"Comedy,Girls Love,Romance,School,Seinen"


In [None]:
#アイテムベース推薦