In [1]:
import pandas as pd
import numpy as np 
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load the two raw inputs: the user ratings table and the anime metadata table.
ratings_csv = '../data/ratings.csv'
anime_csv = '../data/animes_data.csv'
ratings = pd.read_csv(ratings_csv)
anime = pd.read_csv(anime_csv)

In [5]:
# Join every rating with the metadata of the anime it refers to, matching on
# anime_id (pandas' default inner join). Overlapping column names coming from
# `ratings` receive the '_user' suffix, while the `anime` columns keep their
# original names.
mergeddf = pd.merge(
    ratings,
    anime,
    on='anime_id',
    suffixes=['_user', ''],
)

# Show the first five rows of the merged frame.
mergeddf.head()

Unnamed: 0,user_id,anime_id,rating_user,title,main_picture,ja_title,synopsis,genres,media,episodes,rating,members,start_date,season,source,studios
0,1,454,3,.hack//Gift,https://api-cdn.myanimelist.net/images/anime/2...,.hack//GIFT,As an expression of gratitude for the heroes o...,"Comedy,Fantasy,Video Game",ova,1,6.1,20534,2003-11-16,fall_2003,original,Bee Train
1,63,454,5,.hack//Gift,https://api-cdn.myanimelist.net/images/anime/2...,.hack//GIFT,As an expression of gratitude for the heroes o...,"Comedy,Fantasy,Video Game",ova,1,6.1,20534,2003-11-16,fall_2003,original,Bee Train
2,117,454,0,.hack//Gift,https://api-cdn.myanimelist.net/images/anime/2...,.hack//GIFT,As an expression of gratitude for the heroes o...,"Comedy,Fantasy,Video Game",ova,1,6.1,20534,2003-11-16,fall_2003,original,Bee Train
3,174,454,4,.hack//Gift,https://api-cdn.myanimelist.net/images/anime/2...,.hack//GIFT,As an expression of gratitude for the heroes o...,"Comedy,Fantasy,Video Game",ova,1,6.1,20534,2003-11-16,fall_2003,original,Bee Train
4,252,454,10,.hack//Gift,https://api-cdn.myanimelist.net/images/anime/2...,.hack//GIFT,As an expression of gratitude for the heroes o...,"Comedy,Fantasy,Video Game",ova,1,6.1,20534,2003-11-16,fall_2003,original,Bee Train


In [6]:
# Keep only the three columns the title x user matrix is built from, then keep
# a single row (the first one encountered) per (user_id, ja_title) pair.
# DataFrame.pivot raises on duplicate (index, column) pairs, so the pairs must
# be unique before pivoting.
rating_columns = ['user_id', 'ja_title', 'rating_user']
mergeddf = (
    mergeddf
    .loc[:, rating_columns]
    .drop_duplicates(subset=['user_id', 'ja_title'])
)

In [7]:
# Preview the first five rows of the trimmed, de-duplicated ratings.
mergeddf.head(5)

Unnamed: 0,user_id,ja_title,rating_user
0,1,.hack//GIFT,3
1,63,.hack//GIFT,5
2,117,.hack//GIFT,0
3,174,.hack//GIFT,4
4,252,.hack//GIFT,10


In [8]:
# Title x user rating matrix: one row per ja_title, one column per user_id,
# each cell holding rating_user. (title, user) pairs with no rating become 0.
# The steps stay chained so that the NaN-filled intermediate frame is not kept
# alive under a separate name.
anime_pivot = (
    mergeddf
    .pivot(index='ja_title', columns='user_id', values='rating_user')
    .fillna(0)
)

# Compressed-sparse-row copy of the same matrix.
anime_pivot_sparse = csr_matrix(anime_pivot.to_numpy())

In [9]:
# Preview the first five title rows of the rating matrix.
anime_pivot.head(5)

user_id,1,2,3,4,5,6,7,8,10,11,...,108013,108014,108015,108017,108018,108019,108020,108021,108023,108024
ja_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-クラナド-　もうひとつの世界　智代編,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0
-シーキューブ- 林間学校こんふゅーじょん!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-ナルト- 疾風伝,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0
.HACK//G.U. RETURNER,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//G.U. Trilogy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Nearest-neighbour search from scikit-learn.
# Key arguments: brute-force search, cosine distance, and n_neighbors=9 as the
# neighbour count used when kneighbors() is called without its own n_neighbors.
knn = NearestNeighbors(
    metric='cosine',
    algorithm='brute',
    n_neighbors=9,
)

# Fit on the pre-processed sparse matrix. fit() returns the estimator itself,
# so `model_knn` and `knn` refer to the same fitted object.
model_knn = knn.fit(anime_pivot_sparse)

In [14]:
# List the catalogue rows whose Japanese title contains the search text.
# regex=False: treat the text literally. Titles contain punctuation, and
#   str.contains would otherwise interpret the pattern as a regular expression.
# na=False: a row with a missing ja_title counts as "no match" instead of
#   putting NaN into the boolean mask, which would make the row filter fail.
anime[anime['ja_title'].str.contains('けいおん!', regex=False, na=False)]

Unnamed: 0,anime_id,title,main_picture,ja_title,synopsis,genres,media,episodes,rating,members,start_date,season,source,studios
1406,5680,K-On!,https://api-cdn.myanimelist.net/images/anime/1...,けいおん!,A fresh high school year always means much to ...,"CGDCT,Comedy,Music,School",tv,13,7.85,1011863,2009-04-03,spring_2009,4_koma_manga,Kyoto Animation
1532,6862,K-On!: Live House!,https://api-cdn.myanimelist.net/images/anime/9...,けいおん! ライブハウス!,"It is almost the end of the year, and Houkago ...","CGDCT,Comedy,Music,School",special,1,7.84,155456,2010-01-19,winter_2010,4_koma_manga,Kyoto Animation
1616,7791,K-On!!,https://cdn.myanimelist.net/images/anime/12/76...,けいおん!!,"It is the new year, which means that the senio...","Award Winning,CGDCT,Comedy,Music,School",tv,26,8.17,665824,2010-04-07,spring_2010,4_koma_manga,Kyoto Animation
1770,9734,K-On!!: Keikaku!,https://cdn.myanimelist.net/images/anime/7/269...,けいおん!! 計画!,"The summer holidays are coming to an end, but ...","CGDCT,Comedy,Music,School,Slice of Life",special,1,7.85,121522,2011-03-16,winter_2011,original,Kyoto Animation


In [36]:
# Title to recommend from, and how many recommendations to print.
Anime = 'けいおん!'
n_recommendations = 10

# Positional row of the query title in the rating matrix. Index.get_loc raises
# a KeyError naming the title when it is absent, instead of the opaque reshape
# error an empty boolean selection would produce.
query_pos = anime_pivot.index.get_loc(Anime)
query_vector = anime_pivot.iloc[[query_pos]].values  # shape (1, number of users)

# Ask for one extra neighbour because the query title is itself part of the
# fitted data; never ask for more neighbours than there are titles.
n_query = min(n_recommendations + 1, anime_pivot.shape[0])
distance, indice = model_knn.kneighbors(query_vector, n_neighbors=n_query)

# Drop the query title by its row position rather than assuming it is always
# the first hit: when another title has an identical rating vector the
# distances tie and the query is not guaranteed to be ranked first.
recommendations = [
    (row_pos, dist)
    for row_pos, dist in zip(indice.flatten(), distance.flatten())
    if row_pos != query_pos
][:n_recommendations]

print('Recommendations if you like the anime 「{0}」:\n'.format(Anime))
for rank, (row_pos, dist) in enumerate(recommendations, start=1):
    print('{0}: {1} with distance: {2}'.format(rank, anime_pivot.index[row_pos], dist))

Recommendations if you like the anime 「けいおん!」:

1: けいおん!! with distance: 0.23370186597779508
2: 映画 けいおん！ with distance: 0.43791396460107757
3: けいおん! ライブハウス! with distance: 0.5030632650349689
4: 涼宮ハルヒの憂鬱 with distance: 0.5085741307115972
5: とらドラ！ with distance: 0.5236105390952059
6: らき☆すた with distance: 0.5268155619455077
7: CLANNAD with distance: 0.5330354033158567
8: Angel Beats!（エンジェルビーツ!） with distance: 0.5357501396934233
9: 俺の妹がこんなに可愛いわけがない with distance: 0.5486100987133172
10: 中二病でも恋がしたい! with distance: 0.550061330799839
