# Content Filtering

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
df = pd.DataFrame([
    {'title': 'A', 'genre': 'Pop', 'label': 'PT. A'},
    {'title': 'B', 'genre': 'Keroncong', 'label': 'PT. A'},
    {'title': 'C', 'genre': 'Dangdut', 'label': 'PT. A'},
    {'title': 'D', 'genre': 'Pop', 'label': 'PT. B'},
    {'title': 'E', 'genre': 'Keroncong', 'label': 'PT. B'},
    {'title': 'F', 'genre': 'Dangdut', 'label': 'PT. B'},
    {'title': 'G', 'genre': 'Pop', 'label': 'PT. C'},
    {'title': 'H', 'genre': 'Keroncong', 'label': 'PT. C'},
    {'title': 'I', 'genre': 'Dangdut', 'label': 'PT. C'},
    {'title': 'J', 'genre': 'Pop', 'label': 'PT. C'}])
df

Unnamed: 0,title,genre,label
0,A,Pop,PT. A
1,B,Keroncong,PT. A
2,C,Dangdut,PT. A
3,D,Pop,PT. B
4,E,Keroncong,PT. B
5,F,Dangdut,PT. B
6,G,Pop,PT. C
7,H,Keroncong,PT. C
8,I,Dangdut,PT. C
9,J,Pop,PT. C


In [4]:
ecv = CountVectorizer()
mgenre = ecv.fit_transform(df['genre'])
ecv.get_feature_names()

['dangdut', 'keroncong', 'pop']

In [5]:
mgenre

<10x3 sparse matrix of type '<class 'numpy.int64'>'
	with 10 stored elements in Compressed Sparse Row format>

In [6]:
mgenre.toarray()

array([[0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1]])

In [7]:
cosScore = cosine_similarity(mgenre)
cosScore

array([[1., 0., 0., 1., 0., 0., 1., 0., 0., 1.],
       [0., 1., 0., 0., 1., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 1., 0., 0., 1., 0.],
       [1., 0., 0., 1., 0., 0., 1., 0., 0., 1.],
       [0., 1., 0., 0., 1., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 1., 0., 0., 1., 0.],
       [1., 0., 0., 1., 0., 0., 1., 0., 0., 1.],
       [0., 1., 0., 0., 1., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 1., 0., 0., 1., 0.],
       [1., 0., 0., 1., 0., 0., 1., 0., 0., 1.]])

In [8]:
cosScore.shape

(10, 10)

In [9]:
produk_mirip = list(enumerate(cosScore[7]))
produk_mirip = sorted(produk_mirip, key=lambda x: x[1], reverse=True)
produk_mirip

[(1, 1.0),
 (4, 1.0),
 (7, 1.0),
 (0, 0.0),
 (2, 0.0),
 (3, 0.0),
 (5, 0.0),
 (6, 0.0),
 (8, 0.0),
 (9, 0.0)]

In [10]:
for i in produk_mirip[:5]:
    print(df.iloc[i[0]])

title            B
genre    Keroncong
label        PT. A
Name: 1, dtype: object
title            E
genre    Keroncong
label        PT. B
Name: 4, dtype: object
title            H
genre    Keroncong
label        PT. C
Name: 7, dtype: object
title        A
genre      Pop
label    PT. A
Name: 0, dtype: object
title          C
genre    Dangdut
label      PT. A
Name: 2, dtype: object


In [11]:
df2 = pd.read_csv('~/Downloads/anime.csv')
df2 = df2.iloc[0:850]
df2.head(20)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
5,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351
6,11061,Hunter x Hunter (2011),"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855
7,820,Ginga Eiyuu Densetsu,"Drama, Military, Sci-Fi, Space",OVA,110,9.11,80679
8,15335,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1,9.1,72534
9,15417,Gintama&#039;: Enchousen,"Action, Comedy, Historical, Parody, Samurai, S...",TV,13,9.11,81109


In [12]:
len(df2['name'])

850

In [13]:
df2.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [14]:
ext = CountVectorizer(tokenizer = lambda x: x.split(', '))
mgenre = ext.fit_transform(df2['genre'])
print(len(ext.get_feature_names()))
print(ext.get_feature_names())

40
['action', 'adventure', 'cars', 'comedy', 'dementia', 'demons', 'drama', 'ecchi', 'fantasy', 'game', 'harem', 'historical', 'horror', 'josei', 'kids', 'magic', 'martial arts', 'mecha', 'military', 'music', 'mystery', 'parody', 'police', 'psychological', 'romance', 'samurai', 'school', 'sci-fi', 'seinen', 'shoujo', 'shoujo ai', 'shounen', 'shounen ai', 'slice of life', 'space', 'sports', 'super power', 'supernatural', 'thriller', 'vampire']


In [15]:
mgenre

<850x40 sparse matrix of type '<class 'numpy.int64'>'
	with 3696 stored elements in Compressed Sparse Row format>

In [16]:
mgenre.toarray()

array([[0, 0, 0, ..., 1, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [17]:
cs = cosine_similarity(mgenre)

In [18]:
animesuka = 'Hunter x Hunter (2011)'
indexsuka = df2[df2['name'] == animesuka].index[0]
indexsuka

6

In [19]:
animesama = list(enumerate(cs[indexsuka]))

In [20]:
animesamasortir = sorted(animesama, key = lambda x:x[1], reverse = True)
animesamasortir

[(6, 1.0),
 (112, 1.0),
 (145, 1.0),
 (146, 1.0),
 (202, 1.0),
 (784, 0.8944271909999159),
 (398, 0.8660254037844388),
 (698, 0.8660254037844388),
 (182, 0.8164965809277261),
 (271, 0.8164965809277261),
 (352, 0.8164965809277261),
 (753, 0.8164965809277261),
 (74, 0.7559289460184544),
 (206, 0.7559289460184544),
 (231, 0.7559289460184544),
 (241, 0.7559289460184544),
 (515, 0.7559289460184544),
 (566, 0.7559289460184544),
 (588, 0.7559289460184544),
 (644, 0.7559289460184544),
 (113, 0.75),
 (175, 0.75),
 (304, 0.75),
 (339, 0.75),
 (371, 0.75),
 (513, 0.75),
 (561, 0.75),
 (579, 0.75),
 (629, 0.75),
 (821, 0.75),
 (680, 0.7071067811865475),
 (64, 0.6708203932499369),
 (86, 0.6708203932499369),
 (95, 0.6708203932499369),
 (101, 0.6708203932499369),
 (178, 0.6708203932499369),
 (257, 0.6708203932499369),
 (268, 0.6708203932499369),
 (290, 0.6708203932499369),
 (461, 0.6708203932499369),
 (486, 0.6708203932499369),
 (526, 0.6708203932499369),
 (582, 0.6708203932499369),
 (615, 0.67082039

In [21]:
animesama = list(filter(lambda x:x[1]>0.7, animesama))
list(animesama)

[(6, 1.0),
 (74, 0.7559289460184544),
 (112, 1.0),
 (113, 0.75),
 (145, 1.0),
 (146, 1.0),
 (175, 0.75),
 (182, 0.8164965809277261),
 (202, 1.0),
 (206, 0.7559289460184544),
 (231, 0.7559289460184544),
 (241, 0.7559289460184544),
 (271, 0.8164965809277261),
 (304, 0.75),
 (339, 0.75),
 (352, 0.8164965809277261),
 (371, 0.75),
 (398, 0.8660254037844388),
 (513, 0.75),
 (515, 0.7559289460184544),
 (561, 0.75),
 (566, 0.7559289460184544),
 (579, 0.75),
 (588, 0.7559289460184544),
 (629, 0.75),
 (644, 0.7559289460184544),
 (680, 0.7071067811865475),
 (698, 0.8660254037844388),
 (753, 0.8164965809277261),
 (784, 0.8944271909999159),
 (821, 0.75)]

In [22]:
for i in animesama:
    print(df2.iloc[i[0]]['name'])

Hunter x Hunter (2011)
One Piece
Hunter x Hunter
Noragami Aragoto
Hunter x Hunter OVA
Hunter x Hunter: Greed Island Final
Katekyo Hitman Reborn!
Saint Seiya: The Lost Canvas - Meiou Shinwa 2
Hunter x Hunter: Greed Island
Dragon Ball Z
One Piece: Episode of Merry - Mou Hitori no Nakama no Monogatari
One Piece: Episode of Nami - Koukaishi no Namida to Nakama no Kizuna
Saint Seiya: The Lost Canvas - Meiou Shinwa
D.Gray-man
Noragami
One Piece Film: Strong World Episode 0
Ginga Nagareboshi Gin
Noragami Aragoto OVA
D.Gray-man Hallow
Dragon Ball Kai (2014)
Lupin III (2015)
Saint Seiya: Meiou Hades Meikai-hen
Shin Mazinger Shougeki! Z-hen
Dragon Ball Kai
Lupin III: Part II
Claymore
Michiko to Hatchin
Noragami OVA
One Piece: Episode of Luffy - Hand Island no Bouken
Naruto: Shippuuden Movie 6 - Road to Ninja
Saint Seiya: Meiou Hades Elysion-hen


In [23]:
ext2 = CountVectorizer(tokenizer = lambda x: x.split(', '))
mtype = ext.fit_transform(df2['type'])

In [24]:
mtype.toarray()

array([[1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1],
       ...,
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1]])

In [25]:
mtype.shape

(850, 6)

In [26]:
cs2 = cosine_similarity(mtype)

In [27]:
animesama2 = list(enumerate(cs2[indexsuka]))

In [28]:
animesama2 = list(filter(lambda x:x[1]>0.7, animesama2))
list(animesama2)

[(1, 1.0),
 (2, 1.0),
 (3, 1.0),
 (4, 1.0),
 (5, 1.0),
 (6, 1.0),
 (9, 1.0),
 (10, 1.0),
 (12, 1.0),
 (13, 1.0),
 (14, 1.0),
 (16, 1.0),
 (17, 1.0),
 (19, 1.0),
 (20, 1.0),
 (22, 1.0),
 (23, 1.0),
 (26, 1.0),
 (27, 1.0),
 (28, 1.0),
 (29, 1.0),
 (30, 1.0),
 (31, 1.0),
 (32, 1.0),
 (34, 1.0),
 (36, 1.0),
 (38, 1.0),
 (39, 1.0),
 (40, 1.0),
 (42, 1.0),
 (43, 1.0),
 (44, 1.0),
 (46, 1.0),
 (47, 1.0),
 (49, 1.0),
 (50, 1.0),
 (51, 1.0),
 (53, 1.0),
 (54, 1.0),
 (56, 1.0),
 (57, 1.0),
 (58, 1.0),
 (61, 1.0),
 (62, 1.0),
 (64, 1.0),
 (67, 1.0),
 (68, 1.0),
 (69, 1.0),
 (72, 1.0),
 (73, 1.0),
 (74, 1.0),
 (75, 1.0),
 (76, 1.0),
 (78, 1.0),
 (79, 1.0),
 (80, 1.0),
 (82, 1.0),
 (83, 1.0),
 (84, 1.0),
 (86, 1.0),
 (87, 1.0),
 (88, 1.0),
 (91, 1.0),
 (93, 1.0),
 (94, 1.0),
 (95, 1.0),
 (96, 1.0),
 (97, 1.0),
 (98, 1.0),
 (99, 1.0),
 (100, 1.0),
 (101, 1.0),
 (104, 1.0),
 (105, 1.0),
 (106, 1.0),
 (108, 1.0),
 (109, 1.0),
 (110, 1.0),
 (111, 1.0),
 (112, 1.0),
 (113, 1.0),
 (114, 1.0),
 (116, 1.0)

In [29]:
for i in animesama2[:5]:
    print(df2.iloc[i[0]]['name'])

Fullmetal Alchemist: Brotherhood
Gintama°
Steins;Gate
Gintama&#039;
Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou


In [30]:
m3 = CountVectorizer().fit_transform(df2['genre'], df2['type'])

In [31]:
cs3 = cosine_similarity(m3)

In [32]:
animesama3 = list(enumerate(cs3[indexsuka]))

In [33]:
animesama3 = list(filter(lambda x:x[1]>0.7, animesama3))
list(animesama3)

[(6, 0.9999999999999999),
 (74, 0.7905694150420948),
 (86, 0.7302967433402215),
 (112, 0.9999999999999999),
 (145, 0.9999999999999999),
 (146, 0.9999999999999999),
 (175, 0.7999999999999999),
 (178, 0.7302967433402215),
 (182, 0.7905694150420948),
 (202, 0.9999999999999999),
 (206, 0.7453559924999298),
 (231, 0.7905694150420948),
 (241, 0.7905694150420948),
 (271, 0.7905694150420948),
 (352, 0.8451542547285165),
 (398, 0.7745966692414834),
 (513, 0.7999999999999999),
 (515, 0.7453559924999298),
 (566, 0.7453559924999298),
 (579, 0.7999999999999999),
 (582, 0.7302967433402215),
 (588, 0.7453559924999298),
 (644, 0.7905694150420948),
 (698, 0.7745966692414834),
 (717, 0.7302967433402215),
 (753, 0.8451542547285165),
 (784, 0.8451542547285165),
 (821, 0.7999999999999999)]

In [34]:
animesamasortir = sorted(animesama3, key = lambda x:x[1], reverse = True)
animesamasortir

[(6, 0.9999999999999999),
 (112, 0.9999999999999999),
 (145, 0.9999999999999999),
 (146, 0.9999999999999999),
 (202, 0.9999999999999999),
 (352, 0.8451542547285165),
 (753, 0.8451542547285165),
 (784, 0.8451542547285165),
 (175, 0.7999999999999999),
 (513, 0.7999999999999999),
 (579, 0.7999999999999999),
 (821, 0.7999999999999999),
 (74, 0.7905694150420948),
 (182, 0.7905694150420948),
 (231, 0.7905694150420948),
 (241, 0.7905694150420948),
 (271, 0.7905694150420948),
 (644, 0.7905694150420948),
 (398, 0.7745966692414834),
 (698, 0.7745966692414834),
 (206, 0.7453559924999298),
 (515, 0.7453559924999298),
 (566, 0.7453559924999298),
 (588, 0.7453559924999298),
 (86, 0.7302967433402215),
 (178, 0.7302967433402215),
 (582, 0.7302967433402215),
 (717, 0.7302967433402215)]

In [35]:
for i in animesama3:
    print(df2.iloc[i[0]]['name'])

Hunter x Hunter (2011)
One Piece
Shingeki no Kyojin
Hunter x Hunter
Hunter x Hunter OVA
Hunter x Hunter: Greed Island Final
Katekyo Hitman Reborn!
Boku no Hero Academia
Saint Seiya: The Lost Canvas - Meiou Shinwa 2
Hunter x Hunter: Greed Island
Dragon Ball Z
One Piece: Episode of Merry - Mou Hitori no Nakama no Monogatari
One Piece: Episode of Nami - Koukaishi no Namida to Nakama no Kizuna
Saint Seiya: The Lost Canvas - Meiou Shinwa
One Piece Film: Strong World Episode 0
Noragami Aragoto OVA
D.Gray-man Hallow
Dragon Ball Kai (2014)
Saint Seiya: Meiou Hades Meikai-hen
Shin Mazinger Shougeki! Z-hen
Bleach
Dragon Ball Kai
Claymore
Noragami OVA
Shingeki no Kyojin OVA
One Piece: Episode of Luffy - Hand Island no Bouken
Naruto: Shippuuden Movie 6 - Road to Ninja
Saint Seiya: Meiou Hades Elysion-hen


# Collaborative Filtering

In [36]:
rating = pd.read_csv('~/Downloads/rating.csv')
rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [39]:
data = pd.merge(df2, rating, on = 'anime_id')
data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating_x,members,user_id,rating_y
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,99,5
1,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,152,10
2,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,244,10
3,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,271,10
4,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,278,-1


In [41]:
mat = data.pivot_table(index = 'user_id', columns = 'name', values = 'rating_y')
mat.head()

name,91 Days,AKB0048: Next Stage,Ajin Part 1: Shoudou,Akachan to Boku,Akagami no Shirayuki-hime,Akagami no Shirayuki-hime 2nd Season,Akame ga Kill!,Akatsuki no Yona,Akira,Angel Beats!,...,Zetsuen no Tempest,Zoku Natsume Yuujinchou,Zoku Sayonara Zetsubou Sensei,ef: A Tale of Melodies.,ef: A Tale of Memories.,xxxHOLiC,xxxHOLiC Kei,xxxHOLiC Movie: Manatsu no Yoru no Yume,xxxHOLiC Rou,xxxHOLiC Shunmuki
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,-1.0,,,-1.0,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,8.0,,,,...,8.0,,,,,,,,,
4,,,,,,,,,,-1.0,...,,,,,,,,,,
5,,,,,,,4.0,,8.0,3.0,...,,,7.0,,,2.0,,,,


In [74]:
kimi_rating = mat['Naruto']
kimi_rating

user_id
1       -1.0
2        NaN
3        8.0
4        NaN
5        6.0
        ... 
73512    NaN
73513    NaN
73514    NaN
73515    NaN
73516    NaN
Name: Naruto, Length: 72052, dtype: float64

In [75]:
similar_kimi = mat.corrwith(kimi_rating)

  c = cov(x, y, rowvar)
  c *= np.true_divide(1, fact)


In [76]:
corr_kimi = pd.DataFrame(similar_kimi, columns = ['Correlation'])
corr_kimi.dropna(inplace = True)
corr_kimi.head(20)

Unnamed: 0_level_0,Correlation
name,Unnamed: 1_level_1
91 Days,0.499982
AKB0048: Next Stage,0.593548
Ajin Part 1: Shoudou,0.451589
Akachan to Boku,0.519318
Akagami no Shirayuki-hime,0.506684
Akagami no Shirayuki-hime 2nd Season,0.500054
Akame ga Kill!,0.590885
Akatsuki no Yona,0.560429
Akira,0.569814
Angel Beats!,0.602401


In [77]:
data['ratings count'] = pd.DataFrame(data.groupby('name')['rating_y'].count())

In [78]:
data

Unnamed: 0,anime_id,name,genre,type,episodes,rating_x,members,user_id,rating_y,ratings count
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,99,5,
1,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,152,10,
2,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,244,10,
3,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,271,10,
4,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,278,-1,
...,...,...,...,...,...,...,...,...,...,...
3337693,120,Fruits Basket,"Comedy, Drama, Fantasy, Romance, Shoujo, Slice...",TV,26,7.80,242553,73490,10,
3337694,120,Fruits Basket,"Comedy, Drama, Fantasy, Romance, Shoujo, Slice...",TV,26,7.80,242553,73491,10,
3337695,120,Fruits Basket,"Comedy, Drama, Fantasy, Romance, Shoujo, Slice...",TV,26,7.80,242553,73495,7,
3337696,120,Fruits Basket,"Comedy, Drama, Fantasy, Romance, Shoujo, Slice...",TV,26,7.80,242553,73500,9,


In [79]:
data.groupby('name')['rating_y'].count().sort_values(ascending = False).head()

name
Death Note                         39340
Sword Art Online                   30583
Shingeki no Kyojin                 29584
Code Geass: Hangyaku no Lelouch    27718
Elfen Lied                         27506
Name: rating_y, dtype: int64

In [80]:
data.groupby('name')['rating_y'].count()

name
91 Days                                    1825
AKB0048: Next Stage                         876
Ajin Part 1: Shoudou                        375
Akachan to Boku                             249
Akagami no Shirayuki-hime                  3305
                                           ... 
xxxHOLiC                                   5475
xxxHOLiC Kei                               3413
xxxHOLiC Movie: Manatsu no Yoru no Yume    2365
xxxHOLiC Rou                               1513
xxxHOLiC Shunmuki                          1974
Name: rating_y, Length: 832, dtype: int64

In [81]:
ratings = pd.DataFrame(data.groupby('name')['rating_y'].mean())
ratings['ratings count'] = pd.DataFrame(data.groupby('name')['rating_y'].count())
ratings

Unnamed: 0_level_0,rating_y,ratings count
name,Unnamed: 1_level_1,Unnamed: 2_level_1
91 Days,6.593973,1825
AKB0048: Next Stage,6.205479,876
Ajin Part 1: Shoudou,5.984000,375
Akachan to Boku,6.212851,249
Akagami no Shirayuki-hime,6.312557,3305
...,...,...
xxxHOLiC,6.592146,5475
xxxHOLiC Kei,6.720774,3413
xxxHOLiC Movie: Manatsu no Yoru no Yume,6.313742,2365
xxxHOLiC Rou,6.403173,1513


In [82]:
corr_kimi['ratings count'] = ratings['ratings count']
corr_kimi[corr_kimi['ratings count'] > 1500].sort_values('Correlation', ascending = False).head(20)

Unnamed: 0_level_0,Correlation,ratings count
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Naruto,1.0,25925
Bleach,0.720531,12845
InuYasha,0.704638,9328
Shaman King,0.696407,7814
Katekyo Hitman Reborn!,0.694202,6739
The Law of Ueki,0.693139,2186
Rurouni Kenshin: Meiji Kenkaku Romantan,0.690855,7900
Romeo x Juliet,0.690726,3187
Shijou Saikyou no Deshi Kenichi,0.69025,5654
Slayers,0.688689,3395
