Here I want to make a simple recommender system to find the similarity between shows, users and to help me predict whether a user will enjoy a particular anime.

In [51]:
# Import relevant libraries 

import pandas as pd
import numpy as np
import scipy as sp
import sklearn
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
import operator
%matplotlib inline

In [75]:
anime = pd.read_csv('animes.csv').drop(columns=['synopsis','link'])
rating = pd.read_csv('reviews.csv').drop(columns=['anime_uid','text','link','scores'])
anime.head()

Unnamed: 0,uid,title,genre,aired,episodes,members,popularity,ranked,score,img_url
0,28891,Haikyuu!! Second Season,"['Comedy', 'Sports', 'Drama', 'School', 'Shoun...","Oct 4, 2015 to Mar 27, 2016",25.0,489888,141,25.0,8.82,https://cdn.myanimelist.net/images/anime/9/766...
1,23273,Shigatsu wa Kimi no Uso,"['Drama', 'Music', 'Romance', 'School', 'Shoun...","Oct 10, 2014 to Mar 20, 2015",22.0,995473,28,24.0,8.83,https://cdn.myanimelist.net/images/anime/3/671...
2,34599,Made in Abyss,"['Sci-Fi', 'Adventure', 'Mystery', 'Drama', 'F...","Jul 7, 2017 to Sep 29, 2017",13.0,581663,98,23.0,8.83,https://cdn.myanimelist.net/images/anime/6/867...
3,5114,Fullmetal Alchemist: Brotherhood,"['Action', 'Military', 'Adventure', 'Comedy', ...","Apr 5, 2009 to Jul 4, 2010",64.0,1615084,4,1.0,9.23,https://cdn.myanimelist.net/images/anime/1223/...
4,31758,Kizumonogatari III: Reiketsu-hen,"['Action', 'Mystery', 'Supernatural', 'Vampire']","Jan 6, 2017",1.0,214621,502,22.0,8.83,https://cdn.myanimelist.net/images/anime/3/815...


In [76]:
rating.head()

Unnamed: 0,uid,profile,score
0,255938,DesolatePsyche,8
1,259117,baekbeans,10
2,253664,skrn,7
3,8254,edgewalker00,9
4,291149,aManOfCulture99,10


Join the two dataframes on the anime_id columns

In [77]:
# Merging anime_show and rating dataframe with inner join
merged = rating.merge(anime, on = 'uid', suffixes= ['_user', ''], how='inner')
# Renaming column
merged.rename(columns = {'uid':'anime_id'}, inplace = True)
merged.rename(columns = {'score_user':'user_rating'}, inplace = True)
# Dropping extra column
print(merged.shape)
merged.head()

(12783, 12)


Unnamed: 0,anime_id,profile,user_rating,title,genre,aired,episodes,members,popularity,ranked,score,img_url
0,29323,Slushpuppy282,7,"Oyaji no, Imo no Kamisama.",['Slice of Life'],"Dec 31, 2014",1.0,360,11732,8664.0,5.9,https://cdn.myanimelist.net/images/anime/2/705...
1,29323,Slushpuppy282,7,"Oyaji no, Imo no Kamisama.",['Slice of Life'],"Dec 31, 2014",1.0,360,11732,8664.0,5.9,https://cdn.myanimelist.net/images/anime/2/705...
2,30968,ParaParaJMo,9,Kokoro no Catchball,"['Kids', 'Sports']",2005,1.0,100,15323,12764.0,6.7,https://cdn.myanimelist.net/images/anime/2/745...
3,30968,ParaParaJMo,9,Kokoro no Catchball,"['Kids', 'Sports']",2005,1.0,100,15323,12764.0,6.7,https://cdn.myanimelist.net/images/anime/2/745...
4,38440,Jolon,9,Shikizakura,"['Action', 'Sci-Fi', 'Drama']",2021 to ?,12.0,1419,8859,,,https://cdn.myanimelist.net/images/anime/1203/...


In [78]:
merged2 = merged.dropna()
print(merged2.shape)
merged2.head()

(10636, 12)


Unnamed: 0,anime_id,profile,user_rating,title,genre,aired,episodes,members,popularity,ranked,score,img_url
0,29323,Slushpuppy282,7,"Oyaji no, Imo no Kamisama.",['Slice of Life'],"Dec 31, 2014",1.0,360,11732,8664.0,5.9,https://cdn.myanimelist.net/images/anime/2/705...
1,29323,Slushpuppy282,7,"Oyaji no, Imo no Kamisama.",['Slice of Life'],"Dec 31, 2014",1.0,360,11732,8664.0,5.9,https://cdn.myanimelist.net/images/anime/2/705...
2,30968,ParaParaJMo,9,Kokoro no Catchball,"['Kids', 'Sports']",2005,1.0,100,15323,12764.0,6.7,https://cdn.myanimelist.net/images/anime/2/745...
3,30968,ParaParaJMo,9,Kokoro no Catchball,"['Kids', 'Sports']",2005,1.0,100,15323,12764.0,6.7,https://cdn.myanimelist.net/images/anime/2/745...
8,27501,Mimi_Taylor,9,Sore Ike! Anpanman: Anpanman to Tanoshii Nakam...,"['Kids', 'Adventure', 'Fantasy']","Jul 24, 1999",1.0,171,13933,13953.0,7.1,https://cdn.myanimelist.net/images/anime/9/666...


Pivoting table of users on one axis and tv show names along the other. This helps us in defining the similarity between users and shows to better predict who will like what.

In [82]:
piv = merged2.pivot_table(index=['profile'], columns=['title'], values='user_rating',fill_value=0)

In [None]:
print(piv.shape)
piv.head()

Transposing the Matrix (eg. pivoted table)

In [None]:
X = piv.T
X.shape

Decomposing the Matrix - Lowering the total features

In [None]:
# Truncates matrix in 12 Synthetic features
SVD = TruncatedSVD(n_components=40,random_state=12)

resultant_matrix = SVD.fit_transform(X)

resultant_matrix.shape

Generating a Correlation Matrix

In [None]:
corr_mat = np.corrcoef(resultant_matrix)
corr_mat.shape


In [None]:
anime_titles = piv.columns
anime_list = list(anime_titles)


In [None]:
corr_naruto = corr_mat[1780]
corr_naruto.shape

Recommending top 10% of Hightly Correlated Anime

In [None]:
list(anime_titles[(corr_naruto<1.0)&(corr_naruto>0.9)])

Function to recommend the top correlated Anime, (top inputted percentage)

In [None]:
def rec_anime(x,float):
    x_corr = anime_list.index(f'{x.title()}')
    x_corr
    corr_x = corr_mat[x_corr]
    print(f'Top {int(float*100)} Percentile Correlational match to Anime: {x.title()}')
    corr_anime = list(anime_titles[(corr_x < 1.0) & (corr_x > (1.0-float))])
    return corr_anime


In [None]:
rec_anime('naruto',.1)

In [None]:
rec_anime('one piece',0.1)