In [70]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
pd.set_option("max_colwidth", None)

In [71]:
# load anime dataset
anime_df = pd.read_csv('dataset/anime.csv')
usecols = ["MAL_ID", "Name", "Score", "Genres", "Type", "Episodes", "Premiered",
           "Studios", "Source", "Rating", "Completed"]

anime_df=pd.read_csv('dataset/anime.csv',usecols=usecols)
anime_df.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Completed
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,Spring 1998,Sunrise,Original,R - 17+ (violence & profanity),718161
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Movie,1,Unknown,Bones,Original,R - 17+ (violence & profanity),208333
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",TV,26,Spring 1998,Madhouse,Manga,PG-13 - Teens 13 or older,343492
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, Magic",TV,26,Summer 2002,Sunrise,Original,PG-13 - Teens 13 or older,46165
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",TV,52,Fall 2004,Toei Animation,Manga,PG - Children,7314


In [85]:
# load top users with anime rating dataset
top_animes_count = 1000
rating_popular_anime = pd.read_csv('dataset/top_anime_unsupervised_use.csv')
animes_popular = anime_df.sort_values('Completed', ascending=False)[0:top_animes_count]
top_animes = animes_popular['MAL_ID'].unique()
rating_popular_anime = rating_popular_anime[rating_popular_anime.MAL_ID.isin(top_animes)]
rating_popular_anime.head()

         user_id  MAL_ID  rating
3            478      20       7
5            478      24       8
7            478      43       6
8            478      47       8
9            478      48       6
...          ...     ...     ...
5167863   352930   16918       9
5167869   352930   17074      10
5167895   352930   18001       8
5167902   352930   18245       7
5167903   352930   18247       8

[1630284 rows x 3 columns]


In [102]:
n_users = rating_popular_anime.user_id.unique()
n_movies = rating_popular_anime.MAL_ID.unique()
sparsity = round(1.0 - len(rating_popular_anime) / float(n_users.shape[0] * n_movies.shape[0]), 3)
print('The sparsity level of rating_popular_anime dataset is ' +  str(sparsity * 100) + '%')

The sparsity level of rating_popular_anime dataset is 45.6%


In [108]:
# user item interaction matrix
anime_matrix_UII=rating_popular_anime.pivot_table(index='user_id',columns='MAL_ID',values='rating').fillna(0)
anime_matrix_UII.head()

MAL_ID,1,5,6,18,19,20,24,30,31,32,...,40716,40776,40839,40902,40956,41120,41168,41226,41353,41389
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
478,0.0,0.0,0.0,0.0,0.0,7.0,8.0,0.0,0.0,0.0,...,0.0,7.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0
781,7.0,0.0,10.0,8.0,10.0,7.0,7.0,0.0,0.0,0.0,...,8.0,9.0,7.0,4.0,7.0,0.0,6.0,7.0,7.0,3.0
853,9.0,10.0,8.0,9.0,8.0,9.0,0.0,8.0,0.0,8.0,...,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
890,9.0,9.0,9.0,8.0,10.0,7.0,2.0,8.0,7.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
912,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,...,8.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0


In [109]:
test_user_rating = anime_matrix_UII[1]
test_user_rating

user_id
478        0.0
781        7.0
853        9.0
890        9.0
912        0.0
          ... 
352832     0.0
352835     0.0
352922     0.0
352924    10.0
352930    10.0
Name: 1, Length: 2998, dtype: float64

In [133]:
# generalize above exploration to a fit function
def predict(input_MAL_ID, recommend_count):
    '''
    :return: dataframe of recommended anime
    '''
    user_rating_input = anime_matrix_UII[input_MAL_ID]
    similar_to_input_anime = anime_matrix_UII.corrwith(user_rating_input)
    corr_input = pd.DataFrame(similar_to_input_anime, columns=['correlation'])
    corr_input.dropna(inplace=True)
    corr_input.reset_index(inplace=True)
    corr_input = pd.merge(corr_input, anime_df, on="MAL_ID")
    recommendation = corr_input[corr_input['Completed'] > 10000].sort_values(by='correlation', ascending=False).head(recommend_count+1)
    recommendation.drop(recommendation.index[recommendation['MAL_ID'] == input_MAL_ID], inplace=True)
    return recommendation

In [134]:
# test functionality on another anime
recommendation_test = predict(1, 5)
recommendation_test

Unnamed: 0,MAL_ID,correlation,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Completed
1,5,0.653634,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Movie,1,Unknown,Bones,Original,R - 17+ (violence & profanity),208333
2,6,0.468294,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",TV,26,Spring 1998,Madhouse,Manga,PG-13 - Teens 13 or older,343492
49,205,0.44087,Samurai Champloo,8.5,"Action, Adventure, Comedy, Historical, Samurai, Shounen",TV,26,Spring 2004,Manglobe,Original,R - 17+ (violence & profanity),551621
84,467,0.420354,Koukaku Kidoutai: Stand Alone Complex,8.45,"Action, Military, Sci-Fi, Police, Mecha, Seinen",TV,26,Fall 2002,Production I.G,Manga,R - 17+ (violence & profanity),170891
11,43,0.397354,Koukaku Kidoutai,8.29,"Action, Mecha, Police, Psychological, Sci-Fi, Seinen",Movie,1,Unknown,Production I.G,Manga,R+ - Mild Nudity,325682


In [158]:
recommendation_for_all = animes_popular[:100].sort_values(by='Score', ascending=False)[:10]

In [164]:
def recommend_for_user(user_id_input):
    '''
    Take user_id to get top-rated anime by the user, then for each anime make prediction
    return the highest 10 correlation anime
    :param user_id_input: user id
    :return: dataframe of recommended anime
    '''
    recommendation_df = pd.DataFrame(columns=['MAL_ID', 'correlation'])
    users_groups = rating_popular_anime.groupby('user_id')
    if user_id_input in n_users:
        user_fav_anime = users_groups.get_group(user_id_input).sort_values(by='rating', ascending=False)[:5]['MAL_ID'].tolist()
        for user_fav_MAL_ID in user_fav_anime:
            r = predict(user_fav_MAL_ID, 5)
            recommendation_df = recommendation_df.append(r, ignore_index=True)

        recommendation_df.sort_values('correlation', ascending=False).drop_duplicates('MAL_ID')
        return recommendation_df[:10]
    else:
        return recommendation_for_all

In [165]:
recommend_for_user(478)

Unnamed: 0,MAL_ID,correlation,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Completed
0,23281,0.703873,Psycho-Pass 2,7.42,"Action, Sci-Fi, Police, Psychological",TV,11,Fall 2014,Tatsunoko Production,Original,R - 17+ (violence & profanity),352491.0
1,21339,0.523466,Psycho-Pass Movie,7.74,"Action, Military, Police, Sci-Fi",Movie,1,Unknown,Production I.G,Original,R - 17+ (violence & profanity),141876.0
2,23283,0.414906,Zankyou no Terror,8.12,"Mystery, Psychological, Thriller",TV,11,Summer 2014,MAPPA,Original,R - 17+ (violence & profanity),547800.0
3,14513,0.362664,Magi: The Labyrinth of Magic,8.07,"Action, Adventure, Fantasy, Magic, Shounen",TV,25,Fall 2012,A-1 Pictures,Manga,PG-13 - Teens 13 or older,535193.0
4,28223,0.359191,Death Parade,8.2,"Game, Mystery, Psychological, Drama, Thriller",TV,12,Winter 2015,Madhouse,Original,R - 17+ (violence & profanity),857277.0
5,356,0.579805,Fate/stay night,7.34,"Action, Supernatural, Magic, Romance, Fantasy",TV,24,Winter 2006,Studio Deen,Visual novel,R - 17+ (violence & profanity),510563.0
6,10087,0.437722,Fate/Zero,8.34,"Action, Supernatural, Magic, Fantasy",TV,13,Fall 2011,ufotable,Light novel,R - 17+ (violence & profanity),773576.0
7,11741,0.42827,Fate/Zero 2nd Season,8.59,"Action, Supernatural, Magic, Fantasy",TV,12,Spring 2012,ufotable,Light novel,R - 17+ (violence & profanity),649100.0
8,27821,0.410404,Fate/stay night: Unlimited Blade Works Prologue,8.1,"Action, Fantasy, Magic, Supernatural",Special,1,Unknown,ufotable,Visual novel,PG-13 - Teens 13 or older,207405.0
9,28701,0.403618,Fate/stay night: Unlimited Blade Works 2nd Season,8.33,"Action, Fantasy, Magic, Supernatural",TV,13,Spring 2015,ufotable,Visual novel,R - 17+ (violence & profanity),506098.0
