## Data Import Part

In [4]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
pd.set_option("max_colwidth", None)

In [5]:
# load anime dataset
usecols = ["MAL_ID", "Name", "Score", "Genres", "Type", "Episodes", "Premiered",
           "Studios", "Source", "Rating", "Completed"]
anime_df = pd.read_csv('dataset/anime.csv', usecols=usecols)

# import rating for anime that users marked complete
user_rating_data = pd.read_csv('dataset/rating_complete.csv')

<font size="5">As the dataset is huge, we will limit the data to top 3000 users who gave ratings and top 1000 anime that are reviewed.</font>


In [6]:
top_users_count = 3000
top_animes_count = 1000

In [22]:
user_ranking = user_rating_data.groupby(['user_id']).count().sort_values('anime_id',ascending=False)
top_users = user_ranking[0:top_users_count]
# key: user_id, value:{mal_id:score}
user_anime_dict = dict()
def populate_user_anime_dict(user_id):
    piece = user_rating_data.loc[user_rating_data['user_id'] == user_id]
    current_user = {}
    for index, row in piece.iterrows():
        mal_id = row["anime_id"]
        score = row["rating"]

        current_user[mal_id] = score

    user_anime_dict[user_id] = current_user
for index, row in top_users.iterrows():
    user_id  = index
    populate_user_anime_dict(user_id)

<font size="5">The code above generated dataframe to be used from the user_anime_dict.
</font>

In [75]:
expand = sorted([(k,k1,v1) for k,v in user_anime_dict.items() for k1,v1 in v.items()],
            key=lambda x: (x[0], x[1]))
df_top = pd.DataFrame(expand, columns=['user_id','MAL_ID','rating'])
animes_popular = anime_df.sort_values('Completed', ascending=False)[0:top_animes_count]
top_animes = animes_popular['MAL_ID'].unique()
rating_popular_anime = df_top[df_top.MAL_ID.isin(top_animes)]
#top_users = rating_popular_anime.user_id.unique()
rating_popular_anime.head()

Unnamed: 0,user_id,MAL_ID,rating
3,478,20,7
5,478,24,8
7,478,43,6
8,478,47,8
9,478,48,6


<font size="5">df_top expanded the mal_id with each user_id, this could be used by other unsupervised training model, so we store the dataframe into a csv file.
</font>

In [11]:
# df_top.to_csv('dataset/top_anime_unsupervised_use.csv', index=False)

## Evaluation Part
<font size="5">Before going to the model part, we first borrow the model from supervised training to evaluate the unsupervised model training result.</font>

In [157]:
import random
import numpy as np
from sklearn.linear_model import LogisticRegression

user_genre_count = pd.read_csv('dataset/users_genres_count.csv')
top_animes_complete = pd.read_csv('dataset/top_animes_complete.csv')

def generate_sets_genre_count(uid_str, current_user, genre_str):
    uid_str_list = uid_str.split(", ")
    genre_str_list = genre_str.split(", ")
    uid_dict = dict()
    for pair in uid_str_list:
        s_pair = pair.split("|")
        uid = int(s_pair[0])
        if uid == current_user:
            continue
        score = int(s_pair[1])
        uid_dict[uid] = 1
    X_list = []
    y_list = []
    for index, row in user_genre_count.iterrows():
        uid = int(row[0])
        if uid == current_user:
            continue
        genres = []
        for genre in genre_str_list:
            genres.append(row[genre])
        temp = []
        for i in range(0, len(genres)):
            temp.append(genres[i])
        arr = np.array(temp)
        X_list.append(arr)
        if uid in uid_dict:
            y_list.append(1)
        else:
            y_list.append(0)
    X = np.array(X_list)
    y = np.array(y_list)
    return X, y

def get_user_info(user_id, genre_str):
    user_info = user_genre_count.loc[user_genre_count['user_id'] == user_id]
    user_info = user_info.iloc[0]
    genre_str_list = genre_str.split(", ")
    temp = []
    for genre in genre_str_list:
        temp.append(user_info[genre])
    user_line = np.array([np.array(temp)])
    return user_line

def predict(user_id, mal_id, user_line=None):
    anime_info = top_animes_complete.loc[top_animes_complete['MAL_ID'] == mal_id].iloc[0]
    if user_line is None:
        user_line = get_user_info(user_id, anime_info["genres"])
    X, y = generate_sets_genre_count(anime_info["completed_user_ids"], user_id, anime_info["genres"])
    clf = LogisticRegression().fit(X, y)
    result = clf.predict(user_line)
    return result[0]

def generate_random_test_users(test_users_count):
    return random.sample(list(top_users.index), test_users_count)

#test_users = [272784, 98466, 11716, 45741, 208254]

def evaluate_unsupervised(algorithm):
    test_users = generate_random_test_users(5)
    for user in test_users:
        r = algorithm(user)
        count = 0
        r_mal_id = get_mal_id(r)
        #print(r_mal_id)
        for mal_id in r_mal_id:
            current_result = predict(user, mal_id)
            if current_result == 1:
                count += 1
        print("Precision for user " + str(user) + " is: " + str(count / len(r)))

def get_mal_id(recommendation_df):
    return list(recommendation_df['MAL_ID'])

## Model Part
## User-based KNN

<font size="5">After sort out the animes associated with each user, now we will perform unsupervised learning with KNN. To fit the model, we will further process data to create a sparse user item matrix to fit into the data. As the calculation will be very costly, we also use the limited data with 1000 animes and 3000 users.
</font>


In [158]:
anime_matrix_UII_user_based = rating_popular_anime.pivot_table(index='MAL_ID', columns='user_id', values='rating').fillna(0)
anime_matrix_UII_user_based.head()

user_id,478,781,853,890,912,943,1177,1231,1393,1397,...,352366,352478,352660,352669,352761,352832,352835,352922,352924,352930
MAL_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,7.0,9.0,9.0,0.0,9.0,8.0,9.0,0.0,8.0,...,7.0,10.0,9.0,9.0,9.0,0.0,0.0,0.0,10.0,10.0
5,0.0,0.0,10.0,9.0,0.0,0.0,8.0,0.0,0.0,8.0,...,7.0,9.0,8.0,8.0,7.0,0.0,0.0,0.0,9.0,7.0
6,0.0,10.0,8.0,9.0,0.0,0.0,7.0,0.0,0.0,8.0,...,8.0,8.0,0.0,7.0,10.0,10.0,9.0,0.0,8.0,10.0
18,0.0,8.0,9.0,8.0,0.0,0.0,8.0,0.0,0.0,7.0,...,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,8.0,8.0
19,0.0,10.0,8.0,10.0,0.0,0.0,10.0,9.0,0.0,0.0,...,0.0,10.0,0.0,9.0,0.0,10.0,0.0,7.0,9.0,9.0


In [159]:
def predict_user_based(input_u_ID, recommend_count):
    user_rating_input = anime_matrix_UII_user_based[input_u_ID]
    similar_to_input_anime = anime_matrix_UII_user_based.corrwith(user_rating_input)
    corr_input = pd.DataFrame(similar_to_input_anime, columns=['correlation'])
    corr_input.dropna(inplace=True)
    corr_input.reset_index(inplace=True)
    corr_input = corr_input[(corr_input['user_id'].isin(top_users.index))]
    corr_input.drop(corr_input.index[corr_input['user_id'] == input_u_ID], inplace=True)
    corr_input = corr_input.sort_values(by='correlation', ascending=False).head(recommend_count+1)
    return corr_input

In [160]:
def recommend_user_based(user_id_input):
    rec = []
    # we will find the closest 10 users
    similar_users = predict_user_based(user_id_input, 10)
    u_groups=rating_popular_anime.groupby('user_id')
    user_current_history = u_groups.get_group(user_id_input)['MAL_ID'].tolist()
    for index, s_user in similar_users.iterrows():
        # based on similar users watching rating history, make recommendation
        similar_user_rec = u_groups.get_group(s_user[0]).sort_values(by='rating')
        similar_user_rec_mal_id = similar_user_rec['MAL_ID'].tolist()
        rec.extend([x for x in similar_user_rec_mal_id if x not in user_current_history])

    return animes_popular[animes_popular['MAL_ID'].isin(rec)][:10]

recommend_user_based(478)

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Completed
1393,1535,Death Note,8.63,"Mystery, Police, Psychological, Supernatural, Thriller, Shounen",TV,37,Fall 2006,Madhouse,Manga,R - 17+ (violence & profanity),2146116
6614,11757,Sword Art Online,7.25,"Action, Game, Adventure, Romance, Fantasy",TV,25,Summer 2012,A-1 Pictures,Light novel,PG-13 - Teens 13 or older,1907261
11185,31964,Boku no Hero Academia,8.11,"Action, Comedy, School, Shounen, Super Power",TV,13,Spring 2016,Bones,Manga,PG-13 - Teens 13 or older,1655900
11308,32281,Kimi no Na wa.,8.96,"Romance, Supernatural, School, Drama",Movie,1,Unknown,CoMix Wave Films,Original,PG-13 - Teens 13 or older,1462143
11914,33486,Boku no Hero Academia 2nd Season,8.33,"Action, Comedy, Super Power, School, Shounen",TV,25,Spring 2017,Bones,Manga,PG-13 - Teens 13 or older,1389299
9383,25777,Shingeki no Kyojin Season 2,8.45,"Action, Military, Mystery, Super Power, Drama, Fantasy, Shounen",TV,12,Spring 2017,Wit Studio,Manga,R - 17+ (violence & profanity),1337762
8551,21881,Sword Art Online II,6.79,"Action, Game, Adventure, Romance, Fantasy",TV,24,Summer 2014,A-1 Pictures,Light novel,PG-13 - Teens 13 or older,1199824
9011,23755,Nanatsu no Taizai,7.89,"Action, Adventure, Ecchi, Fantasy, Magic, Shounen, Supernatural",TV,24,Fall 2014,A-1 Pictures,Manga,PG-13 - Teens 13 or older,1151443
6474,11061,Hunter x Hunter (2011),9.1,"Action, Adventure, Fantasy, Shounen, Super Power",TV,148,Fall 2011,Madhouse,Manga,PG-13 - Teens 13 or older,1094486
8625,22199,Akame ga Kill!,7.53,"Action, Adventure, Drama, Fantasy, Shounen",TV,24,Summer 2014,White Fox,Manga,R - 17+ (violence & profanity),1088784


In [162]:
evaluate_unsupervised(recommend_user_based)

Precision for user 213257 is: 1.0
Precision for user 318221 is: 0.7
Precision for user 44567 is: 0.8
Precision for user 34095 is: 0.8
Precision for user 326935 is: 1.0


## KNN item based model
<font size="5">Now let's try KNN item based approach. There are two different ways to calculate the similarities, cosine for distance between items, and pearson for correlation between items.
</font>

In [163]:
# user item interaction matrix
anime_matrix_UII = rating_popular_anime.pivot_table(index='user_id', columns='MAL_ID', values='rating').fillna(0)
anime_matrix_UII.head()

MAL_ID,1,5,6,18,19,20,24,30,31,32,...,40716,40776,40839,40902,40956,41120,41168,41226,41353,41389
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
478,0.0,0.0,0.0,0.0,0.0,7.0,8.0,0.0,0.0,0.0,...,0.0,7.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0
781,7.0,0.0,10.0,8.0,10.0,7.0,7.0,0.0,0.0,0.0,...,8.0,9.0,7.0,4.0,7.0,0.0,6.0,7.0,7.0,3.0
853,9.0,10.0,8.0,9.0,8.0,9.0,0.0,8.0,0.0,8.0,...,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
890,9.0,9.0,9.0,8.0,10.0,7.0,2.0,8.0,7.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
912,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,...,8.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0


In [164]:
def predict_knn_item_based_pearson(input_MAL_ID, recommend_count):
    '''
    :return: dataframe of recommended anime
    '''
    user_rating_input = anime_matrix_UII[input_MAL_ID]
    similar_to_input_anime = anime_matrix_UII.corrwith(user_rating_input)
    corr_input = pd.DataFrame(similar_to_input_anime, columns=['correlation'])
    corr_input.dropna(inplace=True)
    corr_input.reset_index(inplace=True)
    corr_input = pd.merge(corr_input, anime_df, on="MAL_ID")
    recommendation = corr_input[corr_input['Completed'] > 10000].sort_values(by='correlation', ascending=False).head(recommend_count+1)
    recommendation.drop(recommendation.index[recommendation['MAL_ID'] == input_MAL_ID], inplace=True)
    return recommendation

In [165]:
# test input using MAL_ID 1
test_user_rating = anime_matrix_UII[1]
test_user_rating.head()
# test functionality using anime with MAL ID 1, 5 nearest neighbours
recommendation_item_based_pearson_test = predict_knn_item_based_pearson(1, 5)
recommendation_item_based_pearson_test

Unnamed: 0,MAL_ID,correlation,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Completed
1,5,0.653634,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Movie,1,Unknown,Bones,Original,R - 17+ (violence & profanity),208333
2,6,0.468294,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",TV,26,Spring 1998,Madhouse,Manga,PG-13 - Teens 13 or older,343492
49,205,0.44087,Samurai Champloo,8.5,"Action, Adventure, Comedy, Historical, Samurai, Shounen",TV,26,Spring 2004,Manglobe,Original,R - 17+ (violence & profanity),551621
84,467,0.420354,Koukaku Kidoutai: Stand Alone Complex,8.45,"Action, Military, Sci-Fi, Police, Mecha, Seinen",TV,26,Fall 2002,Production I.G,Manga,R - 17+ (violence & profanity),170891
11,43,0.397354,Koukaku Kidoutai,8.29,"Action, Mecha, Police, Psychological, Sci-Fi, Seinen",Movie,1,Unknown,Production I.G,Manga,R+ - Mild Nudity,325682


In [166]:
recommendation_for_all = animes_popular[:100].sort_values(by='Score', ascending=False)[:10]

In [167]:
def recommend_item_based_pearson(user_id_input):
    '''
    Take user_id to get top-rated anime by the user, then for each anime make prediction
    return the highest 10 correlation anime
    :param user_id_input: user id
    :return: dataframe of recommended anime
    '''
    recommendation_df = pd.DataFrame(columns=['MAL_ID', 'correlation'])
    users_groups = rating_popular_anime.groupby('user_id')
    if user_id_input in top_users:
        user_fav_anime = users_groups.get_group(user_id_input).sort_values(by='rating', ascending=False)[:5]['MAL_ID'].tolist()
        for user_fav_MAL_ID in user_fav_anime:
            r = predict_knn_item_based_pearson(user_fav_MAL_ID, 10)
            recommendation_df = recommendation_df.append(r, ignore_index=True)

        recommendation_df.sort_values('correlation', ascending=False).drop_duplicates('MAL_ID')
        return recommendation_df[:10]
    else:
        return recommendation_for_all

In [168]:
recommendation_item_based_knn_pearson = recommend_item_based_pearson(478)
recommendation_item_based_knn_pearson

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Completed
3971,5114,Fullmetal Alchemist: Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Magic, Fantasy, Shounen",TV,64,Spring 2009,Bones,Manga,R - 17+ (violence & profanity),1644938
5683,9253,Steins;Gate,9.11,"Thriller, Sci-Fi",TV,24,Spring 2011,White Fox,Visual novel,PG-13 - Teens 13 or older,1134756
14963,38524,Shingeki no Kyojin Season 3 Part 2,9.1,"Action, Drama, Fantasy, Military, Mystery, Shounen, Super Power",TV,10,Spring 2019,Wit Studio,Manga,R - 17+ (violence & profanity),906824
6474,11061,Hunter x Hunter (2011),9.1,"Action, Adventure, Fantasy, Shounen, Super Power",TV,148,Fall 2011,Madhouse,Manga,PG-13 - Teens 13 or older,1094486
9886,28851,Koe no Katachi,9.0,"Drama, School, Shounen",Movie,1,Unknown,Kyoto Animation,Manga,PG-13 - Teens 13 or older,1151644
11308,32281,Kimi no Na wa.,8.96,"Romance, Supernatural, School, Drama",Movie,1,Unknown,CoMix Wave Films,Original,PG-13 - Teens 13 or older,1462143
3537,4181,Clannad: After Story,8.96,"Slice of Life, Comedy, Supernatural, Drama, Romance",TV,24,Fall 2008,Kyoto Animation,Visual novel,PG-13 - Teens 13 or older,641323
2656,2904,Code Geass: Hangyaku no Lelouch R2,8.91,"Action, Military, Sci-Fi, Super Power, Drama, Mecha",TV,25,Spring 2008,Sunrise,Original,R - 17+ (violence & profanity),1060730
11624,32935,Haikyuu!!: Karasuno Koukou vs. Shiratorizawa Gakuen Koukou,8.87,"Comedy, Sports, Drama, School, Shounen",TV,10,Fall 2016,Production I.G,Manga,PG-13 - Teens 13 or older,619835
14306,37510,Mob Psycho 100 II,8.84,"Action, Slice of Life, Comedy, Supernatural",TV,13,Winter 2019,Bones,Web manga,PG-13 - Teens 13 or older,623709


In [169]:
evaluate_unsupervised(recommend_item_based_pearson)

Precision for user 210238 is: 1.0
Precision for user 35785 is: 1.0
Precision for user 330827 is: 0.8
Precision for user 15083 is: 1.0
Precision for user 165485 is: 0.9


## KNN Item-based - Cosine
<font size="5">Now let's try cosine based calculation approach
</font>


In [170]:
# load top users with anime rating dataset
#rating_data = rating_popular_anime.merge(anime_df, left_on = 'MAL_ID', right_on = 'MAL_ID', how = 'left')
rating_data = df_top.merge(anime_df, left_on = 'MAL_ID', right_on = 'MAL_ID', how = 'left')
rating_data = rating_data[["user_id", "Name", "MAL_ID","rating"]]
combine_anime_rating = rating_data.dropna(axis = 0, subset = ['Name'])
anime_ratingCount = (combine_anime_rating.groupby(by = ['Name'])['rating'].count().reset_index()[['Name', 'rating']])
rating_data = combine_anime_rating.merge(anime_ratingCount, left_on = 'Name', right_on = 'Name', how = 'left')
rating_data = rating_data.drop(columns = "rating_x")
rating_data = rating_data.rename(columns={"rating_y": "rating"})
piviot_table = rating_data.pivot_table(index="Name",columns="user_id", values="rating").fillna(0)
piviot_table.head()

user_id,478,781,853,890,912,943,1177,1231,1393,1397,...,352366,352478,352660,352669,352761,352832,352835,352922,352924,352930
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""0""",0.0,0.0,0.0,307.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,307.0,0.0,0.0
"""Aesop"" no Ohanashi yori: Ushi to Kaeru, Yokubatta Inu",0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",0.0,0.0,0.0,0.0,0.0,0.0,0.0,922.0,0.0,922.0,...,0.0,922.0,0.0,0.0,922.0,0.0,0.0,0.0,0.0,922.0
"""Bungaku Shoujo"" Memoire",0.0,0.0,0.0,0.0,0.0,0.0,0.0,982.0,0.0,982.0,...,0.0,0.0,982.0,982.0,982.0,0.0,0.0,982.0,0.0,982.0
"""Bungaku Shoujo"" Movie",0.0,0.0,0.0,0.0,0.0,0.0,0.0,1196.0,0.0,1196.0,...,0.0,1196.0,1196.0,1196.0,1196.0,0.0,0.0,0.0,1196.0,1196.0


In [171]:
from scipy.sparse import csr_matrix
piviot_table_matrix = csr_matrix(piviot_table.values)
# create knn model and fit matrix
from sklearn.neighbors import NearestNeighbors
item_based_cosine_model = NearestNeighbors(metric="cosine", algorithm="brute")
item_based_cosine_model.fit(piviot_table_matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

In [172]:
def predict_knn_item_based_cosine(anime_name_input, n_neighbors):
    query = piviot_table.loc[anime_name_input, :].values.reshape(1, -1)
    distance, suggestions = item_based_cosine_model.kneighbors(query, n_neighbors)
    recommendation = {}
    for i in range(0, len(distance.flatten())):
        if i != 0:
            recommendation[piviot_table.index[suggestions.flatten()[i]]] = distance.flatten()[i]
    return recommendation

<font size="5">To see the result better, we use name as input this time.
</font>

In [173]:
predict_knn_item_based_cosine('To Heart: Remember My Memories', 10)

{'To Heart': 0.2732727223710337,
 'To Heart: Remember My Memories Specials': 0.29498835462217965,
 'To Heart 2': 0.30237780857340457,
 'To Heart 2 OVA': 0.35235157990445853,
 'To Heart 2 AD': 0.3677287767366777,
 'To Heart 2 AD Plus': 0.38812794166617737,
 'To Heart 2 Special': 0.40202581826607475,
 'To Heart Omake': 0.4271426754758034,
 'To Heart 2 Adnext': 0.4333862988611539}

In [174]:
def recommend_item_based_cosine(user_id_input):
    '''
    Take user_id to get top-rated anime by the user, then for each anime make prediction
    return the highest 10 correlation anime
    :param user_id_input: user id
    :return: dataframe of recommended anime
    '''
    recommendation_df = pd.DataFrame(columns=['MAL_ID'])
    users_groups = rating_popular_anime.groupby('user_id')
    if user_id_input in top_users:
        user_fav_anime = users_groups.get_group(user_id_input).sort_values(by='rating', ascending=False)[:5]['MAL_ID'].tolist()
        for user_fav_MAL_ID in user_fav_anime:
            anime_name=anime_df[anime_df['MAL_ID']==user_fav_MAL_ID]['Name'].values[0]
            r = predict_knn_item_based_cosine(anime_name, 10)
            r_df = anime_df[(anime_df['Name'].isin(r.keys()))]
            distances = []
            for index, row in r_df.iterrows():
                distances.append(r[row['Name']])
            r_df.insert(2, "Distance", distances, True)
            recommendation_df = recommendation_df.append(r_df, ignore_index=True)

        recommendation_df = recommendation_df.sort_values(by='Distance', ascending=True).drop_duplicates('MAL_ID')
        recommendation_df = recommendation_df[recommendation_df['MAL_ID'].isin(list(rating_popular_anime['MAL_ID']))]

        return recommendation_df[:10]
    else:
        return recommendation_for_all

In [175]:
recommendation_item_based_knn_pearson = recommend_item_based_cosine(478)
recommendation_item_based_knn_pearson

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Completed
3971,5114,Fullmetal Alchemist: Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Magic, Fantasy, Shounen",TV,64,Spring 2009,Bones,Manga,R - 17+ (violence & profanity),1644938
5683,9253,Steins;Gate,9.11,"Thriller, Sci-Fi",TV,24,Spring 2011,White Fox,Visual novel,PG-13 - Teens 13 or older,1134756
14963,38524,Shingeki no Kyojin Season 3 Part 2,9.1,"Action, Drama, Fantasy, Military, Mystery, Shounen, Super Power",TV,10,Spring 2019,Wit Studio,Manga,R - 17+ (violence & profanity),906824
6474,11061,Hunter x Hunter (2011),9.1,"Action, Adventure, Fantasy, Shounen, Super Power",TV,148,Fall 2011,Madhouse,Manga,PG-13 - Teens 13 or older,1094486
9886,28851,Koe no Katachi,9.0,"Drama, School, Shounen",Movie,1,Unknown,Kyoto Animation,Manga,PG-13 - Teens 13 or older,1151644
11308,32281,Kimi no Na wa.,8.96,"Romance, Supernatural, School, Drama",Movie,1,Unknown,CoMix Wave Films,Original,PG-13 - Teens 13 or older,1462143
3537,4181,Clannad: After Story,8.96,"Slice of Life, Comedy, Supernatural, Drama, Romance",TV,24,Fall 2008,Kyoto Animation,Visual novel,PG-13 - Teens 13 or older,641323
2656,2904,Code Geass: Hangyaku no Lelouch R2,8.91,"Action, Military, Sci-Fi, Super Power, Drama, Mecha",TV,25,Spring 2008,Sunrise,Original,R - 17+ (violence & profanity),1060730
11624,32935,Haikyuu!!: Karasuno Koukou vs. Shiratorizawa Gakuen Koukou,8.87,"Comedy, Sports, Drama, School, Shounen",TV,10,Fall 2016,Production I.G,Manga,PG-13 - Teens 13 or older,619835
14306,37510,Mob Psycho 100 II,8.84,"Action, Slice of Life, Comedy, Supernatural",TV,13,Winter 2019,Bones,Web manga,PG-13 - Teens 13 or older,623709


Now let's combine our recommendation result from unsupervised knn with supervised training model to see the accuracy result.

In [177]:
evaluate_unsupervised(recommend_item_based_cosine)

Precision for user 343191 is: 1.0
Precision for user 259584 is: 0.9
Precision for user 222635 is: 1.0
Precision for user 337507 is: 0.9
Precision for user 214123 is: 1.0
