In [1]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
import operator

In [2]:
# load anime dataset
anime_df = pd.read_csv('anime.csv')
anime_df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [3]:
# load rating dataset
rating_df = pd.read_csv('rating.csv')
rating_df.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


# Data preprocessing

Before building the recommendation system, we need to preprocess the data, which includes handling missing values and drop duplicated items.

In [35]:
# to make computation faster, we will only use the first 35000 users
rating_df = rating_df[rating_df['user_id'] <= 9000]

In [36]:
# preprocess the rating dataset, mark -1 as NaN
rating_df['rating'] = rating_df['rating'].replace(-1, np.nan)

In [37]:
# delete duplicate items
rating_df = rating_df.drop_duplicates(['user_id', 'anime_id'])

In [38]:
# create a user-item interaction matrix
user_item_matrix = rating_df.pivot(index='user_id', columns='anime_id', values='rating')

# User-based Collaborative Filtering

To implement user-based collaborative filtering, we need to calculate the similarity between users based on their ratings. We can use similarity metrics such as cosine similarity or Pearson correlation.

In [39]:
# calculate user similarity using cosine similarity
user_sim = cosine_similarity(user_item_matrix.fillna(0))

To make recommendations for a target user, we identify users similar to the target user and recommend anime that they have rated positively.

In [45]:
def user_based_recommendations(user_id,user_item_matrix,user_sim,n=5):
    # get similarity scores for the user
    user_scores = user_sim[user_id - 1]

    # sort users based on their similarity score in descending order
    sorted_user_scores = sorted(enumerate(user_scores), key=lambda x: x[1], reverse=True)

    # Initialize empty list to store the recommendations
    recommendations = []

    for user,score in sorted_user_scores[1:]: # exclude the user itself
        # get anime that user rated positively
        anime_rated = user_item_matrix.loc[user_item_matrix.index[user -1]]
        positive_anime_rated = anime_rated[anime_rated >= 6].index

        # exclude anime that the user already rated
        target_anime = user_item_matrix.loc[user_id].dropna().index
        recommendations.extend(set(positive_anime_rated) - set(target_anime))

        # limit the number of recommendations to n
        if len(recommendations) >= n:
            break

    return recommendations[:n]

# Item based Collaborative filtering

To implement item-based collaborative filtering, we calculate the similarity between anime based on user ratings.

In [41]:
# calculate item similarity using cosine similarity
item_sim = cosine_similarity(user_item_matrix.fillna(0).T)

To make item-based recommendations, we identify anime similar to those the user has already rated positively.

In [46]:
def item_based_recommendations(user_id,user_item_matrix,item_sim,n=5):
    # get the anime that the user rated positively
    positively_rated_anime = user_item_matrix.loc[user_id][user_item_matrix.loc[user_id] >= 6].index

    # Initialize empty list to store the recommendations
    recommended_animes = []

    for anime_id in positively_rated_anime:
        # get anime that are similar to the positively rated anime
        similar_animes = item_sim[anime_id - 1]

        # sort anime based on their similarity score in descending order
        sorted_similar_animes = sorted(enumerate(similar_animes), key=lambda x: x[1], reverse=True)

        for anime,score in sorted_similar_animes[1:]:
            # exclude anime that the user already rated
            if anime not in user_item_matrix.loc[user_id].dropna().index:
                recommended_animes.append(anime)

            # limit the number of recommendations to n
            if len(recommended_animes) >= n:
                break
    
    return recommended_animes[:n]

In [56]:
# choose a user id for recommendation test
user_id = 9

# test user-based recommendation
user_recommendation = user_based_recommendations(user_id,user_item_matrix,user_sim,n=5)

# test item-based recommendation
item_recommendation = item_based_recommendations(user_id,user_item_matrix,item_sim,n=5)

In [57]:
# display the recommendations
print(f"User-based recommendation for user {user_id}:")
for anime_id in user_recommendation:
    anime_name = anime_df.loc[anime_df['anime_id'] == anime_id]['name'].values[0]
    print(f" - {anime_name}")

print(f"\nItem-based recommendation for user {user_id}:")
for anime_id in item_recommendation:
    anime_name = anime_df.loc[anime_df['anime_id'] == anime_id]['name'].values[0]
    print(f" - {anime_name}")

User-based recommendation for user 9:
 - Campione!: Matsurowanu Kamigami to Kamigoroshi no Maou
 - Yamada-kun to 7-nin no Majo (TV)
 - Re:Zero kara Hajimeru Isekai Seikatsu
 - Haiyore! Nyaruko-san
 - Ore ga Ojousama Gakkou ni &quot;Shomin Sample&quot; Toshite Gets♥Sareta Ken

Item-based recommendation for user 9:
 - Sazae-san
 - Gad Guard
 - Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo!
 - Shin Tenchi Muyou!
 - Shinkon Gattai Godannar!! 2nd Season
