In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
import operator
import scipy as sp

In [3]:
# Get data

anime_df = pd.read_csv("/content/drive/MyDrive/anime.csv")
rating_df = pd.read_csv("/content/drive/MyDrive/rating.csv")

In [4]:
# Merging the data

merged_table = rating_df.merge(anime_df, left_on = 'anime_id', right_on = 'anime_id', suffixes= ['_user', ''])
merged_table= merged_table[merged_table.user_id <=20000]

merged_table=merged_table[['user_id', 'name', 'rating_user']]
merged_table.head()

Unnamed: 0,user_id,name,rating_user
0,1,Naruto,-1
1,3,Naruto,8
2,5,Naruto,6
3,6,Naruto,-1
4,10,Naruto,-1


In [6]:
merged_table.head()

Unnamed: 0,user_id,name,rating_user
0,1,Naruto,-1
1,3,Naruto,8
2,5,Naruto,6
3,6,Naruto,-1
4,10,Naruto,-1


In [7]:
# Creating a pivot table which helps in defining the similarity between users and predict.
pivot = merged_table.pivot_table(index=['user_id'], columns=['name'], values='rating_user')
pivot.head()

name,&quot;0&quot;,&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,&quot;Bungaku Shoujo&quot; Memoire,&quot;Bungaku Shoujo&quot; Movie,&quot;Eiji&quot;,.hack//G.U. Returner,.hack//G.U. Trilogy,.hack//G.U. Trilogy: Parody Mode,.hack//Gift,.hack//Intermezzo,...,lilac (bombs Jun Togawa),makemagic,s.CRY.ed,vivi,xxxHOLiC,xxxHOLiC Kei,xxxHOLiC Movie: Manatsu no Yoru no Yume,xxxHOLiC Rou,xxxHOLiC Shunmuki,◯
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,2.0,,,,,


In [11]:
# Normalizing data by subtracting average rating

normalized_data = pivot.apply(lambda x: (x-np.mean(x))/(np.max(x)-np.min(x)), axis=1)

# Users with 0 rating is dropped from pivot

normalized_data.fillna(0, inplace=True)
normalized_data = normalized_data.T
normalized_data = normalized_data.loc[:, (normalized_data != 0).any(axis=0)]

# data in a sparse matrix format to be read by the following functions

p_sparse = sp.sparse.csr_matrix(normalized_data.values)

# Using cosine similarity to find similarity value for each user/item

item_sim = cosine_similarity(p_sparse)
user_sim = cosine_similarity(p_sparse.T)

# Changing matrix to DF

item_sim_df = pd.DataFrame(item_sim, index = normalized_data.index, columns = normalized_data.index)
user_sim_df = pd.DataFrame(user_sim, index = normalized_data.columns, columns = normalized_data.columns)


In [12]:
# Top similar animes

def top_animes(anime_name):
    count = 1
    print('Users who watch {} also like:\n'.format(anime_name))
    for item in item_sim_df.sort_values(by = anime_name, ascending = False).index[1:11]:
        print('{}'.format( item))
        count +=1  

# Top similar users

def top_users(user):
    
    if user not in normalized_data.columns:
        return('Unavailable data for user {}'.format(user))
    
    print('Most Similar Users:\n')
    sim_values = user_sim_df.sort_values(by=user, ascending=False).loc[:,user].tolist()[1:11]
    sim_users = user_sim_df.sort_values(by=user, ascending=False).index[1:11]
    zipped = zip(sim_users, sim_values,)
    for user, sim in zipped:
        print('User #{0}, Similarity value: {1:.2f}'.format(user, sim))

In [13]:
top_animes('Naruto')

Users who watch Naruto also like:

Bleach
Dragon Ball
Dragon Ball Z
Fairy Tail
Death Note
Ao no Exorcist
Fullmetal Alchemist
D.Gray-man
Soul Eater
Dragon Ball GT


In [14]:
top_users(3)

Most Similar Users:

User #2277, Similarity value: 0.50
User #4647, Similarity value: 0.45
User #3225, Similarity value: 0.41
User #13143, Similarity value: 0.40
User #15384, Similarity value: 0.35
User #6563, Similarity value: 0.34
User #1038, Similarity value: 0.33
User #1406, Similarity value: 0.33
User #17866, Similarity value: 0.33
User #3028, Similarity value: 0.32
