# Anime Recommendation System using Nearest Neighbors

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import simplefilter
# install a warnings filter that ignores every FutureWarning raised after this point
# NOTE(review): this also hides deprecation notices from the libraries used below
simplefilter(action='ignore', category=FutureWarning)

# Load the datasets

* Two different datasets will be loaded into dataframes.
* The dataset can be downloaded from https://www.kaggle.com/CooperUnion/anime-recommendations-database

In [None]:
# read both csv files in one pass: anime metadata first, user ratings second
anime, rating = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/anime-recommendations-database/anime.csv',
        '../input/anime-recommendations-database/rating.csv',
    )
)

### anime.csv

* anime_id - myanimelist.net's unique id identifying an anime.
* name - full name of anime.
* genre - comma separated list of genres for this anime.
* type - movie, TV, OVA, etc.
* episodes - how many episodes in this show. (1 if movie).
* rating - average rating out of 10 for this anime.
* members - number of community members that are in this anime's "group".

### rating.csv

* user_id - non identifiable randomly generated user id.
* anime_id - the anime that this user has rated.
* rating - rating out of 10 this user has assigned (-1 if the user watched it but didn't assign a rating).

In [None]:
# report (rows, columns) of each loaded dataframe
for label, frame in (('anime.csv (shape):', anime), ('rating.csv (shape):', rating)):
    print(label, frame.shape)

In [None]:
# preview the first five rows of the anime metadata
anime.head(n=5)

In [None]:
# preview the first five rows of the user ratings
rating.head(n=5)

In [None]:
# count the missing values in every column of the anime dataframe

anime.isna().sum()

In [None]:
# anime that have no average rating get a rating of 0; other columns are left untouched

anime['rating'] = anime['rating'].fillna(0)

Exploratory data analysis is covered in a separate notebook (Anime Recommendation using Pearson r correlation).

# Collaborative Filtering using Nearest Neighbors

<br>

```
* In this recommendation system, we will use the collaborative filtering technique.
* With this technique, the system recommends anime whose user ratings are closest to the user ratings of 
  the anime the user has chosen.
* For example, suppose I have watched 10 anime and rated each of them. A friend has watched one anime from my 
  list and asks me to recommend three more. I would recommend the three anime whose ratings are closest 
  to the rating I gave to the anime that my friend watched.
```

### Process

<br>

```
* Remove anime with a low number of ratings and users who gave a low number of ratings
* Construct the rating matrix
* Convert the rating matrix to a csr matrix to save memory
* Fit the csr rating matrix to the nearest neighbors model
* Retrieve the ten nearest neighbors
* Output the ten recommended anime
```


### Remove anime with low count of ratings and users who gave low count of ratings

* We will only consider popular anime (more than 250 ratings) and users who have rated many different anime (more than 100 ratings)

In [None]:
# number of rating rows recorded for each anime, as a two-column dataframe
anime_rating_count = (
    rating
    .groupby('anime_id')['rating']
    .count()
    .reset_index(name='rating_count')
)
anime_rating_count['rating_count'].describe()

In [None]:
# keep only the anime that have more than 250 rating rows
filtered_anime = anime_rating_count.loc[anime_rating_count['rating_count'].gt(250)]

In [None]:
# first five anime whose rating count is over 250

filtered_anime.head(n=5)

In [None]:
# number of rating rows submitted by each user, as a two-column dataframe
user_rating_count = (
    rating
    .groupby('user_id')['rating']
    .count()
    .reset_index(name='rating_count')
)
user_rating_count['rating_count'].describe()

In [None]:
# keep only the users who have more than 100 rating rows

filtered_user = user_rating_count.loc[user_rating_count['rating_count'].gt(100)]

In [None]:
# first five users whose rating count is over 100
filtered_user.head(n=5)

In [None]:
# step 1: keep the rating rows that belong to a popular anime
filtered_rating_anime = rating.loc[
    rating['anime_id'].isin(filtered_anime['anime_id'])
]
# step 2: of those, keep the rows written by an active user
filtered_rating = filtered_rating_anime.loc[
    filtered_rating_anime['user_id'].isin(filtered_user['user_id'])
]

In [None]:
# what remains: ratings of popular anime given by users with high rating counts

filtered_rating.head(n=5)

### Construct Rating Matrix

* We will construct a matrix using a pivot table in which anime ids are the index (rows) and user ids are the columns

In [None]:
# A rating of -1 means "watched but did not assign a rating" (see the rating.csv
# description), so it is not a real score. Drop those rows so they are treated like
# any other missing rating instead of entering the cosine similarity as a negative value.
explicit_rating = filtered_rating[filtered_rating['rating'] != -1]

# Rows are anime and columns are users; (anime, user) pairs without a score become 0.
# Most of the values are zero since most users have not rated most anime.
rating_matrix = explicit_rating.pivot_table(index='anime_id',columns='user_id',values='rating').fillna(0)
print(rating_matrix.shape)
rating_matrix.head()

### Convert rating matrix to csr matrix to save memory

In [None]:
from scipy.sparse import csr_matrix

# store the mostly-zero rating matrix in compressed sparse row form
csr_rating_matrix = csr_matrix(rating_matrix.to_numpy())

In [None]:
# inspect the sparse representation of the rating matrix
print(csr_rating_matrix)

### Fit the matrix into nearest neighbor

* We are using the unsupervised nearest neighbors algorithm.
* This algorithm finds the k nearest data points, which will be the recommended anime to watch.
* We will use cosine similarity as the metric for the algorithm.

In [None]:
from sklearn.neighbors import NearestNeighbors

# neighbors are ranked with the cosine metric between rows of the fitted matrix
recommender = NearestNeighbors(metric='cosine')
# fit the csr matrix to the algorithm
recommender.fit(csr_rating_matrix)

### Retrieve ten nearest neighbors

In [None]:
# look up the metadata row (including anime_id) of the anime the user picked

user_anime = anime.loc[anime['name'].eq('Bleach')]
user_anime

In [None]:
# Find the row position of the chosen anime inside rating_matrix.
# int() on a whole Series is deprecated in recent pandas, so the scalar is pulled out
# explicitly; if several anime share the name, the first match is used.
if user_anime.empty:
    raise ValueError('the selected anime was not found in the anime dataset')
user_anime_id = int(user_anime['anime_id'].iloc[0])

# the anime may be absent from the rating matrix if it was removed by the filters
matching_rows = np.where(rating_matrix.index==user_anime_id)[0]
if matching_rows.size == 0:
    raise ValueError('the selected anime has no row in the rating matrix')
user_anime_index = matching_rows[0]

# this index is from rating matrix not from the anime dataset
user_anime_index

In [None]:
# pull the chosen anime's row of user ratings out of the rating matrix

user_anime_ratings = rating_matrix.iloc[user_anime_index, :]
user_anime_ratings

In [None]:
# the algorithm expects a 2d array, so add a leading axis to get a single-row matrix

user_anime_ratings_reshaped = user_anime_ratings.to_numpy()[np.newaxis, :]
user_anime_ratings_reshaped

In [None]:
# query the fitted model with the anime's rating row; it returns the distances and
# indices of the 11 nearest neighbors
# note that these indices are row positions in the rating matrix

distances, indices = recommender.kneighbors(
    X=user_anime_ratings_reshaped,
    n_neighbors=11,
)

In [None]:
# indices of the nearest neighbors (row positions in the rating matrix, not anime ids)

indices

In [None]:
# distances from the user's anime to each nearest neighbor, in the same order as `indices`

distances

###  Output ten recommended anime

In [None]:
# translate the neighbor row positions into anime ids via the rating matrix index
# the first neighbor is skipped because the closest match to the query is the anime itself

nearest_neighbors_indices = rating_matrix.index[indices[0][1:]]

In [None]:
# attach the anime metadata to each recommended anime_id, keeping the neighbor order
nearest_neighbors = nearest_neighbors_indices.to_frame(index=False, name='anime_id')
nearest_neighbors.merge(anime, on='anime_id', how='left')