# 1. Importing Libraries

In [None]:
import os 
import pandas as pd
import seaborn as sns
import numpy as np

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer, MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from sklearn.decomposition import PCA




* #### Loading Datasets

In [None]:
# Directory holding the "Anime Recommendation Database 2020" CSV files.
DATA_DIR = '../input/anime-recommendation-database-2020'

# Columns used by the content-based recommender.
usecols = ["MAL_ID", "Name", "Score", "Genres", "Type", "Episodes", "Premiered",
           "Studios", "Source", "Rating", "Members"]

# Parse anime.csv once and derive the content-based subset from it instead of
# reading the same file twice. Selecting with isin() keeps the file's own column
# order, exactly as read_csv(usecols=...) does.
anime = pd.read_csv(os.path.join(DATA_DIR, 'anime.csv'), low_memory=True)
anime_recom = anime.loc[:, anime.columns.isin(usecols)].copy()
rating_data = pd.read_csv(os.path.join(DATA_DIR, 'rating_complete.csv'))

In [None]:
# Give both frames the same "anime_id" key name so they can be joined later.
# rename() ignores frames that have no MAL_ID column.
id_rename = {'MAL_ID': 'anime_id'}
for frame in (anime, rating_data):
    frame.rename(columns=id_rename, inplace=True)

# 2. Recommendation

* #### Function to split comma-separated columns into lists and remove "Unknown" values

In [None]:
def process_multilabel(series):
    """Split one comma-separated cell into a clean list of labels.

    Args:
        series: A single cell value such as ``"Action, Adventure"``. Despite
            the name, this is one string, not a pandas Series.

    Returns:
        list[str]: The labels with surrounding whitespace removed, and with
        empty entries and the placeholder ``"Unknown"`` dropped. Non-string
        input (for example NaN) yields an empty list.
    """
    if not isinstance(series, str):
        return []
    # Values are separated by ", ", so splitting on "," alone leaves leading
    # spaces. Without strip(), "Adventure" and " Adventure" would be encoded as
    # two different classes and " Unknown" would never be filtered out.
    labels = [label.strip() for label in series.split(",")]
    return [label for label in labels if label and label != "Unknown"]

# Turn the comma-separated text columns into lists of labels.
for multilabel_column in ("Genres", "Studios"):
    anime_recom[multilabel_column] = anime_recom[multilabel_column].map(process_multilabel)

# "Unknown" is replaced by 0 so the columns can be cast to numeric types.
for numeric_column, numeric_type in (("Score", float), ("Episodes", int)):
    anime_recom[numeric_column] = anime_recom[numeric_column].replace("Unknown", 0).astype(numeric_type)

anime_recom.head()

### 2.1 Feature Extraction and Feature Engineering

In [None]:
def preprocessing_category(df, column, is_multilabel=False):
    """Replace ``column`` with one indicator column per class.

    Args:
        df: Source DataFrame. It is no longer modified in place; use the
            returned frame.
        column: Name of the categorical column to encode.
        is_multilabel: When True, every cell is a collection of labels and
            ``MultiLabelBinarizer`` is used; otherwise ``LabelBinarizer``.

    Returns:
        pandas.DataFrame: ``df`` without ``column``, followed by one 0/1 column
        per class, with each column named after its class.
    """
    encoder = MultiLabelBinarizer() if is_multilabel else LabelBinarizer()
    encoded = encoder.fit_transform(df[column])
    classes = encoder.classes_

    # LabelBinarizer collapses a two-class column into a single 0/1 column for
    # the second class. Expand it so each class gets its own indicator and the
    # column count matches ``classes``.
    if encoded.shape[1] == 1 and len(classes) == 2:
        encoded = np.hstack([1 - encoded, encoded])

    # Reuse the source index so concat() lines rows up even when ``df`` does
    # not carry a default RangeIndex.
    category_df = pd.DataFrame(encoded, columns=classes, index=df.index)
    return pd.concat([df.drop(columns=column), category_df], axis=1)

# One-hot encode every categorical column on a working copy of the catalogue.
anime_metadata = anime_recom.copy()
for categorical_column, multilabel in (
    ("Type", False),
    ("Premiered", False),
    ("Studios", True),
    ("Source", False),
    ("Rating", False),
):
    anime_metadata = preprocessing_category(
        anime_metadata, categorical_column, is_multilabel=multilabel
    )

# Keep the genre lists and the id/name lookup before removing them from the
# feature table.
Genres = anime_metadata["Genres"]
ID_NAME = anime_metadata[["MAL_ID", "Name"]]

# Remove the columns that are not features. "Unknown" is presumably the
# indicator column created for the "Unknown" category by the encoders above.
for dropped_column in ("Genres", "MAL_ID", "Name", "Unknown"):
    del anime_metadata[dropped_column]

#### 2.2 Normalization


In [None]:
# Rescale the numeric columns to [0, 1], then switch to a plain array for the
# nearest-neighbour model.
numeric_columns = ["Score", "Episodes", "Members"]
scaler = MinMaxScaler()
anime_metadata[numeric_columns] = scaler.fit_transform(anime_metadata[numeric_columns])
anime_metadata = anime_metadata.values

* #### Extracting TF-IDF and One-Hot Genre Features from the DataFrame

In [None]:
# TfidfVectorizer is already imported in the imports cell at the top.

def _genre_text(labels):
    """Return one genre cell as plain text for the vectoriser."""
    if isinstance(labels, str):
        return labels
    if isinstance(labels, (list, tuple)):
        return " ".join(labels)
    return ""  # missing value


# TF-IDF over word 1- to 3-grams of each title's genre text.
tfv = TfidfVectorizer(min_df=3, max_features=None,
                      strip_accents='unicode', analyzer='word', token_pattern=r'\w{1,}',
                      ngram_range=(1, 3),
                      stop_words='english')

# By this point anime_recom['Genres'] holds lists of labels. Join them into
# plain text rather than vectorising the Python list repr ("['Action', ...]")
# and relying on the token pattern to discard the brackets and quotes.
genres_original = anime_recom['Genres'].map(_genre_text)
genres_vector_tf_idf = tfv.fit_transform(genres_original)

# One indicator column per genre.
genres_vector_one_hot = preprocessing_category(pd.DataFrame(Genres), "Genres", True).values

In [None]:
# Report the dimensions of each feature representation.
for feature_name, feature_matrix in (
    ("anime_metadata", anime_metadata),
    ("genres_vector_tf_idf", genres_vector_tf_idf),
    ("genres_vector_one_hot", genres_vector_one_hot),
):
    print(feature_name + ".shape:", feature_matrix.shape)

# 3.1.  Content Based Recommendation using KNN

* #### Creating a Recommendation function using KNN Model

In [None]:
def get_recommended(vector, query_index, n_neighbors=10):
    """Return the catalogue rows most similar to one anime under cosine distance.

    Args:
        vector: Feature matrix (dense or scipy sparse) with one row per anime.
            Rows must be in the same order as the rows of ``anime_recom``.
        query_index: Positional row index of the anime to find neighbours for.
        n_neighbors: Number of recommendations to return.

    Returns:
        pandas.DataFrame: Up to ``n_neighbors`` rows of ``anime_recom``, nearest
        first, never including the query row itself.
    """
    features = csr_matrix(vector)

    # The query row is normally returned as its own nearest neighbour, which
    # previously left only n_neighbors - 1 recommendations. Ask for one extra
    # (capped at the number of rows) and drop the query afterwards.
    n_query = min(n_neighbors + 1, features.shape[0])

    model_knn = NearestNeighbors(metric='cosine', n_neighbors=n_query)
    model_knn.fit(features)

    # Query with the sparse row so dense, sparse and np.matrix inputs all take
    # the same path.
    _, indices = model_knn.kneighbors(features[query_index], n_neighbors=n_query)

    neighbour_positions = [int(i) for i in indices.ravel() if i != query_index]
    return anime_recom.iloc[neighbour_positions[:n_neighbors]]

* #### Select Anime Id on the basis of which recommendations will be done

In [None]:
# Row of the anime whose MAL id is 5231; the recommender cells query with it.
query_index = ID_NAME.loc[ID_NAME["MAL_ID"] == 5231].index[0]
anime_recom.iloc[[query_index]]

* ####  **Based on Metadata (Score, Episodes, Members, Type, Premiered, Studios, Source, Rating)**

In [None]:
# Neighbours in the scaled, one-hot encoded metadata space.
get_recommended(anime_metadata, query_index, n_neighbors=10)

* ####  **Based on TF-IDF n-grams of the Genres text**

In [None]:
# Neighbours in the TF-IDF genre space.
get_recommended(genres_vector_tf_idf, query_index, n_neighbors=10)

* ####  **Based on Genres**

In [None]:
# Neighbours in the one-hot genre space.
get_recommended(genres_vector_one_hot, query_index, n_neighbors=10)

* ####  **Based on all Aspects**

In [None]:
# Stack every feature representation side by side. toarray() returns a plain
# ndarray, whereas todense() returns np.matrix and would turn the whole
# concatenated result into a matrix, which recent scikit-learn estimators reject.
all_data = np.concatenate(
    (anime_metadata, genres_vector_tf_idf.toarray(), genres_vector_one_hot),
    axis=1,
)
all_data.shape

In [None]:
# Neighbours using all feature groups together.
get_recommended(all_data, query_index, n_neighbors=10)

* ####  **Based on Top Features (250 PCA components of all features)**

In [None]:
# Project the combined features onto 250 principal components. The fixed seed
# keeps the result reproducible if PCA selects its randomised SVD solver.
reduced_all_data = PCA(n_components=250, random_state=42).fit_transform(all_data)
get_recommended(reduced_all_data, query_index, 10)

# 3.2. Recommendations using Collaborative Filtering

* #### Preparing Data for Collaborative Filtering

In [None]:
# Count how many ratings each user has submitted.
users_count = rating_data.groupby("user_id").size().reset_index(name="anime_count")

print(users_count.shape)

# Keep only users with at least 300 rated titles.
filtered_users = users_count.loc[users_count["anime_count"] >= 300]
users = set(filtered_users["user_id"])

print(len(users))

In [None]:
# Keep only the ratings made by the selected users.
rating_data = rating_data[rating_data.user_id.isin(users)]
print("rating_data.shape:", rating_data.shape)
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
rating_data.info()

In [None]:
# Map raw ids to matrix positions. Item rows follow the order of
# anime_recom.MAL_ID, so row i matches anime_recom.iloc[i] as long as the ids
# are unique. User columns follow order of first appearance.
unique_users = {int(x): i for i, x in enumerate(rating_data.user_id.unique())}
unique_items = {int(x): i for i, x in enumerate(anime_recom.MAL_ID.unique())}
print(len(unique_items), len(unique_users))

item_rows = rating_data["anime_id"].map(unique_items)
user_cols = rating_data["user_id"].map(unique_users)

# The original loop raised KeyError on a rating for an unknown anime; keep
# failing loudly rather than silently dropping data.
if item_rows.isna().any():
    missing_ids = rating_data.loc[item_rows.isna(), "anime_id"].unique()
    raise KeyError(
        f"rating_data references anime ids missing from anime_recom: {missing_ids[:10].tolist()}"
    )

# Item x user rating matrix; 0 means "not rated". It is filled with a single
# vectorised assignment instead of a Python loop over every rating row.
anime_collabolative_filter = np.zeros((len(unique_items), len(unique_users)))
anime_collabolative_filter[
    item_rows.to_numpy(dtype=np.intp),
    user_cols.to_numpy(dtype=np.intp),
] = rating_data["rating"].to_numpy()

* #### Recommendation based on Collaborative Filtering

In [None]:
# Neighbours in the item x user rating space.
get_recommended(anime_collabolative_filter, query_index, n_neighbors=10)

# 3.3. Recommendation based on Similarity

In [None]:
# Attach each rating's anime title, then drop the numeric id.
anime_titles = anime[["anime_id", "Name"]]
df = rating_data.merge(anime_titles, on="anime_id").drop(columns="anime_id")
df.head()

In [None]:
# Number of ratings per title, most-rated first.
ratings_by_title = df.groupby("Name")["rating"]
count_rating = ratings_by_title.count().sort_values(ascending=False)
count_rating

* #### Keeping only the animes with at least r ratings in the DataFrame

In [None]:
# Minimum number of ratings a title needs to be kept.
r = 10000
more_than_r_ratings = count_rating[count_rating >= r].index

# Vectorised membership test; the previous per-row Python lambda selected the
# same rows but called a Python function once for every rating.
df_r = df[df['Name'].isin(more_than_r_ratings)]

In [None]:
# Print a summary of the filtered ratings frame.
df_r.info()

* #### Creating a pivot table for recommendation

In [None]:
# Users as rows, anime titles as columns, ratings as cell values.
df_recom = pd.pivot_table(df_r, values='rating', index='user_id', columns='Name')
df_recom.iloc[:5, :5]

In [None]:
# The ten titles with the most ratings after filtering.
df_r["Name"].value_counts().head(n=10)


* #### Creating a function to get the correlation of one anime with others.
    


In [None]:
def find_corr(df, name):
    """Correlate one title's ratings with every other title's ratings.

    Args:
        df (DataFrame): One row per user, one column per title, ratings as values.
        name (str): Column name of the title to compare against.

    Returns:
        DataFrame: A single ``Correlation`` column indexed by title, sorted from
        most to least correlated. The queried title itself and titles whose
        correlation is undefined (NaN) are left out, so ``head()`` and ``tail()``
        contain only real candidates.

    Raises:
        KeyError: If ``name`` is not a column of ``df``.
    """
    if name not in df.columns:
        raise KeyError(f"{name!r} is not a column of the ratings table")

    correlations = df.corrwith(df[name])
    # A title always correlates perfectly with itself, which makes it a useless
    # "recommendation". NaN rows would otherwise fill the tail of the ranking.
    correlations = correlations.drop(labels=name, errors="ignore").dropna()
    return correlations.sort_values(ascending=False).to_frame("Correlation")

* #### Arguments to the function are :
    1.   ####   df (DataFrame):  with user_id as rows, anime titles as columns and ratings as values
    1.   ####   name (str): Name of the anime
    
* #### And it Returns a DataFrame with the correlation of the anime with all others
        

 ## Choose an Anime 

In [None]:
# Title to base the recommendations on. Swap in another title from the pivot
# table (for example "Death Note") to try a different one.
anime1 = 'Naruto'

# Titles whose ratings correlate most strongly with the chosen anime.
find_corr(df_recom, anime1).head(n=20)

* ### Not Recommended

In [None]:
# Titles at the bottom of the correlation ranking for the chosen anime.
find_corr(df_recom, anime1).tail(n=10)