# Building an Anime Recommender System

In [1]:
import datasets, utils
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix, vstack

In [2]:
# Load the anime catalogue.
animes = pd.read_csv('data/AnimeList.csv')

# Map anime_id -> display title.  The English title is used when one exists;
# rows without an English title (NaN) fall back to the original title.
# (Testing with pd.isna here would do the opposite: it would replace the
# title with NaN precisely for the rows that have no English title.)
anime_dict = dict(zip(animes.anime_id, animes.title_english.fillna(animes.title)))

# Sorted, de-duplicated anime ids.
anime_ids = animes.sort_values(by="anime_id").anime_id.unique()

In [3]:
def _ratings_to_csr(ratings, anime_index):
    """Turn a (username, anime_id, my_score) frame into a sparse rating block.

    Returns a ``(block, users)`` pair.  ``block`` is a CSR matrix with one row
    per distinct username (in sorted order) and one column per entry of
    ``anime_index``; ``users`` lists the usernames in row order.
    """
    # Keep a single score per (user, anime): building a sparse matrix from
    # coordinates sums duplicate entries, which would inflate the score.
    ratings = ratings.drop_duplicates(subset=['username', 'anime_id'], keep='last')
    row_codes, users = pd.factorize(ratings['username'], sort=True)
    col_codes = anime_index.get_indexer(ratings['anime_id'])
    known = col_codes >= 0  # get_indexer returns -1 for ids missing from anime_index
    scores = ratings['my_score'].to_numpy(dtype=float)
    block = csr_matrix(
        (scores[known], (row_codes[known], col_codes[known])),
        shape=(len(users), len(anime_index)),
    )
    return block, users.tolist()


size = 3_000_000  # rows per chunk (an integer row count)
reader = pd.read_csv(
    'data/UserAnimeList.csv',
    usecols=['username', 'anime_id', 'my_score'],
    chunksize=size,
)

# Column layout of every block: position k <-> anime_ids[k].
anime_index = pd.Index(anime_ids)

chunk_list = []  # one CSR block per processed chunk
userlist = []    # usernames, in the same order as the stacked block rows
carry = None     # rows of the last user of the previous chunk, held back

# Each chunk is in dataframe format
for chunk in reader:
    # A score of 0 (or a missing score) is not a rating and is dropped.
    chunk = chunk.dropna(subset=['username', 'my_score'])
    chunk = chunk[chunk.my_score != 0]

    if carry is not None:
        chunk = pd.concat([carry, chunk], ignore_index=True)
    if chunk.empty:
        continue

    # Hold back the last user of this chunk: their remaining rows may only
    # arrive with the next chunk, and emitting them now would give that user
    # two separate matrix rows.
    # NOTE(review): this only helps if each user's rows are contiguous in the
    # CSV -- TODO confirm against the data file.
    is_last_user = chunk.username == chunk.username.iloc[-1]
    carry = chunk[is_last_user]
    chunk = chunk[~is_last_user]
    if chunk.empty:
        continue

    # Build the sparse (users x animes) block directly from the ratings,
    # without materialising a dense users x animes table first.
    block, users = _ratings_to_csr(chunk, anime_index)
    chunk_list.append(block)
    userlist.extend(users)

# Flush the rows still held back after the final chunk.
if carry is not None and not carry.empty:
    block, users = _ratings_to_csr(carry, anime_index)
    chunk_list.append(block)
    userlist.extend(users)

In [4]:
# Stack the per-chunk blocks row-wise into a single CSR rating matrix.
# Rows keep the order in which the blocks were appended to chunk_list.
X = vstack(blocks=chunk_list, format="csr")

In [5]:
from sklearn.decomposition import NMF

# Non-negative matrix factorisation of the rating matrix with 7 components.
model = NMF(
    init='nndsvda',
    n_components=7,
    shuffle=True,
)

# W: the transformed X returned by the fit; H: the fitted components.
W = model.fit_transform(X)
H = model.components_

In [None]:
# Row of X (and of W) that belongs to the user we want recommendations for.
# userlist is filled in the same order as the rows stacked into X, so the
# list position is the row index and no offset is applied.
PYTHON_ID = userlist.index('Manuel')

# Predicted score for every anime: this user's row of W times H.
prediction = pd.Series(np.dot(W[PYTHON_ID], H), index=anime_ids)

# nonzero() yields column positions of the stored ratings; translate those
# positions into anime ids before dropping them from the predictions.
_, watched = X[PYTHON_ID].nonzero()
watched = anime_ids[watched]
prediction = prediction.drop(list(watched))

# Print the titles of the 50 highest-scoring anime that were not rated.
for i in prediction.sort_values(ascending=False).head(50).index:
    print(anime_dict[i])

In [44]:
from sklearn.decomposition import TruncatedSVD
from sklearn.manifold import TSNE

# Project the rating matrix onto 7 components with a truncated SVD
# (7 solver iterations).
svd = TruncatedSVD(n_iter=7, n_components=7)
X_trans = svd.fit_transform(X)

In [None]:
# Embed the SVD-reduced data in two dimensions with t-SNE.
embedder = TSNE(n_components=2)
X_embedded = embedder.fit_transform(X_trans)

In [None]:
# fit_transform returns one row per sample and one column per embedding
# dimension, so the x / y coordinates are the two columns of X_embedded
# (indexing the first two rows would plot only two samples' values).
plt.scatter(X_embedded[:, 0], X_embedded[:, 1])