# Building an Anime Recommender System

In [1]:
import datasets, utils
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix, vstack

# NOTE(review): presumably the notebook author's own user id in the dataset,
# kept for producing personal recommendations later — TODO confirm.
MY_ID = 156618

In [2]:
# Load the anime metadata table.
animes = pd.read_csv('data/AnimeList.csv')

# One single-entry mapping {anime_id rendered as a string: title} per row.
# NOTE(review): despite its name this is a *list* of one-key dicts, not a
# single dict — confirm that downstream cells really expect this shape.
anime_dict = [
    {f'{row.anime_id}': row.title}
    for _, row in animes.iterrows()
]

# Distinct anime ids in ascending order.
anime_ids = animes["anime_id"].sort_values().unique()

In [None]:
# Number of CSV rows read per chunk. Kept as a true integer because
# ``chunksize`` is a row count, not a float quantity.
size = 3_000_000
reader = pd.read_csv('data/UserAnimeList.csv', chunksize=size)

# One sparse (users-in-chunk x len(anime_ids)) matrix per chunk.
chunk_list = []

# Each chunk is in dataframe format
# NOTE(review): a user whose ratings straddle a chunk boundary ends up as
# separate rows in consecutive chunks — confirm whether that matters.
for chunk in reader:
    chunk = chunk[['username', 'anime_id', 'my_score']]
    # Drop zero scores so they are not stored as explicit ratings
    # (presumably 0 means "not rated" — TODO confirm).
    chunk = chunk[chunk.my_score != 0]

    # Transform Dataframe to (N=#users x K=#animes) matrix, with scores as values
    chunk = chunk.pivot(index="username", columns="anime_id", values="my_score")

    # Align every chunk to the SAME globally ordered set of anime columns.
    # Appending the chunk's missing ids after its own ids would give each
    # chunk a different column order, so column j would refer to a different
    # anime from one chunk to the next once the chunks are stacked. Reindexing
    # to ``anime_ids`` also drops ids that are absent from the anime table,
    # which would otherwise give chunks differing widths.
    chunk = chunk.reindex(columns=anime_ids)

    chunk = csr_matrix(chunk.fillna(0))  # Convert to SciPy sparse matrix

    chunk_list.append(chunk)

In [None]:
# Stack the per-chunk sparse matrices on top of each other (row-wise) into a
# single CSR matrix. All chunks must share the same number of columns.
X = vstack(chunk_list, format="csr")

In [None]:
from sklearn.decomposition import NMF
# Non-negative matrix factorisation of X with 7 latent components.
# NOTE(review): no random_state is passed, so results may differ between
# runs — confirm whether reproducibility is needed.
model = NMF(n_components=7, init='nndsvda', shuffle=True)
# W: the transformed data, one row per row of X (presumably one per user).
W = model.fit_transform(X)
# H: the fitted components; only available after the fit above.
H = model.components_

In [None]:
# Display the dimensions of the components matrix as a sanity check.
H.shape