## Anime Collaborative Filtering Example

Dataset: [Anime Recommendations Database](https://www.kaggle.com/CooperUnion/anime-recommendations-database) on Kaggle.

In [None]:
from fastai.collab import *
from fastai.tabular import *

In [None]:
# Load the ratings table from CSV and report its (rows, columns) shape.
ratings = pd.read_csv("popular_ratings.csv")
print(ratings.shape)

In [None]:
# Preview the first rows of the ratings table.
ratings.head()

In [None]:
# Load the anime metadata table from CSV and report its (rows, columns) shape.
animes = pd.read_csv("popular_animes.csv")
print(animes.shape)

In [None]:
# Preview the first rows of the anime metadata table.
animes.head()

In [None]:
# Build the collaborative-filtering data bunch from the ratings table.
# The three *_name keywords tell fastai which columns hold the user, the item
# and the rating; seed and bs are passed through as the split seed and the
# batch size.
data = CollabDataBunch.from_df(
    ratings,
    user_name="user_id",
    item_name="name",
    rating_name="rating",
    seed=24,
    bs=128,
)

In [None]:
# Display a sample batch from the data bunch as a sanity check.
data.show_batch()

In [None]:
# Bare expression: the notebook renders the data bunch's repr as cell output.
data

In [None]:
# Bounds handed to collab_learner as y_range.
# NOTE(review): presumably ratings run up to 10 and the 10.5 upper bound leaves
# headroom so the top score is reachable — confirm against the ratings data.
y_range = [0, 10.5]
# Collaborative-filtering learner with 40 factors per user/item and weight
# decay (wd) of 2e-2.
learn = collab_learner(data, n_factors=40, y_range=y_range, wd=2e-2)

In [None]:
# Print the learner's loss function and model architecture for inspection.
print(learn.loss_func)
print(learn.model)

In [None]:
# Run the learning-rate finder, then plot what the recorder captured so a
# learning rate can be picked by eye.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Train with fit_one_cycle: 20 as the cycle length, 1e-2 as the learning rate.
learn.fit_one_cycle(20, 1e-2)

In [None]:
# Save the trained learner under the name 'dotprod'.
learn.save('dotprod')

## Interpretation

In [None]:
# Reload the 'dotprod' checkpoint; the trailing semicolon keeps the notebook
# from echoing the call's return value.
learn.load('dotprod');

### Anime Bias

In [None]:
# Order the animes from highest to lowest calculated_average_rating. Whatever
# order the CSV arrived in (reportedly descending number of ratings) is
# replaced by this sort.
animes_sorted = animes.sort_values(by="calculated_average_rating", ascending=False)

# Despite the name, top_animes holds the name of *every* anime, in the
# rating-sorted order above.
top_animes = animes_sorted.name.values

# Look up the learned per-item bias for each anime and attach it as a column.
item_bias = learn.bias(top_animes, is_item=True)
animes_sorted = animes_sorted.assign(bias=item_bias)

In [None]:
# Scatter the learned bias against the average rating of each anime.
rating_vs_bias = animes_sorted[["calculated_average_rating", "bias"]]
rating_vs_bias.plot(x="calculated_average_rating", y="bias", kind="scatter")

### Anime Weights

In [None]:
# Fetch the learned item weights for every anime, in top_animes order.
anime_weights = learn.weight(top_animes, is_item=True)
# NOTE(review): anime_weights is presumably 2-D (one row of factors per anime);
# confirm that assigning it as a single 'weights' column stores what is intended.
animes_sorted = animes_sorted.assign(weights = anime_weights)

In [None]:
# Reduce the item weights to 3 components with the tensor's pca method, then
# display the resulting shape as the cell output.
anime_pca = anime_weights.pca(3)
anime_pca.shape

In [None]:
# Transpose so each PCA component is one row, then unpack the three rows.
fac0,fac1,fac2 = anime_pca.t()

In [None]:
# Transpose once so each row is one PCA component, then attach the three
# components to the table as factor0..factor2.
pca_components = anime_pca.t()
animes_sorted = animes_sorted.assign(
    factor0=pca_components[0],
    factor1=pca_components[1],
    factor2=pca_components[2],
)

In [None]:
# For each PCA factor, print the five animes at each extreme: first the lowest
# values (ascending sort), then the highest (descending sort).
summary_columns = ["name", "bias", "factor0", "factor1", "factor2"]
for sort_ascending in (True, False):
    for factor_column in ("factor0", "factor1", "factor2"):
        ranked = animes_sorted[summary_columns].sort_values(
            by=[factor_column], ascending=sort_ascending
        )
        print(ranked.head())

In [None]:
# Plot animes in the plane of the first two PCA factors, labelling each point
# with its name. Only the two ends of top_animes are shown: the first n//2 and
# the last n//2 entries.
n = 50
total = len(top_animes)
if total <= n:
    # Fewer animes than requested: the two end slices would overlap or run past
    # the end of the factor tensors, so plot every anime exactly once instead.
    idxs = list(range(total))
else:
    idxs = list(range(n//2)) + list(range(total - n//2, total))
X = fac0[idxs]
Y = fac1[idxs]
plt.figure(figsize=(15,15))
plt.scatter(X, Y)
for title, x, y in zip(top_animes[idxs], X, Y):
    # Random colour per label; scaling by 0.7 caps each RGB channel below 0.7
    # so no label is drawn in a near-white colour.
    plt.text(x,y,title, color=np.random.rand(3)*0.7, fontsize=11)
plt.show()

In [None]:
# IPython shell escape: export this notebook to HTML with nbconvert, writing
# the output under the dated name given to --output.
!jupyter nbconvert anime-collab-filter.ipynb --to html --output 2019-07-17_anime-collab-filter