In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd
from scipy import sparse
from admm_slim import DenseSlim

## Use benchmark data
- Download the dataset from Kaggle, unzip it, and place `rating.csv` in the `./data` directory.
- URL - https://www.kaggle.com/CooperUnion/anime-recommendations-database

In [None]:
# Load the ratings and keep only the rows whose rating is strictly positive.
anime_rating = (
    pd.read_csv("./data/rating.csv")
    .loc[lambda ratings: ratings["rating"] > 0]
)

In [None]:
# Drop rarely seen users and animes. Both counts are taken on the same
# (unfiltered) frame before either threshold is applied.
user_counts = anime_rating["user_id"].value_counts()
item_counts = anime_rating["anime_id"].value_counts()

# Users with at least 10 ratings, animes with at least 100 ratings.
target_users = user_counts[user_counts >= 10].index
target_items = item_counts[item_counts >= 100].index

anime_rating = anime_rating.loc[
    anime_rating["user_id"].isin(target_users)
    & anime_rating["anime_id"].isin(target_items)
]

In [None]:
# Distinct user / anime ids, in order of first appearance in the frame.
users = anime_rating["user_id"].unique().tolist()
animes = anime_rating["anime_id"].unique().tolist()

# Map every original id to a contiguous 0-based position.
user_id_dict = dict(zip(users, range(len(users))))
anime_id_dict = dict(zip(animes, range(len(animes))))

In [None]:
# Translate every rating row into its (user position, anime position) pair.
user_ids = list(map(user_id_dict.__getitem__, anime_rating["user_id"]))
anime_ids = list(map(anime_id_dict.__getitem__, anime_rating["anime_id"]))

In [None]:
# One entry per rating row: value 1 at (user position, anime position).
data = np.ones(anime_rating.shape[0])
coodinates = user_ids, anime_ids
X = sparse.coo_matrix((data, coodinates), dtype=np.int8)

In [None]:
# Display the interaction matrix (the bare expression is the cell's output).
X

## Fit Dense SLIM model

In [None]:
# Create the model.
# NOTE(review): lambda_2 is presumably the L2 regularization strength — confirm against admm_slim.DenseSlim.
model = DenseSlim(lambda_2=1000)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out part of the rows of X as a test set; random_state fixes the shuffle.
# Rows of X are indexed by user, so the split is by user rather than by interaction.
X_train, X_test = train_test_split(X, random_state=123)

In [None]:
# Show the shapes of the train and test matrices.
X_train.shape, X_test.shape

In [None]:
# Fit the model on the training rows only.
model.fit(X_train)

In [None]:
# Number of leading test rows used for recommendation and evaluation.
n_target_user = 1000

In [None]:
# Ask the model for recommendations for the first n_target_user test rows
# (top=20 is presumably the number of items returned per row — confirm in admm_slim).
# NOTE(review): these same rows are later passed to evaluate_score as ground truth;
# confirm how model.recommend treats items already present in its input.
recommended = model.recommend(X_test.tocsr()[:n_target_user], top=20)

In [None]:
def evaluate_score(X, recommended):
    """Compute hit rate, recall and MAP of recommendations against X.

    Parameters
    ----------
    X : scipy sparse matrix
        Ground-truth interactions, one row per user and one column per item.
        Stored entries of a row are treated as that user's positive items;
        the row sum is used as the number of positives, so X is expected to
        be binary.
    recommended : sequence of array-like
        ``recommended[i]`` holds the recommended item column indices for
        row ``i`` of X, best first.

    Returns
    -------
    dict
        ``'hit'``    : fraction of users with at least one recommended positive.
        ``'recall'`` : mean of (number of hits / number of positives).
        ``'MAP'``    : mean average precision, where the average precision of a
                       user is the sum of precision@k over the ranks k that are
                       hits, divided by min(number of positives, list length).

        Users without any positive item are skipped because recall and average
        precision are undefined for them. If no user can be evaluated, every
        value is NaN.
    """
    X_csr = X.tocsr()
    # np.asarray(...).ravel() works for both sparse matrices and sparse arrays
    # (the latter have no ``.A1`` attribute).
    positives = np.asarray(X_csr.sum(axis=1)).ravel()

    hits, recalls, APs = [], [], []

    for irow, n_positive in enumerate(positives):
        if n_positive == 0:
            # No ground truth for this user: avoid 0/0 polluting the means.
            continue

        recommended_row = np.asarray(recommended[irow])

        # Extract the column indices of the entries stored in the target row.
        start = X_csr.indptr[irow]
        end = X_csr.indptr[irow + 1]
        flags = np.isin(recommended_row, X_csr.indices[start:end])

        n_hit = int(flags.sum())
        hits.append(n_hit > 0)
        recalls.append(n_hit / n_positive)

        if n_hit > 0:
            # precision@k for every rank k, then keep only the ranks that hit.
            precisions = np.cumsum(flags) / (np.arange(len(flags)) + 1)
            APs.append(precisions[flags].sum() / min(n_positive, len(flags)))
        else:
            APs.append(0.0)

    if not hits:
        nan = float('nan')
        return {'hit': nan, 'recall': nan, 'MAP': nan}

    stats = {
        'hit': np.mean(hits),
        'recall': np.mean(recalls),
        'MAP': np.mean(APs),
    }
    return stats

In [None]:
# Score the recommendations against the same slice of test rows that was passed to model.recommend.
evaluate_score(X_test.tocsr()[:n_target_user], recommended)