# NMF - No watch times
Watch times are not used: the model is fit only on the binary adjacency **user - (tag) - author** matrix.

## Init

In [1]:
import numpy as np
import pandas as pd
from scipy import sparse
from functools import partial

import plotly.express as px


# import implicit # Matrix Factorization

from sklearn.decomposition import NMF
from sklearn.model_selection import KFold
from cmfrec import CMF_implicit

In [2]:
import sys
sys.path.append("../")
from src.data_preprocessing import TrainTestGenerator
from src.evaluator import Evaluator

In [3]:
# Notebook-level switch; not referenced by any of the cells visible here.
# NOTE(review): presumably gates rendering of Plotly figures (the name looks
# like a typo of "plotly") — confirm before renaming, other cells may use it.
show_ploty = False

In [4]:
# Location of the input data, relative to the notebook's directory.
data_dir = "../data/"
# Project helper (src.data_preprocessing) that is handed to the Evaluator
# below; presumably yields the per-year train/test splits — verify in src.
data_generator = TrainTestGenerator(data_dir)

## Model

In [5]:
# Model wrapper

class CMF_recommender:
    """Thin adapter exposing ``cmfrec.CMF_implicit`` through fit/recommend.

    Interactions are treated as binary: whatever ``weight`` the input
    carries is replaced by 1 before the model is trained.
    """

    def __init__(self, k=32):
        """Create the underlying model.

        Args:
            k: Value forwarded as ``k`` to ``CMF_implicit``.
        """
        # Tunables deliberately not passed (kept as a reminder of candidates):
        # verbose=False, method="als", lambda_=1e+1
        hyperparams = dict(
            k=k,
            nonneg=True,
            random_state=1,  # fixed seed
            niter=100,
        )
        self.model = CMF_implicit(**hyperparams)

    def fit(self, data: pd.DataFrame):
        """Train the model on a user/artist interaction frame.

        Args:
            data: Frame with ``userID`` and ``artistID`` columns. An existing
                ``weight`` column is ignored (overwritten with 1 on a copy);
                the caller's frame is left untouched.
        """
        # Column names the model is fed with.
        column_map = {
            "userID": "UserId",
            "artistID": "ItemId",
            "weight": "Rating",
        }
        # assign() works on a copy, so every interaction gets weight 1
        # (binary adjacency matrix) without mutating the input.
        interactions = data.assign(weight=1).rename(columns=column_map)
        self.model.fit(interactions)

    def recommend(self, user_id, n):
        """Return the model's ``topN`` result for ``user_id`` with ``n=n``."""
        return self.model.topN(user_id, n=n)

## Evaluation

In [6]:
# Evaluator (forward chaining)
# The recommender is passed as a class, not an instance, together with the
# data generator. Presumably the Evaluator builds a fresh model per split
# (the timing tables below list one model_fit per year, 2008-2011) — confirm
# in src.evaluator.

evaluator = Evaluator(CMF_recommender, data_generator)
evaluator.evaluate()

# Persist the evaluation output: first path for ranks, second for timings
# (as the file names indicate).
evaluator.save_results("../results/cmf_binary_ranks.csv", "../results/cmf_binary_times.csv")

In [7]:
# Hit Rate
# The table rendered below has one row per year with a `cases` count and
# columns 5, 10, 25, 50, 500 (presumably the top-k cut-offs — verify in
# src.evaluator).

evaluator.get_hit_rates()

Unnamed: 0,cases,5,10,25,50,500
2008,4556,0.019974,0.031826,0.052239,0.070457,0.321115
2009,4687,0.018349,0.028803,0.050779,0.075741,0.255814
2010,6133,0.021197,0.032774,0.060166,0.104027,0.277841
2011,1129,0.059345,0.093003,0.154119,0.210806,0.431355


In [8]:
# Mean Reciprocal Rank
# The table rendered below has one row per year with `cases` and `mrr`.
# NOTE(review): `cases` here differs from the hit-rate table above —
# presumably a different filtering of test cases; confirm in src.evaluator.

evaluator.get_mrr()

Unnamed: 0,cases,mrr
2008,2608,0.027396
2009,3086,0.020595
2010,4306,0.023603
2011,878,0.060311


In [9]:
# Times
# Summary statistics (count/mean/std/quantiles) per timed task; the rendered
# table lists the tasks model_fit, model_init and recommend_user.
# NOTE(review): values look like seconds — confirm the unit in src.evaluator.

evaluator.get_times()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
model_fit,4.0,2.398635,0.499831,1.913306,2.020709,2.351856,2.729782,2.977525
model_init,4.0,2.4e-05,9e-06,1.5e-05,1.7e-05,2.4e-05,3.1e-05,3.4e-05
recommend_user,2622.0,0.000693,0.000377,0.000337,0.000515,0.000654,0.000795,0.008431


In [10]:
# Model fit time broken down per year; the rendered table has one
# model_fit_<year> row for each of 2008-2011.
evaluator.get_fit_per_year_times()

Unnamed: 0_level_0,tag,time
task,Unnamed: 1_level_1,Unnamed: 2_level_1
model_fit,model_fit_2008,1.913306
model_fit,model_fit_2009,2.056509
model_fit,model_fit_2010,2.647202
model_fit,model_fit_2011,2.977525
