# Non-negative Collective Matrix Factorization (CMF)
## Init

In [1]:
import numpy as np
import pandas as pd
from scipy import sparse
from functools import partial

import plotly.express as px


# import implicit # Matrix Factorization

from sklearn.decomposition import NMF
from sklearn.model_selection import KFold
from cmfrec import CMF_implicit

In [2]:
# Extend the module search path with the parent directory (relative to the
# working directory) so that the project-local `src` package imported below
# can be resolved. This must run before the `src` imports.
import sys
sys.path.append("../")
from src.data_preprocessing import TrainTestGenerator
from src.evaluator import Evaluator

In [3]:
# Notebook-level flag; presumably toggles rendering of Plotly figures (the name
# looks like a typo of `show_plotly`). It is not read by any cell visible
# here — TODO confirm where it is used before renaming.
show_ploty = False

In [4]:
# Directory holding the input data, relative to the working directory.
data_dir = "../data/"
# Project helper constructed from the data directory; it is handed to the
# Evaluator further down. Presumably it yields the train/test splits —
# confirm in src/data_preprocessing.py.
data_generator = TrainTestGenerator(data_dir)

## Model

In [5]:
# Model wrapper

class CMF_recommender:
    """Thin adapter exposing ``fit`` / ``recommend`` on top of ``CMF_implicit``.

    The wrapped model is created with the non-negativity option enabled
    (``nonneg=True``), a fixed seed (``random_state=1``) and ``niter=100``.
    """

    # Incoming column name -> column name handed to the wrapped model.
    _COLUMN_ALIASES = {
        "userID": "UserId",
        "artistID": "ItemId",
        "weight": "Rating",
    }

    def __init__(self, k=32):
        """Build the wrapped ``CMF_implicit`` model.

        Args:
            k: Forwarded unchanged as the model's ``k`` argument
                (the number of latent factors in cmfrec).
        """
        # ``verbose``, ``method`` and ``lambda_`` are not passed here, so they
        # stay at whatever the library defaults are.
        settings = dict(nonneg=True, k=k, random_state=1, niter=100)
        self.model = CMF_implicit(**settings)

    def fit(self, data: pd.DataFrame):
        """Train the wrapped model on an interaction table.

        Args:
            data: Frame whose ``userID``, ``artistID`` and ``weight`` columns
                are relabelled to ``UserId``, ``ItemId`` and ``Rating`` before
                being passed to the model. Any other columns are passed along
                as they are. The work is done on a copy, so the caller's frame
                keeps its original column names.
        """
        relabelled = data.copy().rename(columns=self._COLUMN_ALIASES)
        self.model.fit(relabelled)

    def recommend(self, user_id, n):
        """Return the wrapped model's ``topN`` result for one user.

        Args:
            user_id: Identifier forwarded as the first argument of ``topN``.
            n: Number of items requested, forwarded as ``n``.

        Returns:
            Whatever ``self.model.topN(user_id, n=n)`` returns.
        """
        return self.model.topN(user_id, n=n)

## Evaluation

In [6]:
# Evaluator (forward chaining)
# The recommender *class* (not an instance) and the data generator are passed
# in; presumably the Evaluator builds and fits a fresh model for each split —
# confirm in src/evaluator.py.

evaluator = Evaluator(CMF_recommender, data_generator)
evaluator.evaluate()

# Persist the results to two CSV files (ranks and timings, judging by the file names).
evaluator.save_results("../results/cmf_nonneg_ranks.csv", "../results/cmf_nonneg_times.csv")

In [7]:
# Hit Rate
# One row per evaluation year in the output below; the numeric columns
# (5, 10, 25, 50, 500) are presumably the top-N cutoffs — confirm in src/evaluator.py.

evaluator.get_hit_rates()

Unnamed: 0,cases,5,10,25,50,500
2008,4556,0.006804,0.013169,0.025461,0.039508,0.267998
2009,4687,0.006187,0.012375,0.024109,0.041604,0.226371
2010,6133,0.009946,0.018262,0.044676,0.068645,0.295125
2011,1129,0.034544,0.048716,0.088574,0.13729,0.376439


In [8]:
# Mean Reciprocal Rank
# One row per evaluation year in the output below, with a case count and the MRR value.

evaluator.get_mrr()

Unnamed: 0,cases,mrr
2008,2608,0.013239
2009,3086,0.009615
2010,4306,0.015174
2011,878,0.034374


In [9]:
# Times
# Summary statistics of the recorded timings, grouped by task
# (model_fit, model_init, recommend_user in the output below).
# Units are presumably seconds — confirm in src/evaluator.py.

evaluator.get_times()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
model_fit,4.0,4.622826,0.443652,4.167398,4.28204,4.638031,4.978817,5.047843
model_init,4.0,2.4e-05,1.2e-05,1.5e-05,1.8e-05,2e-05,2.6e-05,4.2e-05
recommend_user,2622.0,0.000767,0.000396,0.000374,0.000604,0.000728,0.000853,0.011483


In [10]:
# Model fit time for each evaluation year (one `model_fit_<year>` tag per row in the output below).
evaluator.get_fit_per_year_times()

Unnamed: 0_level_0,tag,time
task,Unnamed: 1_level_1,Unnamed: 2_level_1
model_fit,model_fit_2008,4.167398
model_fit,model_fit_2009,4.320253
model_fit,model_fit_2010,5.047843
model_fit,model_fit_2011,4.955809
