# NMF - Non-negative matrix factorization

## Init

In [1]:
import numpy as np
import pandas as pd
from scipy import sparse
from functools import partial

import plotly.express as px
from sklearn.preprocessing import OrdinalEncoder

In [2]:
import sys
sys.path.append("../")
from src.data_preprocessing import TrainTestGenerator
from src.evaluator import Evaluator

In [3]:
data_dir = "../data/"
data_generator = TrainTestGenerator(data_dir)

## NMF code

In [4]:
def update_H(W, H, V):
    numerator = W.T.dot(V)
    denominator = W.T.dot(W).dot(H) + 1e-10
    H = H*(numerator / denominator)
    return H

def update_W(W, H, V):
    numerator = V.dot(H.T)
    denominator = W.dot(H).dot(H.T) + 1e-10
    W = W*(numerator / denominator)
    return W

In [5]:
def do_nnmf(V, rank=10, iter=100):
    
    # Initialize 
    n, m = V.shape
    
    W = np.abs(np.random.randn(1, n, rank))[0]
    H = np.abs(np.random.randn(1, rank, m))[0]
    
    loss = []
    for i in range(iter):
        H = update_H(W, H, V)
        W = update_W(W, H, V)
        
        loss.append(sum((V - W.dot(H)).flatten()**2))

    return H, W, loss

## Model

In [6]:
# Model wrapper

class NMF_recommender:
    def __init__(self, rank=10, iter=100):
        self.rank = rank
        self.iter = iter
        
    def fit(self, data: pd.DataFrame):
        np.random.seed(1)
        
        # Default rankings when userID is not in training set
        self.default_recommendation = data["artistID"].value_counts().index.tolist()

        data = data.copy()
        
        self.user_encoder = OrdinalEncoder()
        self.artist_encoder = OrdinalEncoder()
        
        data[["userID_ordinal"]] = self.user_encoder.fit_transform(
            data[["userID"]].values
        ).astype(int)
        data[["artistID_ordinal"]] = self.artist_encoder.fit_transform(
            data[["artistID"]].values
        ).astype(int)

        X = np.zeros([data["userID"].nunique(), data["artistID"].nunique()])
        X = np.zeros([data["userID"].nunique(), data["artistID"].nunique()])
        X[data["userID_ordinal"], data["artistID_ordinal"]] = 1        

        H, W, loss = do_nnmf(X, self.rank, self.iter)
        self.H = H
        self.W = W

    def recommend(self, user_id, n):
        try: 
            user_idx = self.user_encoder.transform(np.array([[user_id]]))[0,0]
        except ValueError:
            recommendations = self.default_recommendation
        else:
            user_idx = int(user_idx)

            scores = self.W[user_idx, :].dot(self.H)

            recommendations_idx = np.argsort(scores)
            recommendations_idx = recommendations_idx[:n]

            recommendations = self.artist_encoder.inverse_transform(recommendations_idx.reshape(-1, 1))[:, 0]
        
        return recommendations

## Evaluation

In [7]:
# Evaluator (forward chaining)

evaluator = Evaluator(NMF_recommender, data_generator)
evaluator.evaluate()

In [8]:
# Hit Rate

evaluator.get_hit_rates()

Unnamed: 0,cases,5,10,25,50,500
2008,4556,0.018876,0.034021,0.055092,0.089991,0.268876
2009,4687,0.024749,0.041178,0.077022,0.110518,0.308086
2010,6133,0.023969,0.052503,0.083809,0.118213,0.303114
2011,1129,0.009743,0.021258,0.043401,0.065545,0.175376


In [9]:
# Mean Reciprocal Rank

evaluator.get_mrr()

Unnamed: 0,cases,mrr
2008,2608,0.026005
2009,3086,0.028124
2010,4306,0.028787
2011,878,0.013321


In [10]:
# Hit Rate
# rank: 1, iter: 1
evaluator = Evaluator(partial(NMF_recommender, rank=1, iter=1), data_generator)
evaluator.evaluate()

evaluator.get_hit_rates()

Unnamed: 0,cases,5,10,25,50,500
2008,4556,0.019315,0.03446,0.055531,0.09043,0.277875
2009,4687,0.024749,0.041604,0.077662,0.112225,0.315554
2010,6133,0.023969,0.052503,0.083972,0.118213,0.305071
2011,1129,0.009743,0.021258,0.044287,0.069088,0.186005


In [11]:
# # Hit Rate
# # rank: 10, iter: 100
# evaluator = Evaluator(partial(NMF_recommender, rank=20, iter=150), data_generator)
# evaluator.evaluate()

# evaluator.get_hit_rates()