# Using `Implicit` and Alternating Least Squares (ALS) for Matrix Factorization

This follow's Ben Frederickson's [Finding Similar Music using Matrix Factorization](https://www.benfrederickson.com/matrix-factorization/).

## Preliminaries

**Next two lines are for pretty output for all prints in a Pandas cell, not just the last.**

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

**`DataSci` contains generally helpful data science stuff, while `plotHelpers` includes plot functions specifically.**

In [2]:
import sys
sys.path.append('../../../work/Mlib')
from utility import DataSci as util
from utility import ModelTrain as mt
import plotHelpers as ph

ModuleNotFoundError: No module named 'utility'

### Python imports

In [157]:
import argparse
import codecs
import logging
import time
import sys

import numpy as np
import scipy.sparse as sparse
import itertools
import copy
from sklearn.metrics import mean_squared_error

import tqdm

from implicit.als import AlternatingLeastSquares
from implicit.approximate_als import (AnnoyAlternatingLeastSquares,
                                      FaissAlternatingLeastSquares,
                                      NMSLibAlternatingLeastSquares)
from implicit.evaluation import (precision_at_k, mean_average_precision_at_k,
                                 ndcg_at_k, AUC_at_k, train_test_split)
from implicit.bpr import BayesianPersonalizedRanking
from implicit.datasets.lastfm import get_lastfm
from implicit.datasets.movielens import get_movielens
from implicit.datasets.reddit import get_reddit
from implicit.datasets.sketchfab import get_sketchfab
from implicit.datasets.million_song_dataset import get_msd_taste_profile
from implicit.lmf import LogisticMatrixFactorization
from implicit.nearest_neighbours import (BM25Recommender, CosineRecommender,
                                         TFIDFRecommender, bm25_weight)

### Global dicts

In [4]:
models = {"als":  AlternatingLeastSquares,
          "nmslib_als": NMSLibAlternatingLeastSquares,
          "annoy_als": AnnoyAlternatingLeastSquares,
          "faiss_als": FaissAlternatingLeastSquares,
          "tfidf": TFIDFRecommender,
          "cosine": CosineRecommender,
          "bpr": BayesianPersonalizedRanking,
          "lmf": LogisticMatrixFactorization,
          "bm25": BM25Recommender}

dataSets = {"lastfm": get_lastfm,
            "movielens": get_movielens,
            "reddit": get_reddit,
            "sketchfab": get_sketchfab,
            "million_song": get_msd_taste_profile}

## Functions

### `trainTestSplit()`

In [46]:
# def trainTestSplit(ratings, splitCount, fraction=None):
#     """
#     Stolen from Ethan Rosenthal's Intro to Implicit Matrix Factorization:
#     Classic ALS with Sketchfab Models
#     https://www.ethanrosenthal.com/2016/10/19/implicit-mf-part-1/

#     Split recommendation data into train and test sets

#     In order to track precision@k as an optimization metric, it's necessary to only work with .
#     A k of 5 would be nice. However, if I move 5 items from training to test for some of the users, then they may not have any data left in the training set (remember they had a minimum 5 likes). Thus, the train_test_split only looks for people who have at least 2*k (10 in this case) likes before moving some of their data to the test set. This obviously biases our cross-validation towards users with more likes. So it goes.

#     Params
#     ------
#     ratings : scipy.sparse matrix
#         Interactions between users and items.
#     splitCount : int
#         Number of user-item-interactions per user to move
#         from training to test set.
#     fractions : float
#         Fraction of users to split off some of their
#         interactions into test set. If None, then all
#         users are considered.
#     """

#     # Note: likely not the fastest way to do things below.
#     train = ratings.copy().tocoo()
#     test = sparse.lil_matrix(train.shape)

#     if fraction:
#         try:
#             userIndex = np.random.choice(
#                 np.where(np.bincount(train.row) >= splitCount*2)[0],
#                 replace=False,
#                 size=np.int32(np.floor(fraction*train.shape[0]))
#             ).tolist()
#         except ValueError:
#             print(f"Not enough users with > {2*splitCount} "
#                   f"interactions to obtain a fraction of {fraction}.")
#         print('Try succeeded!')
#     else:
#         userIndex = range(train.shape[0])

#     train = train.tolil()

#     for user in userIndex:
#         testRatings = np.random.choice(ratings.getrow(user).indices,
#                                        size=splitCount,
#                                        replace=False)
#         train[user, testRatings] = 0.0
#         # These are just 1.0 right now
#         test[user, testRatings] = ratings[user, testRatings]

#     # Test and training are truly disjoint
#     assert(train.multiply(test).nnz == 0)
#     return train.tocsr(), test.tocsr(), userIndex

### `calculateMSE()`

In [53]:
# def calculateMSE(model, ratings, userIndex=None):
#     preds = model.predict_for_customers()
#     if userIndex:
#         return mean_squared_error(ratings[userIndex, :].toarray().ravel(),
#                                   preds[userIndex, :].ravel())

#     return mean_squared_error(ratings.toarray().ravel(),
#                               preds.ravel())

### `precisionAtK()`

In [54]:
# def precisionAtK(model, ratings, k=5, userIndex=None):
#     if not userIndex:
#         userIndex = range(ratings.shape[0])
#     ratings = ratings.tocsr()
#     precisions = []
#     # Note: line below may become infeasible for large datasets.
#     predictions = model.predict_for_customers()
#     for user in userIndex:
#         # In case of large dataset, compute predictions row-by-row like below
#         # predictions = np.array([model.predict(row, i) for i in xrange(ratings.shape[1])])
#         topK = np.argsort(-predictions[user, :])[:k]
#         labels = ratings.getrow(user).indices
#         precision = float(len(set(topK) & set(labels))) / float(k)
#         precisions.append(precision)
#     return np.mean(precisions) 

### `calculateSimilarArtists()`

In [58]:
def calculateSimilarArtists(outputFilename, dataset, modelName="als"):
    """
    Generates a list of similar artists in lastfm by utilizing the
    'similar_items' api of the models
    """

    artists, users, plays = fetchDataset(dataset, volubility=2)
    model = getModel(modelName, volubility=2)

    # if we're training an ALS based model, weight input for last.fm
    # by bm25
    if issubclass(model.__class__, AlternatingLeastSquares):
        # lets weight these models by bm25weight.
        logging.debug("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # also disable building approximate recommend index
        model.approximate_recommend = False

    # this is actually disturbingly expensive:
    plays = plays.tocsr()

    logging.debug("training model %s", modelName)
    start = time.time()
    model.fit(plays)
    logging.debug("trained model '%s' in %0.2fs", modelName,
                  time.time() - start)

    # write out similar artists by popularity
    start = time.time()
    logging.debug("calculating top artists")

    user_count = np.ediff1d(plays.indptr)
    to_generate = sorted(np.arange(len(artists)), key=lambda x: -user_count[x])

    # write out as a TSV of artistid, otherartistid, score
    logging.debug("writing similar items")
    with tqdm.tqdm(total=len(to_generate)) as progress:
        with codecs.open(outputFilename, "w", "utf8") as o:
            for artistid in to_generate:
                artist = artists[artistid]
                for other, score in model.similar_items(artistid, 11):
                    o.write("%s\t%s\t%s\n" % (artist, artists[other], score))
                progress.update(1)

    logging.debug("generated similar artists in %0.2fs",  time.time() - start)

### `calculateRecommendations()`



In [59]:
def calculateRecommendations(outputFilename, modelName="als"):
    """
    Generates artist recommendations for each user in the dataset
    """

    artists, users, plays = fetchDataset(dataset, volubility=2)
    model = getModel(modelName, volubility=2)

    # if we're training an ALS based model, weight input for last.fm
    # by bm25
    if issubclass(model.__class__, AlternatingLeastSquares):
        # lets weight these models by bm25weight.
        logging.debug("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # also disable building approximate recommend index
        model.approximate_similar_items = False

    # this is actually disturbingly expensive:
    plays = plays.tocsr()

    logging.debug("training model %s", modelName)
    start = time.time()
    model.fit(plays)
    logging.debug(f"trained model '{modelName}' in "
                  f"{time.time() - start:0.2fs}")

    # generate recommendations for each user and write out to a file
    start = time.time()
    user_plays = plays.T.tocsr()
    with tqdm.tqdm(total=len(users)) as progress:
        with codecs.open(outputFilename, "w", "utf8") as o:
            for userid, username in enumerate(users):
                for artistid, score in model.recommend(userid, user_plays):
                    o.write("%s\t%s\t%s\n" % (username, artists[artistid],
                                              score))
                progress.update(1)
    logging.debug("generated recommendations in %0.2fs",  time.time() - start)

## Parser stuff, for command line form

In [14]:
# if __name__ == "__main__":
    myDescription = ("Generates similar artists on the last.fm dataset or "
                     "generates personalized recommendations for each user.")
    parser = \
        argparse.ArgumentParser(description=myDescription,
                                formatter_class=argparse
                                .ArgumentDefaultsHelpFormatter)

    helpStr = 'Output file name. (Omit to go with parameter-based naming)'
    parser.add_argument('--output-base', type=str,  # default='similar-artists'
                        dest='outputfile', help=helpStr)
    helpStr = f"model to calculate ({', '.join(models.keys())})"
    parser.add_argument('--model', type=str, default='als',
                        dest='model', help=helpStr)
    helpStr = f"dataset ({', '.join(dataSets.keys())})"
    parser.add_argument('--dataset', type=str, default='lastfm',
                        dest='dataset', help=helpStr)
    helpStr = ("Recommend items for each user rather than calculate "
               "similar_items")
    parser.add_argument('--recommend',
                        help=helpStr,
                        action="store_true")
    helpStr = "Parameters to pass to the model, formatted as 'KEY=VALUE"
    parser.add_argument('--param', action='append',
                        help=helpStr)

    args = parser.parse_args()
    print("args:\n", args)

    if args.outputfile:
        outFile = args.outputfile
    elif args.recommend:
        outFile = f"recommend-{args.model}-{args.dataset}.tsv"
    else:
        outFile = f"similarItems-{args.model}-{args.dataset}.tsv"
    print(f"Writing output to {outFile}")

    logging.basicConfig(level=logging.DEBUG)

    if args.recommend:
        calculateRecommendations(outFile, args.dataset,
                                 modelName=args.model)
    else:
        calculateSimilarArtists(outFile, args.dataset,
                                modelName=args.model)

usage: ipykernel_launcher.py [-h] [--output-base OUTPUTFILE] [--model MODEL]
                             [--dataset DATASET] [--recommend] [--param PARAM]
ipykernel_launcher.py: error: unrecognized arguments: -f /home/mark/.local/share/jupyter/runtime/kernel-f2b58063-e603-4e47-af8a-97a1769cd2a4.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


### `getModel()`

Creates an instance of one of:

|key|model name|
|:--|:--|
|als|Alternating LeastSquares|
|nmslib_als|NMS lib Alternating LeastSquares|
|annoy_als|Annoy Alternating LeastSquares|
|faiss_als|Faiss Alternating LeastSquares|
|tfidf|TF-IDF recommender|
|cosine|Cosine recommender|
|bpr|Bayesian personalized ranking|
|lmf|Logistic matrix factorization|
|bm25|BM-25 based recommender|

In [102]:
def getModel(modelName, volubility=1, params=None):
    """
    Instantiates a model class, using provided params, or defaults

    INPUTS:
        modelName		str, one of ['als', 'nmslib_als', 'annoy_als',
                        'faiss_als', 'tfidf', 'cosine', 'bpr', 'lmf', 'bm25']
        params			dict, "suitable" key-value pairs for model
                        description, training conditions, etc.
    """
    if volubility > 0:
        print("getting model %s" % modelName)

    modelClass = models.get(modelName)
    if not modelClass:
        raise ValueError("Unknown Model '%s'" % modelName)

    # some default params
    if not params:
        if issubclass(modelClass, AlternatingLeastSquares):
            params = {'factors': 32, 'dtype': np.float32}
        elif modelName == "bm25":
            params = {'K1': 100, 'B': 0.5}
        elif modelName == "bpr":
            params = {'factors': 63}
        elif modelName == "lmf":
            params = {'factors': 30, "iterations": 40, "regularization": 1.5}
        else:
            params = {}

    if volubility > 1:
        print(modelName.title)

    return modelClass(**params)

### `fetchDataset()`

Get data in convenient sparse matrix format:

|key|dataset name|
|:--|:--|
|lastfm||
|movielens||
|reddit||
|sketchfab||
|million_song||

In [103]:
def fetchDataset(dataset, volubility=1):
    """
    If not already in cache directory, /data1/mark/implicit_datasets,
    fetches a data set, storing copy in cache.
    
    INPUT:
        dataset		str, one of ['lastfm', 'movielens', 'reddit',
                    'sketchfab', 'million_song']
    """

    if volubility > 0:
        print(f"getting dataset {dataset}")
    getdata = dataSets.get(dataset)

    if not getdata:
        raise ValueError(f"Unknown Model {dataset}")
    artists, users, plays = getdata()

    if volubility > 1:
        print(f"type(artists): {type(artists)}")
        print(f"type(users): {type(users)}")
        print(f"type(plays): {type(plays)}")

    return artists, users, plays

### `printLog()`

In [380]:
def printLog(row, header=False, spacing=12, outFile=None):
    if outFile is None:
        outFile = sys.stdout
    top = ''
    middle = ''
    bottom = ''
    for r in row:
        top += '+{}'.format('-'*spacing)
        if isinstance(r, str):
            middle += '| {0:^{1}} '.format(r, spacing-2)
        elif isinstance(r, int):
            middle += '| {0:^{1}} '.format(r, spacing-2)
        elif isinstance(r, float):
            middle += '| {0:^{1}.5f} '.format(r, spacing-2)
        bottom += '+{}'.format('='*spacing)
    top += '+'
    middle += '|'
    bottom += '+'
    if header:
        outFile.write(top + "\n")
        outFile.write(middle + "\n")
        outFile.write(bottom + "\n")
        outFile.flush()
    else:
        outFile.write(middle + "\n")
        outFile.write(top + "\n")
        outFile.flush()

### `learningCurve()`

In [381]:
def learningCurve(model, train, test, epochs, outFile=None,
                  k=5, showProgress=True, numThreads=12):
#     if not userIndex:
#         userIndex = range(train.shape[0])
    prevEpoch = 0

    pAtK = []
    MAPatK = []
    NDCGatK = []
    AUCatK = []

    headers = ['epochs', 'p@k', 'MAP@k', 'NDCG@k', 'AUC@k']
    printLog(headers, header=True, outFile=outFile)

    for epoch in epochs:
        model.iterations = epoch - prevEpoch
        if not hasattr(model, 'user_vectors'):
            model.fit(train, show_progress=showProgress)
        else:
            model.fit_partial(train, show_progress=showProgress)
        pAtK.append(precision_at_k(model, train.T.tocsr(), test.T.tocsr(),
                                   K=k, show_progress=showProgress, num_threads=numThreads))
        MAPatK.append(mean_average_precision_at_k(model, train.T.tocsr(), test.T.tocsr(),
                                                  K=k, show_progress=showProgress,
                                                  num_threads=numThreads))
        NDCGatK.append(ndcg_at_k(model, train.T.tocsr(), test.T.tocsr(),
                                 K=k, show_progress=showProgress, num_threads=numThreads))
        AUCatK.append(AUC_at_k(model, train.T.tocsr(), test.T.tocsr(),
                               K=k, show_progress=showProgress, num_threads=numThreads))
        row = [epoch, pAtK[-1], MAPatK[-1], NDCGatK[-1], AUCatK[-1]]
        printLog(row, outFile=outFile)
        prevEpoch = epoch

    return model, pAtK, MAPatK, NDCGatK, AUCatK

### `gridSearchLearningCurve()`

In [382]:
def gridSearchLearningCurve(modelName, train, test, paramGrid, numThreads=12,
                            k=5, showProgress=True, epochs=range(2, 10, 2),
                            LCfile='../LearningCurves.txt'):
    """
    "Inspired" (stolen) from sklearn gridsearch
    https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_search.py
    """

    curves = []
    keys, values = zip(*paramGrid.items())

    with open(LCfile, 'w') as outFile:
        for val in itertools.product(*values):
            params = dict(zip(keys, val))
            thisModel = getModel(modelName, volubility=2)
            outFile.write(str(type(thisModel)) + "\n")
            outFile.flush()

            printLine = []
            for key, value in params.items():
                setattr(thisModel, key, value)
                printLine.append((key, value))

            outFile.write(' | '.join(f'{key}: {value}' for (key, value) in printLine) + "\n")
            outFile.flush()

            _, pAtK, MAPatK, NDCGatK, AUCatK = \
                learningCurve(thisModel, train, test, epochs, k=k, outFile=outFile,
                              showProgress=showProgress, numThreads=numThreads)

            curves.append({'params': params,
                           'p@k': pAtK, 'MAP@k': MAPatK, 'NDCG@k': NDCGatK, 'AUC@k': AUCatK})
            del thisModel

    return curves

## Do some grid search

### Fetch data

In [154]:
dataset = 'lastfm'
artists, users, plays = fetchDataset(dataset, volubility=2)

print(artists.shape, users.shape, plays.shape)

getting dataset lastfm
type(artists): <class 'numpy.ndarray'>
type(users): <class 'numpy.ndarray'>
type(plays): <class 'scipy.sparse.csr.csr_matrix'>
(292385,) (358868,) (292385, 358868)


### Train test spit

In [145]:
train, test = train_test_split(ratings, train_percentage=0.8)

### Set parameter ranges

In [383]:
myFactors = [int(round(f)) for f in np.logspace(np.log10(20), 2, 7)]
myλ = np.logspace(-4, 2, 7)
myα = np.logspace(np.log10(5), np.log10(200), 6)
print(myFactors)
print(myλ)
print(myα)

[20, 26, 34, 45, 58, 76, 100]
[1.e-04 1.e-03 1.e-02 1.e-01 1.e+00 1.e+01 1.e+02]
[  5.          10.45639553  21.86724148  45.73050519  95.63524998
 200.        ]


In [384]:
paramGrid = {'factors': myFactors,
             'regularization': myλ,
             'alpha': myα}

# params = {'factors': 30, "iterations": 40, "regularization": 1.5}

In [385]:
modelName = 'als'
myEpochs = range(5, 25)
myLCfile = '../LearningCurvesLast.fm.0.txt'

## The big grid search

In [386]:
curves = gridSearchLearningCurve(modelName, train, test, paramGrid, numThreads=0,
                                 showProgress=False, k=5, epochs=myEpochs, LCfile=myLCfile)

getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method tit

getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method tit

getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method title of str object at 0x7ffa9314df48>
getting model als
<built-in method tit

In [387]:
import pandas as pd
print(curves[0])

{'params': {'factors': 20, 'regularization': 0.0001, 'alpha': 5.000000000000001}, 'p@k': [0.40704407657197356, 0.4056475236732747, 0.40721438790108316, 0.4090878125212889, 0.4089175011921793, 0.40956468424279585, 0.40990530690101507, 0.40973499557190546, 0.4098031201035493, 0.40987124463519314, 0.4102459295592343, 0.41007561823012467, 0.41004155596430275, 0.41051842768580965, 0.41068873901491926, 0.41109748620478237, 0.4108931126098508, 0.41017780502759044, 0.41017780502759044, 0.41065467674909734], 'MAP@k': [0.31651459518156544, 0.3163268179559385, 0.3174892330627796, 0.3187137191025713, 0.31888180295584645, 0.3199278523181124, 0.31980780556935945, 0.319602727624096, 0.31922367621887254, 0.3193521432646827, 0.31970165461137806, 0.31948396922680516, 0.31929872269155035, 0.3197239706992068, 0.3200214878618887, 0.32040076932987327, 0.3202092190749638, 0.31969627114276783, 0.31970220676200467, 0.3200024846778202], 'NDCG@k': [0.4166576898603389, 0.41613753478321275, 0.4172093932504659, 0.4

### Massage list of dicts to get useful DataFrame

In [388]:
thang = [curves[x]['params'] for x in range(len(curves))]
df0 = pd.DataFrame(thang)
# df0.head()
# df0.tail()

In [389]:
blah = [curves[x]['p@k'] for x in range(len(curves))]
df1 = pd.DataFrame(blah)
# df1.head()
# df1.tail()

In [390]:
blah = [curves[x]['MAP@k'] for x in range(len(curves))]
df2 = pd.DataFrame(blah)
# df2.head()
# df2.tail()

In [391]:
blah = [curves[x]['NDCG@k'] for x in range(len(curves))]
df3 = pd.DataFrame(blah)
# df3.head()
# df3.tail()

In [392]:
blah = [curves[x]['AUC@k'] for x in range(len(curves))]
df4 = pd.DataFrame(blah)
# df4.head()
# df4.tail()

In [393]:
df = pd.concat([df0, df1, df2, df3,df4], axis=1)
df.head()
df.tail()

Unnamed: 0,factors,regularization,alpha,0,1,2,3,4,5,6,...,10,11,12,13,14,15,16,17,18,19
0,20,0.0001,5.0,0.407044,0.405648,0.407214,0.409088,0.408918,0.409565,0.409905,...,0.54798,0.548018,0.548014,0.548071,0.548067,0.548148,0.548154,0.548061,0.548007,0.548078
1,20,0.0001,10.456396,0.408747,0.411302,0.41154,0.411881,0.411677,0.411847,0.412017,...,0.547781,0.547691,0.547744,0.547716,0.547792,0.547854,0.547839,0.547878,0.547873,0.547884
2,20,0.0001,21.867241,0.406158,0.408475,0.408986,0.408918,0.409599,0.410314,0.41011,...,0.547207,0.547209,0.547335,0.547313,0.547315,0.547282,0.547258,0.547206,0.547179,0.547202
3,20,0.0001,45.730505,0.408304,0.407793,0.408645,0.407759,0.408475,0.407691,0.408134,...,0.547613,0.547711,0.547778,0.547706,0.5477,0.547726,0.547751,0.547696,0.547631,0.547607
4,20,0.0001,95.63525,0.405784,0.408066,0.408373,0.409667,0.409905,0.410348,0.409428,...,0.547462,0.547526,0.54755,0.547646,0.547693,0.547738,0.54773,0.547856,0.547819,0.547915


Unnamed: 0,factors,regularization,alpha,0,1,2,3,4,5,6,...,10,11,12,13,14,15,16,17,18,19
289,100,100.0,10.456396,0.384188,0.395361,0.402923,0.406499,0.408679,0.411302,0.411915,...,0.546976,0.547028,0.547225,0.547336,0.547401,0.547494,0.547562,0.547552,0.547563,0.547659
290,100,100.0,21.867241,0.384495,0.397098,0.403127,0.40609,0.409224,0.410076,0.412528,...,0.547113,0.547129,0.547163,0.547302,0.547362,0.547422,0.547563,0.547585,0.547615,0.547717
291,100,100.0,45.730505,0.383848,0.395463,0.402786,0.405613,0.407078,0.408407,0.411132,...,0.546891,0.546983,0.547088,0.547121,0.547046,0.547085,0.54718,0.547186,0.547278,0.547361
292,100,100.0,95.63525,0.385415,0.396928,0.402888,0.406329,0.408168,0.410791,0.411779,...,0.54701,0.547148,0.547139,0.547169,0.547294,0.547351,0.547401,0.5475,0.547511,0.547634
293,100,100.0,200.0,0.384018,0.396144,0.402923,0.405103,0.407759,0.408883,0.410689,...,0.546885,0.547117,0.547201,0.547299,0.547392,0.547447,0.547533,0.547444,0.547469,0.547549


In [394]:
df.set_index(['factors', 'regularization', 'alpha'], inplace=True)
df.head()
df.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,3,4,5,6,7,8,9,...,10,11,12,13,14,15,16,17,18,19
factors,regularization,alpha,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
20,0.0001,5.0,0.407044,0.405648,0.407214,0.409088,0.408918,0.409565,0.409905,0.409735,0.409803,0.409871,...,0.54798,0.548018,0.548014,0.548071,0.548067,0.548148,0.548154,0.548061,0.548007,0.548078
20,0.0001,10.456396,0.408747,0.411302,0.41154,0.411881,0.411677,0.411847,0.412017,0.41137,0.411268,0.410961,...,0.547781,0.547691,0.547744,0.547716,0.547792,0.547854,0.547839,0.547878,0.547873,0.547884
20,0.0001,21.867241,0.406158,0.408475,0.408986,0.408918,0.409599,0.410314,0.41011,0.409803,0.409497,0.409122,...,0.547207,0.547209,0.547335,0.547313,0.547315,0.547282,0.547258,0.547206,0.547179,0.547202
20,0.0001,45.730505,0.408304,0.407793,0.408645,0.407759,0.408475,0.407691,0.408134,0.409088,0.410042,0.409633,...,0.547613,0.547711,0.547778,0.547706,0.5477,0.547726,0.547751,0.547696,0.547631,0.547607
20,0.0001,95.63525,0.405784,0.408066,0.408373,0.409667,0.409905,0.410348,0.409428,0.409735,0.408747,0.408781,...,0.547462,0.547526,0.54755,0.547646,0.547693,0.547738,0.54773,0.547856,0.547819,0.547915


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,3,4,5,6,7,8,9,...,10,11,12,13,14,15,16,17,18,19
factors,regularization,alpha,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
100,100.0,10.456396,0.384188,0.395361,0.402923,0.406499,0.408679,0.411302,0.411915,0.41338,0.414776,0.416173,...,0.546976,0.547028,0.547225,0.547336,0.547401,0.547494,0.547562,0.547552,0.547563,0.547659
100,100.0,21.867241,0.384495,0.397098,0.403127,0.40609,0.409224,0.410076,0.412528,0.414878,0.41699,0.418319,...,0.547113,0.547129,0.547163,0.547302,0.547362,0.547422,0.547563,0.547585,0.547615,0.547717
100,100.0,45.730505,0.383848,0.395463,0.402786,0.405613,0.407078,0.408407,0.411132,0.41338,0.41464,0.415355,...,0.546891,0.546983,0.547088,0.547121,0.547046,0.547085,0.54718,0.547186,0.547278,0.547361
100,100.0,95.63525,0.385415,0.396928,0.402888,0.406329,0.408168,0.410791,0.411779,0.413414,0.416002,0.417672,...,0.54701,0.547148,0.547139,0.547169,0.547294,0.547351,0.547401,0.5475,0.547511,0.547634
100,100.0,200.0,0.384018,0.396144,0.402923,0.405103,0.407759,0.408883,0.410689,0.412324,0.414606,0.416547,...,0.546885,0.547117,0.547201,0.547299,0.547392,0.547447,0.547533,0.547444,0.547469,0.547549


In [398]:
metrics = ['p@k', 'MAP@k', 'NDCG@k', 'AUC@k']
# epochs = range(20)
df.columns = pd.MultiIndex.from_product([metrics, myEpochs])# , names=['metric', 'epoch'])
df.head(8)
df.tail(8)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,p@k,p@k,p@k,p@k,p@k,p@k,p@k,p@k,p@k,p@k,...,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,5,6,7,8,9,10,11,12,13,14,...,15,16,17,18,19,20,21,22,23,24
factors,regularization,alpha,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
20,0.0001,5.0,0.407044,0.405648,0.407214,0.409088,0.408918,0.409565,0.409905,0.409735,0.409803,0.409871,...,0.54798,0.548018,0.548014,0.548071,0.548067,0.548148,0.548154,0.548061,0.548007,0.548078
20,0.0001,10.456396,0.408747,0.411302,0.41154,0.411881,0.411677,0.411847,0.412017,0.41137,0.411268,0.410961,...,0.547781,0.547691,0.547744,0.547716,0.547792,0.547854,0.547839,0.547878,0.547873,0.547884
20,0.0001,21.867241,0.406158,0.408475,0.408986,0.408918,0.409599,0.410314,0.41011,0.409803,0.409497,0.409122,...,0.547207,0.547209,0.547335,0.547313,0.547315,0.547282,0.547258,0.547206,0.547179,0.547202
20,0.0001,45.730505,0.408304,0.407793,0.408645,0.407759,0.408475,0.407691,0.408134,0.409088,0.410042,0.409633,...,0.547613,0.547711,0.547778,0.547706,0.5477,0.547726,0.547751,0.547696,0.547631,0.547607
20,0.0001,95.63525,0.405784,0.408066,0.408373,0.409667,0.409905,0.410348,0.409428,0.409735,0.408747,0.408781,...,0.547462,0.547526,0.54755,0.547646,0.547693,0.547738,0.54773,0.547856,0.547819,0.547915
20,0.0001,200.0,0.403978,0.404387,0.404523,0.404523,0.404626,0.405,0.404455,0.405477,0.405954,0.406397,...,0.547004,0.547061,0.547285,0.547211,0.547233,0.547126,0.54718,0.547334,0.547313,0.547317
20,0.001,5.0,0.408202,0.409701,0.411438,0.411438,0.411029,0.410518,0.410723,0.409973,0.409633,0.40919,...,0.547554,0.547335,0.547565,0.547569,0.547577,0.547564,0.547495,0.547562,0.547584,0.547579
20,0.001,10.456396,0.410382,0.411234,0.412971,0.413312,0.412085,0.411438,0.411268,0.411404,0.41137,0.410791,...,0.547796,0.5478,0.547908,0.547959,0.547885,0.547912,0.547892,0.547915,0.547881,0.547802


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,p@k,p@k,p@k,p@k,p@k,p@k,p@k,p@k,p@k,p@k,...,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k,AUC@k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,5,6,7,8,9,10,11,12,13,14,...,15,16,17,18,19,20,21,22,23,24
factors,regularization,alpha,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
100,10.0,95.63525,0.385823,0.390388,0.391648,0.39628,0.395293,0.397302,0.398665,0.398256,0.398154,0.39938,...,0.550904,0.551131,0.551264,0.551262,0.55136,0.551065,0.551173,0.551159,0.551043,0.550916
100,10.0,200.0,0.386402,0.389264,0.392942,0.393964,0.394918,0.396928,0.398392,0.398494,0.398154,0.399789,...,0.550472,0.550611,0.550545,0.550546,0.55071,0.550712,0.55077,0.550779,0.550881,0.551004
100,100.0,5.0,0.385006,0.394543,0.401867,0.405477,0.408304,0.410382,0.412494,0.413822,0.415968,0.41682,...,0.547013,0.547146,0.547215,0.547364,0.54743,0.547457,0.547457,0.547456,0.547485,0.547492
100,100.0,10.456396,0.384188,0.395361,0.402923,0.406499,0.408679,0.411302,0.411915,0.41338,0.414776,0.416173,...,0.546976,0.547028,0.547225,0.547336,0.547401,0.547494,0.547562,0.547552,0.547563,0.547659
100,100.0,21.867241,0.384495,0.397098,0.403127,0.40609,0.409224,0.410076,0.412528,0.414878,0.41699,0.418319,...,0.547113,0.547129,0.547163,0.547302,0.547362,0.547422,0.547563,0.547585,0.547615,0.547717
100,100.0,45.730505,0.383848,0.395463,0.402786,0.405613,0.407078,0.408407,0.411132,0.41338,0.41464,0.415355,...,0.546891,0.546983,0.547088,0.547121,0.547046,0.547085,0.54718,0.547186,0.547278,0.547361
100,100.0,95.63525,0.385415,0.396928,0.402888,0.406329,0.408168,0.410791,0.411779,0.413414,0.416002,0.417672,...,0.54701,0.547148,0.547139,0.547169,0.547294,0.547351,0.547401,0.5475,0.547511,0.547634
100,100.0,200.0,0.384018,0.396144,0.402923,0.405103,0.407759,0.408883,0.410689,0.412324,0.414606,0.416547,...,0.546885,0.547117,0.547201,0.547299,0.547392,0.547447,0.547533,0.547444,0.547469,0.547549


### For each count of epochs, find best parameters for each metric

#### Optimize `p@5`

In [434]:
indices = df.index
cmaxs = df['p@k'].max()
print("     epoch  factors\t      λ\t       α\tind\t    p@k")
for e in myEpochs:
    ind = np.argmax(df[('p@k', e)] == cmaxs[e])
    (factors, regularization, alpha) = indices[ind]
    print(f"\t{e:2d}\t{factors:3d}\t{regularization:7.3f}\t {alpha:7.3f}\t{ind:3d}\t{cmaxs[e]:7.5f}")

     epoch  factors	      λ	       α	ind	    p@k
	 5	 26	 10.000	  95.635	 76	0.41852
	 6	 26	 10.000	  95.635	 76	0.42172
	 7	 26	 10.000	  95.635	 76	0.42292
	 8	 26	  0.000	  95.635	 46	0.42329
	 9	 26	 10.000	 200.000	 77	0.42241
	10	 26	 10.000	  45.731	 75	0.42258
	11	 26	 10.000	  45.731	 75	0.42285
	12	 26	 10.000	  95.635	 76	0.42339
	13	 26	 10.000	  95.635	 76	0.42367
	14	 58	100.000	 200.000	209	0.42534
	15	 58	100.000	 200.000	209	0.42656
	16	 58	100.000	 200.000	209	0.42704
	17	 58	100.000	 200.000	209	0.42782
	18	 58	100.000	 200.000	209	0.42830
	19	 58	100.000	 200.000	209	0.42939
	20	 58	100.000	 200.000	209	0.42983
	21	 58	100.000	 200.000	209	0.43000
	22	 58	100.000	 200.000	209	0.43058
	23	 58	100.000	 200.000	209	0.43113
	24	 58	100.000	 200.000	209	0.43092


#### Optimize `MAP@5`

In [433]:
cmaxs = df['MAP@k'].max()
print("     epoch  factors\t      λ\t       α\tind\t  MAP@k")
for e in myEpochs:
    ind = np.argmax(df[('MAP@k', e)] == cmaxs[e])
    (factors, regularization, alpha) = indices[ind]
    print(f"\t{e:2d}\t{factors:3d}\t{regularization:7.3f}\t {alpha:7.3f}\t{ind:3d}\t{cmaxs[e]:7.5f}")

     epoch  factors	      λ	       α	ind	  MAP@k
	 5	 26	 10.000	  95.635	 76	0.32846
	 6	 26	 10.000	  95.635	 76	0.33150
	 7	 26	 10.000	  95.635	 76	0.33246
	 8	 26	 10.000	  95.635	 76	0.33288
	 9	 58	100.000	 200.000	209	0.33267
	10	 58	100.000	 200.000	209	0.33506
	11	 58	100.000	 200.000	209	0.33753
	12	 58	100.000	 200.000	209	0.33872
	13	 58	100.000	 200.000	209	0.34034
	14	 58	100.000	 200.000	209	0.34198
	15	 58	100.000	 200.000	209	0.34278
	16	 58	100.000	 200.000	209	0.34315
	17	 58	100.000	 200.000	209	0.34391
	18	 58	100.000	 200.000	209	0.34431
	19	 58	100.000	 200.000	209	0.34534
	20	 58	100.000	 200.000	209	0.34575
	21	 58	100.000	 200.000	209	0.34624
	22	 58	100.000	 200.000	209	0.34681
	23	 58	100.000	 200.000	209	0.34720
	24	 58	100.000	 200.000	209	0.34729


#### Optimize `NDCG@5`

In [436]:
cmaxs = df['NDCG@k'].max()
print("     epoch  factors\t      λ\t       α\tind\t NDCG@k")
for e in myEpochs:
    ind = np.argmax(df[('NDCG@k', e)] == cmaxs[e])
    (factors, regularization, alpha) = indices[ind]
    print(f"\t{e:2d}\t{factors:3d}\t{regularization:7.3f}\t {alpha:7.3f}\t{ind:3d}\t{cmaxs[e]:7.5f}")

     epoch  factors	      λ	       α	ind	 NDCG@k
	 5	 26	 10.000	  95.635	 76	0.43034
	 6	 26	 10.000	  95.635	 76	0.43318
	 7	 26	 10.000	  95.635	 76	0.43379
	 8	 26	 10.000	  95.635	 76	0.43402
	 9	 26	 10.000	 200.000	 77	0.43393
	10	 26	 10.000	  95.635	 76	0.43347
	11	 58	100.000	 200.000	209	0.43364
	12	 58	100.000	 200.000	209	0.43514
	13	 58	100.000	 200.000	209	0.43699
	14	 58	100.000	 200.000	209	0.43913
	15	 58	100.000	 200.000	209	0.44023
	16	 58	100.000	 200.000	209	0.44078
	17	 58	100.000	 200.000	209	0.44169
	18	 58	100.000	 200.000	209	0.44227
	19	 58	100.000	 200.000	209	0.44331
	20	 58	100.000	 200.000	209	0.44385
	21	 58	100.000	 200.000	209	0.44442
	22	 58	100.000	 200.000	209	0.44490
	23	 58	100.000	 200.000	209	0.44548
	24	 58	100.000	 200.000	209	0.44555


#### Optimize `AUC@5`

In [437]:
cmaxs = df['AUC@k'].max()
print("     epoch  factors\t      λ\t       α\tind\t  AUC@k")
for e in myEpochs:
    ind = np.argmax(df[('AUC@k', e)] == cmaxs[e])
    (factors, regularization, alpha) = indices[ind]
    print(f"\t{e:2d}\t{factors:3d}\t{regularization:7.3f}\t {alpha:7.3f}\t{ind:3d}\t{cmaxs[e]:7.5f}")

     epoch  factors	      λ	       α	ind	  AUC@k
	 5	 34	  0.100	  21.867	104	0.55038
	 6	 34	  0.100	  21.867	104	0.55080
	 7	 34	  0.100	   5.000	102	0.55073
	 8	100	  0.100	 200.000	275	0.55103
	 9	 45	  0.100	  10.456	145	0.55098
	10	 45	  0.100	  10.456	145	0.55118
	11	 34	 10.000	  45.731	117	0.55107
	12	 45	  0.100	  10.456	145	0.55121
	13	 76	 10.000	  10.456	241	0.55208
	14	 76	 10.000	  10.456	241	0.55222
	15	 76	 10.000	  10.456	241	0.55153
	16	 45	  0.100	  10.456	145	0.55146
	17	 76	 10.000	  21.867	242	0.55206
	18	 76	 10.000	  21.867	242	0.55149
	19	100	 10.000	  95.635	286	0.55136
	20	 34	 10.000	  95.635	118	0.55144
	21	 34	 10.000	  95.635	118	0.55131
	22	 76	 10.000	  95.635	244	0.55132
	23	 76	 10.000	  10.456	241	0.55123
	24	 76	 10.000	  10.456	241	0.55146
