# Evaluation of Best Model

In the evaluation step, I calculate metrics for the `PureSVD` model on the A and B splits of the dataset, and then average the metrics across the two splits.

In [2]:
import warnings
# Installs a global "ignore" filter: every warning raised after this cell is
# suppressed (presumably to keep cell output uncluttered — note that this also
# hides deprecation warnings from the libraries used below).
warnings.filterwarnings("ignore")

## Load test data

In [15]:
# Locations of intermediate artifacts and of the pickled benchmark splits.
data_interim_dir = '../data/interim/'
test_data_dir = '../benchmark/data/'

# Number of items requested from the model per user.
K = 10

# File stems, laid out as consecutive (base, test) pairs: one pair per split.
data_filenames = [
    f'u{fold}.{part}'
    for fold in ('a', 'b')
    for part in ('base', 'test')
]
data_filenames

['ua.base', 'ua.test', 'ub.base', 'ub.test']

In [16]:
import pickle


# Load every split into `data`, keyed by the split title (the part of the base
# filename before the first dot, e.g. 'ua'). Each value is the tuple
# (base, test, base_df, test_df) unpickled from the `<name>.pickle` and
# `<name>.df.pickle` files found under `test_data_dir`.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
data = {}
for i in range(0, len(data_filenames), 2):
    # data_filenames is laid out as consecutive (base, test) pairs.
    base_filename, test_filename = data_filenames[i:i+2]
    data_title = base_filename.split('.')[0]
    base_path = test_data_dir + base_filename
    test_path = test_data_dir + test_filename
    # One `with` statement manages all four files instead of four nested blocks;
    # the files are still opened and loaded in the same order as before.
    with open(base_path + '.pickle', 'rb') as base, \
            open(test_path + '.pickle', 'rb') as test, \
            open(base_path + '.df.pickle', 'rb') as base_df, \
            open(test_path + '.df.pickle', 'rb') as test_df:
        data[data_title] = (
            pickle.load(base),
            pickle.load(test),
            pickle.load(base_df),
            pickle.load(test_df),
        )

data.keys()

dict_keys(['ua', 'ub'])

## Load best model 

In [17]:
# Location of the pickled best model (plain string: there is nothing to interpolate).
model_path = '../models/best_model.pickle'

# NOTE: unpickling can execute arbitrary code — only load model files from a
# trusted source.
with open(model_path, 'rb') as pickle_file:
    best_model = pickle.load(pickle_file)

## Test on A and B splits

I compute Classification and Ranking metrics [implemented in RecTools](https://rectools.readthedocs.io/en/stable/api/rectools.metrics.html), namely:
- F1Beta
- Normalized DCG (not included in the `metrics` dictionary built below)
- Mean Average Precision
- Mean Reciprocal Rank
- Novelty (`MeanInvUserFreq`)
- Serendipity (added out of interest)

Each metric is computed at $k \in \{1, 5, 10\}$; the model recommends $K=10$ items per user.

In [33]:
from rectools.metrics.scoring import calc_metrics
from rectools.metrics import F1Beta, MAP, MRR, Serendipity, MeanInvUserFreq


# Metric classes to evaluate, keyed by the label used in the result tables.
metrics_name = dict(
    F1Beta=F1Beta,
    MRR=MRR,
    MAP=MAP,
    Novelty=MeanInvUserFreq,
    Serendipity=Serendipity,
)

# One metric instance per (label, cutoff) pair; keys look like 'MAP@5'.
# Ordering is label-major, so all cutoffs of one metric stay adjacent.
metrics = {
    f'{label}@{cutoff}': metric_cls(k=cutoff)
    for label, metric_cls in metrics_name.items()
    for cutoff in (1, 5, 10)
}

In [35]:
def load_data(fold):
    """Return the entry of the module-level ``data`` dict for the given fold.

    The lookup key is the fold identifier prefixed with ``'u'`` (fold ``'a'``
    maps to ``data['ua']``). A missing key raises ``KeyError``.
    """
    split_key = 'u{}'.format(fold)
    return data[split_key]

In [36]:
from rectools import Columns


def train_and_evaluate(model, fold_data, fold=None):
    """Fit ``model`` on one fold, recommend for its users and score the result.

    Args:
        model: Object exposing ``fit(dataset)`` and
            ``recommend(users=, dataset=, k=, filter_viewed=)``.
        fold_data: 4-tuple ``(train, test, train_df, test_df)``. ``train`` is
            what the model is fitted on; the two ``*_df`` members are indexed
            by ``Columns.User`` / ``Columns.Item``.
        fold: Optional fold name. When given, the recommendations are written
            to ``<data_interim_dir>recos_<fold>.csv``; when ``None`` nothing
            is saved.

    Returns:
        Whatever ``calc_metrics`` returns for the module-level ``metrics``.
    """
    train, _, train_df, test_df = fold_data
    model.fit(train)

    # Recommend for every user that appears in the training interactions.
    known_users = train_df[Columns.User].unique()
    recos = model.recommend(
        users=known_users,
        dataset=train,
        k=K,
        filter_viewed=True,
    )

    if fold is not None:
        csv_path = data_interim_dir + 'recos_' + fold + '.csv'
        recos.to_csv(csv_path)

    # The catalog is the set of items seen in the training interactions.
    catalog_items = train_df[Columns.Item].unique()
    return calc_metrics(
        metrics,
        reco=recos,
        interactions=test_df,
        prev_interactions=train_df,
        catalog=catalog_items,
    )

In [37]:
from tabulate import tabulate


def print_metrics_table(metrics_dict):
    """Print ``metrics_dict`` as a two-column table (Metric, Value).

    Rows follow the dict's iteration order; rendering is delegated to
    ``tabulate`` with the ``'pretty'`` format.
    """
    rows = [[name, value] for name, value in metrics_dict.items()]
    rendered = tabulate(rows, headers=['Metric', 'Value'], tablefmt='pretty')
    print(rendered)


def test_model(model, save_recos=False, folds=['a', 'b']):
    total_metrics = {k: 0 for k in metrics.keys()}
    for fold in folds:
        fold_data = load_data(fold)
        if save_recos:
            fold_metrics = train_and_evaluate(model, fold_data, fold)
        else:
            fold_metrics = train_and_evaluate(model, fold_data, fold)
        for metric_name, metric_value in fold_metrics.items():
            total_metrics[metric_name] += metric_value
        
        print(f"Fold {fold}:")
        print_metrics_table(fold_metrics)
        print()
    
    average_metrics = {metric_name: total_value / len(folds) for metric_name, total_value in total_metrics.items()}
    print(f"Average across test folds:")
    print_metrics_table(average_metrics)

In [38]:
test_model(best_model, save_recos=True)

Fold a:
+----------------+-----------------------+
|     Metric     |         Value         |
+----------------+-----------------------+
|    F1Beta@1    |  0.08387158970403934  |
|    F1Beta@5    |  0.21124072110286318  |
|   F1Beta@10    |  0.24284199363732767  |
|     MRR@1      |  0.46129374337221635  |
|     MRR@5      |  0.6001767408978438   |
|     MRR@10     |  0.6133220555808042   |
|     MAP@1      |  0.04612937433722163  |
|     MAP@5      |  0.11619476846942381  |
|     MAP@10     |  0.1495090811156559   |
|   Novelty@1    |   1.622449717317695   |
|   Novelty@5    |   1.828821999261622   |
|   Novelty@10   |  1.9779028021482392   |
| Serendipity@1  | 0.0067597081250315594 |
| Serendipity@5  | 0.006558602231985053  |
| Serendipity@10 | 0.005997134272584964  |
+----------------+-----------------------+

Fold b:
+----------------+----------------------+
|     Metric     |        Value         |
+----------------+----------------------+
|    F1Beta@1    | 0.08117227417333463  