In [1]:
%load_ext autoreload
%autoreload 2

import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

sys.path.append("../../../src")
sys.path.append("../")
from common import config
from evaluation.evaluator import Evaluator
import helpers

plt.rcParams["figure.figsize"] = [12, 8]

## Preparing data

In [2]:
# Users need at least this many interactions (counted in the data loaded from
# `paths.test`) to be kept; the value is also embedded in the output folder name.
N_RESTRICT_TO = 10

# A recommendations file is processed when the part of its name before the
# first "." is one of these entries.
MODELS_TO_PREPARE = [
    "als", "prod2vec", "rp3beta", "slim", "lightfm",
    "toppop", "random",
    "perfect", "perfect_cf", "perfect_cf_d3",
]

# Project path helper; this notebook reads `paths.test` and
# `paths.recommendations_folder` from it.
paths = config.Paths(
    dataset_name="jobs_published",
    target_users_name="all",
)

In [3]:
%%time
# Load the interactions stored at `paths.test` through the project's
# DataLoaderSaver and keep them in `interactions` for the cells below.
# NOTE(review): this import sits outside the first (imports) cell; it presumably
# relies on the sys.path entries appended there — confirm before moving it.
from data.initializer import DataLoaderSaver

data = DataLoaderSaver()

# NOTE(review): presumably this call is what populates `data.interactions`,
# which is read on the next statement — load_interactions is defined outside
# this notebook, so confirm there.
data.load_interactions(paths.test)

interactions = data.interactions

CPU times: user 10.9 s, sys: 463 ms, total: 11.3 s
Wall time: 11.3 s


In [4]:
# One row per user with its interaction count, restricted to users that have at
# least N_RESTRICT_TO interactions. Both columns are cast to int first; the
# user column is then turned into strings so it can be compared with the
# recommendation files, which are read entirely as strings.
interactions_per_user = (
    interactions.groupby("user")
    .size()
    .reset_index(name="nb_interactions")
    .astype(int)
    .loc[lambda counts: counts["nb_interactions"] >= N_RESTRICT_TO]
    .astype({"user": str})
)

In [5]:
def read_recommendations(recommendations_path, file_name):
    """Load one gzip-compressed, header-less recommendations CSV.

    Args:
        recommendations_path: Directory that contains the file.
        file_name: Name of the file inside ``recommendations_path``.

    Returns:
        pandas.DataFrame whose columns are labelled 0..n-1 (no header row is
        read) and whose values are all parsed with ``dtype=str``.
    """
    source_file = os.path.join(recommendations_path, file_name)
    # Reading everything as str keeps identifiers exactly as written in the
    # file (e.g. leading zeros are not lost to numeric parsing).
    read_options = {"header": None, "compression": "gzip", "dtype": str}
    return pd.read_csv(source_file, **read_options)


def save_recommendations(df, folder_path, file_name):
    """Write ``df`` as a gzip-compressed CSV without index or header row.

    The target folder (including missing parents) is created when needed.

    Args:
        df: pandas.DataFrame to write.
        folder_path: Destination folder; a ``str`` or any ``os.PathLike``
            (e.g. ``pathlib.Path``) is accepted.
        file_name: Name of the file to create inside ``folder_path``.

    Returns:
        None.
    """
    # os-level helpers accept both str and Path, matching what
    # read_recommendations accepts for its folder argument.
    os.makedirs(folder_path, exist_ok=True)
    df.to_csv(
        os.path.join(folder_path, file_name),
        compression="gzip",
        # Explicit booleans: to_csv documents these as bool, not None.
        index=False,
        header=False,
    )

In [6]:
# Recommendation files to process: those whose name, up to the first ".",
# is listed in MODELS_TO_PREPARE.
file_names = [
    entry
    for entry in os.listdir(paths.recommendations_folder)
    if entry.partition(".")[0] in MODELS_TO_PREPARE
]

In [7]:
# For every selected file, write a copy of its recommendations that only
# contains the users kept in `interactions_per_user`.
# The destination folder and the set of kept users do not depend on the file,
# so they are computed once before the loop.
output_folder = (
    paths.recommendations_folder.parent / f"at_least_{N_RESTRICT_TO}_interactions"
)
eligible_users = interactions_per_user["user"]

for file_name in file_names:
    recommendations = read_recommendations(paths.recommendations_folder, file_name)
    # Column 0 is compared with the kept user ids. A boolean filter keeps all
    # recommendation columns and the original row order without building a
    # wider joined frame that then has to be sliced back by position.
    restricted = recommendations[
        recommendations[0].isin(eligible_users)
    ].reset_index(drop=True)

    save_recommendations(restricted, output_folder, file_name)