### System rekomendacyjny oparty na modelu filtrowania kolaboratywnego

Import niezbędnych bibliotek, definicja ustawień logowania i stałych

In [1]:
import logging as log
from os.path import dirname, join, abspath
import sys

from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.data import Dataset
import numpy as np
import pandas as pd
sys.path.insert(0, abspath(join(dirname('recommender_system'), '..')))

from src.utils.helper import (
    get_newest_existing_model_version,
    read_data_from_gziped_file,
    pickle_model_results,
    train_lightfm_model
)

In [45]:
# Fetch the root logger and set its threshold to INFO so that INFO-level
# records are not filtered out at the logger level.
logger = log.getLogger(name=None)  # name=None -> the root logger
logger.setLevel(level=log.INFO)

In [58]:
# `dirname('recommender_system')` evaluates to '' (the string has no directory
# component), so BASE_PATH is the directory three levels above the *current
# working directory* - it does not depend on where this notebook file lives.
BASE_PATH = abspath(join(dirname('recommender_system'), '..', '..', '..'))
# Root directory under which the per-model result folders are placed.
RESULTS_PATH = BASE_PATH + '/results'

Załadowanie danych z pliku do obiektu DataFrame

In [47]:
# Read the gzipped JSON input with the project helper, then wrap the returned
# records in a DataFrame; the `reviewerID` and `asin` columns are used later on.
ratings = read_data_from_gziped_file(
    path=f'{BASE_PATH}/recommender_system/input_data/All_Beauty.json.gz'
)
ratings_df = pd.DataFrame.from_dict(data=ratings)

INFO:root:Reading data from file All_Beauty.json.gz ...
INFO:root:Retrieved 371345 records from file All_Beauty.json.gz


Konstrukcja zbioru danych

In [48]:
# Register every user id (`reviewerID`) and item id (`asin`) with the LightFM
# Dataset. The id columns are passed directly: `Dataset.fit` only needs
# iterables of ids, and iterating a column yields the same values in the same
# order as the previous `DataFrame.iterrows()` generators, without building a
# row Series for every record (which was done twice over the whole frame).
dataset = Dataset()
dataset.fit(ratings_df['reviewerID'], ratings_df['asin'])

# Sanity check: how many users and items the dataset now knows about
num_users, num_items = dataset.interactions_shape()
log.info(f"Dataset has been created. Interactions shape: num users {num_users}, num items {num_items}.")

INFO:root:Dataset has been created. Interactions shape: num users 324038, num items 32586.


Zbudowanie macierzy interakcji

In [49]:
# Build the interaction matrices from (user id, item id) pairs. Each pair is a
# 2-tuple, i.e. no explicit per-interaction weight is supplied.
# `zip` over the two id columns produces exactly the pairs the former
# `iterrows()` generator did, but without creating a Series per row.
(interactions, weights) = dataset.build_interactions(
    zip(ratings_df['reviewerID'], ratings_df['asin'])
)

Podział danych na zbiór testowy i treningowy

In [50]:
# Hold out 20% of the interactions for evaluation.
# A fixed seed makes the split repeatable across kernel restarts; without it
# every run evaluates the models on a different test set, so results from
# separate runs cannot be compared.
SPLIT_SEED = 42
(train, test) = random_train_test_split(
    interactions,
    test_percentage=0.2,
    # A RandomState instance is passed rather than a bare int because
    # `random_state` is documented to accept a RandomState instance.
    random_state=np.random.RandomState(SPLIT_SEED),
)
logger.info("Dataset has been split into train and test")

INFO:root:Dataset has been split into train and test


Stworzenie czterech rodzajów modeli:
1. Model z mechanizmem optymalizacji adagrad oraz funkcją straty WARP.
2. Model z mechanizmem optymalizacji adadelta oraz funkcją straty WARP.
3. Model z mechanizmem optymalizacji adagrad oraz funkcją straty k-OS WARP.
4. Model z mechanizmem optymalizacji adadelta oraz funkcją straty k-OS WARP.

In [51]:
# Hyper-parameters shared by all four model variants.
alpha = 1e-3        # L2 penalty, applied to both user and item features
epochs = 70         # number of training epochs passed to the training helper
N_COMPONENTS = 30   # dimensionality of the latent embeddings
MODEL_SEED = 42     # fixed seed so each model starts from a repeatable state
                    # NOTE(review): multi-threaded fitting may still not be
                    # bit-for-bit reproducible - confirm how the helper trains.

# Settings that are identical for every variant; only the loss function and the
# learning schedule differ between the models below.
common_model_params = dict(
    no_components=N_COMPONENTS,
    user_alpha=alpha,
    item_alpha=alpha,
    random_state=MODEL_SEED,
)

# 1. AdaGrad + WARP
adagrad_warp_model = LightFM(loss='warp',
                             learning_schedule='adagrad',
                             **common_model_params)

# 2. AdaDelta + WARP
adadelta_warp_model = LightFM(loss='warp',
                              learning_schedule='adadelta',
                              **common_model_params)

# 3. AdaGrad + k-OS WARP
adagrad_kos_warp_model = LightFM(loss='warp-kos',
                                 learning_schedule='adagrad',
                                 **common_model_params)

# 4. AdaDelta + k-OS WARP
adadelta_kos_warp_model = LightFM(loss='warp-kos',
                                  learning_schedule='adadelta',
                                  **common_model_params)

Trenowanie modelu z mechanizmem optymalizacji adagrad oraz funkcją straty WARP

In [52]:
# Train and persist the AdaGrad + WARP model.
model_name = "adagrad_warp_model"
model_results_path = f'{RESULTS_PATH}/{model_name}'

# NOTE(review): this is the only training cell that passes `model_results_path`
# to `train_lightfm_model`; the other three training cells pass five arguments.
# Confirm which form matches the helper's current signature.
(adagrad_warp_model_auc,
 adagrad_warp_model_duration) = train_lightfm_model(
    epochs, adagrad_warp_model, model_name, model_results_path, test, train
)

# Next version number = newest version already present in the results folder + 1.
version = get_newest_existing_model_version(model_results_path) + 1

# Persist the AUC, the dataset, the training duration and the model itself.
pickle_model_results(
    adagrad_warp_model_auc, dataset, adagrad_warp_model_duration,
    adagrad_warp_model, model_name, model_results_path, version,
)

INFO:root:Model adagrad_warp_model has been trained in 1 epochs


Trenowanie modelu z mechanizmem optymalizacji adadelta oraz funkcją straty WARP

In [60]:
# Train and persist the AdaDelta + WARP model.
model_name = "adadelta_warp_model"
model_results_path = f'{RESULTS_PATH}/{model_name}'

# NOTE(review): the AdaGrad + WARP cell additionally passes `model_results_path`
# to `train_lightfm_model`; confirm which call form the helper expects.
(adadelta_warp_model_auc,
 adadelta_warp_model_duration) = train_lightfm_model(
    epochs, adadelta_warp_model, model_name, test, train
)

# Next version number = newest version already present in the results folder + 1.
version = get_newest_existing_model_version(model_results_path) + 1

# Persist the AUC, the dataset, the training duration and the model itself.
pickle_model_results(
    adadelta_warp_model_auc, dataset, adadelta_warp_model_duration,
    adadelta_warp_model, model_name, model_results_path, version,
)

INFO:root:Model adadelta_warp_model has been trained in 1 epochs
INFO:root:Saving adadelta_warp_model_v2 components to pickle file...
INFO:root:File /home/mborettini/recommender_system/results/adadelta_warp_model/adadelta_warp_model_v2.pkl saved
INFO:root:File /home/mborettini/recommender_system/results/adadelta_warp_model/adadelta_warp_model_auc_v2.pkl saved
INFO:root:File /home/mborettini/recommender_system/results/adadelta_warp_model/adadelta_warp_model_duration_v2.pkl saved
INFO:root:File /home/mborettini/recommender_system/results/adadelta_warp_model/dataset_v2.pkl saved
INFO:root:Done


Trenowanie modelu z mechanizmem optymalizacji adagrad oraz funkcją straty k-OS WARP

In [43]:
# Train and persist the AdaGrad + k-OS WARP model.
model_name = "adagrad_kos_warp_model"
model_results_path = f'{RESULTS_PATH}/{model_name}'

# NOTE(review): the AdaGrad + WARP cell additionally passes `model_results_path`
# to `train_lightfm_model`; confirm which call form the helper expects.
(adagrad_kos_warp_model_auc,
 adagrad_kos_warp_model_duration) = train_lightfm_model(
    epochs, adagrad_kos_warp_model, model_name, test, train
)

# Next version number = newest version already present in the results folder + 1.
version = get_newest_existing_model_version(model_results_path) + 1

# Persist the AUC, the dataset, the training duration and the model itself.
pickle_model_results(
    adagrad_kos_warp_model_auc, dataset, adagrad_kos_warp_model_duration,
    adagrad_kos_warp_model, model_name, model_results_path, version,
)

INFO:root:Model adagrad_kos_warp_model has been trained in 70 epochs.


Trenowanie modelu z mechanizmem optymalizacji adadelta oraz funkcją straty k-OS WARP

In [45]:
# Train and persist the AdaDelta + k-OS WARP model.
model_name = "adadelta_kos_warp_model"
model_results_path = f'{RESULTS_PATH}/{model_name}'

# NOTE(review): the AdaGrad + WARP cell additionally passes `model_results_path`
# to `train_lightfm_model`; confirm which call form the helper expects.
(adadelta_kos_warp_model_auc,
 adadelta_kos_warp_model_duration) = train_lightfm_model(
    epochs, adadelta_kos_warp_model, model_name, test, train
)

# Next version number = newest version already present in the results folder + 1.
version = get_newest_existing_model_version(model_results_path) + 1

# Persist the AUC, the dataset, the training duration and the model itself.
pickle_model_results(
    adadelta_kos_warp_model_auc, dataset, adadelta_kos_warp_model_duration,
    adadelta_kos_warp_model, model_name, model_results_path, version,
)

INFO:root:Model adadelta_kos_warp_model has been trained in 70 epochs.
