### Generowanie predykcji dla użytkowników

Import niezbędnych bibliotek, definicja ustawień logowania i stałych

In [15]:
import pandas as pd

import logging as log
from os.path import dirname, join, abspath
import sys
sys.path.insert(0, abspath(join(dirname('recommender_system'), '..')))

from src.utils.helper import (
    generate_recommendations,
    get_newest_existing_model_version,
    read_data_from_gziped_file,
    save_data_to_pkl,
    select_random_users,
    unpickle
)

In [2]:
logger = log.getLogger()
logger.setLevel(log.INFO)

In [3]:
BASE_PATH = abspath(join(dirname('recommender_system'), '../../..'))
RESULTS_PATH = f'{BASE_PATH}/results'

Załadowanie danych z plików do obiektów DataFrame

In [4]:
ratings = read_data_from_gziped_file(path=f'{BASE_PATH}/recommender_system/input_data/All_Beauty.json.gz')
ratings_df = pd.DataFrame.from_dict(ratings)

INFO:root:Reading data from file All_Beauty.json.gz...
INFO:root:Retrieved 371345 records from file All_Beauty.json.gz


In [5]:
items = read_data_from_gziped_file(path=f'{BASE_PATH}/recommender_system/input_data/meta_All_Beauty.json.gz')
items_df = pd.DataFrame.from_dict(ratings)

INFO:root:Reading data from file meta_All_Beauty.json.gz...
INFO:root:Retrieved 32892 records from file meta_All_Beauty.json.gz


Deduplikacja danych

In [6]:
log.info(f"Duplicated product asins: {items_df.duplicated(subset='asin').sum()}.")
items_df = items_df.drop_duplicates(subset='asin')
log.info(f"Rows number after data cleaning: {len(items_df)}.")

INFO:root:Duplicated product asins: 338759.
INFO:root:Rows number after data cleaning: 32586.


In [8]:
user_ids = list(ratings_df.drop_duplicates(subset="reviewerID")["reviewerID"])
asins = list(items_df["asin"])

print(f"Number of user ids: {len(user_ids)}\nNumber of item ids: {len(asins)}")

Number of user ids: 324038
Number of item ids: 32586


Generowanie rekomendacji z modelu z mechanizmem optymalizacji adagrad oraz funkcją straty WARP

In [17]:
model_name = "adagrad_warp_model"

model_results_path = f'{RESULTS_PATH}/{model_name}'
version = get_newest_existing_model_version(model_results_path)

dataset = unpickle(f'{RESULTS_PATH}/{model_name}/dataset_v{version}.pkl')
model = unpickle(f'{RESULTS_PATH}/{model_name}/{model_name}_v{version}.pkl')

recommendations = generate_recommendations(dataset, asins, model, user_ids)

save_data_to_pkl(f'{RESULTS_PATH}/recommendations/recommendations_{model_name}_v{version}.pkl', recommendations)


INFO:root:File /home/mborettini/recommender_system/results/recommendations/recommendations_adagrad_warp_model_v5.pkl saved


{'A1FBW5LJLWLYPK': ['B000FOI48G', 'B000GLRREU', '1620213982'], 'A2L7Y152XM8TZB': ['B00HSBLUNK', 'B00LMNK9DM', 'B010Q0U06C'], 'A10YTGRF93WEUM': ['B006UV6L8S', 'B015A95L36', 'B00LMNK9DM'], 'A1OGZE807J8YPS': ['B000FOI48G', 'B000GLRREU', '1620213982'], 'A3TITX958TAC25': ['B00LMNK9DM', 'B00HSBLUNK', 'B00PEV7E6I']}


Generowanie rekomendacji z modelu z mechanizmem optymalizacji adadelta oraz funkcją straty WARP

In [9]:
model_name = "adadelta_warp_model"

model_results_path = f'{RESULTS_PATH}/{model_name}'
version = get_newest_existing_model_version(model_results_path)

dataset = unpickle(f'{RESULTS_PATH}/{model_name}/dataset_v{version}.pkl')
model = unpickle(f'{RESULTS_PATH}/{model_name}/{model_name}_v{version}.pkl')

recommendations = generate_recommendations(dataset, asins, model, user_ids)

save_data_to_pkl(f'{RESULTS_PATH}/recommendations/recommendations_{model_name}_v{version}.pkl', recommendations)

INFO:root:Data have been pickled.


Generowanie rekomendacji z modelu z mechanizmem optymalizacji adagrad oraz funkcją straty k-OS WARP

In [10]:
model_name = "adagrad_kos_warp_model"

model_results_path = f'{RESULTS_PATH}/{model_name}'
version = get_newest_existing_model_version(model_results_path)

dataset = unpickle(f'{RESULTS_PATH}/{model_name}/dataset_v{version}.pkl')
model = unpickle(f'{RESULTS_PATH}/{model_name}/{model_name}_v{version}.pkl')

recommendations = generate_recommendations(dataset, asins, model, user_ids)

save_data_to_pkl(f'{RESULTS_PATH}/recommendations/recommendations_{model_name}_v{version}.pkl', recommendations)

INFO:root:Data have been pickled.


Generowanie rekomendacji z modelu z mechanizmem optymalizacji adadelta oraz funkcją straty k-OS WARP

In [11]:
model_name = "adadelta_kos_warp_model"

model_results_path = f'{RESULTS_PATH}/{model_name}'
version = get_newest_existing_model_version(model_results_path)

dataset = unpickle(f'{RESULTS_PATH}/{model_name}/dataset_v{version}.pkl')
model = unpickle(f'{RESULTS_PATH}/{model_name}/{model_name}_v{version}.pkl')

recommendations = generate_recommendations(dataset, asins, model, user_ids)

save_data_to_pkl(f'{RESULTS_PATH}/recommendations/recommendations_{model_name}_v{version}.pkl', recommendations)

INFO:root:Data have been pickled.
