In [None]:
from cornac.data import Reader
from cornac.datasets import movielens
from cornac.data import Dataset, FeatureModality
from cornac.eval_methods import RatioSplit, StratifiedSplit
from cornac.metrics import RMSE
from cornac.models import  MF, VAECF, NeuMF, PMF, WMF
import pandas as pd
import numpy as np
import cornac


In [None]:
# NOTE(review): `reader` is not used by any cell visible in this notebook; it
# is kept in case other code relies on it.
reader = Reader()

# Load the interaction table. `header=0` combined with `names=` discards the
# file's own header row and applies the column names given here instead.
rating_data_pd = pd.read_csv(
    "../data/yelp-100K/indexed_interactions.csv",
    sep="\t",
    header=0,
    names=["itemID", "Category", "userID", "Rating", "Gender"],
)

# (userID, itemID, Rating) rows, i.e. the column order that the "UIR" format
# passed to the evaluation split expects.
rating_data = rating_data_pd[["userID", "itemID", "Rating"]].to_numpy()

# A bare expression in the middle of a cell is not displayed, so the number of
# interactions is printed explicitly.
print(len(rating_data))
rating_data_pd

In [3]:
# Item metadata, ordered by the integer itemID.
restaurants = pd.read_csv(
    "../data/yelp-100K/i_id_mapping.csv",
    sep="\t",
    header=0,
    names=["item_id", "Category", "itemID"],
).sort_values(by="itemID")

# Fixed category vocabulary. Its order defines the order of the feature
# columns (and therefore the meaning of "category_<idx>" below).
unique_categories = [
    "Active Life & Fitness",
    "Arts & Entertainment",
    "Automotive",
    "Bars & Nightlife",
    "Coffee,Tea & Desserts",
    "Drinks & Spirits",
    "Education & Learning",
    "Event Services",
    "Family & Kids",
    "Food & Restaurants",
    "Health & Beauty",
    "Home & Garden",
    "Miscellaneous",
    "Outdoor Activities",
    "Public Services & Community",
    "Shopping & Fashion",
    "Specialty Food & Groceries",
    "Sports & Recreation",
    "Technology & Electronics",
    "Travel & Transportation",
    "Asian",
]

# One-hot encode the "|"-separated Category strings in a single vectorised
# pass instead of a row-by-row loop. `reindex` pins the result to the
# vocabulary above: vocabulary entries that never occur become all-zero
# columns, and labels outside the vocabulary are left out rather than
# silently adding stray columns to `restaurants`. A missing Category yields an
# all-zero row instead of raising.
category_one_hot = (
    restaurants["Category"]
    .str.get_dummies(sep="|")
    .reindex(columns=unique_categories, fill_value=0)
)
restaurants = restaurants.join(category_one_hot)

cat = restaurants[unique_categories]
item_features_numpy = cat.to_numpy()

# Key every feature row by the item's actual itemID. Using the row position
# would only be correct while the itemIDs happen to be exactly 0..n-1.
item_features = {
    str(item_id): {"category_" + str(idx): value for idx, value in enumerate(row)}
    for item_id, row in zip(restaurants["itemID"], item_features_numpy)
}

In [None]:
# Show the item table, which by now carries one 0/1 indicator column per
# entry of `unique_categories` next to the original columns.
restaurants

In [None]:
# Numeric encoding applied to the Gender column; any value that is not a key
# of this mapping ends up as NaN.
gender_map = {"M": 0, "F": 1}

# Read the user mapping file; `names=` replaces the header row of the file.
users = pd.read_csv(
    "../data/yelp-100K/u_id_mapping.csv",
    sep="\t",
    header=0,
    names=["user_id", "Gender", "userID"],
)

# Encode Gender, order the rows by userID and keep only the two columns that
# make up the user feature matrix.
users = (
    users.assign(Gender=users["Gender"].map(gender_map))
    .sort_values(by="userID")
    .loc[:, ["Gender", "userID"]]
)

user_features_numpy = users.to_numpy()
print(user_features_numpy.shape)

In [None]:
# Alias the (userID, itemID, Rating) array under the name that the evaluation
# cell passes to the splitter, then display it.
dataset = rating_data
dataset

In [None]:

# Split the interactions into a train and a test set: 20% of the data is held
# out for testing and the seed is fixed.
# NOTE(review): the variable is called `ratio_split` although it holds a
# StratifiedSplit. The name is kept because the commented-out export cells
# further down refer to it.
ratio_split = StratifiedSplit(
    data=dataset,
    test_size=0.2,
    rating_threshold=0,
    # val_size=0.1,
    seed=123,
    verbose=True,
    fmt="UIR",
)

# All top-k ranking metrics share a single cut-off. Keeping it in one constant
# and naming the variables "*_at_k" prevents the names from drifting away from
# the value that is actually used.
METRIC_K = 20
hit_ratio_at_k = cornac.metrics.HitRatio(k=METRIC_K)
ndcg_at_k = cornac.metrics.NDCG(k=METRIC_K)
recall_at_k = cornac.metrics.Recall(k=METRIC_K)
precision_at_k = cornac.metrics.Precision(k=METRIC_K)
auc = cornac.metrics.AUC()
# Deliberately not named `map`: that would shadow the built-in map() for every
# cell executed after this one.
map_metric = cornac.metrics.MAP()

# Epoch counts referenced by the commented-out NeuMF sweep below.
epochs = [20, 40, 60, 80, 100]

# Active model. The epoch count is defined once so that the model name and the
# training length cannot get out of sync.
NUM_EPOCHS = 60
model = NeuMF(
    name=f"NeuMFe={NUM_EPOCHS}",
    num_factors=8,
    layers=(32, 16, 8),
    act_fn="sigmoid",
    reg=0.0,
    num_epochs=NUM_EPOCHS,
    batch_size=256,
    num_neg=3,
    lr=0.001,
    learner="adam",
    backend="tensorflow",
    early_stopping=None,
    trainable=True,
    verbose=True,
    seed=123,
)

# ---------------------------------------------------------------------------
# Alternative configurations, kept for reference (all inactive).
# ---------------------------------------------------------------------------
# NeuMF sweep over `epochs`:
# models = []
# for i in range(len(epochs)):
#     models.append(
#         NeuMF(
#             name=f"NeuMFe={epochs[i]}",
#             num_factors=8,
#             layers=(32, 16, 8),
#             act_fn="sigmoid",
#             reg=0.0,
#             num_epochs=epochs[i],
#             batch_size=256,
#             num_neg=3,
#             lr=0.001,
#             learner="adam",
#             backend="tensorflow",
#             early_stopping=None,
#             trainable=True,
#             verbose=True,
#             seed=123,
#         )
#     )
#
# model = WMF(name=f'WMF{100}', k=200, lambda_u=0.01, lambda_v=0.01, a=1, b=0.01, learning_rate=0.001, batch_size=128, max_iter=100, trainable=True, verbose=True, init_params=None, seed=123)
#
# model = MF(
#     name=f"MF{40}",
#     k=10,
#     backend="cpu",
#     optimizer="adam",
#     max_iter=40,
#     learning_rate=0.01,
#     batch_size=256,
#     lambda_reg=0.02,
#     dropout=0.0,
#     use_bias=True,
#     early_stop=False,
#     num_threads=0,
#     trainable=True,
#     verbose=False,
#     init_params=None,
#     seed=123,
# )
#
# VAECF signature as pasted from the cornac documentation:
# VAECF(name='VAECF', k=10, autoencoder_structure=[20], act_fn='tanh',
#       likelihood='mult', n_epochs=100, batch_size=100, learning_rate=0.001,
#       beta=1.0, trainable=True, verbose=False, seed=None, use_gpu=False)
# model = VAECF(
#     k=10,
#     autoencoder_structure=[20],
#     name=f"vaee{80}",
#     act_fn="tanh",
#     likelihood="mult",
#     n_epochs=80,
#     batch_size=100,
#     learning_rate=0.001,
#     beta=1.0,
#     seed=123,
#     verbose=True,
# )

# Later cells iterate over `models`, so the single active model is wrapped in
# a list. The metric order below is the order passed to the experiment.
models = [model]
cornac.Experiment(
    ratio_split,
    models=models,
    metrics=[
        hit_ratio_at_k,
        ndcg_at_k,
        recall_at_k,
        auc,
        precision_at_k,
        map_metric,
    ],
).run()

In [None]:
# Integer IDs taken from the mapping tables, which the earlier cells sorted by
# userID and itemID respectively. The item IDs are displayed as a check.
user_ids = users["userID"].to_numpy()
item_ids = restaurants["itemID"].to_numpy()
item_ids

In [17]:
# Collect, for every model and every user, the full ranking, its top-k slice
# and the per-item scores.
top_k = 100
n_models, n_users, n_items = len(models), len(user_ids), len(item_ids)

# The arrays below are indexed directly by ID, and the very same integers are
# handed to rank() as indices. That is only meaningful when the IDs are exactly
# 0..n-1 in ascending order, so fail fast instead of silently writing scores
# into the wrong rows.
assert np.array_equal(user_ids, np.arange(n_users)), (
    "user_ids must be exactly 0..n_users-1 in ascending order"
)
assert np.array_equal(item_ids, np.arange(n_items)), (
    "item_ids must be exactly 0..n_items-1 in ascending order"
)

reco_matrix = np.zeros((n_models, n_users, top_k), dtype=int)
reco_matrix_mapped_items = np.zeros((n_models, n_users, n_items), dtype=int)
reco_matrix_mapped_scores = np.zeros((n_models, n_users, n_items), dtype=float)
reco_matrix_all = np.zeros((n_models, n_users, n_items), dtype=int)

for u in user_ids:
    for i, model in enumerate(models):
        # recommend() receives the ID as it appears in the data, so
        # `reco_matrix` and `reco_matrix_all` are indexed by that ID.
        reco_items = model.recommend(u)

        # rank() takes `user_idx` / `item_indices`, i.e. (per the cornac
        # Recommender API) model-internal indices. The "mapped" matrices are
        # therefore indexed by internal user index, which is why the score
        # export cell goes through `uid_map` before reading them.
        items_mapped, mapped_scores = model.rank(
            user_idx=u, item_indices=list(item_ids)
        )

        reco_matrix_mapped_items[i, u] = items_mapped
        reco_matrix_mapped_scores[i, u] = mapped_scores
        reco_matrix_all[i, u] = reco_items
        reco_matrix[i, u] = reco_items[:top_k]

In [None]:
# test_set_data = pd.DataFrame(ratio_split.test_set.uir_tuple).transpose()
# test_set_data.columns = ["uid", "iid", "rating"]
# test_set_data = test_set_data.astype({"uid": "int", "iid": "int", "rating": "int"})
# r_global_uid_map = {v: k for k, v in ratio_split.global_uid_map.items()}
# r_global_iid_map = {v: k for k, v in ratio_split.global_iid_map.items()}

# test_set_data["uid"] = test_set_data["uid"].map(r_global_uid_map)
# test_set_data["iid"] = test_set_data["iid"].map(r_global_iid_map)
# test_set_data

In [24]:
# train_set_data = pd.DataFrame(ratio_split.train_set.uir_tuple).transpose()
# train_set_data.columns = ["uid", "iid", "rating"]
# train_set_data = train_set_data.astype({"uid": "int", "iid": "int", "rating": "int"})
# r_global_uid_map = {v: k for k, v in ratio_split.global_uid_map.items()}
# r_global_iid_map = {v: k for k, v in ratio_split.global_iid_map.items()}

# train_set_data["uid"] = train_set_data["uid"].map(r_global_uid_map)
# train_set_data["iid"] = train_set_data["iid"].map(r_global_iid_map)

In [19]:
# Persist the top-k recommendation matrix in the current working directory.
# np.save appends the ".npy" extension because the given name has none.
np.save("reco_matrix_neumf_yelp100k_100", reco_matrix)

In [21]:
from collections import OrderedDict

# Entries of the first model's `iid_map`, ordered by ascending value
# (presumably the model-internal item index; verify against cornac), and the
# corresponding keys in that same order.
sorted_by_values = OrderedDict(
    sorted(models[0].iid_map.items(), key=lambda pair: pair[1])
)
keys_sorted_by_values = list(sorted_by_values)

# One item -> score mapping per user, addressed by the user's ID.
reco_items_scores_all = [OrderedDict() for _ in range(len(user_ids))]

for raw_uid in user_ids:
    # The score matrix is read by the index that `uid_map` assigns to the
    # user, while the resulting dict is stored under the user's own ID.
    internal_uid = models[0].uid_map[raw_uid]
    reco_items_scores_all[raw_uid] = OrderedDict(
        zip(keys_sorted_by_values, reco_matrix_mapped_scores[0][internal_uid])
    )

In [None]:
import pickle

# The output path is defined once so that the confirmation message always
# names the file that was actually written.
scores_path = "score_dicts_neumf_yelp100k.pkl"

# Serialise the per-user item -> score dictionaries.
with open(scores_path, "wb") as file:
    pickle.dump(reco_items_scores_all, file)

print(f"List of OrderedDicts saved to '{scores_path}'.")