# Load Data
Some Notes: \
For small predictive factors, running NeuMF without pre-training can achieve better performance than GMF and MLP. For large predictive factors, pre-training NeuMF can yield better performance (may need tune regularization for GMF and MLP).

The Rating that are above 1 are all converted into 1 for implicit feedback. The data loader can be adjusted to either load in explicit feedback or implicit at "self.preprocess_ratings = self._binarize(ratings)"

In [None]:
%cd /content/drive/MyDrive/Neural-CF

In [3]:
from data import YelpLoader, SampleGenerator
import pandas as pd
import numpy as np

# DataLoader
data_dir = '/content/drive/MyDrive/Neural-CF/Yelp-Dataset/subset/subset_review.json'
yelp_loader = YelpLoader(data_dir)
yelp_rating = yelp_loader.get_yelp_rating()
print(yelp_rating.head())

sample_generator = SampleGenerator(ratings=yelp_rating)
negative = sample_generator._sample_negative(ratings=yelp_rating) # negative df which items that user didn't rated
# print(negative)

predict_data = pd.merge(yelp_rating, negative[['userId','negative_samples']], on='userId')
_users, _items, negative_users, negative_items = [], [], [], []
for row in predict_data.itertuples():
    _users.append(int(row.userId))
    _items.append(int(row.itemId))
    for i in range(len(row.negative_samples)):
        negative_users.append(int(row.userId))
        negative_items.append(int(row.negative_samples[i]))
predict_data = [torch.LongTensor(_users), torch.LongTensor(_items),
                torch.LongTensor(negative_users), torch.LongTensor(negative_items)]

print(predict_data) # [test_users, test_items, negative_users, negative_items]

Loading Data....here
   userId  itemId                 user_id             business_id  rating
0       0       0  -3s52C4zL_DHRK0ULG6qtg  -kqjc8DxxRac4cz2qTKCLw     4.0
1       0       1  -3s52C4zL_DHRK0ULG6qtg  0QYWhij_YZ7Lyk9F6213Sg     5.0
2       0       2  -3s52C4zL_DHRK0ULG6qtg  1YflE3DkiCZGgLnf3paLnA     5.0
3       0       3  -3s52C4zL_DHRK0ULG6qtg  2BMk_drsikKWslJCXmQtjQ     2.0
4       0       4  -3s52C4zL_DHRK0ULG6qtg  2IahpaBR4U2Kdy9HF28EQA     2.5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ratings['rating'][ratings['rating'] > 0] = 1.0


[tensor([  0,   0,   0,  ..., 960, 960, 960]), tensor([  0,   1,   2,  ...,  54, 774,  56]), tensor([  0,   0,   0,  ..., 960, 960, 960]), tensor([250, 320, 368,  ..., 651, 880,  72])]


# Load in Trained Model

In [4]:
from neumf import NeuMFArchitecture
import torch

neumf = torch.load('./Torch-NCF/checkpoints_pretrain/pretrain_neumf_factor8neg4_Epoch5_HR0.0710_NDCG0.0339.model')
print(neumf.keys())

use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

## Load in the pretrained neuralmf to the neumf.py architecture
config = {
    'num_users': 961,
    'num_items': 1000,
    'latent_dim_mf': 8,
    'latent_dim_mlp': 8,
    'num_negative': 4,
    'layers': [16,64,32,16,8],
    'use_cuda': True,
}

neumf_model = NeuMFArchitecture(config)
neumf_model.load_state_dict(neumf)
neumf_model = neumf_model.to(device)
neumf_model.eval()

with torch.no_grad():
  test_users, test_items = predict_data[0].to(device), predict_data[1].to(device)
  negative_users, negative_items = predict_data[2].to(device), predict_data[3].to(device)
  test_scores = neumf_model(test_users, test_items)
  negative_scores = neumf_model(negative_users, negative_items)

# Predict TopK recommended items

In [8]:
from collections import OrderedDict

topK = 10

_users = negative_users
_items = negative_items
_scores = negative_scores

# Convert lists to NumPy arrays
user_array = _users.cpu().numpy()
item_array = _items.cpu().numpy()
scores_array = _scores.cpu().numpy().flatten()

# Get unique user IDs
unique_user_ids = np.unique(user_array)

# Initialize an empty array to store reordered scores
reordered_scores = np.zeros_like(scores_array)
reordered_items = np.zeros_like(item_array)

# Loop through unique user IDs and reorder scores
userID = 75
for user_id in unique_user_ids:
    if user_id == userID:
      print(user_id)
      user_mask = (user_array == user_id)
      user_scores = scores_array[user_mask]
      items = item_array[user_mask]

      ordered_indices = np.argsort(user_scores)[::-1]  # Reverse order to get descending order

      reordered_scores[user_mask] = user_scores[ordered_indices]
      reordered_items[user_mask] = items[ordered_indices]

      unique_reordered_scores = list(OrderedDict.fromkeys(reordered_scores[user_mask]))
      unique_reordered_items = list(OrderedDict.fromkeys(reordered_items[user_mask]))
      # print(unique_reordered_scores)

      topK_item = unique_reordered_items[:topK]
      print(topK_item)


75
[452, 201, 674, 588, 907, 405, 932, 905, 833, 882]


In [9]:
# Display corresponding business_id
corresponding_business_ids = yelp_rating.loc[yelp_rating['itemId'].isin(topK_item), ['itemId', 'business_id']]
corresponding_business_ids

Unnamed: 0,itemId,business_id
252,201,wL-ZX1ttdSssmf3RQUmV2w
547,405,0ZsqqzHu1HHkDdIKoivi5g
614,452,T4LfF6fT4_5UuP6ZpMjUGQ
673,201,wL-ZX1ttdSssmf3RQUmV2w
894,588,_V6hl1oGkTV2KbGeax_HPA
...,...,...
42468,405,0ZsqqzHu1HHkDdIKoivi5g
42550,907,ITOkOiX-hfsazVPpFFQVJA
42603,907,ITOkOiX-hfsazVPpFFQVJA
42638,674,lRbHFOIFuusN2WOR_ypQ_A
