In [14]:
import pandas as pd
import torch
import torch_geometric as pyg

from torch import nn
import torch.optim as optim
import torch.nn.functional as F
from torch_geometric.loader import NeighborLoader

import pandas as pd
import networkx as nx
import numpy as np
from sklearn.preprocessing import MinMaxScaler

from tqdm import tqdm

from sentence_transformers import SentenceTransformer

from torch_geometric.transforms import RandomLinkSplit, RandomNodeSplit
import torch_geometric.transforms as T
from torch_geometric.data import HeteroData

from torch_geometric import seed_everything

seed_everything(42)  # so that the results are reproducible


In [2]:
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(device)

mps


In [3]:
device = "cpu"

## Load data

In [105]:
df_books = pd.read_csv('../data/books.csv')[['book_id', 'title', 'authors']]    # TODO: think about using also the columns

df_ratings = pd.read_csv('../data/ratings.csv') # .sample(50000)  # FIXME: remove the sampling on the final run

print(df_books.columns)

Index(['book_id', 'title', 'authors'], dtype='object')


In [5]:
# Create features
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

In [6]:
# Books features
df_books["text_to_embed"] = "Title: " + df_books["title"] + " Authors: " + df_books["authors"]
with torch.no_grad():
    titles_emb = model.encode(df_books['text_to_embed'].values, device=device, show_progress_bar=True, batch_size=32)
    
del model
torch.cuda.empty_cache()    

books_features = torch.tensor(titles_emb)
print("Books features shape:", books_features.shape)

# Users features: as we don't have any information we will use random features
# users_features = torch.rand(df_ratings['user_id'].nunique(), 768, device=device)
# print("Users features shape:", users_features.shape)

Batches: 100%|██████████| 313/313 [00:41<00:00,  7.62it/s]


Books features shape: torch.Size([10000, 384])


In [7]:
# embedding users

# # Create a bipartite graph
B = nx.Graph()
# Add nodes with the node attribute "bipartite"
B.add_nodes_from(df_ratings['user_id'].unique(), bipartite=0)  # Users
B.add_nodes_from(df_ratings['book_id'].unique(), bipartite=1)  # Books

# Add edges between users and books
for _, row in tqdm(df_ratings.iterrows(), total=df_ratings.shape[0], desc="Adding edges"):
    B.add_edge(row['user_id'], row['book_id'], weight=row['rating'])

# Compute metrics
centrality = nx.degree_centrality(B)
print('degree centrality computed')
pagerank = nx.pagerank(B, weight='weight')
print('pagerank computed')
average_rating = df_ratings.groupby('user_id')['rating'].mean()
print('all metrics computed')

# # Prepare feature vectors for users
features = pd.DataFrame(index=df_ratings['user_id'].unique())
features['degree'] = [centrality[node] for node in features.index]
features['pagerank'] = [pagerank[node] for node in features.index]
features['average_rating'] = [average_rating.get(node, 0) for node in features.index]  # Add average ratings

# # Normalize features
scaler = MinMaxScaler()
features_scaled = pd.DataFrame(scaler.fit_transform(features), index=features.index, columns=features.columns)

# # Display the normalized features
users_features = features_scaled.to_numpy()

features_scaled.head() 

Adding edges: 100%|██████████| 50000/50000 [00:01<00:00, 41683.76it/s]


degree centrality computed
pagerank computed
all metrics computed


Unnamed: 0,degree,pagerank,average_rating
42562,0.012048,0.01051,0.5
43232,0.018072,0.014371,0.4375
37244,0.006024,0.011248,1.0
53366,0.006024,0.006042,0.375
29634,0.006024,0.011373,0.75


In [8]:
# Merge the two dataframes keeping user_id, book_id, rating, title, authors
df_ratings = pd.merge(df_ratings, df_books, on='book_id')
df_ratings.head()

Unnamed: 0,user_id,book_id,rating,title,authors,text_to_embed
0,42562,2757,3,"Ahab's Wife, or The Star-Gazer",Sena Jeter Naslund,"Title: Ahab's Wife, or The Star-Gazer Authors:..."
1,43232,134,4,"City of Glass (The Mortal Instruments, #3)",Cassandra Clare,"Title: City of Glass (The Mortal Instruments, ..."
2,37244,1463,5,"Enchanters' End Game (The Belgariad, #5)",David Eddings,"Title: Enchanters' End Game (The Belgariad, #5..."
3,53366,71,2,Frankenstein,"Mary Wollstonecraft Shelley, Percy Bysshe Shel...",Title: Frankenstein Authors: Mary Wollstonecra...
4,29634,3339,4,"The Atlantis Complex (Artemis Fowl, #7)",Eoin Colfer,"Title: The Atlantis Complex (Artemis Fowl, #7)..."


In [9]:
# Create a mapping from the user_id to a unique consecutive value in the range [0, num_users]:
unique_user_id = df_ratings['user_id'].unique()
unique_user_id = pd.DataFrame(data={
    'user_id': unique_user_id, 
    'mapped_user_id': pd.RangeIndex(len(unique_user_id))
    })
print("Mapping of user IDs to consecutive values:")
print("==========================================")
print(unique_user_id.head())
print()

# Create a mapping from the book_id to a unique consecutive value in the range [0, num_books]:
unique_book_id = df_ratings['book_id'].unique()
unique_book_id = pd.DataFrame(data={
    'book_id': unique_book_id,
    'mapped_book_id': pd.RangeIndex(len(unique_book_id))
    })
print("Mapping of book IDs to consecutive values:")
print("===========================================")
print(unique_book_id.head())
print()

Mapping of user IDs to consecutive values:
   user_id  mapped_user_id
0    42562               0
1    43232               1
2    37244               2
3    53366               3
4    29634               4

Mapping of book IDs to consecutive values:
   book_id  mapped_book_id
0     2757               0
1      134               1
2     1463               2
3       71               3
4     3339               4



In [10]:
df_ratings = df_ratings.merge(unique_user_id, on='user_id')
df_ratings = df_ratings.merge(unique_book_id, on='book_id')

# With this, we are ready to create the edge_index representation in COO format
# following the PyTorch Geometric semantics:
edge_index = torch.stack([
    torch.tensor(df_ratings['mapped_user_id'].values), 
    torch.tensor(df_ratings['mapped_book_id'].values)]
    , dim=0)

print(edge_index[:, :10])

tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])


In [67]:

# Create the heterogeneous graph data object:
data = HeteroData()

# Add the user nodes:
data['user'].x = torch.tensor(users_features,).float()  # (num_users, num_users_features)

# Add the book nodes:
data['book'].x = torch.tensor(titles_emb,).float()  # (num_books, num_books_features)

# Add the rating edges:
data['user', 'rates', 'book'].edge_index = edge_index  # (2, num_ratings)

# Add the rating labels:
rating = torch.from_numpy(df_ratings['rating'].values).float()
data['user', 'rates', 'book'].edge_label = rating  # [num_ratings]

# We also need to make sure to add the reverse edges from books to users
# in order to let a GNN be able to pass messages in both directions.
# We can leverage the `T.ToUndirected()` transform for this from PyG:
data = T.ToUndirected()(data)

# With the above transformation we also got reversed labels for the edges.
# We remove them
del data['book', 'rev_rates', 'user'].edge_label

print(data['user'].num_nodes,len(unique_user_id))
assert data['user'].num_nodes == len(unique_user_id)
assert data['user', 'rates', 'book'].num_edges == len(df_ratings)

data

31995 31995


HeteroData(
  user={ x=[31995, 3] },
  book={ x=[10000, 384] },
  (user, rates, book)={
    edge_index=[2, 50000],
    edge_label=[50000],
  },
  (book, rev_rates, user)={ edge_index=[2, 50000] }
)

## Splitting data

In [68]:
## designed for transductive learning
tfs = RandomLinkSplit(is_undirected=True, 
                      num_val=0.1,
                      num_test=0.1,
                      neg_sampling_ratio=0.0,
                      add_negative_train_samples=False,
                      edge_types=[('user', 'rates', 'book')],
                      rev_edge_types=[('book', 'rev_rates', 'user')],
                      )

train_data, val_data, test_data = tfs(data)

In [69]:
data

HeteroData(
  user={ x=[31995, 3] },
  book={ x=[10000, 384] },
  (user, rates, book)={
    edge_index=[2, 50000],
    edge_label=[50000],
  },
  (book, rev_rates, user)={ edge_index=[2, 50000] }
)

In [70]:
# training has 80% of the edges (plus their label) and the reverse edges
# validation has 80% of the edges and the reverse edges, plus 10% of other edges with their label
# test has 90% of the edges and the reverse edges, plus 10% of other edges with their label
train_data, val_data, test_data

(HeteroData(
   user={ x=[31995, 3] },
   book={ x=[10000, 384] },
   (user, rates, book)={
     edge_index=[2, 40000],
     edge_label=[40000],
     edge_label_index=[2, 40000],
   },
   (book, rev_rates, user)={ edge_index=[2, 40000] }
 ),
 HeteroData(
   user={ x=[31995, 3] },
   book={ x=[10000, 384] },
   (user, rates, book)={
     edge_index=[2, 40000],
     edge_label=[5000],
     edge_label_index=[2, 5000],
   },
   (book, rev_rates, user)={ edge_index=[2, 40000] }
 ),
 HeteroData(
   user={ x=[31995, 3] },
   book={ x=[10000, 384] },
   (user, rates, book)={
     edge_index=[2, 45000],
     edge_label=[5000],
     edge_label_index=[2, 5000],
   },
   (book, rev_rates, user)={ edge_index=[2, 45000] }
 ))

In [71]:
# saving splitted data so that the same dataset can be used for matrix factorization in order to have a fair comparison

# train set
train_labels = train_data['user', 'rates', 'book'].edge_label
train_label_index =  train_data['user', 'rates', 'book'].edge_label_index

concat = torch.cat([train_label_index, train_labels.unsqueeze(0)], dim=0)
train_set = pd.DataFrame(concat.T.numpy(), columns=['user', 'book', 'rating'])
train_set.to_csv('../data/splitted_data/train_set.csv', index=False)

# test set
test_labels = test_data['user', 'rates', 'book'].edge_label
test_label_index =  test_data['user', 'rates', 'book'].edge_label_index

concat = torch.cat([test_label_index, test_labels.unsqueeze(0)], dim=0)
test_set = pd.DataFrame(concat.T.numpy(), columns=['user', 'book', 'rating'])
test_set.to_csv('../data/splitted_data/test_set.csv', index=False)

## Creating the model

In [102]:
from torch_geometric.nn import SAGEConv, to_hetero

class GNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), hidden_channels)
        self.conv3 = SAGEConv((-1, -1), hidden_channels)
        self.conv4 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index).relu()
        x = self.conv3(x, edge_index).relu()
        x = self.conv4(x, edge_index)
        return x


class EdgeDecoder(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.lin3 = torch.nn.Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['user'][row], z_dict['book'][col]], dim=-1)

        z = self.lin1(z).relu()
        z = self.lin2(z).relu()
        z = self.lin3(z)
        return z.view(-1)


class Model(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)


model = Model(hidden_channels=100).to(device)
print(model)

Model(
  (encoder): GraphModule(
    (conv1): ModuleDict(
      (user__rates__book): SAGEConv((-1, -1), 100, aggr=mean)
      (book__rev_rates__user): SAGEConv((-1, -1), 100, aggr=mean)
    )
    (conv2): ModuleDict(
      (user__rates__book): SAGEConv((-1, -1), 100, aggr=mean)
      (book__rev_rates__user): SAGEConv((-1, -1), 100, aggr=mean)
    )
    (conv3): ModuleDict(
      (user__rates__book): SAGEConv((-1, -1), 100, aggr=mean)
      (book__rev_rates__user): SAGEConv((-1, -1), 100, aggr=mean)
    )
    (conv4): ModuleDict(
      (user__rates__book): SAGEConv((-1, -1), 100, aggr=mean)
      (book__rev_rates__user): SAGEConv((-1, -1), 100, aggr=mean)
    )
  )
  (decoder): EdgeDecoder(
    (lin1): Linear(in_features=200, out_features=100, bias=True)
    (lin2): Linear(in_features=100, out_features=100, bias=True)
    (lin3): Linear(in_features=100, out_features=1, bias=True)
  )
)


In [100]:
from torch_geometric.loader import HGTLoader

train_loader = HGTLoader(
    train_data,
    num_samples=[1024] * 4,  
    shuffle=True,
    batch_size=128,
    input_nodes=("user", None),  # setting to None means that we will use all nodes
)

val_loader = HGTLoader(
    val_data,
    num_samples=[1024] * 4,
    shuffle=False,
    batch_size=128,
    input_nodes=("user", None),
)

# NOTE: also the reverse edges are sampled

In [103]:
for n, p in enumerate(train_loader):
    print(p)
    print(p['user'].input_id)
    for index in p['user', 'rates', 'book'].edge_label_index[0]:
        if index not in p['user'].input_id:
            print(index) # shows that edge label in dex aren't in the input
    if n == 0:
        break

HeteroData(
  user={
    x=[3200, 3],
    n_id=[3200],
    input_id=[128],
    batch_size=128,
  },
  book={
    x=[1286, 384],
    n_id=[1286],
  },
  (user, rates, book)={
    edge_index=[2, 4727],
    edge_label=[4727],
    edge_label_index=[2, 4727],
    e_id=[4727],
  },
  (book, rev_rates, user)={
    edge_index=[2, 5166],
    e_id=[5166],
  }
)
tensor([ 5385, 18826,  5544, 11458,  3771, 23374,  9489,  4003, 31322, 12647,
        17099, 23383,  2488, 14851, 25011, 20932, 18461, 19894, 14588,  8380,
        29122,  7869, 23987, 29040, 24489,  8537, 30482, 13281,  8838,  5201,
        16174,  5510, 30802, 13725,  7098, 15802, 13291, 18200, 30780, 21101,
        16173, 23289, 29103, 10067, 28452, 23056, 21761,  8208, 29241, 22062,
        17682, 10757,  3493, 20733,  5147,  1758, 24203, 31018, 14075,  4681,
        26140,  8137, 24727, 25946,  8352, 16601, 14935, 13699, 31077,   107,
        22939, 23799, 12000, 26111, 18616, 24479, 21548, 25328, 19135, 11456,
        23021, 17023, 

In [94]:
data.x_dict

{'user': tensor([[0.0120, 0.0105, 0.5000],
         [0.0181, 0.0144, 0.4375],
         [0.0060, 0.0112, 1.0000],
         ...,
         [0.7771, 0.7127, 0.5000],
         [0.0000, 0.0050, 0.7500],
         [0.0000, 0.0039, 0.7500]]),
 'book': tensor([[ 0.0317, -0.0143, -0.0042,  ..., -0.0083, -0.0774,  0.0136],
         [-0.0539,  0.0106,  0.0059,  ...,  0.0068, -0.0008,  0.0677],
         [ 0.0088,  0.0140,  0.0650,  ..., -0.0836,  0.0070,  0.0468],
         ...,
         [-0.0520,  0.0453,  0.0045,  ..., -0.0343, -0.0043, -0.0555],
         [ 0.0411, -0.0194,  0.0850,  ..., -0.0029,  0.0665,  0.0228],
         [-0.0981,  0.0178,  0.0294,  ..., -0.0602,  0.0098,  0.0154]])}

In [104]:
# Training Loop
def train(model, data_loader, optimizer, criterion):
    model.train()
    total_loss = 0
    for batch in tqdm(data_loader):
        # print(p['user', 'rates', 'book'].edge_label_index.max())
        batch = batch.to(device)
        optimizer.zero_grad()
        pred = model(data.x_dict, batch.edge_index_dict, batch['user', 'rates', 'book'].edge_label_index)
        loss = criterion(pred, batch['user', 'rates', 'book'].edge_label.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / (len(data_loader.dataset) / 128)

# Testing Loop
def test(model, data_loader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in data_loader:
            batch = batch.to(device)
            pred = model(data.x_dict, batch.edge_index_dict, batch['user', 'rates', 'book'].edge_label_index)
            loss = criterion(pred, batch['user', 'rates', 'book'].edge_label.float())
            total_loss += loss.item()
    return total_loss / (len(data_loader.dataset) / 128)

# Main training and testing routines
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

train_losses = []
val_losses = []

num_epochs = 10
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, criterion)
    val_loss = test(model, val_loader, criterion)
    
    # val_loss = test(model, val_data, criterion)
    print(f'Epoch: {epoch+1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
    val_losses.append(val_loss)
    train_losses.append(train_loss)

# Optionally, after training, you can evaluate your model on the test dataset
# test_loss = test(model, test_data, criterion)
# print(f'Test Loss: {test_loss:.4f}')


100%|██████████| 250/250 [00:32<00:00,  7.71it/s]


Epoch: 1, Training Loss: 0.8052, Validation Loss: 0.3004


100%|██████████| 250/250 [00:32<00:00,  7.69it/s]


Epoch: 2, Training Loss: 0.3502, Validation Loss: 0.3006


100%|██████████| 250/250 [00:34<00:00,  7.16it/s]


Epoch: 3, Training Loss: 0.3397, Validation Loss: 0.3139


 15%|█▌        | 38/250 [00:05<00:28,  7.36it/s]


KeyboardInterrupt: 

In [None]:
# # Training Loop
# def train(model, data_loader, optimizer, criterion):
#     model.train()
#     total_loss = 0
#     for batch in data_loader:
#         batch = batch.to(device)
#         optimizer.zero_grad()
#         pred = model(batch.x_dict, batch.edge_index_dict, batch['user', 'rates', 'book'].edge_label_index)
#         print("sium")
#         loss = criterion(pred, batch['user', 'rates', 'book'].edge_label.float())
#         loss.backward()
#         optimizer.step()
#         total_loss += loss.item() * batch.num_graphs
#     return total_loss / len(data_loader.dataset)

# # Testing Loop
# def test(model, data_loader, criterion):
#     model.eval()
#     total_loss = 0
#     with torch.no_grad():
#         for batch in data_loader:
#             batch = batch.to(device)
#             pred = model(batch.x_dict, batch.edge_index_dict, batch['user', 'rates', 'book'].edge_label_index)
#             loss = criterion(pred, batch['user', 'rates', 'book'].edge_label.float())
#             total_loss += loss.item() * batch.num_graphs
#     return total_loss / len(data_loader.dataset)

# # Main training and testing routines
# optimizer = optim.Adam(model.parameters(), lr=0.01)
# criterion = nn.MSELoss()

# num_epochs = 10
# for epoch in range(num_epochs):
#     train_loss = train(model, train_dataloader, optimizer, criterion)
#     val_loss = test(model, val_data, criterion)
#     print(f'Epoch: {epoch+1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

# # Optionally, after training, you can evaluate your model on the test dataset
# test_loss = test(model, test_data, criterion)
# print(f'Test Loss: {test_loss:.4f}')


In [None]:
# Training Loop
def train(model, data, optimizer, criterion):
    model.train()
    if device != 'cpu':
        data = data.to(device)
    optimizer.zero_grad()
    
    pred = model(data.x_dict, data.edge_index_dict, data['user', 'rates', 'book'].edge_label_index)
    loss = criterion(pred, data['user', 'rates', 'book'].edge_label.float())
    loss.backward()
    optimizer.step()
    total_loss = loss.item()
    return total_loss 

# Testing Loop
def test(model, data, criterion):
    model.eval()
    with torch.no_grad():
        if device != 'cpu':
            data = data.to(device)
        pred = model(data.x_dict, data.edge_index_dict, data['user', 'rates', 'book'].edge_label_index)
        loss = criterion(pred, data['user', 'rates', 'book'].edge_label.float())
        total_loss = loss.item()
    return total_loss

# Main training and testing routines
optimizer = optim.Adam(model.parameters(), lr=0.005)
criterion = nn.MSELoss()

train_losses = []
valid_losses = []

num_epochs = 100
for epoch in range(num_epochs):
    train_loss = train(model, train_data, optimizer, criterion)
    val_loss = test(model, val_data, criterion)
    print(f'Epoch: {epoch+1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
    train_losses.append(train_loss)
    valid_losses.append(val_loss)

# Optionally, after training, you can evaluate your model on the test dataset
test_loss = test(model, test_data, criterion)
print(f'Test Loss: {test_loss:.4f}')

In [None]:
test_loss = test(model, test_data, criterion)

In [None]:
print(f'Test Loss: {test_loss:.4f}')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot the training and validation losses
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.plot(valid_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Save model 
torch.save(model.state_dict(), "model.pt")

## Validation & metrics

In [None]:
model = Model(hidden_channels=10).to(device)
model.load_state_dict(torch.load("model.pt"))

In [None]:
pred_review = model(test_data.x_dict, test_data.edge_index_dict, test_data['user', 'rates', 'book'].edge_label_index)

In [None]:
import seaborn as sns
sns.boxplot(pred_review.cpu().detach().numpy())

In [None]:
test_data['user', 'rates', 'book'].edge_label_index

In [None]:
test_data['user', 'rates', 'book'].edge_label

In [None]:
# Convert tensors to numpy arrays
user_ids_np = test_data['user', 'rates', 'book'].edge_label_index[0].numpy()
book_ids_np = test_data['user', 'rates', 'book'].edge_label_index[1].numpy()
ratings_np = test_data['user', 'rates', 'book'].edge_label.numpy()
ratings_pred_np = pred_review.detach().numpy()

# Create a dictionary with the data
data = {
    'user_id': user_ids_np,
    'book_id': book_ids_np,
    'rating': ratings_np, 
    'predicted_rating': ratings_pred_np
}

# Create a pandas DataFrame
df_ratings = pd.DataFrame(data)

In [None]:
import sys
sys.path.append('../src')
from evaluation_metrics import *

k = 10
top_k_recommendations = get_top_k_recommendations(df_ratings, k)
actual_items = get_actual_items(df_ratings) # ground truth

# Evaluate the recommendations
mean_precision, mean_recall, mean_f1 = evaluate_recommendations(top_k_recommendations, actual_items, k)
print(f"Mean Precision@{k}: {mean_precision}")
print(f"Mean Recall@{k}: {mean_recall}")
print(f"Mean F1 Score@{k}: {mean_f1}")

Matrix factorization:

Mean Precision@10: 0.7722234424908242
Mean Recall@10: 0.5475533441372822
Mean F1 Score@10: 0.6128487333956821

In [None]:
todo
visualization on how the data looks like


report 
objective and motivation 
analysis of the data
method: improving over matrix factorization baseline
results
future study: even an idea about how to use diversity