# Notebook for training and testing Graph Neural Network for Neural Collaborative Filtering


In [None]:
from torch_geometric.data import DataLoader
from gncf import GNCF
from graph_epinions_dataset import GraphEpinionsDataset
from epinions_graph import *
import torch
from tqdm import tqdm

In [None]:
# Set device
def _pick_device():
    """Return the preferred torch device: CUDA, then MPS, then CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    # The hasattr guard covers torch builds that do not expose
    # `torch.backends.mps` at all.
    if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
        return torch.device('mps')
    return torch.device('cpu')


device = _pick_device()

# The row count of the training CSV is used as the train/test cut-off among
# the edges loaded from the combined CSV below.
# NOTE(review): this assumes all_data.csv lists the training rows first, in
# the same order as train.csv - TODO confirm.
train_idx = len(pd.read_csv('data/train.csv'))

all_data_csv = 'data/all_data.csv'

# Node features are discarded; only the user/item -> index mappings are kept.
_, user_mapping = load_node_csv(all_data_csv, index_col='user')
_, item_mapping = load_node_csv(all_data_csv, index_col='item')

# Edge connectivity (user -> item) and the per-edge 'label' column as longs.
label_encoders = {'label': IdentityEncoder(dtype=torch.long)}
edge_index, edge_label = load_edge_csv(
    all_data_csv,
    src_index_col='user',
    src_mapping=user_mapping,
    dst_index_col='item',
    dst_mapping=item_mapping,
    encoders=label_encoders,
)

# Edge-type keys used throughout the rest of this cell.
rates = ('user', 'rates', 'item')
rev_rates = ('item', 'rev_rates', 'user')

data = HeteroData()
# NOTE(review): this move happens before any tensor is attached, so it
# appears to move nothing; the tensors assigned below stay wherever they were
# created. Confirm whether the move was meant to run after the graph is
# populated (the model would then have to live on the same device).
data = data.to(device)
# Nodes carry only their integer index as a feature.
data['user'].x = torch.arange(len(user_mapping))
data['item'].x = torch.arange(len(item_mapping))
data[rates].edge_index = edge_index
data[rates].edge_label = edge_label

train_data, test_data = data.clone(), data.clone()

# Partition into training and test sets.
all_edges = data[rates].edge_index
all_labels = data[rates].edge_label

# Training: the first `train_idx` edges serve both as the message-passing
# graph and as the supervision edges.
train_data[rates].edge_index = all_edges[:, :train_idx]
train_data[rates].edge_label = all_labels[:train_idx]
train_data[rates].edge_label_index = all_edges[:, :train_idx]

# Test: message passing still uses only the training edges; the remaining
# edges and their labels are the ones to be predicted.
test_data[rates].edge_index = all_edges[:, :train_idx]
test_data[rates].edge_label = all_labels[train_idx:]
test_data[rates].edge_label_index = all_edges[:, train_idx:]

# Add a reverse edge type for user aggregation / item modeling, then drop the
# label that may have been copied onto the reverse edges.
train_data = ToUndirected()(train_data)
del train_data[rev_rates].edge_label  # Remove "reverse" label.
test_data = ToUndirected()(test_data)
del test_data[rev_rates].edge_label  # Remove "reverse" label.

print(train_data)
print(test_data)

In [None]:
def train(model, optimizer, criterion, epochs=10):
    """Train ``model`` full-batch on the module-level ``train_data`` graph.

    Every epoch runs a single forward/backward pass over all supervision
    edges stored under ``train_data['user', 'item']`` and takes one optimizer
    step. The loss is printed every 10th epoch.

    Args:
        model: Callable invoked as
            ``model(x_dict, edge_index_dict, edge_label_index)``; it is
            switched to training mode before the loop starts.
        optimizer: Optimizer over the model's parameters; ``zero_grad()`` and
            ``step()`` are called once per epoch.
        criterion: Loss callable invoked as ``criterion(pred, target)`` where
            ``target`` is the edge labels cast to float.
        epochs: Number of training epochs to run.

    Returns:
        A list with the scalar loss value of every epoch, in order.
    """
    model.train()

    # Loop-invariant inputs: resolve the edge store and convert the labels to
    # float once instead of on every epoch.
    rating_edges = train_data['user', 'item']
    edge_label_index = rating_edges.edge_label_index
    target = rating_edges.edge_label.float()

    losses = []
    for epoch in range(epochs):
        optimizer.zero_grad()
        # The feature/edge dicts are requested afresh each epoch on purpose,
        # in case the model modifies the dicts it is handed.
        pred = model(train_data.x_dict, train_data.edge_index_dict,
                     edge_label_index)
        loss = criterion(pred, target)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the history and the log.
        loss_value = loss.item()
        losses.append(loss_value)

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch + 1}/{epochs} Loss {loss_value}')
    return losses

In [None]:
# Define model parameters
num_users = len(user_mapping)
num_items = len(item_mapping)
user_emb_dim = 128
item_emb_dim = 128
hidden_channels = 32
out_channels = 16
metadata = train_data.metadata()

# Optimisation settings
learning_rate = 0.01
num_epochs = 300

# Create model
model = GNCF(num_users, num_items, user_emb_dim, item_emb_dim, hidden_channels, out_channels, metadata)
# Keep the model on whatever device the graph tensors live on, so the forward
# pass never mixes devices. This is done before the optimizer is built so the
# optimizer is created from the already-placed parameters.
model = model.to(train_data['user'].x.device)
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

losses = train(model, optimizer, criterion, epochs=num_epochs)