# Book Recommender System

In [1]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl.function as fn
from dgl.nn import GraphConv
import pandas as pd
import numpy as np

In [2]:
ratings = pd.read_csv("Data/Ratings.csv")
#print(f"{ratings.dtypes}\n")
#print(ratings.head())
ratings

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [3]:
ratings["Book-Rating"].value_counts()

0     716109
8     103736
10     78610
7      76457
9      67541
5      50974
6      36924
4       8904
3       5996
2       2759
1       1770
Name: Book-Rating, dtype: int64

In [4]:
books = pd.read_csv("Data/Books.csv", dtype={3: str})
#print(f"Books Data Shape: {books.shape} \n")
#print(f"{books.dtypes}\n")
#print(books.head(3))

books

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...,...
271355,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...
271356,0525447644,From One to One Hundred,Teri Sloat,1991,Dutton Books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271357,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271358,0192126040,Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...


In [5]:
print('Nan-values by column')
print(books.isna().sum())

Nan-values by column
ISBN                   0
Book-Title             0
Book-Author            1
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64


In [6]:
users = pd.read_csv("Data/Users.csv")
#print(f"Users Data Shape: {users.shape} \n")
#print(f"{users.dtypes}\n")
#print(users.head())
#print('Nan-values by column')
#print(users.isna().sum())

users

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",
...,...,...,...
278853,278854,"portland, oregon, usa",
278854,278855,"tacoma, washington, united kingdom",50.0
278855,278856,"brampton, ontario, canada",
278856,278857,"knoxville, tennessee, usa",


## Preprocessing

In [7]:
# We will only use users and books present in the ratings dataset 
lessen_user_ids = {userid: idx for idx, userid in enumerate(ratings['User-ID'].unique())} #renumber IDs to reduce inactive users
ratings['New-User-ID'] = ratings['User-ID'].map(lessen_user_ids)
user_ids = list(ratings['New-User-ID'].unique())
num_users = len(set(user_ids))

# Map book identifiers (ISBN) to a unique integer identifier for datatype compatibility of dgl
isbn_to_id = {isbn: idx for idx, isbn in enumerate(ratings['ISBN'].unique())}
ratings['Book-ID'] = ratings['ISBN'].map(isbn_to_id)
book_ids = list(ratings['Book-ID'].unique())
num_books = len(set(book_ids))

print(f'There are {len(user_ids)} unique users, and {len(book_ids)} unique books in the ratings dataset.')
 
# Remove users and books not included in the ratings dataset
books['Book-ID'] = books['ISBN'].map(isbn_to_id)
books_clean = books[books['Book-ID'].isin(book_ids)]
books_clean_ids = books_clean['Book-ID'].unique()
percent_books_missing = round((num_books-len(books_clean_ids))/num_books*100, 0)

print(f'There are around {percent_books_missing}% of books in the graph missing in the books data')

users['New-User-ID'] = users['User-ID'].map(lessen_user_ids)
users_clean = users[users['New-User-ID'].isin(user_ids)]

There are 105283 unique users, and 340556 unique books in the ratings dataset.
There are around 21.0% of books in the graph missing in the books data


1/5 of the books that have rating information do not have further information on the books dataset. However, as our objective is to investigate a user-based recommender system, this is irrelevant. We are able to embed the age and location data of users. As the age data is sparse, location data will be our main source of information.

### Base model 

The following code is based mainly on [this Kaggle project](https://www.kaggle.com/code/fahadmehfoooz/book-recommendation-system) as our baseline model/way to determine the scoring for recommended systems. 

In [8]:
user_rating_df = ratings.merge(users_clean, left_on = 'User-ID', right_on = 'User-ID')
user_rating_df

Unnamed: 0,User-ID,ISBN,Book-Rating,New-User-ID_x,Book-ID,Location,Age,New-User-ID_y
0,276725,034545104X,0,0,0,"tyler, texas, usa",,0.0
1,276726,0155061224,5,1,1,"seattle, washington, usa",,1.0
2,276727,0446520802,0,2,2,"h, new south wales, australia",16.0,2.0
3,276729,052165615X,3,3,3,"rijeka, n/a, croatia",16.0,3.0
4,276729,0521795028,6,3,4,"rijeka, n/a, croatia",16.0,3.0
...,...,...,...,...,...,...,...,...
1149775,276704,1563526298,9,105278,226347,"cedar park, texas, usa",,105278.0
1149776,276706,0679447156,0,105279,7295,"quebec, quebec, canada",18.0,105279.0
1149777,276709,0515107662,10,105280,12065,"mannington, west virginia, usa",38.0,105280.0
1149778,276721,0590442449,10,105281,78598,"providence, rhode island, usa",14.0,105281.0


In [9]:
book_user_rating = books_clean.merge(user_rating_df, left_on = 'ISBN',right_on = 'ISBN')
book_user_rating = book_user_rating[['ISBN', 'Book-Title', 'Book-Author', 'User-ID', 'Book-Rating']]
book_user_rating.reset_index(drop=True, inplace = True)

In [10]:
d ={}
for i,j in enumerate(book_user_rating.ISBN.unique()):
    d[j] =i
book_user_rating['unique_id_book'] = book_user_rating['ISBN'].map(d)
book_user_rating

Unnamed: 0,ISBN,Book-Title,Book-Author,User-ID,Book-Rating,unique_id_book
0,0195153448,Classical Mythology,Mark P. O. Morford,2,0,0
1,0002005018,Clara Callan,Richard Bruce Wright,8,5,1
2,0002005018,Clara Callan,Richard Bruce Wright,11400,0,1
3,0002005018,Clara Callan,Richard Bruce Wright,11676,8,1
4,0002005018,Clara Callan,Richard Bruce Wright,41385,0,1
...,...,...,...,...,...,...
1031131,0440400988,There's a Bat in Bunk Five,Paula Danziger,276463,7,270146
1031132,0525447644,From One to One Hundred,Teri Sloat,276579,4,270147
1031133,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,276680,0,270148
1031134,0192126040,Republic (World's Classics),Plato,276680,0,270149


## Note
The matrix could not be generated due to significant level of potential cells generated by the pivot operation, thus, only 25k values are taken.

In [11]:
top_users = book_user_rating['User-ID'].value_counts().head(25000).index
top_books = book_user_rating['ISBN'].value_counts().head(25000).index


filtered_df = book_user_rating[book_user_rating['User-ID'].isin(top_users) & book_user_rating['ISBN'].isin(top_books)]

In [12]:
users_books_pivot_matrix_df = filtered_df.pivot(index='User-ID', columns='unique_id_book', values='Book-Rating').fillna(0)

In [13]:
users_books_pivot_matrix_df

unique_id_book,1,3,5,14,18,19,20,21,26,27,...,200983,202721,202858,206478,207047,207905,211778,213175,216604,242947
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278838,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278843,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
users_books_pivot_matrix_df = users_books_pivot_matrix_df.values
users_books_pivot_matrix_df

array([[5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [15]:
%%time 
from scipy.sparse.linalg import svds

NUMBER_OF_FACTORS_MF = 15

#Performs matrix factorization of the original user item matrix
U, sigma, Vt = svds(users_books_pivot_matrix_df, k = NUMBER_OF_FACTORS_MF)

In [16]:
sigma = np.diag(sigma)
sigma.shape

(15, 15)

In [17]:
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) 
all_user_predicted_ratings

array([[ 1.62992369e-03, -7.96688151e-05,  1.74241255e-03, ...,
        -1.29732512e-05, -2.77965402e-04,  1.03031736e-05],
       [ 1.75972105e-03,  3.53753414e-04,  2.39309070e-03, ...,
        -5.59043184e-05,  2.29810704e-04,  3.55624878e-04],
       [-1.10039267e-16, -2.21102829e-17, -4.92502738e-17, ...,
        -2.82481351e-18, -1.95597555e-16, -1.57502303e-18],
       ...,
       [-3.49879326e-05,  5.85305748e-05,  2.29358951e-05, ...,
        -2.53933980e-05,  6.28585274e-04,  5.45062455e-05],
       [ 5.12240230e-03,  3.37804276e-04,  6.36386495e-03, ...,
        -2.77432983e-04, -8.31703348e-04,  2.41251880e-04],
       [-2.17779347e-03,  1.13588515e-03, -1.86067297e-03, ...,
         2.14495407e-04, -3.06683737e-03, -8.30582340e-04]])

In [18]:
def top_cosine_similarity(data, book_id, top_n=10):
    index = book_id 
    book_row = data[index, :]
    magnitude = np.sqrt(np.einsum('ij, ij -> i', data, data))
    similarity = np.dot(book_row, data.T) / (magnitude[index] * magnitude)
    sort_indexes = np.argsort(-similarity)
    return sort_indexes[:top_n]

def similar_books(book_user_rating, book_id, top_indexes):
    print('Recommendations for {0}: \n'.format(
    book_user_rating[book_user_rating.unique_id_book == book_id]['Book-Title'].values[0]))
    for id in top_indexes + 1:
        print(book_user_rating[book_user_rating.unique_id_book == id]['Book-Title'].values[0])

In [33]:
warnings.filterwarnings('ignore', category=RuntimeWarning, message='invalid value encountered in divide')

k = 50
movie_id = 2002  
top_n = 5
sliced = Vt.T[:, :k] # representative data

top_similarities = top_cosine_similarity(sliced, movie_id, top_n)
recommended_books = similar_books(book_user_rating, movie_id, top_similarities)

Recommendations for A Cold Day for Murder: 

The White Puma
Rules of Prey
Where There's Smoke
Les llengÃ¼es d'Africa (Biblioteca universal EmpÃºries)
The Relationship Cure: A Five-Step Guide for Building Better Connections with Family, Friends, and Lovers


## Evaluation of the baseline

### By RMSE:

In [34]:
def calculate_rmse(test_set, predictions):
    # Ensure indices are set for joining
    test_set = test_set.set_index(['User-ID', 'ISBN'])
    predictions = predictions.set_index(['User-ID', 'ISBN'])
    
    # Join the test and prediction dataframes on 'User-ID' and 'ISBN'
    combined = test_set.join(predictions, lsuffix='_actual', rsuffix='_pred')
    
    # Calculate RMSE
    rmse = np.sqrt(mean_squared_error(combined['Book-Rating_actual'], combined['Book-Rating_pred']))
    return rmse

# Setting up data structures for GNN

In [None]:
# Make graph
src = torch.tensor(ratings['New-User-ID'].values)
dst = torch.tensor(ratings['Book-ID'].values)

edges = {
    ('user', 'rating', 'book'): (src, dst)
}

g = dgl.heterograph(edges, num_nodes_dict={'user': num_users, 'book': num_books})
print(g)

In [None]:
# Weight the edges by ratings
rating_data = ratings['Book-Rating'].values
ratings_tensor = torch.tensor(rating_data, dtype=torch.float32)
g.edges['rating'].data['rating'] = ratings_tensor 

In [None]:
# Add age to user feature
ages = users_clean['Age'].values
ages_tensor = torch.tensor(ages, dtype=torch.float32)
g.nodes['user'].data['age'] = ages_tensor

We extract the country from the location by obtaining the expression after the last comma in e.g. nyc, new york, usa

In [None]:
users_clean['Country'] = users_clean['Location'].str.rsplit(',', n=1).str[-1].str.strip()
country_counts = users_clean['Country'].value_counts(normalize=True) 

# We see that less frequent locations do not always contain country names, so we remove values of locations representing less than 1%
rare_countries = country_counts[country_counts < 0.01].index
users_clean.loc[users_clean['Country'].isin(rare_countries), 'Country'] = np.nan

country_ids = {country: idx for idx, country in enumerate(users_clean['Country'].unique())}  # map country to a unique integer
users_clean['CountryId'] = users_clean['Country'].map(country_ids)
countries = users_clean['CountryId'].values
countries_tensor = torch.tensor(countries, dtype=torch.float32)
g.nodes['user'].data['country'] = countries_tensor

In [None]:
print(g.nodes['user'])
print(g.nodes['book'])

### Basic graph info

In [None]:
print(g)  # Prints the basic info of the graph, such as number of nodes and edges per type

# Print number of nodes for each type
print("Number of users:", g.number_of_nodes('user'))
print("Number of books:", g.number_of_nodes('book'))

# Print number of edges
print("Number of ratings:", g.number_of_edges('rating'))

### Node and Edge feature inspection

In [None]:
# Print user node features
print("User features:", g.nodes['user'].data.keys())

# Print book node features, if any
print("Book features:", g.nodes['book'].data.keys())

# Print edge features
print("Edge features:", g.edges['rating'].data.keys())

# Example to print specific feature details:
print("Sample user ages:", g.nodes['user'].data['age'][:5])  # prints first 5 user ages
print("Sample ratings:", g.edges['rating'].data['rating'][:5])  # prints first 5 ratings

### Eliminate isolated nodes if any

In [None]:
compact_g = dgl.compact_graphs(g)

### Create synthetic features for book based on degree of the node

In [None]:
book_in_degrees = g.in_degrees(etype=('user', 'rating', 'book')).float().unsqueeze(1)

In [None]:
g.nodes['book'].data['in_degree'] = book_in_degrees

In [None]:
data = g
data

In [None]:
node_types = g.ntypes
edge_types = g.etypes
print("Node types:", node_types)
print("Edge types:", edge_types)
print('Number of rating edges:', g.number_of_edges('rating'))

# Creating the GNN

## Architecture

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import SAGEConv

class GNNEncoder(nn.Module):
    def __init__(self, in_feats, hidden_channels):
        super(GNNEncoder, self).__init__()
        self.conv1 = SAGEConv(in_feats, hidden_channels, 'mean')
        self.conv2 = SAGEConv(hidden_channels, hidden_channels, 'mean')

    def forward(self, g, features):
        h = self.conv1(g, features)
        h = F.relu(h)
        h = self.conv2(g, h)
        print(f"Encoder output shape: {h.shape}")  # Debugging output
        return h

class BookEmbedding(nn.Module):
    def __init__(self, num_books, embedding_dim):
        super(BookEmbedding, self).__init__()
        self.book_embedding = nn.Embedding(num_books, embedding_dim)

    def forward(self, book_ids):
        embeddings = self.book_embedding(book_ids)
        print(f"Book embedding shape: {embeddings.shape}") 
        return embeddings

class EdgeDecoder(nn.Module):
    def __init__(self, hidden_channels):
        super(EdgeDecoder, self).__init__()
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, 1)

    def forward(self, g, h_user, h_book):
        with g.local_scope():
            g.nodes['user'].data['h'] = h_user
            g.nodes['book'].data['h'] = h_book
            
            # Ensure edge function is only applied where it should be
            if 'rating' in g.etypes:
                g.apply_edges(func=lambda edges: {
                    'score': self.lin2(F.relu(self.lin1(torch.cat([edges.src['h'], edges.dst['h']], dim=1))))
                }, etype='rating')
            return g.edata.get('score', torch.tensor([]))  # Handle case where no edges exist


class Model(nn.Module):
    def __init__(self, num_books, in_feats_user, hidden_channels):
        super(Model, self).__init__()
        self.user_encoder = GNNEncoder(in_feats_user, hidden_channels)
        self.book_embedding = BookEmbedding(num_books, hidden_channels)
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, g, user_features, book_ids):
        h_user = self.user_encoder(g, user_features)
        h_book = self.book_embedding(book_ids)
        return self.decoder(g, h_user, h_book)

## Split graph for training and validation set

In [None]:
def split_graph(g, proportion=0.8):
    # Split edges randomly for training and validation
    num_edges = g.number_of_edges('rating')
    all_edges = np.arange(num_edges)
    np.random.shuffle(all_edges)
    
    train_size = int(num_edges * proportion)
    train_edges = all_edges[:train_size]
    val_edges = all_edges[train_size:]
    
    # Create subgraphs based on the edges
    g_train = dgl.edge_subgraph(g, train_edges, relabel_nodes=False)
    g_val = dgl.edge_subgraph(g, val_edges, relabel_nodes=False)
    
    return g_train, g_val

In [None]:
g_train, g_val = split_graph(g, proportion=0.8)

age_tensor_val = g_val.nodes['user'].data['age'].unsqueeze(1)
country_tensor_val = g_val.nodes['user'].data['country'].unsqueeze(1)
user_features_val = torch.cat([age_tensor_val, country_tensor_val], dim=1)

# Assuming book_ids are just the indices of the books, adjust if necessary
book_ids_train = torch.arange(g_train.number_of_nodes('book'))
book_ids_val = torch.arange(g_val.number_of_nodes('book'))

## Train

`criterion` - MSE loss that calculates the average squared difference between the predicted outputs and the actual target values.

In [None]:
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
book_ids_train = g_train.nodes['book'].data['orig_id']
book_ids_val = g_val.nodes['book'].data['orig_id']

In [None]:
def train(model, g, user_features, book_ids, ratings, optimizer, criterion):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear gradients before each backward pass

    # Move data to the correct device
    g = g.to(device)
    user_features = user_features.to(device)
    book_ids = book_ids.to(device)
    ratings = ratings.to(device)

    # Forward pass
    predictions = model(g, user_features, book_ids)
    loss = criterion(predictions, ratings)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    return loss.item()

In [None]:
def validate(model, g, user_features, book_ids, ratings, criterion):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Disable gradient calculation for evaluation, which saves memory and computations
        g = g.to(device)
        user_features = user_features.to(device)
        book_ids = book_ids.to(device)
        ratings = ratings.to(device)

        # Forward pass
        predictions = model(g, user_features, book_ids)
        loss = criterion(predictions, ratings)

    return loss.item()

In [None]:
num_epochs = 50
for epoch in range(num_epochs):
    train_loss = train(model, g_train, user_features_train, book_ids_train, ratings_train, optimizer, criterion)
    val_loss = validate(model, g_val, user_features_val, book_ids_val, ratings_val, criterion)
    print(f'Epoch {epoch + 1}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

## Evaluate model

In [None]:
def evaluate(model, g, user_features, book_features, labels, criterion):
    model.eval()
    with torch.no_grad():
        outputs = model(g, user_features, book_features)
        loss = criterion(outputs, labels)
    return loss.item()

In [None]:
# Validation loss
validation_loss = evaluate(model, g_val, user_features_val, book_features_val, ratings_val, criterion)
print(f'Validation Loss: {validation_loss}')