## Train a GNN-based XGBoost Model
#### Goals
* Train a GNN (GraphSAGE) model that produces node (transaction) embeddings.
* Use these node embeddings to train an XGBoost model.
* Save the trained GNN and XGBoost models for inference.

__Prerequisite__: The preprocessing notebook must be executed before running this notebook.

#### Dataset names

In [1]:
# Identifiers of the datasets this notebook supports
TABFORMER, SPARKOV = "TabFormer", "Sparkov"

### Select the dataset to train the models on

__Note__: This notebook works for both the __TabFormer__ and __Sparkov__ datasets.
Make sure that the right dataset is selected.
For the TabFormer dataset, set

```code
    DATASET = TABFORMER
```
and for the Sparkov dataset, set

```code
    DATASET = SPARKOV
```

In [2]:
# Dataset the models are trained on: set this to either TABFORMER or SPARKOV
# (the two name constants defined earlier in this notebook).
DATASET = TABFORMER


#### Import necessary libraries, packages, and functions

In [3]:
# General-purpose libraries and OS handling
import copy
import json
import os
from collections import defaultdict
from types import SimpleNamespace
from typing import Dict, Optional, Tuple

# GPU-accelerated libraries (torch, cupy, cudf, rmm)
import torch
import cupy
import cudf
import rmm
from rmm.allocators.cupy import rmm_cupy_allocator
from rmm.allocators.torch import rmm_torch_allocator

# Reinitialize RMM and set allocators to manage memory efficiently on GPU
rmm.reinitialize(devices=[0], pool_allocator=True, managed_memory=True)
cupy.cuda.set_allocator(rmm_cupy_allocator)
torch.cuda.memory.change_current_allocator(rmm_torch_allocator)

# PyTorch and related libraries
import torch.nn.functional as F
import torch.nn as nn

# PyTorch Geometric and cuGraph libraries for GNNs and graph handling
import cugraph_pyg
from cugraph_pyg.loader import NeighborLoader
import torch_geometric
from torch_geometric.nn import SAGEConv

# Enable GPU memory spilling to CPU with cuDF to handle larger datasets
from cugraph.testing.mg_utils import enable_spilling  # noqa: E402
enable_spilling()

# XGBoost for machine learning model building
import xgboost as xgb

# Numerical operations with cupy and numpy
import cupy as cp
import numpy as np

# Machine learning metrics from sklearn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

#### Some config parameters for neighborhood sampler and training

In [4]:
# Plain attribute container for the sampler / training configuration that the
# cells below read (and extend with path attributes).
args = SimpleNamespace(
    out_channels=2,          # size of the GNN output layer
    batch_size=1024,         # batch size passed to NeighborLoader
    fan_out=10,              # neighbors sampled per hop by NeighborLoader
    use_cross_weights=True,  # whether load_data builds class weights
    cross_weights=None,      # explicit class weights; None -> derived from label counts
)

##### Path to pre-processed data and directory to save models

In [5]:
# Base directory of each dataset; any unknown name falls back to the TabFormer path.
dateset_name_to_path = defaultdict(
    lambda: "../data/TabFormer",
    {
        'TabFormer': '../data/TabFormer',
        'Sparkov': '../data/Sparkov',
    },
)
args.dataset_base_path = dateset_name_to_path[DATASET]

# GNN inputs are read from <base>/gnn and trained models are written to <base>/models.
args.model_root_dir = os.path.join(args.dataset_base_path, 'models')
args.dataset_root = os.path.join(args.dataset_base_path, 'gnn')

#### Read the number of transaction nodes that was saved during preprocessing

In [None]:

# The number of transaction nodes is stored under the NUM_TRANSACTION_NODES key
# of variables.json in the dataset base directory (saved during preprocessing).
variables_path = os.path.join(args.dataset_base_path, 'variables.json')
with open(variables_path, 'r') as json_file:
    saved_variables = json.load(json_file)
num_transaction_nodes = saved_variables['NUM_TRANSACTION_NODES']

num_transaction_nodes

#### Define a GraphSAGE model

In [7]:
class GraphSAGE(torch.nn.Module):
    """
    GraphSAGE model for graph-based learning.

    This model learns node embeddings by aggregating information from a node's
    neighborhood using multiple graph convolutional layers, followed by a
    linear output layer.

    Parameters:
    ----------
    in_channels : int
        The number of input features for each node.
    hidden_channels : int
        The number of hidden units in each layer, controlling the embedding dimension.
    out_channels : int
        The number of output features (or classes) for the final layer.
    n_hops : int
        The number of GraphSAGE layers (or hops) used to aggregate information
        from neighboring nodes. At least one layer is always created.
    dropout_prob : float, optional (default=0.25)
        The dropout probability applied to the activations of every GraphSAGE
        layer while the model is in training mode.
    """
    def __init__(self, in_channels, hidden_channels, out_channels, n_hops, dropout_prob=0.25):
        super().__init__()

        # Stored so that forward() applies the configured probability instead
        # of a hard-coded one.
        self.dropout_prob = dropout_prob

        # list of conv layers
        self.convs = nn.ModuleList()
        # the first conv layer maps the input features to the hidden size
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        # the remaining conv layers keep the hidden size
        for _ in range(n_hops - 1):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))

        # output layer
        self.fc = nn.Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, return_hidden=False):
        """
        Runs the GraphSAGE layers and, optionally, the output layer.

        Parameters:
        ----------
        x : torch.Tensor
            Node features.
        edge_index : torch.Tensor
            Edge index passed to every SAGEConv layer.
        return_hidden : bool, optional (default=False)
            If True, return the output of the last GraphSAGE layer (the node
            embeddings) instead of the output of the final linear layer.

        Returns:
        -------
        torch.Tensor
            The hidden embeddings when `return_hidden` is True, otherwise the
            output of the linear layer applied to those embeddings.
        """
        for conv in self.convs:
            x = conv(x, edge_index)
            x = F.relu(x)
            # dropout is a no-op in eval mode because of `training=self.training`
            x = F.dropout(x, p=self.dropout_prob, training=self.training)

        if return_hidden:
            return x
        return self.fc(x)



#### Define a function to train the GraphSAGE model
__Note__: This function is called a few times if grid search is used to find better hyper-parameters.

In [8]:
def train_gnn(model, loader, optimizer, criterion) -> float:
    """
    Runs one training epoch of the GraphSAGE model.

    Parameters:
    ----------
    model : torch.nn.Module
        The GNN model to be trained; called as `model(x, edge_index)`.
    loader : cugraph_pyg.loader.NeighborLoader
        Loader that yields batches exposing `batch_size`, `x`, `edge_index` and `y`.
    optimizer : torch.optim.Optimizer
        Optimizer used to update the model's parameters.
    criterion : torch.nn.Module
        Loss function applied to the model output and the target labels.

    Returns:
    -------
    float
        The mean of the per-batch training losses for this epoch.
    """
    model.train()

    batch_losses = []
    for batch in loader:
        optimizer.zero_grad()

        # The loss is computed on the first `batch_size` rows of the batch only.
        num_seeds = batch.batch_size
        features = batch.x[:, :].to(torch.float32)
        logits = model(features, batch.edge_index)[:num_seeds]
        targets = batch.y[:num_seeds].view(-1).to(torch.long)

        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(batch_losses)




#### Define a function to extract node (transaction) embeddings from the second-to-last layer of the GraphSAGE model


In [9]:

def extract_embeddings(model, loader) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Collects the hidden embeddings and labels of the nodes served by `loader`.

    Parameters:
    ----------
    model : torch.nn.Module
        Model called as `model(x, edge_index, return_hidden=True)` to obtain embeddings.
    loader : cugraph_pyg.loader.NeighborLoader
        Loader that yields batches exposing `batch_size`, `x`, `edge_index` and `y`.

    Returns:
    -------
    Tuple[torch.Tensor, torch.Tensor]
        - embeddings: the first `batch_size` embedding rows of every batch,
          concatenated along dimension 0.
        - labels: the matching labels, flattened and cast to `torch.long`.
    """
    model.eval()

    hidden_chunks, label_chunks = [], []
    with torch.no_grad():
        for batch in loader:
            num_seeds = batch.batch_size
            features = batch.x[:, :].to(torch.float32)
            hidden = model(features, batch.edge_index, return_hidden=True)
            # No device transfer here: tensors stay on the device they were produced on.
            hidden_chunks.append(hidden[:num_seeds])
            label_chunks.append(batch.y[:num_seeds].view(-1).to(torch.long))

    return torch.cat(hidden_chunks, dim=0), torch.cat(label_chunks, dim=0)



#### Define a function to evaluate the GraphSAGE model


In [10]:

def evaluate_gnn(model, loader) -> float:
    """
    Evaluates the classification performance of the GraphSAGE model.

    Predictions are the argmax of the model output over dimension 1. The
    predictions and labels of all batches are gathered first, and the metrics
    are then computed once over the whole set (not averaged per batch).
    Accuracy, precision, recall and F1 score are printed.

    Parameters:
    ----------
    model : torch.nn.Module
        The GNN model to be evaluated; called as `model(x, edge_index)`.
    loader : cugraph_pyg.loader.NeighborLoader
        Loader that yields batches exposing `batch_size`, `x`, `edge_index` and `y`.

    Returns:
    -------
    float
        The F1 score computed over all evaluated nodes.
    """
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in loader:
            # Only the first `batch_size` rows of each batch are scored.
            batch_size = batch.batch_size
            out = model(batch.x[:, :].to(torch.float32), batch.edge_index)[:batch_size]
            preds = out.argmax(dim=1)
            y = batch.y[:batch_size].view(-1).to(torch.long)

            # The sklearn metrics below operate on host (NumPy) arrays.
            all_preds.append(preds.cpu().numpy())
            all_labels.append(y.cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    # zero_division=0 reports 0 for a metric whose denominator is zero.
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, zero_division=0)
    recall = recall_score(all_labels, all_preds, zero_division=0)
    f1 = f1_score(all_labels, all_preds, zero_division=0)

    print("\nGNN Model Evaluation:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    return f1


#### Define a function to compute the validation loss of the GraphSAGE model

In [11]:

def validation_loss(model, loader, criterion) -> float:
    """
    Computes the mean per-batch loss of the GraphSAGE model without updating it.

    Parameters:
    ----------
    model : torch.nn.Module
        The model to evaluate; called as `model(x, edge_index)`.
    loader : cugraph_pyg.loader.NeighborLoader
        Loader that yields validation batches exposing `batch_size`, `x`,
        `edge_index` and `y`.
    criterion : torch.nn.Module
        Loss function applied to the model output and the target labels.

    Returns:
    -------
    float
        The mean of the per-batch validation losses.
    """
    model.eval()

    batch_losses = []
    with torch.no_grad():
        for batch in loader:
            # The loss is computed on the first `batch_size` rows of the batch only.
            num_seeds = batch.batch_size
            features = batch.x[:, :].to(torch.float32)
            logits = model(features, batch.edge_index)[:num_seeds]
            targets = batch.y[:num_seeds].view(-1).to(torch.long)
            batch_losses.append(criterion(logits, targets).item())

    return sum(batch_losses) / len(batch_losses)



#### Define a function to train an XGBoost model

In [12]:

from torch.utils.dlpack import to_dlpack

def train_xgboost(embeddings, labels) -> xgb.Booster:
    """
    Fits a binary XGBoost classifier on node embeddings.

    Parameters:
    ----------
    embeddings : torch.Tensor
        The input feature embeddings for transaction nodes.
    labels : torch.Tensor
        The target labels (Fraud or Non-fraud) of the transactions, with the
        same length as the number of rows in `embeddings`.

    Returns:
    -------
    xgboost.Booster
        The booster obtained after 100 boosting rounds on the provided data.
    """
    # Hand the torch tensors over to cupy through DLPack, then wrap them in cuDF objects.
    features_gpu = cp.from_dlpack(to_dlpack(embeddings))
    targets_gpu = cp.from_dlpack(to_dlpack(labels))
    dtrain = xgb.DMatrix(
        cudf.DataFrame(features_gpu),
        label=cudf.Series(targets_gpu),
    )

    booster_params = {
        'objective': 'binary:logistic',  # binary classification
        'eval_metric': 'logloss',
        'max_depth': 6,
        'learning_rate': 0.2,
        'tree_method': 'hist',
        'device': 'cuda',  # run the training on the GPU
    }

    return xgb.train(booster_params, dtrain, num_boost_round=100)



#### Define a function to evaluate the XGBoost model


In [13]:

from cuml.metrics import confusion_matrix

def evaluate_xgboost(bst, embeddings, labels):
    """
    Evaluates the performance of an XGBoost model by calculating different metrics.

    The model output is thresholded at 0.5 to obtain the predicted class.

    Parameters:
    ----------
    bst : xgboost.Booster
        The trained XGBoost model to be evaluated.
    embeddings : torch.Tensor
        The input feature embeddings for transaction nodes.
    labels : torch.Tensor
        The target labels (Fraud or Non-fraud) of the transactions, with the
        same length as the number of rows in `embeddings`.

    Returns:
    -------
    A tuple containing f1-score, recall, precision, accuracy and the confusion matrix
    """
    # Convert embeddings to a cuDF DataFrame and wrap them in a DMatrix
    embeddings_cudf = cudf.DataFrame(cp.from_dlpack(to_dlpack(embeddings)))
    dtest = xgb.DMatrix(embeddings_cudf)

    # Predicted scores, thresholded into class labels
    preds = bst.predict(dtest)
    pred_labels = (preds > 0.5).astype(int)

    # Copy the labels to the host once; every metric below reuses this array.
    labels_cpu = labels.cpu().numpy()

    # Only the metrics that are returned are computed. A ROC-AUC value was
    # previously computed here and discarded; that call fails when the labels
    # contain a single class, so it is not evaluated any more.
    accuracy = accuracy_score(labels_cpu, pred_labels)
    precision = precision_score(labels_cpu, pred_labels, zero_division=0)
    recall = recall_score(labels_cpu, pred_labels, zero_division=0)
    f1 = f1_score(labels_cpu, pred_labels, zero_division=0)
    conf_mat = confusion_matrix(labels_cpu, pred_labels)

    return f1, recall, precision, accuracy, conf_mat

#### Define a class to stop training once the model stops improving

In [14]:
class EarlyStopping:
    """
    Tracks a monitored loss and signals when it has stopped improving.

    Parameters:
    ----------
    patience : int, optional (default=10)
        The number of consecutive checks without improvement after which
        stopping is signalled.
    min_delta : float, optional (default=0)
        The amount by which the loss must drop below the best loss seen so far
        to count as an improvement; smaller drops count as no improvement.
    """
    def __init__(self, patience=10, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.best_loss = float('inf')
        self.counter = 0

    def check_early_stopping(self, val_loss):
        """
        Records `val_loss` and reports whether training should stop.

        Returns True once the number of consecutive non-improving checks
        reaches `patience`, otherwise False.
        """
        improved = self.best_loss - val_loss > self.min_delta
        if not improved:
            # one more check without a sufficient improvement
            self.counter += 1
        else:
            # new best loss: remember it and restart the count
            self.best_loss = val_loss
            self.counter = 0

        return self.counter >= self.patience



### Define a function to load data and create the graph
* Loads the edges and creates the graph using cugraph-pyg
* Loads the preprocessed features associated with the graph nodes

In [15]:

def load_data(
    dataset_root: str,
    edge_filename: str = 'edges.csv',
    label_filename: str = 'labels.csv',
    node_feature_filename: str = 'features.csv',
    has_edge_feature: bool = False,
    edge_src_col: str = 'src',
    edge_dst_col: str = 'dst',
    edge_att_col: str = 'type',
) -> Tuple[
    Tuple[torch_geometric.data.FeatureStore, torch_geometric.data.GraphStore],
    Optional[torch_geometric.data.FeatureStore],
    int,
    int,
    int,
    Optional[torch.Tensor],
]:
    """
    Loads edges, labels and node features from CSV files into cugraph-pyg stores.

    Besides its arguments, this function reads the notebook-level `args`
    object: `args.use_cross_weights` and `args.cross_weights` control the
    class weights that are returned.

    Parameters:
    ----------
    dataset_root : str
        Directory that contains the edge, label and feature files.
    edge_filename : str
        Header-less CSV with three columns: source id, destination id and an
        edge attribute.
    label_filename : str
        Header-less CSV with a single integer label column.
    node_feature_filename : str
        CSV (with a header row) holding one column per node feature.
    has_edge_feature : bool
        If True, the edge attribute column is also stored in an edge feature store.
    edge_src_col, edge_dst_col, edge_att_col : str
        Names given to the three columns of the edge file.

    Returns:
    -------
    A tuple `(data, edge_feature_store, num_nodes, num_features, num_classes, wt_data)`:
        - data: `(feature_store, graph_store)` pair holding node features "x",
          node labels "y" and the graph structure.
        - edge_feature_store: store with the edge attribute, or None when
          `has_edge_feature` is False.
        - num_nodes: largest node id found in the edge list, plus one.
        - num_features: number of node feature columns.
        - num_classes: number of distinct label values.
        - wt_data: class-weight tensor, or None when `args.use_cross_weights`
          is falsy.
    """
    # ---- graph structure -------------------------------------------------
    edge_path = os.path.join(dataset_root, edge_filename)
    edge_data = cudf.read_csv(
        edge_path,
        header=None,
        names=[edge_src_col, edge_dst_col, edge_att_col],
        dtype=['int32', 'int32', 'float'],
    )

    # Node ids are used as 0-based indices, so the node count is max id + 1.
    num_nodes = int(max(edge_data[edge_src_col].max(), edge_data[edge_dst_col].max())) + 1
    src_tensor = torch.as_tensor(edge_data[edge_src_col], device='cuda')
    dst_tensor = torch.as_tensor(edge_data[edge_dst_col], device='cuda')

    graph_store = cugraph_pyg.data.GraphStore()
    graph_store[("n", "e", "n"), "coo", False, (num_nodes, num_nodes)] = [src_tensor, dst_tensor]

    edge_feature_store = None
    if has_edge_feature:
        edge_feature_store = cugraph_pyg.data.TensorDictFeatureStore()
        edge_attr = torch.as_tensor(edge_data[edge_att_col], device='cuda')
        edge_feature_store[("n", "e", "n"), "rel"] = edge_attr.unsqueeze(1)

    del edge_data

    # ---- labels ----------------------------------------------------------
    label_path = os.path.join(dataset_root, label_filename)
    label_data = cudf.read_csv(label_path, header=None, dtype=['int32'])
    label_column = label_data['0']  # header-less file: the only column is named '0'
    y_label_tensor = torch.as_tensor(label_column, device='cuda')
    num_classes = int(label_column.unique().count())

    wt_data = None
    if args.use_cross_weights:
        if args.cross_weights is None:
            # value_counts() orders its result by frequency, so it is re-sorted
            # by label value: entry i of the weight tensor must belong to
            # class i. NOTE(review): this assumes labels are the contiguous
            # ids 0..num_classes-1 - confirm against the preprocessing output.
            class_counts = label_column.value_counts().sort_index()
            inverse_frequency = class_counts.sum() / class_counts
            wt_data = torch.as_tensor(inverse_frequency, device='cuda', dtype=torch.float32)
            wt_data = wt_data / wt_data.sum()  # normalize so the weights sum to 1
        else:
            wt_data = torch.as_tensor(args.cross_weights, device='cuda')

    del label_column, label_data

    # ---- node features ---------------------------------------------------
    feature_path = os.path.join(dataset_root, node_feature_filename)
    feature_data = cudf.read_csv(feature_path)
    feature_columns = feature_data.columns

    # Stack one tensor per column, then transpose to (rows, feature columns).
    x_feature_tensor = torch.stack(
        [torch.as_tensor(feature_data[c].values, device='cuda') for c in feature_columns]
    ).T

    feature_store = cugraph_pyg.data.TensorDictFeatureStore()
    feature_store["node", "x"] = x_feature_tensor
    feature_store["node", "y"] = y_label_tensor

    num_features = len(feature_columns)

    return (
        (feature_store, graph_store),
        edge_feature_store,
        num_nodes,
        num_features,
        num_classes,
        wt_data,
    )



### Define a function to train the GraphSAGE model for particular values of hyper-parameters.

In [16]:

def train_model_with_config(params, verbose=False):
    """
    Trains GraphSAGE with k-fold cross-validation for one hyper-parameter setting.

    The transaction nodes `0 .. num_transaction_nodes - 1` are split into
    `params['n_splits']` contiguous folds of equal size; nodes left over by
    the integer division are always part of the training set. A fresh model is
    trained for every fold, with early stopping on the validation loss.

    Parameters:
    ----------
    params : dict
        Must provide 'n_splits', 'hidden_channels', 'n_hops', 'learning_rate',
        'weight_decay' and 'num_epochs'.
    verbose : bool, optional (default=False)
        If True, print the losses of every epoch and early-stopping events.

    Returns:
    -------
    tuple
        - mean over the folds of the best validation loss of each fold,
        - the model trained on the last fold,
        - the 0-based epoch index at which the last fold reached its best
          validation loss.
    """
    data, _, _, num_features, _, _ = load_data(args.dataset_root)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    num_folds = params['n_splits']  # Number of folds
    fold_size = num_transaction_nodes // num_folds
    # The sampler always draws two hops, independently of params['n_hops'].
    fan_outs = [args.fan_out, args.fan_out]

    # Perform cross-validation
    validation_losses = []
    for k in range(num_folds):
        val_start, val_end = k * fold_size, (k + 1) * fold_size

        # Everything outside the k-th fold is used for training.
        training_nodes = torch.cat(
            (
                torch.arange(0, val_start),
                torch.arange(val_end, num_transaction_nodes),
            )
        )
        validation_nodes = torch.arange(val_start, val_end)

        # Same graph for both loaders, different seed nodes
        train_loader = NeighborLoader(
            data,
            num_neighbors=fan_outs,
            batch_size=args.batch_size,
            input_nodes=training_nodes,
            shuffle=True,
        )
        validation_loader = NeighborLoader(
            data,
            num_neighbors=fan_outs,
            batch_size=args.batch_size,
            input_nodes=validation_nodes,
            shuffle=False,
        )

        # Define the model
        model = GraphSAGE(
            in_channels=num_features,
            hidden_channels=params['hidden_channels'],
            out_channels=args.out_channels,
            n_hops=params['n_hops'],
            dropout_prob=0.25).to(device)

        # Define optimizer and loss function for GNN
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=params['learning_rate'],
                                     weight_decay=params['weight_decay'])

        # Fixed class weights are used here; the weights returned by load_data
        # are not applied.
        criterion = torch.nn.CrossEntropyLoss(
            weight=torch.tensor([0.1, 0.9], dtype=torch.float32)).to(device)  # Weighted loss function

        # Set up the early stopping object
        early_stopping = EarlyStopping(patience=3, min_delta=0.01)

        best_val_loss = float('inf')
        num_epoch_for_best_loss = 0

        # Train the GNN model
        for epoch in range(params['num_epochs']):
            train_loss = train_gnn(model, train_loader, optimizer, criterion)
            val_loss = validation_loss(model, validation_loader, criterion)
            if verbose:
                print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

            # Record the best loss before the early-stopping check: an
            # improvement smaller than min_delta can occur in the very epoch
            # that triggers the stop and must not be lost by the `break`.
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                num_epoch_for_best_loss = epoch

            # Check early stopping criteria
            if early_stopping.check_early_stopping(val_loss):
                if verbose:
                    print(f"Early stopping triggered at epoch {epoch+1}.")
                break

        # Save validation loss for the current fold
        validation_losses.append(best_val_loss)
    return np.mean(validation_losses), model, num_epoch_for_best_loss


#### Parameter grid to search for better hyper-parameters

__Note__: To execute the notebook faster, we commented out the grid search

In [17]:
## Uncomment this cell to find the best hyperparameters in the parameter grid
# from sklearn.model_selection import ParameterGrid
# # Define the hyperparameter grid
# param_grid = {
#     'n_splits': [5],
#     'n_hops': [1, 2],
#     'learning_rate': [0.005, 0.01],
#     'hidden_channels': [32, 64],
#     'num_epochs': [8, 16],
#     'weight_decay': [1e-5],
     
# }
# grid = list(ParameterGrid(param_grid))

#### Search for better hyper-parameters

In [18]:
## Uncomment this cell to find the best hyperparameters in the parameter grid
# best_val_loss = float('inf')
# epoch = 0
# best_params = None
# for params in grid:
#     val_loss, _, epoch = train_model_with_config(params, verbose=False)
#     if val_loss < best_val_loss:
#         best_params = params
#         best_val_loss = val_loss

In [19]:
# best_params

In [20]:
# Hyper-parameters used for the final training run below.
# Comment out this cell to train on a new dataset.
best_params = dict(
    n_hops=1,
    learning_rate=0.005,
    hidden_channels=32,
    num_epochs=16,
    weight_decay=1e-5,
)

#### Train and save the GraphSAGE model

In [None]:

data, ef_store, num_nodes, num_features, num_classes, cross_wt_data = load_data(args.dataset_root)

# Train on entire dataset
train_loader = NeighborLoader(
    data,
    num_neighbors=[args.fan_out, args.fan_out],
    batch_size=args.batch_size,
    input_nodes=torch.arange(num_transaction_nodes),
    shuffle=True
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the model
model = GraphSAGE(
    in_channels=num_features,
    hidden_channels=best_params['hidden_channels'],
    out_channels=args.out_channels,
    n_hops=best_params['n_hops'],
    dropout_prob=0.25).to(device)


# Define optimizer and loss function for GNN
optimizer = torch.optim.Adam(model.parameters(),
                             lr=best_params['learning_rate'],
                             weight_decay=best_params['weight_decay'])


criterion = torch.nn.CrossEntropyLoss(
    weight=torch.tensor([0.1, 0.9], dtype=torch.float32)).to(device)  # Weighted loss function

# Set up the early stopping object
early_stopping = EarlyStopping(patience=3, min_delta=0.01)

# Make sure the output directory exists before the first checkpoint is written
os.makedirs(args.model_root_dir, exist_ok=True)
gnn_model_path = os.path.join(args.model_root_dir, 'node_embedder.pth')

best_train_loss = float('inf')
best_model_state = None

# Train the GNN model. There is no validation split here (the whole dataset is
# used for training), so the training loss drives both checkpointing and
# early stopping.
for epoch in range(best_params['num_epochs']):
    train_loss = train_gnn(model, train_loader, optimizer, criterion)

    # Checkpoint before the early-stopping check so that an improvement in the
    # stopping epoch is still saved.
    if train_loss < best_train_loss:
        best_train_loss = train_loss
        best_model_state = copy.deepcopy(model.state_dict())
        torch.save(model, gnn_model_path)

        print(f"Model saved at epoch {epoch+1} with training loss {best_train_loss:.4f}.")

    # Check early stopping criteria
    if early_stopping.check_early_stopping(train_loss):
        print(f"Early stopping triggered at epoch {epoch+1}.")
        break

# Later cells extract embeddings with the in-memory `model`. Restore the
# weights of the saved checkpoint so that those embeddings come from exactly
# the model that was written to disk.
if best_model_state is not None:
    model.load_state_dict(best_model_state)


### Train the XGBoost model based on embeddings produced by the GraphSAGE model

In [None]:
# Reload the feature/graph stores and build a NeighborLoader whose seed nodes
# are all the transaction nodes.
data, ef_store, num_nodes, num_features, num_classes, cross_wt_data = load_data(
    args.dataset_root
)

train_loader = NeighborLoader(
    data,
    input_nodes=torch.arange(num_transaction_nodes),
    num_neighbors=[args.fan_out, args.fan_out],
    batch_size=args.batch_size,
    shuffle=True,
)


In [23]:
# Set the device to GPU if available; otherwise, default to CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Extract the hidden-layer embeddings (and matching labels) of the training nodes
embeddings, labels = extract_embeddings(model, train_loader)

# Train an XGBoost model on the extracted embeddings
bst = train_xgboost(embeddings.to(device), labels.to(device))

xgb_model_path = os.path.join(args.model_root_dir, 'embedding_based_xgb_model.json')

# exist_ok=True creates the directory when missing, without a separate
# check-then-create step.
os.makedirs(os.path.dirname(xgb_model_path), exist_ok=True)

bst.save_model(xgb_model_path)
    

### Evaluate the model on unseen data

##### Load and prepare test data


In [24]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

test_path = os.path.join(args.dataset_base_path, 'xgb/test.csv')
test_data = cudf.read_csv(test_path)

# All columns but the last are used as features; the last column is the label.
feature_values = test_data.iloc[:, :-1].values
label_values = test_data.iloc[:, -1].values

X = torch.tensor(feature_values).to(torch.float32)
y = torch.tensor(label_values).to(torch.long)


##### Extract embeddings of the transactions using the GraphSAGE model

In [25]:

model.eval()
with torch.no_grad():
    # The test transactions are embedded without any graph edges, so an empty
    # edge index of shape (2, 0) is passed. torch.long is the index dtype
    # PyTorch Geometric expects for edge_index.
    empty_edge_index = torch.empty((2, 0), dtype=torch.long, device=device)
    test_embeddings = model(X.to(device), empty_edge_index, return_hidden=True)


##### Evaluate the XGBoost model

In [None]:

f1, recall, precision, accuracy, conf_mat = evaluate_xgboost(bst, test_embeddings, y)

# Report the scalar metrics with four decimals, then the confusion matrix
print("\nXGBoost Evaluation:")
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_name}: {metric_value:.4f}")
print('Confusion Matrix:', conf_mat)

## Copyright and License
<hr/>
Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

<br/>

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
 http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.