In [1]:
from tgml.data import TigerGraph

tgraph = TigerGraph(
    host="http://18.222.126.26", # Replace with your instance ip
    graph="OGBNProducts",
    username="tigergraph",
    password="tigergraph",
    token_auth=False
)

In [2]:
tgraph.info()

Using graph 'OGBNProducts'
---- Graph OGBNProducts
Vertex Types: 
  - VERTEX Product(PRIMARY_ID id INT, x LIST<DOUBLE>, y INT, train_mask BOOL, val_mask BOOL, test_mask BOOL, tmp_id INT, tmp_id2 INT, tmp_id3 INT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
Edge Types: 
  - UNDIRECTED EDGE Purchased(FROM Product, TO Product)

Graphs: 
  - Graph OGBNProducts(Product:v, Purchased:e)
Jobs: 
Queries: 
  - get_vertex_number(string v_type, string filter_by) (installed v2)
  - shuffle_vertices(string tmp_id) (installed v2)
  - tg_neighbor_sampler_x_y_train_mask_val_mask_test_mask(string vertex_filename, string edge_filename, int batch_id, int num_batches, int num_neighbors, int num_hops, string filter_by, string tmp_id) (installed v2)






In [3]:
tgraph.number_of_vertices()

2449029

In [4]:
tgraph.number_of_edges()

61859139

In [5]:
print(
    "Number of vertices in training set:",
    tgraph.number_of_vertices(filter_by="train_mask"),
)
print(
    "Number of vertices in validation set:",
    tgraph.number_of_vertices(filter_by="val_mask"),
)
print(
    "Number of vertices in test set:", tgraph.number_of_vertices(filter_by="test_mask")
)

Number of vertices in training set: 196615
Number of vertices in validation set: 39323
Number of vertices in test set: 2213091


In [6]:
hp = {
    "batch_size": 1024,
    "num_neighbors": 20,
    "num_hops": 2,
    "hidden_dim": 128,
    "num_layers": 2,
    "dropout": 0.1,
    "lr":0.01,
    "l2_penalty":0
}

In [7]:
from tgml.dataloaders import NeighborLoader

In [8]:
train_loader = NeighborLoader(
    graph=tgraph,
    tmp_id="tmp_id",
    v_in_feats="x",
    v_out_labels="y:int",
    v_extra_feats="train_mask:bool,val_mask:bool,test_mask:bool",
    output_format="PyG",
    batch_size=hp["batch_size"],
    num_neighbors=hp["num_neighbors"],
    num_hops=hp["num_hops"],
    shuffle=True,
    filter_by="train_mask",
)

In [9]:
valid_loader = NeighborLoader(
    graph=tgraph,
    tmp_id="tmp_id2",
    v_in_feats="x",
    v_out_labels="y:int",
    v_extra_feats="train_mask:bool,val_mask:bool,test_mask:bool",
    output_format="PyG",
    batch_size=hp["batch_size"],
    num_neighbors=hp["num_neighbors"],
    num_hops=hp["num_hops"],
    shuffle=False,
    filter_by="val_mask",
)

In [10]:
test_loader = NeighborLoader(
    graph=tgraph,
    tmp_id="tmp_id3",
    v_in_feats="x",
    v_out_labels="y:int",
    v_extra_feats="train_mask:bool,val_mask:bool,test_mask:bool",
    output_format="PyG",
    batch_size=hp["batch_size"],
    num_neighbors=hp["num_neighbors"],
    num_hops=hp["num_hops"],
    shuffle=False,
    filter_by="test_mask",
)

In [11]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GraphSAGE

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = GraphSAGE(
    in_channels=100, # dimension of x feature vectors
    hidden_channels=hp["hidden_dim"],
    num_layers=hp["num_layers"],
    out_channels=47,
    dropout=hp["dropout"],
).to(device)

optimizer = torch.optim.Adam(
    model.parameters(), lr=hp["lr"], weight_decay=hp["l2_penalty"]
)

In [13]:
from datetime import datetime

from tgml.metrics import Accumulator, Accuracy
from torch.utils.tensorboard import SummaryWriter

In [None]:
log_dir = "logs/products/graphsage/subgraph/" + datetime.now().strftime("%Y%m%d-%H%M%S")
train_log = SummaryWriter(log_dir+"/train")
valid_log = SummaryWriter(log_dir+"/valid")
global_steps = 0
logs = {}
for epoch in range(10):
    # Train
    model.train()
    epoch_train_loss = Accumulator()
    epoch_train_acc = Accuracy()
    for bid, batch in enumerate(train_loader):
        batchsize = batch.x.shape[0]
        batch.to(device)
        # Forward pass
        out = model(batch.x, batch.edge_index)
        # Calculate loss
        loss = F.cross_entropy(out[batch.train_mask], batch.y[batch.train_mask])
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_train_loss.update(loss.item() * batchsize, batchsize)
        # Predict on training data
        with torch.no_grad():
            pred = out.argmax(dim=1)
            epoch_train_acc.update(pred[batch.train_mask], batch.y[batch.train_mask])
        # Log training status after each batch
        logs["loss"] = epoch_train_loss.mean
        logs["acc"] = epoch_train_acc.value
        print(
            "Epoch {}, Train Batch {}, Loss {:.4f}, Accuracy {:.4f}".format(
                epoch, bid, logs["loss"], logs["acc"]
            )
        )
        train_log.add_scalar("Loss", logs["loss"], global_steps)
        train_log.add_scalar("Accuracy", logs["acc"], global_steps)
        train_log.flush()
        global_steps += 1
    # Evaluate
    model.eval()
    epoch_val_loss = Accumulator()
    epoch_val_acc = Accuracy()
    for batch in valid_loader:
        batchsize = batch.x.shape[0]
        batch.to(device)
        with torch.no_grad():
            # Forward pass
            out = model(batch.x, batch.edge_index)
            # Calculate loss
            valid_loss = F.cross_entropy(out[batch.val_mask], batch.y[batch.val_mask])
            epoch_val_loss.update(valid_loss.item() * batchsize, batchsize)
            # Prediction
            pred = out.argmax(dim=1)
            epoch_val_acc.update(pred[batch.val_mask], batch.y[batch.val_mask])
    # Log testing result after each epoch
    logs["val_loss"] = epoch_val_loss.mean
    logs["val_acc"] = epoch_val_acc.value
    print(
        "Epoch {}, Valid Loss {:.4f}, Valid Accuracy {:.4f}".format(
            epoch, logs["val_loss"], logs["val_acc"]
        )
    )
    valid_log.add_scalar("Loss", logs["val_loss"], global_steps)
    valid_log.add_scalar("Accuracy", logs["val_acc"], global_steps)
    valid_log.flush()



Epoch 0, Train Batch 0, Loss 3.8985, Accuracy 0.0089
Epoch 0, Train Batch 1, Loss 3.7180, Accuracy 0.0809
Epoch 0, Train Batch 2, Loss 3.1002, Accuracy 0.2480
Epoch 0, Train Batch 3, Loss 2.8102, Accuracy 0.3180
Epoch 0, Train Batch 4, Loss 2.5880, Accuracy 0.3554
Epoch 0, Train Batch 5, Loss 2.4245, Accuracy 0.3873
Epoch 0, Train Batch 6, Loss 2.3049, Accuracy 0.4119
Epoch 0, Train Batch 7, Loss 2.1939, Accuracy 0.4438
Epoch 0, Train Batch 8, Loss 2.0948, Accuracy 0.4751
Epoch 0, Train Batch 9, Loss 2.0261, Accuracy 0.4946
Epoch 0, Train Batch 10, Loss 1.9456, Accuracy 0.5152
Epoch 0, Train Batch 11, Loss 1.8806, Accuracy 0.5317
Epoch 0, Train Batch 12, Loss 1.8342, Accuracy 0.5429
Epoch 0, Train Batch 13, Loss 1.7811, Accuracy 0.5555
Epoch 0, Train Batch 14, Loss 1.7376, Accuracy 0.5663
Epoch 0, Train Batch 15, Loss 1.7014, Accuracy 0.5757
Epoch 0, Train Batch 16, Loss 1.6696, Accuracy 0.5841
Epoch 0, Train Batch 17, Loss 1.6354, Accuracy 0.5923
Epoch 0, Train Batch 18, Loss 1.6029, 

In [None]:
model.eval()
acc = Accuracy()
for batch in test_loader:
    batch.to(device)
    with torch.no_grad():
        pred = model(batch.x, batch.edge_index).argmax(dim=1)
        acc.update(pred[batch.test_mask], batch.y[batch.test_mask])
print("Accuracy: {:.4f}".format(acc.value))