Skip to content

Commit

Permalink
example: add a dgl GAT example (#714)
Browse files Browse the repository at this point in the history
* add dgl example

Signed-off-by: Jinjing.Zhou <allenzhou@tensorchord.ai>

* add license header

Signed-off-by: Jinjing.Zhou <allenzhou@tensorchord.ai>
  • Loading branch information
VoVAllen committed Aug 3, 2022
1 parent 51b841d commit 116271c
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 0 deletions.
7 changes: 7 additions & 0 deletions examples/dgl/README.md
@@ -0,0 +1,7 @@
# Example using dgl

The script is borrowed from https://github.com/dmlc/dgl/tree/master/examples/pytorch/gat.

To run on CPU, run `envd up` directly.

To run on GPU, run `envd up -f :build_gpu`.
28 changes: 28 additions & 0 deletions examples/dgl/build.envd
@@ -0,0 +1,28 @@
def build():
    # Default (CPU) environment for the DGL GAT example.

    # Base image: Ubuntu 20.04 with Python 3.
    base(os="ubuntu20.04", language="python3")

    # Python packages needed by the example.
    python_deps = ["numpy", "dgl", "torch"]
    install.python_packages(python_deps)

    # Interactive shell used inside the environment.
    shell("zsh")

    # Mount a host directory at ~/.dgl inside the environment
    # (presumably so data DGL downloads is kept on the host -- confirm).
    io.mount(src="~/.envd/data/dgl", dest="~/.dgl")

def build_gpu():
    # GPU environment for the DGL GAT example (select with `-f :build_gpu`).

    # Base image: Ubuntu 20.04 with Python 3.
    base(os="ubuntu20.04", language="python3")

    # CUDA toolkit and cuDNN; the wheels below are chosen to match 11.6.
    install.cuda(version="11.6", cudnn="8")

    # Each package that needs its own pip index/find-links option is installed
    # in a separate call so the option applies only to that package.
    install.python_packages(["numpy"])
    # PyTorch wheel built for CUDA 11.6.
    install.python_packages(["torch --extra-index-url https://download.pytorch.org/whl/cu116"])
    # DGL wheel built for CUDA 11.6, matching the toolkit and the PyTorch
    # build above (previously the cu113 wheel was installed).
    install.python_packages(["dgl-cu116 -f https://data.dgl.ai/wheels/repo.html"])

    # Interactive shell used inside the environment.
    shell("zsh")

    # Mount a host directory at ~/.dgl inside the environment
    # (presumably so data DGL downloads is kept on the host -- confirm).
    io.mount(src="~/.envd/data/dgl", dest="~/.dgl")
108 changes: 108 additions & 0 deletions examples/dgl/train.py
@@ -0,0 +1,108 @@
# Copyright 2022 The envd Authors
# Copyright 2022 The dgl Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl.nn as dglnn
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl import AddSelfLoop
import argparse

class GAT(nn.Module):
    """Two-layer graph attention network built from DGL ``GATConv`` layers.

    Args:
        in_size: Size of each input node feature vector.
        hid_size: Per-head output size of the first layer.
        out_size: Per-head output size of the second layer.
        heads: Sequence whose first two entries are the number of attention
            heads of the first and the second layer.
    """

    def __init__(self, in_size, hid_size, out_size, heads):
        super().__init__()
        first_heads, second_heads = heads[0], heads[1]
        # Hidden layer: ELU activation; its heads are concatenated in forward().
        hidden = dglnn.GATConv(
            in_size,
            hid_size,
            first_heads,
            feat_drop=0.6,
            attn_drop=0.6,
            activation=F.elu,
        )
        # Output layer: no activation; its heads are averaged in forward().
        output = dglnn.GATConv(
            hid_size * first_heads,
            out_size,
            second_heads,
            feat_drop=0.6,
            attn_drop=0.6,
            activation=None,
        )
        self.gat_layers = nn.ModuleList([hidden, output])

    def forward(self, g, inputs):
        """Run both layers on graph ``g`` and return the final layer's output.

        Each layer's result is reduced over dim 1 (presumably the attention
        head axis -- see the GATConv docs): flattened into the feature axis
        for the hidden layer, averaged for the last layer.
        """
        last = len(self.gat_layers) - 1
        h = inputs
        for idx, conv in enumerate(self.gat_layers):
            h = conv(g, h)
            h = h.mean(1) if idx == last else h.flatten(1)
        return h

def evaluate(g, features, labels, mask, model):
    """Return the classification accuracy of ``model`` on the masked nodes.

    The model is switched to evaluation mode (and left in it) and is run
    without gradient tracking.

    Args:
        g: Graph passed through to ``model`` as its first argument.
        features: Node features passed to ``model`` as its second argument.
        labels: Tensor of ground-truth class indices, one per node.
        mask: Index or boolean mask selecting the nodes to score.
        model: Callable module invoked as ``model(g, features)``; its output
            is indexed with ``mask`` and arg-maxed over dim 1.

    Returns:
        Fraction of selected nodes whose predicted class equals the label,
        as a Python float. Returns 0.0 when ``mask`` selects no nodes
        (previously this raised ``ZeroDivisionError``).
    """
    model.eval()
    with torch.no_grad():
        logits = model(g, features)[mask]
        targets = labels[mask]
        total = len(targets)
        if total == 0:
            # Nothing to score: avoid dividing by zero.
            return 0.0
        predictions = logits.argmax(dim=1)
        correct = (predictions == targets).sum().item()
    return correct / total

def train(g, features, labels, masks, model, epochs=200, lr=5e-3, weight_decay=5e-4):
    """Train ``model`` full-batch with Adam and cross-entropy loss.

    After every epoch the validation accuracy is computed with ``evaluate``
    and printed together with the training loss.

    Args:
        g: Graph passed through to ``model``.
        features: Node features passed to ``model``.
        labels: Tensor of ground-truth class indices, one per node.
        masks: Sequence whose first entry selects the training nodes and
            whose second entry selects the validation nodes.
        model: Module to optimize in place.
        epochs: Number of training epochs (default 200).
        lr: Adam learning rate (default 5e-3).
        weight_decay: Adam weight decay (default 5e-4).
    """
    # define train/val samples, loss function and optimizer
    train_mask, val_mask = masks[0], masks[1]
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # training loop
    for epoch in range(epochs):
        # evaluate() leaves the model in eval mode, so re-enable training
        # mode at the start of every epoch.
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(g, features, labels, val_mask, model)
        print("Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} "
              .format(epoch, loss.item(), acc))

if __name__ == '__main__':
    # Command line: only the dataset name is configurable.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        default="cora",
        help="Dataset name ('cora', 'citeseer', 'pubmed').",
    )
    args = parser.parse_args()
    print('Training with DGL built-in GATConv module.')

    # load and preprocess dataset
    # AddSelfLoop: by default, it will first remove self-loops to prevent duplication
    transform = AddSelfLoop()
    dataset_classes = {
        'cora': CoraGraphDataset,
        'citeseer': CiteseerGraphDataset,
        'pubmed': PubmedGraphDataset,
    }
    dataset_cls = dataset_classes.get(args.dataset)
    if dataset_cls is None:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    data = dataset_cls(transform=transform)

    # Use the GPU when PyTorch can see one, otherwise the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Use {device}")

    # The dataset's first graph, converted with .int() and moved to the device.
    g = data[0].int().to(device)
    features = g.ndata['feat']
    labels = g.ndata['label']
    masks = tuple(g.ndata[key] for key in ('train_mask', 'val_mask', 'test_mask'))

    # create GAT model: hidden size 8, 8 heads in the first layer, 1 in the second
    model = GAT(features.shape[1], 8, data.num_classes, heads=[8, 1]).to(device)

    # model training
    print('Training...')
    train(g, features, labels, masks, model)

    # test the model
    print('Testing...')
    test_acc = evaluate(g, features, labels, masks[2], model)
    print("Test accuracy {:.4f}".format(test_acc))

0 comments on commit 116271c

Please sign in to comment.