In [1]:
import dgl
import torch
from tqdm import tqdm
import dgl.function as fn
import torch.nn as nn
from torch.utils.data import DataLoader
import dgl.nn as dglnn
import torch.nn.functional as F
from torch.optim import Adam
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
# NOTE(review): no cell visible in this notebook moves the model or the graphs
# onto `device`, so as written everything still runs on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

################################################################################
The 'datapipes', 'dataloader2' modules are deprecated and will be removed in a
future torchdata release! Please see https://github.com/pytorch/data/issues/1196
to learn more and leave feedback.
################################################################################

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Serialized graph collections, resolved relative to the notebook's working
# directory.
BENIGN_PATH = "benign.bin"
MALICIOUS_PATH = "malicious.bin"

# dgl.load_graphs returns a (list of graphs, dict of label tensors) pair.
benign_graphs, benign_labels = dgl.load_graphs(BENIGN_PATH)
malicious_graphs, malicious_labels = dgl.load_graphs(MALICIOUS_PATH)

: 

In [3]:
# Merge both classes into one sample list; the label tensors are concatenated
# in the same order (benign first, then malicious) so that index i of `labels`
# belongs to index i of `graphs`.
# NOTE(review): assumes each file stores its 'labels' tensor in the same order
# as its graphs - confirm against the code that wrote the .bin files.
graphs = benign_graphs + malicious_graphs
labels = torch.cat([benign_labels['labels'], malicious_labels['labels']])

# zip() silently stops at the shorter input, which would drop samples without
# any error if a file had a different number of labels than graphs. Fail
# loudly instead.
if len(graphs) != len(labels):
    raise ValueError(
        f"Number of graphs ({len(graphs)}) does not match "
        f"number of labels ({len(labels)})"
    )

# Each dataset item is a (graph, label) pair consumed by custom_collate_fn.
dataset = list(zip(graphs, labels))

In [4]:
def custom_collate_fn(batch):
    """Collate ``(graph, label)`` samples into one batched graph and a label tensor.

    Args:
        batch: sequence of ``(graph, label)`` pairs as yielded by the dataset.

    Returns:
        A ``(batched_graph, batch_labels)`` tuple: the result of ``dgl.batch``
        over the sample graphs, and the sample labels stacked along a new
        leading dimension.
    """
    # Transpose [(g0, y0), (g1, y1), ...] into (g0, g1, ...) and (y0, y1, ...).
    sample_graphs, sample_labels = zip(*batch)
    return dgl.batch(sample_graphs), torch.stack(sample_labels)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the (graph, label) pairs for testing; the fixed random_state
# makes the split reproducible.
train, test = train_test_split(dataset, test_size=0.2, random_state=42)

# Both loaders share the batch size and the graph-aware collate function; only
# the training loader shuffles.
loader_kwargs = dict(batch_size=1, collate_fn=custom_collate_fn)
train_dataloader = DataLoader(train, shuffle=True, **loader_kwargs)
test_dataloader = DataLoader(test, shuffle=False, **loader_kwargs)


In [6]:
class RGCN(nn.Module):
    """Two-layer relational graph convolution for heterogeneous graphs.

    Each layer is a ``HeteroGraphConv`` holding one ``GraphConv`` per relation
    name, constructed with ``aggregate='mean'``.

    Args:
        in_feats: size of the input node features.
        hid_feats: size of the features produced by the first layer.
        out_feats: size of the features produced by the second layer.
        rel_names: iterable of relation (edge type) names; one ``GraphConv``
            is created per name in each layer.
    """

    def __init__(self, in_feats, hid_feats, out_feats, rel_names):
        super().__init__()

        first_layer_convs = {}
        for rel in rel_names:
            first_layer_convs[rel] = dglnn.GraphConv(in_feats, hid_feats)
        self.conv1 = dglnn.HeteroGraphConv(first_layer_convs, aggregate='mean')

        second_layer_convs = {}
        for rel in rel_names:
            second_layer_convs[rel] = dglnn.GraphConv(hid_feats, out_feats)
        self.conv2 = dglnn.HeteroGraphConv(second_layer_convs, aggregate='mean')

    def forward(self, graph, inputs):
        """Run both layers with a ReLU applied between them.

        Args:
            graph: the graph passed through to both ``HeteroGraphConv`` layers.
            inputs: dict of input features handed to the first layer.

        Returns:
            The dict returned by the second layer (no activation is applied
            to it).
        """
        hidden = self.conv1(graph, inputs)

        activated = {}
        for key, feats in hidden.items():
            activated[key] = F.relu(feats)

        return self.conv2(graph, activated)

class HeteroClassifier(nn.Module):
    """Graph-level classifier: RGCN node embeddings -> mean readout -> linear layer.

    Args:
        in_dim: size of the input node feature stored under ``'h'``.
        hidden_dim: size of the RGCN hidden and output features; also the
            input size of the final linear layer.
        n_classes: number of output logits per graph.
        rel_names: relation (edge type) names forwarded to ``RGCN``.
    """

    def __init__(self, in_dim, hidden_dim, n_classes, rel_names):
        super().__init__()

        self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names)
        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, g):
        """Compute class logits for every graph batched in ``g``.

        Args:
            g: heterogeneous (possibly batched) graph whose node types all
                carry an input feature under the key ``'h'``.

        Returns:
            The output of the final linear layer applied to the summed
            per-node-type mean readouts.
        """
        inputs = {ntype: g.nodes[ntype].data['h'] for ntype in g.ntypes}

        h = self.rgcn(g, inputs)

        with g.local_scope():
            hg = None

            # Read out ONLY the node types the RGCN produced embeddings for.
            # Types missing from `h` still hold their raw input feature under
            # 'h'; including them would add raw inputs to learned embeddings
            # and break (or silently broadcast) when in_dim != hidden_dim.
            for ntype in g.ntypes:
                if ntype not in h:
                    continue

                g.nodes[ntype].data['h'] = h[ntype]
                # Calculate graph representation by average readout.
                readout = dgl.mean_nodes(g, 'h', ntype=ntype)
                hg = readout if hg is None else hg + readout

            if hg is None:
                # No node type received an embedding (e.g. a graph without
                # edges): use an all-zero representation rather than passing
                # None to the linear layer.
                hg = self.classify.weight.new_zeros(
                    (g.batch_size, self.classify.in_features)
                )

            return self.classify(hg)

In [7]:
# Union of the edge-type names over every graph, sorted so the relation order
# handed to the model is the same on every run.
unique_rel_names = sorted(
    {etype for graph in graphs for etype in graph.etypes}
)

In [None]:
from torch.optim import Adam
# Relational GCN classifier: 1-d input features, 1-d hidden size, 2 classes.
model = HeteroClassifier(
    in_dim=1,
    hidden_dim=1,
    n_classes=2,
    rel_names=unique_rel_names,
)
optimiser = Adam(model.parameters(), lr=0.01, weight_decay=1e-4)
loss_fn = nn.CrossEntropyLoss()
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    running_loss = 0

    for graph, label in train_dataloader:
        # Clear gradients left over from the previous step before computing
        # new ones.
        optimiser.zero_grad()

        # CrossEntropyLoss expects integer class indices as targets.
        loss = loss_fn(model(graph), label.long())
        loss.backward()
        optimiser.step()

        running_loss += loss.item()

    # Average the per-batch losses over the number of batches in the epoch.
    mean_loss = running_loss / len(train_dataloader)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}')
    
    
        

In [None]:
    from sklearn.metrics import classification_report

    model.eval()

    # Collected per batch and concatenated afterwards. Separate names are used
    # so the dataset-level `labels` tensor built earlier in the notebook is
    # not overwritten by this cell.
    pred_batches = []
    label_batches = []

    with torch.no_grad():
        for graph, label in test_dataloader:
            logits = model(graph)
            # Predicted class = index of the largest logit for each graph.
            pred_batches.append(torch.argmax(logits, dim=1).cpu())
            # Flatten to 1-D integer class indices, matching the target
            # format used during training (label.long()).
            label_batches.append(label.long().reshape(-1).cpu())

    # scikit-learn expects flat array-likes with one entry per sample; a list
    # of per-batch tensors only happens to work for batch_size=1 on the CPU.
    y_pred = torch.cat(pred_batches).numpy()
    y_true = torch.cat(label_batches).numpy()

    report = classification_report(y_true, y_pred)
    print(report)



In [10]:
import torch.nn as nn
import torch.nn.functional as F
import dgl.nn as dglnn

class GNN(nn.Module):
    """Graph classifier built on mean-pooled node features.

    The forward pass performs no message passing: it reads the node feature
    ``'h'`` of every node type, mean-pools it per graph, sums the pooled
    vectors over node types and feeds the result through two linear layers.

    Args:
        in_feats: size of the pooled input feature.
        hidden_feats: size of the hidden layer.
        out_feats: number of output logits.
    """

    def __init__(self, in_feats, hidden_feats, out_feats):
        super().__init__()
        self.fc = nn.Linear(in_feats, hidden_feats)
        self.classify = nn.Linear(hidden_feats, out_feats)

    def forward(self, graph):
        """Return the logits for the graph(s) in ``graph``.

        Args:
            graph: heterogeneous graph whose node types store a feature
                under the key ``'h'``.

        Returns:
            Output of the final linear layer.
        """
        with graph.local_scope():
            # Sum of the per-node-type mean readouts (sum() starts from 0).
            pooled = sum(
                dgl.mean_nodes(graph, 'h', ntype=ntype)
                for ntype in graph.ntypes
            )

            hidden = F.relu(self.fc(pooled))
            return self.classify(hidden)

In [11]:


# Baseline hyperparameters: 1-d pooled input, 256 hidden units, 2 classes.
in_feats, hidden_feats, out_feats = 1, 256, 2

# NOTE: this rebinds `model`, `optimiser` and `loss_fn`, replacing the RGCN
# classifier objects created earlier in the notebook.
model = GNN(
    in_feats=in_feats,
    hidden_feats=hidden_feats,
    out_feats=out_feats,
)
optimiser = Adam(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

In [None]:
    num_epochs = 50

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0

        for graph, label in train_dataloader:
            # Clear stale gradients, then do one forward/backward/update step.
            optimiser.zero_grad()

            # CrossEntropyLoss expects integer class indices as targets.
            batch_loss = loss_fn(model(graph), label.long())
            batch_loss.backward()
            optimiser.step()

            epoch_loss += batch_loss.item()

        # Average the per-batch losses over the number of batches.
        mean_loss = epoch_loss / len(train_dataloader)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}')

    # Evaluate
    model.eval()
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for graph, label in test_dataloader:
            # Predicted class = index of the largest logit.
            preds = model(graph).argmax(dim=1)

            n_correct += preds.eq(label).sum().item()
            n_seen += len(label)

    accuracy = n_correct / n_seen
    print(f"Test Accuracy: {accuracy * 100:.2f}%")