# Detecting Anomalies using GNNs

## Exploring ACM, BlogCatalog, and Flickr datasets

Ding, Kaize, et al. "Deep anomaly detection on attributed networks." Proceedings of the 2019 SIAM international conference on data mining. Society for Industrial and Applied Mathematics, 2019.

In [1]:
import numpy as np
import scipy.sparse as sp
import scipy.io as sio
import math

import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import roc_auc_score
from datetime import datetime
import argparse

Utils

In [2]:
def load_anomaly_detection_dataset(dataset, datadir='data'):
    """Load a ``.mat`` anomaly-detection dataset and return dense arrays.

    Reads ``{datadir}/{dataset}.mat`` and pulls out the ``'Network'``,
    ``'Attributes'`` and ``'Label'`` entries.

    Args:
        dataset: File stem of the ``.mat`` file (without extension).
        datadir: Directory containing the file.

    Returns:
        A tuple ``(adj_norm, feat, truth, adj)``:
        ``adj_norm`` is the output of ``normalize_adj`` applied to the
        adjacency with an identity matrix added, as a dense array;
        ``feat`` is the dense attribute matrix; ``truth`` is the flattened
        label array; ``adj`` is the adjacency plus identity, dense and
        un-normalized.
    """
    # NOTE(review): `.toarray()` is called on both 'Network' and
    # 'Attributes', so they are assumed to be stored as scipy sparse
    # matrices in the file -- confirm for new datasets.
    contents = sio.loadmat(f'{datadir}/{dataset}.mat')
    network = contents['Network']
    labels = contents['Label'].flatten()
    features = contents['Attributes'].toarray()

    # Add self-loops once; the result feeds both the normalized and the raw
    # adjacency outputs.
    with_self_loops = network + sp.eye(network.shape[0])
    normalized = normalize_adj(with_self_loops).toarray()

    return normalized, features, labels, with_self_loops.toarray()


def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix.

    Computes ``(A @ D^-1/2)^T @ D^-1/2`` where ``D`` is the diagonal matrix
    of row sums of ``adj``. For a symmetric ``adj`` this equals
    ``D^-1/2 @ A @ D^-1/2``.

    Args:
        adj: Any input accepted by ``scipy.sparse.coo_matrix``.

    Returns:
        The normalized matrix as a ``scipy.sparse`` COO matrix. Rows whose
        sum is zero get a scaling factor of 0 instead of infinity.
    """
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    # A zero row sum raised to -0.5 is inf; that is expected and corrected
    # on the next line, so suppress the divide-by-zero RuntimeWarning.
    with np.errstate(divide='ignore'):
        d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()

Defining the Graph Convolutional layer

In [3]:
class GraphConvolution(Module):
    """Graph convolution layer: ``adj @ (input @ weight)`` plus optional bias.

    Args:
        in_features: Number of columns expected in the layer input.
        out_features: Number of columns produced by the layer.
        bias: When true, a learnable bias of length ``out_features`` is added
            to the output; otherwise ``self.bias`` is registered as ``None``.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if not bias:
            # Keeps `self.bias` defined (as None) so later checks work.
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.FloatTensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw weight (and bias, if any) uniformly in ``[-b, b]``.

        ``b`` is ``1 / sqrt(out_features)``, taken from the weight's second
        dimension.
        """
        bound = 1. / math.sqrt(self.weight.size(1))
        # Weight first, then bias, so the random draws happen in that order.
        for param in (self.weight, self.bias):
            if param is not None:
                param.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        """Project ``input`` with the weight, then multiply by ``adj``."""
        propagated = torch.spmm(adj, torch.mm(input, self.weight))
        if self.bias is None:
            return propagated
        return propagated + self.bias

    def __repr__(self):
        return f'{self.__class__.__name__} ({self.in_features} -> {self.out_features})'

Defining the Model

<center><img src="images/anomaly_framework.png" width=1500></center>

In [4]:
class Encoder(nn.Module):
    """Two-layer graph-convolutional encoder (``nfeat -> nhid -> nhid``).

    ReLU follows each layer; dropout with probability ``dropout`` is applied
    between the two layers while the module is in training mode.
    """

    def __init__(self, nfeat, nhid, dropout):
        super().__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return the embedding of ``x`` given the adjacency ``adj``."""
        hidden = F.relu(self.gc1(x, adj))
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        return F.relu(self.gc2(hidden, adj))

class Attribute_Decoder(nn.Module):
    """Two-layer graph-convolutional decoder (``nhid -> nhid -> nfeat``).

    ReLU follows each layer, including the output layer; dropout with
    probability ``dropout`` is applied between the two layers while the
    module is in training mode.
    """

    def __init__(self, nfeat, nhid, dropout):
        super().__init__()

        self.gc1 = GraphConvolution(nhid, nhid)
        self.gc2 = GraphConvolution(nhid, nfeat)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return the reconstructed attribute matrix for embedding ``x``."""
        hidden = F.relu(self.gc1(x, adj))
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        return F.relu(self.gc2(hidden, adj))

class Structure_Decoder(nn.Module):
    """Decoder that reconstructs structure as an inner product of embeddings.

    One graph convolution (``nhid -> nhid``) with ReLU and dropout is applied
    first; the output is that result multiplied by its own transpose.
    """

    def __init__(self, nhid, dropout):
        super().__init__()

        self.gc1 = GraphConvolution(nhid, nhid)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return ``h @ h.T`` where ``h`` is the transformed embedding."""
        hidden = F.dropout(
            F.relu(self.gc1(x, adj)),
            self.dropout,
            training=self.training,
        )
        return hidden @ hidden.T

class Dominant(nn.Module):
    """Autoencoder with one shared encoder and two decoders.

    The encoder output is passed to an attribute decoder and to a structure
    decoder; ``forward`` returns both reconstructions.
    """

    def __init__(self, feat_size, hidden_size, dropout):
        super().__init__()

        self.shared_encoder = Encoder(feat_size, hidden_size, dropout)
        self.attr_decoder = Attribute_Decoder(feat_size, hidden_size, dropout)
        self.struct_decoder = Structure_Decoder(hidden_size, dropout)

    def forward(self, x, adj):
        """Return ``(struct_reconstructed, x_hat)`` for inputs ``x``, ``adj``."""
        embedding = self.shared_encoder(x, adj)
        # Attribute decoder runs before the structure decoder.
        attrs_rebuilt = self.attr_decoder(embedding, adj)
        struct_rebuilt = self.struct_decoder(embedding, adj)
        return struct_rebuilt, attrs_rebuilt

Train

In [5]:
def loss_func(adj, A_hat, attrs, X_hat, alpha):
    """Compute per-row reconstruction errors and their means.

    Each row's error is the Euclidean norm of the difference between the
    reconstruction and its target.

    Args:
        adj: Target structure matrix.
        A_hat: Reconstructed structure matrix.
        attrs: Target attribute matrix.
        X_hat: Reconstructed attribute matrix.
        alpha: Weight of the attribute error; the structure error is
            weighted by ``1 - alpha``.

    Returns:
        ``(cost, structure_cost, attribute_cost)`` where ``cost`` is the
        per-row weighted error (one value per row, not reduced) and the other
        two are the scalar means of the structure and attribute errors.
    """
    def _row_l2(predicted, target):
        # Row-wise L2 distance: sqrt of summed squared differences.
        return (predicted - target).pow(2).sum(dim=1).sqrt()

    attr_err = _row_l2(X_hat, attrs)
    struct_err = _row_l2(A_hat, adj)

    combined = alpha * attr_err + (1 - alpha) * struct_err
    return combined, struct_err.mean(), attr_err.mean()


def train_dominant(adj, adj_label, attrs, label, args):
    """Train a ``Dominant`` model and return it.

    Runs ``args.epoch`` full-batch Adam steps on the mean of the per-row
    loss from ``loss_func``, printing the losses every epoch. Every 100th
    epoch the model is switched to eval mode and the ROC AUC of the per-row
    loss against ``label`` is printed.

    Args:
        adj: Adjacency tensor passed to the model's forward pass.
        adj_label: Target adjacency tensor for the structure loss.
        attrs: Attribute tensor; ``attrs.size(1)`` sets the model's input
            width.
        label: Ground-truth labels handed to ``roc_auc_score``.
        args: Namespace providing ``hidden_dim``, ``dropout``, ``device``,
            ``lr``, ``epoch`` and ``alpha``.

    Returns:
        The trained ``Dominant`` model, located on ``args.device``.
    """
    model = Dominant(
        feat_size=attrs.size(1),
        hidden_size=args.hidden_dim,
        dropout=args.dropout,
    )

    # Move everything to the requested device unconditionally so that
    # specifiers such as 'cuda:0' are honored too; for 'cpu' this is a no-op.
    device = torch.device(args.device)
    adj = adj.to(device)
    adj_label = adj_label.to(device)
    attrs = attrs.to(device)
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    for epoch in range(args.epoch):
        model.train()
        optimizer.zero_grad()
        A_hat, X_hat = model(attrs, adj)
        loss, struct_loss, feat_loss = loss_func(adj_label, A_hat, attrs, X_hat, args.alpha)
        # `loss` holds one value per row; optimize its mean.
        mean_loss = torch.mean(loss)
        mean_loss.backward()
        optimizer.step()
        print("Epoch:", '%04d' % (epoch), "train_loss=", "{:.5f}".format(mean_loss.item()), "train/struct_loss=", "{:.5f}".format(struct_loss.item()),"train/feat_loss=", "{:.5f}".format(feat_loss.item()))

        if (epoch+1)%100 == 0:
            model.eval()
            # No gradients are needed for scoring; skipping the autograd
            # graph avoids holding extra memory for the reconstructions.
            with torch.no_grad():
                A_hat, X_hat = model(attrs, adj)
                loss, struct_loss, feat_loss = loss_func(adj_label, A_hat, attrs, X_hat, args.alpha)
            # The per-row loss is used directly as the anomaly score.
            score = loss.detach().cpu().numpy()
            print("Epoch:", '%04d' % (epoch), 'Auc', roc_auc_score(label, score))

    return model

In [6]:
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='BlogCatalog', help='dataset name: Flickr/ACM/BlogCatalog')
parser.add_argument('--hidden_dim', type=int, default=64, help='dimension of hidden embedding (default: 64)')
parser.add_argument('--epoch', type=int, default=20, help='Training epoch')
parser.add_argument('--lr', type=float, default=5e-3, help='learning rate')
parser.add_argument('--dropout', type=float, default=0.3, help='Dropout rate')
parser.add_argument('--alpha', type=float, default=0.8, help='balance parameter')
# Default to CUDA only when it is actually available, so this cell also runs
# on CPU-only machines instead of failing on the first `.to(device)`.
parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu', type=str, help='cuda/cpu')


# Parse an explicit empty argument list: every option takes its default no
# matter how the hosting process was launched, and sys.argv is left untouched.
args = parser.parse_args(args=[])

device = torch.device(args.device)

# Returned order is (normalized adjacency, attributes, labels, raw adjacency
# with self-loops); the raw adjacency is the structure reconstruction target.
adj, attrs, label, adj_label = load_anomaly_detection_dataset(args.dataset)

adj = torch.FloatTensor(adj)
adj_label = torch.FloatTensor(adj_label)
attrs = torch.FloatTensor(attrs)

model = train_dominant(adj, adj_label, attrs, label, args)

Epoch: 0000 train_loss= 4.46566 train/struct_loss= 17.29420 train/feat_loss= 1.25853
Epoch: 0001 train_loss= 3.34449 train/struct_loss= 11.89572 train/feat_loss= 1.20668
Epoch: 0002 train_loss= 2.83390 train/struct_loss= 9.46589 train/feat_loss= 1.17591
Epoch: 0003 train_loss= 2.59062 train/struct_loss= 8.25295 train/feat_loss= 1.17504
Epoch: 0004 train_loss= 2.49817 train/struct_loss= 7.79087 train/feat_loss= 1.17499
Epoch: 0005 train_loss= 2.47739 train/struct_loss= 7.68719 train/feat_loss= 1.17493
Epoch: 0006 train_loss= 2.47245 train/struct_loss= 7.66345 train/feat_loss= 1.17471
Epoch: 0007 train_loss= 2.47587 train/struct_loss= 7.68061 train/feat_loss= 1.17469
Epoch: 0008 train_loss= 2.47772 train/struct_loss= 7.68976 train/feat_loss= 1.17471
Epoch: 0009 train_loss= 2.46989 train/struct_loss= 7.65066 train/feat_loss= 1.17470
Epoch: 0010 train_loss= 2.46806 train/struct_loss= 7.64161 train/feat_loss= 1.17467
Epoch: 0011 train_loss= 2.46812 train/struct_loss= 7.64195 train/feat_loss

In [7]:
# Final evaluation: score every row by its combined reconstruction error and
# report the ROC AUC against the ground-truth labels.
model.eval()
# Gradients are not needed for scoring, so skip building the autograd graph.
with torch.no_grad():
    A_hat, X_hat = model(attrs.to(device), adj.to(device))
    loss, struct_loss, feat_loss = loss_func(adj_label.to(device), A_hat, attrs.to(device), X_hat, args.alpha)
score = loss.detach().cpu().numpy()
print('Auc', roc_auc_score(label, score))

Auc 0.814068062296349
