In [12]:
from scipy.sparse import data
import torch
import torch.nn as nn
import numpy as np
import scipy.sparse as sp
import scipy.io as sio
from sklearn.metrics import roc_auc_score
from datetime import datetime
import argparse

from model import Dominant

In [13]:
def normalize_adj(adj):
    """Symmetrically normalise an adjacency matrix and return it in COO format.

    With ``D`` the diagonal matrix built from the row sums of ``adj``, the
    result is ``(adj @ D^-1/2)^T @ D^-1/2``. For a symmetric ``adj`` this is
    the usual ``D^-1/2 @ adj @ D^-1/2``. Rows whose sum is zero get a scaling
    factor of 0 instead of infinity.
    """
    # Convert the input to a COOrdinate-format sparse matrix.
    coo = sp.coo_matrix(adj)
    degrees = np.array(coo.sum(1))
    inv_sqrt_deg = np.power(degrees, -0.5).flatten()
    # A zero row sum yields inf above; replace those entries with 0.
    inv_sqrt_deg[np.isinf(inv_sqrt_deg)] = 0.
    scaling = sp.diags(inv_sqrt_deg)
    # Scale the columns, transpose, then scale the columns again.
    column_scaled = coo.dot(scaling)
    return column_scaled.transpose().dot(scaling).tocoo()

def load_anomaly_detection_dataset(dataset, datadir='data'):
    """Load ``{datadir}/{dataset}.mat`` and return dense arrays for training.

    The MAT file must provide the entries ``Network``, ``Attributes`` and
    ``Label``. The identity matrix is added to ``Network`` before use.

    Returns:
        A tuple ``(adj_norm, feat, truth, adj)`` where
        ``adj_norm`` is ``normalize_adj(Network + I)`` as a dense array,
        ``feat`` is ``Attributes`` as a dense array,
        ``truth`` is the flattened ``Label`` array, and
        ``adj`` is ``Network + I`` as a dense array.
    """
    mat = sio.loadmat(f'{datadir}/{dataset}.mat')
    network = mat['Network']
    attributes = mat['Attributes']
    truth = mat['Label'].flatten()

    # Network + I feeds both the normalised and the un-normalised outputs.
    with_self_loops = network + sp.eye(network.shape[0])
    adj_norm = normalize_adj(with_self_loops).toarray()

    return adj_norm, attributes.toarray(), truth, with_self_loops.toarray()

In [7]:
import torch.nn as nn
import torch.nn.functional as F
import torch
from layers import GraphConvolution

class Encoder(nn.Module):
    """Two stacked ``GraphConvolution`` layers with dropout in between.

    ``gc1`` is built with ``(nfeat, nhid)`` and ``gc2`` with ``(nhid, nhid)``;
    ``dropout`` is the probability passed to ``F.dropout``.
    """

    def __init__(self, nfeat, nhid, dropout):
        super().__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)
        self.dropout = dropout

    def forward(self, x, adj):
        """Apply gc1 -> ReLU -> dropout -> gc2 -> ReLU and return the result."""
        hidden = F.relu(self.gc1(x, adj))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return F.relu(self.gc2(hidden, adj))

class Attribute_Decoder(nn.Module):
    """Two stacked ``GraphConvolution`` layers that decode node attributes.

    ``gc1`` is built with ``(nhid, nhid)`` and ``gc2`` with ``(nhid, nfeat)``;
    ``dropout`` is the probability passed to ``F.dropout``.
    """

    def __init__(self, nfeat, nhid, dropout):
        super().__init__()

        self.gc1 = GraphConvolution(nhid, nhid)
        self.gc2 = GraphConvolution(nhid, nfeat)
        self.dropout = dropout

    def forward(self, x, adj):
        """Apply gc1 -> ReLU -> dropout -> gc2 -> ReLU and return the result."""
        hidden = F.relu(self.gc1(x, adj))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        # The final ReLU means the reconstruction is never negative.
        return F.relu(self.gc2(hidden, adj))

class Structure_Decoder(nn.Module):
    """One ``GraphConvolution`` layer followed by an inner-product decoder.

    ``gc1`` is built with ``(nhid, nhid)``; ``dropout`` is the probability
    passed to ``F.dropout``.
    """

    def __init__(self, nhid, dropout):
        super().__init__()

        self.gc1 = GraphConvolution(nhid, nhid)
        self.dropout = dropout

    def forward(self, x, adj):
        """Apply gc1 -> ReLU -> dropout, then return ``h @ h.T``."""
        hidden = F.relu(self.gc1(x, adj))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        # Pairwise inner products of the rows of the hidden representation.
        return hidden @ hidden.T

class Dominant(nn.Module):
    """Shared encoder feeding an attribute decoder and a structure decoder."""

    def __init__(self, feat_size, hidden_size, dropout):
        super().__init__()

        self.shared_encoder = Encoder(feat_size, hidden_size, dropout)
        self.attr_decoder = Attribute_Decoder(feat_size, hidden_size, dropout)
        self.struct_decoder = Structure_Decoder(hidden_size, dropout)

    def forward(self, x, adj):
        """Encode ``x`` once and decode it twice.

        Returns:
            ``(struct_reconstructed, x_hat)``: the structure decoder output
            followed by the attribute decoder output.
        """
        # Both decoders consume the same encoder output.
        embedding = self.shared_encoder(x, adj)
        # Attribute branch runs first, then the structure branch.
        attr_reconstructed = self.attr_decoder(embedding, adj)
        struct_reconstructed = self.struct_decoder(embedding, adj)
        return struct_reconstructed, attr_reconstructed

In [18]:
def loss_func(adj, A_hat, attrs, X_hat, alpha):
    """Combine per-row attribute and structure reconstruction errors.

    Each error is the Euclidean norm, taken along dimension 1, of the
    difference between a reconstruction and its target.

    Returns:
        ``(cost, structure_cost, attribute_cost)`` where ``cost`` is the
        per-row value ``alpha * attribute_error + (1 - alpha) * structure_error``
        (not reduced), and the other two are the means of the respective
        per-row errors.
    """
    # Per-row L2 error between reconstructed and true attributes.
    attr_errors = (X_hat - attrs).pow(2).sum(1).sqrt()
    attribute_cost = attr_errors.mean()

    # Per-row L2 error between reconstructed and true structure.
    struct_errors = (A_hat - adj).pow(2).sum(1).sqrt()
    structure_cost = struct_errors.mean()

    # The weighted combination is left per-row; callers reduce it themselves.
    cost = alpha * attr_errors + (1 - alpha) * struct_errors
    return cost, structure_cost, attribute_cost

In [19]:
def train_dominant(dataset="BlogCatalog", hidden_dim=64, epoch=100, lr=5e-3, dropout=0.3, alpha=0.8, device="cpu"):
    """Train a ``Dominant`` model on one dataset and print losses and AUC.

    Args:
        dataset: Dataset name passed to ``load_anomaly_detection_dataset``.
        hidden_dim: ``hidden_size`` passed to ``Dominant``.
        epoch: Number of training epochs to run.
        lr: Learning rate passed to the optimizer.
        dropout: Dropout probability passed to ``Dominant``.
        alpha: Weight of the attribute error in ``loss_func``; the structure
            error is weighted by ``1 - alpha``.
        device: Anything accepted by ``torch.device`` (e.g. ``"cpu"``,
            ``"cuda"``, ``"cuda:0"``).

    Prints the training losses after every epoch. On every 10th epoch and on
    the final epoch it also prints the ROC AUC of the per-node loss against
    the dataset labels, computed with the model in eval mode.
    """
    adj, attrs, label, adj_label = load_anomaly_detection_dataset(dataset)

    # Resolve the device once so that every spelling ("cuda", "cuda:0", ...)
    # is honoured, not only the exact string "cuda".
    device = torch.device(device)
    adj = torch.FloatTensor(adj).to(device)
    adj_label = torch.FloatTensor(adj_label).to(device)
    attrs = torch.FloatTensor(attrs).to(device)

    model = Dominant(feat_size=attrs.size(1), hidden_size=hidden_dim, dropout=dropout)
    model = model.to(device)

    # NOTE(review): the optimizer class was missing from the original line;
    # Adam is assumed here -- confirm against the intended training recipe.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Keep the requested epoch count separate from the loop index so the
    # "last epoch" check below compares against the real total.
    num_epochs = epoch
    for epoch_idx in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        A_hat, X_hat = model(attrs, adj)
        loss, struct_loss, feat_loss = loss_func(adj_label, A_hat, attrs, X_hat, alpha)
        # loss_func returns a per-node cost; reduce it to a scalar for backprop.
        l = torch.mean(loss)
        l.backward()
        optimizer.step()
        print("Epoch:", '%04d' % (epoch_idx), "train_loss=", "{:.5f}".format(l.item()), "train/struct_loss=", "{:.5f}".format(struct_loss.item()),"train/feat_loss=", "{:.5f}".format(feat_loss.item()))

        if epoch_idx % 10 == 0 or epoch_idx == num_epochs - 1:
            model.eval()
            # No gradients are needed for scoring.
            with torch.no_grad():
                A_hat, X_hat = model(attrs, adj)
                loss, struct_loss, feat_loss = loss_func(adj_label, A_hat, attrs, X_hat, alpha)
            # The per-node cost is used directly as the anomaly score.
            score = loss.detach().cpu().numpy()
            print("Epoch:", '%04d' % (epoch_idx), 'Auc', roc_auc_score(label, score))

In [20]:
# Train with the default arguments; the function prints per-epoch losses and periodic AUC values.
train_dominant()

Epoch: 0000 train_loss= 3.82228 train/struct_loss= 14.05821 train/feat_loss= 1.26330
Epoch: 0000 Auc 0.8090687611160287
Epoch: 0001 train_loss= 3.05492 train/struct_loss= 10.46063 train/feat_loss= 1.20349
Epoch: 0002 train_loss= 2.68756 train/struct_loss= 8.73441 train/feat_loss= 1.17585
Epoch: 0003 train_loss= 2.52950 train/struct_loss= 7.94792 train/feat_loss= 1.17490
Epoch: 0004 train_loss= 2.48355 train/struct_loss= 7.71805 train/feat_loss= 1.17492
Epoch: 0005 train_loss= 2.47342 train/struct_loss= 7.66737 train/feat_loss= 1.17493
Epoch: 0006 train_loss= 2.47558 train/struct_loss= 7.67836 train/feat_loss= 1.17488
Epoch: 0007 train_loss= 2.47481 train/struct_loss= 7.67495 train/feat_loss= 1.17478
Epoch: 0008 train_loss= 2.47047 train/struct_loss= 7.65333 train/feat_loss= 1.17476
Epoch: 0009 train_loss= 2.47022 train/struct_loss= 7.65202 train/feat_loss= 1.17477
Epoch: 0010 train_loss= 2.47194 train/struct_loss= 7.66070 train/feat_loss= 1.17475
Epoch: 0010 Auc 0.813704950109756
Epoch

Epoch: 0094 train_loss= 2.46640 train/struct_loss= 7.63312 train/feat_loss= 1.17472
Epoch: 0095 train_loss= 2.46666 train/struct_loss= 7.63441 train/feat_loss= 1.17472
Epoch: 0096 train_loss= 2.46642 train/struct_loss= 7.63319 train/feat_loss= 1.17472
Epoch: 0097 train_loss= 2.46654 train/struct_loss= 7.63381 train/feat_loss= 1.17472
Epoch: 0098 train_loss= 2.46618 train/struct_loss= 7.63202 train/feat_loss= 1.17472
Epoch: 0099 train_loss= 2.46686 train/struct_loss= 7.63541 train/feat_loss= 1.17472


In [39]:

# Reload the BlogCatalog MAT file at notebook scope so its entries can be inspected by hand.
data_mat = sio.loadmat(f'data/BlogCatalog.mat')
# Pull out the same three entries that load_anomaly_detection_dataset reads.
adj = data_mat['Network']
feat = data_mat['Attributes']
truth = data_mat['Label']
# Collapse the label array to one dimension.
truth = truth.flatten()

In [40]:
# Display the flattened label array.
truth

array([0, 0, 0, ..., 0, 0, 0], dtype=uint8)

In [41]:
# Distinct label values together with how often each one occurs.
np.unique(truth, return_counts=True)

(array([0, 1], dtype=uint8), array([4898,  298]))

In [43]:
# Add the identity matrix to adj, then normalise; normalize_adj returns a COO sparse matrix.
adj_norm = normalize_adj(adj + sp.eye(adj.shape[0]))


<5196x5196 sparse matrix of type '<class 'numpy.float64'>'
	with 350577 stored elements in COOrdinate format>

In [46]:
# Show a dense view of the normalised matrix; the result is not assigned, so adj_norm is unchanged.
adj_norm.toarray()

array([[0.0012987 , 0.00156096, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.00156096, 0.00187617, 0.00195477, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.00195477, 0.00203666, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.0625    , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.05263158,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.05263158]])

In [47]:
# Repeat the preprocessing steps of load_anomaly_detection_dataset at notebook scope.
# NOTE(review): this cell rebinds adj and feat, so it is not idempotent -- running it a
# second time would add the identity to adj again; re-run the loading cell first.
adj_norm = normalize_adj(adj + sp.eye(adj.shape[0]))
adj_norm = adj_norm.toarray()
# From here on adj includes the added identity matrix.
adj = adj + sp.eye(adj.shape[0])
adj = adj.toarray()
feat = feat.toarray()

In [49]:
# Display the normalised adjacency, now held as the result of toarray().
adj_norm

array([[0.0012987 , 0.00156096, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.00156096, 0.00187617, 0.00195477, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.00195477, 0.00203666, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.0625    , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.05263158,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.05263158]])

In [67]:
# Spot-check a single entry: row 0, column 2.
adj_norm[0][2]

0.0