In [1]:
import pandas as pd
import numpy as np
import tqdm

# Pre processing

In [2]:
from graph_data import GraphDataset
gdata = GraphDataset(root='/anomalyvol/data/')

Processing...
Done!


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.nn import EdgeConv, global_mean_pool

class EdgeNet(nn.Module):
    def __init__(self, input_dim=4, hidden_dim=2, output_dim=1, aggr='add'):
        super(EdgeNet, self).__init__()
        convnn = nn.Sequential(nn.Linear(2*(input_dim), 32),
                               nn.ReLU(),
                               nn.Linear(32, hidden_dim),
                               nn.ReLU(),
                               nn.Linear(hidden_dim, 32),
                               nn.ReLU(),
                               nn.Linear(32, input_dim),
                               nn.ReLU()
        )
        
        self.batchnorm = nn.BatchNorm1d(input_dim)

        self.inputnet =  nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )

        self.edgenetwork = nn.Sequential(nn.Linear(2*(hidden_dim+input_dim),
                                                   2*hidden_dim),
                                                    nn.ReLU())

        self.outputnet = nn.Sequential(nn.Linear(hidden_dim+input_dim, hidden_dim),
                                       nn.ReLU(),
                                       nn.Linear(hidden_dim, output_dim),
                                       nn.Sigmoid())

        self.nodenetwork = EdgeConv(nn=convnn,aggr=aggr)

    def forward(self, data):
        X = self.batchnorm(data.x)
        H = self.nodenetwork(X,data.edge_index)
        data.x = H
        #row,col = data.edge_index        
        #output = self.edgenetwork(torch.cat([data.x[row],data.x[col]],dim=-1)).squeeze(-1)
        #output = self.outputnet(x_sum)
        output = H
        return output


In [4]:
import torch
from torch_geometric.data import Data, DataLoader
import os
import os.path as osp
import math
import argparse
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

input_dim = 4
hidden_dim = 2
fulllen = len(gdata)
tv_frac = 0.10
tv_num = math.ceil(fulllen*tv_frac)
splits = np.cumsum([fulllen-2*tv_num,tv_num,tv_num])
batch_size = 32
n_epochs = 10
lr = 0.01
patience = 10
device = 'cuda'
model_fname = 'EdgeNet'

In [5]:
train_dataset = torch.utils.data.Subset(gdata,np.arange(start=0,stop=splits[0]))
valid_dataset = torch.utils.data.Subset(gdata,np.arange(start=splits[1],stop=splits[2]))
test_dataset = torch.utils.data.Subset(gdata,np.arange(start=splits[0],stop=splits[1]))
train_loader = DataLoader(train_dataset, batch_size=batch_size, pin_memory=True, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)

train_samples = len(train_dataset)
valid_samples = len(valid_dataset)
test_samples = len(test_dataset)

model = EdgeNet(input_dim=input_dim,hidden_dim=hidden_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

In [8]:
@torch.no_grad()
def test(model,loader,total,batch_size):
    model.eval()

    sum_loss = 0
    t = tqdm.tqdm(enumerate(loader),total=total/batch_size)
    for i,data in t:
        data = data.to(device)
        batch_output = model(data)
        batch_loss_item = F.mse_loss(batch_output, data.y).item()
        sum_loss += batch_loss_item
        t.set_description("batch loss = %.5f" % (batch_loss_item))
        t.refresh() # to show immediately the update

    return sum_loss/(i+1)

def train(model, optimizer, epoch, loader, total, batch_size):
    model.train()
    model_fname = 'EdgeNet'

    sum_loss = 0.
    t = tqdm.tqdm(enumerate(loader),total=total/batch_size)
    for i,data in t:
        data = data.to(device)
        optimizer.zero_grad()
        batch_output = model(data)
        batch_loss = F.mse_loss(batch_output, data.y)
        batch_loss.backward()
        batch_loss_item = batch_loss.item()
        t.set_description("batch loss = %.5f" % batch_loss_item)
        t.refresh() # to show immediately the update
        sum_loss += batch_loss_item
        optimizer.step()
    
    return sum_loss/(i+1)

In [9]:
stale_epochs = 0
for epoch in range(0, n_epochs):
    epoch_loss = train(model, optimizer, epoch, train_loader, train_samples, batch_size)
    valid_loss, valid_acc, valid_eff, valid_fp, valid_fn, valid_pur = test(model, valid_loader, valid_samples, batch_size)
    print('Epoch: {:02d}, Training Loss: {:.4f}'.format(epoch, epoch_loss))
    print('               Validation Loss: {:.4f}'.format(valid_loss))

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        modpath = osp.join(os.getcwd(),model_fname+'.best.pth')
        print('New best model saved to:',modpath)
        torch.save(model.state_dict(),modpath)
        stale_epochs = 0
    else:
        print('Stale epoch')
        stale_epochs += 1
    if stale_epochs >= patience:
        print('Early stopping after %i stale epochs'%patience)
        break

  0%|          | 0/59.6875 [00:00<?, ?it/s]


IndexError: Only integers, slices (`:`), list, tuples, and long or bool tensors are valid indices (got int64).