In [12]:
import argparse, time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.data import BitcoinOTC
import datetime
from dgl.nn.pytorch import GraphConv
import time

import os
import json
from collections import defaultdict, Counter
from tqdm import tqdm
import torch.optim as optim
from torch.optim import lr_scheduler
import time
import copy
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join

In [2]:
# Path passed to train_model() in the final cell of this notebook
# (presumably the directory where trained models are written — confirm against train_model).
# NOTE(review): absolute, machine-specific path; adjust before running on another host.
out_path = '/misc/vlgscratch4/BrunaGroup/rj1408/dynamic_nn/models/static_gcn/btcotc/'

In [3]:
# Choose the torch device string: 'cuda' when at least one CUDA device is
# visible to this process, otherwise fall back to 'cpu'.
num_gpus = torch.cuda.device_count()
device = 'cuda' if num_gpus > 0 else 'cpu'

In [4]:
def removeSelfEdges(edgeList, colFrom, colTo):
    """Return the rows of ``edgeList`` whose two endpoint columns differ.

    Args:
        edgeList: 2-D array indexed as ``edgeList[:, col]`` (a NumPy array
            in this notebook).
        colFrom: column index holding the source node id.
        colTo: column index holding the destination node id.

    Returns:
        The rows for which ``edgeList[:, colFrom] - edgeList[:, colTo]`` is
        non-zero, in their original order.
    """
    endpoint_gap = edgeList[:, colFrom] - edgeList[:, colTo]
    keep_rows = endpoint_gap != 0
    return edgeList[keep_rows]

In [9]:
# Load the edge table. Columns as used in this notebook: 0 = source node,
# 1 = target node, 2 = edge value stored as the edge feature, 3 = timestamp
# (presumably in seconds, since it is divided by a timedelta in seconds below).
data = np.loadtxt('../soc-sign-bitcoinotc.csv', delimiter=',').astype(np.int64)
# Re-base node ids so that the smallest id becomes 0.
data[:, 0:2] = data[:, 0:2] - data[:, 0:2].min()
data = removeSelfEdges(data, 0, 1)
# Ids were re-based to start at 0 *before* self-edges were removed, so the
# graph needs max_id + 1 node slots. Using max - min + 1 here would come up
# short whenever the id-0 node only had self-edges (min would then be > 0),
# leaving the largest id out of range for add_nodes()/add_edges().
num_nodes = int(data[:, 0:2].max()) + 1
# Bucket width: 14 days, expressed in seconds.
delta = datetime.timedelta(days=14).total_seconds()
# Time-bucket index of every row, counted from the earliest timestamp and
# rounded to the nearest bucket.
time_index = np.around(
    (data[:, 3] - data[:, 3].min())/delta).astype(np.int64)

In [5]:
# Loading
graphs = []

# Load the edge table. Columns as used in this notebook: 0 = source node,
# 1 = target node, 2 = edge value stored as the edge feature, 3 = timestamp
# (presumably in seconds, since it is divided by a timedelta in seconds below).
data = np.loadtxt('../soc-sign-bitcoinotc.csv', delimiter=',').astype(np.int64)
# Re-base node ids so that the smallest id becomes 0.
data[:, 0:2] = data[:, 0:2] - data[:, 0:2].min()
data = removeSelfEdges(data, 0, 1)
# Ids were re-based to start at 0 *before* self-edges were removed, so the
# graph needs max_id + 1 node slots. Using max - min + 1 here would come up
# short whenever the id-0 node only had self-edges (min would then be > 0),
# leaving the largest id out of range for add_nodes()/add_edges().
num_nodes = int(data[:, 0:2].max()) + 1
# Bucket width: 14 days, expressed in seconds.
delta = datetime.timedelta(days=14).total_seconds()
# Time-bucket index of every row, counted from the earliest timestamp and
# rounded to the nearest bucket.
time_index = np.around(
    (data[:, 3] - data[:, 3].min())/delta).astype(np.int64)

# Build one cumulative snapshot graph per time step: snapshot i contains every
# row of `data` whose time_index is <= i, plus one self-loop per node.
# `prevind` carries the number of data rows used by the previous snapshot.
prevind = 0
# NOTE(review): range(time_index.max()) stops at max - 1, so rows in the last
# time bucket never enter any snapshot — confirm this is intended.
for i in range(time_index.max()):
    g = DGLGraph()
    g.add_nodes(num_nodes)
    row_mask = time_index <= i
    edges = data[row_mask][:, 0:2]
    rate = data[row_mask][:, 2]
    # True for rows at positions >= the previous snapshot's row count.
    # NOTE(review): this marks exactly the edges first seen at step i only if
    # `data` is ordered by time — verify against the input file.
    diffmask = np.arange(len(edges)) >= prevind
    g.add_edges(edges[:, 0], edges[:, 1])
    # Column 2 of the edge table becomes the per-edge feature, shape (num_rows, 1).
    g.edata['feat'] = torch.FloatTensor(rate.reshape(-1, 1))
    g.edata['diff'] = diffmask
    # NOTE(review): `node_dim` is not assigned in this cell; the recorded run
    # of this cell stopped here with "NameError: name 'node_dim' is not defined".
    g.ndata['feat'] = torch.zeros(num_nodes, node_dim)
    
    # Append one self-loop per node. These edges are added after the edge data
    # above was assigned; presumably DGL fills their 'feat'/'diff' entries with
    # its default initializer — verify.
    g.add_edges(g.nodes(), g.nodes())
        
    # Mask that is True only for the trailing number_of_nodes() edges,
    # i.e. the self-loops just appended.
    selfedgemask = np.zeros(g.number_of_edges(), dtype = bool)
    selfedgemask[-g.number_of_nodes():] = True
    g.edata['self_edge'] = selfedgemask
    
    graphs.append(g)
    prevind = len(edges)
    
# Fixed split by snapshot index: 94 is the training graph, 95..108 are the
# validation graphs, 109 onward are the test graphs.
train_graph = graphs[94]
valid_graphs = graphs[95:109]
test_graphs = graphs[109:]

NameError: name 'node_dim' is not defined

In [6]:
class GCN(nn.Module):
    """Stack of GraphConv layers followed by a linear per-edge scorer.

    Node embeddings are produced by ``n_layers`` GraphConv layers; each edge is
    then scored from the concatenation of its source and destination
    embeddings.

    Args:
        in_feats: size of the input node features.
        n_hidden: size of every GraphConv output (and of the node embeddings).
        n_layers: total number of GraphConv layers (one input layer plus
            ``n_layers - 1`` hidden layers).
        activation: activation passed to every GraphConv layer.
        dropout: probability for the ``nn.Dropout`` applied to the input of
            every GraphConv layer except the first.
    """

    def __init__(self,
                 in_feats,
                 n_hidden,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        self.layers = nn.ModuleList()
        # input layer
        self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
        # hidden layers
        for i in range(n_layers - 1):
            self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
        # output layer: consumes [source embedding, destination embedding]
        self.outlayer = nn.Linear(2 * n_hidden, 1, bias = True)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features, g):
        """Score every edge of ``g``.

        Args:
            features: input node features, passed to the first GraphConv layer.
            g: graph object; must be accepted by the GraphConv layers and
                expose ``all_edges()`` returning source and destination ids.

        Returns:
            Tensor of shape ``(num_edges, 1)`` with one score per edge, in the
            order returned by ``g.all_edges()``.
        """
        h = features
        for i, layer in enumerate(self.layers):
            if i != 0:
                h = self.dropout(h)
            h = layer(g, h)
        # Gather endpoint embeddings with tensor indexing instead of a Python
        # loop over edges; this yields the same rows and also handles a graph
        # with no edges (torch.stack on an empty list would raise).
        endpoints = g.all_edges()
        src = endpoints[0].long()
        dst = endpoints[1].long()
        edgefeatures = torch.cat((h[src], h[dst]), dim = 1)
        outputs = self.outlayer(edgefeatures)
        # Squash into the open interval (-1, 19).
        # NOTE(review): if the regression targets are signed values symmetric
        # around zero, an offset of 10 may have been intended — confirm before
        # changing, since any existing checkpoints depend on this mapping.
        outputs = 20*torch.sigmoid(outputs) - 1
        return outputs

In [7]:
def evaluate_loss(model, criterion, device, valid_graphs):
    """Sample-weighted mean of ``criterion`` over the edges flagged by ``edata['diff']``.

    The model is put in eval mode and run without gradient tracking on each
    graph; predictions and labels are restricted to the edges selected by that
    graph's ``edata['diff']`` mask before the loss is computed.

    Args:
        model: callable as ``model(node_features, graph)`` returning one row
            per edge.
        criterion: loss callable taking ``(outputs, labels)`` and returning a
            scalar tensor (mean-reduced, since it is re-weighted by count here).
        device: device the node features are moved to before the forward pass,
            so graphs built on CPU can be scored by a model living on
            ``device``.
        valid_graphs: iterable of graphs exposing ``ndata['feat']``,
            ``edata['feat']`` (labels) and ``edata['diff']`` (edge mask).

    Returns:
        float: total loss divided by the number of selected edges, or ``nan``
        when no edge is selected in any graph (instead of dividing by zero).
    """
    model.eval()

    epoch_loss = 0.0
    num_samples = 0

    # validation phase: no gradients needed
    with torch.no_grad():
        for val_graph in valid_graphs:
            diff_mask = val_graph.edata['diff']
            features = val_graph.ndata['feat'].to(device)
            outputs = model(features, val_graph)[diff_mask]
            labels = val_graph.edata['feat'][diff_mask].to(outputs.device)
            batch_size = labels.shape[0]
            if batch_size == 0:
                # A mean-reduced loss over zero elements is NaN and would
                # poison the running total; such a graph contributes nothing.
                continue
            loss = criterion(outputs, labels)
            epoch_loss += loss.item() * batch_size
            num_samples += batch_size

    if num_samples == 0:
        return float('nan')
    return epoch_loss / num_samples

In [11]:
def _loss_on_new_edges(model, criterion, device, graphs):
    """Sample-weighted mean of ``criterion`` over the edges flagged by ``edata['diff']``."""
    model.eval()
    total_loss = 0.0
    total_edges = 0
    with torch.no_grad():
        for graph in graphs:
            new_edge_mask = graph.edata['diff']
            # Graph features are created on CPU; move them next to the model.
            features = graph.ndata['feat'].to(device)
            outputs = model(features, graph)[new_edge_mask]
            labels = graph.edata['feat'][new_edge_mask].to(outputs.device)
            n_edges = labels.shape[0]
            if n_edges == 0:
                # A mean-reduced loss over zero elements is NaN; skip the graph.
                continue
            total_loss += criterion(outputs, labels).item() * n_edges
            total_edges += n_edges
    if total_edges == 0:
        return float('nan')
    return total_loss / total_edges


def _build_snapshot_graphs(node_dim):
    """Build one cumulative DGLGraph per time step from the notebook-level edge table.

    Reads the module-level ``data``, ``time_index`` and ``num_nodes`` defined
    by the data-loading cell. Snapshot ``i`` holds every row whose time index
    is ``<= i`` plus one self-loop per node, and carries zero node features of
    width ``node_dim``.
    """
    graphs = []
    prevind = 0
    # NOTE(review): range(time_index.max()) stops at max - 1, so rows in the
    # last time bucket never enter any snapshot — confirm this is intended.
    for i in range(time_index.max()):
        g = DGLGraph()
        g.add_nodes(num_nodes)
        rows = data[time_index <= i]
        edges = rows[:, 0:2]
        rate = rows[:, 2]
        # True for rows at positions >= the previous snapshot's row count.
        # NOTE(review): equals "edges first seen at step i" only if `data` is
        # ordered by time — verify against the input file.
        diffmask = np.arange(len(edges)) >= prevind
        g.add_edges(edges[:, 0], edges[:, 1])
        g.edata['feat'] = torch.FloatTensor(rate.reshape(-1, 1))
        g.edata['diff'] = diffmask
        g.ndata['feat'] = torch.zeros(num_nodes, node_dim)
        # One self-loop per node, appended after the data edges.
        g.add_edges(g.nodes(), g.nodes())
        selfedgemask = np.zeros(g.number_of_edges(), dtype = bool)
        selfedgemask[-g.number_of_nodes():] = True
        g.edata['self_edge'] = selfedgemask

        graphs.append(g)
        prevind = len(edges)
    return graphs


def evaluate_loss(modelpath, device):
    """Load a saved GCN checkpoint and compute its loss on the test snapshots.

    NOTE(review): this definition reuses the name of the four-argument
    ``evaluate_loss(model, criterion, device, valid_graphs)`` defined in an
    earlier cell, so once this cell runs the earlier function is no longer
    reachable by name. The previous body called
    ``evaluate_loss(model, criterion, device, test_graphs)``, which resolved
    to this two-argument function and raised ``TypeError``; the loss loop is
    therefore carried by the private helper ``_loss_on_new_edges``. Consider
    renaming one of the two functions.

    Args:
        modelpath: path of a checkpoint readable by ``torch.load`` containing
            the keys ``'hyperparams'`` (with ``'node_dim'``, ``'n_layers'``,
            ``'dropout'``), ``'model_state_dict'`` and ``'epoch'``.
        device: device the checkpoint is mapped to and the model is moved to.

    Returns:
        tuple: ``(test_loss, checkpoint['epoch'])`` where ``test_loss`` is the
        MSE over the edges flagged ``edata['diff']`` in snapshots 109 onward
        (``nan`` if no such edge exists).
    """
    # torch.load unpickles the file: only load checkpoints from trusted sources.
    checkpoint = torch.load(modelpath, map_location=device)
    hyperdic = checkpoint['hyperparams']
    node_dim = hyperdic['node_dim']
    n_layers = hyperdic['n_layers']
    dropout = hyperdic['dropout']

    # create GCN model with the hyper-parameters stored in the checkpoint
    model = GCN(node_dim, node_dim, n_layers, F.relu, dropout)
    model.to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    criterion = nn.MSELoss()

    # Same test slice as the top-level loading cell (snapshots 109 onward).
    test_graphs = _build_snapshot_graphs(node_dim)[109:]
    tst_loss = _loss_on_new_edges(model, criterion, device, test_graphs)
    return tst_loss, checkpoint['epoch']

In [None]:
def getBestModel(direc, device):
    """Collect the names of the regular files directly inside ``direc``.

    NOTE(review): this function looks unfinished. It builds the file list and
    an initial loss sentinel but never uses either, never reads ``device``,
    and returns ``None``.

    Args:
        direc: directory to scan (``listdir`` raises if it does not exist).
        device: currently unused.
    """
    best_loss_so_far = 100000000  # starting sentinel; not read yet
    plain_files = []
    for entry in listdir(direc):
        if isfile(join(direc, entry)):
            plain_files.append(entry)

In [None]:
 # create GCN model
# Instantiate the network (input width == hidden width == node_dim) and move
# it to the selected device. The hyper-parameters used below (node_dim,
# n_layers, dropout, learning_rate, wt_decay, stpsize, n_epochs) and
# train_model are expected to be defined elsewhere; they are not set in this cell.
model = GCN(node_dim, node_dim, n_layers, F.relu, dropout)
model.to(device)
criterion = nn.MSELoss()

# Optimise only the parameters that require gradients; the learning rate is
# multiplied by 0.1 every `stpsize` scheduler steps.
model_parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = optim.Adam(model_parameters, lr=learning_rate, weight_decay=wt_decay)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=stpsize, gamma=0.1)

# Hyper-parameters handed to train_model alongside the model.
hyper_params = dict(
    node_dim=node_dim,
    n_layers=n_layers,
    dropout=dropout,
    wt_decay=wt_decay,
)

bst_model = train_model(
    model, criterion, optimizer, exp_lr_scheduler,
    device, out_path, hyper_params, n_epochs,
)