In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import scipy.sparse as sp
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score, precision_score, recall_score
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from tqdm import tqdm

In [2]:
# Show the installed PyTorch version so the recorded results can be tied to it.
torch.__version__

'1.4.0'

In [3]:
# Show the name of the CUDA device at index 1.
# NOTE(review): the Trainer later in this notebook is created with gpus=[0];
# confirm which device index is meant to be inspected here.
torch.cuda.get_device_name(1)

'Tesla V100-PCIE-32GB'

In [4]:
class GraphGenerator(nn.Module):
    def __init__(self, k, in_channels, dim, alpha=3.0):
        '''
        Learns a sparse, row-normalised adjacency matrix over the rows of the input.

        k - number of neighbours kept per row (clamped to the number of rows at call time)
        in_channels - number of input features per node
        dim - dimension of the node embeddings
        alpha - control saturation of the tanh

        produces nonsymmetric adjacency matrix: the similarity matrix itself is
        symmetric, the per-row top-k selection is what breaks the symmetry
        '''
        super(GraphGenerator, self).__init__()

        self.lin1 = nn.Linear(in_channels, dim)

        self.k = k
        self.dim = dim
        self.alpha = alpha

    def forward(self, x):
        '''
        x - 2-D tensor with one row per node (torch.mm only accepts 2-D operands)

        returns a (num_nodes, num_nodes) adjacency matrix with self-loops whose
        rows each sum to 1
        '''
        # Both factors of the similarity product go through the same projection,
        # so it is computed once and reused.
        nodevec = torch.tanh(self.alpha * self.lin1(x))

        a = torch.mm(nodevec, nodevec.transpose(1, 0))
        adj = F.relu(torch.tanh(self.alpha * a))

        # topk raises when k exceeds the row length (e.g. a short final batch),
        # so never ask for more neighbours than there are nodes.
        k = min(self.k, adj.size(1))
        _, neighbour_idx = adj.topk(k, 1)

        # Binary mask with ones at the selected neighbours. It is built from the
        # indices only, so no autograd output is overwritten in place.
        mask = torch.zeros_like(adj)
        mask.scatter_(1, neighbour_idx, 1.0)

        adj = adj * mask

        return self.normalize(adj)

    def normalize(self, adj):
        '''
        Add self-loops (identity) to adj and divide every row by its sum.
        '''
        adj = adj + torch.eye(adj.size(0)).type_as(adj)
        row_sum = adj.sum(1)

        return adj / row_sum.view(-1, 1)
# Smoke test: build a graph over 4 random nodes and display the adjacency matrix.
k = 2
dim = 2048
alpha = 3
static_feat = torch.rand(4, 27)

# alpha used to be defined here but never handed to the constructor; pass it
# explicitly so that changing it in this cell actually has an effect.
gu = GraphGenerator(k, static_feat.size(1), dim, alpha=alpha)

adj = gu(static_feat)
adj

tensor([[0.3333, 0.0000, 0.3333, 0.3333],
        [0.0000, 0.3333, 0.3333, 0.3333],
        [0.0000, 0.0000, 0.6667, 0.3333],
        [0.0000, 0.0000, 0.3333, 0.6667]], grad_fn=<DivBackward0>)

In [5]:
class MixHop(torch.nn.Module):
    """Mixes features propagated over successive powers of the adjacency matrix.

    For j = 0 .. powers-1 the layer computes ReLU(norm_adj^j @ W_j(x)) with a
    separate linear map W_j per power, sums the results over j and applies
    dropout.

    Args:
        in_channels: number of input features per node.
        out_channels: number of output features per node.
        powers: how many adjacency powers (starting at the identity) are mixed.
        dropout: dropout probability applied to the summed output
            (defaults to 0.3, the value that used to be hard-coded).

    Raises:
        ValueError: if ``powers`` is smaller than 1.
    """

    def __init__(self, in_channels, out_channels, powers, dropout=0.3):
        super(MixHop, self).__init__()

        # With no linear maps forward() would fail on an empty torch.stack;
        # report the problem where it is caused instead.
        if powers < 1:
            raise ValueError("powers must be >= 1, got {}".format(powers))

        self.dropout = dropout

        self.w_list = nn.ModuleList()
        for _ in range(powers):
            self.w_list.append(nn.Linear(in_channels, out_channels))

    def forward(self, norm_adj, x):
        """Return the summed multi-hop features.

        norm_adj: square 2-D adjacency matrix (used with torch.mm).
        x: 2-D node features with one row per node.
        """
        # Power 0 of the adjacency matrix is the identity.
        adj_power = torch.eye(norm_adj.size(0)).type_as(x)

        hops = []
        for lin in self.w_list:
            hops.append(F.relu(torch.mm(adj_power, lin(x))))
            adj_power = torch.mm(adj_power, norm_adj)

        x = torch.sum(torch.stack(hops, dim=1), dim=1)

        return F.dropout(x, p=self.dropout, training=self.training)


In [6]:
class GCN(torch.nn.Module):
    """Single graph-convolution layer: ReLU(norm_adj @ W(x))."""

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # Linear map applied to every node before neighbourhood aggregation.
        self.W = nn.Linear(in_channels, out_channels)

    def forward(self, norm_adj, x):
        """Project the node features, aggregate them with norm_adj, apply ReLU."""
        projected = self.W(x)
        aggregated = torch.mm(norm_adj, projected)

        return F.relu(aggregated)


In [7]:
class GNN_MixHop(nn.Module):
    """Stack of MixHop layers running on a graph generated from the input itself.

    in_channels - number of input features per node
    dim - embedding size handed to GraphGenerator
    layers - sequence with the output width of each MixHop layer
    powers - number of adjacency powers used by every MixHop layer
    k - number of neighbours handed to GraphGenerator
    p - dropout probability applied after every MixHop layer
    """

    def __init__(self, in_channels, dim,  layers, powers, k, p=0.5):
        super().__init__()

        self.p = p
        self.mh_layers = nn.ModuleList()
        self.graph_gen = GraphGenerator(k, in_channels, dim)

        # Consecutive (input width, output width) pairs: the first layer reads
        # the raw features, every later layer reads its predecessor's output.
        widths = [in_channels] + list(layers)
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            self.mh_layers.append(MixHop(fan_in, fan_out, powers))

        # Single-output head on top of the last MixHop layer.
        self.linear = nn.Linear(layers[-1], 1)

    def forward(self, x):
        """Return one sigmoid-activated score per input row."""
        adj = self.graph_gen(x)

        hidden = x
        for layer in self.mh_layers:
            hidden = F.dropout(layer(adj, hidden), p=self.p, training=self.training)

        return torch.sigmoid(self.linear(hidden))
    

In [8]:
class GNN_GCN(nn.Module):
    """Stack of GCN layers running on a graph generated from the input itself.

    in_channels - number of input features per node
    dim - embedding size handed to GraphGenerator
    layers - sequence with the output width of each GCN layer
    powers - accepted so the signature matches GNN_MixHop; not used by this class
    k - number of neighbours handed to GraphGenerator
    p - dropout probability applied after every GCN layer
    """

    def __init__(self, in_channels, dim,  layers, powers, k, p=0.5):
        super().__init__()

        self.p = p
        self.mh_layers = nn.ModuleList()
        self.graph_gen = GraphGenerator(k, in_channels, dim)

        # Consecutive (input width, output width) pairs for the GCN stack.
        widths = [in_channels] + list(layers)
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            self.mh_layers.append(GCN(fan_in, fan_out))

        # Single-output head on top of the last GCN layer.
        self.linear = nn.Linear(layers[-1], 1)

    def forward(self, x):
        """Return one sigmoid-activated score per input row."""
        adj = self.graph_gen(x)

        hidden = x
        for layer in self.mh_layers:
            hidden = F.dropout(layer(adj, hidden), p=self.p, training=self.training)

        return torch.sigmoid(self.linear(hidden))
    

In [33]:
import pickle

# load preprocessed data
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open("../tdata.pickle", "rb") as f:
    processed_data = pickle.load(f)
print(processed_data.keys())

# Features and targets of the two splits.
train, train_y = processed_data['train'], processed_data['train_y']
valid, valid_y = processed_data['valid'], processed_data['valid_y']


dict_keys(['train', 'train_y', 'valid', 'valid_y'])


In [34]:
def normalize(x, i):
    """Standardise the first ``i`` columns of ``x`` to zero mean and unit std.

    Statistics are computed per column over axis 0 of ``x`` itself. Columns
    from index ``i`` onwards are passed through unchanged.

    Args:
        x: 2-D array-like with samples along axis 0.
        i: number of leading columns to standardise.

    Returns:
        A new floating-point array of the same shape as ``x``. The input is
        left untouched, so re-running the calling cell is safe.
    """
    source = np.asarray(x)
    # Work on a floating-point copy: this keeps the caller's array intact and
    # prevents the scaled values from being truncated when x holds integers.
    out = source.astype(np.result_type(source.dtype, np.float32), copy=True)

    head = out[:, :i]
    mean = head.mean(0)
    std = head.std(0)
    # A constant column has std == 0 and would turn into NaN/inf when divided;
    # dividing by 1 leaves it centred at 0 instead.
    std[std == 0] = 1.0

    out[:, :i] = (head - mean) / std

    return out

# Standardise the leading 15 columns of both splits.
# NOTE(review): each split is scaled with its own mean/std; confirm that
# reusing the training statistics for `valid` is not what was intended.
train, valid = [normalize(split, 15) for split in (train, valid)]

In [35]:
# Display the shapes of both splits as a sanity check.
# NOTE(review): per the recorded output the validation split has more rows than
# the training split; confirm the two are not swapped in the pickle.
valid.shape, valid_y.shape, train.shape, train_y.shape

((134457, 29), (134457,), (30957, 29), (30957,))

In [36]:
class MTGNNDataset(Dataset):
    """Map-style dataset pairing each row of X with the matching entry of y."""

    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        # The number of samples is taken from the targets.
        return len(self.y)

    def __getitem__(self, idx):
        """Return (features, target) as float tensors; the target has shape (1,)."""
        features = torch.FloatTensor(self.X[idx, :])
        target = torch.FloatTensor([self.y[idx]])

        return features, target

batch_size = 128
num_workers = 32

# Both loaders are configured identically.
# NOTE(review): shuffle=False also applies to the training loader, and
# drop_last=True discards the incomplete final batch of the validation split;
# confirm both are intended.
_loader_options = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=True,
)

train_dataset = MTGNNDataset(train, train_y)
train_loader = DataLoader(train_dataset, **_loader_options)

val_dataset = MTGNNDataset(valid, valid_y)
val_loader = DataLoader(val_dataset, **_loader_options)

In [37]:
class Model(pl.LightningModule):
    """Lightning wrapper that trains a GNN_MixHop binary classifier.

    Args:
        in_channels, dim, layers, powers, k, p: forwarded unchanged to GNN_MixHop.
        lr: Adam learning rate.
        l2: Adam weight decay.
        threshold: probability at or above which a validation prediction
            counts as the positive class.
        t_max: T_max of the cosine-annealing learning-rate schedule.

    The last four arguments default to the values that used to be hard-coded.
    """

    def __init__(self, in_channels, dim,  layers, powers, k, p=0.5,
                 lr=1e-3, l2=1e-4, threshold=0.3, t_max=200):
        super(Model,self).__init__()

        self.gnn = GNN_MixHop(in_channels, dim, layers, powers, k, p=p)
        self.lr = lr
        self.l2 = l2
        self.threshold = threshold
        self.t_max = t_max

    def _weight_init(self):
        """Kaiming-normal init for every trainable parameter with more than one dim.

        Not called anywhere inside this class; invoke it explicitly if wanted.
        """
        for p in self.parameters():
            if p.dim() > 1 and p.requires_grad:
                nn.init.kaiming_normal_(p)

    def forward(self, x):
        """Return the sigmoid output of the wrapped GNN for the batch x."""
        return self.gnn(x)

    def training_step(self, batch, batch_idx):
        """Binary cross-entropy between the predicted probabilities and the targets."""
        X, y = batch
        y_hat = self.forward(X)

        loss = F.binary_cross_entropy(y_hat.flatten(), y.flatten())

        return loss

    def validation_step(self, batch, batch_idx):
        """Return the batch loss together with thresholded predictions and targets."""
        X, y = batch

        y_hat = self.forward(X)

        loss = F.binary_cross_entropy(y_hat.flatten(), y.flatten())

        # Binarise with a comparison instead of overwriting y_hat in place, so
        # the probabilities that produced the loss are left intact.
        preds = (y_hat >= self.threshold).type_as(y_hat)

        return {'val_loss': loss, "preds": preds, "targets": y}

    def validation_epoch_end(self, val_step_outputs):
        """Aggregate the step outputs and log loss and macro F1/precision/recall."""
        loss = torch.mean(torch.stack([out['val_loss'] for out in val_step_outputs])).detach().cpu()

        # Flatten to 1-D: the metric functions expect one label per sample, not
        # a column vector of shape (N, 1).
        y_true = torch.cat([out["targets"].flatten() for out in val_step_outputs]).detach().cpu().numpy()
        y_pred = torch.cat([out["preds"].flatten() for out in val_step_outputs]).detach().cpu().numpy()

        f1 = f1_score(y_true, y_pred, average='macro')
        prec = precision_score(y_true, y_pred, average='macro')
        recall = recall_score(y_true, y_pred, average='macro')

        self.log('val_loss', loss, prog_bar=True)

        self.log("f1", f1, prog_bar=True)
        self.log("prec", prec, prog_bar=True)
        self.log("recall", recall, prog_bar=True)

    def configure_optimizers(self):
        """Adam with weight decay plus a cosine-annealing schedule."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.l2)

        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=self.t_max)

        return [optimizer], [scheduler]

    def train_dataloader(self):
        # Uses the module-level loader built earlier in the notebook.
        return train_loader

    def val_dataloader(self):
        # Uses the module-level loader built earlier in the notebook.
        return val_loader

In [38]:
epochs = 200

# Take the feature count from the data rather than hard-coding it, so the
# model input size cannot drift away from the loaded arrays.
in_channels = train.shape[1]
dim = 40
layers = [32, 32]
powers = 4
k = 8
p = 0.2

model = Model(in_channels, dim, layers, powers, k, p)

# checkpoint_callback = ModelCheckpoint(
#     monitor='f1',
#     filename='MTGNN-{epoch:02d}-{f1:.4f}',
#     save_top_k=0,
#     mode='max',
# )

# early_stopping = EarlyStopping(monitor='val_loss', patience=10)

trainer = pl.Trainer(
    max_epochs=epochs,
    gpus=[0],
    num_sanity_val_steps=1,
    check_val_every_n_epoch=10,
#     gradient_clip_val=5
)

# training
trainer.fit(model)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name | Type       | Params
------------------------------------
0 | gnn  | GNN_MixHop | 11.3 K
------------------------------------
11.3 K    Trainable params
0         Non-trainable params
11.3 K    Total params


Validation sanity check: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x2acef100fc20>
Traceback (most recent call last):
  File "/home/intern/robertkim/.conda/envs/robert3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/home/intern/robertkim/.conda/envs/robert3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/home/intern/robertkim/.conda/envs/robert3/lib/python3.7/multiprocessing/process.py", line 138, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process


Epoch 9: 100%|██████████| 241/241 [00:08<00:00, 27.34it/s, loss=0.17, v_num=14, val_loss=0.879, f1=0, prec=0, recall=0]  
Epoch 9: 100%|██████████| 241/241 [00:25<00:00,  9.32it/s, loss=0.17, v_num=14, val_loss=0.355, f1=0.596, prec=0.634, recall=0.579]
Epoch 19: 100%|██████████| 241/241 [00:08<00:00, 27.77it/s, loss=0.158, v_num=14, val_loss=0.355, f1=0.596, prec=0.634, recall=0.579] 
Epoch 19: 100%|██████████| 241/241 [00:25<00:00,  9.42it/s, loss=0.158, v_num=14, val_loss=0.357, f1=0.605, prec=0.64, recall=0.588] 
Epoch 29: 100%|██████████| 241/241 [00:08<00:00, 28.13it/s, loss=0.157, v_num=14, val_loss=0.357, f1=0.605, prec=0.64, recall=0.588] 
Epoch 29: 100%|██████████| 241/241 [00:27<00:00,  8.86it/s, loss=0.157, v_num=14, val_loss=0.353, f1=0.605, prec=0.645, recall=0.586]
Epoch 39: 100%|██████████| 241/241 [00:08<00:00, 28.93it/s, loss=0.159, v_num=14, val_loss=0.353, f1=0.605, prec=0.645, recall=0.586] 
Epoch 39: 100%|██████████| 241/241 [00:25<00:00,  9.36it/s, loss=0.159, v_

1

In [None]:
# NOTE(review): everything below is a disabled (commented-out) manual training
# loop for GNN_GCN. It sets in_channels = 27 while the active run above uses 29
# input features; update that value before re-enabling this cell.
# in_channels = 27
# dim = 40
# layers = [32, 32] 
# powers = 2
# k = 16
# clip = 5

# device = 'cuda:0'

# m = GNN_GCN(in_channels, dim, layers, powers, k)
# m.to(device)
        
# optimizer = torch.optim.Adam(m.parameters(), lr=1e-3, weight_decay=1e-4)


# epochs = 100

# for i in range(epochs):
    
#     m.train()
#     print('train epoch ', i+1)
#     for x, y in tqdm(train_loader):
        
#         x, y = x.to(device), y.to(device)
        
#         y_hat = m(x.to(device))
        
# #         print(y_hat)

#         optimizer.zero_grad()
#         loss = F.binary_cross_entropy(y_hat.flatten(), y.flatten())
#         loss.backward()
        
#         torch.nn.utils.clip_grad_norm_(m.parameters(), clip)
#         optimizer.step()
    
#     m.eval()
    
#     val_loss = []

#     preds = []
#     targets = []    
    
#     for x, y in val_loader:
        
#         x, y = x.to(device), y.to(device)
        
#         y_hat = m(x.to(device))
        
#         loss = F.binary_cross_entropy(y_hat.flatten(), y.flatten()).item()
        
#         val_loss.append(loss)
        
# #         print(y_hat.flatten())
#         threshold = 0.3
#         y_hat[y_hat >= threshold] = 1
#         y_hat[y_hat < threshold] = 0
                                        
#         targets.extend(y.detach().cpu().numpy().tolist())
#         preds.extend(y_hat.detach().cpu().numpy().tolist())
        
#     f1 = f1_score(targets,preds, average='macro')
    
#     val_loss = np.mean(val_loss)
# #     print(targets, preds)
#     print('val_loss ', val_loss)
#     print('f1 ', f1)
    
    
# # # for p in model.parameters():
# # #     print(p.grad.norm())

# # # y_hat