In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import scipy.sparse as sp
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from tqdm import tqdm

In [3]:
torch.__version__

'1.4.0'

In [4]:
torch.cuda.get_device_name(1)

'Tesla V100-PCIE-32GB'

In [5]:
class GraphGenerator(nn.Module):
    def __init__(self, k, in_channels, dim, alpha=3.0):
        
        '''
        dim - dimension of the node embeddings 
        alpha - control saturation of the tanh
        
        produces nonsymmetric adjacency matrix
        
        '''
        
        super(GraphGenerator, self).__init__()
        
        
        self.lin1 = nn.Linear(in_channels,dim)


        self.k = k
        self.dim = dim
        self.alpha = alpha

    def forward(self, x):

        nodevec1 = x
        nodevec2 = nodevec1
        
#         print(nodevec1.shape)
        nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1))
        nodevec2 = torch.tanh(self.alpha*self.lin1(nodevec2))
        
        a = torch.mm(nodevec1, nodevec2.transpose(1,0))
        adj = F.relu(torch.tanh(self.alpha*a))
        
        mask = torch.zeros(x.size(0), x.size(0)).type_as(x)
        mask.fill_(float('0'))
        
        s1,t1 = adj.topk(self.k,1)
        
        mask.scatter_(1,t1,s1.fill_(1))
        
        adj = adj*mask
        
        adj = self.normalize(adj)
        
        return adj
    
    def normalize(self, adj):
        
        adj = adj + torch.eye(adj.size(0)).type_as(adj)
        d = adj.sum(1)
        dv = d
        a = adj / dv.view(-1, 1)
        
        return a
k = 2
dim = 2048
alpha = 3
static_feat = torch.rand(4,27)
gu = GraphGenerator(k, 27, dim)

x = torch.randint(0,10000, (4,1))

adj = gu(static_feat)
adj
# y - torch.transpose(y,0,1)

tensor([[0.3333, 0.0000, 0.3333, 0.3333],
        [0.0000, 0.3333, 0.3333, 0.3333],
        [0.0000, 0.0000, 0.6667, 0.3333],
        [0.0000, 0.0000, 0.3333, 0.6667]], grad_fn=<DivBackward0>)

In [6]:
class MixHop(torch.nn.Module):

    def __init__(self, in_channels, out_channels, powers):
        super(MixHop, self).__init__()
        
        self.w_list = nn.ModuleList()
        
        for i in range(powers):
            
            lin = nn.Linear(in_channels, out_channels)
            self.w_list.append(lin)
            
            
#     def init_parameters(self):
#         """
#         Initializing weights.
#         """
#         torch.nn.init.xavier_uniform_(self.weight_matrix)
#         torch.nn.init.xavier_uniform_(self.bias)

    def forward(self, norm_adj, x):
        
        adj_power = torch.eye(norm_adj.size(0)).type_as(x)
        
        X = []
        
        for lin in self.w_list:
            
            prod = F.relu(torch.mm(adj_power, lin(x)))

            X.append(prod)
            
            adj_power = torch.mm(adj_power, norm_adj)
        
        x = torch.stack(X, dim=1)

        x = torch.sum(x, dim=1)
        
        x = F.dropout(x, p=0.3, training=self.training)
        return x


In [7]:
class GCN(torch.nn.Module):

    def __init__(self, in_channels, out_channels):
        super(GCN, self).__init__()
        
        self.W = nn.Linear(in_channels, out_channels)

    def forward(self, norm_adj, x):
            
        x = F.relu(torch.mm(norm_adj, self.W(x)))
        
        return x


In [8]:
class GNN_MixHop(nn.Module):
    
    def __init__(self, in_channels, dim,  layers, powers, k):
        super(GNN_MixHop, self).__init__()
        
        self.mh_layers = nn.ModuleList()
        
        self.graph_gen = GraphGenerator(k, in_channels, dim)
        
        in_channels = in_channels
        
        for l in layers:
            mh = MixHop(in_channels, l, powers)
            in_channels = l
            
            self.mh_layers.append(mh)
                
        self.linear = nn.Linear(layers[len(layers) - 1],1)
        
        
    def forward(self, x):
        
        adj = self.graph_gen(x)

        for mh in self.mh_layers:
            x = mh(adj, x)
            x = F.dropout(x, p=0.3, training=self.training)

        
        x = self.linear(x)
#         print(x)
        x = torch.sigmoid(x)
        
        return x
    

In [9]:
class GNN_GCN(nn.Module):
    
    def __init__(self, in_channels, dim,  layers, powers, k):
        super(GNN_GCN, self).__init__()
        
        self.mh_layers = nn.ModuleList()
        
        self.graph_gen = GraphGenerator(k, in_channels, dim)
        
        in_channels = in_channels
        
        for l in layers:
            gcn = GCN(in_channels, l)
            in_channels = l
            
            self.mh_layers.append(gcn)
                
        self.linear = nn.Linear(layers[len(layers) - 1],1)
        
        
    def forward(self, x):
        
        adj = self.graph_gen(x)

        for gcn in self.mh_layers:
            
            x = gcn(adj, x)
            x = F.dropout(x, p=0.3, training=self.training)

        
        x = self.linear(x)
#         print(x)
        x = torch.sigmoid(x)
        
        return x
    

In [17]:
import pickle
# load preprocessed data
with open("Dual-Attentive-Tree-aware-Embedding/processed_data.pickle","rb") as f :
    processed_data = pickle.load(f)
print(processed_data.keys())

xgb_trainx = processed_data["xgboost_data"]["train_x"]
xgb_trainy = processed_data["xgboost_data"]["train_y"]
xgb_validx = processed_data["xgboost_data"]["valid_x"]
xgb_validy = processed_data["xgboost_data"]["valid_y"]
xgb_testx = processed_data["xgboost_data"]["test_x"]
xgb_testy = processed_data["xgboost_data"]["test_y"]

dict_keys(['raw', 'xgboost_data', 'revenue'])


In [18]:
def normalize(x, i):
    x_cut = x[:,:i]
    x_cut = (x_cut - x_cut.mean(0)) / x_cut.std(0)
    
    x[:, :i] = x_cut
    
    return x

xgb_trainx = normalize(xgb_trainx, 15)
xgb_validx = normalize(xgb_validx, 15)

In [19]:
xgb_validx[100:110]

array([[-0.23068742,  0.0450329 , -0.18982874, -0.3068576 , -0.15670664,
        -0.05921159, -0.09469758, -0.17137938,  0.43892181,  0.43892254,
         0.43861831,  0.44075298, -1.72084165, -1.62464043, -2.37415772,
         0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ],
       [-0.24384909, -0.29701643, -0.18330379, -0.29769346, -0.1578157 ,
        -0.06284325, -0.21135304, -0.18821133,  0.43857269,  0.43857189,
         0.43861831,  0.44075298, -1.72084165, -1.62464043, -2.37415772,
         0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ],
       [-0.043825  , -0.26290795, -0.17947537, -0.30031178, -0.15259673,
        -0.05833124, -0.22296814, -0.1872739 ,  0.43857698,  0.43857712,
         0.43861831,  0.44075298, -1.72084165, -1.6246

In [16]:
class MTGNNDataset(Dataset):

    def __init__(self, X, y):
         
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):

        x = self.X[idx,:]
        y = self.y[idx]

        return torch.FloatTensor(x), torch.FloatTensor([y])

batch_size = 256
num_workers = 32

train_dataset = MTGNNDataset(xgb_trainx, xgb_trainy)
train_loader = DataLoader(train_dataset, 
                          batch_size=batch_size, 
                          shuffle=True, 
                          num_workers=num_workers, 
                          pin_memory=True, 
                          drop_last=True)

val_dataset = MTGNNDataset(xgb_validx, xgb_validy)
val_loader = DataLoader(val_dataset, 
                          batch_size=batch_size, 
                          shuffle=False, 
                          num_workers=num_workers, 
                          pin_memory=True, 
                          drop_last=True)

In [17]:
class Model(pl.LightningModule):
    def __init__(self, in_channels, dim,  layers, powers, k):
        super(Model,self).__init__()
        
        self.gnn = GNN_MixHop(in_channels, dim, layers, powers, k)
        self.lr = 1e-3
        self.l2 = 1e-4
    
    def _weight_init(self):
        for p in self.parameters():
            if p.dim() > 1 and p.requires_grad:
                nn.init.kaiming_normal_(p)

    def forward(self, x):
        
        x = self.gnn(x)
        
        return x

    def training_step(self, batch, batch_idx):
      
        X, y = batch        
        y_hat = self.forward(X)
        
        loss = F.binary_cross_entropy(y_hat.flatten(), y.flatten())
                
        return loss
    
    def validation_step(self, batch, batch_idx):
        
         
        X, y = batch
     
        y_hat = self.forward(X)
                
        loss = F.binary_cross_entropy(y_hat.flatten(), y.flatten())
                   
        threshold = 0.4
        y_hat[y_hat >= threshold] = 1
        y_hat[y_hat < threshold] = 0

        
        return {'val_loss': loss, "preds": y_hat, "targets": y}
    
    def validation_epoch_end(self, val_step_outputs):
        
        y_true = []
        y_pred = []
        
        loss = torch.mean(torch.stack([x['val_loss'] for x in val_step_outputs])).detach().cpu()
        
        for pred in val_step_outputs:
                
            y_hat = pred["preds"]
            y = pred["targets"]
                                
            y_true.extend(y.detach().cpu().numpy().tolist())
            y_pred.extend(y_hat.detach().cpu().numpy().tolist())
        
        f1 = f1_score(y_true, y_pred, average='macro')
        
        self.log('val_loss', loss, prog_bar=True)
        self.log("f1", f1, prog_bar=True)

        
    def configure_optimizers(self):
        
#         optimizer = RangerLars(self.parameters(), lr=self.lr, weight_decay=self.l2)
#         scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer,gamma=0.99)
#         return [optimizer], [scheduler]

        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.l2)
    
        return optimizer
    
    def train_dataloader(self):
        return train_loader
    
    def val_dataloader(self):
        return val_loader 

In [18]:
epochs = 200

in_channels = 27
dim = 40
layers = [32, 32] 
powers = 3
k = 3

model = Model(in_channels, dim, layers, powers, k)

checkpoint_callback = ModelCheckpoint(
    monitor='f1',
    filename='MTGNN-{epoch:02d}-{f1:.4f}',
    save_top_k=0,
    mode='max',
)

early_stopping = EarlyStopping(monitor='val_loss', patience=10)

trainer = pl.Trainer(
    max_epochs=epochs,
    gpus=[0],
    num_sanity_val_steps=1,
    check_val_every_n_epoch=10,
#     gradient_clip_val=5
)

# training
trainer.fit(model)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name | Type       | Params
------------------------------------
0 | gnn  | GNN_MixHop | 7.0 K 
------------------------------------
7.0 K     Trainable params
0         Non-trainable params
7.0 K     Total params


Epoch 9: 100%|██████████| 234/234 [00:06<00:00, 37.30it/s, loss=0.12, v_num=57, val_loss=0.494, f1=0.487] 
Epoch 9: 100%|██████████| 234/234 [00:08<00:00, 27.51it/s, loss=0.12, v_num=57, val_loss=0.303, f1=0.503]
Epoch 19: 100%|██████████| 234/234 [00:06<00:00, 37.69it/s, loss=0.133, v_num=57, val_loss=0.303, f1=0.503]
Epoch 19: 100%|██████████| 234/234 [00:08<00:00, 28.52it/s, loss=0.133, v_num=57, val_loss=0.296, f1=0.505]
Epoch 29: 100%|██████████| 234/234 [00:06<00:00, 37.82it/s, loss=0.109, v_num=57, val_loss=0.296, f1=0.505] 
Epoch 29: 100%|██████████| 234/234 [00:08<00:00, 28.34it/s, loss=0.109, v_num=57, val_loss=0.306, f1=0.506]
Epoch 32:  59%|█████▉    | 139/234 [00:04<00:02, 33.21it/s, loss=0.105, v_num=57, val_loss=0.306, f1=0.506]


1

In [15]:
# in_channels = 27
# dim = 40
# layers = [32, 32] 
# powers = 2
# k = 16
# clip = 5

# device = 'cuda:0'

# m = GNN_GCN(in_channels, dim, layers, powers, k)
# m.to(device)
        
# optimizer = torch.optim.Adam(m.parameters(), lr=1e-3, weight_decay=1e-4)


# epochs = 100

# for i in range(epochs):
    
#     m.train()
#     print('train epoch ', i+1)
#     for x, y in tqdm(train_loader):
        
#         x, y = x.to(device), y.to(device)
        
#         y_hat = m(x.to(device))
        
# #         print(y_hat)

#         optimizer.zero_grad()
#         loss = F.binary_cross_entropy(y_hat.flatten(), y.flatten())
#         loss.backward()
        
#         torch.nn.utils.clip_grad_norm_(m.parameters(), clip)
#         optimizer.step()
    
#     m.eval()
    
#     val_loss = []

#     preds = []
#     targets = []    
    
#     for x, y in val_loader:
        
#         x, y = x.to(device), y.to(device)
        
#         y_hat = m(x.to(device))
        
#         loss = F.binary_cross_entropy(y_hat.flatten(), y.flatten()).item()
        
#         val_loss.append(loss)
        
# #         print(y_hat.flatten())
#         threshold = 0.3
#         y_hat[y_hat >= threshold] = 1
#         y_hat[y_hat < threshold] = 0
                                        
#         targets.extend(y.detach().cpu().numpy().tolist())
#         preds.extend(y_hat.detach().cpu().numpy().tolist())
        
#     f1 = f1_score(targets,preds, average='macro')
    
#     val_loss = np.mean(val_loss)
# #     print(targets, preds)
#     print('val_loss ', val_loss)
#     print('f1 ', f1)
    
    
# # # for p in model.parameters():
# # #     print(p.grad.norm())

# # # y_hat