In [1]:
import dgl.data
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle

In [2]:
import torch
import random
import numpy as np

# Pin every random number generator the notebook relies on (Python, PyTorch,
# NumPy) so that weight initialisation is repeatable across kernel restarts.
# NumPy is seeded last on purpose: its call returns None, so the cell prints nothing.
random.seed(0)
torch.manual_seed(0)
np.random.seed(0)

# data

In [3]:
# dgl.load_graphs returns a (graph list, label dict) pair; only the graph list is kept.
# Later cells slice this list into consecutive windows, so its order is meaningful
# (presumably one graph per time step over a shared node set — TODO confirm with the data-prep step).
graphs_list = dgl.load_graphs('./res_data/ad_graphs.dgl')[0]

In [4]:
# SECURITY NOTE: pickle.load can execute arbitrary code. Only load files that were
# produced by this project's own preprocessing step.
# Each file is opened in a `with` block so the handle is closed as soon as the
# object has been read (the handles used to stay open for the kernel's lifetime).

# ad_index: code string -> integer row index (used below as ad_index['110000']).
with open('./res_data/ad_index', 'rb') as f_ad_index:
    ad_index = pickle.load(f_ad_index)

# index_ad: the reverse lookup, integer row index -> code string (index_ad[0]).
with open('./res_data/index_ad', 'rb') as f_index_ad:
    index_ad = pickle.load(f_index_ad)

# ds_ad_label: label data; not referenced by the cells visible in this notebook.
with open('./res_data/ds_ad_label', 'rb') as f_ds_ad_label:
    ds_ad_label = pickle.load(f_ds_ad_label)

# model

In [5]:
from dgl.nn import GraphConv
from dgl.nn import SAGEConv

In [6]:
class GCN(nn.Module):
    """Two-layer graph network built from mean-aggregating ``SAGEConv`` layers.

    Despite the class name, the layers are GraphSAGE convolutions rather than
    ``GraphConv``.

    Args:
        in_feats: width of each input node feature vector.
        h_feats: width of the hidden representation between the two layers.
        num_classes: width of each output node vector.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        # conv1 is created before conv2 so seeded initialisation stays reproducible.
        self.conv1 = SAGEConv(in_feats, h_feats, aggregator_type='mean')
        self.conv2 = SAGEConv(h_feats, num_classes, aggregator_type='mean')

    def forward(self, g, in_feat):
        """Run conv1 -> ReLU -> conv2 over graph ``g`` and return the node outputs."""
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)

In [7]:
class embedGCN(nn.Module):
    """Learned per-node embeddings followed by a ``GCN``.

    The graph carries no input features here: every node id ``0..N-1`` is
    mapped to a trainable embedding row, and those rows are the GCN's input.

    Args:
        in_feats: number of rows in the embedding table, i.e. the maximum
            number of nodes a graph passed to ``forward`` may have.
        h_feats: embedding width, also used as the GCN hidden width.
        num_classes: width of each output node vector.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super(embedGCN, self).__init__()
        self.node_embed = nn.Embedding(in_feats, h_feats)
        self.gcn = GCN(h_feats, h_feats, num_classes)

    def forward(self, g):
        """Return the GCN output for every node of ``g``.

        Args:
            g: graph object exposing ``num_nodes()``; it is passed through to
                the GCN unchanged.

        Returns:
            Tensor with one row per node of ``g``.
        """
        # Build the ids on the embedding's own device so the lookup keeps working
        # after model.to(device); a plain arange always lands on the CPU.
        node_ids = torch.arange(
            g.num_nodes(),
            dtype=torch.int64,
            device=self.node_embed.weight.device,
        )
        new_feat = self.node_embed(node_ids)
        return self.gcn(g, new_feat)


In [8]:
class seqEmbedGCN(nn.Module):
    """Sequence model: a shared ``embedGCN`` per graph, then an LSTM over the sequence.

    Each graph in the input sequence is encoded by the same ``embedGCN``. The
    per-graph node outputs are stacked along a new leading axis and fed to a
    2-layer LSTM (``batch_first=False``, so that leading axis is the time axis
    and the node axis acts as the batch). A linear layer maps every LSTM output
    to ``num_classes`` values.

    Args:
        in_feats: number of embedding rows (maximum node count per graph).
        h_feats: width of embeddings, GCN output and LSTM state.
        num_classes: width of the final per-node, per-step output.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super(seqEmbedGCN, self).__init__()
        self.embedGCN = embedGCN(in_feats, h_feats, h_feats)
        self.rnn = nn.LSTM(h_feats, h_feats, num_layers=2, batch_first=False)
        self.outlayer = nn.Linear(h_feats, num_classes)

    def forward(self, graphs, debug=False):
        """Encode a sequence of graphs and return per-step, per-node outputs.

        Args:
            graphs: non-empty sequence of graphs; all must produce GCN outputs
                of the same shape, because they are combined with ``torch.stack``.
            debug: when true, print the shapes of the intermediate tensors.

        Returns:
            Tensor of shape ``(len(graphs), num_nodes, num_classes)``.
        """
        seq_input = torch.stack([self.embedGCN(g) for g in graphs], 0)
        if debug:
            print('input', seq_input.shape)

        output, (hn, cn) = self.rnn(seq_input)
        if debug:
            print('o', output.shape)
            print('h', hn.shape)
            print('c', cn.shape)

        output = self.outlayer(output)
        if debug:
            print('last_out', output.shape)
        return output

    def getGCNout(self, graphs, debug=False):
        """Return the ``embedGCN`` output for one graph, bypassing the LSTM.

        Args:
            graphs: a single graph, despite the plural name, which is kept for
                interface compatibility.
            debug: unused; kept for interface compatibility.
        """
        # Use the argument itself; reading the notebook-level variable `g` here
        # made the result depend on whatever graph was last assigned to `g`.
        return self.embedGCN(graphs)

In [9]:
# Smoke-test instance: one embedding row per node of the first graph,
# hidden width 16, and a single output value per node.
model = seqEmbedGCN(graphs_list[0].num_nodes(), 16, 1)
# `g` is kept at notebook level and reused by the following cells.
g = graphs_list[0]

In [10]:
# Feed a two-step sequence made of the same graph twice, with debug=True so the
# shapes of the stacked input, LSTM outputs/states and final layer are printed.
out = model([g,g],True)

input torch.Size([2, 368, 16])
o torch.Size([2, 368, 16])
h torch.Size([2, 368, 16])
c torch.Size([2, 368, 16])
last_out torch.Size([2, 368, 1])


In [11]:
out.shape

torch.Size([2, 368, 1])

# train

In [29]:
def seq_train(graphs_list, model, num_epochs=500, seq_len=2, lr=0.01, log_every=100):
    """Train ``model`` on consecutive windows of ``graphs_list`` with an MSE loss.

    Every epoch walks the list in non-overlapping windows of ``seq_len`` graphs
    (the last window may be shorter). For each window the model output is
    regressed onto the stacked ``g.ndata['y']`` tensors and one Adam step is
    taken. All nodes contribute to the loss; no train/validation mask is applied.

    Args:
        graphs_list: sequence of graphs, each storing its target in ``ndata['y']``.
            The stacked targets must be shape-compatible with the model output
            (presumably ``(num_nodes, 1)`` per graph — TODO confirm).
        model: module called as ``model(list_of_graphs)``; updated in place.
        num_epochs: number of passes over ``graphs_list``.
        seq_len: number of graphs per window; must be at least 1.
        lr: Adam learning rate.
        log_every: print the loss for every window of each epoch divisible by
            this value; ``0`` or ``None`` disables logging.

    Returns:
        None.

    Raises:
        ValueError: if ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be a positive integer, got %r" % (seq_len,))

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.MSELoss()

    for e in range(num_epochs):
        for beg_pos in range(0, len(graphs_list), seq_len):
            # `i` is the exclusive end of the window; it is also the "step"
            # shown in the log line, so it may exceed len(graphs_list) for a
            # short final window.
            i = beg_pos + seq_len
            input_graph = graphs_list[beg_pos:i]

            # Forward
            logits = model(input_graph)
            label = torch.stack([g.ndata['y'] for g in input_graph], 0)

            # Compute loss over every node of every graph in the window.
            loss = loss_fn(logits, label)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if log_every and e % log_every == 0:
                print("epoch:%d; step:%d :loss:%f" % (e, i, loss.item()))

In [30]:
# Rebuild the model with hidden width 8 (replacing the 16-wide smoke-test
# instance above) and train it on the full list of graphs.
g = graphs_list[0]
model = seqEmbedGCN(g.num_nodes(), 8, 1)
seq_train(graphs_list, model)

epoch:0; step:2 :loss:0.164750
epoch:0; step:4 :loss:0.148153
epoch:0; step:6 :loss:0.122323
epoch:0; step:8 :loss:0.103119
epoch:100; step:2 :loss:0.000243
epoch:100; step:4 :loss:0.000211
epoch:100; step:6 :loss:0.000163
epoch:100; step:8 :loss:0.000261
epoch:200; step:2 :loss:0.000216
epoch:200; step:4 :loss:0.000185
epoch:200; step:6 :loss:0.000132
epoch:200; step:8 :loss:0.000226
epoch:300; step:2 :loss:0.000203
epoch:300; step:4 :loss:0.000170
epoch:300; step:6 :loss:0.000121
epoch:300; step:8 :loss:0.000202
epoch:400; step:2 :loss:0.000187
epoch:400; step:4 :loss:0.000155
epoch:400; step:6 :loss:0.000111
epoch:400; step:8 :loss:0.000173


In [14]:
# Inspect the node-embedding table of the trained model (one row per node index).
print(model.embedGCN.node_embed.weight)

Parameter containing:
tensor([[-0.5746, -0.0588, -3.6979,  ...,  1.0839,  0.5945,  2.7897],
        [ 0.8305, -0.7315,  0.8775,  ...,  1.1419, -0.6689, -1.0674],
        [-2.1685, -1.4817, -0.0759,  ...,  0.3293,  1.7991,  1.9383],
        ...,
        [-0.1571, -0.8844, -0.4559,  ...,  0.4278,  1.4550, -0.3525],
        [ 2.2011, -0.8131,  1.0844,  ..., -0.8853, -0.2358, -1.3186],
        [ 1.8563,  0.0440, -0.5163,  ...,  0.1553,  0.4654, -2.2332]],
       requires_grad=True)


In [15]:
model.embedGCN.node_embed(torch.tensor(0))

tensor([-0.5746, -0.0588, -3.6979, -1.4440, -0.6106,  1.0839,  0.5945,  2.7897],
       grad_fn=<EmbeddingBackward0>)

In [16]:
index_ad[0]

'130200'

In [17]:
model.embedGCN.node_embed(torch.tensor(ad_index['110000']))

tensor([ 0.0918,  1.9054, -0.0982, -0.3155, -0.6081,  0.4646, -1.0662, -0.6121],
       grad_fn=<EmbeddingBackward0>)

In [18]:
a = model.embedGCN.node_embed(torch.tensor(0))

In [19]:
b = model.embedGCN.node_embed(torch.tensor(ad_index['110000']))

In [20]:
torch.nn.functional.cosine_similarity(a,b,dim=0)

tensor(-0.0662, grad_fn=<SumBackward1>)

# eval

In [21]:
def get_cosvalue(ad_a,ad_b):
    """Cosine similarity between the learned embeddings of two codes.

    Relies on the notebook-level ``model`` and ``ad_index``: each code string is
    translated to a row index through ``ad_index`` and looked up in
    ``model.embedGCN.node_embed``.

    Args:
        ad_a: first code string (a key of ``ad_index``).
        ad_b: second code string (a key of ``ad_index``).

    Returns:
        Zero-dimensional tensor holding the cosine similarity; it still tracks
        gradients because the lookup is not wrapped in ``no_grad``.
    """
    lookup = model.embedGCN.node_embed
    vec_a, vec_b = (lookup(torch.tensor(ad_index[code])) for code in (ad_a, ad_b))
    return torch.nn.functional.cosine_similarity(vec_a, vec_b, dim=0)

In [22]:
get_cosvalue('110000','130200')

tensor(-0.0662, grad_fn=<SumBackward1>)

# save

In [35]:
import datetime 

In [40]:
# Checkpoint file name: the current local time at minute resolution, with
# dash-separated fields that are not zero-padded (for example "2024-3-7-9-5").
dt = datetime.datetime.now()
stamp_fields = (dt.year, dt.month, dt.day, dt.hour, dt.minute)
timestamp = "%s-%s-%s-%s-%s" % stamp_fields
ckPath = './model_res/%s' % timestamp

In [41]:

import os

# torch.save does not create missing parent directories, so make sure the
# output folder exists before writing (a fresh checkout has no ./model_res).
os.makedirs(os.path.dirname(ckPath), exist_ok=True)

# Only the parameters (state_dict) are stored; to reload, rebuild
# seqEmbedGCN with the same sizes and call load_state_dict.
torch.save(model.state_dict(), ckPath)

In [43]:
# Shape check of the embedding + GCN stage alone (the LSTM and output layer are skipped).
model.getGCNout(graphs_list[0]).shape

torch.Size([368, 8])