In [1]:
### this one to test the dgl 
import sys
sys.path.append("/home/n/nguyenpk/CS6208/GNN_ERC/baseline/DialogueGCN-mianzhang")

### Classifier

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgcn

log = dgcn.utils.get_logger()


class Classifier(nn.Module):
    def __init__(self, input_dim, hidden_size, tag_size, args):
        super(Classifier, self).__init__()
        self.emotion_att = MaskedEmotionAtt(input_dim)
        self.lin1 = nn.Linear(input_dim, hidden_size)
        self.drop = nn.Dropout(args.drop_rate)
        self.lin2 = nn.Linear(hidden_size, tag_size)
        if args.class_weight:
            self.loss_weights = torch.tensor([1 / 0.086747, 1 / 0.144406, 1 / 0.227883,
                                              1 / 0.160585, 1 / 0.127711, 1 / 0.252668]).to(args.device)
            self.nll_loss = nn.NLLLoss(self.loss_weights)
        else:
            self.nll_loss = nn.NLLLoss()

    def get_prob(self, h, text_len_tensor):
        # h_hat = self.emotion_att(h, text_len_tensor)
        # hidden = self.drop(F.relu(self.lin1(h_hat)))
        hidden = self.drop(F.relu(self.lin1(h)))
        scores = self.lin2(hidden)
        log_prob = F.log_softmax(scores, dim=-1)

        return log_prob

    def forward(self, h, text_len_tensor):
        log_prob = self.get_prob(h, text_len_tensor)
        y_hat = torch.argmax(log_prob, dim=-1)

        return y_hat

    def get_loss(self, h, label_tensor, text_len_tensor):
        log_prob = self.get_prob(h, text_len_tensor)
        loss = self.nll_loss(log_prob, label_tensor)

        return loss


class MaskedEmotionAtt(nn.Module):

    def __init__(self, input_dim):
        super(MaskedEmotionAtt, self).__init__()
        self.lin = nn.Linear(input_dim, input_dim)

    def forward(self, h, text_len_tensor):
        batch_size = text_len_tensor.size(0)
        x = self.lin(h)  # [node_num, H]
        ret = torch.zeros_like(h)
        s = 0
        for bi in range(batch_size):
            cur_len = text_len_tensor[bi].item()
            y = x[s: s + cur_len]
            z = h[s: s + cur_len]
            scores = torch.mm(z, y.t())  # [L, L]
            probs = F.softmax(scores, dim=1)
            out = z.unsqueeze(0) * probs.unsqueeze(-1)  # [1, L, H] x [L, L, 1] --> [L, L, H]
            out = torch.sum(out, dim=1)  # [L, H]
            ret[s: s + cur_len, :] = out
            s += cur_len

        return ret




  from .autonotebook import tqdm as notebook_tqdm


### EdgeAtt

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgcn

log = dgcn.utils.get_logger()


class EdgeAtt(nn.Module):

    def __init__(self, g_dim, args):
        super(EdgeAtt, self).__init__()
        self.device = args.device
        self.wp = args.wp
        self.wf = args.wf

        self.weight = nn.Parameter(torch.zeros((g_dim, g_dim)).float(), requires_grad=True)
        var = 2. / (self.weight.size(0) + self.weight.size(1))
        self.weight.data.normal_(0, var)

    def forward(self, node_features, text_len_tensor, edge_ind):
        batch_size, mx_len = node_features.size(0), node_features.size(1)
        alphas = []

        weight = self.weight.unsqueeze(0).unsqueeze(0)
        att_matrix = torch.matmul(weight, node_features.unsqueeze(-1)).squeeze(-1)  # [B, L, D_g]
        for i in range(batch_size):
            cur_len = text_len_tensor[i].item()
            alpha = torch.zeros((mx_len, 110)).to(self.device)
            for j in range(cur_len):
                s = j - self.wp if j - self.wp >= 0 else 0
                e = j + self.wf if j + self.wf <= cur_len - 1 else cur_len - 1
                tmp = att_matrix[i, s: e + 1, :]  # [L', D_g]
                feat = node_features[i, j]  # [D_g]
                score = torch.matmul(tmp, feat)
                probs = F.softmax(score)  # [L']
                alpha[j, s: e + 1] = probs
            alphas.append(alpha)

        return alphas

# class EdgeAtt(nn.Module):
#
#     def __init__(self, g_dim, args):
#         super(EdgeAtt, self).__init__()
#         self.device = args.device
#         self.wp = args.wp
#         self.wf = args.wf
#         self.lin = nn.Linear(g_dim, 110)
#
#     def forward(self, node_features, text_len_tensor, edge_ind):
#         h = self.lin(node_features)  # [B, L, mx]
#         alphas = F.softmax(h, dim=-1)
#         # alphas = torch.ones((node_features.size(0), node_features.size(1), 110))
#         return alphas


### GCN

In [4]:
### original

In [11]:
import torch
import torch.nn as nn
from dgl.nn.pytorch import RelGraphConv as RGCNConv
from dgl.nn.pytorch import GraphConv
import dgcn
log = dgcn.utils.get_logger()

class GCN(nn.Module):

    def __init__(self, g_dim, h1_dim, h2_dim, args):
        super(GCN, self).__init__()
        self.num_relations = 2 * args.n_speakers ** 2
        self.conv1 = RGCNConv(g_dim, h1_dim, self.num_relations, num_bases=30)
        # self.conv1 = myRGCNConv(g_dim, h1_dim, self.num_relations, num_bases=30)
        self.conv2 = GraphConv(h1_dim, h2_dim)

    def forward(self, node_features, edge_index, edge_norm, edge_type):

        # x = self.conv1(node_features, edge_index, edge_type)
        # x = self.conv2(x, edge_index, edge_weight=edge_norm)

        x = self.conv1(node_features, edge_index, edge_type, edge_norm=edge_norm)
        # log.info("x.shape = {}, {}".format(x.shape, edge_norm.view(-1, 1).shape) )
        x = self.conv2(x, edge_index)


        return x

### SeqContext


In [12]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class SeqContext(nn.Module):

    def __init__(self, u_dim, g_dim, args):
        super(SeqContext, self).__init__()
        self.input_size = u_dim
        self.hidden_dim = g_dim
        if args.rnn == "lstm":
            self.rnn = nn.LSTM(self.input_size, self.hidden_dim // 2, dropout=args.drop_rate,
                               bidirectional=True, num_layers=2, batch_first=True)
        elif args.rnn == "gru":
            self.rnn = nn.GRU(self.input_size, self.hidden_dim // 2, dropout=args.drop_rate,
                              bidirectional=True, num_layers=2, batch_first=True)

    def forward(self, text_len_tensor, text_tensor):
        packed = pack_padded_sequence(
            text_tensor,
            text_len_tensor.cpu(),
            batch_first=True,
            enforce_sorted=False
        )
        print(len(self.rnn(packed, None)))
        # rnn_out, (_, _) = self.rnn(packed, None)
        rnn_out,  _ = self.rnn(packed, None)
        rnn_out, _ = pad_packed_sequence(rnn_out, batch_first=True)

        return rnn_out


### function

In [13]:
import numpy as np
import torch

import dgcn

log = dgcn.utils.get_logger()


def batch_graphify(features, lengths, speaker_tensor, wp, wf, edge_type_to_idx, att_model, device):
    node_features, edge_index, edge_norm, edge_type = [], [], [], []
    batch_size = features.size(0)
    length_sum = 0
    edge_ind = []
    edge_index_lengths = []

    for j in range(batch_size):
        edge_ind.append(edge_perms(lengths[j].cpu().item(), wp, wf))

    edge_weights = att_model(features, lengths, edge_ind)

    for j in range(batch_size):
        cur_len = lengths[j].item()
        node_features.append(features[j, :cur_len, :])
        perms = edge_perms(cur_len, wp, wf)
        perms_rec = [(item[0] + length_sum, item[1] + length_sum) for item in perms]
        length_sum += cur_len
        edge_index_lengths.append(len(perms))

        for item, item_rec in zip(perms, perms_rec):
            edge_index.append(torch.tensor([item_rec[0], item_rec[1]]))
            edge_norm.append(edge_weights[j][item[0], item[1]])
            # edge_norm.append(edge_weights[j, item[0], item[1]])

            speaker1 = speaker_tensor[j, item[0]].item()
            speaker2 = speaker_tensor[j, item[1]].item()
            if item[0] < item[1]:
                c = '0'
            else:
                c = '1'
            edge_type.append(edge_type_to_idx[str(speaker1) + str(speaker2) + c])

    node_features = torch.cat(node_features, dim=0).to(device)  # [E, D_g]
    edge_index = torch.stack(edge_index).t().contiguous().to(device)  # [2, E]
    edge_norm = torch.stack(edge_norm).to(device)  # [E]
    edge_type = torch.tensor(edge_type).long().to(device)  # [E]
    edge_index_lengths = torch.tensor(edge_index_lengths).long().to(device)  # [B]

    return node_features, edge_index, edge_norm, edge_type, edge_index_lengths


def edge_perms(length, window_past, window_future):
    """
    Method to construct the edges of a graph (a utterance) considering the past and future window.
    return: list of tuples. tuple -> (vertice(int), neighbor(int))
    """

    all_perms = set()
    array = np.arange(length)
    for j in range(length):
        perms = set()

        if window_past == -1 and window_future == -1:
            eff_array = array
        elif window_past == -1:  # use all past context
            eff_array = array[:min(length, j + window_future + 1)]
        elif window_future == -1:  # use all future context
            eff_array = array[max(0, j - window_past):]
        else:
            eff_array = array[max(0, j - window_past):min(length, j + window_future + 1)]

        for item in eff_array:
            perms.add((j, item))
        all_perms = all_perms.union(perms)
    return list(all_perms)


### DialogueGCN

In [14]:
import torch
import torch.nn as nn

# from .SeqContext import SeqContext
# from .EdgeAtt import EdgeAtt
# from .GCN import GCN
# from .Classifier import Classifier
# from .functions import batch_graphify
import dgcn

log = dgcn.utils.get_logger()


class DialogueGCN(nn.Module):

    def __init__(self, args):
        super(DialogueGCN, self).__init__()
        u_dim = 100
        g_dim = 200
        h1_dim = 100
        h2_dim = 100
        hc_dim = 100
        tag_size = 6

        self.wp = args.wp
        self.wf = args.wf
        self.device = args.device

        self.rnn = SeqContext(u_dim, g_dim, args)
        self.edge_att = EdgeAtt(g_dim, args)
        self.gcn = GCN(g_dim, h1_dim, h2_dim, args)
        self.clf = Classifier(g_dim + h2_dim, hc_dim, tag_size, args)

        edge_type_to_idx = {}
        for j in range(args.n_speakers):
            for k in range(args.n_speakers):
                edge_type_to_idx[str(j) + str(k) + '0'] = len(edge_type_to_idx)
                edge_type_to_idx[str(j) + str(k) + '1'] = len(edge_type_to_idx)
        self.edge_type_to_idx = edge_type_to_idx
        log.debug(self.edge_type_to_idx)

    def get_rep(self, data):
        node_features = self.rnn(data["text_len_tensor"], data["text_tensor"]) # [batch_size, mx_len, D_g]
        features, edge_index, edge_norm, edge_type, edge_index_lengths = batch_graphify(
            node_features, data["text_len_tensor"], data["speaker_tensor"], self.wp, self.wf,
            self.edge_type_to_idx, self.edge_att, self.device)

        graph_out = self.gcn(features, edge_index, edge_norm, edge_type)

        return graph_out, features

    def forward(self, data):
        graph_out, features = self.get_rep(data)
        out = self.clf(torch.cat([features, graph_out], dim=-1), data["text_len_tensor"])

        return out

    def get_loss(self, data):
        graph_out, features = self.get_rep(data)
        loss = self.clf.get_loss(torch.cat([features, graph_out], dim=-1),
                                 data["label_tensor"], data["text_len_tensor"])

        return loss


## Read data and check training

In [15]:
## import necessary lib

In [16]:
import os
base_path = "/home/n/nguyenpk/CS6208/GNN_ERC/baseline/DialogueGCN-mianzhang"
data_path = os.path.join(base_path, "data/iemocap/ckpt/data.pkl")
batch_size = 32
device  = "cuda:0"
learning_rate = 0.0003
max_grad_value = -1
weight_decay = 1e-8
optimizer = "adam"

class Namespace:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

args = Namespace(batch_size=batch_size, 
                 device=device,
                 learning_rate=learning_rate,
                 max_grad_value=max_grad_value, 
                 weight_decay=weight_decay, 
                 optimizer=optimizer, 
                 from_begin=True,
                 epochs=1,
                 drop_rate=0.5,
                 wp=10,
                 wf=10,
                 n_speakers=2,
                 hidden_size=100,
                 rnn='gru',
                 class_weight=True,
                 seed=24,
                 
)
data = dgcn.utils.load_pkl(data_path)
trainset = dgcn.Dataset(data["train"], args.batch_size)
devset = dgcn.Dataset(data["dev"], args.batch_size)
testset = dgcn.Dataset(data["test"], args.batch_size)

model_file = "./save/model.pt"
model = DialogueGCN(args).to(device)
opt = dgcn.Optim(learning_rate, max_grad_value, weight_decay)
opt.set_parameters(model.parameters(), optimizer)

# coach = dgcn.Coach(trainset, devset, testset, model, opt, args)
# ret = coach.train()

#     # Save.
#     checkpoint = {
#         "best_dev_f1": ret[0],
#         "best_epoch": ret[1],
#         "best_state": ret[2],
#     }
# torch.save(checkpoint, model_file)

In [17]:
## -- test
idx = 0
data = trainset[idx]
for k, v in data.items():
    data[k] = v.to(args.device)

In [18]:
node_features = model.rnn(data["text_len_tensor"], data["text_tensor"]) # [batch_size, mx_len, D_g]
features, edge_index, edge_norm, edge_type, edge_index_lengths = batch_graphify(
    node_features, data["text_len_tensor"], data["speaker_tensor"], args.wp, args.wf,
    model.edge_type_to_idx, model.edge_att, args.device)

# graph_out = self.gcn(features, edge_index, edge_norm, edge_type)

2


  probs = F.softmax(score)  # [L']


In [40]:
import dgl
from dgl.nn import RelGraphConv
from dgl.nn.pytorch import GraphConv

g_dim = 200
h1_dim = 100
h2_dim = 100
hc_dim = 100
tag_size = 6


g = dgl.graph((edge_index[0], edge_index[1]))
g.norm = edge_norm
# g.etypes= edge_type
conv = RelGraphConv(g_dim, h1_dim, h2_dim, regularizer='basis', num_bases=30).cuda()
conv1 = GraphConv(h1_dim, h2_dim).cuda()
# res = conv(g, feat, etype)

res = conv(g, features, edge_type)
res2 = conv1(g, res)

In [42]:
g

Graph(num_nodes=1567, num_edges=29387,
      ndata_schemes={}
      edata_schemes={})

In [None]:
g_dim = 200
        h1_dim = 100
        h2_dim = 100
        hc_dim = 100
        tag_size = 6

        self.wp = args.wp
        self.wf = args.wf
        self.device = args.device

        self.rnn = SeqContext(u_dim, g_dim, args)
        self.edge_att = EdgeAtt(g_dim, args)
        self.gcn = GCN(g_dim, h1_dim, h2_dim, args)

In [None]:
import dgl
import numpy as np
import torch as th
from dgl.nn import RelGraphConv
>>>
g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
feat = th.ones(6, 10)
conv = RelGraphConv(10, 2, 3, regularizer='basis', num_bases=2)
etype = th.tensor([0,1,2,0,1,2])
res = conv(g, feat, etype)


res
tensor([[ 0.3996, -2.3303],
        [-0.4323, -0.1440],
        [ 0.3996, -2.3303],
        [ 2.1046, -2.8654],
        [-0.4323, -0.1440],
        [-0.1309, -1.0000]], grad_fn=<AddBackward0>)

In [None]:
##
epoch_loss = 0
self.model.train()
for epoch in range(1, args.epochs)
    for idx in range(len(trainset):
        model.zero_grad()
        data = trainset[idx]
        for k, v in data.items():
            data[k] = v.to(args.device)
        nll = model.get_loss(data)
        epoch_loss += nll.item()
        nll.backward()
        self.opt.step()