In [9]:
import unicodedata
import string
import re
import random
import time
import math
import os
import torch
import torch.nn as nn

import torch.nn.functional as F
import numpy as np

from stanfordcorenlp import StanfordCoreNLP
from torch.autograd import Variable
from torch import optim

In [587]:
nlp = StanfordCoreNLP(r'../stanford-corenlp-full-2017-06-09')

sentence = 'which you step on to activate it'
de = nlp.dependency_parse(sentence)

In [23]:
print ('Dependency Parsing:', de)

Dependency Parsing: [('ROOT', 0, 3), ('dobj', 3, 1), ('nsubj', 3, 2), ('mark', 6, 4), ('mark', 6, 5), ('advcl', 3, 6), ('dobj', 6, 7)]


In [375]:
dep_sentences = []
dep_sentences.append(de)
dep_sentences.append(de)
dep_sentences.append(de)
dep_sentences.append(de)

# Initialize Variables

In [606]:
_DEP_LABELS = ['ROOT', 'DOBJ','ADV', 'ADV-GAP', 'AMOD', 'APPO', 'BNF', 'CONJ', 'COORD', 'DEP',
               'DEP-GAP', 'DIR', 'DIR-GAP', 'DIR-OPRD', 'DIR-PRD', 'DTV', 'EXT',
               'EXT-GAP', 'EXTR', 'GAP-LGS', 'GAP-LOC', 'GAP-LOC-PRD', 'GAP-MNR',
               'GAP-NMOD', 'GAP-OBJ', 'GAP-OPRD', 'GAP-PMOD', 'GAP-PRD', 'GAP-PRP',
               'GAP-SBJ', 'GAP-TMP', 'GAP-VC', 'HMOD', 'HYPH', 'IM', 'LGS', 'LOC',
               'LOC-OPRD', 'LOC-PRD', 'LOC-TMP', 'MNR', 'MNR-PRD', 'MNR-TMP', 'NAME',
               'NMOD', 'NSUBJ','OBJ', 'OPRD', 'P', 'PMOD', 'POSTHON', 'PRD', 'PRD-PRP',
               'PRD-TMP', 'PRN', 'PRP', 'PRT', 'PUT', 'SBJ', 'SUB', 'SUFFIX',
                'TITLE', 'TMP', 'VC', 'VOC']


_DEP_LABELS_DICT = {label:ix for ix, label in enumerate(_DEP_LABELS)}

In [605]:
len(_DEP_LABELS)

65

In [596]:
#Settings batch variables

SEQ_LEN = len(sentence)
BATCH_SIZE = len(dep_sentences)

In [597]:
#Initialize adjancencies matrixes

adj_arc_in = np.zeros((BATCH_SIZE* SEQ_LEN, 2), dtype='int32')
adj_lab_in = np.zeros((BATCH_SIZE* SEQ_LEN), dtype='int32')

adj_arc_out = np.zeros((BATCH_SIZE * SEQ_LEN, 2), dtype='int32')
adj_lab_out = np.zeros((BATCH_SIZE * SEQ_LEN), dtype='int32')

#Initialize mask matrix

mask_in = np.zeros((BATCH_SIZE * SEQ_LEN), dtype='float32')
mask_out = np.zeros((BATCH_SIZE * SEQ_LEN), dtype='float32')

mask_loop = np.ones((BATCH_SIZE * SEQ_LEN, 1), dtype='float32')

#Get adjacency matrix for incoming and outgoing arcs
for idx_sentence, dep_sentence in enumerate(dep_sentences):
    for idx_arc, arc in enumerate(dep_sentence):
        if(arc[0] != 'ROOT') and arc[0].upper() in _DEP_LABELS:
            #get index of words in the sentence
            arc_1 = int(arc[1]) - 1
            arc_2 = int(arc[2]) - 1

            idx = (idx_arc) + idx_sentence * BATCH_LEN
            
            #Make adjacency matrix for incoming arcs
            adj_arc_in[idx] = np.array([idx_sentence, arc_2]) 
            adj_lab_in[idx] = np.array([_DEP_LABELS_DICT[arc[0].upper()]]) 
            
            #Setting mask to consider that index
            mask_in[idx] = 1

            #Make adjacency matrix for outgoing arcs
            adj_arc_out[idx] = np.array([idx_sentence, arc_1])   
            adj_lab_out[idx] = np.array([_DEP_LABELS_DICT[arc[0].upper()]])
            
            #Setting mask to consider that index
            mask_out[idx] = 1

adj_arc_in = torch.LongTensor(np.transpose(adj_arc_in)) 
adj_arc_out = torch.LongTensor(np.transpose(adj_arc_out))

adj_lab_in = Variable(torch.LongTensor(adj_lab_in))
adj_lab_out = Variable(torch.LongTensor(adj_lab_out))

mask_in = Variable(torch.FloatTensor(mask_in.reshape((BATCH_SIZE * SEQ_LEN, 1))))
mask_out = Variable(torch.FloatTensor(mask_out.reshape((BATCH_SIZE * SEQ_LEN, 1))))
mask_loop = Variable(torch.FloatTensor(mask_loop))

# Syntactic GCN

In [602]:
class SintacticGCN(nn.Module):
    def __init__(self, num_inputs, num_units,
                 num_labels,
                 dropout = 0.,
                 in_arcs = True,
                 out_arcs = True,
                 batch_first = False):       
        super(SintacticGCN, self).__init__()      

        self.in_arcs = in_arcs
        self.out_arcs = out_arcs
        
        self.retain = 1. - dropout
        self.num_inputs = num_inputs
        self.num_units = num_units
        self.num_labels = num_labels
        self.batch_first = batch_first
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        
        if in_arcs:
            self.V_in = Variable(torch.FloatTensor(self.num_inputs, self.num_units))
            nn.init.xavier_normal(self.V_in)
            
            self.b_in = Variable(torch.FloatTensor(num_labels, self.num_units))
            nn.init.constant(self.b_in, 0)
            
            self.V_in_gate = Variable(torch.FloatTensor(self.num_inputs, 1))
            nn.init.uniform(self.V_in_gate)
            
            self.b_in_gate = Variable(torch.FloatTensor(num_labels, 1))
            nn.init.constant(self.b_in_gate, 1)

        if out_arcs:
            self.V_out = Variable(torch.FloatTensor(self.num_inputs, self.num_units))
            nn.init.xavier_normal(self.V_out)
            
            self.b_out = Variable(torch.FloatTensor(num_labels, self.num_units))
            nn.init.constant(self.b_in, 0)
            
            self.V_out_gate = Variable(torch.FloatTensor(self.num_inputs, 1))
            nn.init.uniform(self.V_out_gate)
            
            self.b_out_gate = Variable(torch.FloatTensor(num_labels, 1))
            nn.init.constant(self.b_out_gate, 1)
        
        self.W_self_loop = Variable(torch.FloatTensor(self.num_inputs, self.num_units))
        nn.init.xavier_normal(self.W_self_loop)        
        
        self.W_self_loop_gate = Variable(torch.FloatTensor(self.num_inputs, 1))
        nn.init.uniform(self.W_self_loop_gate)

    def forward(self, encoder_outputs,
                 arc_tensor_in, arc_tensor_out,
                 label_tensor_in, label_tensor_out,
                 mask_in, mask_out,  # batch* t, degree
                 mask_loop):

        if(not self.batch_first):
            encoder_outputs = encoder_outputs.permute(1, 0, 2).contiguous()
        
        batch_size, seq_len, _ = encoder_outputs.shape
        max_degree = 1
        input_ = encoder_outputs.view((batch_size * seq_len , self.num_inputs))  # [b* t, h]
        
        if self.in_arcs:
            input_in = torch.mm(input_, self.V_in)  # [b* t, h] * [h,h] = [b*t, h]
            second_in = self.b_in.index_select(0, label_tensor_in)  # [b* t* 1, h]
            in_ = (input_in + second_in).view((batch_size, seq_len, 1, self.num_units))

            # compute gate weights
            input_in_gate = torch.mm(input_, self.V_in_gate)  # [b* t, h] * [h,h] = [b*t, h]
            second_in_gate = self.b_in_gate.index_select(0, label_tensor_in)
            in_gate = (input_in_gate + second_in_gate).view((batch_size, seq_len, 1))

            max_degree += 1
            
        if self.out_arcs:           
            input_out = torch.mm(input_, self.V_out)  # [b* t, h] * [h,h] = [b* t, h]
            second_out = self.b_out.index_select(0, label_tensor_out)     
            
            degr = int(input_out.shape[0] / batch_size / seq_len)
            max_degree += degr

            out_ = (input_out + second_out).view((batch_size, seq_len, degr, self.num_units))

            # compute gate weights
            input_out_gate = torch.mm(input_, self.V_out_gate)  # [b* t, h] * [h,h] = [b* t, h]
            second_out_gate = self.b_out_gate.index_select(0, label_tensor_out)
            out_gate = (input_out_gate + second_out_gate).view((batch_size, seq_len, degr))
       
        same_input = torch.mm(encoder_outputs.view(-1,encoder_outputs.size(2)), self.W_self_loop).\
                        view(encoder_outputs.size(0), encoder_outputs.size(1), -1)
        same_input = same_input.view(encoder_outputs.size(0), encoder_outputs.size(1), 1, self.W_self_loop.size(1))
        
        same_input_gate = torch.mm(encoder_outputs.view(-1, encoder_outputs.size(2)), self.W_self_loop_gate)\
                                .view(encoder_outputs.size(0), encoder_outputs.size(1), -1)

        if self.in_arcs and self.out_arcs:
            potentials = torch.cat((in_, out_, same_input), dim=2)  # [b, t,  mxdeg, h]         
            potentials_gate = torch.cat((in_gate, out_gate, same_input_gate), dim=2)  # [b, t,  mxdeg, h]
            mask_soft = torch.cat((mask_in, mask_out, mask_loop), dim=1)  # [b* t, mxdeg]
        elif self.out_arcs:
            potentials = torch.cat((out_, same_input), dim=2)  # [b, t,  2*mxdeg+1, h]
            potentials_gate = torch.cat((out_gate, same_input_gate), dim=2)  # [b, t,  mxdeg, h]
            mask_soft = torch.cat((mask_out, mask_loop), dim=1)  # [b* t, mxdeg]
        elif self.in_arcs:
            potentials = torch.cat((in_, same_input), dim=2)  # [b, t,  2*mxdeg+1, h]
            potentials_gate = torch.cat((in_gate, same_input_gate), dim=2)  # [b, t,  mxdeg, h]
            mask_soft = torch.cat((mask_in, mask_loop), dim=1)  # [b* t, mxdeg]

        potentials_ = potentials.permute(3, 0, 1, 2).contiguous()  # [h, b, t, mxdeg]
        potentials_resh = potentials_.view((self.num_units,
                                               batch_size * seq_len,
                                               max_degree))  # [h, b * t, mxdeg]

        potentials_r = potentials_gate.view((batch_size * seq_len,
                                                  max_degree))  # [h, b * t, mxdeg]
        # calculate the gate
        probs_det_ = self.sigmoid(potentials_r) * mask_soft  # [b * t, mxdeg]
        potentials_masked = potentials_resh * mask_soft * probs_det_  # [h, b * t, mxdeg]

        
        #if self.retain == 1 or deterministic:
        #    pass
        #else:
        #    drop_mask = self._srng.binomial(potentials_resh.shape[1:], p=self.retain, dtype=input.dtype)
        #    potentials_masked /= self.retain
        #    potentials_masked *= drop_mask

        potentials_masked_ = potentials_masked.sum(dim=2)  # [h, b * t]
        potentials_masked_ = self.relu(potentials_masked_)

        result_ = potentials_masked_.permute(1, 0).contiguous()   # [b * t, h]
        result_ = result_.view((batch_size, seq_len, self.num_units))  # [ b, t, h]

        return result_

# Testing

In [603]:
embedding = nn.Embedding(10, 10)

batch = Variable(torch.ones(SEQ_LEN, BATCH_SIZE)).type(torch.LongTensor)
embed = embedding(batch)

print(embed.size())

torch.Size([32, 4, 10])


In [604]:
gcn = SintacticGCN(10, 5, 65)
gcn(embed,
                 adj_arc_in, adj_arc_out,
                 adj_lab_in, adj_lab_out,
                 mask_in, mask_out,  
                 mask_loop)

Variable containing:
(0 ,.,.) = 
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.1744  0.6544  0.0000  0.0000  0.0000
  0.1744  0.6544  0.0000  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.1744  0.6544  0.0000  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.1744  0.6544  0.0000  0.0000  0.0000
  0.1744  0.6544  0.0000  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.1744  0.6544  0.0000  0.0000  0.0000
  0.0257  0.0220  0.1397  0.0000  0.0000
  0.0257  0.0220  0.1397