In [1]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from tree import *
from torch.nn.utils.rnn import pad_sequence
import argparse
import codecs
import json
import linecache
import logging
import os
import pickle
import random
import sys
from collections import Counter, defaultdict
from copy import copy, deepcopy

import nltk
import numpy as np
import simplejson as json
import torch
from allennlp.modules.elmo import batch_to_ids
from lxml import etree
from nltk import word_tokenize
from nltk.tokenize import TreebankWordTokenizer
from torch.utils.data import DataLoader, Dataset


import warnings
warnings.filterwarnings('always')

In [2]:
def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

In [3]:

def read_sentence_depparsed(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
        return data


def get_dataset(dataset_name):
    rest_train = 'data/semeval14/Restaurants_Train_v2_biaffine_depparsed_with_energy.json'
    rest_test = 'data/semeval14/Restaurants_Test_Gold_biaffine_depparsed_with_energy.json'

    laptop_train = 'data/semeval14/Laptop_Train_v2_biaffine_depparsed.json'
    laptop_test = 'data/semeval14/Laptops_Test_Gold_biaffine_depparsed.json'

    twitter_train = 'data/twitter/train_biaffine.json'
    twitter_test = 'data/twitter/test_biaffine.json'

    ds_train = {'rest': rest_train,
                'laptop': laptop_train, 
                'twitter': twitter_train}
    ds_test = {'rest': rest_test,
               'laptop': laptop_test, 
               'twitter': twitter_test}

    train = list(read_sentence_depparsed(ds_train[dataset_name]))
    test = list(read_sentence_depparsed(ds_test[dataset_name]))
    
    print("Read {} Train set: {}".format(dataset_name, len(train)))
    print("Read {} Test set: {}".format(dataset_name, len(test)))

    return train, test

In [4]:
def get_rolled_and_unrolled_data(input_data, args):
    # A hand-picked set of part of speech tags that we see contributes to ABSA.
    all_unrolled = []
    # Make sure the tree is successfully built.
    zero_dep_counter = 0

    # Sentiment counters
    total_counter = defaultdict(int)
    sentiments_lookup = {'negative': 0, 'positive': 1, 'neutral': 2}

    # for seeking 'but' examples
    for e in input_data:
        e['tokens'] = [x.lower() for x in e['tokens']]
        aspects = []
        sentiments = []
        froms = []
        tos = []
        dep_tags = []
        dep_index = []
        dep_dirs = []

        # Classify based on POS-tags
        pos_class = e['tags']

        # Iterate through aspects in a sentence and reshape the dependency tree.
        for i in range(len(e['aspect_sentiment'])):
            aspect = e['aspect_sentiment'][i][0].lower()
            # We would tokenize the aspect while at it.
            aspect = word_tokenize(aspect)
            sentiment = sentiments_lookup[e['aspect_sentiment'][i][1]]
            frm = e['from_to'][i][0]
            to = e['from_to'][i][1]

            aspects.append(aspect)
            sentiments.append(sentiment)
            froms.append(frm)
            tos.append(to)

            # Center on the aspect.
            dep_tag, dep_idx, dep_dir = reshape_dependency_tree(frm, to, e['dependencies'], tokens=e['tokens'], max_hop=args.max_hop)

            # Because of tokenizer differences, aspect opsitions are off, so we find the index and try again.
            if len(dep_tag) == 0:
                zero_dep_counter += 1
                as_sent = e['aspect_sentiment'][i][0].split()
                as_start = e['tokens'].index(as_sent[0])
                # print(e['tokens'], e['aspect_sentiment'], e['dependencies'],as_sent[0])
                as_end = e['tokens'].index(
                    as_sent[-1]) if len(as_sent) > 1 else as_start + 1
                print("Debugging: as_start as_end ", as_start, as_end)
                dep_tag, dep_idx, dep_dir = reshape_dependency_tree(as_start, as_end, e['dependencies'], tokens=e['tokens'], max_hop=args.max_hop)
                if len(dep_tag) == 0:  # for debugging
                    print("Debugging: zero_dep",
                          e['aspect_sentiment'][i][0], e['tokens'])
                    print("Debugging: ". e['dependencies'])
                else:
                    zero_dep_counter -= 1

            dep_tags.append(dep_tag)
            dep_index.append(dep_idx)
            dep_dirs.append(dep_dir)

            total_counter[e['aspect_sentiment'][i][1]] += 1

            # Unrolling
            all_unrolled.append(
                {'sentence': e['tokens'], 'tags': e['tags'], 'pos_class': pos_class, 'aspect': aspect, 'sentiment': sentiment,
                    'predicted_dependencies': e['predicted_dependencies'], 'predicted_heads': e['predicted_heads'],
                 'from': frm, 'to': to, 'dep_tag': dep_tag, 'dep_idx': dep_idx, 'dep_dir':dep_dir,'dependencies': e['dependencies']})

    return all_unrolled

In [5]:
def load_datasets_and_vocabs(args):
    train, test = get_dataset(args.dataset_name)
    # Our model takes unrolled data, currently we don't consider the MAMS cases(future experiments)
    train_all_unrolled = get_rolled_and_unrolled_data(train, args)
    test_all_unrolled = get_rolled_and_unrolled_data(test, args)
    print("Size of the train dataset: {}".format(len(train_all_unrolled)))
    print("Size of the test dataset: {}".format(len(test_all_unrolled)))

    # Build word vocabulary(part of speech, dep_tag) and save pickles.
    word_vecs, word_vocab, dep_tag_vocab, pos_tag_vocab = load_and_cache_vocabs(train_all_unrolled+test_all_unrolled, args)

    embedding = torch.from_numpy(np.asarray(word_vecs, dtype=np.float32))
    args.glove_embedding = embedding

    train_dataset = ASBA_Depparsed_Dataset(train_all_unrolled, args, word_vocab, dep_tag_vocab, pos_tag_vocab)
    test_dataset = ASBA_Depparsed_Dataset(test_all_unrolled, args, word_vocab, dep_tag_vocab, pos_tag_vocab)

    return train_dataset, test_dataset, word_vocab, dep_tag_vocab, pos_tag_vocab



def reshape_dependency_tree(as_start, as_end, dependencies, tokens=None, max_hop = 5):
    '''
    Adding multi hops
    This function is at the core of our algo, it reshape the dependency tree and center on the aspect.
    In open-sourced edition, I choose not to take energy(the soft prediction of dependency from parser)
    into consideration. For it requires tweaking allennlp's source code, and the energy is space-consuming.
    And there are no significant difference in performance between the soft and the hard(with non-connect) version.

    '''
    dep_tag = []
    dep_idx = []
    dep_dir = []
    # 1 hop

    for i in range(as_start, as_end):
        for dep in dependencies:
            if i == dep[1] - 1:
                # not root, not aspect
                if (dep[2] - 1 < as_start or dep[2] - 1 >= as_end) and dep[2] != 0 and dep[2] - 1 not in dep_idx:
                    if str(dep[0]) != 'punct':  # and tokens[dep[2] - 1] not in stopWords
                        dep_tag.append(dep[0])
                        dep_dir.append(1)
                    else:
                        dep_tag.append('<pad>')
                        dep_dir.append(0)
                    dep_idx.append(dep[2] - 1)
            elif i == dep[2] - 1:
                # not root, not aspect
                if (dep[1] - 1 < as_start or dep[1] - 1 >= as_end) and dep[1] != 0 and dep[1] - 1 not in dep_idx:
                    if str(dep[0]) != 'punct':  # and tokens[dep[1] - 1] not in stopWords
                        dep_tag.append(dep[0])
                        dep_dir.append(2)
                    else:
                        dep_tag.append('<pad>')
                        dep_dir.append(0)
                    dep_idx.append(dep[1] - 1)

    # add aspect and index, to make sure length matches len(tokens)
    for idx, token in enumerate(tokens):
        if idx not in dep_idx:
            dep_tag.append('<pad>')
            dep_dir.append(0)
            dep_idx.append(idx)

    index = [i[0] for i in sorted(enumerate(dep_idx), key=lambda x:x[1])]
    dep_tag = [dep_tag[i] for i in index]
    dep_idx = [dep_idx[i] for i in index]
    dep_dir = [dep_dir[i] for i in index]

    assert len(tokens) == len(dep_idx), 'length wrong'
    return dep_tag, dep_idx, dep_dir




import pickle

def load_and_cache_vocabs(data, args):
    '''
    Build vocabulary of words, part of speech tags, dependency tags and cache them.
    Load glove embedding if needed.'''
    
    if not True:
        with open("cached_word_vocab.pkl", 'rb') as f:
            word_vocab = pickle.load(f)
        
        with open("cached_word_vecs.pkl", 'rb') as f:
            word_vecs = pickle.load(f)
        
        with open("cached_dep_tag_vocab.pkl", 'rb') as f:
            dep_tag_vocab = pickle.load(f)
            
        with open("cached_pos_tag_vocab.pkl", 'rb') as f:
            pos_tag_vocab = pickle.load(f)
        #pos_tag_vocab = build_pos_tag_vocab(data, min_freq=0)
        
    
    else:
        
        # Build or load word vocab and glove embeddings.
        word_vocab = build_text_vocab(data)
        print('Word vocab size: {}'.format(word_vocab['len']))
        #with open("cached_word_vocab.pkl", 'wb') as f:
            #pickle.dump(word_vocab, f, -1)
        
        word_vecs = load_glove_embedding(word_vocab['itos'], args.glove_dir, 0.25, args.embedding_dim)
        #print('Word vecs size')
        #with open("cached_word_vecs.pkl", 'wb') as f:
            #pickle.dump(word_vecs, f, -1)
            
        # Build vocab of dependency tags
        dep_tag_vocab = build_dep_tag_vocab(data, min_freq=0)
        #print('dep_tag_vocab size: {}'.format(dep_tag_vocab['len']))
        #with open("cached_dep_tag_vocab.pkl", 'wb') as f:
            #pickle.dump(dep_tag_vocab, f, -1)
            
        # Build vocab of part of speech tags.
        pos_tag_vocab = build_pos_tag_vocab(data, min_freq=0)
        #print('pos_tag_vocab size: {}'.format(pos_tag_vocab['len']))
        #with open("cached_pos_tag_vocab.pkl", 'wb') as f:
            #pickle.dump(pos_tag_vocab, f, -1)

    return word_vecs, word_vocab, dep_tag_vocab, pos_tag_vocab


def load_glove_embedding(word_list, glove_dir, uniform_scale, dimension_size):
    glove_words = []
    with open(os.path.join(glove_dir, 'glove.840B.300d.txt'), 'r', encoding="utf8") as fopen:
        for line in fopen:
            glove_words.append(line.strip().split(' ')[0])
    word2offset = {w: i for i, w in enumerate(glove_words)}
    word_vectors = []
    for word in word_list:
        if word in word2offset:
            line = linecache.getline(os.path.join(
                glove_dir, 'glove.840B.300d.txt'), word2offset[word]+1)
            assert(word == line[:line.find(' ')].strip())
            word_vectors.append(np.fromstring(
                line[line.find(' '):].strip(), sep=' ', dtype=np.float32))
        elif word == '<pad>':
            word_vectors.append(np.zeros(dimension_size, dtype=np.float32))
        else:
            word_vectors.append(
                np.random.uniform(-uniform_scale, uniform_scale, dimension_size))
    return word_vectors


def _default_unk_index():
    return 1


def build_text_vocab(data, vocab_size=100000, min_freq=2):
    counter = Counter()
    for d in data:
        s = d['sentence']
        counter.update(s)

    itos = ['[PAD]', '[UNK]']
    min_freq = max(min_freq, 1)

    # sort by frequency, then alphabetically
    words_and_frequencies = sorted(counter.items(), key=lambda tup: tup[0])
    words_and_frequencies.sort(key=lambda tup: tup[1], reverse=True)

    for word, freq in words_and_frequencies:
        if freq < min_freq or len(itos) == vocab_size:
            break
        itos.append(word)
    # stoi is simply a reverse dict for itos
    stoi = defaultdict(_default_unk_index)
    stoi.update({tok: i for i, tok in enumerate(itos)})

    return {'itos': itos, 'stoi': stoi, 'len': len(itos)}


def build_pos_tag_vocab(data, vocab_size=1000, min_freq=1):
    """
    Part of speech tags vocab.
    """
    counter = Counter()
    for d in data:
        tags = d['tags']
        counter.update(tags)

    itos = ['<pad>']
    min_freq = max(min_freq, 1)

    # sort by frequency, then alphabetically
    words_and_frequencies = sorted(counter.items(), key=lambda tup: tup[0])
    words_and_frequencies.sort(key=lambda tup: tup[1], reverse=True)

    for word, freq in words_and_frequencies:
        if freq < min_freq or len(itos) == vocab_size:
            break
        itos.append(word)
    # stoi is simply a reverse dict for itos
    stoi = defaultdict()
    stoi.update({tok: i for i, tok in enumerate(itos)})

    return {'itos': itos, 'stoi': stoi, 'len': len(itos)}



def build_dep_tag_vocab(data, vocab_size=1000, min_freq=0):
    counter = Counter()
    for d in data:
        tags = d['dep_tag']
        counter.update(tags)

    itos = ['<pad>', '<unk>']
    min_freq = max(min_freq, 1)

    # sort by frequency, then alphabetically
    words_and_frequencies = sorted(counter.items(), key=lambda tup: tup[0])
    words_and_frequencies.sort(key=lambda tup: tup[1], reverse=True)

    for word, freq in words_and_frequencies:
        if freq < min_freq or len(itos) == vocab_size:
            break
        if word == '<pad>':
            continue
        itos.append(word)
    # stoi is simply a reverse dict for itos
    stoi = defaultdict(_default_unk_index)
    stoi.update({tok: i for i, tok in enumerate(itos)})

    return {'itos': itos, 'stoi': stoi, 'len': len(itos)}


class ASBA_Depparsed_Dataset(Dataset):
    def __init__(self, data, args, word_vocab, dep_tag_vocab, pos_tag_vocab):
        self.data = data
        self.args = args
        self.word_vocab = word_vocab
        self.dep_tag_vocab = dep_tag_vocab
        self.pos_tag_vocab = pos_tag_vocab

        self.convert_features()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        e = self.data[idx]
        items = e['dep_tag_ids'], e['pos_class'], e['text_len'], e['aspect_len'], e['sentiment'], e['dep_rel_ids'], e['predicted_heads'], e['aspect_position'], e['dep_dir_ids']
        non_bert_items = e['sentence_ids'], e['aspect_ids']
        items_tensor = non_bert_items + items
        items_tensor = tuple(torch.tensor(t) for t in items_tensor)

        return items_tensor

    

    def convert_features(self):
        '''
        Convert sentence, aspects, pos_tags, dependency_tags to ids.
        '''
        for i in range(len(self.data)):
       
            self.data[i]['sentence_ids'] = [self.word_vocab['stoi'][w] for w in self.data[i]['sentence']]
            self.data[i]['aspect_ids'] = [self.word_vocab['stoi'][w] for w in self.data[i]['aspect']]
           

            self.data[i]['text_len'] = len(self.data[i]['sentence'])
            self.data[i]['aspect_position'] = [0] * self.data[i]['text_len']
            try:  # find the index of aspect in sentence
                for j in range(self.data[i]['from'], self.data[i]['to']):
                    self.data[i]['aspect_position'][j] = 1
            except:
                for term in self.data[i]['aspect']:
                    self.data[i]['aspect_position'][self.data[i]
                                                    ['sentence'].index(term)] = 1

            self.data[i]['dep_tag_ids'] = [self.dep_tag_vocab['stoi'][w]
                                           for w in self.data[i]['dep_tag']]
            self.data[i]['dep_dir_ids'] = [idx
                                           for idx in self.data[i]['dep_dir']]
            self.data[i]['pos_class'] = [self.pos_tag_vocab['stoi'][w]
                                             for w in self.data[i]['tags']]
            self.data[i]['aspect_len'] = len(self.data[i]['aspect'])

            self.data[i]['dep_rel_ids'] = [self.dep_tag_vocab['stoi'][r]
                                           for r in self.data[i]['predicted_dependencies']]


def my_collate(batch):
    '''
    Pad sentence and aspect in a batch.
    Sort the sentences based on length.
    Turn all into tensors.
    '''
    sentence_ids, aspect_ids, dep_tag_ids, pos_class, text_len, aspect_len, sentiment, dep_rel_ids, dep_heads, aspect_positions, dep_dir_ids = zip(
        *batch)  # from Dataset.__getitem__()
    text_len = torch.tensor(text_len)
    aspect_len = torch.tensor(aspect_len)
    sentiment = torch.tensor(sentiment)

    # Pad sequences.
    sentence_ids = pad_sequence(
        sentence_ids, batch_first=True, padding_value=0)
    aspect_ids = pad_sequence(aspect_ids, batch_first=True, padding_value=0)
    aspect_positions = pad_sequence(
        aspect_positions, batch_first=True, padding_value=0)

    dep_tag_ids = pad_sequence(dep_tag_ids, batch_first=True, padding_value=0)
    dep_dir_ids = pad_sequence(dep_dir_ids, batch_first=True, padding_value=0)
    pos_class = pad_sequence(pos_class, batch_first=True, padding_value=0)

    dep_rel_ids = pad_sequence(dep_rel_ids, batch_first=True, padding_value=0)
    dep_heads = pad_sequence(dep_heads, batch_first=True, padding_value=0)

    # Sort all tensors based on text len.
    _, sorted_idx = text_len.sort(descending=True)
    sentence_ids = sentence_ids[sorted_idx]
    aspect_ids = aspect_ids[sorted_idx]
    aspect_positions = aspect_positions[sorted_idx]
    dep_tag_ids = dep_tag_ids[sorted_idx]
    dep_dir_ids = dep_dir_ids[sorted_idx]
    pos_class = pos_class[sorted_idx]
    text_len = text_len[sorted_idx]
    aspect_len = aspect_len[sorted_idx]
    sentiment = sentiment[sorted_idx]
    dep_rel_ids = dep_rel_ids[sorted_idx]
    dep_heads = dep_heads[sorted_idx]

    return sentence_ids, aspect_ids, dep_tag_ids, pos_class, text_len, aspect_len, sentiment, dep_rel_ids, dep_heads, aspect_positions, dep_dir_ids

In [6]:
def mask_logits(target, mask):
    return target * mask + (1 - mask) * (-1e30)

class Gating(nn.Module):
    def __init__(self, layer_num, dim):
        super().__init__()
        self.layer_num = layer_num
        self.linear = nn.ModuleList([nn.Linear(dim, dim) for _ in range(layer_num)])
        self.gate = nn.ModuleList([nn.Linear(dim, dim) for _ in range(layer_num)])

    def forward(self, x):
        for i in range(self.layer_num):
            gate = torch.sigmoid(self.gate[i](x))
            nonlinear = F.relu(self.linear[i](x))
            x = gate * nonlinear + (1 - gate) * x
        return x

class DotprodAttention(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, feature, dep_tags, dmask):
        '''
        C feature/context [N, L, D]
        Q dep_tags_v          [N, D]
        mask dmask          [N, L]
        '''

        Q = dep_tags
        Q = Q.unsqueeze(2)  # (N, D, 1)
        dot_prod = torch.bmm(feature, Q)  # (N, L, 1)
        dmask = dmask.unsqueeze(2)  # (N, D, 1)
        attention_weight = mask_logits(dot_prod, dmask)  # (N, L ,1)
        attention = F.softmax(attention_weight, dim=1)  # (N, L, 1)

        out = torch.bmm(feature.transpose(1, 2), attention)  # (N, D, 1)
        out = out.squeeze(2)
        out = torch.sigmoid(out)
        # (N, D), ([N, L]), (N, L, 1)
        return out

class RelationAttention(nn.Module):
    def __init__(self, in_dim = 300, hidden_dim = 64):
        # in_dim: the dimension fo query vector
        super().__init__()

        self.fc1 = nn.Linear(in_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, feature, dep_tags_v, dmask):
        '''
        C feature/context [N, L, D]
        Q dep_tags_v          [N, L, D]
        mask dmask          [N, L]
        '''
        Q = self.fc1(dep_tags_v)
        Q = self.relu(Q)
        Q = self.fc2(Q)  # (N, L, 1)
        Q = Q.squeeze(2)
        Q = F.softmax(mask_logits(Q, dmask), dim=1)

        Q = Q.unsqueeze(2)
        out = torch.bmm(feature.transpose(1, 2), Q)
        out = out.squeeze(2)
        # out = F.sigmoid(out)
        return out  # ([N, L])

In [7]:
class TextGraphAttentionNetwork(nn.Module):
    def __init__(self, args, dep_tag_num):
        super(TextGraphAttentionNetwork, self).__init__()
        self.args = args

        num_embeddings, embed_dim = args.glove_embedding.shape
        self.embed = nn.Embedding(num_embeddings, embed_dim)
        self.embed.weight = nn.Parameter(args.glove_embedding, requires_grad=False)
        self.dropout = nn.Dropout(args.dropout)
        self.tanh = nn.Tanh()
        self.gate = Gating(args.num_layers, args.embedding_dim)
        self.rnn = nn.GRU(input_size=args.embedding_dim, hidden_size=args.hidden_size, bidirectional=True, batch_first=True, num_layers=args.num_layers)
    
        self.gat = [DotprodAttention().to(args.device) for i in range(args.num_heads)]
        self.gat_dep = [RelationAttention(in_dim = args.embedding_dim).to(args.device) for i in range(args.num_heads)]
        
        last_hidden_size = args.hidden_size*4

        layers = [nn.Linear(last_hidden_size, args.final_hidden_size), nn.ReLU()]
        
        for _ in range(args.num_mlps-1):
            layers += [nn.Linear(args.final_hidden_size, args.final_hidden_size), nn.ReLU()]
            
        self.dep_embed = nn.Embedding(dep_tag_num, args.embedding_dim)
        gcn_input_dim = args.hidden_size * 2
        self.fc = nn.Linear(args.embedding_dim, 2*args.hidden_size)
        self.fcs = nn.Sequential(*layers)
        self.fc_final = nn.Linear(args.final_hidden_size, args.num_classes)
        self.linear = nn.Linear(gcn_input_dim, gcn_input_dim)

    def forward(self, sentence, aspect, pos_class, dep_tags, text_len, aspect_len, dep_rels, dep_heads, aspect_position, dep_dirs):

        '''
        Forward takes:
            sentence: sentence_id of size (batch_size, text_length)
            aspect: aspect_id of size (batch_size, aspect_length)
            pos_class: pos_tag_id of size (batch_size, text_length)
            dep_tags: dep_tag_id of size (batch_size, text_length)
            text_len: (batch_size,) length of each sentence
            aspect_len: (batch_size, ) aspect length of each sentence
            dep_rels: (batch_size, text_length) relation
            dep_heads: (batch_size, text_length) which node adjacent to that node
            aspect_position: (batch_size, text_length) mask, with the position of aspect as 1 and others as 0
            dep_dirs: (batch_size, text_length) the directions each node to the aspect
        '''
        fmask = (torch.zeros_like(sentence) != sentence).float()  # (N，L)
        feature = self.embed(sentence)  # (N, L, D)
        feature = self.dropout(feature)
        feature = self.gate(feature)
        feature = self.fc(feature) # (N,L,D)
        
        dep_feature = self.dep_embed(dep_tags) 
        dep_feature = self.gate(dep_feature)

        dep_out = [g(feature, dep_feature, fmask).unsqueeze(1) for g in self.gat_dep] #(N, 1, D) * num_heads
        dep_out = torch.cat(dep_out, dim = 1) # (N, H, D)
        dep_out = dep_out.mean(dim = 1) # (N, D)
        
        #print("Shape of the feature", feature.shape)
        #print("Shape of the dep_out", dep_out.shape)
        
        gat_out = self.linear(feature) # (N, L, D)
        #print("Shape of the gat_out right now", gat_out.shape)
        fmask = fmask.unsqueeze(2)
        gat_out = gat_out * fmask
        
        #print("Shape of the gat_out after fmask", gat_out.shape)
        gat_out = F.relu(torch.sum(gat_out, dim = 1)) # (N, D)
        #print("Shape of the gat_out", gat_out.shape)
        
        feature_out = torch.cat([dep_out,  gat_out], dim = 1) # (N, D')
       
        # feature_out = gat_out
        #############################################################################################
        x = self.dropout(feature_out)
        x = self.fcs(x)
        logit = self.fc_final(x)
        return logit

In [8]:
class Parameters:
    def __init__(self):
        pass 

args = Parameters()
 # Required parameters
args.dataset_name = 'rest' # 'rest' 'laptop' 'twitter'
args.output_dir = 'data/output-gcn'
args.num_classes = 3
args.cuda_id = '3'
args.seed = 2022

# Model parameters
args.glove_dir = 'glove'
args.num_layers = 2
args.max_hop = 4
args.num_heads = 6
args.dropout = 0
args.num_gcn_layers = 1
args.gc_mem_dim = 300

args.gcn_dropout = 0.2
# GAT
args.gat = True
args.gat_out = True


args.gat_attention_type = 'dotprod' # 'linear' 'dotprod' 'gcn'
args.embedding_type = 'glove' # 'glove' 'bert'
args.embedding_dim = 300
args.dep_relation_embed_dim = 300
args.hidden_size = 300
args.final_hidden_size = 300
args.num_mlps = 2
# Training parameters
args.per_gpu_train_batch_size = 16
args.per_gpu_eval_batch_size = 32
args.gradient_accumulation_steps = 2
args.learning_rate = 1e-3
args.weight_decay = 0.0
args.adam_epsilon = 1e-8
args.max_grad_norm = 1.0
args.num_train_epochs = 30
args.max_steps = -1
args.logging_steps = 50

import pickle

In [9]:
def get_input_from_batch(args, batch):
    # sentence_ids, aspect_ids, dep_tag_ids, pos_class, text_len, aspect_len, sentiment, dep_rel_ids, dep_heads, aspect_positions
    inputs = {  'sentence': batch[0],
                'aspect': batch[1], # aspect token
                'dep_tags': batch[2], # reshaped
                'pos_class': batch[3],
                'text_len': batch[4],
                'aspect_len': batch[5],
                'dep_rels': batch[7], # adj no-reshape
                'dep_heads': batch[8],
                'aspect_position': batch[9],
                'dep_dirs': batch[10]
                }
    labels = batch[6]

    return inputs, labels

def train(args, train_dataset, model, eval_dataset):
    '''Train the model'''
    args.train_batch_size = args.per_gpu_train_batch_size
    args.eval_batch_size = args.per_gpu_eval_batch_size

    train_sampler = RandomSampler(train_dataset)
    eval_sampler = SequentialSampler(eval_dataset)

    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size, collate_fn=my_collate)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size, collate_fn=my_collate)


    parameters = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=args.learning_rate)

    # Train
    print("Started Training...")
    print("Num examples = {}".format(len(train_dataset)))
    print("Num Epochs = {}".format(args.num_train_epochs))
    print("Instantaneous batch size per GPU = {}".format(args.per_gpu_train_batch_size))

    all_eval_results = []

    set_seed(args)

    for epoch in range(args.num_train_epochs):
        model.train()
        train_loss = 0.0

        for step, batch in enumerate(train_dataloader):
            model.zero_grad()
            batch = tuple(t.to(args.device) for t in batch)
            inputs, labels = get_input_from_batch(args, batch)
            logit = model(**inputs)
            loss = F.cross_entropy(logit, labels)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        model.eval()
        eval_loss = 0.0
        preds = None
        out_label_ids = None
        input_store = None
        
        for step, batch in enumerate(eval_dataloader):
            with torch.no_grad():
                batch = tuple(t.to(args.device) for t in batch)
                inputs, labels = get_input_from_batch(args, batch)
                logits = model(**inputs)
                loss = F.cross_entropy(logits, labels)
                eval_loss += loss.item()
    
                
                if preds is None:
                    inputs_indxs = [inputs['sentence'].detach().cpu().numpy()]
                    preds = logits.detach().cpu().numpy()
                    out_label_ids = labels.detach().cpu().numpy()
                    #print(len(inputs_indxs), len(logits), len(labels))
                else:
                    current_inputs = inputs['sentence'].detach().cpu().numpy()
                    inputs_indxs.append(current_inputs)
                    
                    preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                    out_label_ids = np.append(out_label_ids, labels.detach().cpu().numpy(), axis=0)
                    
        
        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)
    
        result['train_loss'] = train_loss/len(train_dataloader)
        result['eval_loss'] = eval_loss/len(eval_dataloader)
        result['epoch'] = epoch

        all_eval_results.append(result)
        print("Epoch: {}; train_loss: {}; eval_loss: {}; eval_acc: {}; eval_f1: {}".format(epoch + 1, np.round(train_loss/len(train_dataloader), 4), np.round(eval_loss/len(eval_dataloader), 4), np.round(result['acc'], 4), np.round(result['f1'], 4)))
    
    
    with open("preds.pkl", 'wb') as f:
        pickle.dump(preds, f, -1) 
        
    
    with open("inputs_indxs.pkl", 'wb') as f:
        pickle.dump(inputs_indxs, f, -1) 
        
    with open("out_label_ids.pkl", 'wb') as f:
        pickle.dump(out_label_ids, f, -1) 
            
    return all_eval_results

def compute_metrics(preds, labels):
    acc = accuracy_score(y_true=labels, y_pred=preds)
    f1 = f1_score(y_true=labels, y_pred=preds, average='macro')
    precision = precision_score(y_true=labels, y_pred=preds, average='macro')
    recall = recall_score(y_true=labels, y_pred=preds, average='macro')
    return {
        "acc": acc,
        "f1": f1,
        "precision": precision,
        "recall": recall
    }


In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args.device = device
print('Device is {}'.format(args.device))

# Set seed
set_seed(args)
# Load datasets and vocabs
train_dataset, test_dataset, word_vocab, dep_tag_vocab, pos_tag_vocab= load_datasets_and_vocabs(args)
args.dep_tag_vocab_size = len(dep_tag_vocab['stoi'])
#dep_tag_vocab

Device is cuda
Read rest Train set: 1978
Read rest Test set: 600
Size of the train dataset: 3602
Size of the test dataset: 1120
Word vocab size: 3508


In [11]:
dep_tag_vocab

{'itos': ['<pad>',
  '<unk>',
  'det',
  'nsubj',
  'amod',
  'conj',
  'pobj',
  'dobj',
  'cc',
  'prep',
  'dep',
  'poss',
  'nn',
  'advmod',
  'rcmod',
  'appos',
  'cop',
  'nsubjpass',
  'num',
  'partmod',
  'aux',
  'ccomp',
  'advcl',
  'predet',
  'auxpass',
  'infmod',
  'xcomp',
  'neg',
  'mark',
  'possessive',
  'pcomp',
  'iobj',
  'parataxis',
  'csubj',
  'npadvmod',
  'tmod',
  'preconj',
  'prt',
  'expl',
  'acomp',
  'discourse',
  'mwe'],
 'stoi': defaultdict(<function __main__._default_unk_index()>,
             {'<pad>': 0,
              '<unk>': 1,
              'det': 2,
              'nsubj': 3,
              'amod': 4,
              'conj': 5,
              'pobj': 6,
              'dobj': 7,
              'cc': 8,
              'prep': 9,
              'dep': 10,
              'poss': 11,
              'nn': 12,
              'advmod': 13,
              'rcmod': 14,
              'appos': 15,
              'cop': 16,
              'nsubjpass': 17,
      

## Prelim results

| Metrics\Datasets      | Twitter | Restaurant | Laptop     | 
| :---        |    :----:   |  :----:|        ---: |
| Precision      | 0.70       | 0.632 | 0.5950  |
| Recall   | 0.60        | 0.582 | 0.6069     |
| F1   | 0.623        | 0.573 | 0.5919     |
| Accuracy   | 0.669        | 0.7366 | 0.6489     |


In [12]:
model = TextGraphAttentionNetwork(args, dep_tag_vocab['len']).to(args.device)
model

TextGraphAttentionNetwork(
  (embed): Embedding(3508, 300)
  (dropout): Dropout(p=0, inplace=False)
  (tanh): Tanh()
  (gate): Gating(
    (linear): ModuleList(
      (0): Linear(in_features=300, out_features=300, bias=True)
      (1): Linear(in_features=300, out_features=300, bias=True)
    )
    (gate): ModuleList(
      (0): Linear(in_features=300, out_features=300, bias=True)
      (1): Linear(in_features=300, out_features=300, bias=True)
    )
  )
  (rnn): GRU(300, 300, num_layers=2, batch_first=True, bidirectional=True)
  (dep_embed): Embedding(42, 300)
  (fc): Linear(in_features=300, out_features=600, bias=True)
  (fcs): Sequential(
    (0): Linear(in_features=1200, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): ReLU()
  )
  (fc_final): Linear(in_features=300, out_features=3, bias=True)
  (linear): Linear(in_features=600, out_features=600, bias=True)
)

In [13]:
# Train
all_eval_results = train(args, train_dataset, model, test_dataset)
best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
print("Best Eval result is: ")
print(best_eval_result)

Started Training...
Num examples = 3602
Num Epochs = 30
Instantaneous batch size per GPU = 16
Epoch: 1; train_loss: 0.7712; eval_loss: 0.6023; eval_acc: 0.7411; eval_f1: 0.5316
Epoch: 2; train_loss: 0.6486; eval_loss: 0.6111; eval_acc: 0.7821; eval_f1: 0.6468
Epoch: 3; train_loss: 0.568; eval_loss: 0.5313; eval_acc: 0.7875; eval_f1: 0.6867
Epoch: 4; train_loss: 0.4965; eval_loss: 0.5922; eval_acc: 0.7705; eval_f1: 0.672
Epoch: 5; train_loss: 0.4451; eval_loss: 0.6142; eval_acc: 0.7732; eval_f1: 0.6412
Epoch: 6; train_loss: 0.4234; eval_loss: 0.6755; eval_acc: 0.7598; eval_f1: 0.6352
Epoch: 7; train_loss: 0.3905; eval_loss: 0.634; eval_acc: 0.758; eval_f1: 0.6388
Epoch: 8; train_loss: 0.3397; eval_loss: 0.6438; eval_acc: 0.7625; eval_f1: 0.6348
Epoch: 9; train_loss: 0.3041; eval_loss: 0.9568; eval_acc: 0.7598; eval_f1: 0.6227
Epoch: 10; train_loss: 0.283; eval_loss: 0.9613; eval_acc: 0.7625; eval_f1: 0.6296
Epoch: 11; train_loss: 0.2592; eval_loss: 0.9596; eval_acc: 0.7393; eval_f1: 0.6

In [14]:
test_sentences = []

In [15]:
with open("preds.pkl", 'rb') as f:
    preds = pickle.load(f)
    
with open("inputs_indxs.pkl", 'rb') as f:
    inputs_indxs = pickle.load(f)

with open("out_label_ids.pkl", 'rb') as f:
    out_label_ids = pickle.load(f)

In [16]:
def i2token(sentence):
    sents = []
    for index in sentence:
        sents.append(word_vocab['itos'][index])
    
    sents = ' '.join(sents)
    return sents

flat_inputs = []

for item in inputs_indxs:
    for subitem in item:
        flat_inputs.append(subitem)
len(flat_inputs)


#sentiments_lookup = {'negative': 0, 'positive': 1, 'neutral': 2}

miss_classifications = []
correct_classifications = []

for i in range(len(flat_inputs)):
    index_to_token = i2token(flat_inputs[i])
    if out_label_ids[i]!=preds[i]:
        miss_classifications.append([index_to_token, out_label_ids[i], preds[i]])
    
    else:
        correct_classifications.append([index_to_token, out_label_ids[i], preds[i]])

In [17]:
miss_classifications

[['i love the drinks , esp lychee martini , and the food is also very good . [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
  1,
  2],
 ['i [UNK] the people at go sushi , it never [UNK] . [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
  1,
  2],
 ['try the rose roll ( not on menu ) . [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
  2,
  1],
 ['great food but the service was dreadful ! [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
  1,
  0],
 ["the sangria 's - [UNK] down . [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]",
  0,
  1],
 ['how pretentious and inappropriate for mj grill to claim that it provides power lunch and dinners ! [PAD] [PAD

In [18]:
correct_classifications

[['from the beginning , we were met by friendly staff memebers , and the convienent parking at chelsea piers made it easy for us to get to the boat .',
  1,
  1],
 ['from the beginning , we were met by friendly staff memebers , and the convienent parking at chelsea piers made it easy for us to get to the boat .',
  1,
  1],
 ['anytime and everytime i find myself in the neighborhood i will go to sushi rose for fresh sushi and great portions all at a reasonable price . [PAD] [PAD] [PAD]',
  1,
  1],
 ['anytime and everytime i find myself in the neighborhood i will go to sushi rose for fresh sushi and great portions all at a reasonable price . [PAD] [PAD] [PAD]',
  1,
  1],
 ['anytime and everytime i find myself in the neighborhood i will go to sushi rose for fresh sushi and great portions all at a reasonable price . [PAD] [PAD] [PAD]',
  1,
  1],
 ['certainly not the best sushi in new york , however , it is always fresh , and the place is very clean , [UNK] . [PAD] [PAD] [PAD] [PAD] [PAD