In [1]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from tree import *
from torch.nn.utils.rnn import pad_sequence
import argparse
import codecs
import json
import linecache
import logging
import os
import pickle
import random
import sys
from collections import Counter, defaultdict
from copy import copy, deepcopy

import nltk
import numpy as np
import simplejson as json
import torch
from allennlp.modules.elmo import batch_to_ids
from lxml import etree
from nltk import word_tokenize
from nltk.tokenize import TreebankWordTokenizer
from torch.utils.data import DataLoader, Dataset


import warnings
warnings.filterwarnings('always')

In [2]:
def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

In [3]:
def read_sentence_depparsed(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
        return data


def get_dataset(dataset_name):
    rest_train = 'data/semeval14/Restaurants_Train_v2_biaffine_depparsed_with_energy.json'
    rest_test = 'data/semeval14/Restaurants_Test_Gold_biaffine_depparsed_with_energy.json'

    laptop_train = 'data/semeval14/Laptop_Train_v2_biaffine_depparsed.json'
    laptop_test = 'data/semeval14/Laptops_Test_Gold_biaffine_depparsed.json'

    twitter_train = 'data/twitter/train_biaffine.json'
    twitter_test = 'data/twitter/test_biaffine.json'

    ds_train = {'rest': rest_train,
                'laptop': laptop_train, 
                'twitter': twitter_train}
    ds_test = {'rest': rest_test,
               'laptop': laptop_test, 
               'twitter': twitter_test}

    train = list(read_sentence_depparsed(ds_train[dataset_name]))
    test = list(read_sentence_depparsed(ds_test[dataset_name]))
    
    print("Read {} Train set: {}".format(dataset_name, len(train)))
    print("Read {} Test set: {}".format(dataset_name, len(test)))

    return train, test

In [4]:
def get_rolled_and_unrolled_data(input_data, args):
    # A hand-picked set of part of speech tags that we see contributes to ABSA.
    all_unrolled = []
    # Make sure the tree is successfully built.
    zero_dep_counter = 0

    # Sentiment counters
    total_counter = defaultdict(int)
    sentiments_lookup = {'negative': 0, 'positive': 1, 'neutral': 2}

    # for seeking 'but' examples
    for e in input_data:
        e['tokens'] = [x.lower() for x in e['tokens']]
        aspects = []
        sentiments = []
        froms = []
        tos = []
        dep_tags = []
        dep_index = []
        dep_dirs = []

        # Classify based on POS-tags
        pos_class = e['tags']

        # Iterate through aspects in a sentence and reshape the dependency tree.
        for i in range(len(e['aspect_sentiment'])):
            aspect = e['aspect_sentiment'][i][0].lower()
            # We would tokenize the aspect while at it.
            aspect = word_tokenize(aspect)
            sentiment = sentiments_lookup[e['aspect_sentiment'][i][1]]
            frm = e['from_to'][i][0]
            to = e['from_to'][i][1]

            aspects.append(aspect)
            sentiments.append(sentiment)
            froms.append(frm)
            tos.append(to)

            # Center on the aspect.
            dep_tag, dep_idx, dep_dir = reshape_dependency_tree(frm, to, e['dependencies'], tokens=e['tokens'], max_hop=args.max_hop)

            # Because of tokenizer differences, aspect opsitions are off, so we find the index and try again.
            if len(dep_tag) == 0:
                zero_dep_counter += 1
                as_sent = e['aspect_sentiment'][i][0].split()
                as_start = e['tokens'].index(as_sent[0])
                # print(e['tokens'], e['aspect_sentiment'], e['dependencies'],as_sent[0])
                as_end = e['tokens'].index(
                    as_sent[-1]) if len(as_sent) > 1 else as_start + 1
                print("Debugging: as_start as_end ", as_start, as_end)
                dep_tag, dep_idx, dep_dir = reshape_dependency_tree(as_start, as_end, e['dependencies'], tokens=e['tokens'], max_hop=args.max_hop)
                if len(dep_tag) == 0:  # for debugging
                    print("Debugging: zero_dep",
                          e['aspect_sentiment'][i][0], e['tokens'])
                    print("Debugging: ". e['dependencies'])
                else:
                    zero_dep_counter -= 1

            dep_tags.append(dep_tag)
            dep_index.append(dep_idx)
            dep_dirs.append(dep_dir)

            total_counter[e['aspect_sentiment'][i][1]] += 1

            # Unrolling
            all_unrolled.append(
                {'sentence': e['tokens'], 'tags': e['tags'], 'pos_class': pos_class, 'aspect': aspect, 'sentiment': sentiment,
                    'predicted_dependencies': e['predicted_dependencies'], 'predicted_heads': e['predicted_heads'],
                 'from': frm, 'to': to, 'dep_tag': dep_tag, 'dep_idx': dep_idx, 'dep_dir':dep_dir,'dependencies': e['dependencies']})

    return all_unrolled

In [5]:
def load_datasets_and_vocabs(args):
    train, test = get_dataset(args.dataset_name)
    # Our model takes unrolled data, currently we don't consider the MAMS cases(future experiments)
    train_all_unrolled = get_rolled_and_unrolled_data(train, args)
    test_all_unrolled = get_rolled_and_unrolled_data(test, args)
    print("Size of the train dataset: {}".format(len(train_all_unrolled)))
    print("Size of the test dataset: {}".format(len(test_all_unrolled)))

    # Build word vocabulary(part of speech, dep_tag) and save pickles.
    word_vecs, word_vocab, dep_tag_vocab, pos_tag_vocab = load_and_cache_vocabs(train_all_unrolled+test_all_unrolled, args)

    train_dataset = ASBA_Depparsed_Dataset(train_all_unrolled, args, word_vocab, dep_tag_vocab, pos_tag_vocab)
    test_dataset = ASBA_Depparsed_Dataset(test_all_unrolled, args, word_vocab, dep_tag_vocab, pos_tag_vocab)

    return train_dataset, test_dataset, word_vocab, dep_tag_vocab, pos_tag_vocab



def reshape_dependency_tree(as_start, as_end, dependencies, tokens=None, max_hop = 5):
    '''
    Adding multi hops
    This function is at the core of our algo, it reshape the dependency tree and center on the aspect.
    In open-sourced edition, I choose not to take energy(the soft prediction of dependency from parser)
    into consideration. For it requires tweaking allennlp's source code, and the energy is space-consuming.
    And there are no significant difference in performance between the soft and the hard(with non-connect) version.

    '''
    dep_tag = []
    dep_idx = []
    dep_dir = []
    # 1 hop

    for i in range(as_start, as_end):
        for dep in dependencies:
            if i == dep[1] - 1:
                # not root, not aspect
                if (dep[2] - 1 < as_start or dep[2] - 1 >= as_end) and dep[2] != 0 and dep[2] - 1 not in dep_idx:
                    if str(dep[0]) != 'punct':  # and tokens[dep[2] - 1] not in stopWords
                        dep_tag.append(dep[0])
                        dep_dir.append(1)
                    else:
                        dep_tag.append('<pad>')
                        dep_dir.append(0)
                    dep_idx.append(dep[2] - 1)
            elif i == dep[2] - 1:
                # not root, not aspect
                if (dep[1] - 1 < as_start or dep[1] - 1 >= as_end) and dep[1] != 0 and dep[1] - 1 not in dep_idx:
                    if str(dep[0]) != 'punct':  # and tokens[dep[1] - 1] not in stopWords
                        dep_tag.append(dep[0])
                        dep_dir.append(2)
                    else:
                        dep_tag.append('<pad>')
                        dep_dir.append(0)
                    dep_idx.append(dep[1] - 1)

    # add aspect and index, to make sure length matches len(tokens)
    for idx, token in enumerate(tokens):
        if idx not in dep_idx:
            dep_tag.append('<pad>')
            dep_dir.append(0)
            dep_idx.append(idx)

    index = [i[0] for i in sorted(enumerate(dep_idx), key=lambda x:x[1])]
    dep_tag = [dep_tag[i] for i in index]
    dep_idx = [dep_idx[i] for i in index]
    dep_dir = [dep_dir[i] for i in index]

    assert len(tokens) == len(dep_idx), 'length wrong'
    return dep_tag, dep_idx, dep_dir




import pickle

def load_and_cache_vocabs(data, args):
    '''
    Build vocabulary of words, part of speech tags, dependency tags and cache them.
    Load glove embedding if needed.'''
    
    word_vocab = None
    word_vecs = None
    dep_tag_vocab = build_dep_tag_vocab(data, min_freq=0)
    pos_tag_vocab = build_pos_tag_vocab(data, min_freq=0)

    return word_vecs, word_vocab, dep_tag_vocab, pos_tag_vocab





def _default_unk_index():
    return 1


def build_text_vocab(data, vocab_size=100000, min_freq=2):
    counter = Counter()
    for d in data:
        s = d['sentence']
        counter.update(s)

    itos = ['[PAD]', '[UNK]']
    min_freq = max(min_freq, 1)

    # sort by frequency, then alphabetically
    words_and_frequencies = sorted(counter.items(), key=lambda tup: tup[0])
    words_and_frequencies.sort(key=lambda tup: tup[1], reverse=True)

    for word, freq in words_and_frequencies:
        if freq < min_freq or len(itos) == vocab_size:
            break
        itos.append(word)
    # stoi is simply a reverse dict for itos
    stoi = defaultdict(_default_unk_index)
    stoi.update({tok: i for i, tok in enumerate(itos)})

    return {'itos': itos, 'stoi': stoi, 'len': len(itos)}


def build_pos_tag_vocab(data, vocab_size=1000, min_freq=1):
    """
    Part of speech tags vocab.
    """
    counter = Counter()
    for d in data:
        tags = d['tags']
        counter.update(tags)

    itos = ['<pad>']
    min_freq = max(min_freq, 1)

    # sort by frequency, then alphabetically
    words_and_frequencies = sorted(counter.items(), key=lambda tup: tup[0])
    words_and_frequencies.sort(key=lambda tup: tup[1], reverse=True)

    for word, freq in words_and_frequencies:
        if freq < min_freq or len(itos) == vocab_size:
            break
        itos.append(word)
    # stoi is simply a reverse dict for itos
    stoi = defaultdict()
    stoi.update({tok: i for i, tok in enumerate(itos)})

    return {'itos': itos, 'stoi': stoi, 'len': len(itos)}



def build_dep_tag_vocab(data, vocab_size=1000, min_freq=0):
    counter = Counter()
    for d in data:
        tags = d['dep_tag']
        counter.update(tags)

    itos = ['<pad>', '<unk>']
    min_freq = max(min_freq, 1)

    # sort by frequency, then alphabetically
    words_and_frequencies = sorted(counter.items(), key=lambda tup: tup[0])
    words_and_frequencies.sort(key=lambda tup: tup[1], reverse=True)

    for word, freq in words_and_frequencies:
        if freq < min_freq or len(itos) == vocab_size:
            break
        if word == '<pad>':
            continue
        itos.append(word)
    # stoi is simply a reverse dict for itos
    stoi = defaultdict(_default_unk_index)
    stoi.update({tok: i for i, tok in enumerate(itos)})

    return {'itos': itos, 'stoi': stoi, 'len': len(itos)}


class ASBA_Depparsed_Dataset(Dataset):
    def __init__(self, data, args, word_vocab, dep_tag_vocab, pos_tag_vocab):
        self.data = data
        self.args = args
        self.word_vocab = word_vocab
        self.dep_tag_vocab = dep_tag_vocab
        self.pos_tag_vocab = pos_tag_vocab

        self.convert_features()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        e = self.data[idx]
        items = e['dep_tag_ids'], e['pos_class'], e['text_len'], e['aspect_len'], e['sentiment'], e['dep_rel_ids'], e['predicted_heads'], e['aspect_position'], e['dep_dir_ids']
        bert_items = e['input_cat_ids'], e['segment_ids']
        items_tensor = tuple(torch.tensor(t) for t in bert_items)
        items_tensor += tuple(torch.tensor(t) for t in items)

        return items_tensor
    
    
    def convert_features(self):
        '''
        Convert sentence, aspects, pos_tags, dependency_tags to ids.
        '''
        for i in range(len(self.data)):
            self.convert_features_bert(i)
            self.data[i]['text_len'] = len(self.data[i]['sentence'])
            self.data[i]['aspect_position'] = [0] * self.data[i]['text_len']
            try:  # find the index of aspect in sentence
                for j in range(self.data[i]['from'], self.data[i]['to']):
                    self.data[i]['aspect_position'][j] = 1
            except:
                for term in self.data[i]['aspect']:
                    self.data[i]['aspect_position'][self.data[i]
                                                    ['sentence'].index(term)] = 1

            self.data[i]['dep_tag_ids'] = [self.dep_tag_vocab['stoi'][w]
                                           for w in self.data[i]['dep_tag']]
            self.data[i]['dep_dir_ids'] = [idx
                                           for idx in self.data[i]['dep_dir']]
            self.data[i]['pos_class'] = [self.pos_tag_vocab['stoi'][w]
                                             for w in self.data[i]['tags']]
            self.data[i]['aspect_len'] = len(self.data[i]['aspect'])

            self.data[i]['dep_rel_ids'] = [self.dep_tag_vocab['stoi'][r]
                                           for r in self.data[i]['predicted_dependencies']]
    

    def convert_features_bert(self, i):
        """
        BERT features.
        convert sentence to feature. 
        """
        cls_token = "[CLS]"
        sep_token = "[SEP]"
        pad_token = 0
        # tokenizer = self.args.tokenizer

        tokens = []
        word_indexer = []
        aspect_tokens = []
        aspect_indexer = []

        for word in self.data[i]['sentence']:
            word_tokens = self.args.tokenizer.tokenize(word)
            token_idx = len(tokens)
            tokens.extend(word_tokens)
            # word_indexer is for indexing after bert, feature back to the length of original length.
            word_indexer.append(token_idx)

        # aspect
        for word in self.data[i]['aspect']:
            word_aspect_tokens = self.args.tokenizer.tokenize(word)
            token_idx = len(aspect_tokens)
            aspect_tokens.extend(word_aspect_tokens)
            aspect_indexer.append(token_idx)

        # The convention in BERT is:
        # (a) For sequence pairs:
        #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
        #  type_ids:   0   0  0    0    0     0       0   0   1  1  1  1   1   1
        # (b) For single sequences:
        #  tokens:   [CLS] the dog is hairy . [SEP]
        #  type_ids:   0   0   0   0  0     0   0

        tokens = [cls_token] + tokens + [sep_token]
        aspect_tokens = [cls_token] + aspect_tokens + [sep_token]
        word_indexer = [i+1 for i in word_indexer]
        aspect_indexer = [i+1 for i in aspect_indexer]

        input_ids = self.args.tokenizer.convert_tokens_to_ids(tokens)
        input_aspect_ids = self.args.tokenizer.convert_tokens_to_ids(aspect_tokens)

        # check len of word_indexer equals to len of sentence.
        assert len(word_indexer) == len(self.data[i]['sentence'])
        assert len(aspect_indexer) == len(self.data[i]['aspect'])

        # THE STEP:Zero-pad up to the sequence length, save to collate_fn.

        input_cat_ids = input_ids + input_aspect_ids[1:]
        segment_ids = [0] * len(input_ids) + [1] * len(input_aspect_ids[1:])

        self.data[i]['input_cat_ids'] = input_cat_ids
        self.data[i]['segment_ids'] = segment_ids


def my_collate_pure_bert(batch):
    '''
    Pad sentence and aspect in a batch.
    Sort the sentences based on length.
    Turn all into tensors.
    Process bert feature
    Pure Bert: cat text and aspect, cls to predict.
    Test indexing while at it?
    '''
    # sentence_ids, aspect_ids
    input_cat_ids, segment_ids, dep_tag_ids, pos_class, text_len, aspect_len, sentiment, dep_rel_ids, dep_heads, aspect_positions, dep_dir_ids = zip(
        *batch)  # from Dataset.__getitem__()

    text_len = torch.tensor(text_len)
    aspect_len = torch.tensor(aspect_len)
    sentiment = torch.tensor(sentiment)

    # Pad sequences.
    input_cat_ids = pad_sequence(
        input_cat_ids, batch_first=True, padding_value=0)
    segment_ids = pad_sequence(segment_ids, batch_first=True, padding_value=0)

    aspect_positions = pad_sequence(
        aspect_positions, batch_first=True, padding_value=0)

    dep_tag_ids = pad_sequence(dep_tag_ids, batch_first=True, padding_value=0)
    dep_dir_ids = pad_sequence(dep_dir_ids, batch_first=True, padding_value=0)
    pos_class = pad_sequence(pos_class, batch_first=True, padding_value=0)

    dep_rel_ids = pad_sequence(dep_rel_ids, batch_first=True, padding_value=0)
    dep_heads = pad_sequence(dep_heads, batch_first=True, padding_value=0)

    # Sort all tensors based on text len.
    _, sorted_idx = text_len.sort(descending=True)
    input_cat_ids = input_cat_ids[sorted_idx]
    segment_ids = segment_ids[sorted_idx]
    aspect_positions = aspect_positions[sorted_idx]
    dep_tag_ids = dep_tag_ids[sorted_idx]

    dep_dir_ids = dep_dir_ids[sorted_idx]
    pos_class = pos_class[sorted_idx]
    text_len = text_len[sorted_idx]
    aspect_len = aspect_len[sorted_idx]
    sentiment = sentiment[sorted_idx]
    dep_rel_ids = dep_rel_ids[sorted_idx]
    dep_heads = dep_heads[sorted_idx]

    return input_cat_ids, segment_ids, dep_tag_ids, pos_class, text_len, aspect_len, sentiment, dep_rel_ids, dep_heads, aspect_positions, dep_dir_ids

In [6]:
from transformers import BertModel, BertConfig, BertPreTrainedModel, BertTokenizer


class Pure_Bert(nn.Module):
    '''
    Bert for sequence classification.
    '''

    def __init__(self, args, hidden_size=256):
        super(Pure_Bert, self).__init__()

        config = BertConfig.from_pretrained(args.bert_model_dir)
        self.tokenizer = BertTokenizer.from_pretrained(args.bert_model_dir)
        self.bert = BertModel.from_pretrained(args.bert_model_dir, config=config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        layers = [nn.Linear(config.hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, args.num_classes)]
        self.classifier = nn.Sequential(*layers)

    def forward(self, input_ids, token_type_ids):
        outputs = self.bert(input_ids, token_type_ids=token_type_ids)
        # pool output is usually *not* a good summary of the semantic content of the input,
        # you're often better with averaging or poolin the sequence of hidden-states for the whole input sequence.
        pooled_output = outputs[1]
        # pooled_output = torch.mean(pooled_output, dim = 1)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        return logits

In [7]:
class Parameters:
    def __init__(self):
        pass 

args = Parameters()
 # Required parameters
args.dataset_name = 'laptop' # 'rest' 'laptop' 'twitter'
args.output_dir = 'data/output-gcn'
args.num_classes = 3
args.cuda_id = '3'
args.seed = 2022

# Model parameters
args.glove_dir = 'glove'
args.num_layers = 2
args.max_hop = 4
args.num_heads = 6
args.dropout = 0
args.num_gcn_layers = 1
args.gc_mem_dim = 300

args.gcn_dropout = 0.2
# GAT
args.gat = True
args.gat_out = True


args.gat_attention_type = 'dotprod' # 'linear' 'dotprod' 'gcn'
args.embedding_type = 'bert' # 'glove' 'bert'
args.embedding_dim = 300
args.dep_relation_embed_dim = 300
args.hidden_size = 300
args.final_hidden_size = 300
args.num_mlps = 2
# Training parameters
args.per_gpu_train_batch_size = 16
args.per_gpu_eval_batch_size = 32
args.gradient_accumulation_steps = 2
args.learning_rate = 1e-3
args.weight_decay = 0.0
args.adam_epsilon = 1e-8
args.max_grad_norm = 1.0
args.num_train_epochs = 30
args.max_steps = -1
args.logging_steps = 50

import pickle

In [19]:
def get_input_from_batch(args, batch):
    inputs = {  'input_ids': batch[0],
                'token_type_ids': batch[1]}
    labels = batch[6]
    return inputs, labels

def train(args, train_dataset, model, eval_dataset):
    '''Train the model'''
    args.train_batch_size = args.per_gpu_train_batch_size
    args.eval_batch_size = args.per_gpu_eval_batch_size

    train_sampler = RandomSampler(train_dataset)
    eval_sampler = SequentialSampler(eval_dataset)

    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size, collate_fn=my_collate_pure_bert)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size, collate_fn=my_collate_pure_bert)


    parameters = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=args.learning_rate)

    # Train
    print("Started Training...")
    print("Num examples = {}".format(len(train_dataset)))
    print("Num Epochs = {}".format(args.num_train_epochs))
    print("Instantaneous batch size per GPU = {}".format(args.per_gpu_train_batch_size))

    all_eval_results = []

    set_seed(args)

    for epoch in range(args.num_train_epochs):
        model.train()
        train_loss = 0.0

        for step, batch in enumerate(train_dataloader):
            model.zero_grad()
            batch = tuple(t.to(args.device) for t in batch)
            inputs, labels = get_input_from_batch(args, batch)
            logit = model(**inputs)
            loss = F.cross_entropy(logit, labels)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        model.eval()
        eval_loss = 0.0
        preds = None
        out_label_ids = None
        input_store = None
        
        for step, batch in enumerate(eval_dataloader):
            with torch.no_grad():
                batch = tuple(t.to(args.device) for t in batch)
                inputs, labels = get_input_from_batch(args, batch)
                logits = model(**inputs)
                loss = F.cross_entropy(logits, labels)
                eval_loss += loss.item()
    
                
                if preds is None:
                    inputs_indxs = [inputs['input_ids'].detach().cpu().numpy()]
                    preds = logits.detach().cpu().numpy()
                    out_label_ids = labels.detach().cpu().numpy()
                    #print(len(inputs_indxs), len(logits), len(labels))
                else:
                    current_inputs = inputs['input_ids'].detach().cpu().numpy()
                    inputs_indxs.append(current_inputs)
                    
                    preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                    out_label_ids = np.append(out_label_ids, labels.detach().cpu().numpy(), axis=0)
                    
        
        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)
    
        result['train_loss'] = train_loss/len(train_dataloader)
        result['eval_loss'] = eval_loss/len(eval_dataloader)
        result['epoch'] = epoch

        all_eval_results.append(result)
        print("Epoch: {}; train_loss: {}; eval_loss: {}; eval_acc: {}; eval_f1: {}".format(epoch + 1, np.round(train_loss/len(train_dataloader), 4), np.round(eval_loss/len(eval_dataloader), 4), np.round(result['acc'], 4), np.round(result['f1'], 4)))
    
    
    with open("preds_bert.pkl", 'wb') as f:
        pickle.dump(preds, f, -1) 
        
    
    with open("inputs_ids_bert.pkl", 'wb') as f:
        pickle.dump(inputs_indxs, f, -1) 
        
    with open("out_label_ids_bert.pkl", 'wb') as f:
        pickle.dump(out_label_ids, f, -1) 
            
    return all_eval_results

def compute_metrics(preds, labels):
    acc = accuracy_score(y_true=labels, y_pred=preds)
    f1 = f1_score(y_true=labels, y_pred=preds, average='macro')
    precision = precision_score(y_true=labels, y_pred=preds, average='macro')
    recall = recall_score(y_true=labels, y_pred=preds, average='macro')
    return {
        "acc": acc,
        "f1": f1,
        "precision": precision,
        "recall": recall
    }


In [20]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args.device = device
print('Device is {}'.format(args.device))
args.bert_model_dir = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(args.bert_model_dir)
args.tokenizer = tokenizer
# Set seed
set_seed(args)
# Load datasets and vocabs
train_dataset, test_dataset, word_vocab, dep_tag_vocab, pos_tag_vocab= load_datasets_and_vocabs(args)
args.dep_tag_vocab_size = len(dep_tag_vocab['stoi'])
#dep_tag_vocab

Device is cuda
Read laptop Train set: 1462
Read laptop Test set: 411
Size of the train dataset: 2313
Size of the test dataset: 638


## Prelim results

| Metrics\Datasets      | Twitter | Restaurant | Laptop     | 
| :---        |    :----:   |  :----:|        ---: |
| Precision      | 0.70       | 0.632 | 0.5950  |
| Recall   | 0.60        | 0.582 | 0.6069     |
| F1   | 0.623        | 0.573 | 0.5919     |
| Accuracy   | 0.669        | 0.7366 | 0.6489     |


In [21]:
model = Pure_Bert(args).to(args.device)
model

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Pure_Bert(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
  

In [22]:
# Train
all_eval_results = train(args, train_dataset, model, test_dataset)
best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
print("Best Eval result is: ")
print(best_eval_result)

Started Training...
Num examples = 2313
Num Epochs = 30
Instantaneous batch size per GPU = 16


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 1; train_loss: 1.1411; eval_loss: 1.103; eval_acc: 0.2006; eval_f1: 0.1114


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 2; train_loss: 1.0606; eval_loss: 1.0526; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 3; train_loss: 1.0615; eval_loss: 1.0715; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 4; train_loss: 1.054; eval_loss: 1.0851; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 5; train_loss: 1.0551; eval_loss: 1.0825; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 6; train_loss: 1.0545; eval_loss: 1.0794; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 7; train_loss: 1.0562; eval_loss: 1.0753; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 8; train_loss: 1.0548; eval_loss: 1.0737; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 9; train_loss: 1.0554; eval_loss: 1.0725; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 10; train_loss: 1.0557; eval_loss: 1.0974; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 11; train_loss: 1.0553; eval_loss: 1.0737; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 12; train_loss: 1.0561; eval_loss: 1.0517; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 13; train_loss: 1.0555; eval_loss: 1.076; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 14; train_loss: 1.056; eval_loss: 1.0825; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 15; train_loss: 1.056; eval_loss: 1.0781; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 16; train_loss: 1.0556; eval_loss: 1.0985; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 17; train_loss: 1.0548; eval_loss: 1.0771; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 18; train_loss: 1.0543; eval_loss: 1.0811; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 19; train_loss: 1.0558; eval_loss: 1.0727; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 20; train_loss: 1.0545; eval_loss: 1.0796; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 21; train_loss: 1.0547; eval_loss: 1.0698; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 22; train_loss: 1.0545; eval_loss: 1.0865; eval_acc: 0.5345; eval_f1: 0.2322


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 23; train_loss: 1.0577; eval_loss: 1.072; eval_acc: 0.5345; eval_f1: 0.2322


KeyboardInterrupt: 

In [None]:
test_sentences = []

In [None]:
with open("preds.pkl", 'rb') as f:
    preds = pickle.load(f)
    
with open("inputs_indxs.pkl", 'rb') as f:
    inputs_indxs = pickle.load(f)

with open("out_label_ids.pkl", 'rb') as f:
    out_label_ids = pickle.load(f)

In [None]:
def i2token(sentence):
    sents = []
    for index in sentence:
        sents.append(word_vocab['itos'][index])
    
    sents = ' '.join(sents)
    return sents

flat_inputs = []

for item in inputs_indxs:
    for subitem in item:
        flat_inputs.append(subitem)
len(flat_inputs)


#sentiments_lookup = {'negative': 0, 'positive': 1, 'neutral': 2}

miss_classifications = []
correct_classifications = []

for i in range(len(flat_inputs)):
    index_to_token = i2token(flat_inputs[i])
    if out_label_ids[i]!=preds[i]:
        miss_classifications.append([index_to_token, out_label_ids[i], preds[i]])
    
    else:
        correct_classifications.append([index_to_token, out_label_ids[i], preds[i]])

In [None]:
miss_classifications

In [None]:
correct_classifications