In [1]:
import collections
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torchcrf import CRF
from sklearn import preprocessing

import collections
import random
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pickle


def train_test_split_sentences(X, y, test_size=0.3, random_state=0, shuffle=True):
    """Split token-level data into train/validation sets along sentence boundaries.

    ``X`` and ``y`` are parallel sequences in which an ``X`` entry equal to
    ``''`` separates two sentences.  Tokens are regrouped into sentences, every
    sentence is terminated with an ``'<eos>'`` row (numbered one past its last
    token, tagged ``'<eos>'``), whole sentences are distributed with
    ``sklearn.model_selection.train_test_split`` and both parts are flattened
    back to token level.

    Args:
        X: token rows whose first element is the token number, or ``''`` as a
            sentence separator.
        y: tags aligned with ``X``.
        test_size, random_state, shuffle: forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_val, y_train, y_val)`` as flat lists without separators.
    """
    X_by_sentences = []
    y_by_sentences = []
    sentence = []
    sentence_tags = []

    def close_sentence(tokens, tags):
        # Terminate the sentence with an explicit '<eos>' row and store it.
        tokens.append([tokens[-1][0] + 1, '<eos>', '', ''])
        tags.append('<eos>')
        X_by_sentences.append(tokens)
        y_by_sentences.append(tags)

    for line, tag in zip(X, y):
        if line == '':
            # Consecutive separators leave `sentence` empty and are ignored.
            if sentence:
                close_sentence(sentence, sentence_tags)
                sentence = []
                sentence_tags = []
        else:
            sentence.append(line)
            sentence_tags.append(tag)

    # Input that does not end with a separator would otherwise lose its
    # final sentence.
    if sentence:
        close_sentence(sentence, sentence_tags)

    X_train, X_val, y_train, y_val = train_test_split(
        X_by_sentences, y_by_sentences,
        test_size=test_size, random_state=random_state, shuffle=shuffle)

    def flatten(sentences):
        return [item for sent in sentences for item in sent]

    return flatten(X_train), flatten(X_val), flatten(y_train), flatten(y_val)


def write_data(X_test, y_test, filename):
    """Write tokens and their tags to ``filename`` as tab-separated lines.

    Each entry of ``X_test`` is indexed as ``[num, word, lem, POS]``.  Rows
    whose word is ``'<eos>'`` are not written.  A blank line is emitted before
    every token numbered 1 except the first written one, and once more at the
    end of the file.  The written columns are: number, word, ``-``, POS,
    ``O``, ``0``, an empty column, and the tag (``'<eos>'`` tags are written
    as an empty string).
    """
    with open(filename, 'w', encoding='utf-8') as out:
        wrote_any = False
        for token, tag in zip(X_test, y_test):
            if token[1] == '<eos>':
                continue
            # Token number 1 starts a new sentence: separate it from the
            # previous one, unless nothing has been written yet.
            if wrote_any and token[0] == 1:
                out.write('\n')
            label = '' if tag == '<eos>' else tag
            out.write(f'{token[0]}\t{token[1]}\t-\t{token[3]}\tO\t0\t\t{label}\n')
            wrote_any = True
        out.write('\n')


def extract_data(data_in):
    """Parse raw corpus lines into token rows and supersense tags.

    Every non-blank line must hold exactly nine tab-separated fields; the
    first four (number, word, lemma, POS) and the eighth (supersense) are
    kept.  A blank line yields ``''`` in both outputs so that sentence
    boundaries stay visible to the caller.

    Args:
        data_in: iterable of lines, with or without a trailing newline.

    Returns:
        ``(X, y)`` where ``X`` holds ``[int(num), word, lem, pos]`` rows (or
        ``''`` for a blank line) and ``y`` the matching supersense strings.

    Raises:
        ValueError: if a non-blank line does not have nine fields or its
            first field is not an integer.
    """
    X, y = [], []
    for line in data_in:
        # Remove only an actual line terminator: a final line without one must
        # not lose its last character (which may be a field-separating tab).
        row = line[:-1] if line.endswith('\n') else line
        if not row:
            X.append('')
            y.append('')
            continue
        num, word, lem, pos, _mwe, _, _, supersenses, _ = row.split('\t')
        X.append([int(num), word, lem, pos])
        y.append(supersenses)
    return X, y

"""
def transform(X, y, X_val, y_val, max_len=16, batch_size=64):
    #le = preprocessing.LabelEncoder()
    #le.fit(y+y_val)#+["<eos>"])
    #int_labels = le.transform(y)
    #int_labels_val = le.transform(y_val)
    le = collections.defaultdict(lambda: len(le))
    le['<eos>'] = 0
    int_labels = [le[yi] for yi in y]
    int_labels_val = [le[yi] for yi in y_val]
    
    #le_vocab = preprocessing.LabelEncoder()
    #vocab = [text for _, text, _, _ in X]#+["<eos>"]
    #vocab_val = [text for _, text, _, _ in X_val]#+["<eos>"]
    ##int_texts = le_vocab.fit_transform(vocab)
    ##int_texts_val = le_vocab.fit_transform(vocab_val)
    #le_vocab.fit(vocab+vocab_val)
    #int_texts = le_vocab.transform(vocab)
    #int_texts_val = le_vocab.transform(vocab_val)
    le_vocab = collections.defaultdict(lambda: len(le_vocab))
    le_vocab['<eos>'] = 0
    vocab = [text for _, text, _, _ in X]
    vocab_val = [text for _, text, _, _ in X_val]
    int_texts = [le_vocab[word] for word in vocab]
    int_texts_val = [le_vocab[word] for word in vocab_val]
    
    def encode_sentence(int_texts, int_labels):
        temp_x, temp_y, final_y, final_x = [], [], [], []
        #eos_symbol = le_vocab.transform(['<eos>'])
        eos_symbol = le_vocab['<eos>']
        for x, y in zip(int_texts, int_labels) :
            temp_x.append(x)
            temp_y.append(y)
            if x == eos_symbol :
                final_x.append(temp_x)
                final_y.append(temp_y)
                temp_x = []
                temp_y = []
        
        X = torch.zeros(len(final_x), max_len).long()
        Y = torch.zeros(len(final_y), max_len).long()
        for i, (text, label) in enumerate(zip(final_x, final_y)):
            length = min(max_len, len(text))
            X[i,:length] = torch.LongTensor(text[:length])
            Y[i,:length] = torch.LongTensor(label[:length])
        
        print(len(int_texts))
        print(len(final_x))
        print(X.size())
        
        return X, Y

    #print(len(int_texts))
    #print(len(int_texts_val))
    
    X_train, Y_train = encode_sentence(int_texts, int_labels)
    X_valid, Y_valid = encode_sentence(int_texts_val, int_labels_val)
    
    #print(X_valid.size())
    #print(X_train.size())
    
    train_set = TensorDataset(X_train, Y_train)
    valid_set = TensorDataset(X_valid, Y_valid)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_set, batch_size=batch_size)
    
    return vocab, le_vocab, le, train_loader, valid_loader
"""

le_pos = collections.defaultdict(lambda: len(le_pos))


def transform(X, y, X_val, y_val, max_len=16, batch_size=64):
    """Encode train/validation tokens and tags as padded id tensors.

    Words, tags and POS strings receive consecutive integer ids in order of
    first appearance (training data first, then validation data).  The word
    and tag mappings both reserve id 0 for ``'<eos>'``; POS ids come from the
    module-level ``le_pos`` mapping.  Sentences end at a token whose word id
    is the ``'<eos>'`` id and are zero-padded or truncated to ``max_len``.

    Prints, for each split, the number of tokens, the number of sentences and
    the size of the input tensor.

    Returns:
        ``(vocab, le_vocab, le, train_loader, valid_loader)`` where ``vocab``
        is the list of ``[word, pos]`` pairs of the training tokens,
        ``le_vocab`` / ``le`` are the word / tag id mappings, and the loaders
        yield ``(inputs, targets)`` batches with inputs of shape
        ``(batch, max_len, 2)`` and targets of shape ``(batch, max_len)``.
    """
    le = collections.defaultdict(lambda: len(le))
    le['<eos>'] = 0
    int_labels = [le[tag] for tag in y]
    int_labels_val = [le[tag] for tag in y_val]

    le_vocab = collections.defaultdict(lambda: len(le_vocab))
    le_vocab['<eos>'] = 0
    vocab = [[text, pos] for _, text, _, pos in X]
    vocab_val = [[text, pos] for _, text, _, pos in X_val]
    int_texts = [[le_vocab[word], le_pos[pos]] for word, pos in vocab]
    int_texts_val = [[le_vocab[word], le_pos[pos]] for word, pos in vocab_val]

    eos_id = le_vocab['<eos>']

    def encode_sentence(pairs, labels):
        # Cut the flat token stream after every '<eos>' word id.
        sentences = []
        current_pairs, current_labels = [], []
        for pair, label in zip(pairs, labels):
            current_pairs.append(pair)
            current_labels.append(label)
            if pair[0] == eos_id:
                sentences.append((current_pairs, current_labels))
                current_pairs, current_labels = [], []

        # Zero-initialised tensors provide the padding for short sentences.
        inputs = torch.zeros(len(sentences), max_len, 2).long()
        targets = torch.zeros(len(sentences), max_len).long()
        for row, (sent_pairs, sent_labels) in enumerate(sentences):
            keep = min(max_len, len(sent_pairs))
            inputs[row, :keep] = torch.LongTensor(sent_pairs[:keep])
            targets[row, :keep] = torch.LongTensor(sent_labels[:keep])

        print(len(pairs))
        print(len(sentences))
        print(inputs.size())

        return inputs, targets

    train_set = TensorDataset(*encode_sentence(int_texts, int_labels))
    valid_set = TensorDataset(*encode_sentence(int_texts_val, int_labels_val))

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_set, batch_size=batch_size)

    return vocab, le_vocab, le, train_loader, valid_loader


def load_pretrained_weights(filename, vocab_size=None, le_vocab=None, from_pickle=False, dim=300):
    """Load an embedding matrix from a pickle cache or a text ``.vec`` file.

    Example files: 'pretrained_weights.pkl' (``from_pickle=True``) or
    'wiki-news-300d-1M.vec' (``from_pickle=False``).

    Args:
        filename: path of the file to read.
        vocab_size: number of rows of the matrix to build; required when
            reading a ``.vec`` file, unused otherwise.
        le_vocab: mapping from word to row index; required when reading a
            ``.vec`` file, unused otherwise.
        from_pickle: if true, return the unpickled content of ``filename``.
        dim: embedding dimension of the ``.vec`` file.

    Returns:
        The unpickled object, or a ``(vocab_size, dim)`` float tensor whose
        rows are zero for words that never matched a line of the file.

    The first line of a ``.vec`` file is skipped as a header.  Each word of
    the file is lowercased before being looked up in ``le_vocab``, so keys of
    ``le_vocab`` containing uppercase characters never match, and a later
    line that lowercases to the same key overwrites an earlier one.
    """
    if from_pickle:
        # NOTE(security): unpickling can execute arbitrary code; only load
        # cache files produced by this project.
        with open(filename, 'rb') as fin:
            return pickle.load(fin)

    pretrained_weights = torch.zeros(vocab_size, dim)
    with open(filename, encoding="utf-8") as fp:
        fp.readline()  # header line
        for line in fp:
            tokens = line.strip().split()
            if not tokens:
                # A blank line (e.g. at the end of the file) carries no vector.
                continue
            word = tokens[0].lower()
            # Membership test first: indexing a defaultdict would insert the word.
            if word in le_vocab:
                pretrained_weights[le_vocab[word]] = torch.FloatTensor([float(x) for x in tokens[1:]])
    return pretrained_weights

In [2]:
from Model import Model

class GRU(nn.Module):
    """Single-layer unidirectional GRU tagger over frozen pretrained embeddings.

    Args:
        pretrained_weights: float tensor installed as the (non-trainable)
            embedding matrix.
        le: mapping containing ``'<eos>'``; its length sizes the embedding
            layer and ``le['<eos>']`` is used as the padding index.
        embed_size: width of the embedding vectors fed to the GRU.
        hidden_size: GRU hidden size.
        num_labels: number of output scores per token.  Defaults to
            ``len(le)``, the original behaviour; pass the size of the tag
            mapping when ``le`` is a word vocabulary.
    """

    def __init__(self, pretrained_weights, le, embed_size=300, hidden_size=128, num_labels=None):
        super().__init__()
        self.embed = nn.Embedding(len(le), embed_size, padding_idx=le['<eos>'])
        # Replace the randomly initialised matrix with the frozen pretrained one.
        self.embed.weight = nn.Parameter(pretrained_weights, requires_grad=False)
        self.rnn = nn.GRU(embed_size, hidden_size, bias=False, num_layers=1, bidirectional=False, batch_first=True)
        self.dropout = nn.Dropout(0.3)
        out_features = len(le) if num_labels is None else num_labels
        self.decision = nn.Linear(hidden_size, out_features)

    def forward(self, x):
        """Return per-token scores for a batch of id sequences ``x``.

        ``x`` is indexed directly by the embedding layer; the result has one
        score vector per position of ``x``.
        """
        embed = self.embed(x)
        output, _hidden = self.rnn(embed)
        return self.decision(self.dropout(output))

class GRU_with_POS(nn.Module):
    """GRU tagger whose input is a word embedding plus the raw POS id.

    Args:
        pretrained_weights: float tensor installed as the (non-trainable)
            embedding matrix.
        le: mapping containing ``'<eos>'``; its length sizes the embedding
            layer and ``le['<eos>']`` is used as the padding index.
        embed_size: width of the embedding vectors; the GRU input is one
            wider because the POS id is prepended as a single feature.
        hidden_size: GRU hidden size.
        num_labels: number of output scores per token.  Defaults to
            ``len(le)``, the original behaviour; pass the size of the tag
            mapping when ``le`` is a word vocabulary.
    """

    def __init__(self, pretrained_weights, le, embed_size=300, hidden_size=128, num_labels=None):
        super().__init__()
        self.embed = nn.Embedding(len(le), embed_size, padding_idx=le['<eos>'])
        # Replace the randomly initialised matrix with the frozen pretrained one.
        self.embed.weight = nn.Parameter(pretrained_weights, requires_grad=False)
        self.rnn = nn.GRU(embed_size+1, hidden_size, bias=False, num_layers=1, bidirectional=False, batch_first=True)
        self.dropout = nn.Dropout(0.3)
        out_features = len(le) if num_labels is None else num_labels
        self.decision = nn.Linear(hidden_size, out_features)

    def forward(self, x):
        """Return per-token scores for ``x`` of shape ``(batch, seq, 2)``.

        ``x[:, :, 0]`` holds word ids and ``x[:, :, 1]`` POS ids.
        """
        embed = self.embed(x[:, :, 0])
        # Make the integer -> float conversion of the POS column explicit
        # rather than relying on implicit dtype promotion inside torch.cat.
        pos_feature = x[:, :, 1].unsqueeze(-1).to(embed.dtype)
        concat = torch.cat((pos_feature, embed), dim=2)
        output, _hidden = self.rnn(concat)
        return self.decision(self.dropout(output))


class NN_Model(Model): # Compatible with torch model only
    """Training / evaluation wrapper around a torch sequence-tagging module.

    Args:
        model: torch module mapping a batch of inputs to scores of shape
            ``(batch, seq, classes)``.
        criterion: loss applied to the flattened scores and targets; a fresh
            ``nn.CrossEntropyLoss()`` is created when omitted.
        optim: optimizer class, instantiated in ``fit`` on the trainable
            parameters of ``model``.
    """

    def __init__(self, model, criterion=None, optim=optim.Adam):
        super().__init__()
        self.model = model
        # Build the default loss per instance rather than once at definition time.
        self.criterion = nn.CrossEntropyLoss() if criterion is None else criterion
        self.optim = optim

    def _batch_loss(self, y_scores, y):
        # Flatten (batch, seq, classes) / (batch, seq) to per-token rows.
        n_tokens = y.size(0) * y.size(1)
        return self.criterion(y_scores.view(n_tokens, -1), y.view(n_tokens))

    def fit(self, train_loader, valid_loader, epochs):
        """Train for ``epochs`` epochs, printing one progress line per epoch.

        Each line shows the epoch number, the sum of the batch losses divided
        by the number of training sequences, and the two values returned by
        ``perf(valid_loader)``.

        NOTE(review): the loss is accumulated once per batch but divided by
        the number of sequences, so with a mean-reducing criterion the printed
        value is not a per-token average -- confirm this is intended.
        """
        trainable = [param for param in self.model.parameters() if param.requires_grad]
        optimizer = self.optim(trainable)
        for epoch in range(epochs):
            self.model.train()
            total_loss = num = 0
            for x, y in train_loader:
                optimizer.zero_grad()
                loss = self._batch_loss(self.model(x), y)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                num += len(y)
            train_loss = total_loss / num if num else 0.
            print(epoch+1, train_loss, *self.perf(valid_loader))

    def perf(self, loader):
        """Return ``(loss, accuracy)`` of the model on ``loader``.

        The loss is the sum of the batch losses divided by the number of
        sequences.  Accuracy only counts positions whose target id is not 0.
        Both values are 0.0 when there is nothing to evaluate.
        """
        self.model.eval()
        total_loss = 0.
        correct = num_loss = num_perf = 0
        with torch.no_grad():
            for x, y in loader:
                y_scores = self.model(x)
                loss = self._batch_loss(y_scores, y)
                y_pred = torch.max(y_scores, 2)[1]
                mask = (y != 0)
                # Accumulate plain ints so an empty loader does not leave
                # `correct` without an `.item()` method.
                correct += torch.sum((y_pred == y) & mask).item()
                total_loss += loss.item()
                num_loss += len(y)
                num_perf += torch.sum(mask).item()
        mean_loss = total_loss / num_loss if num_loss else 0.
        accuracy = correct / num_perf if num_perf else 0.
        return mean_loss, accuracy

    def predict(self, X_test):
        """Return the highest-scoring class id for every position of ``X_test``."""
        self.model.eval()
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            return torch.max(self.model(X_test), 2)[1]

    def score(self, X_test, y_test, exception_class): # TODO
        """Compute tagging metrics, treating ``exception_class`` as "no label".

        ``y_test`` must hold one target id per predicted position.  Prints
        the number of evaluated positions.

        Returns:
            precision = #(correct)/#(predicted)
            recall = #(correct)/#(gold)
            F1_score = 2*precision*recall / (precision + recall)
            accuracy = #(correct: supersense or no-supersense)/#(tokens)
        """
        y_hat = self.predict(X_test).view((-1,))
        y_test = y_test.view((-1,))
        print(len(y_test))

        nb_tokens = len(y_test)
        # Vectorised counts instead of a Python loop over tensor elements.
        matches = (y_hat == y_test)
        gold = (y_test != exception_class)
        nb_predicted = int((y_hat != exception_class).sum())
        nb_gold = int(gold.sum())
        nb_correct = int((gold & matches).sum())
        nb_equal = int(matches.sum())

        precision = nb_correct/nb_predicted if nb_predicted != 0 else 0.
        recall = nb_correct/nb_gold if nb_gold != 0 else 0.
        f1_score = 2*precision*recall / (precision + recall) if (precision + recall) != 0 else 0.
        accuracy = nb_equal/nb_tokens if nb_tokens != 0 else 0.
        return precision, recall, f1_score, accuracy

In [3]:
# Read both corpus files with an explicit encoding and close them right away.
with open('../dimsum-data-1.5/dimsum16.train', 'r', encoding='utf-8') as train_file:
    data = train_file.readlines()
with open('../dimsum-data-1.5/dimsum16.test.blind', 'r', encoding='utf-8') as test_file:
    data_test = test_file.readlines()

# Parse the training corpus, hold out whole sentences for validation and
# write the held-out part as the gold file for the evaluation script.
X, y = extract_data(data)
X, X_val, y, y_val = train_test_split_sentences(X, y)
write_data(X_val, y_val, 'val.gold')

In [4]:
# Encode tokens and tags as integer ids and build the train/validation
# loaders; every sentence is padded or truncated to 16 positions.
vocab, le_vocab, le, train_loader, valid_loader = transform(X, y, X_val, y_val, max_len=16, batch_size=64)

55050
3359
torch.Size([3359, 16, 2])
23575
1440
torch.Size([1440, 16, 2])


In [5]:
import pickle

# Disabled one-off step, kept for reference: build the embedding matrix from
# the text .vec file and cache it as a pickle.
#pretrained_weights = load_pretrained_weights('wiki-news-300d-1M.vec', vocab_size=len(vocab), le_vocab=le_vocab)
#with open(f"pretrained_weights_2.pkl", 'wb') as fo:
#    pickle.dump(pretrained_weights, fo)

# Load the cached embedding matrix.
# NOTE(review): the cache presumably has to match the current `le_vocab` ids -- confirm.
pretrained_weights = load_pretrained_weights('pretrained_weights_2.pkl', from_pickle=True)

In [6]:
# NOTE(review): `device` is computed but not used anywhere in this cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): the word vocabulary `le_vocab` is passed as the model's `le`
# argument, which also sizes the decision layer, so the network emits
# len(le_vocab) scores per token rather than len(le) -- confirm intended.
gru_model = NN_Model(model=GRU_with_POS(pretrained_weights, le_vocab, embed_size=300, hidden_size=128),
                     criterion=nn.CrossEntropyLoss(),
                     optim=optim.Adam
                    )

# Train for 25 epochs; one progress line is printed per epoch.
gru_model.fit(train_loader, valid_loader, epochs=25)

1 0.07108283731404356 0.031353670441442064 0.6704186766720267
2 0.028384675969677194 0.024640491273668078 0.6636293252337562
3 0.023536625026272465 0.021717457721630733 0.6658328866654756
4 0.021074600573202158 0.019613308128383425 0.6721457923887797
5 0.019119259004402672 0.017932635959651735 0.6735751295336787
6 0.017760595472556703 0.016742323463161787 0.6756595795366566
7 0.016750457972492198 0.01588693434993426 0.6773866952534096
8 0.01593866599177491 0.015167261246177885 0.6886427252694896
9 0.015287824154751896 0.014554420196347766 0.6991840867131202
10 0.014800362925545095 0.01403190435634719 0.7028169852897386
11 0.01430757218224905 0.013589147478342056 0.7109761181585373
12 0.0139155463648538 0.013227879794107543 0.7181823595974034
13 0.013579118443443932 0.012910589368806945 0.7286641653266631
14 0.013274554874116943 0.012686140131619242 0.7348579596212257
15 0.013053311393813882 0.012373704090714454 0.7412899767732714
16 0.012773082099453471 0.012144850608375337 0.744148651

In [7]:
"""
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

gru_model = NN_Model(model=GRU(pretrained_weights, le_vocab, embed_size=300, hidden_size=128),
                     criterion=nn.CrossEntropyLoss(),
                     optim=optim.Adam
                    )

gru_model.fit(train_loader, valid_loader, epochs=25)
"""

"\ndevice = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n\ngru_model = NN_Model(model=GRU(pretrained_weights, le_vocab, embed_size=300, hidden_size=128),\n                     criterion=nn.CrossEntropyLoss(),\n                     optim=optim.Adam\n                    )\n\ngru_model.fit(train_loader, valid_loader, epochs=25)\n"

In [8]:
X[:30]

[[1, 'Men', 'men', 'NOUN'],
 [2, 's', 's', 'PART'],
 [3, 'and', 'and', 'CONJ'],
 [4, 'Boys', 'boy', 'NOUN'],
 [5, 'Barbers', 'barber', 'NOUN'],
 [6, ',', ',', 'PUNCT'],
 [7, 'on', 'on', 'ADP'],
 [8, 'the', 'the', 'DET'],
 [9, 'number', 'number', 'NOUN'],
 [10, '9', '9', 'NUM'],
 [11, 'Bus', 'bus', 'NOUN'],
 [12, 'route', 'route', 'NOUN'],
 [13, '.', '.', 'PUNCT'],
 [14, '<eos>', '', ''],
 [1, 'I', 'i', 'PRON'],
 [2, 'was', 'be', 'AUX'],
 [3, 'told', 'tell', 'VERB'],
 [4, 'management', 'management', 'NOUN'],
 [5, 'would', 'would', 'AUX'],
 [6, 'call', 'call', 'VERB'],
 [7, 'me', 'me', 'PRON'],
 [8, 'back', 'back', 'ADP'],
 [9, 'but', 'but', 'CONJ'],
 [10, 'still', 'still', 'ADV'],
 [11, 'waiting', 'wait', 'VERB'],
 [12, 'for', 'for', 'ADP'],
 [13, 'that', 'that', 'DET'],
 [14, 'call', 'call', 'NOUN'],
 [15, '.', '.', 'PUNCT'],
 [16, '<eos>', '', '']]

In [9]:
def transform_test(le_vocab, le, X, y=None, max_len=16):
    """Encode tokens (and optionally tags) with existing id mappings.

    Args:
        le_vocab: word -> id mapping; must contain ``'<eos>'``.
        le: tag -> id mapping; only used when ``y`` is given.
        X: token rows ``[num, word, lem, pos]`` in which every sentence ends
            with an ``'<eos>'`` word.
        y: tags aligned with ``X``.  When omitted, every target id is 0.
        max_len: sentences are zero-padded or truncated to this length.

    Returns:
        ``(X_test, Y_test)``: a ``(sentences, max_len, 2)`` tensor of
        ``[word id, POS id]`` pairs and a ``(sentences, max_len)`` tensor of
        tag ids.

    POS ids are read from the module-level ``le_pos`` mapping.

    NOTE(review): if the mappings are defaultdicts, as created by
    ``transform``, unseen words / POS / tags receive fresh ids here, which a
    model sized from the earlier mappings may not cover -- confirm.
    """
    vocab = [[text, pos] for _, text, _, pos in X]
    int_texts = [[le_vocab[word], le_pos[pos]] for word, pos in vocab]
    if y is None:
        # No gold tags available: use placeholder targets instead of failing
        # on iteration over None.
        int_labels = [0] * len(int_texts)
    else:
        int_labels = [le[yi] for yi in y]

    def encode_sentence(int_texts, int_labels):
        temp_x, temp_y, final_y, final_x = [], [], [], []
        eos_symbol = le_vocab['<eos>']
        # Cut the flat token stream after every '<eos>' word id.
        for x, y in zip(int_texts, int_labels):
            temp_x.append(x)
            temp_y.append(y)
            if x[0] == eos_symbol:
                final_x.append(temp_x)
                final_y.append(temp_y)
                temp_x = []
                temp_y = []

        # Zero-initialised tensors provide the padding for short sentences.
        X = torch.zeros(len(final_x), max_len, 2).long()
        Y = torch.zeros(len(final_y), max_len).long()
        for i, (text, label) in enumerate(zip(final_x, final_y)):
            length = min(max_len, len(text))
            X[i,:length] = torch.LongTensor(text[:length])
            Y[i,:length] = torch.LongTensor(label[:length])

        return X, Y

    X_test, Y_test = encode_sentence(int_texts, int_labels)

    return X_test, Y_test


"""
def transform_test(le_vocab, le, X, y=None, max_len=16):
    vocab = [text for _, text, _, _ in X]
    #int_texts = le_vocab.transform(vocab)
    #int_labels = le.transform(y)
    int_texts = [le_vocab[word] for word in vocab]
    int_labels = [le[yi] for yi in y]
    
    
    def encode_sentence(int_texts, int_labels):
        temp_x, temp_y, final_y, final_x = [], [], [], []
        #eos_symbol = le_vocab.transform(['<eos>'])
        eos_symbol = le_vocab['<eos>']
        for x, y in zip(int_texts, int_labels) :
            temp_x.append(x)
            temp_y.append(y)
            if x == eos_symbol :
                final_x.append(temp_x)
                final_y.append(temp_y)
                temp_x = []
                temp_y = []
        
        X = torch.zeros(len(final_x), max_len).long()
        Y = torch.zeros(len(final_y), max_len).long()
        for i, (text, label) in enumerate(zip(final_x, final_y)):
            length = min(max_len, len(text))
            X[i,:length] = torch.LongTensor(text[:length])
            Y[i,:length] = torch.LongTensor(label[:length])
        
        #print(len(int_texts))
        #print(len(final_x))
        #print(X.size())
        
        return X, Y

    #print(len(int_texts))
    #print(len(int_texts_val))
    
    X_test, Y_test = encode_sentence(int_texts, int_labels)
    
    return X_test, Y_test
"""

def align_pred(X_orig, y_pred, le):
    """Flatten per-sentence predictions into one prediction per token row.

    ``X_orig`` is walked in order; a row whose number is 1 (other than the
    very first row) moves on to the next row of ``y_pred``.  Positions beyond
    the width of ``y_pred`` receive ``le['']``.

    Returns:
        A list with one predicted id per row of ``X_orig``.
    """
    width = y_pred.size(1)
    aligned = []
    sent_idx = 0
    tok_idx = 0

    for position, token in enumerate(X_orig):
        # Token number 1 marks the start of a new sentence.
        if position > 0 and token[0] == 1:
            sent_idx += 1
            tok_idx = 0

        if tok_idx < width:
            aligned.append(y_pred[sent_idx][tok_idx].item())
        else:
            aligned.append(le[''])
        tok_idx += 1
    return aligned

In [10]:
# Sanity check on the first 30 training rows: encode them and look at the
# predicted ids.
X_test, y_test = transform_test(le_vocab, le, X[:30], y[:30])
#print(X_test.size())
gru_model.predict(X_test)

tensor([[ 1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  4,  2,  0,  0,  0],
        [ 2, 15,  6,  1,  2,  6,  2,  2,  2,  2, 19,  2,  2,  2,  2,  0]])

In [11]:
print(X_test)
print(y[:10])
print(y_test)

tensor([[[ 1,  0],
         [ 2,  1],
         [ 3,  2],
         [ 4,  0],
         [ 5,  0],
         [ 6,  3],
         [ 7,  4],
         [ 8,  5],
         [ 9,  0],
         [10,  6],
         [11,  0],
         [12,  0],
         [13,  3],
         [ 0,  7],
         [ 0,  0],
         [ 0,  0]],

        [[14,  8],
         [15,  9],
         [16, 10],
         [17,  0],
         [18,  9],
         [19, 10],
         [20,  8],
         [21,  4],
         [22,  2],
         [23, 11],
         [24, 10],
         [25,  4],
         [26,  5],
         [19,  0],
         [13,  3],
         [ 0,  7]]])
['n.group', '', '', '', '', '', '', '', 'n.communication', '']
tensor([[1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 4, 5, 2, 0, 0, 0],
        [2, 2, 6, 1, 2, 6, 2, 2, 2, 2, 7, 2, 2, 8, 2, 0]])


In [12]:
#gru_model.score(X_test, y_test, le.transform(['<eos>'])[0])
#gru_model.score(X_test, y_test, le.transform([''])[0])
# Score the sample, treating the id of the empty tag '' as "no label".
gru_model.score(X_test, y_test, le[''])

32


(0.7272727272727273, 0.6153846153846154, 0.6666666666666667, 0.8125)

In [13]:
rev_le = {v: k for k, v in le.items()}
rev_le[0]

'<eos>'

In [14]:
# Encode the validation split and build the inverse tag mapping (id -> tag).
X_val_enc, y_val_enc = transform_test(le_vocab, le, X_val, y_val)
rev_le = {v: k for k, v in le.items()}

# Predict, map the padded per-sentence predictions back onto the original
# token rows, decode the ids to tag strings and write the prediction file.
y_hat_val = gru_model.predict(X_val_enc)
y_hat_val_align = align_pred(X_val, y_hat_val, le)
y_hat_val_align = [rev_le[yi] for yi in y_hat_val_align] # Decode
write_data(X_val, y_hat_val_align, 'val.pred')

In [15]:
print(len(X_val))
y_hat_val.size()

23575


torch.Size([1440, 16])

In [16]:
!python ../dimsum-data-1.5/scripts/dimsumeval.py val.gold val.pred

[40m[97m23
23
23
TEST 23 [('They', 'O', 'O'), ('might', 'O', 'O'), ('want', 'O', 'O'), ('to', 'O', 'O'), ('change', 'O', 'O'), ('the', 'O', 'O'), ('name', 'O', 'O'), ('to', 'O', 'O'), ('reflect', 'O', 'O'), ('the', 'O', 'O'), ('new', 'O', 'O'), ('yorkedness', 'O', 'O'), ('of', 'O', 'O'), ('the', 'O', 'O'), ('pizza', 'O', 'O'), (',', 'O', 'O'), ('scrummy', 'O', 'O'), ('yummy', 'O', 'O'), ('sounds', 'O', 'O'), ('gimmicky', 'O', 'O'), ('to', 'O', 'O'), ('me', 'O', 'O'), ('.', 'O', 'O')]
33
33
33
TEST 33 [('@_liinds', 'O', 'O'), ('yehhh', 'O', 'O'), ('!', 'O', 'O'), ('i', 'O', 'O'), ('thinkk', 'O', 'O'), ('i', 'O', 'O'), ('got', 'O', 'O'), ('it', 'O', 'O'), ('(', 'O', 'O'), (':', 'O', 'O'), ('the', 'O', 'O'), ('guy', 'O', 'O'), ('told', 'O', 'O'), ('me', 'O', 'O'), ("'ll", 'O', 'O'), ('call', 'O', 'O'), ('me', 'O', 'O'), ('when', 'O', 'O'), ('he', 'O', 'O'), ('gets', 'O', 'O'), ('my', 'O', 'O'), ('application', 'O', 'O'), ('to', 'O', 'O'), ('the', 'O', 'O'), ('person', 'O', 'O'), ('!', '

17
TEST 17 [('Actually', 'O', 'O'), ('working', 'O', 'O'), ('out', 'O', 'O'), ('there', 'O', 'O'), ('was', 'O', 'O'), ('good', 'O', 'O'), ('-', 'O', 'O'), ('the', 'O', 'O'), ('machines', 'O', 'O'), ('are', 'O', 'O'), ('nice', 'O', 'O'), ('and', 'O', 'O'), ('the', 'O', 'O'), ('classes', 'O', 'O'), ('are', 'O', 'O'), ('fun', 'O', 'O'), ('.', 'O', 'O')]
12
12
12
TEST 12 [('He', 'O', 'O'), ('responded', 'O', 'O'), ('"', 'O', 'O'), ('we', 'O', 'O'), ('have', 'O', 'O'), ('problem', 'O', 'O'), ('with', 'O', 'O'), ("'", 'O', 'O'), ('people', 'O', 'O'), ("'", 'O', 'O'), ('"', 'O', 'O'), ('.', 'O', 'O')]
22
22
22
TEST 22 [('There', 'O', 'O'), ('are', 'O', 'O'), ('some', 'O', 'O'), ('constraints', 'O', 'O'), ('as', 'O', 'O'), ('to', 'O', 'O'), ('how', 'O', 'O'), ('high', 'O', 'O'), ('or', 'O', 'O'), ('how', 'O', 'O'), ('low', 'O', 'O'), ('you', 'O', 'O'), ('can', 'O', 'O'), ('set', 'O', 'O'), ('your', 'O', 'O'), ('price', 'O', 'O'), ('when', 'O', 'O'), ('posting', 'O', 'O'), ('your', 'O', 'O'), (

18
18
18
TEST 18 [('RT', 'O', 'O'), ('@Daigetsbusi', 'O', 'O'), (':', 'O', 'O'), ('After', 'O', 'O'), ('@Rob_Manuchi', 'O', 'O'), ('Bday', 'O', 'O'), ('Bash', 'O', 'O'), ('I', 'O', 'O'), ("'m", 'O', 'O'), ('not', 'O', 'O'), ('doing', 'O', 'O'), ('nothing', 'O', 'O'), ('Else', 'O', 'O'), ('if', 'O', 'O'), ('its', 'O', 'O'), ('local', 'O', 'O'), ('...', 'O', 'O'), ('Nope', 'O', 'O')]
32
32
32
TEST 32 [('In', 'O', 'O'), ('today', 'O', 'O'), ("'s", 'O', 'O'), ('instant', 'O', 'O'), ('world', 'O', 'O'), (',', 'O', 'O'), ('there', 'O', 'O'), ("'s", 'O', 'O'), ('no', 'O', 'O'), ('reason', 'O', 'O'), ('for', 'O', 'O'), ('the', 'O', 'O'), ('instructor', 'O', 'O'), ('not', 'O', 'O'), ('to', 'O', 'O'), ('even', 'O', 'O'), ('have', 'O', 'O'), ('given', 'O', 'O'), ('us', 'O', 'O'), ('a', 'O', 'O'), ('phone', 'O', 'O'), ('call', 'O', 'O'), ('or', 'O', 'O'), ('e-mail', 'O', 'O'), ('if', 'O', 'O'), ('she', 'O', 'O'), ('was', 'O', 'O'), ('going', 'O', 'O'), ('to', 'O', 'O'), ('be', 'O', 'O'), ('late', 

TEST 3 [('They', 'O', 'O'), ('refused', 'O', 'O'), ('.', 'O', 'O')]
4
4
4
TEST 4 [('Who', 'O', 'O'), ('does', 'O', 'O'), ('that', 'O', 'O'), ('?!', 'O', 'O')]
2
2
2
TEST 2 [('Feel', 'O', 'O'), ('good', 'O', 'O')]
15
15
15
TEST 15 [('It', 'O', 'O'), ('also', 'O', 'O'), ('came', 'O', 'O'), ('with', 'O', 'O'), ('free', 'O', 'O'), ('balance', 'O', 'O'), ('and', 'O', 'O'), ('rotation', 'O', 'O'), ('for', 'O', 'O'), ('the', 'O', 'O'), ('life', 'O', 'O'), ('of', 'O', 'O'), ('the', 'O', 'O'), ('tires', 'O', 'O'), ('!', 'O', 'O')]
9
9
9
TEST 9 [('We', 'O', 'O'), ('signed', 'O', 'O'), ('our', 'O', 'O'), ('name', 'O', 'O'), ('in', 'O', 'O'), ('about', 'O', 'O'), ('6:00', 'O', 'O'), ('pm', 'O', 'O'), ('.', 'O', 'O')]
17
17
17
TEST 17 [('National', 'O', 'O'), ('Dance', 'O', 'O'), ('Day', 'O', 'O'), ('NUMBER', 'O', 'O'), ('Instructional', 'O', 'O'), ('Master', 'O', 'O'), ('Class', 'O', 'O'), ('-', 'O', 'O'), ('Mirrored', 'O', 'O'), ('and', 'O', 'O'), ('Slow', 'O', 'O'), ('*', 'O', 'O'), ('...', 'O',

45
TEST 45 [('He', 'O', 'O'), ('preceded', 'O', 'O'), ('to', 'O', 'O'), ('grab', 'O', 'O'), ('the', 'O', 'O'), ('slice', 'O', 'O'), ('off', 'O', 'O'), ('the', 'O', 'O'), ('countertop', 'O', 'O'), ('and', 'O', 'O'), ('throw', 'O', 'O'), ('it', 'O', 'O'), ('into', 'O', 'O'), ('the', 'O', 'O'), ('trash', 'O', 'O'), ('while', 'O', 'O'), ('yelling', 'O', 'O'), ('at', 'O', 'O'), ('me', 'O', 'O'), ('saying', 'O', 'O'), (',', 'O', 'O'), ('"', 'O', 'O'), ('you', 'O', 'O'), ('do', 'O', 'O'), ('not', 'O', 'O'), ('order', 'O', 'O'), ('what', 'O', 'O'), ('you', 'O', 'O'), ('do', 'O', 'O'), ('not', 'O', 'O'), ('know', 'O', 'O'), ('about', 'O', 'O'), ('"', 'O', 'O'), ('and', 'O', 'O'), ('"', 'O', 'O'), ('you', 'O', 'O'), ('do', 'O', 'O'), ("n't", 'O', 'O'), ('know', 'O', 'O'), ('how', 'O', 'O'), ('pizza', 'O', 'O'), ('is', 'O', 'O'), ('made', 'O', 'O'), ('"', 'O', 'O'), ('.', 'O', 'O')]
4
4
4
TEST 4 [('Extremely', 'O', 'O'), ('bad', 'O', 'O'), ('customer', 'O', 'O'), ('service', 'O', 'O')]
4
4
4
TEST

TEST 16 [('Plus', 'O', 'O'), ('they', 'O', 'O'), ('will', 'O', 'O'), ('overcharge', 'O', 'O'), ('you', 'O', 'O'), ('for', 'O', 'O'), ('just', 'O', 'O'), ('about', 'O', 'O'), ('everything', 'O', 'O'), (',', 'O', 'O'), ('and', 'O', 'O'), ('smile', 'O', 'O'), ('while', 'O', 'O'), ('doing', 'O', 'O'), ('it', 'O', 'O'), ('.', 'O', 'O')]
5
5
5
TEST 5 [('Best', 'O', 'O'), ('meat', 'O', 'O'), ('pies', 'O', 'O'), ('in', 'O', 'O'), ('Canada', 'O', 'O')]
31
31
31
TEST 31 [('FUCK', 'O', 'O'), ('3D', 'O', 'O'), ('!!!!', 'O', 'O'), ('They', 'O', 'O'), ('going', 'O', 'O'), ('to', 'O', 'O'), ('remake', 'O', 'O'), ('every', 'O', 'O'), ('single', 'O', 'O'), ('one', 'O', 'O'), ('of', 'O', 'O'), ('the', 'O', 'O'), ('starwars', 'O', 'O'), ('movies', 'O', 'O'), ('in', 'O', 'O'), ('3D', 'O', 'O'), ('and', 'O', 'O'), ('show', 'O', 'O'), ('new', 'O', 'O'), ('one', 'O', 'O'), ('every', 'O', 'O'), ('year', 'O', 'O'), ('from', 'O', 'O'), ('2012', 'O', 'O'), ('till', 'O', 'O'), ('2017', 'O', 'O'), ('.', 'O', 'O'),