# POS Tagging using Logistic Regression

## Imports and Initialisations

In [13]:
from collections import Counter
from copy import deepcopy
from math import exp
import matplotlib.pyplot as plt
import numpy as np
import nltk
import os
import pickle

nltk.download('brown')
nltk.download('universal_tagset')

from nltk.corpus import brown

[nltk_data] Downloading package brown to /Users/sounak/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package universal_tagset to
[nltk_data]     /Users/sounak/nltk_data...
[nltk_data]   Package universal_tagset is already up-to-date!


In [6]:
# Brown corpus as a sequence of sentences, each a list of (word, tag) pairs,
# requested with the 'universal' tagset.
sents = brown.tagged_sents(tagset='universal')
# Index of the 80/20 boundary. The split is purely positional: no shuffling is
# done here, so the test portion is simply the tail of the corpus.
_N = int(len(sents) * (8 / 10))
sents_train = sents[:_N]  # first 80% of the sentences
sents_test = sents[_N:]  # remaining 20% of the sentences

In [7]:
sents[0]

[('The', 'DET'),
 ('Fulton', 'NOUN'),
 ('County', 'NOUN'),
 ('Grand', 'ADJ'),
 ('Jury', 'NOUN'),
 ('said', 'VERB'),
 ('Friday', 'NOUN'),
 ('an', 'DET'),
 ('investigation', 'NOUN'),
 ('of', 'ADP'),
 ("Atlanta's", 'NOUN'),
 ('recent', 'ADJ'),
 ('primary', 'NOUN'),
 ('election', 'NOUN'),
 ('produced', 'VERB'),
 ('``', '.'),
 ('no', 'DET'),
 ('evidence', 'NOUN'),
 ("''", '.'),
 ('that', 'ADP'),
 ('any', 'DET'),
 ('irregularities', 'NOUN'),
 ('took', 'VERB'),
 ('place', 'NOUN'),
 ('.', '.')]

## One-vs-Rest Logistic Regression

In [39]:
def save_obj(obj, name):
    """Pickle *obj* to ``obj/<name>.pkl`` relative to the working directory.

    The ``obj`` directory is created on demand. An existing file with the
    same name is overwritten.

    Args:
        obj: Any picklable object.
        name: Base file name, without the ``.pkl`` extension.
    """
    # exist_ok avoids the check-then-create race of listing the directory
    # first and calling mkdir afterwards.
    os.makedirs('obj', exist_ok=True)
    with open('obj/' + name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Load the object pickled at ``obj/<name>.pkl``.

    This is a best-effort cache read: a missing, unreadable or corrupt file
    is reported as ``None`` rather than raised, so the caller can rebuild
    the data.

    Args:
        name: Base file name, without the ``.pkl`` extension.

    Returns:
        The unpickled object, or ``None`` if it could not be loaded.
    """
    # NOTE(security): pickle can execute arbitrary code while loading; only
    # use this on cache files written by this notebook.
    try:
        with open('obj/' + name + '.pkl', 'rb') as f:
            return pickle.load(f)
    # Narrower than a bare `except:` so that KeyboardInterrupt / SystemExit
    # still propagate. The listed types are the ones the pickle docs name
    # for failed unpickling, plus OSError for a missing/unreadable file.
    except (OSError, EOFError, pickle.UnpicklingError,
            AttributeError, ImportError, IndexError):
        return None

# Word -> embedding vector table. Try the pickled cache first; parsing the raw
# GloVe text file is only needed when the cache is missing or empty.
word_vecs = load_obj('word_vecs')

if not word_vecs:
    GLOVE_PATH = "/Users/sounak/Documents/clg/nlp/nlp-projects/data/glove.6B.300d.txt"
    word_vecs = {}
    # `with` closes the file even if a line fails to parse; iterating the
    # handle streams the file instead of materialising every line at once.
    # The encoding is explicit so the result does not depend on the platform
    # default.
    with open(GLOVE_PATH, 'r', encoding='utf-8') as f:
        for line in f:
            # Each line is "<token> <v1> <v2> ..." separated by single spaces.
            token, *values = line.rstrip('\n').split(' ')
            word_vecs[token] = np.array([float(v) for v in values])
    save_obj(word_vecs, 'word_vecs')

print('word_vecs have been loaded')

word_vecs have been loaded


In [29]:
class Multiclass_LR:
    """One-vs-rest logistic-regression POS tagger over word embeddings.

    One binary logistic-regression model is trained per tag, using the
    embedding of the (lower-cased) word as the only features. A word is
    tagged with the tag whose model gives the highest linear score.

    Constructing an instance builds the training matrices and trains every
    per-tag model immediately.

    Attributes:
        sents: The tagged training sentences passed to the constructor.
        tags: Sorted list of the distinct tags seen in ``sents``.
        embeddings: Mapping ``word -> vector`` used for feature lookup; it
            must contain an ``'unk'`` entry for out-of-vocabulary words.
        X: ``tag -> feature matrix`` with one row per training token. All
            tags reference the same array, so do not mutate it in place.
        Y: ``tag -> 0/1 label vector`` (1 where the token carries that tag).
        W: ``tag -> weight vector``; the last component is the intercept.
    """

    def __init__(self, tagged_sents, embeddings=None, num_steps=300000,
                 learning_rate=5e-5):
        """Build the feature set from *tagged_sents* and train all models.

        Args:
            tagged_sents: Iterable of sentences, each a sequence of
                ``(word, tag)`` pairs.
            embeddings: Optional ``word -> vector`` mapping. When omitted,
                the module-level ``word_vecs`` table is used.
            num_steps: Gradient steps per tag model.
            learning_rate: Step size for the gradient updates.
        """
        self.sents = tagged_sents
        # Sorted so that the tag order, and therefore tie-breaking in
        # tag_sent, is reproducible across runs (set order is not).
        self.tags = sorted({tag for sent in self.sents for _, tag in sent})
        self.embeddings = word_vecs if embeddings is None else embeddings
        self.X = {}
        self.Y = {}
        self.W = {}
        self.get_feature_set()
        self.train_model(num_steps=num_steps, learning_rate=learning_rate)

    def _ascii(self, s):
        """Return the decimal code points of *s* concatenated into one int.

        Raises:
            ValueError: If *s* is empty (there are no digits to parse).
        """
        return int(''.join(str(ord(c)) for c in s))

    def _embed(self, word):
        """Return the embedding of *word* (lower-cased), or of ``'unk'``."""
        table = getattr(self, 'embeddings', None)
        if table is None:
            # Instance created without __init__: fall back to the global table.
            table = word_vecs
        try:
            return table[word.lower()]
        except KeyError:
            return table['unk']

    def tag_sent(self, sent, tags=None):
        """Tag every word of *sent* with its highest-scoring tag.

        Args:
            sent: Sequence of word strings.
            tags: Unused; accepted only for backward compatibility with
                callers that pass the gold tags.

        Returns:
            List of ``(word, predicted_tag)`` pairs, in sentence order.
        """
        sent_tagged = []
        for word in sent:
            # Append the constant 1 once per word; it pairs with the
            # intercept weight that logistic_regression adds by default.
            features = np.append(self._embed(word), 1)
            best_tag = max(
                self.tags,
                key=lambda tag: np.dot(features, self.W[tag]),
            )
            sent_tagged.append((word, best_tag))
        return sent_tagged

    def train_model(self, num_steps=300000, learning_rate=5e-5):
        """Train one binary model per tag and store the weights in ``W``."""
        for tag in self.tags:
            print('Training for tag', tag)
            self.W[tag] = self.logistic_regression(
                tag, num_steps=num_steps, learning_rate=learning_rate)

    def sigmoid(self, a):
        """Logistic function ``1 / (1 + exp(-a))``, evaluated without overflow.

        Written as ``exp(-log(1 + exp(-a)))`` so that large negative inputs
        do not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0.0, -np.asarray(a, dtype=float)))

    def logistic_regression(self, tag, num_steps=300000, learning_rate=5e-5, add_intercept=True):
        """Fit the binary model for *tag* by full-batch gradient ascent.

        Args:
            tag: Key into ``self.X`` / ``self.Y`` selecting the training data.
            num_steps: Number of gradient updates.
            learning_rate: Step size.
            add_intercept: Append a column of ones to the features. Note
                that tag_sent always scores with an appended 1, so weights
                trained with ``add_intercept=False`` do not fit tag_sent.

        Returns:
            Weight vector with one entry per feature column (plus the
            intercept as the last entry when *add_intercept* is true).
        """
        X = self.X[tag]
        Y = self.Y[tag]
        if add_intercept:
            X = np.hstack((X, np.ones((X.shape[0], 1))))

        w = np.zeros(X.shape[1])
        X_t = X.T  # loop-invariant view
        for _ in range(num_steps):
            residual = Y - self.sigmoid(np.dot(X, w))
            # Gradient of the log-likelihood is X^T (y - p).
            w += learning_rate * np.dot(X_t, residual)
        return w

    def loss(self, w, X, Y):
        """Return the Bernoulli log-likelihood of weights *w* on (X, Y).

        Computes ``sum_i y_i * (w . x_i) - log(1 + exp(w . x_i))``; despite
        the method name, larger values mean a better fit.

        Args:
            w: Weight vector with the same length as a row of *X*.
            X: Feature rows; only the first ``len(Y)`` rows are used.
            Y: 0/1 labels, one per row.

        Returns:
            The log-likelihood as a 0-d array (``0.0`` for empty *Y*).
        """
        Y = np.asarray(Y)
        n = Y.shape[0]
        if n == 0:
            return np.array(0.0)
        labels = Y.reshape(n)
        weights = np.asarray(w, dtype=float).reshape(-1)
        rows = np.asarray(X)[:n].reshape(n, -1)
        scores = np.dot(rows, weights)
        # logaddexp(0, z) == log(1 + exp(z)) without overflowing for large z.
        return np.array(np.sum(labels * scores - np.logaddexp(0.0, scores)))

    def get_feature_set(self):
        """Populate ``self.X`` and ``self.Y`` for every tag.

        The corpus is embedded once; the resulting matrix is shared by all
        tags and only the 0/1 label vector differs per tag.
        """
        # Hand-crafted feature extractors, kept as attributes; the embedding
        # features built below do not use them.
        self.puncs = {'!', "'", "''", '(', ')', ',', '--', '.', ':', ';', '?', '[', ']', '``'}
        self.feature_extractors = {
            'is_upper_first_letter': lambda sent, i: sent[i][0][0].isupper(),
            'is_upper_all': lambda sent, i: sent[i][0].isupper(),
            'is_hyphenated': lambda sent, i: '-' in sent[i][0],
            'is_punctuation': lambda sent, i: sent[i][0] in self.puncs,
            'previous_tag': lambda sent, i: self.tags.index(sent[i-1][1]) if i != 0 else -1,
            'current_tag': lambda sent, i: self.tags.index(sent[i][1]),
        }

        vectors = []
        gold_tags = []
        for sent in self.sents:
            for word, tag in sent:
                vectors.append(self._embed(word))
                gold_tags.append(tag)
        features = np.array(vectors)

        for tag in self.tags:
            self.X[tag] = features
            self.Y[tag] = np.array([1 if gold == tag else 0 for gold in gold_tags])

In [42]:
# Train the tagger on only the first 100 training sentences; the constructor
# builds the feature matrices and trains every per-tag model (see the
# "Training for tag ..." lines it prints).
T = Multiclass_LR(sents_train[:100])

Training for tag DET
Training for tag ADP
Training for tag ADV
Training for tag NOUN
Training for tag ADJ
Training for tag CONJ
Training for tag PRT
Training for tag NUM
Training for tag PRON
Training for tag VERB
Training for tag .


In [45]:
# Token-level accuracy of the tagger T on the first 1000 test sentences.
score = 0  # correctly tagged tokens
total_count = 0  # all tokens seen
for sent in sents_test[:1000]:
    _sent = [word for word, tag in sent]
    _tags = [tag for word, tag in sent]
    # The gold tags are passed because tag_sent takes a `tags` argument; the
    # tagger does not use them for prediction.
    res = T.tag_sent(_sent, _tags)
    print(res)
    N = len(sent)
    total_count += N
    # Compare predictions with gold tags pairwise (no index variable that
    # could shadow an outer loop counter).
    score += sum(
        1 for (_, predicted), gold in zip(res, _tags) if predicted == gold
    )

# Guard against an empty test slice instead of dividing by zero.
accuracy = (score / total_count) * 100 if total_count else 0.0
print('Accuracy', accuracy)

[('The', 'DET'), ('quarrel', 'DET'), ('ended', 'NOUN'), ('in', 'ADP'), ('a', 'DET'), ('ridiculous', 'NOUN'), ('draw', 'VERB'), (',', '.'), ('but', 'CONJ'), ('I', 'PRON'), ('must', 'VERB'), ('tell', 'NOUN'), ('you', 'PRON'), ('about', 'ADP'), ('it', 'PRON'), ('.', '.')]
[('Oh', '.'), (',', '.'), ('yes', '.'), (',', '.'), ("I'm", 'NOUN'), ('quite', 'ADV'), ('sure', 'VERB'), ("it's", 'NOUN'), ('important', 'DET'), (',', '.'), ('because', 'NOUN'), ('of', 'ADP'), ('the', 'DET'), ('Beech', 'ADV'), ('Pasture', 'DET'), ('.', '.')]
[("What's", 'NOUN'), ('that', 'ADP'), ('?', '.'), ('?', '.')]
[('Why', 'NOUN'), (',', '.'), ("that's", 'NOUN'), ('what', 'DET'), ('gave', 'VERB'), ('me', 'PRON'), ('the', 'DET'), ('feeling', 'NOUN'), (',', '.'), ('gave', 'VERB'), ('me', 'PRON'), ('as-it-were', 'NOUN'), ('the', 'DET'), ('spirit', 'VERB'), (',', '.'), ('the', 'DET'), ('demoniac', 'NOUN'), (',', '.'), ('evil', 'DET'), ('spirit', 'VERB'), ('of', 'ADP'), ('this', 'DET'), ('whole', 'NOUN'), ('affair', 'NOU

[('One', 'NUM'), ('month', 'NOUN'), ('ago', 'VERB'), (',', '.'), ('on', 'ADP'), ('the', 'DET'), ('20th', 'ADJ'), ('of', 'ADP'), ('October', 'VERB'), (',', '.'), ('was', 'VERB'), ('the', 'DET'), ('opening', 'VERB'), ('of', 'ADP'), ('the', 'DET'), ('gunning', 'VERB'), ('season', 'ADV'), ('in', 'ADP'), ('Massachusetts', 'ADJ'), ('.', '.')]
[('Not', 'ADV'), ('much', 'VERB'), ('to', 'PRT'), ('shoot', 'PRON'), (',', '.'), ('but', 'CONJ'), ('there', 'PRT'), ('are', 'VERB'), ('a', 'DET'), ('few', 'ADJ'), ('pheasant', 'CONJ'), ('.', '.')]
[('Rabbits', 'ADV'), (',', '.'), ('too', 'ADV'), (',', '.'), ('if', 'VERB'), ('you', 'PRON'), ('care', 'NOUN'), ('for', 'ADP'), ('them', 'PRON'), (',', '.'), ('which', 'DET'), ('most', 'ADV'), ('of', 'ADP'), ('the', 'DET'), ('folk', 'ADJ'), ('around', 'ADP'), ('here', 'VERB'), ("haven't", 'NOUN'), ('the', 'DET'), ('sense', 'DET'), ('to', 'PRT'), ('appreciate', 'VERB'), ('.', '.')]
[('Any', 'DET'), ('more', 'ADJ'), ('than', 'ADP'), ('they', 'PRON'), ('have', 'V

[('And', 'CONJ'), ('everybody', 'VERB'), ('has', 'VERB'), ('some', 'DET'), ('kind', 'NOUN'), ('of', 'ADP'), ('grudge', 'NOUN'), ('.', '.')]
[('I', 'PRON'), ('might', 'VERB'), ('have', 'VERB'), ('got', 'VERB'), ('hit', 'ADV'), ('by', 'ADP'), ('that', 'ADP'), ('truck', 'NOUN'), ('if', 'VERB'), ('it', 'PRON'), ("wasn't", 'NOUN'), ('for', 'ADP'), ('you', 'PRON'), ('.', '.')]
[('I', 'PRON'), ('believe', 'VERB'), ('in', 'ADP'), ('returning', 'ADV'), ('favors', 'VERB'), ('.', '.')]
[("I'll", 'NOUN'), ('do', 'VERB'), ('anything', 'VERB'), ('for', 'ADP'), ('somebody', 'VERB'), ('I', 'PRON'), ('like', 'ADP'), ('.', '.')]
[('It', 'PRON'), ("won't", 'NOUN'), ('cost', 'NOUN'), ('you', 'PRON'), ('a', 'DET'), ('cent', 'NOUN'), (',', '.'), ('Phil', 'NOUN'), ('.', '.')]
[('Go', 'VERB'), ('ahead', 'VERB'), ('and', 'CONJ'), ('try', 'VERB'), ('me', 'PRON'), ("''", '.'), ('!', '.'), ('!', '.')]
[('Phil', 'NOUN'), ('rubbed', 'VERB'), ('his', 'DET'), ('forehead', 'DET'), ('wearily', 'VERB'), ('.', '.')]
[('H

[('Really', 'NOUN'), (',', '.'), ('he', 'PRON'), ('said', 'VERB'), ('to', 'PRT'), ('himself', 'PRON'), (',', '.'), ('nobody', 'NOUN'), ('kills', 'NOUN'), ('a', 'DET'), ('man', 'NOUN'), ('just', 'ADV'), ('as', 'ADP'), ('a', 'DET'), ('favor', 'VERB'), ('!', '.'), ('!', '.')]
[('So', 'ADV'), ('you', 'PRON'), ('thought', 'VERB'), ('I', 'PRON'), ("didn't", 'NOUN'), ('mean', 'NOUN'), ('what', 'DET'), ('I', 'PRON'), ('said', 'VERB'), ('.', '.')]
[('The', 'DET'), ("stranger's", 'NOUN'), ('eyes', 'VERB'), ('were', 'VERB'), ('large', 'ADJ'), ('and', 'CONJ'), ('sad', 'NOUN'), (',', '.'), ('as', 'ADP'), ('if', 'VERB'), ('Phil', 'NOUN'), ('Haney', 'VERB'), ('had', 'VERB'), ('hurt', 'NOUN'), ('his', 'DET'), ('feelings', 'NOUN'), ('.', '.')]
[('It', 'PRON'), ('was', 'VERB'), ('like', 'ADP'), ('a', 'DET'), ('recurrent', 'ADV'), (',', '.'), ('annoying', 'NOUN'), ('dream', 'NOUN'), (',', '.'), ('but', 'CONJ'), ('now', 'VERB'), ('the', 'DET'), ('dream', 'NOUN'), ('was', 'VERB'), ('beginning', 'NOUN'), ('

[('The', 'DET'), ('cops', 'NOUN'), ("didn't", 'NOUN'), ('suspect', 'NOUN'), ('a', 'DET'), ('thing', 'NOUN'), (',', '.'), ('and', 'CONJ'), ('I', 'PRON'), ('thought', 'VERB'), ('it', 'PRON'), ('was', 'VERB'), ('a', 'DET'), ('coincidence', 'NOUN'), ('.', '.')]
[('After', 'ADP'), ('all', 'PRT'), (',', '.'), ('I', 'PRON'), ("didn't", 'NOUN'), ('know', 'PRON'), ('you', 'PRON'), (',', '.'), ('Pete', 'NOUN'), ('.', '.')]
[('It', 'PRON'), ('could', 'VERB'), ('have', 'VERB'), ('been', 'VERB'), ('an', 'DET'), ('accident', 'NOUN'), ("''", '.'), ('.', '.')]
[('He', 'PRON'), ('shrugged', 'ADV'), ('casually', 'ADV'), ('.', '.')]
[('``', '.'), ('But', 'CONJ'), ('if', 'VERB'), ('you', 'PRON'), ('say', 'NOUN'), ('you', 'PRON'), ('managed', 'ADV'), ('it', 'PRON'), ("''", '.'), ('The', 'DET'), ('stranger', 'VERB'), ('was', 'VERB'), ('hooked', 'VERB'), ('.', '.')]
[('His', 'DET'), ('eyes', 'VERB'), ('burned', 'NOUN'), ('feverishly', 'PRON'), ('.', '.')]
[('``', '.'), ('Yes', '.'), (',', '.'), ('yes', '.'),

[('I', 'PRON'), ('was', 'VERB'), ('reminded', 'VERB'), (',', '.'), ('amusedly', 'NOUN'), (',', '.'), ('by', 'ADP'), ('a', 'DET'), ('poem', 'ADJ'), ('of', 'ADP'), ('Kenneth', 'NOUN'), ("Patchen's", 'NOUN'), ('called', 'NOUN'), ('The', 'DET'), ('Murder', 'NOUN'), ('of', 'ADP'), ('Two', 'NUM'), ('Men', 'NOUN'), ('by', 'ADP'), ('a', 'DET'), ('Young', 'DET'), ('Kid', 'NOUN'), ('Wearing', 'VERB'), ('Lemon', 'NOUN'), ('Colored', 'VERB'), ('Gloves', 'ADJ'), (',', '.'), ('which', 'DET'), ('Patchen', 'NOUN'), ('himself', 'PRON'), ('read', 'NOUN'), ('on', 'ADP'), ('a', 'DET'), ('record', 'NOUN'), ('against', 'ADP'), ('jazz', 'ADJ'), ('background', 'VERB'), ('.', '.')]
[('The', 'DET'), ('poem', 'ADJ'), ('consisted', 'VERB'), ('of', 'ADP'), ('only', 'ADV'), ('two', 'NUM'), ('words', 'DET'), (',', '.'), ('the', 'DET'), ('word', 'NOUN'), ('``', '.'), ('Wait', 'VERB'), ("''", '.'), (',', '.'), ('repeated', 'ADV'), ('over', 'ADP'), ('and', 'CONJ'), ('over', 'ADP'), ('at', 'ADP'), ('irregular', 'VERB'),

[("Let's", 'NOUN'), ('do', 'VERB'), ('that', 'ADP'), ("''", '.'), ('.', '.')]
[('We', 'PRON'), ('did', 'VERB'), ('that', 'ADP'), ('and', 'CONJ'), ('found', 'VERB'), ('a', 'DET'), ('dirty', '.'), ('handkerchief', 'DET'), (',', '.'), ('some', 'DET'), ('matches', 'ADJ'), ('and', 'CONJ'), ('fourteen', 'VERB'), ('cents', 'NUM'), ('in', 'ADP'), ('change', 'VERB'), ('.', '.')]
[('We', 'PRON'), ('took', 'VERB'), ('the', 'DET'), ('matches', 'ADJ'), ('--', '.'), ('they', 'PRON'), ('were', 'VERB'), ('book', 'VERB'), ('matches', 'ADJ'), ('and', 'CONJ'), ('once', 'VERB'), ("they'd", 'NOUN'), ('been', 'VERB'), ('touched', 'VERB'), ('might', 'VERB'), ('retain', 'ADJ'), ('fingerprints', 'NOUN'), ('--', '.'), ('and', 'CONJ'), ('the', 'DET'), ('change', 'VERB'), ('.', '.')]
[('We', 'PRON'), ('discussed', 'VERB'), ('the', 'DET'), ('candle', 'VERB'), ('and', 'CONJ'), ('decided', 'VERB'), ('the', 'DET'), ('hypothetical', 'NOUN'), ('other', 'ADJ'), ('bum', 'NOUN'), ('would', 'VERB'), ('have', 'VERB'), ('lef

[('I', 'PRON'), ('talked', 'VERB'), ('first', 'ADJ'), (',', '.'), ('telling', 'NOUN'), ('him', 'PRON'), ('everything', 'VERB'), ('I', 'PRON'), ('knew', 'VERB'), ('about', 'ADP'), ('Seaton', 'VERB'), ('and', 'CONJ'), ('his', 'DET'), ('house', 'NOUN'), ('and', 'CONJ'), ('domestic', 'ADJ'), ('arrangements', 'ADJ'), ('.', '.')]
[('I', 'PRON'), ('drew', 'VERB'), ('diagrams', 'VERB'), ('and', 'CONJ'), ('floor', 'ADV'), ('plans', 'NOUN'), (';', '.'), (';', '.')]
[('he', 'PRON'), ('memorized', 'VERB'), ('them', 'PRON'), ('thoroughly', 'NOUN'), ('and', 'CONJ'), ('then', 'ADV'), ('we', 'PRON'), ('tore', '.'), ('them', 'PRON'), ('into', 'ADP'), ('tiny', 'VERB'), ('pieces', 'ADJ'), ('and', 'CONJ'), ('flushed', 'VERB'), ('them', 'PRON'), ('down', 'PRT'), ('.', '.')]
[('He', 'PRON'), ('gave', 'VERB'), ('me', 'PRON'), ('equivalent', 'NOUN'), ('and', 'CONJ'), ('even', 'VERB'), ('more', 'ADJ'), ('detailed', 'VERB'), ('dope', 'NOUN'), ('on', 'ADP'), ('Radic', 'VERB'), (',', '.'), ('including', 'NOUN'), 

[('Mike', 'VERB'), ('stopped', 'NOUN'), ('to', 'PRT'), ('cherish', 'VERB'), ('all', 'PRT'), ('his', 'DET'), ('brother', 'VERB'), ('selves', 'NOUN'), (',', '.'), ('the', 'DET'), ('many', 'ADJ'), ('threes-fulfilled', 'NOUN'), ('on', 'ADP'), ('Mars', 'NOUN'), (',', '.'), ('corporate', 'NOUN'), ('and', 'CONJ'), ('discorporate', 'NOUN'), (',', '.'), ('the', 'DET'), ('precious', 'VERB'), ('few', 'ADJ'), ('on', 'ADP'), ('Earth', 'VERB'), ('--', '.'), ('the', 'DET'), ('unknown', 'VERB'), ('powers', 'VERB'), ('of', 'ADP'), ('three', 'NUM'), ('on', 'ADP'), ('Earth', 'VERB'), ('that', 'ADP'), ('would', 'VERB'), ('be', 'VERB'), ('his', 'DET'), ('to', 'PRT'), ('merge', 'VERB'), ('with', 'ADP'), ('and', 'CONJ'), ('cherish', 'VERB'), ('now', 'VERB'), ('that', 'ADP'), ('at', 'ADP'), ('last', 'ADJ'), ('long', 'ADJ'), ('waiting', 'NOUN'), ('he', 'PRON'), ('grokked', 'NOUN'), ('and', 'CONJ'), ('cherished', 'NOUN'), ('himself', 'PRON'), ('.', '.')]
[('Mike', 'VERB'), ('remained', 'ADJ'), ('in', 'ADP'), ('

[('``', '.'), ('Larry', 'NOUN'), ('teaches', 'VERB'), ('plants', 'NOUN'), ('to', 'PRT'), ('grow', 'ADJ'), ('.', '.')]
[('I', 'PRON'), ('have', 'VERB'), ('helped', 'NOUN'), ('him', 'PRON'), ('.', '.')]
[('But', 'CONJ'), ('my', 'DET'), ('people', 'ADP'), ('--', '.'), ('Martians', 'ADP'), (',', '.'), ('I', 'PRON'), ('mean', 'NOUN'), (';', '.'), (';', '.')]
[('I', 'PRON'), ('now', 'VERB'), ('grok', 'ADJ'), ('you', 'PRON'), ('are', 'VERB'), ('my', 'DET'), ('people', 'ADP'), ('--', '.'), ('teach', 'VERB'), ('plants', 'NOUN'), ('another', 'NUM'), ('way', 'ADP'), ('.', '.')]
[('In', 'ADP'), ('the', 'DET'), ('other', 'ADJ'), ('hemisphere', 'DET'), ('it', 'PRON'), ('is', 'VERB'), ('growing', 'ADJ'), ('colder', 'ADP'), ('and', 'CONJ'), ('nymphs', 'ADV'), (',', '.'), ('those', 'ADP'), ('who', 'PRON'), ('stayed', 'ADV'), ('alive', 'VERB'), ('through', 'ADP'), ('the', 'DET'), ('summer', 'DET'), (',', '.'), ('are', 'VERB'), ('being', 'VERB'), ('brought', 'VERB'), ('into', 'ADP'), ('nests', 'DET'), ('

[('Digby', 'ADP'), ('was', 'VERB'), ('not', 'ADV'), ('pleased', 'NOUN'), ('with', 'ADP'), ('his', 'DET'), ('promotion', 'NOUN'), ('.', '.')]
[('The', 'DET'), ('Man', 'NOUN'), ('from', 'ADP'), ('Mars', 'NOUN'), ('had', 'VERB'), ('interrupted', 'NOUN'), ('him', 'PRON'), ('with', 'ADP'), ('his', 'DET'), ('work', 'VERB'), ('half', 'VERB'), ('finished', 'VERB'), ('--', '.'), ('and', 'CONJ'), ('that', 'ADP'), ('stupid', 'PRON'), ('jackass', 'NOUN'), ('Short', 'ADJ'), ('was', 'VERB'), ('certain', 'ADJ'), ('to', 'PRT'), ('louse', 'ADJ'), ('it', 'PRON'), ('up', 'PRT'), ('.', '.')]
[('Foster', 'ADJ'), ('listened', 'ADJ'), ('with', 'ADP'), ('angelic', 'DET'), ('patience', 'VERB'), ('until', 'CONJ'), ('Digby', 'ADP'), ('ran', 'NOUN'), ('down', 'PRT'), (',', '.'), ('then', 'ADV'), ('said', 'VERB'), (',', '.'), ('``', '.'), ('Listen', 'PRON'), (',', '.'), ('junior', 'VERB'), (',', '.'), ("you're", 'NOUN'), ('an', 'DET'), ('angel', 'VERB'), ('now', 'VERB'), ('--', '.'), ('so', 'ADV'), ('forget', 'VER

[('The', 'DET'), ('expense', 'NOUN'), ('and', 'CONJ'), ('time', 'NOUN'), ('involved', 'NOUN'), ('are', 'VERB'), ('astronomical', 'NOUN'), ('.', '.')]
[('However', 'ADV'), (',', '.'), ('we', 'PRON'), ('sent', 'VERB'), ('a', 'DET'), ('third', 'VERB'), ('vessel', 'VERB'), ('out', 'PRT'), (',', '.'), ('a', 'DET'), ('much', 'VERB'), ('smaller', 'VERB'), ('and', 'CONJ'), ('faster', 'ADV'), ('one', 'NUM'), ('than', 'ADP'), ('the', 'DET'), ('first', 'ADJ'), ('two', 'NUM'), ('.', '.')]
[('We', 'PRON'), ('have', 'VERB'), ('learned', 'VERB'), ('much', 'VERB'), ('about', 'ADP'), ('interstellar', 'ADP'), ('drives', 'NOUN'), ('since', 'ADP'), ('a', 'DET'), ('hundred', 'NOUN'), ('years', 'NOUN'), ('ago', 'VERB'), (';', '.'), (';', '.')]
[('that', 'ADP'), ('is', 'VERB'), ('all', 'PRT'), ('I', 'PRON'), ('can', 'VERB'), ('tell', 'NOUN'), ('you', 'PRON'), ('about', 'ADP'), ('them', 'PRON'), ('.', '.')]
[('``', '.'), ('But', 'CONJ'), ('the', 'DET'), ('third', 'VERB'), ('ship', 'VERB'), ('came', 'NOUN'), (

[('``', '.'), ('A', 'DET'), ('thousand', 'NOUN'), ('pardons', 'VERB'), ("''", '.'), (',', '.'), ('said', 'VERB'), ('Hal', 'NOUN'), ('.', '.')]
[('``', '.'), ('But', 'CONJ'), ('I', 'PRON'), ('have', 'VERB'), ('just', 'ADV'), ('thought', 'VERB'), ('of', 'ADP'), ('one', 'NUM'), ('thing', 'NOUN'), ('.', '.')]
[('I', 'PRON'), ('am', 'PRON'), ('married', 'NOUN'), ("''", '.'), ('.', '.')]
[('``', '.'), ('No', 'DET'), ('problem', 'NOUN'), ('at', 'ADP'), ('all', 'PRT'), ("''", '.'), (',', '.'), ('said', 'VERB'), ('Macneff', 'NOUN'), ('.', '.')]
[('``', '.'), ('There', 'PRT'), ('will', 'VERB'), ('be', 'VERB'), ('no', 'DET'), ('women', 'ADP'), ('aboard', 'VERB'), ('the', 'DET'), ('Gabriel', 'VERB'), ('.', '.')]
[('And', 'CONJ'), (',', '.'), ('if', 'VERB'), ('a', 'DET'), ('man', 'NOUN'), ('is', 'VERB'), ('married', 'NOUN'), (',', '.'), ('he', 'PRON'), ('will', 'VERB'), ('automatically', 'VERB'), ('be', 'VERB'), ('given', 'VERB'), ('a', 'DET'), ('divorce', 'NOUN'), ("''", '.'), ('.', '.')]
[('Hal',

[('Her', 'ADJ'), ('friends', 'NOUN'), ('and', 'CONJ'), ('professional', 'DET'), ('associates', 'NOUN'), ('would', 'VERB'), ('sympathize', 'PRON'), ('with', 'ADP'), ('her', 'ADJ'), (',', '.'), ('not', 'ADV'), ('because', 'NOUN'), ('she', 'ADJ'), ('had', 'VERB'), ('lost', 'CONJ'), ('a', 'DET'), ('beloved', 'DET'), ('husband', 'NOUN'), (',', '.'), ('but', 'CONJ'), ('because', 'NOUN'), ('she', 'ADJ'), ('had', 'VERB'), ('been', 'VERB'), ('married', 'NOUN'), ('to', 'PRT'), ('a', 'DET'), ('man', 'NOUN'), ('who', 'PRON'), ('thought', 'VERB'), ('unrealistically', 'VERB'), ('.', '.')]
[('If', 'VERB'), ('Hal', 'NOUN'), ('Yarrow', 'NUM'), ('had', 'VERB'), ('been', 'VERB'), ('killed', 'NOUN'), ('in', 'ADP'), ('a', 'DET'), ('crash', 'NOUN'), (',', '.'), ('he', 'PRON'), ('must', 'VERB'), ('have', 'VERB'), ('wanted', 'VERB'), ('it', 'PRON'), ('that', 'ADP'), ('way', 'ADP'), ('.', '.')]
[('There', 'PRT'), ('was', 'VERB'), ('no', 'DET'), ('such', 'ADJ'), ('thing', 'NOUN'), ('as', 'ADP'), ('an', 'DET'), 

[('His', 'DET'), ('duty', 'NOUN'), ('was', 'VERB'), ('to', 'PRT'), ('write', 'NOUN'), ('a', 'DET'), ('school', 'NOUN'), ('text', 'VERB'), ('and', 'CONJ'), ('to', 'PRT'), ('teach', 'VERB'), ('the', 'DET'), ('entire', 'DET'), ('personnel', 'NOUN'), ('of', 'ADP'), ('the', 'DET'), ('Gabriel', 'VERB'), ('how', 'ADP'), ('to', 'PRT'), ('speak', 'ADV'), ('Ozagen', 'NOUN'), ('.', '.')]
[('Yet', 'ADV'), (',', '.'), ('if', 'VERB'), ('he', 'PRON'), ('used', 'VERB'), ('all', 'PRT'), ('of', 'ADP'), ('the', 'DET'), ('little', 'VERB'), ('means', 'NOUN'), ('at', 'ADP'), ('his', 'DET'), ('disposal', 'ADJ'), (',', '.'), ('he', 'PRON'), ('would', 'VERB'), ('be', 'VERB'), ('instructing', 'PRON'), ('his', 'DET'), ('students', 'NOUN'), ('wrongly', 'PRON'), ('.', '.')]
[('Moreover', 'NOUN'), (',', '.'), ('even', 'VERB'), ('getting', 'NOUN'), ('this', 'DET'), ('across', 'ADP'), ('would', 'VERB'), ('be', 'VERB'), ('difficult', 'NOUN'), ('.', '.')]
[('For', 'ADP'), ('one', 'NUM'), ('thing', 'NOUN'), (',', '.'), 

[('Between', 'DET'), ('individuals', 'NOUN'), (',', '.'), ('this', 'DET'), ('process', 'NOUN'), ('is', 'VERB'), ('called', 'NOUN'), ('bargaining', 'VERB'), ('.', '.')]
[('When', 'ADV'), ('it', 'PRON'), ('is', 'VERB'), ('done', 'VERB'), ('between', 'DET'), ('races', 'NOUN'), ('or', 'CONJ'), ('nations', 'ADP'), (',', '.'), ('it', 'PRON'), ('is', 'VERB'), ('called', 'NOUN'), ('making', 'VERB'), ('a', 'DET'), ('treaty', 'ADP'), ('.', '.')]
[('And', 'CONJ'), ('the', 'DET'), ('major', 'ADJ'), ('part', 'DET'), ('of', 'ADP'), ('my', 'DET'), ('mission', 'NOUN'), ('to', 'PRT'), ('your', 'NOUN'), ('nest', 'NOUN'), ('is', 'VERB'), ('to', 'PRT'), ('make', 'VERB'), ('a', 'DET'), ('treaty', 'ADP'), ('between', 'DET'), ('your', 'NOUN'), ('race', 'NOUN'), ('and', 'CONJ'), ('mine', 'VERB'), ('.', '.')]
[('Recovering', 'ADP'), ('the', 'DET'), ('property', 'NOUN'), ('was', 'VERB'), ('much', 'VERB'), ('less', 'ADV'), ('important', 'DET'), ("''", '.'), ('.', '.')]
[('``', '.'), ('Strange', 'NOUN'), ("''", '

[('Jack', 'NOUN'), ('scanned', 'NOUN'), ('the', 'DET'), ('skies', 'VERB'), (',', '.'), ('the', 'DET'), ('boards', 'VERB'), (',', '.'), ('and', 'CONJ'), ('the', 'DET'), ('skies', 'VERB'), ('again', 'ADV'), ('.', '.')]
[('Nothing', 'VERB'), ('.', '.')]
[('No', 'DET'), ('--', '.'), ('there', 'PRT'), ('was', 'VERB'), ('a', 'DET'), ('tiny', 'VERB'), ('pip', 'NOUN'), ('on', 'ADP'), ('the', 'DET'), ('radar', 'VERB'), (';', '.'), (';', '.')]
[('and', 'CONJ'), ('it', 'PRON'), ('was', 'VERB'), ('getting', 'NOUN'), ('bigger', 'VERB'), ('rapidly', 'ADV'), ('.', '.')]
[('If', 'VERB'), ('that', 'ADP'), ('was', 'VERB'), ('the', 'DET'), ('skiff', 'NOUN'), (',', '.'), ('it', 'PRON'), ('was', 'VERB'), ('making', 'VERB'), ('unprecedented', 'NOUN'), ('speed', 'VERB'), ('.', '.')]
[('Then', 'ADV'), ('the', 'DET'), ('skiff', 'NOUN'), ('hove', 'NOUN'), ('into', 'ADP'), ('sight', 'VERB'), (',', '.'), ('just', 'ADV'), ('a', 'DET'), ('dot', 'NOUN'), ('of', 'ADP'), ('light', 'VERB'), ('at', 'ADP'), ('first', 'AD

[('``', '.'), ('Well', 'ADV'), (',', '.'), ("let's", 'NOUN'), ('take', 'VERB'), ('a', 'DET'), ('ground-level', 'VERB'), ('look', 'ADJ'), ('at', 'ADP'), ('the', 'DET'), ('country', 'ADP'), ('around', 'ADP'), ('here', 'VERB'), ("''", '.'), ('.', '.')]
[('The', 'DET'), ('facsiport', 'NOUN'), ('rolled', 'ADV'), ('open', 'ADJ'), ('on', 'ADP'), ('the', 'DET'), ('landscape', 'VERB'), ('.', '.')]
[('A', 'DET'), ('range', 'VERB'), ('of', 'ADP'), ('bluffs', 'ADJ'), ('hugged', 'NOUN'), ('the', 'DET'), ('horizon', 'VERB'), (',', '.'), ('the', 'DET'), ('color', 'VERB'), ('of', 'ADP'), ('decaying', 'ADJ'), ('moss', 'VERB'), ('.', '.')]
[('Above', 'ADV'), ('them', 'PRON'), (',', '.'), ('the', 'DET'), ('sky', 'VERB'), ('was', 'VERB'), ('the', 'DET'), ('black', 'NOUN'), ('of', 'ADP'), ('space', 'ADJ'), (',', '.'), ('or', 'CONJ'), ('the', 'DET'), ('almost', 'VERB'), ('equal', 'VERB'), ('black', 'NOUN'), ('of', 'ADP'), ('the', 'DET'), ('winter', 'NOUN'), ('sky', 'VERB'), ('above', 'ADV'), ('Minneapolis',

[('They', 'PRON'), ('curl', 'PRT'), ('up', 'PRT'), ('and', 'CONJ'), ('die', 'ADV'), ('at', 'ADP'), ('the', 'DET'), ('sight', 'VERB'), ('of', 'ADP'), ('something', 'VERB'), ('strange', 'NOUN'), ('and', 'CONJ'), ('alien', 'VERB'), ('--', '.'), ('like', 'ADP'), ('a', 'DET'), ('spaceship', 'NOUN'), ("''", '.'), ('.', '.')]
[('``', '.'), ('Maybe', 'VERB'), ("''", '.'), (',', '.'), ('the', 'DET'), ('captain', 'VERB'), ('admitted', 'VERB'), ('.', '.')]
[('``', '.'), ('At', 'ADP'), ('this', 'DET'), ('stage', 'NOUN'), ('of', 'ADP'), ('the', 'DET'), ('game', 'ADP'), ('anything', 'VERB'), ('could', 'VERB'), ('be', 'VERB'), ('possible', 'ADJ'), ('.', '.')]
[('But', 'CONJ'), ("there's", 'NOUN'), ('one', 'NUM'), ('possibility', 'VERB'), ('I', 'PRON'), ('particularly', 'NOUN'), ("don't", 'NOUN'), ('like', 'ADP'), ("''", '.'), ('.', '.')]
[('``', '.'), ('And', 'CONJ'), ('that', 'ADP'), ('is', 'VERB'), ("''", '.'), ('?', '.'), ('?', '.')]
[('``', '.'), ('Suppose', 'NOUN'), ('it', 'PRON'), ('was', 'VERB

## Test Report

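- 56% accuracy with 40 training sentences, 300d embeddings
- 71% accuracy with 100 training sentences, 300d embeddings, 1000 test sentences
- 50% accuracy with 40 training sentences, 50d embeddings
- 58% accuracy with 100 training sentences, 50d embeddings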