## Curriculum Example

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import scipy
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [2]:
# Import the data we just downloaded and installed.
from nltk.corpus import gutenberg, stopwords

# Grab and process the raw data.
print(gutenberg.fileids())

persuasion = gutenberg.raw('austen-persuasion.txt')
alice = gutenberg.raw('carroll-alice.txt')

# Print the first 100 characters of Alice in Wonderland.
print('\nRaw:\n', alice[0:100])

['austen-emma.txt', 'austen-persuasion.txt', 'austen-sense.txt', 'bible-kjv.txt', 'blake-poems.txt', 'bryant-stories.txt', 'burgess-busterbrown.txt', 'carroll-alice.txt', 'chesterton-ball.txt', 'chesterton-brown.txt', 'chesterton-thursday.txt', 'edgeworth-parents.txt', 'melville-moby_dick.txt', 'milton-paradise.txt', 'shakespeare-caesar.txt', 'shakespeare-hamlet.txt', 'shakespeare-macbeth.txt', 'whitman-leaves.txt']

Raw:
 [Alice's Adventures in Wonderland by Lewis Carroll 1865]

CHAPTER I. Down the Rabbit-Hole

Alice was


In [3]:
# This pattern matches all text between square brackets.
pattern = "[\[].*?[\]]"
persuasion = re.sub(pattern, "", persuasion)
alice = re.sub(pattern, "", alice)

# Print the first 100 characters of Alice again.
print('Title removed:\n', alice[0:100])

Title removed:
 

CHAPTER I. Down the Rabbit-Hole

Alice was beginning to get very tired of sitting by her sister on


In [4]:
# Now we'll match and remove chapter headings.
persuasion = re.sub(r'Chapter \d+', '', persuasion)
alice = re.sub(r'CHAPTER .*', '', alice)

# Ok, what's it look like now?
print('Chapter headings removed:\n', alice[0:100])

Chapter headings removed:
 



Alice was beginning to get very tired of sitting by her sister on the
bank, and of having nothin


In [5]:
# Remove newlines and other extra whitespace by splitting and rejoining.
persuasion = ' '.join(persuasion.split())
alice = ' '.join(alice.split())

# All done with cleanup? Let's see how it looks.
print('Extra whitespace removed:\n', alice[0:100])

Extra whitespace removed:
 Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to


In [6]:
# Here is a list of the stopwords identified by NLTK.
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [7]:
import spacy
nlp = spacy.load('en')

# All the processing work is done here, so it may take a while.
alice_doc = nlp(alice)
persuasion_doc = nlp(persuasion)

In [8]:
# Let's explore the objects we've built.
print("The alice_doc object is a {} object.".format(type(alice_doc)))
print("It is {} tokens long".format(len(alice_doc)))
print("The first three tokens are '{}'".format(alice_doc[:3]))
print("The type of each token is {}".format(type(alice_doc[0])))

The alice_doc object is a <class 'spacy.tokens.doc.Doc'> object.
It is 34430 tokens long
The first three tokens are 'Alice was beginning'
The type of each token is <class 'spacy.tokens.token.Token'>


In [9]:
from collections import Counter

# Utility function to calculate how frequently words appear in the text.
def word_frequencies(text, include_stop=True):
    
    # Build a list of words.
    # Strip out punctuation and, optionally, stop words.
    words = []
    for token in text:
        if not token.is_punct and (not token.is_stop or include_stop):
            words.append(token.text)
            
    # Build and return a Counter object containing word counts.
    return Counter(words)
    
# The most frequent words:
alice_freq = word_frequencies(alice_doc).most_common(10)
persuasion_freq = word_frequencies(persuasion_doc).most_common(10)
print('Alice:', alice_freq)
print('Persuasion:', persuasion_freq)

Alice: [('the', 1524), ('and', 796), ('to', 724), ('a', 611), ('I', 534), ('it', 524), ('she', 508), ('of', 499), ('said', 453), ('Alice', 394)]
Persuasion: [('the', 3120), ('to', 2775), ('and', 2738), ('of', 2563), ('a', 1529), ('in', 1346), ('was', 1329), ('had', 1177), ('her', 1159), ('I', 1121)]


In [10]:
# Use our optional keyword argument to remove stop words.
alice_freq = word_frequencies(alice_doc, include_stop=False).most_common(10)
persuasion_freq = word_frequencies(persuasion_doc, include_stop=False).most_common(10)
print('Alice:', alice_freq)
print('Persuasion:', persuasion_freq)

Alice: [('I', 534), ('said', 453), ('Alice', 394), ("n't", 215), ("'s", 190), ('little', 124), ('The', 102), ('like', 84), ('went', 83), ('know', 83)]
Persuasion: [('I', 1121), ('Anne', 497), ("'s", 485), ('She', 326), ('Captain', 297), ('Mrs', 291), ('Elliot', 288), ('Mr', 255), ('He', 225), ('Wentworth', 217)]


In [11]:
# Pull out just the text from our frequency lists.
alice_common = [pair[0] for pair in alice_freq]
persuasion_common = [pair[0] for pair in persuasion_freq]

# Use sets to find the unique values in each top ten.
print('Unique to Alice:', set(alice_common) - set(persuasion_common))
print('Unique to Persuasion:', set(persuasion_common) - set(alice_common))

Unique to Alice: {'little', 'like', 'The', 'Alice', 'said', "n't", 'went', 'know'}
Unique to Persuasion: {'Mr', 'Mrs', 'She', 'Captain', 'Wentworth', 'Elliot', 'He', 'Anne'}


In [12]:
# Utility function to calculate how frequently lemas appear in the text.
def lemma_frequencies(text, include_stop=True):
    
    # Build a list of lemas.
    # Strip out punctuation and, optionally, stop words.
    lemmas = []
    for token in text:
        if not token.is_punct and (not token.is_stop or include_stop):
            lemmas.append(token.lemma_)
            
    # Build and return a Counter object containing word counts.
    return Counter(lemmas)

# Instantiate our list of most common lemmas.
alice_lemma_freq = lemma_frequencies(alice_doc, include_stop=False).most_common(10)
persuasion_lemma_freq = lemma_frequencies(persuasion_doc, include_stop=False).most_common(10)
print('\nAlice:', alice_lemma_freq)
print('Persuasion:', persuasion_lemma_freq)

# Again, identify the lemmas common to one text but not the other.
alice_lemma_common = [pair[0] for pair in alice_lemma_freq]
persuasion_lemma_common = [pair[0] for pair in persuasion_lemma_freq]
print('Unique to Alice:', set(alice_lemma_common) - set(persuasion_lemma_common))
print('Unique to Persuasion:', set(persuasion_lemma_common) - set(alice_lemma_common))


Alice: [('-PRON-', 758), ('say', 476), ('alice', 396), ('be', 254), ('not', 231), ('go', 133), ('think', 131), ('little', 126), ('the', 109), ('look', 105)]
Persuasion: [('-PRON-', 2241), ('anne', 497), ("'s", 466), ('captain', 303), ('elliot', 295), ('mrs', 291), ('good', 289), ('know', 258), ('think', 256), ('mr', 255)]
Unique to Alice: {'alice', 'be', 'little', 'say', 'not', 'look', 'go', 'the'}
Unique to Persuasion: {'elliot', 'mrs', "'s", 'mr', 'captain', 'good', 'know', 'anne'}


In [13]:
# Initial exploration of sentences.
sentences = list(alice_doc.sents)
print("Alice in Wonderland has {} sentences.".format(len(sentences)))

example_sentence = sentences[2]
print("Here is an example: \n{}\n".format(example_sentence))

Alice in Wonderland has 1678 sentences.
Here is an example: 
There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, 'Oh dear!



In [14]:
# Look at some metrics around this sentence.
example_words = [token for token in example_sentence if not token.is_punct]
unique_words = set([token.text for token in example_words])

print(("There are {} words in this sentence, and {} of them are"
       " unique.").format(len(example_words), len(unique_words)))

There are 29 words in this sentence, and 25 of them are unique.


In [15]:
print(nlp("I need a break")[3].pos_)
print(nlp("I need to break the glass")[3].pos_)

NOUN
VERB


In [16]:
# View the part of speech for some tokens in our sentence.
print('\nParts of speech:')
for token in example_sentence[:9]:
    print(token.orth_, token.pos_)


Parts of speech:
There ADV
was VERB
nothing NOUN
so ADV
VERY ADV
remarkable ADJ
in ADP
that DET
; PUNCT


In [17]:
# View the dependencies for some tokens.
print('\nDependencies:')
for token in example_sentence[:9]:
    print(token.orth_, token.dep_, token.head.orth_)


Dependencies:
There expl was
was ROOT was
nothing attr was
so advmod remarkable
VERY advmod remarkable
remarkable amod nothing
in prep nothing
that pobj in
; punct was


In [18]:
# Extract the first ten entities.
entities = list(alice_doc.ents)[0:10]
for entity in entities:
    print(entity.label_, ' '.join(t.orth_ for t in entity))

PERSON Alice
DATE the hot day
PERSON Alice
PRODUCT Rabbit
PRODUCT Rabbit
PRODUCT WAISTCOAT - POCKET
PERSON Alice
PERSON Alice
PERSON Alice
ORDINAL First


In [19]:
# All of the uniqe entities spaCy thinks are people.
people = [entity.text for entity in list(alice_doc.ents) if entity.label_ == "PERSON"]
print(set(people))

{'Pinch', 'the Lobster Quadrille?', 'this:--', 'indeed:--', 'Stand', 'Serpent', 'Frog-Footman', 'Shall', 'Cheshire Puss', 'Mock Turtle', 'Edwin', 'Fish-Footman', 'the White Rabbit', 'Shakespeare', 'WILLIAM', 'William', 'Sixteenth', 'FUL SOUP', 'Brandy', 'M--', 'INSIDE', 'the Duchess', 'Morcar', 'Soup', 'Canary', 'Latin Grammar', 'Tut', 'Soles', 'The Fish-Footman', 'Turn', 'Soup of the evening', 'Run', 'Panther', 'the Lobster Quadrille', 'Hjckrrh', 'Longitude', 'Tillie', 'Stolen', 'the March Hare', 'The Mock Turtle', "the King: '", 'Fifteenth', "Dinah'll", 'Mary Ann', 'Beautiful Soup', 'Adventures', 'began:--', '--or', 'The White Rabbit', 'Said', 'Bill', 'Lacie', "Don't", 'Shy', 'Hush', 'the King', 'Jack', 'Fetch', 'Turtle Soup', 'Duchess', 'Kings', 'Seaography', 'Down', 'Prizes', 'Treacle', 'Duck', 'Ma', 'a Lobster Quadrille', 'Begin', 'Footman', 'Latitude', 'Mabel', "the Duchess: '", 'Queen', 'Idiot', "the Mock Turtle: '", 'The Queen', 'Majesty', 'Drink', 'Edgar Atheling', 'Stupid', '

## Curriculum Part 2

In [20]:
%matplotlib inline
import numpy as np
import pandas as pd
import scipy
import sklearn
import spacy
import matplotlib.pyplot as plt
import seaborn as sns
import re
from nltk.corpus import gutenberg, stopwords
from collections import Counter

In [21]:
# Utility function for standard text cleaning.
def text_cleaner(text):
    # Visual inspection identifies a form of punctuation spaCy does not
    # recognize: the double dash '--'.  Better get rid of it now!
    text = re.sub(r'--',' ',text)
    text = re.sub("[\[].*?[\]]", "", text)
    text = ' '.join(text.split())
    return text
    
# Load and clean the data.
persuasion = gutenberg.raw('austen-persuasion.txt')
alice = gutenberg.raw('carroll-alice.txt')

# The Chapter indicator is idiosyncratic
persuasion = re.sub(r'Chapter \d+', '', persuasion)
alice = re.sub(r'CHAPTER .*', '', alice)
    
alice = text_cleaner(alice)
persuasion = text_cleaner(persuasion)

In [22]:
# Parse the cleaned novels. This can take a bit.
nlp = spacy.load('en')
alice_doc = nlp(alice)
persuasion_doc = nlp(persuasion)

In [23]:
# Group into sentences.
alice_sents = [[sent, "Carroll"] for sent in alice_doc.sents]
persuasion_sents = [[sent, "Austen"] for sent in persuasion_doc.sents]

# Combine the sentences from the two novels into one data frame.
sentences = pd.DataFrame(alice_sents + persuasion_sents)
sentences.head()

Unnamed: 0,0,1
0,"(Alice, was, beginning, to, get, very, tired, ...",Carroll
1,"(So, she, was, considering, in, her, own, mind...",Carroll
2,"(There, was, nothing, so, VERY, remarkable, in...",Carroll
3,"(Oh, dear, !)",Carroll
4,"(I, shall, be, late, !, ')",Carroll


In [24]:
# Utility function to create a list of the 2000 most common words.
def bag_of_words(text):
    
    # Filter out punctuation and stop words.
    allwords = [token.lemma_
                for token in text
                if not token.is_punct
                and not token.is_stop]
    
    # Return the most common words.
    return [item[0] for item in Counter(allwords).most_common(2000)]
    

# Creates a data frame with features for each word in our common word set.
# Each value is the count of the times the word appears in each sentence.
def bow_features(sentences, common_words):
    
    # Scaffold the data frame and initialize counts to zero.
    df = pd.DataFrame(columns=common_words)
    df['text_sentence'] = sentences[0]
    df['text_source'] = sentences[1]
    df.loc[:, common_words] = 0
    
    # Process each row, counting the occurrence of words in each sentence.
    for i, sentence in enumerate(df['text_sentence']):
        
        # Convert the sentence to lemmas, then filter out punctuation,
        # stop words, and uncommon words.
        words = [token.lemma_
                 for token in sentence
                 if (
                     not token.is_punct
                     and not token.is_stop
                     and token.lemma_ in common_words
                 )]
        
        # Populate the row with word counts.
        for word in words:
            df.loc[i, word] += 1
        
        # This counter is just to make sure the kernel didn't hang.
        if i % 500 == 0:
            print("Processing row {}".format(i))
            
    return df

# Set up the bags.
alicewords = bag_of_words(alice_doc)
persuasionwords = bag_of_words(persuasion_doc)

# Combine bags to create a set of unique words.
common_words = set(alicewords + persuasionwords)

In [25]:
# Create our data frame with features. This can take a while to run.
word_counts = bow_features(sentences, common_words)
word_counts.head()

Processing row 0
Processing row 500
Processing row 1000
Processing row 1500
Processing row 2000
Processing row 2500
Processing row 3000
Processing row 3500
Processing row 4000
Processing row 4500
Processing row 5000


Unnamed: 0,lyme,caper,principally,frowning,acquaintance,strengthen,shame,penance,silent,inclination,...,expense,morcar,astonishment,bag,read,rate,zeal,liable,text_sentence,text_source
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,"(Alice, was, beginning, to, get, very, tired, ...",Carroll
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,"(So, she, was, considering, in, her, own, mind...",Carroll
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,"(There, was, nothing, so, VERY, remarkable, in...",Carroll
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,"(Oh, dear, !)",Carroll
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,"(I, shall, be, late, !, ')",Carroll


In [26]:
word_counts.shape

(5318, 3064)

In [27]:
from sklearn import ensemble
from sklearn.model_selection import train_test_split

rfc = ensemble.RandomForestClassifier()
Y = word_counts['text_source']
X = np.array(word_counts.drop(['text_sentence','text_source'], 1))

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    Y,
                                                    test_size=0.4,
                                                    random_state=0)
train = rfc.fit(X_train, y_train)

print('Training set score:', rfc.score(X_train, y_train))
print('\nTest set score:', rfc.score(X_test, y_test))

Training set score: 0.9871473354231975

Test set score: 0.8862781954887218


In [28]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
train = lr.fit(X_train, y_train)
print(X_train.shape, y_train.shape)
print('Training set score:', lr.score(X_train, y_train))
print('\nTest set score:', lr.score(X_test, y_test))

(3190, 3062) (3190,)
Training set score: 0.9579937304075236

Test set score: 0.9158834586466166


In [29]:
clf = ensemble.GradientBoostingClassifier()
train = clf.fit(X_train, y_train)

print('Training set score:', clf.score(X_train, y_train))
print('\nTest set score:', clf.score(X_test, y_test))

Training set score: 0.8846394984326019

Test set score: 0.8740601503759399


In [30]:
# Clean the Emma data.
emma = gutenberg.raw('austen-emma.txt')
emma = re.sub(r'VOLUME \w+', '', emma)
emma = re.sub(r'CHAPTER \w+', '', emma)
emma = text_cleaner(emma)
print(emma[:100])

Emma Woodhouse, handsome, clever, and rich, with a comfortable home and happy disposition, seemed to


In [31]:
# Parse our cleaned data.
emma_doc = nlp(emma)

In [32]:
# Group into sentences.
persuasion_sents = [[sent, "Austen"] for sent in persuasion_doc.sents]
emma_sents = [[sent, "Austen"] for sent in emma_doc.sents]

# Emma is quite long, let's cut it down to the same length as Alice.
emma_sents = emma_sents[0:len(alice_sents)]

In [33]:
# Build a new Bag of Words data frame for Emma word counts.
# We'll use the same common words from Alice and Persuasion.
emma_sentences = pd.DataFrame(emma_sents)
emma_bow = bow_features(emma_sentences, common_words)

print('done')

Processing row 0
Processing row 500
Processing row 1000
Processing row 1500
done


In [34]:
# Now we can model it!
# Let's use logistic regression again.

# Combine the Emma sentence data with the Alice data from the test set.
X_Emma_test = np.concatenate((
    X_train[y_train[y_train=='Carroll'].index],
    emma_bow.drop(['text_sentence','text_source'], 1)
), axis=0)
y_Emma_test = pd.concat([y_train[y_train=='Carroll'],
                         pd.Series(['Austen'] * emma_bow.shape[0])])

# Model.
print('\nTest set score:', lr.score(X_Emma_test, y_Emma_test))
lr_Emma_predicted = lr.predict(X_Emma_test)
pd.crosstab(y_Emma_test, lr_Emma_predicted)


Test set score: 0.6976137211036539


col_0,Austen,Carroll
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
Austen,1564,105
Carroll,706,307


## Challenge

In [35]:
from sklearn import svm
clf = svm.SVC()
clf.fit(X_train, y_train)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [36]:
print('Training set score:', clf.score(X_train, y_train))
print('\nTest set score:', clf.score(X_test, y_test))

Training set score: 0.6824451410658308

Test set score: 0.6917293233082706


In [37]:
alice_sent_len = [[len(sent), 'Carroll'] for sent in alice_doc.sents]
persuasion_sent_len = [[len(sent), 'Austen'] for sent in persuasion_doc.sents]

In [38]:
sent_len = pd.DataFrame(alice_sent_len + persuasion_sent_len)
sent_len.head(5)

Unnamed: 0,0,1
0,67,Carroll
1,63,Carroll
2,33,Carroll
3,3,Carroll
4,6,Carroll


In [39]:
sent_len.describe()

Unnamed: 0,0
count,5318.0
mean,24.957127
std,21.99476
min,1.0
25%,10.0
50%,18.0
75%,33.0
max,227.0


In [40]:
sent_len.columns = ['sent_len', 'author']

all_features = word_counts.join(sent_len)

In [41]:
all_features

Unnamed: 0,lyme,caper,principally,frowning,acquaintance,strengthen,shame,penance,silent,inclination,...,astonishment,bag,read,rate,zeal,liable,text_sentence,text_source,sent_len,author
0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,"(Alice, was, beginning, to, get, very, tired, ...",Carroll,67,Carroll
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,"(So, she, was, considering, in, her, own, mind...",Carroll,63,Carroll
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,"(There, was, nothing, so, VERY, remarkable, in...",Carroll,33,Carroll
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,"(Oh, dear, !)",Carroll,3,Carroll
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,"(I, shall, be, late, !, ')",Carroll,6,Carroll
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,"((, when, she, thought, it, over, afterwards, ...",Carroll,126,Carroll
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,"(In, another, moment, down, went, Alice, after...",Carroll,23,Carroll
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,"(The, rabbit, -, hole, went, straight, on, lik...",Carroll,44,Carroll
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,"(Either, the, well, was, very, deep, ,, or, sh...",Carroll,37,Carroll
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,"(First, ,, she, tried, to, look, down, and, ma...",Carroll,49,Carroll


In [42]:
for sent in alice_doc.sents:
    for token in sent:
        print([token.pos_, token.tag_, 'Carroll'])

['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBG', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADP', 'IN', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADP', 'IN', 'Carroll']
['VERB', 'VBG', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PUNCT', ':', 'Carroll']
['ADV', 'RB', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADV', 'RB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBG', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON'

['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', '.', 'Carroll']
['INTJ', 'UH', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADV', 'WRB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBP', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['PART', 'RP', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBP', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBP', 'Carroll']
['ADV', 'WRB', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['ADP', 

['VERB', 'VBD', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['VERB', 'VBG', 'Carroll']
['NOUN', 'NN', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', 'HYPH', 'Carroll']
['ADP', 'IN', 'Carroll']
['PUNCT', 'HYPH', 'Carroll']
['DET', 'DT', 'Carroll']
['PUNCT', 'HYPH', 'Carroll']
['NOUN', 'NN', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PART', 'RP', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '

['ADV', 'WRB', 'Carroll']
['ADJ', 'PDT', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', '``', 'Carroll']
['PUNCT', "''", 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBG', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NNS', 'Carroll']

['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['VERB', 'VB', 'Carroll']
['PART', 'RP', 'Carroll']
['ADP', 'IN', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', '-LRB-', 'Carroll']
['ADP', 'IN', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PDT', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADV', 'WRB', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', '-RRB-', 'Carroll']
['ADV', 'RB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VER

['PUNCT', ',', 'Carroll']
['ADJ', 'WP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['NOUN', 'WP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN'

['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', ',', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['PUNCT', '.', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ':', 'Carroll']
['DET', 'DT', 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', '

['VERB', 'VBP', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', '.', 'Carroll']
['ADV', 'WRB', 'Carroll']
['VERB', 'MD', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', ',', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON', 'PRP', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['PU

['ADJ', 'JJR', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBP', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', '.', 'Carroll']
['DET', 'DT', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['NUM', 'CD', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADV', 'RB', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PUNCT', '.', 'Carroll']
['INTJ', 'UH', 'Carroll']
['PUNCT', ',', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VB', 'Carroll']
['DET', 'DT', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'C

['ADV', 'EX', 'Carroll']
['VERB', 'VBZ', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['ADV', 'RB', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PART', 'POS', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['PUNCT', '``', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', ',', 'Carroll']
['PRON', 'PRP', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['ADV', 'RB', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADV', 'RB', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', '``', 'Carroll']
['VERB', 'VB', 'Carroll']
['PART', 'RP', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBP', 'Carroll']
['ADV

['CCONJ', 'CC', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PRON', 'PRP', 'Carroll']
['ADP', 'IN', 'Carroll']
['NUM', 'CD', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ':', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['VERB', 'VBG', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBZ', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', '.', 'Carroll']
['INTJ', 'UH', 'Carroll']
['INTJ', 'UH', 'Carroll']
['PUNCT', '.', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', '

['VERB', 'VBD', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['VERB', 'VBP', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADV', 'RBS', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ':', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', 'HYPH', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['NOUN', 'WP', 'Carroll']
['VERB', 'VBZ', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET

['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBG', 'Carroll']
['PRON', 'PRP', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', '.', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADV', 'EX', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN

['ADP', 'IN', 'Carroll']
['NUM', 'CD', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['NOUN', 'WP', 'Carroll']
['VERB', 'VBZ', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VB', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ':', 'Carroll']
['ADV', 'WRB', 'Carroll']
['PUNCT', ',', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['ADV'

['ADV', 'RB', 'Carroll']
['VERB', 'VB', 'Carroll']
['PUNCT', ',', 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADJ', 'WDT', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['INTJ', 'UH', 'Carroll']
['VERB', 'MD', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', 

['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '-LRB-', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBN', 'Carroll']
['PART', 'RP', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '-RRB-', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VB', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VB', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ':', 'Carroll']
['PUNCT', "''", 'Carroll']
['DET', 'DT', 'Carroll']
['VERB', 'VBZ', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADJ', 'PDT', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['AD

['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['PROPN', 'NNP', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PR

['VERB', 'VBZ', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADV', 'RB', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['NOUN', '

['PUNCT', ',', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['ADV', 'WRB', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PART', 'RP', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PART', 'RP', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', '``', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBZ', 'Carroll']
['VERB', 'VBG', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', '``', 'Carroll']
['PUNCT', "''", 'Carroll']
[

['ADJ', 'JJ', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBG', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', ',', 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ':', 'Carroll']
['PUNCT', "''", 'Carroll']
['A

['PUNCT', ':', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NUM', 'CD', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ':', 'Carroll']
['DET', 'DT', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['VERB', 'VBD', 'Carroll']
['NUM', 'CD', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['NUM', 'CD', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', '.', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ':', 'Carroll']
['ADV', 'EX', 'Carroll']
['VERB', 'VBD', 'Carroll']
['NUM', 'CD', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', '

['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', '``', 'Carroll']
['NOUN', 'WP', 'Carroll']
['ADP', 'IN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PART', 'POS', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PART', 'POS', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', '.', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['INTJ', 'UH', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['DET', 'DT', 'Carroll']


['ADV', 'RB', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBP', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADV', 'WRB', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['INTJ', 'UH', 'Carroll']
['PU

['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADV', 'RB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PART', 'RP', 'Carroll']
['ADV', 'RBR', 'Carroll']
['ADP', 'IN', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PART', 'POS', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', '.', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADP', 'IN', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', ':', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN'

['PUNCT', '.', 'Carroll']
['ADJ', 'PDT', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['VERB', 'VBG', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PART', 'RP', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['VERB', 'VBG', 'Carroll']
['PUNCT', '``', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PUNCT', '``', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'WP', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VER

['PUNCT', ',', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll

['NOUN', 'NNS', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ',', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', ',', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ':', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADV', 'WRB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADJ', 'PDT', 'Carroll']
['DET', 'DT', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', 'HYPH', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['DET', 'DT', 'Carroll']
['ADV', 'RB', 'Carroll']
['VER

['ADV', 'WRB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBZ', 'Carroll']
['VERB', 'VBN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['ADV', 'WRB', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', "''", 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'VBZ', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', '.', 'Carr

['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', '``', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VB', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['PART', 'RP', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ':', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUN

['PUNCT', '``', 'Carroll']
['ADP', 'IN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', '``', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB',

['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NNS', 'Carroll']
['PART', 'RP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VB', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['CCONJ', 'CC', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBN', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBP', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['CCONJ', 'CC', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', '``', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBP', 'Carroll']
['DET', 'DT', 'Carroll

['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['VERB', 'VBG', 'Carroll']
['ADV', 'RB', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PART', 'TO', 'Carroll']
['VERB', 'VB', 'Carroll']
['NOUN', 'WP', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADP', 'IN', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBP', 'Carroll']
['ADV', 'RB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', '.', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', ',', 'Carroll']
['ADV', 'WRB', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP

['PUNCT', ',', 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'VBP', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['PUNCT', "''", 'Carroll']
['CCONJ', 'CC', 'Carroll']
['DET', 'DT', 'Carroll']
['VERB', 'VBZ', 'Carroll']
['DET', 'DT', 'Carroll']
['ADJ', 'JJS', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADP', 'IN', 'Carroll']
['PRON', 'PRP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', '-LRB-', 'Carroll']
['DET', 'DT', 'Carroll']
['NOUN', 'NN', 'Carroll']
['DET', 'DT', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADJ', 'JJ', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', '-RRB-', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['VERB', 'VB', 'Carroll']
['VERB', 'VBN', 'Carroll']
['NOUN', 'NN', 'Carroll']
['ADV', 'RB', 'Carroll']
['PART', 'POS', 'Carro

['NOUN', 'NN', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PUNCT', '``', 'Carroll']
['VERB', 'VB', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', ',', 'Carroll']
['VERB', 'VBG', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['PRON', 'PRP', 'Carroll']
['VERB', 'MD', 'Carroll']
['ADV', 'RB', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['VERB', 'VBD', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['ADP', 'IN', 'Carroll']
['ADP', 'IN', 'Carroll']
['ADJ', 'PRP$', 'Carroll']
['NOUN', 'NN', 'Carroll']
['PUNCT', '.', 'Carroll']
['PUNCT', "''", 'Carroll']
['DET', 'DT', 'Carroll']
['PROPN', 'NNP', 'Carroll']
['VERB', 'VBD', 'Carroll']
['ADP', 'IN', 'Carroll']
['DET', 'DT', 'Carroll']


In [43]:
all_features.shape

(5318, 3066)

In [44]:
all_features.text_sentence.head(5)

0    (Alice, was, beginning, to, get, very, tired, ...
1    (So, she, was, considering, in, her, own, mind...
2    (There, was, nothing, so, VERY, remarkable, in...
3                                        (Oh, dear, !)
4                           (I, shall, be, late, !, ')
Name: text_sentence, dtype: object

In [45]:
from sklearn import ensemble
from sklearn.model_selection import train_test_split

rfc = ensemble.RandomForestClassifier()
Y = all_features['text_source']
X = np.array(all_features.drop(['text_sentence','text_source', 'author'], 1))

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    Y,
                                                    test_size=0.4,
                                                    random_state=0)
train = rfc.fit(X_train, y_train)

print('Training set score:', rfc.score(X_train, y_train))
print('\nTest set score:', rfc.score(X_test, y_test))

Training set score: 0.986833855799373

Test set score: 0.8858082706766918


In [46]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
train = lr.fit(X_train, y_train)
print(X_train.shape, y_train.shape)
print('Training set score:', lr.score(X_train, y_train))
print('\nTest set score:', lr.score(X_test, y_test))

(3190, 3063) (3190,)
Training set score: 0.958307210031348

Test set score: 0.9177631578947368


In [47]:
clf = ensemble.GradientBoostingClassifier()
train = clf.fit(X_train, y_train)

print('Training set score:', clf.score(X_train, y_train))
print('\nTest set score:', clf.score(X_test, y_test))

Training set score: 0.8871473354231975

Test set score: 0.8707706766917294


In [48]:
from sklearn import svm
clf = svm.SVC()
clf.fit(X_train, y_train)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [49]:
print('Training set score:', clf.score(X_train, y_train))
print('\nTest set score:', clf.score(X_test, y_test))

Training set score: 0.6824451410658308

Test set score: 0.6917293233082706


In [50]:
# Utility function to calculate how frequently words appear in the text.
def word_freq(text, include_stop=True):
    
    # Build a list of words.
    # Strip out punctuation and, optionally, stop words.
    words = []
    for token in text:
        if not token.is_punct and (not token.is_stop or include_stop):
            words.append(token.text)
            
    # Build and return a Counter object containing word counts.
    return Counter(words)
    
# The most frequent words:
alice_freq = word_frequencies(alice_doc).most_common(10)
persuasion_freq = word_frequencies(persuasion_doc).most_common(10)
print('Alice:', alice_freq)
print('Persuasion:', persuasion_freq)

Alice: [('the', 1525), ('and', 800), ('to', 725), ('a', 612), ('I', 540), ('it', 527), ('she', 509), ('of', 499), ('said', 456), ('Alice', 395)]
Persuasion: [('the', 3120), ('to', 2775), ('and', 2739), ('of', 2564), ('a', 1529), ('in', 1346), ('was', 1330), ('had', 1177), ('her', 1159), ('I', 1121)]


In [52]:
sentences.head(5)

Unnamed: 0,0,1
0,"(Alice, was, beginning, to, get, very, tired, ...",Carroll
1,"(So, she, was, considering, in, her, own, mind...",Carroll
2,"(There, was, nothing, so, VERY, remarkable, in...",Carroll
3,"(Oh, dear, !)",Carroll
4,"(I, shall, be, late, !, ')",Carroll


In [53]:
list(sentences.columns)

[0, 1]

In [54]:
sentences.columns = ['sent_text', 'author']
list(sentences.columns)

['sent_text', 'author']

### Discussion

Curriculum:  
Random Forest: 0.987 // 0.890  
Logistic Regression: 0.958 // 0.916  
Gradient Boost: 0.887 // 0.874  

SVM: 0.682 // 0.692  

W/ Added Features:  
Random Forest: 0.988 // 0.890  
Logistic Regression: 0.958 // 0.918  
Gradient Boost: 0.887 // 0.871  
SVM: 0.682 // 0.692  

With one added feature, "sentence length", the accuracy of each of the models did not change in any significant way, which is what would be expected. The largest change among these was the test set of the gradient boost model, which actually decreased by 0.3%. Training accuracy improved very slightly for the random forest model but was unchanged for the other three models. Logistic Regression is currently the model that performs the best, at nearly 92% accuracy. 

I will continue to work on adding a part-of-speech feature, which I think will be improve accuracy more considerably. It would seem that the ways in which Jane Austen and Lewis Carroll use different parts of speech would potentially correlate much more strongly to the author than would general sentence length. 