## NLTK

In [1]:
import nltk

In [3]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/rachelrosenberg/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [6]:
from nltk.tokenize import sent_tokenize, word_tokenize
text = 'I am happy. I am sleepy. I am dreamy.'
sent_tokenize(text)

['I am happy.', 'I am sleepy.', 'I am dreamy.']

In [7]:
word_tokenize(text)

['I', 'am', 'happy', '.', 'I', 'am', 'sleepy', '.', 'I', 'am', 'dreamy', '.']

## Spacy

In [8]:
from spacy.lang.en import English
text = 'I am happy. I am sleepy. I am dreamy.' 
nlp = English() 
nlp.add_pipe(nlp.create_pipe('sentencizer'))
doc = nlp(text) 
[sent.string.strip() for sent in doc.sents]

['I am happy.', 'I am sleepy.', 'I am dreamy.']

In [9]:
[token.text for token in doc]

['I', 'am', 'happy', '.', 'I', 'am', 'sleepy', '.', 'I', 'am', 'dreamy', '.']

## StanfordNLP

In [11]:
import stanfordnlp
stanfordnlp.download('en')

Using the default treebank "en_ewt" for language "en".
Would you like to download the models for: en_ewt now? (Y/n)
Y

Default download directory: /Users/rachelrosenberg/stanfordnlp_resources
Hit enter to continue or type an alternate directory.


Downloading models for: en_ewt
Download location: /Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models.zip


100%|██████████| 235M/235M [00:53<00:00, 4.36MB/s] 



Download complete.  Models saved to: /Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models.zip
Extracting models file for: en_ewt
Cleaning up...Done.


In [12]:
text = 'I am happy. I am sleepy. I am dreamy.'
nlp = stanfordnlp.Pipeline()
doc = nlp(text)
[' '.join([token.text for token in sentence.tokens]).strip() for sentence in doc.sentences]

Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt_tokenizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt_tagger.pt', 'pretrain_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt.pretrain.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt_lemmatizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmatizer with edit classifier]
---
Loading: depparse
With settings: 
{'model_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt_parser.pt', 'pretrain_path': '/U



['I am happy .', 'I am sleepy .', 'I am dreamy .']

In [14]:
from functools import reduce

text = 'I am happy. I am sleepy. I am dreamy.'
nlp = stanfordnlp.Pipeline()
doc = nlp(text)
words_by_sentence = [[token.text for token in sentence.tokens] for sentence in doc.sentences]
reduce(lambda lst1,lst2: lst1 + lst2, words_by_sentence)

Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt_tokenizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt_tagger.pt', 'pretrain_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt.pretrain.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt_lemmatizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmatizer with edit classifier]
---
Loading: depparse
With settings: 
{'model_path': '/Users/rachelrosenberg/stanfordnlp_resources/en_ewt_models/en_ewt_parser.pt', 'pretrain_path': '/U



['I', 'am', 'happy', '.', 'I', 'am', 'sleepy', '.', 'I', 'am', 'dreamy', '.']