In [3]:
import spacy
import pandas as pd
import itertools as it
from spacy.en import English
# Instantiate the English class imported from spacy.en; `parser` is the object
# that is later called on the review text.
parser = English()

In [4]:
# NOTE(review): this loads a second spaCy object in addition to `parser`.
# None of the cells visible in this part of the notebook call `nlp` — confirm it
# is used further down; otherwise one of the two loads is redundant.
nlp = spacy.load('en')

In [5]:
# Example review used as the input text for the cells below.
# The double spaces in the text are intentional-as-found: they show up as
# separate ' ' tokens in the recorded outputs.
sample_review = "I bought this for my husband who plays the piano.  He is having a wonderful time playing these old hymns.  The music  is at times hard to read because we think the book was published for singing from more than playing from.  Great purchase though!"

In [7]:
%%time
# Parse the review with `parser`; the text is converted with unicode() before
# being passed in (this notebook runs on Python 2).
# The original note suggests nlp(unicode(sample_review)) as an equivalent call
# — TODO confirm before swapping one for the other.
parsed_review = parser(unicode(sample_review))

Wall time: 232 ms


## Part-of-Speech (POS) Tagging

In [9]:
# Sentence Parsing

# `parsed_review.sents` yields one span per detected sentence.
# span.start / span.end are token positions within the parsed document (not
# character offsets into the original string), and a span can be iterated
# token by token directly, so no index arithmetic is needed here.
# Each token's `.string` is joined back together and strip() removes the
# whitespace left over at the sentence boundary.
sents = [
    ''.join(token.string for token in span).strip()
    for span in parsed_review.sents
]

for sentence in sents:
    print(sentence)

I bought this for my husband who plays the piano.
He is having a wonderful time playing these old hymns.
The music  is at times hard to read because we think the book was published for singing from more than playing from.
Great purchase though!


In [10]:
# Part-of-speech tags for the tokens of the first sentence only.
# The first span is taken explicitly instead of looping and breaking. If the
# document has no sentences, `sent` becomes an empty list rather than silently
# keeping a value assigned by an earlier cell.
span = next(iter(parsed_review.sents), None)
sent = list(span) if span is not None else []

# One line per token: its text and its coarse part-of-speech tag.
for token in sent:
    print(token.orth_, token.pos_)

(u'I', u'PRON')
(u'bought', u'VERB')
(u'this', u'DET')
(u'for', u'ADP')
(u'my', u'ADJ')
(u'husband', u'NOUN')
(u'who', u'NOUN')
(u'plays', u'VERB')
(u'the', u'DET')
(u'piano', u'NOUN')
(u'.', u'PUNCT')
(u' ', u'SPACE')


# Dependency Parsing

In [13]:
# One line per token, in this order:
#   token text, dependency tag, text of the head token,
#   texts of the left dependents, texts of the right dependents
for token in parsed_review:
    left_dependents = [t.orth_ for t in token.lefts]
    right_dependents = [t.orth_ for t in token.rights]
    print(token.orth_, token.dep_, token.head.orth_, left_dependents, right_dependents)

(u'I', u'nsubj', u'bought', [], [])
(u'bought', u'ROOT', u'bought', [u'I'], [u'this', u'for', u'.'])
(u'this', u'dobj', u'bought', [], [])
(u'for', u'prep', u'bought', [], [u'husband'])
(u'my', u'poss', u'husband', [], [])
(u'husband', u'pobj', u'for', [u'my'], [u'plays'])
(u'who', u'nsubj', u'plays', [], [])
(u'plays', u'relcl', u'husband', [u'who'], [u'piano'])
(u'the', u'det', u'piano', [], [])
(u'piano', u'dobj', u'plays', [u'the'], [])
(u'.', u'punct', u'bought', [], [u' '])
(u' ', u'', u'.', [], [])
(u'He', u'nsubj', u'is', [], [])
(u'is', u'aux', u'having', [u'He'], [])
(u'having', u'ROOT', u'having', [u'is'], [u'time', u'playing', u'.'])
(u'a', u'det', u'time', [], [])
(u'wonderful', u'amod', u'time', [], [])
(u'time', u'dobj', u'having', [u'a', u'wonderful'], [])
(u'playing', u'conj', u'having', [], [u'hymns'])
(u'these', u'det', u'hymns', [], [])
(u'old', u'amod', u'hymns', [], [])
(u'hymns', u'dobj', u'playing', [u'these', u'old'], [])
(u'.', u'punct', u'having', [], [u' '])

# Dependency Tree

In [11]:
from nltk import Tree

In [12]:
def to_nltk_tree(node):
    """Convert a parsed token and its dependents into an nltk Tree.

    Args:
        node: a token exposing `n_lefts`, `n_rights`, `children` and `orth_`.

    Returns:
        A `Tree` labelled with the token's text whose children are the
        recursively converted child tokens, or just the token's text when
        the token has no left or right dependents.
    """
    has_dependents = node.n_lefts + node.n_rights > 0
    if not has_dependents:
        # Leaf: represented by its text alone, not wrapped in a Tree.
        return node.orth_

    subtrees = [to_nltk_tree(child) for child in node.children]
    return Tree(node.orth_, subtrees)


# Print an ASCII dependency tree for every sentence, starting from the sentence root.
# pretty_print() returns None, so the value of this cell is a list of Nones
# (the "[None, None, None, None]" shown after the trees); the comprehension is
# used only for its printing side effect.
# NOTE(review): to_nltk_tree returns a plain string for a root without children;
# such a sentence would fail here because a string has no pretty_print().
[to_nltk_tree(sent.root).pretty_print() for sent in parsed_review.sents]

         bought                        
  _________|_________________________   
 |   |            for                | 
 |   |             |                 |  
 |   |          husband              | 
 |   |      _______|______           |  
 |   |     |            plays        | 
 |   |     |        ______|_____     |  
 |   |     |       |          piano  . 
 |   |     |       |            |    |  
 I  this   my     who          the     

               having                       
  _______________|________________________   
 |       |                   playing      | 
 |       |                      |         |  
 is     time                  hymns       . 
 |    ___|_______         ______|_____    |  
 He  a       wonderful these         old    

                            is                                  
       _____________________|_________________________________   
      |         |    |               think                    | 
      |         |    |       __________

[None, None, None, None]

# Named Entity Recognition

In [14]:
# Materialise the entity spans found in the review, then print for each one
# its `label`, its `label_` and the texts of its tokens joined by single spaces.
ents = list(parsed_review.ents)
for entity in ents:
    entity_text = ' '.join(t.orth_ for t in entity)
    print(entity.label, entity.label_, entity_text)

In [16]:
# Per-token view of the entity annotation: token text, entity type, IOB tag.
for token in parsed_review:
    entity_type, iob_tag = token.ent_type_, token.ent_iob_
    print(token.orth_, entity_type, iob_tag)

(u'I', u'', u'O')
(u'bought', u'', u'O')
(u'this', u'', u'O')
(u'for', u'', u'O')
(u'my', u'', u'O')
(u'husband', u'', u'O')
(u'who', u'', u'O')
(u'plays', u'', u'O')
(u'the', u'', u'O')
(u'piano', u'', u'O')
(u'.', u'', u'O')
(u' ', u'', u'O')
(u'He', u'', u'O')
(u'is', u'', u'O')
(u'having', u'', u'O')
(u'a', u'', u'O')
(u'wonderful', u'', u'O')
(u'time', u'', u'O')
(u'playing', u'', u'O')
(u'these', u'', u'O')
(u'old', u'', u'O')
(u'hymns', u'', u'O')
(u'.', u'', u'O')
(u' ', u'', u'O')
(u'The', u'', u'O')
(u'music', u'', u'O')
(u' ', u'', u'O')
(u'is', u'', u'O')
(u'at', u'', u'O')
(u'times', u'', u'O')
(u'hard', u'', u'O')
(u'to', u'', u'O')
(u'read', u'', u'O')
(u'because', u'', u'O')
(u'we', u'', u'O')
(u'think', u'', u'O')
(u'the', u'', u'O')
(u'book', u'', u'O')
(u'was', u'', u'O')
(u'published', u'', u'O')
(u'for', u'', u'O')
(u'singing', u'', u'O')
(u'from', u'', u'O')
(u'more', u'', u'O')
(u'than', u'', u'O')
(u'playing', u'', u'O')
(u'from', u'', u'O')
(u'.', u'', u'O')
(u