In [3]:
import spacy

In [4]:
# Load the small English pipeline once; later cells reuse `nlp` to process text.
nlp = spacy.load("en_core_web_sm")

In [7]:
# Run the pipeline over a sample sentence; the result is iterated token by token below.
doc = nlp("Tesla is looking at buying U.S. startup for $6 million")

In [12]:
# One line per token: its text, part-of-speech tag (.pos_),
# and syntactic dependency label (.dep_).
for token in doc:
    print(f"{token.text} {token.pos_} {token.dep_}")

Tesla NOUN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S. PROPN compound
startup NOUN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [13]:
# The loaded pipeline components as (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x7fbb25a374c0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x7fbb25a370a0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x7fbb33a5f7b0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x7fbb25afd380>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7fbb25b087c0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7fbb33a5f890>)]

In [14]:
# Only the component names, without the component objects.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [24]:
# A contraction plus a run of extra spaces -- presumably deliberate, to show
# how the tokenizer handles both.
doc2 = nlp("Tesla isn't    looking into startups anymore.")

In [25]:
# Text, part-of-speech tag and dependency label for every token in doc2.
# Note in the output that "isn't" is split into "is" + "n't", and the run of
# extra spaces becomes its own SPACE token.
for token in doc2:
    print(f"{token.text} {token.pos_} {token.dep_}")

Tesla NOUN nsubj
is AUX aux
n't PART neg
    SPACE dep
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [19]:
# The document can be indexed by token position; index 0 is the first token.
doc2[0]

Tesla

In [26]:
# NOTE: the model tags "Tesla" as a common noun (NOUN) here, not a proper noun (PROPN).
# Observed by the author: it is only tagged as a proper noun when "The" is added
# to the start of the sentence.
# Presumably a context-dependent misprediction of the small model -- TODO confirm
# against a larger model.
doc2[0].pos_

'NOUN'

In [31]:
# syntactic dependency label: the token's grammatical relation within the parse
# (here 'nsubj', i.e. the nominal subject)
doc2[0].dep_

'nsubj'

In [32]:
# the original text of the token, exactly as it appears in the input
doc2[0].text

'Tesla'

In [28]:
# the lemma: the base (dictionary) form of the word.
# For "Tesla" this happens to be just the lowercased text, but a lemma is not
# merely lowercasing -- for an inflected word such as "looking" the lemma would
# be expected to be "look" (see spaCy's Token.lemma_ documentation).
doc2[0].lemma_

'tesla'

In [34]:
# coarse-grained part-of-speech tag (e.g. NOUN, VERB, ADP)
doc2[0].pos_

'NOUN'

In [29]:
# fine-grained part-of-speech tag, more specific than .pos_
# (e.g. it distinguishes singular from plural nouns; 'NN' is a singular noun)
doc2[0].tag_

'NN'

In [30]:
# the word shape: an abstract pattern of the token's capitalization, punctuation
# and digits (uppercase letters shown as 'X', lowercase as 'x')
doc2[0].shape_

'Xxxxx'

In [35]:
# does the token consist of alphabetic characters (the whole token, not a single char)?
doc2[0].is_alpha

True

In [36]:
# is the token a stop word, i.e. on the list of the most common words in the language?
doc2[0].is_stop

False

In [37]:
# A longer, multi-clause sentence used below to demonstrate slicing a span out
# of a document.
# The text is built from adjacent string literals instead of backslash
# continuations inside one literal, which silently break if whitespace follows
# the backslash. The "commmonly" typo in the sample text is also corrected; it
# stays a single token, so token indices used later are unchanged.
doc3 = nlp(
    'Although commonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    "cartoonist Allen Saunders and published in Reader's Digest in 1957, when Lennon was 17."
)

In [39]:
# Slice a span out of the text above.
# Indices count tokens (words AND punctuation marks), NOT characters:
# tokens 16 through 29 cover the quoted sentence, including both quotation marks.
life_quote = doc3[16:30]
print(life_quote)

"Life is what happens to us while we are making other plans"


In [40]:
# The slice is a spaCy Span object, not a plain string or list.
type(life_quote)

spacy.tokens.span.Span