In [2]:
import spacy

In [3]:
# Load the 'en_core_web_sm' pipeline once; the resulting `nlp` object is
# called on raw text in the cells below to produce processed docs.
nlp = spacy.load('en_core_web_sm')

In [4]:
# Run the pipeline on a sample sentence; the resulting doc is inspected below.
doc = nlp("Tesla is looking at buying U.S for $6 million")

In [5]:
# Walk over every token in the processed doc and print three attributes:
#   token.text - the token's actual text (the word)
#   token.pos_ - its part-of-speech tag, e.g. NOUN, VERB, ADJ
#   token.dep_ - its syntactic dependency label, i.e. its grammatical
#                relation to other words (subject, object, root, modifier, ...)
for token in doc:
    print(f"{token.text} {token.pos_} {token.dep_}")

Tesla PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S PROPN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [6]:
# The ordered list of (name, component) pairs that spaCy applies to a text,
# one after another, each time nlp(text) is called.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x13b0969f0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x13b096db0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x13b07db60>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x13b290210>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x13b292fd0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x13b07d930>)]

In [11]:
# Only the component names, in the same order as nlp.pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

doc2 = nlp("Tesla isn't looking into startups anymore")

In [12]:
# A sentence with a contraction ("isn't") and closing punctuation, to see
# how each of them ends up tokenized.
doc2 = nlp("Tesla isn't looking into startups anymore.")

In [13]:
# Print each token's text, part-of-speech tag and dependency label.
for token in doc2:
    print(f"{token.text} {token.pos_} {token.dep_}")

Tesla PROPN nsubj
is AUX aux
n't PART neg
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [14]:
# A doc can be indexed by position; index 1 is the second token.
doc2[1]

is

In [15]:
# The part-of-speech tag can be read from an individual token as well.
doc2[1].pos_

'AUX'

In [8]:
# A longer passage, assembled from adjacent string literals so that no
# backslash continuations are needed inside the text itself.
# NOTE(review): "commmonly" looks like a typo for "commonly"; the text is kept unchanged here.
doc3 = nlp(
    'Although commmonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    "cartoonist Allen Saunders and published in Reader's Digest in 1957, when Lennon was 17."
)

In [9]:
# doc3[16:30] takes tokens 16 through 29 (the end index, 30, is excluded).
life_quote = doc3[16:30]

In [20]:
# Display the sliced tokens.
life_quote

"Life is what happens to us while we are making other plans"

In [21]:
# Check the type of the object produced by slicing the doc.
type(life_quote)

spacy.tokens.span.Span

In [11]:
# Three short sentences in one string; the last one has no closing period.
doc4 = nlp("This is first sentence. This is another sentence. This is the last sentence")

In [12]:
# doc4.sents yields the sentences found in the doc, one at a time.
for sentence in doc4.sents:
    print(sentence)

This is first sentence.
This is another sentence.
This is the last sentence


In [13]:
# doc4[8] is the 9th token (indexing is 0-based).
# is_sent_start is True if that token is marked as the beginning of a
# sentence, False otherwise.

doc4[8].is_sent_start

False