In [1]:
import spacy

In [2]:
# Load the `en_core_web_sm` pipeline once; `nlp` is the callable used to
# turn every raw string below into a processed Doc.
nlp = spacy.load("en_core_web_sm")

In [12]:
# Run the pipeline on a single sentence (Python 3 strings are already
# unicode, so no `u` prefix is needed).
doc = nlp("Tesla is looking at buying U.S. startup for $120 million")

In [14]:
# One line per token: surface text, coarse POS tag, lemma, dependency label.
for tok in doc:
    print(tok.text, tok.pos_, tok.lemma_, tok.dep_, sep=" ")

Tesla PROPN Tesla nsubj
is AUX be aux
looking VERB look ROOT
at ADP at prep
buying VERB buy pcomp
U.S. PROPN U.S. compound
startup NOUN startup dobj
for ADP for prep
$ SYM $ quantmod
120 NUM 120 compound
million NUM million pobj


In [15]:
# Inspect the loaded pipeline as (name, component) pairs, in the order listed.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x7f7e36fee0e0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x7f7e36fed120>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x7f7e370ce6c0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x7f7e372363c0>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7f7e36f7ff40>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7f7e370ce8f0>)]

In [16]:
# Same pipeline, component names only.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [17]:
# Sentence with a contraction ("isn't"), used to show how it is tokenized.
# NOTE(review): "statrups" looks like a typo for "startups"; it is kept as-is
# because the recorded output reflects it.
doc2 = nlp("Tata isn't looking into statrups anymore.")

In [18]:
# Same per-token report as before (text, POS, lemma, dependency) for doc2.
for tok in doc2:
    print(" ".join((tok.text, tok.pos_, tok.lemma_, tok.dep_)))
    

Tata PROPN Tata nsubj
is AUX be aux
n't PART not neg
looking VERB look ROOT
into ADP into prep
statrups NOUN statrup pobj
anymore ADV anymore advmod
. PUNCT . punct


In [24]:
# Process a multi-sentence, multi-line text.
# NOTE: despite its name, `long_string` is bound to the result of nlp(...),
# not to a plain str. The literal (including its embedded newline) is left
# untouched because the token indices used for slicing depend on it.
long_string = nlp(u"""Presently I am working as a Lead Scientist at the newly built Wipro AI Lab, Bangalore, India.
Earlier I spent one semester stint at the Mahindra Ecole Centrale, Hyderabad, India as an Associate Professor. Before I spent three years working as an Assistant Professor at the IIIT Sri City, Andhra Pradesh. I was also associated with STRITNE, ISB as a Vising Scientist during July-Dec 2018.""")

In [25]:
# Slice by token index (not character index): tokens 16 through 29.
part = long_string[16:30]

In [26]:
# Display the sliced tokens as text.
part

Bangalore, India.
Earlier I spent one semester stint at the Mahindra

In [27]:
# Check what kind of object the slice produced.
type(part)

spacy.tokens.span.Span

In [28]:
# Check the type of the object the slice was taken from, for comparison.
type(long_string)

spacy.tokens.doc.Doc

In [29]:
# Three short sentences; the last one deliberately has no closing period.
doc1 = nlp("This is the first sentence. This is another sentence. This is the last sentence")

In [30]:
# Iterate over the detected sentence boundaries and print each sentence.
for sent in doc1.sents:
    print(sent)

This is the first sentence.
This is another sentence.
This is the last sentence


In [34]:
doc1[6].is_sent_start  # does the token at index 6 begin a sentence?

True

In [36]:
doc1[2].is_sent_start  # token at index 2 sits mid-sentence, so this is not a sentence start

False

In [39]:
# Raw string wrapped in literal double quotes, with an apostrophe and an
# abbreviation containing periods — a tokenizer stress test.
text = "\"We're moving to N.D.L.S!\""

In [41]:
# Show the string exactly as the tokenizer will receive it.
print(text)

"We're moving to N.D.L.S!"


In [42]:
# NOTE: this rebinds `doc`, replacing the earlier "Tesla ..." document.
doc = nlp(text)

In [43]:
# One token per line, surface text only.
for tok in doc:
    print(tok.text)

"
We
're
moving
to
N.D.L.S
!
"


In [45]:
# Text mixing a hyphenated word, an e-mail address and a URL.
# NOTE: this rebinds `doc1`, replacing the three-sentence document above.
doc1 = nlp("We're here to help! Send snail-email, email support@oursite.com or visit us at http://www.snail-mail.com!")

In [46]:
# Print each token on its own line to see where the text was split.
for tok in doc1:
    print(tok)

We
're
here
to
help
!
Send
snail
-
email
,
email
support@oursite.com
or
visit
us
at
http://www.snail-mail.com
!


In [47]:
# Sentence containing a number fused with a unit ("5km") and a currency amount.
doc3 = nlp("A 5km Delhi cab ride costs $10.42")

In [48]:
# Print each token on its own line.
for tok in doc3:
    print(tok)

A
5
km
Delhi
cab
ride
costs
$
10.42


In [49]:
# Sentence with abbreviations that end in a period ("St.", "U.S.").
doc4 = nlp("Let's visit St. Louis in the U.S. next year")

In [50]:
# Print each token on its own line.
for tok in doc4:
    print(tok)

Let
's
visit
St.
Louis
in
the
U.S.
next
year


In [54]:
# Size of the vocabulary object attached to doc4.
len(doc4.vocab)

853

In [55]:
# Short sentence used to demonstrate indexing and slicing.
# NOTE(review): "recieve" looks like a typo for "receive"; kept unchanged.
doc5 = nlp("It is better to give than recieve.")

In [56]:
# Negative indices work as with lists: -1 is the last token.
doc5[-1]

.

In [57]:
# Token slice: indices 2, 3 and 4 (end index excluded).
doc5[2:5]

better to give

In [59]:
# Sentence with an organisation, a place and a money amount, used for the
# named-entity example.
doc6 = nlp("Tata to build a Gujarat factory for $100 million")

In [60]:
# Print all tokens on a single line, each followed by " | ".
for tok in doc6:
    print(tok.text, end=" | ")

Tata | to | build | a | Gujarat | factory | for | $ | 100 | million | 

In [65]:
# For every named entity: its text, its label, and the human-readable
# description of that label. str() keeps the print well-defined even if
# spacy.explain returns a non-string for a label. Each entry is followed by
# one blank line.
for ent in doc6.ents:
    label_description = str(spacy.explain(ent.label_))
    print(ent, ent.label_, label_description, end="\n\n")

Tata ORG Companies, agencies, institutions, etc.

Gujarat GPE Countries, cities, states

$100 million MONEY Monetary values, including unit



In [66]:
# Sentence used for the noun-chunk example.
# NOTE(review): "manufactures" may be a typo for "manufacturers"; kept unchanged.
doc7 = nlp("Autonomous cars shift insurance liability toward manufactures.")

In [67]:
# Print every noun chunk found in doc7, one per line.
for noun_chunk in doc7.noun_chunks:
    print(noun_chunk)

Autonomous cars
insurance liability
manufactures


In [68]:
from spacy import displacy

In [72]:
# Sentence to visualise with displaCy.
doc8 = nlp("Apple is going to build a steel factory in U.K. for $100 million.")

In [74]:
# Draw the dependency parse of doc8 inline in the notebook.
# `distance` controls the spacing between words in the rendered diagram
# (see the displaCy visualizer options).
displacy.render(
    doc8,
    style="dep",
    jupyter=True,
    options={"distance": 80},
)