In [2]:
import spacy

In [3]:
# Load the `en_core_web_sm` English pipeline once; every later cell reuses
# this `nlp` object to process raw text.
nlp = spacy.load('en_core_web_sm')

In [4]:
# Run the pipeline on a sample sentence; the loop in the next cell prints the
# part-of-speech tag and dependency label of each resulting token.
doc = nlp("Tesla is looking at buying U.S. startup for $6 million")

In [5]:
# One row per token: text, part-of-speech tag, dependency label.
for token in doc:
    print(f"{token.text} {token.pos_} {token.dep_}")

Tesla PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S. PROPN compound
startup NOUN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [6]:
# Show the pipeline as a list of (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x19b69ecbb30>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x19b69ecac90>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x19b69dce0a0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x19b6a093950>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x19b6a075e50>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x19b69dcde00>)]

In [7]:
# Same pipeline, component names only.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [8]:
# Note the two spaces after "isn't": the extra space comes out as its own
# token (the SPACE row in the output of the next cell).
doc2 = nlp("Tesla isn't  looking into startups anymore.")

In [9]:
# One row per token: text, part-of-speech tag, dependency label.
for token in doc2:
    print(f"{token.text} {token.pos_} {token.dep_}")

Tesla PROPN nsubj
is AUX aux
n't PART neg
  SPACE dep
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [10]:
# Index the processed text like a sequence to get one token; position 0 is "Tesla".
doc2[0]

Tesla

In [11]:
# Part-of-speech tag of the first token, as a readable string.
doc2[0].pos_

'PROPN'

In [12]:
# Dependency label of the first token, as a readable string.
doc2[0].dep_

'nsubj'

In [13]:
# Look up the human-readable description of the 'nsubj' label seen above.
spacy.explain('nsubj')

'nominal subject'

In [14]:
# Index 4 is "looking" (the whitespace token from the double space sits at
# index 3); its lemma is the base form 'look'.
doc2[4].lemma_

'look'

TOKENIZATION

In [15]:
# Sample text with repeated punctuation and a contraction, used as tokenizer input.
# NOTE(review): `string` is also the name of a stdlib module; harmless in the
# visible cells since that module is never imported, but worth confirming.
string = "hey!! I'm Sathya"

In [16]:
# Process the sample text; the next cell prints its tokens.
doc3 = nlp(string)

In [17]:
# Print all tokens on one line, each followed by '|', so the token
# boundaries are easy to see.
for token in doc3:
    print(token.text, end='|')

hey|!|!|I|'m|Sathya|

In [18]:
# Sample mixing a contraction, a hyphenated word, an e-mail address and a URL.
doc4 = nlp("We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!")

In [19]:
# Print one token per line.
for t in doc4:
    print(t)

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.oursite.com
!


In [20]:
# Sample with a number fused to a unit ("5km") and a currency amount ("$5").
doc5 = nlp('A 5km UBER cab costs $5')

In [21]:
# Print one token per line.
for t in doc5:
    print(t)

A
5
km
UBER
cab
costs
$
5


In [22]:
# Token count: 8, because "5km" and "$5" are each split into two tokens.
len(doc5)

8

In [23]:
# Sample sentence for inspecting the named entities in the next cell.
doc8 = nlp('Apple to build a Hong Kong factory for $6 million')

In [24]:
# One row per named entity: text, label, and the label's description.
for ent in doc8.ents:
    print(f"{ent.text} - {ent.label_} - {spacy.explain(ent.label_)}")

Apple - ORG - Companies, agencies, institutions, etc.
Hong Kong - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [25]:
from spacy import displacy

In [26]:
# Rebinds `doc` (first assigned near the top of the notebook) to a new
# sentence for the dependency visualization in the next cell.
doc = nlp('Apple is going to build a U.K. factory for $6 million.')

In [27]:
# Draw the dependency parse of `doc` inline in the notebook.
displacy.render(
    doc,
    style='dep',
    jupyter=True,
    options={'distance': 110},  # spacing between words in the rendering
)

In [28]:
# Rebind `doc` to a new sentence and render it inline with the entity
# visualizer.
doc = nlp('Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')
displacy.render(
    doc,
    style='ent',
    jupyter=True,
)