In [2]:
import spacy

In [3]:
spacy.__version__

'2.3.2'

In [4]:
nlp = spacy.load('en')

In [5]:
doc = nlp(u"The queen of Rome decided to rip off the flag! That's our flag.")

In [6]:
for token in doc:
    print(token.text, token.pos_)

The DET
queen NOUN
of ADP
Rome PROPN
decided VERB
to PART
rip VERB
off ADP
the DET
flag NOUN
! PUNCT
That DET
's AUX
our DET
flag NOUN
. PUNCT


In [7]:
doc = nlp(u"This is my house and I have to defend it!")

In [8]:
for token in doc:
    print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop)

This this DET DT nsubj Xxxx True True
is be AUX VBZ ROOT xx True True
my -PRON- DET PRP$ poss xx True True
house house NOUN NN attr xxxx True False
and and CCONJ CC cc xxx True True
I -PRON- PRON PRP nsubj X True True
have have AUX VBP ROOT xxxx True True
to to PART TO aux xx True True
defend defend VERB VB xcomp xxxx True False
it -PRON- PRON PRP dobj xx True True
! ! PUNCT . punct ! False False


In [9]:
from spacy.lang.en import English
lemmatizer = English.Defaults.create_lemmatizer()

In [10]:
lemmatizer('chuckles', 'NOUN')

['chuckles']

In [11]:
lemmatizer('fastest','ADJ')

['fastest']

In [12]:
import spacy
nlp = spacy.load('en_core_web_sm')
lemmatizer = nlp.Defaults.create_lemmatizer()

In [13]:
lemmatizer('chuckles', 'NOUN')

['chuckles']

In [14]:
lemmatizer('fastest','ADJ')

['fastest']

In [15]:
doc = nlp(u"I worked at some small company in Lower Austria")

In [16]:
for token in doc:
    print(token.text, token.lemma_)

I -PRON-
worked work
at at
some some
small small
company company
in in
Lower Lower
Austria Austria


In [17]:
doc = nlp(u"I worked at some small company in Lower Austria where Austrians were nice profesionals")

In [18]:
for token in doc:
    print(token.text, token.lemma_)

I -PRON-
worked work
at at
some some
small small
company company
in in
Lower low
Austria Austria
where where
Austrians Austrians
were be
nice nice
profesionals profesional


In [19]:
doc = nlp(u"The fastest cars are Ferraris")

In [20]:
for token in doc:
    print(token.text, token.lemma_)

The the
fastest fast
cars car
are be
Ferraris Ferraris


In [21]:
doc = nlp(u"Microsoft used to be Google from yesterday in the United States and Brazil")

In [22]:
for ent in doc.ents:
    print(ent.text, ent.label_)

Microsoft ORG
Google ORG
yesterday DATE
the United States GPE
Brazil GPE


#PARSING DE DEPENDENCIAS

In [23]:
doc = nlp(u'Book me a flight from Bangalore to Goa')

In [24]:
blr, goa = doc[5], doc[7]

In [25]:
list(blr.ancestors)

[from, flight, Book]

In [26]:
list(goa.ancestors)

[to, flight, Book]

In [26]:
for token in doc:
    print(token.text, token.pos_)

Book VERB
me PRON
a DET
flight NOUN
from ADP
Bangalore PROPN
to ADP
Goa PROPN


In [27]:
doc = nlp(u'Book a table at the restaurant and a taxi to the hotel')

In [28]:
for token in doc:
    print(token.text, token.pos_)

Book NOUN
a DET
table NOUN
at ADP
the DET
restaurant NOUN
and CCONJ
a DET
taxi NOUN
to ADP
the DET
hotel NOUN


In [29]:
tasks = doc[2], doc[8]
tasks_target = doc[5], doc[11]
print(tasks)
print(tasks_target)

(table, taxi)
(restaurant, hotel)


In [30]:
for target in tasks_target:
    print(f'target: {target}')
    for tok in target.ancestors:
        print(tok)
        if tok in tasks:   
            print(f"Booking of {tok} belongs to {target}")


target: restaurant
at
Book
target: hotel
to
taxi
Booking of taxi belongs to hotel
Book


In [31]:
for tok in tasks_target:
    print(tok.ancestors)

<generator object at 0x7fec71210e10>
<generator object at 0x7fec71210e10>


In [27]:
from spacy import displacy

In [None]:
displacy.serve(doc, style='dep')




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...



In [28]:
doc = nlp(u'Book me a flight from Bangalore to Goa')

In [29]:
displacy.serve(doc, style='dep')




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [34]:
nlp = spacy.load('en')
doc = nlp(u'What are some places to visit in Berlin and stay in Lubeck')
places = doc[7], doc[11]
actions = doc[5], doc[9]

In [35]:
for place in places:
    for tok in place.ancestors:
        if tok in actions:
            print(f'User refers {place} to {tok}')
            break

User refers Berlin to visit
User refers Lubeck to stay


In [36]:
list(doc.noun_chunks)

[What, some places, Berlin, Lubeck]

In [37]:
doc = nlp(u'SpaceX is preparing to launch rockets for NASA')

In [38]:
list(doc.noun_chunks)

[SpaceX, rockets, NASA]

In [39]:
doc = nlp(u'How are you doing today?')

In [40]:
for token in doc:
    print(token.text, token.vector[:5])

How [ 0.6124835   1.4812975  -0.72636694  0.06934005  4.5061274 ]
are [-3.683414   1.6739068  0.8058587 -1.5030262 -1.456842 ]
you [-1.982389   0.1802355 -1.5821393 -0.9292287  3.5329905]
doing [ 1.4059318  -3.2353983  -0.499872   -1.289648   -0.42563045]
today [ 1.0131264 -3.218736  -2.291664  -1.326089  -0.6509166]
? [ 1.5639621  -1.0553905  -1.8733073  -1.7224176  -0.41755062]


In [41]:
hello_doc = nlp(u'hello')

In [42]:
hi_doc = nlp(u'hi')

In [43]:
hell_doc = nlp(u'hell')

In [44]:
print(hello_doc.similarity(hi_doc))

0.6818258855051886


  """Entry point for launching an IPython kernel.


In [45]:
print(hello_doc.similarity(hell_doc))

0.5552669805648011


  """Entry point for launching an IPython kernel.


In [46]:
print(hell_doc.similarity(hi_doc))

0.3648051750708799


  """Entry point for launching an IPython kernel.


In [47]:
hola_doc = nlp(u'hola')

In [48]:
print(hola_doc.similarity(hi_doc))

0.33996094101490265


  """Entry point for launching an IPython kernel.


In [49]:
print(hello_doc.similarity(hola_doc))

0.3505149803384891


  """Entry point for launching an IPython kernel.


In [50]:
can_doc = nlp(u'can')

In [51]:
may_doc = nlp(u'may')

In [52]:
print(can_doc.similarity(may_doc))

0.8242348395055965


  """Entry point for launching an IPython kernel.


In [53]:
greet_doc = nlp(u'greetings')

In [54]:
greet_doc.similarity(hello_doc)

  """Entry point for launching an IPython kernel.


0.06175779121239372

In [55]:
regards_doc = nlp(u'regards')

In [56]:
greet_doc.similarity(regards_doc)

  """Entry point for launching an IPython kernel.


0.6708363565850574

In [57]:
str1 = nlp(u'When the new season of The Crown will be released?')

In [58]:
str2 = nlp(u'The Crown next season release date?')

In [60]:
str1.similarity(str2)

  """Entry point for launching an IPython kernel.


0.6662054639549791

In [61]:
str2 = nlp(u'Is Neymar a good player?')

In [62]:
str1.similarity(str2)

  """Entry point for launching an IPython kernel.


0.5396660878965691

In [63]:
str1 = nlp(u'How successful is Neymar?')

In [64]:
str1.similarity(str2)

  """Entry point for launching an IPython kernel.


0.4999727840903807