In [58]:
import spacy
from spacy import displacy

In [6]:
nlp = spacy.load('en_core_web_sm')

In [9]:
doc =  nlp("Apple isn't looking at buying U.K. startup for $1 billion")

In [10]:
for token in doc:
  print(token.text)

Apple
is
n't
looking
at
buying
U.K.
startup
for
$
1
billion


**part of speech tagging (POS)**

In [11]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [12]:
for token in doc:
  print(token.text, token.lemma_)

Apple Apple
is be
n't not
looking look
at at
buying buy
U.K. U.K.
startup startup
for for
$ $
1 1
billion billion


In [16]:
for token in doc:
  print(f'{token.text:{15}} {token.lemma_:{15}} {token.pos_:{10}} {token.is_stop}')

Apple           Apple           PROPN      False
is              be              AUX        True
n't             not             PART       True
looking         look            VERB       False
at              at              ADP        True
buying          buy             VERB       False
U.K.            U.K.            PROPN      False
startup         startup         NOUN       False
for             for             ADP        True
$               $               SYM        False
1               1               NUM        False
billion         billion         NUM        False


**Dependency Parsing**

In [18]:
for chunk in doc.noun_chunks:
  print(f'{chunk.text:{30}} {chunk.root.text:{15}} {chunk.root.dep_}')

Apple                          Apple           nsubj
U.K. startup                   startup         dobj


**Named entity recognition**

In [19]:
for ent in doc.ents:
  print(ent.text, ent.label_)

Apple ORG
U.K. GPE
$1 billion MONEY


## **Sentence segmentation**

In [20]:
for sent in doc.sents:
  print(sent)

Apple isn't looking at buying U.K. startup for $1 billion


In [21]:
doc1 = nlp("Welcome to home. Thanks for watching. Please be nice")

In [22]:
for sent in doc1.sents:
  print(sent)

Welcome to home.
Thanks for watching.
Please be nice


In [23]:
doc1 = nlp("Welcome to home? Thanks for watching")

In [24]:
for sent in doc1.sents:
  print(sent)

Welcome to home?
Thanks for watching


In [31]:
doc1 = nlp("Welcome to...KGP Talkie...Thanks for watching")

In [32]:
for sent in doc1.sents:
  print(sent)

Welcome to...KGP Talkie...
Thanks for watching


In [33]:
doc1 = nlp("Welcome to.*.KGP Talkie.*.Thanks for watching")

In [34]:
for sent in doc1.sents:
  print(sent)

Welcome to.*.KGP
Talkie.*.Thanks for watching


In [48]:
def set_rule(doc):
  for token in doc[:-1]:
    if token.text == '...':
      doc[token.i + 1].is_sent_start = True
  return doc


In [53]:
nlp.remove_pipe('set_rule')

('set_rule', <function __main__.set_rule>)

In [54]:
nlp.add_pipe(set_rule, before='parser')


In [55]:
doc = nlp("Welcome to KGP Talkie...Thanks...Like and subscribe!")

In [56]:
for sent in doc.sents:
  print(sent)

Welcome to KGP Talkie...
Thanks...
Like and subscribe!


In [57]:
for token in doc:
  print(token.text)

Welcome
to
KGP
Talkie
...
Thanks
...
Like
and
subscribe
!


## **Spacy visualization tool**

In [59]:
doc

Welcome to KGP Talkie...Thanks...Like and subscribe!

In [63]:
displacy.render(doc, style='dep', options={'compact':True, 'distance': 100})

'<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="620654de7f034b50989c31dac6f55d72-0" class="displacy" width="850" height="237.0" direction="ltr" style="max-width: none; height: 237.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="147.0">\n    <tspan class="displacy-word" fill="currentColor" x="50">Welcome</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">VERB</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="147.0">\n    <tspan class="displacy-word" fill="currentColor" x="150">to</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="150">ADP</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="147.0">\n    <tspan class="displacy-word" fill="currentColor" x="250">KGP</tspan>\n    <tspan class="displacy-tag

In [64]:
doc =  nlp("Apple isn't looking at buying U.K. startup for $1 billion")

In [65]:
displacy.render(doc,style='ent')

'<div class="entities" style="line-height: 2.5; direction: ltr">\n<mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    Apple\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem">ORG</span>\n</mark>\n isn\'t looking at buying \n<mark class="entity" style="background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    U.K.\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem">GPE</span>\n</mark>\n startup for \n<mark class="entity" style="background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    $1 billion\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0