In [1]:
import spacy

In [2]:
# Load the 'en_core_web_md' pipeline once; the resulting `nlp` object is reused by later cells.
nlp = spacy.load('en_core_web_md')

In [3]:
# Run the pipeline on a sample sentence; `doc` is reused by the cells below.
doc = nlp('Apple is looking at buying U.K. startup for $1 billion')

#### SPLIT TEXT INTO TOKENS

In [4]:
# Print the text of every token in `doc`, one token per line.
for token in doc:
    print(token.text)

Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion


#### PART OF SPEECH (POS) TAGGING

In [5]:
# Bare expression: the notebook displays `doc` as the cell's output.
doc

Apple is looking at buying U.K. startup for $1 billion

In [6]:
# Show each token's text next to its lemma.
for token in doc:
    print(token.text, token.lemma_)

Apple Apple
is be
looking look
at at
buying buy
U.K. U.K.
startup startup
for for
$ $
1 1
billion billion


In [7]:
# One row per token, in fixed-width columns:
# pos_ (width 10), text (right-aligned, width 15), lemma_ (width 15), is_stop.
for token in doc:
    print(f' {token.pos_:{10}} {token.text:>{15}} {token.lemma_:{15}} {token.is_stop}')

 PROPN                Apple Apple           False
 AUX                     is be              True
 VERB               looking look            False
 ADP                     at at              True
 VERB                buying buy             False
 PROPN                 U.K. U.K.            False
 NOUN               startup startup         False
 ADP                    for for             True
 SYM                      $ $               False
 NUM                      1 1               False
 NUM                billion billion         False


#### DEPENDENCY PARSING

In [8]:
# For each noun chunk print its text, the text of its root token,
# and the root token's dep_ label, in fixed-width columns.
for chunk in doc.noun_chunks:
    print(f'{chunk.text:{20}} {chunk.root.text:{15}} {chunk.root.dep_}')

Apple                Apple           nsubj
U.K. startup         startup         dobj


#### NAMED ENTITY RECOGNITION

In [9]:
 for ent in doc.ents:
    print(f'{ent.text:>{10}} {ent.label_}')

     Apple ORG
      U.K. GPE
$1 billion MONEY


#### SENTENCE SEGMENTATION

In [10]:
# Print every sentence span of `doc`, one per line.
for sent in doc.sents:
    print(sent)

Apple is looking at buying U.K. startup for $1 billion


In [11]:
# A two-sentence sample used for the segmentation and visualization cells.
doc1 = nlp("It's me sivant! Have a great day :)")

In [12]:
# Print every sentence span of `doc1`, one per line.
for sent in doc1.sents:
    print(sent)

It's me sivant!
Have a great day :)


In [13]:
def set_rule(doc):
    """Mark the token that follows every '...' token as a sentence start.

    Written to be used as a pipeline component: it receives a document,
    sets sentence-start flags on it in place, and hands the same object back.

    Args:
        doc: Indexable, sliceable sequence of tokens. Each token must expose
            ``text``, its position ``i`` within ``doc``, and a writable
            ``is_sent_start`` attribute.

    Returns:
        The same ``doc`` object, after the whole sequence has been scanned.
    """
    # The last token is excluded because nothing follows it to be marked.
    for token in doc[:-1]:
        if token.text == '...':
            doc[token.i + 1].is_sent_start = True
    # Return after the loop, not inside it: returning from the loop body
    # would stop after the first token and would yield None for documents
    # with fewer than two tokens.
    return doc

In [14]:
# Insert the custom rule ahead of the parser so the sentence starts it sets
# are already on the Doc when the parser runs.
# The guard keeps this cell re-runnable: adding a component whose name is
# already in the pipeline raises an error.
# NOTE(review): passing the function object to add_pipe is the spaCy v2 call
# form -- confirm against the installed spaCy version.
if 'set_rule' not in nlp.pipe_names:
    nlp.add_pipe(set_rule, before='parser')

doc2 = nlp("It's me sivant...Have a great day...Don't worry be happy :)")

In [15]:
# Print every sentence span of `doc2`, one per line.
for sent in doc2.sents:
    print(sent)

It's me sivant...
Have a great day...
Don't worry be happy :)


In [16]:
# Print the tokens of `doc2` to see how the text around each '...' was split.
for token in doc2:
    print(token.text)

It
's
me
sivant
...
Have
a
great
day
...
Don't
worry
be
happy
:)


#### VISUALIZATION

In [17]:
from spacy import displacy

In [18]:
# Render `doc1` with displaCy's 'dep' style, using default options.
displacy.render(doc1, style='dep')

In [19]:
# Same 'dep' rendering of `doc1`, with the 'compact' option switched on.
displacy.render(doc1, style='dep', options={'compact': True})

In [20]:
# 'dep' rendering of `doc1` with 'compact' on and 'distance' set to 100.
displacy.render(
    doc1,
    style='dep',
    options={'compact': True, 'distance': 100},
)

In [21]:
# Render `doc` with displaCy's 'ent' style.
displacy.render(doc, style='ent')