In [1]:
# Importing SpaCy
import spacy

In [2]:
# Load the pretrained 'en_core_web_sm' pipeline once; every later cell reuses `nlp`.
MODEL_NAME = 'en_core_web_sm'
nlp = spacy.load(MODEL_NAME)

### Tokenization

In [3]:
# Run the example sentence through the pipeline to obtain the processed `doc`.
sample_text = "Apple isn't looking at buying U.K. startup for $1 billion"
doc = nlp(sample_text)

In [4]:
# Show the text of every token in `doc`, one token per line.
print(*(tok.text for tok in doc), sep='\n')

Apple
is
n't
looking
at
buying
U.K.
startup
for
$
1
billion


### Part-Of-Speech Tagging

In [5]:
# One row per token: text, lemma, part-of-speech tag and stop-word flag,
# padded into fixed-width columns (15 / 15 / 10 characters).
for token in doc:
    row = f'{token.text:{15}} {token.lemma_:{15}} {token.pos_:{10}} {token.is_stop}'
    print(row)

Apple           Apple           PROPN      False
is              be              AUX        True
n't             not             PART       True
looking         look            VERB       False
at              at              ADP        True
buying          buy             VERB       False
U.K.            U.K.            PROPN      False
startup         startup         NOUN       False
for             for             ADP        True
$               $               SYM        False
1               1               NUM        False
billion         billion         NUM        False


### Dependency Parsing

In [6]:
# For each noun chunk: the chunk text, its root token, and that root's dependency label.
for chunk in doc.noun_chunks:
    head = chunk.root
    print(f'{chunk.text:{30}} {head.text:{15}} {head.dep_}')

Apple                          Apple           nsubj
U.K. startup                   startup         dobj


### Named Entity Recognition (NER)

In [7]:
# Each item of doc.ents is a named entity (it may cover several tokens,
# e.g. "$1 billion"); print its text next to its entity label.
for ent in doc.ents:
    print(f'{ent.text:{10}} {ent.label_}')

Apple      ORG
U.K.       GPE
$1 billion MONEY


### Sentence Segmentation

In [8]:
# doc.sents yields the sentences of `doc`; print each one on its own line.
for sentence in doc.sents:
    print(sentence)

Apple isn't looking at buying U.K. startup for $1 billion


In [9]:
# A text with several sentences separated by periods, split with the default pipeline.
feedback_text = 'Welcome to my Restaurant. Thanks for Coming. Please do leave your valuable feedback'
doc2 = nlp(feedback_text)
for sentence in doc2.sents:
    print(sentence)

Welcome to my Restaurant.
Thanks for Coming.
Please do leave your valuable feedback


In [10]:
# Preparing Custom Rule for Sentence Segmentation.
def custom_rule(doc):
    """Mark the token following every '...' token as a sentence start.

    Every token except the last one is inspected; the last is skipped because
    no token follows it. Whenever a token's text is exactly '...', the token
    right after it gets ``is_sent_start = True``.

    Args:
        doc: the document being processed; it is modified in place.

    Returns:
        The same ``doc`` object that was passed in.
    """
    # Positions of the '...' tokens (never the final token of the doc).
    ellipsis_positions = [tok.i for tok in doc[:-1] if tok.text == '...']
    for pos in ellipsis_positions:
        doc[pos + 1].is_sent_start = True
    return doc

In [11]:
# Adding the rule in the pipeline, ahead of the parser, so the sentence starts
# it sets are already in place when the parser runs.
# The guard makes this cell safe to re-run: adding a component whose name is
# already in the pipeline raises an error, and the function is registered
# under its own name, 'custom_rule'.
# NOTE(review): passing the function object itself is the spaCy v2 calling
# convention; spaCy v3+ expects a registered component name instead - confirm
# the installed version before upgrading.
if 'custom_rule' not in nlp.pipe_names:
    nlp.add_pipe(custom_rule, before = 'parser')

In [12]:
# checking for the same statement after applying the custom rule
doc3 = nlp('Welcome to my Restaurant...Thanks for Coming...Please do leave your valuable feedback')
for token in doc3.sents:
    print(token)

Welcome to my Restaurant...
Thanks for Coming...
Please do leave your valuable feedback


In [13]:
# Token-level view of doc3: each '...' shows up as a separate token.
for piece in doc3:
    print(piece.text)

Welcome
to
my
Restaurant
...
Thanks
for
Coming
...
Please
do
leave
your
valuable
feedback


### Visualization

In [14]:
from spacy import displacy

In [15]:
# Render the dependency parse of the first example sentence (`doc`).
render_style = 'dep'
displacy.render(doc, style=render_style)