In [1]:
import spacy

In [4]:
nlp = spacy.load('en_core_web_sm')

text="This is one of the greatest films ever made. Brilliant acting by George C. Scott and Diane Riggs. This movie is both disturbing and extremely deep. Don't be fooled into believing this is just a comedy."
print(text)

This is one of the greatest films ever made. Brilliant acting by George C. Scott and Diane Riggs. This movie is both disturbing and extremely deep. Don't be fooled into believing this is just a comedy.


In [5]:
doc = nlp(text)

In [6]:
for token in doc:
    print(token)

This
is
one
of
the
greatest
films
ever
made
.
Brilliant
acting
by
George
C.
Scott
and
Diane
Riggs
.
This
movie
is
both
disturbing
and
extremely
deep
.
Do
n't
be
fooled
into
believing
this
is
just
a
comedy
.


### Spacy Lemmatization example (Spacy lemmatization is superiar to nltk lemma)

* Adjectives: happier, happiest → happy
* Adverbs: worse, worst → badly
* Nouns: dogs, children → dog, child
* Verbs: writes, writing, wrote, written → write

In [35]:
word_list=['organize', 'organizes', 'organizing','bad','badly','perhaps']
for token in nlp(' '.join(word_list)):
    print('{:15} | {:15} | {:8} | {:8} | {:11} | {:8} | {:8} | {:8} | {:8} |'.format(
            token.text, token.lemma_, token.pos_, token.tag_, token.dep_
            , token.shape_, token.is_alpha, token.is_stop, spacy.explain(token.tag_)))

organize        | organize        | AUX      | VB       | aux         | xxxx     |        1 |        0 | verb, base form |
organizes       | organize        | VERB     | VBZ      | ROOT        | xxxx     |        1 |        0 | verb, 3rd person singular present |
organizing      | organize        | VERB     | VBG      | xcomp       | xxxx     |        1 |        0 | verb, gerund or present participle |
bad             | bad             | ADJ      | JJ       | dobj        | xxx      |        1 |        0 | adjective (English), other noun-modifier (Chinese) |
badly           | badly           | ADV      | RB       | advmod      | xxxx     |        1 |        0 | adverb   |
perhaps         | perhaps         | ADV      | RB       | advmod      | xxxx     |        1 |        1 | adverb   |


In [22]:
' '.join(word_list)

'organize organizes organizing bad badly perhaps'

## Label detection

In [25]:
ner_text = "When I told John that I wanted to move to Alaska, he warned me that I'd have trouble finding a Starbucks there."
ner_doc = nlp(ner_text)

In [39]:
for ent in ner_doc.ents: #ents is entities
    print(ent.text,' : ',ent.label_,' : ', spacy.explain(ent.label_))


John  :  PERSON  :  People, including fictional
Alaska  :  GPE  :  Countries, cities, states
Starbucks  :  ORG  :  Companies, agencies, institutions, etc.


In [40]:
from spacy import displacy

displacy.render(ner_doc, style='ent')

## Sentence Boundary Detection (SBD)

In [33]:
sentence = "This is a sentence. This is another sentence. let's go to N.Y.!"
print(sentence.split('.'))

doc = nlp(sentence)
for sent in doc.sents:
    print(sent.text)

['This is a sentence', ' This is another sentence', " let's go to N", 'Y', '!']
This is a sentence.
This is another sentence.
let's go to N.Y.!
