## spaCy is an object-oriented NLP library
## NLTK is mainly a string-processing library

In [1]:
import spacy

In [2]:
## Sentence tokenization
# Load the 'en_core_web_sm' pipeline once and run it over a sample text.
# The resulting Doc (`docs`) is reused by the following cells.
nlp = spacy.load('en_core_web_sm')
docs = nlp('$ two sunil giri. presenting spacy module to you all. hope you enjoy this lets deep dive in nlp')

In [3]:
# Print every sentence span of the Doc, one per line.
for sent in docs.sents:
    print(sent)

$ two sunil giri.
presenting spacy module to you all.
hope you enjoy this lets deep dive in nlp


In [4]:
# Index the Doc to pick the token at position 1 and show its text.
token1 = docs[1]
token1.text

'two'

In [5]:
# spaCy flags tokens that look like numbers, including spelled-out
# English words such as 'two'.
token1.like_num

True

In [6]:
# Pick the first token of the Doc (position 0) and show its text.
token0 = docs[0]
token0.text

'$'

In [7]:
# A similar flag exists for currency symbols: it is True for the '$' token.
token0.is_currency

True

In [8]:
## Word tokenization
# Walk the sentences first, then print each token inside the sentence.
for sent in docs.sents:
    for token in sent:
        print(token)

$
two
sunil
giri
.
presenting
spacy
module
to
you
all
.
hope
you
enjoy
this
lets
deep
dive
in
nlp


In [9]:
## Another way to tokenize into words: iterate over the Doc directly
token_texts = [tok.text for tok in docs]
print(token_texts)

['$', 'two', 'sunil', 'giri', '.', 'presenting', 'spacy', 'module', 'to', 'you', 'all', '.', 'hope', 'you', 'enjoy', 'this', 'lets', 'deep', 'dive', 'in', 'nlp']


In [10]:
# Sample input for the email-extraction cell below: one
# "name age email" record per line.
text = '''sunil 21 seungiri841@gmail.com,
          roshan 20 roshan11khadka@gmail.com,
          roshni 19 roshni1@gmail.com'''

In [11]:
## Powerful feature of spaCy: tokens know whether they look like an email
doc = nlp(text)
# Keep every token flagged as email-like (Token objects, not plain strings).
email = [token for token in doc if token.like_email]
email

[seungiri841@gmail.com, roshan11khadka@gmail.com, roshni1@gmail.com]

In [12]:
## Now doing the same thing with NLTK
import nltk
from nltk.tokenize import sent_tokenize

In [13]:
# Split the sample paragraph into sentences with NLTK.
paragraph = 'sunil giri. presenting spacy module to you all. hope you enjoy this lets deep dive in nlp'
sent_tokenize(paragraph)

['sunil giri.',
 'presenting spacy module to you all.',
 'hope you enjoy this lets deep dive in nlp']

In [14]:
from nltk.tokenize import word_tokenize

# Split the sample into word and punctuation tokens with NLTK.
utterance = 'hello. i am sunil giri presenting spacy module to you all. hope you enjoy this lets deep dive in nlp'
word_tokenize(utterance)

['hello',
 '.',
 'i',
 'am',
 'sunil',
 'giri',
 'presenting',
 'spacy',
 'module',
 'to',
 'you',
 'all',
 '.',
 'hope',
 'you',
 'enjoy',
 'this',
 'lets',
 'deep',
 'dive',
 'in',
 'nlp']