# Natural Language Processing
* Library Used : spaCy

In [1]:
# Import the spaCy Library
import spacy

In [3]:
# Load spaCy's small English model ('en_core_web_sm') and keep it as `nlp`

nlp = spacy.load("en_core_web_sm")

In [4]:
# Run the sample sentence through the pipeline to obtain a Doc object.
# (Python 3 strings are already Unicode, so no U'' prefix is required.)

doc = nlp("India has huge surge of I.T jobs with Business worth of $20 billion")

In [6]:
# Print every token together with some of its attributes: text, coarse POS,
# dependency label, and the spacy.explain() descriptions of the tag and of the
# dependency label. Columns are separated by a tab with one space on each side.

for token in doc:
    columns = (token.text, token.pos_, token.dep_, spacy.explain(token.tag_), spacy.explain(token.dep_))
    print(*columns, sep=' \t ')

India 	 PROPN 	 nsubj 	 noun, proper singular 	 nominal subject
has 	 AUX 	 ROOT 	 verb, 3rd person singular present 	 None
huge 	 ADJ 	 amod 	 adjective 	 adjectival modifier
surge 	 NOUN 	 dobj 	 noun, singular or mass 	 direct object
of 	 ADP 	 prep 	 conjunction, subordinating or preposition 	 prepositional modifier
I.T 	 ADJ 	 compound 	 adjective 	 compound
jobs 	 NOUN 	 pobj 	 noun, plural 	 object of preposition
with 	 ADP 	 prep 	 conjunction, subordinating or preposition 	 prepositional modifier
Business 	 NOUN 	 compound 	 noun, singular or mass 	 compound
worth 	 NOUN 	 pobj 	 noun, singular or mass 	 object of preposition
of 	 ADP 	 prep 	 conjunction, subordinating or preposition 	 prepositional modifier
$ 	 SYM 	 quantmod 	 symbol, currency 	 modifier of quantifier
20 	 NUM 	 compound 	 cardinal number 	 compound
billion 	 NUM 	 pobj 	 cardinal number 	 object of preposition


In [15]:
# For a better representation of the output, use f-string width specs to print
# aligned columns: text (15), coarse POS (10), dependency label (10),
# tag description (50), then the dependency description.
# spacy.explain() returns None for a label it has no description for (the ROOT
# dependency prints 'None' in the previous cell's output). None does not accept
# a width spec, so the padded tag description goes through str() first.

for token in doc:
    tag_description = str(spacy.explain(token.tag_))
    print(F'{token.text:15}{token.pos_:10} {token.dep_:10} {tag_description:50} {spacy.explain(token.dep_)}')

India          PROPN      nsubj      noun, proper singular                              nominal subject
has            AUX        ROOT       verb, 3rd person singular present                  None
huge           ADJ        amod       adjective                                          adjectival modifier
surge          NOUN       dobj       noun, singular or mass                             direct object
of             ADP        prep       conjunction, subordinating or preposition          prepositional modifier
I.T            ADJ        compound   adjective                                          compound
jobs           NOUN       pobj       noun, plural                                       object of preposition
with           ADP        prep       conjunction, subordinating or preposition          prepositional modifier
Business       NOUN       compound   noun, singular or mass                             compound
worth          NOUN       pobj       noun, singular or mass        

In [8]:
# Lemma demo: show the second token as written, then its base form (lemma)

print(doc[1].text, doc[1].lemma_, sep='\n')

has
have


In [9]:
# Word text next to its word shape (shape_), for the tokens at index 0 and 5

print(f'{doc[0].text} => {doc[0].shape_}')
print(f'{doc[5].text} => {doc[5].shape_}')

India => Xxxxx
I.T => X.X


In [13]:
# Demonstration: check whether a token is alphabetic and whether it is a stop word

# Example 1 - token at index 0

print(
    doc[0],           # the token itself
    doc[0].is_alpha,  # is it purely alphabetic?
    doc[0].is_stop,   # is it a stop word?
    len(doc),         # Length of the Document
    sep='\n',
)

India
True
False
14


In [14]:
# Example 2 - token at index 10

print(
    doc[10],           # the token itself
    doc[10].is_alpha,  # is it purely alphabetic?
    doc[10].is_stop,   # is it a stop word?
    len(doc),          # Length of the Document
    sep='\n',
)

of
True
True
14


## Demonstration of a Sentence:

In [16]:
# Demonstration of splitting a paragraph into sentences: build a multi-sentence Doc
doc1 = nlp("Hello! Welcome to NLP Learning. This is a Wonderful Example to Understand NLP. I have Performed the Basic Implementation of NLP.")

In [17]:
# Echo the Doc as the cell result to see the paragraph it holds
doc1

Hello! Welcome to NLP Learning. This is a Wonderful Example to Understand NLP. I have Performed the Basic Implementation of NLP

In [18]:
# Walk over the sentences found in doc1 and print one per line
for sent in doc1.sents:
    print(sent)

Hello!
Welcome to NLP Learning.
This is a Wonderful Example to Understand NLP.
I have Performed the Basic Implementation of NLP


In [19]:
# Check whether the token at a given index starts a sentence.
# Index 0 is the first token of the paragraph ("Hello").
doc1[0].is_sent_start

True

In [23]:
# Same check for a token that does not start a sentence (index 3).
# No output is recorded for this cell. Jupyter does not echo a result of None,
# whereas False would be displayed, so the value here was presumably None.
# NOTE(review): the return value may depend on the spaCy version - confirm.
doc1[3].is_sent_start

In [24]:
# The text of the Doc as a plain string (echoed in quotes below)
doc1.text

'Hello! Welcome to NLP Learning. This is a Wonderful Example to Understand NLP. I have Performed the Basic Implementation of NLP'

## Demonstration of Tokenization

In [27]:
# Build the sample string `string1`; it contains literal double quotes and an
# apostrophe, so a triple-quoted literal avoids any escaping.
string1 = '''"Hello! We're seeing the Example of Tokenizaton"'''
print(string1)

"Hello! We're seeing the Example of Tokenizaton"


In [28]:
# Create the Doc object `doc2` from string1; its tokens are explored in the
# following cells.
doc2 = nlp(string1)

In [29]:
# Show every token of doc2 on a single line, each followed by ' || '
print(''.join(f'{tk.text} || ' for tk in doc2), end='')

" || Hello || ! || We || 're || seeing || the || Example || of || Tokenizaton || " || 

In [30]:
# Count the number of tokens: len() of a Doc gives its token count
len(doc2)

11

In [32]:
# Fetch three tokens from the middle of the string by slicing the Doc
# (indices 3, 4 and 5; the stop index 6 is excluded)
doc2[3:6]

We're seeing

In [34]:
# Fetch the last 5 tokens of the string with a negative slice start
doc2[-5:]

the Example of Tokenizaton"

In [35]:
# Note: item assignment is not allowed on a spaCy Doc.
# The failure is the point of this cell, so the expected TypeError is caught
# and printed instead of being left to abort a "Restart & Run All".
try:
    doc2[3] = doc[6]
except TypeError as err:
    print(f'TypeError: {err}')

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

## Demonstration of Named Entity Recognition

In [38]:
# Build a Doc whose named entities are inspected in the next cells.
# NOTE(review): 'Comapny' looks like a typo for 'Company' (doc4 further down
# uses the corrected spelling); it is kept as-is because the recorded entity
# output below was produced from this exact text.
doc3 = nlp("Google is Setting up $50 Million Comapny Branch in Delhi")
doc3

Google is Setting up $50 Million Comapny Branch in Delhi

In [41]:
# First the tokens of doc3 on one line, each followed by ' || '
print(''.join(f'{tk.text} || ' for tk in doc3), end='')

# Finish the token line and leave a blank line before the entity list
print('\n')

# One line per named entity: text - label - description of the label
for entity in doc3.ents:
    print(f'{entity.text} - {entity.label_} - {spacy.explain(entity.label_)}')

Google || is || Setting || up || $ || 50 || Million || Comapny || Branch || in || Delhi || 

Google - ORG - Companies, agencies, institutions, etc.
$50 Million - MONEY - Monetary values, including unit
Comapny Branch - PERSON - People, including fictional
Delhi - GPE - Countries, cities, states


In [42]:
# Count the number of named entities found in doc3
len(doc3.ents)

4

## Built in Visualizers

In [44]:
# spacy includes a built-in Visualization tool called displacy.
# Demonstration to view all Entities in Displacy

#Import displacy
from spacy import displacy



In [46]:
# Example 1 - sentence used for the entity visualisation

doc4 = nlp("Google is Setting up $50 Million Company Branch in Delhi")
doc4

Google is Setting up $50 Million Company Branch in Delhi

In [47]:
# Style - Entity: draw doc4 with displaCy's entity visualizer in the notebook
displacy.render(
    doc4,
    style="ent",
    jupyter=True,
)

In [48]:
# Example 2 - sentence used for the dependency visualisation

doc5 = nlp("Pravin is Learning NLP")
doc5

Pravin is Learning NLP

In [49]:
#Style - Dependency
displacy.render(doc5, style='dep',jupyter = True, options = {'distance': 90} )