In [None]:
# import libraries
import spacy

## In-built NER tags

In [None]:
# Example sentence to analyse, kept as a plain string.
doc = "Sumit is an adjunct faculty at Upgrad. "

# Load the pre-trained English pipeline, then run it over the sentence; the
# result is what the following cells read tokens and entities from.
model = spacy.load("en_core_web_sm")
processed_doc = model(doc)

In [None]:
# Print the coarse part-of-speech tag of every token in the first sentence.
# The loop variable is named `token` rather than `doc` so that it does not
# overwrite the notebook-level `doc` string defined in the cell that builds
# `processed_doc`.
for token in processed_doc:
  print(token.text, token.pos_, sep=' --> ')

Sumit --> NOUN
is --> AUX
an --> DET
adjunct --> ADJ
faculty --> NOUN
at --> ADP
Upgrad --> PROPN
. --> PUNCT


In [None]:
# Named entities are exposed through the `ents` attribute of the processed
# document. For each one, show its text, its label, and the character offsets
# where it starts and ends.
for ent in processed_doc.ents:
    print(ent.text, ent.label_, ent.start_char, ent.end_char, sep=' --> ')

Upgrad --> ORG --> 31 --> 37


In [None]:
# Second example sentence: adds a "Dr." title, spells "UpGrad" with a capital
# G, and has no trailing period or space.
doc2 = "Dr. Sumit is an adjunct faculty at UpGrad"
# Run the already-loaded pipeline (`model`) over the raw string.
processed_doc2 = model(doc2)

In [None]:
# Print the coarse part-of-speech tag of every token in the second sentence.
# The loop variable is named `token` rather than `doc` so that it does not
# rebind the notebook-level name `doc` to a spaCy token.
for token in processed_doc2:
  print(token.text, token.pos_, sep=' --> ')

Dr. --> PROPN
Sumit --> PROPN
is --> AUX
an --> DET
adjunct --> ADJ
faculty --> NOUN
at --> ADP
UpGrad --> PROPN


In [None]:
# Entities found in the second sentence: text, label, and start/end character
# offsets, one entity per line.
for ent in processed_doc2.ents:
    print(ent.text, ent.label_, ent.start_char, ent.end_char, sep=' --> ')

Sumit --> PERSON --> 4 --> 9
UpGrad --> ORG --> 35 --> 41


In [None]:
# IOB format tags
# Each token carries an IOB marker in `ent_iob_` and the entity type in
# `ent_type_` (an empty string when the token is outside any entity).
doc3 = 'Statue of Liberty is situated in New York, USA.'
processed_doc3 = model(doc3)

# The loop variable is named `token` rather than `doc` so that it does not
# rebind the notebook-level name `doc` to a spaCy token.
for token in processed_doc3:
  print(token.text, token.ent_iob_, token.ent_type_, sep=' --> ')

Statue --> O --> 
of --> O --> 
Liberty --> O --> 
is --> O --> 
situated --> O --> 
in --> O --> 
New --> B --> GPE
York --> I --> GPE
, --> O --> 
USA --> B --> GPE
. --> O --> 


In [None]:
# anonymization of data

# Sample email used for the anonymization demo. Adjacent string literals are
# joined with no separator, so every fragment that is followed by more text
# must carry its own trailing space. The fragment before the sign-off was
# missing it, which produced "you.Love".
email = ('Dear Family, Jose Luis and I have changed our dates, we are '
         'going to come to Aspen on the 23rd of December and leave on the '
         '30th of December. We would like to stay in the front bedroom of '
         'the Aspen Cottage so that Mark, Natalie and Zachary can stay in '
         'the guest cottage. Please let me know if there are any problems '
         'with this. If I do not hear anything, I will assume this is all '
         'o.k. with you. '
         'Love, Liz')

In [None]:
# Run the loaded pipeline (`model`) over the email text; the result is stored
# for the cells that follow.
processed_email = model(email)

In [None]:
# Strings are immutable, so work on a list of characters that can be
# overwritten in place.
anonymized_email = list(email)

# Mask every PERSON entity: replace the characters between its start and end
# offsets with the same number of '*', so the text length stays unchanged.
for ent in processed_email.ents:
    if ent.label_ != 'PERSON':
        continue
    anonymized_email[ent.start_char:ent.end_char] = ['*'] * (ent.end_char - ent.start_char)

# Last expression of the cell: the masked text, shown as the cell output.
''.join(anonymized_email)

'Dear Family, ********* and I have changed our dates, we are going to come to Aspen on the 23rd of December and leave on the 30th of December. We would like to stay in the front bedroom of the Aspen Cottage so that ****, ******* and ******* can stay in the guest cottage. Please let me know if there are any problems with this. If I do not hear anything, I will assume this is all o.k. with you.Love, ***'

In [None]:
# Two sentences that use the word "Jaguar" in different contexts (driving one
# vs. a deer running from one), to compare the entity label assigned to each.
case1 = model('I drove away in my Jaguar.')
case2 = model('The deer ran away seeing the Jaguar.')

# Print text and label for every entity, first for case1 and then for case2.
for parsed in (case1, case2):
    for item in parsed.ents:
        print(item.text, item.label_)

Jaguar ORG
Jaguar ORG


In [None]:
# Process one more sentence and list each token next to its coarse
# part-of-speech tag, separated by a single space.
xx = model('Sofia is watching Game of Thrones on Android TV.')

for x in xx:
    print(f'{x.text} {x.pos_}')

Sofia NOUN
is AUX
watching VERB
Game PROPN
of ADP
Thrones PROPN
on ADP
Android PROPN
TV NOUN
. PUNCT


In [None]:
### graded questions

import os
import nltk
from nltk.tokenize import sent_tokenize
import spacy

In [None]:
# `sent_tokenize` needs the Punkt sentence-tokenizer data. Recent NLTK
# releases (3.9 and later) look up the 'punkt_tab' resource instead of the
# older pickled 'punkt' package, so fetch both to keep this notebook runnable
# on either side of that change.
nltk.download('punkt_tab')
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Read the whole sample text file into `data`. The `with` block closes the
# file handle even if `read()` raises, which the manual open()/close() pair
# did not guarantee.
with open('/content/sample_data/data.txt') as conn:
  data = conn.read()

In [None]:
# Show the raw text that was read from the file.
print(data)

The stock price of a certain company was $100 a year ago, once this company came into boom phase then it's stock price rise to $200. On the other hand, when there was economic depression the stock price was $50. When the company had started the price of its stock was $10, which you can consider as the base price. The stock price of a certain company was $100 a year ago, once this company came into boom phase then it's stock price rise to $200. On the other hand, when there was economic depression the stock price was $50. When the company had started the price of its stock was $10, which you can consider as the base price. The stock price of a certain company was $100 a year ago, once this company came into boom phase then it's stock price rise to $200. On the other hand, when there was economic depression the stock price was $50. When the company had started the price of its stock was $10, which you can consider as the base price. The stock price of a certain company was $100 a year ag

In [None]:
# Split the raw text into a list of sentences with NLTK's `sent_tokenize`,
# then print that list.
sentences = sent_tokenize(data)
print(sentences)

["The stock price of a certain company was $100 a year ago, once this company came into boom phase then it's stock price rise to $200.", 'On the other hand, when there was economic depression the stock price was $50.', 'When the company had started the price of its stock was $10, which you can consider as the base price.', "The stock price of a certain company was $100 a year ago, once this company came into boom phase then it's stock price rise to $200.", 'On the other hand, when there was economic depression the stock price was $50.', 'When the company had started the price of its stock was $10, which you can consider as the base price.', "The stock price of a certain company was $100 a year ago, once this company came into boom phase then it's stock price rise to $200.", 'On the other hand, when there was economic depression the stock price was $50.', 'When the company had started the price of its stock was $10, which you can consider as the base price.', "The stock price of a certa

In [None]:
# Load the spaCy 'en_core_web_sm' pipeline under the name `nlp` for the cells
# below.
nlp = spacy.load('en_core_web_sm')

In [None]:
# Run the pipeline on a single word and show, for each token, its text, IOB
# marker and entity type separated by single spaces.
sent = 'Christmas'
doc = nlp(sent)

for d in doc:
    print(f'{d.text} {d.ent_iob_} {d.ent_type_}')


Christmas B DATE


In [None]:
# Print the dependency label of every token.
# This cell used to rely on whatever `doc` happened to be in the kernel; the
# recorded output is for the sentence below, not for the 'Christmas' document
# built in the previous cell. Defining the sentence here makes the cell
# reproducible under Restart & Run All.
sent = 'Harry is not gardening as it is raining'
doc = nlp(sent)

for d in doc:
  print(d.text, d.dep_)

Harry nsubj
is aux
not neg
gardening ROOT
as mark
it nsubj
is aux
raining advcl
