In [2]:
import spacy 

In [3]:
nlp = spacy.blank('en')
data = 'Captain america ate 100$ of samosa. Then he said I can do this all day.'
doc = nlp(data)

for token in doc:
    print(token.text)

nlp.pipe_names

print(nlp.pipeline)

Captain
america
ate
100
$
of
samosa
.
Then
he
said
I
can
do
this
all
day
.
[]


In [4]:
# For english pipeline
nlp = spacy.load('en_core_web_sm')
print(nlp.pipeline)


doc = nlp(data)
# dep 
for token in doc:
    print(token.text , " | ", token.pos_ , " | ", token.lemma_)


[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec object at 0x7f0607acf100>), ('tagger', <spacy.pipeline.tagger.Tagger object at 0x7f0607acf700>), ('parser', <spacy.pipeline.dep_parser.DependencyParser object at 0x7f0607bdbdf0>), ('attribute_ruler', <spacy.pipeline.attributeruler.AttributeRuler object at 0x7f060787b100>), ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer object at 0x7f060774fc40>), ('ner', <spacy.pipeline.ner.EntityRecognizer object at 0x7f0607bdbe60>)]
Captain  |  PROPN  |  Captain
america  |  PROPN  |  america
ate  |  VERB  |  eat
100  |  NUM  |  100
$  |  NUM  |  $
of  |  ADP  |  of
samosa  |  PROPN  |  samosa
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
I  |  PRON  |  I
can  |  AUX  |  can
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
day  |  NOUN  |  day
.  |  PUNCT  |  .


In [5]:
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# knowing labels 
for ent in doc.ents:
    print(ent.text , " | ", ent.label_, " | ", str(spacy.explain(ent.label_))) # ent is entity, means a noun, a named entity recognition

# noun chunks
for chunk in doc.noun_chunks:
    print(chunk.text)

# visualizing with label information
from spacy import displacy
displacy.render(doc, style='ent', jupyter=True)

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit
Tesla Inc
twitter


##### __Controlling Pipelines items over the existing__(Custom Pipeline)

In [7]:
source_nlp = spacy.load('en_core_web_sm')
nlp = spacy.blank('en')

# nlp.add_pipe(source_nlp.create_pipe('sentencizer'))
nlp.add_pipe('ner', source=source_nlp)
print(nlp.pipe_names)

for ent in doc.ents:
    print(ent.text , " | ", ent.label_, " | ", str(spacy.explain(ent.label_)))

# noun chunks
for chunk in doc.noun_chunks:
    print(chunk.text)

['ner']
Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit
Tesla Inc
twitter


#### __Execises__

##### 1.
- Get all the proper nouns from a given text in a list and also count how many of them.
- **Proper Noun** means a noun that names a particular person, place, or thing.

In [None]:
text = '''Ravi and Raju are the best friends from school days.They wanted to go for a world tour and 
visit famous cities like Paris, London, Dubai, Rome etc and also they called their another friend Mohan to take part of this world tour.
They started their journey from Hyderabad and spent next 3 months travelling all the wonderful cities in the world and cherish a happy moments!
'''

# https://spacy.io/usage/linguistic-features

#creating the nlp object
doc = nlp(text)   

##### 2. 
- Get all companies names from a given text and also the count of them.
- **Hint**: Use the spacy **ner** functionality 

In [None]:
text = '''The Top 5 companies in USA are Tesla, Walmart, Amazon, Microsoft, Google and the top 5 companies in 
India are Infosys, Reliance, HDFC Bank, Hindustan Unilever and Bharti Airtel'''


doc = nlp(text)
