# Spacy
- spaCy is a free, open-source library for advanced Natural Language Processing (NLP) in Python.
  

In [1]:
import spacy

In [3]:
nlp = spacy.load('en_core_web_sm')

In [6]:
doc = nlp(u'Tesla is looking at buying U.S, startup for $6 million')

In [19]:
for token in doc:
    print(token.text)

Tesla
is
looking
at
buying
U.S
,
startup
for
$
6
million


In [21]:
for token in doc:
    print(token.text,token.pos_, token.dep_)

Tesla PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S PROPN dobj
, PUNCT punct
startup VERB conj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [23]:
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x236a6840e90>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x236a68410d0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x236a67b7060>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x236a6ada250>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x236a6ae53d0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x236a67b7220>)]

In [25]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [35]:
doc2 = nlp(u"Tesla isn't       looking into     startups anymore.")

In [37]:
for token in doc2:
    print(token.text,token.pos_,token.dep)

Tesla PROPN 429
is AUX 405
n't PART 425
       SPACE 414
looking VERB 8206900633647566924
into ADP 443
     SPACE 414
startups NOUN 439
anymore ADV 400
. PUNCT 445


In [49]:
doc2[0]

Tesla

In [51]:
doc2[0].pos_

'PROPN'

In [63]:
doc3 = (u""" Since its release in 2015, spaCy has become an industry standard with a huge ecosystem. 
Choose from a variety of plugins, integrate with your machine learning stack and build custom components and workflows.""")

In [107]:
take_some_words = doc3[0:89]

In [109]:
print(take_some_words)

Since its release in 2015, spaCy has become an industry standard with a huge ecosystem. 



# Tokenization

In [172]:
mystring = '"We\'re moving to L.A.!"'

In [174]:
print(mystring)

"We're moving to L.A.!"


In [176]:
doc = nlp(mystring)

In [178]:
for token in doc:
    print(token.text)

"
We
're
moving
to
L.A.
!
"


In [182]:
doc4 = nlp(u"we/'re here to help! Send snail-mail , email support2oursite.com, or visit us at http://www.oursite.com!")

In [188]:
for t in doc4:
    print(t)

we/'re
here
to
help
!
Send
snail
-
mail
,
email
support2oursite.com
,
or
visit
us
at
http://www.oursite.com
!


In [190]:
doc5 = nlp(u"A 5km NYC cab ride cost $12.50")

In [192]:
for t in doc5:
    print(t)

A
5
km
NYC
cab
ride
cost
$
12.50


In [196]:
doc6 =nlp(u"Apple top build a El Salvador factory for $6 million")

In [198]:
for t in doc6:
    print(t.text, end=' | ')

Apple | top | build | a | El | Salvador | factory | for | $ | 6 | million | 

### Print only the entidies : 
    - An "entity" refers to a significant piece of information or object identified and categorized within a text

In [210]:
for entity in doc6.ents:
    print(entity)

Apple
El Salvador
$6 million


In [221]:
for entity in doc6.ents:
    print(entity)
    print(entity.label_)
    print(str(spacy.explain(entity.label_)))
    print('\n')

Apple
ORG
Companies, agencies, institutions, etc.


El Salvador
GPE
Countries, cities, states


$6 million
MONEY
Monetary values, including unit




## DisplaCy Dependency Visualizer
### Intro displacy from spacy
- spaCy also comes with a built-in dependency visualizer that lets you check your model's predictions in your browser.
- You can pass in one or more Doc objects and start a web server, export HTML files or view the visualization directly from a Jupyter Notebook. 

In [258]:
from spacy import displacy

In [283]:
doc7 = nlp(u"Google gain 5 million")

In [285]:
displacy.render(doc7, style='dep', jupyter=True, options={'distance': 110} )
