# Using the spaCy Online Tutorial

The tutorial this notebook follows is available at [course.spacy.io](https://course.spacy.io/).

In [1]:
import spacy
from spacy.matcher import Matcher
# Load the en_core_web_sm pipeline once; every later cell reuses `nlp`.
# The model package must already be installed
# (e.g. `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")

In [16]:
# Run the loaded pipeline over a short sample sentence.
doc = nlp('She ate the pizza.')

In [17]:
# Show every token next to its coarse-grained part-of-speech tag.
for token in doc:
    print("{} {}".format(token.text, token.pos_))

She PRON
ate VERB
the DET
pizza NOUN
. PUNCT


In [18]:
# Look up the plain-English description of the 'DET' part-of-speech tag.
spacy.explain("DET")

'determiner'

In [19]:
# Per token: text, part-of-speech tag, dependency label, and the text of
# the head token it attaches to.
for token in doc:
    columns = (token.text, token.pos_, token.dep_, token.head.text)
    print(" ".join(columns))

She PRON nsubj ate
ate VERB ROOT ate
the DET det pizza
pizza NOUN dobj ate
. PUNCT punct ate


In [20]:
# Process a new sentence and list the named entities the pipeline predicts.
doc = nlp('Apple is looking at buying U.K. startup for $1 billion')
for ent in doc.ents:
    print("{} {}".format(ent.text, ent.label_))

Apple ORG
U.K. GPE
$1 billion MONEY


In [21]:
# Look up the plain-English description of the 'dobj' dependency label.
spacy.explain("dobj")

'direct object'

In [23]:
# Process a headline and echo the text held by the resulting doc.
text = "Apple is the first U.S. public company to reach $1 trillion market value"
doc = nlp(text)
print(doc.text)

Apple is the first U.S. public company to reach $1 trillion market value


In [24]:
text = "It’s official: Apple is the first U.S. public company to reach a $1 trillion market value"

# Run the pipeline over the headline.
doc = nlp(text)

# One row per token: text, part-of-speech tag and dependency label,
# left-aligned in columns that are 12, 10 and 10 characters wide.
for token in doc:
    row = token.text.ljust(12) + token.pos_.ljust(10) + token.dep_.ljust(10)
    print(row)

It          PRON      nsubj     
’s          VERB      punct     
official    NOUN      ccomp     
:           PUNCT     punct     
Apple       PROPN     nsubj     
is          AUX       ROOT      
the         DET       det       
first       ADJ       amod      
U.S.        PROPN     nmod      
public      ADJ       amod      
company     NOUN      attr      
to          PART      aux       
reach       VERB      relcl     
a           DET       det       
$           SYM       quantmod  
1           NUM       compound  
trillion    NUM       nummod    
market      NOUN      compound  
value       NOUN      dobj      


In [25]:
# Look up the plain-English description of the 'ccomp' dependency label.
spacy.explain("ccomp")

'clausal complement'

In [27]:
# Look up the plain-English description of the 'aux' dependency label.
spacy.explain("aux")

'auxiliary'

In [29]:
# List each named entity in the current doc together with its label.
for ent in doc.ents:
    print(" ".join([ent.text, ent.label_]))

Apple ORG
first ORDINAL
U.S. GPE
$1 trillion MONEY


In [31]:
# NOTE(review): 'leadked' looks like a typo for 'leaked'. It is left
# unchanged so the recorded output of this cell still matches its input.
text = 'New iPhone X release date leadked as Apple reveals pre-orders by mistake'
doc = nlp(text)

# Entities the pipeline predicted on its own.
for ent in doc.ents:
    print('{} {}'.format(ent.text, ent.label_))

# Tokens 1 and 2 ("iPhone", "X") make up the product name; slice them out
# by hand to show what the entity recogniser did not return as one entity.
iphone_x = doc[1:3]
print('Missing entity: ', iphone_x.text)

New iPhone EVENT
Apple ORG
Missing entity:  iPhone X


In [49]:
# Rule-based matcher that shares the pipeline's vocabulary.
simple_matcher = Matcher(nlp.vocab)

text = 'vin# WP0ZZZ99ZTS392124'
doc = nlp(text)
# Show how the sample text is tokenised before matching against it.
for token in doc:
    print(token.text, token.pos_)

# VIN: the literal tokens "vin" and "#", followed by one token made of
# exactly 17 characters drawn from the digits and the capital letters
# other than I, O and Q. The REGEX predicate searches within the token
# text, so the ^...$ anchors are required to reject longer tokens that
# merely contain a 17-character run.
vin_pattern = [{'TEXT': 'vin'}, {'TEXT': '#'}, {'TEXT': {'REGEX': '^[A-HJ-NPR-Z0-9]{17}$'}}]
# Pass the patterns as a list: this form is accepted by spaCy v2.2.2+ and
# v3, whereas the older add(key, None, pattern) form is rejected by v3.
simple_matcher.add('VIN', [vin_pattern])
simple_matches = simple_matcher(doc)

# Each match is (match_id, start, end); start/end are token offsets in doc.
for match_id, start, end in simple_matches:
    span = doc[start:end]
    print(span.text)

vin PROPN
# NOUN
WP0ZZZ99ZTS392124 PROPN
vin# WP0ZZZ99ZTS392124
