In [1]:
import spacy
from spacy.lang.en import English

# Blank English pipeline: it only tokenizes (no statistical model is loaded here).
nlp = English()
doc = nlp('Stockholm is the capital of Sweden')

# Iterating over a Doc yields its tokens in order; show the text of each one.
for token in doc:
    print(token.text)

Stockholm
is
the
capital
of
Sweden


In [2]:
# A Doc can be indexed like a sequence to get the token at a specific position;
# a negative index counts from the end, so -1 selects the last token.
token = doc[-1]
token

Sweden

In [3]:
# Slicing a Doc gives a span of consecutive tokens. The end index is exclusive,
# so [1:3] covers the tokens at positions 1 and 2.
doc[1:3]

is the

In [8]:
# Lexical attributes: per-token index, text, and punctuation / alphabetic flags.
doc = nlp('it costs $5.00 for those 5 apples.')

# One output line per attribute: (printed label, Token attribute name).
for label, attr in (
    ('index: ', 'i'),
    ('text', 'text'),
    ('punct', 'is_punct'),
    ('alpha', 'is_alpha'),
):
    print(label, [getattr(token, attr) for token in doc])

index:  [0, 1, 2, 3, 4, 5, 6, 7, 8]
text ['it', 'costs', '$', '5.00', 'for', 'those', '5', 'apples', '.']
punct [False, False, False, False, False, False, False, False, True]
alpha [True, True, False, False, True, True, False, True, False]


In [31]:
# Print text, coarse POS, fine-grained tag and syntactic head for every token.
# NOTE(review): the recorded output shows empty pos_/tag_ columns and every token
# as its own head -- presumably because `nlp` was still a blank pipeline (no
# tagger/parser) when this ran; TODO confirm. The same loop appears again after
# spacy.load("en_core_web_sm") and prints real annotations there.
doc = nlp("She ate the pizza")
for token in doc:
    fields = (token.text, token.pos_, token.tag_, token.head.text)
    print(*fields)

She   She
ate   ate
the   the
pizza   pizza


In [15]:
# German text
# The German pipeline gets its own name (`nlp_de`) instead of rebinding `nlp`:
# the following cells pass English sentences to `nlp`, and rebinding it here
# would make a fresh top-to-bottom run tokenize them with the German rules.
from spacy.lang.de import German

nlp_de = German()
doc = nlp_de("Im Streit um die Migranten an der östlichen EU-Außengrenze fehlt es nicht an martialischer Rhetorik in Richtung Belarus. Die Diplomatie kam bisher zu kurz. Das ist die Stunde der OSZE, meint Roman Goncharenko.")
# Doc.text gives back the full text that was passed in.
doc.text

'Im Streit um die Migranten an der östlichen EU-Außengrenze fehlt es nicht an martialischer Rhetorik in Richtung Belarus. Die Diplomatie kam bisher zu kurz. Das ist die Stunde der OSZE, meint Roman Goncharenko.'

In [17]:
# NOTE(review): "Austrialia" looks like a typo for "Australia"; it is runtime
# text and does not affect the printed span, so it is left unchanged here.
doc = nlp("I like kangaroos and koalas in Austrialia")

# Tokens 2..4 (end index exclusive) form the span "kangaroos and koalas".
tree_kang = doc[2:5]
print(tree_kang.text)

kangaroos and koalas


In [22]:
doc = nlp("Akasa Air on Tuesday placed an order for 72 Boeing 737 MAX jets, valued at nearly $9 billion at list prices - a deal that could help the US planemaker regain lost ground in one of the world's most promising markets.")

# Print every number-like token that is immediately preceded by a dollar sign.
for token in doc:
    # The first token has no predecessor: without the `token.i > 0` guard,
    # doc[token.i - 1] would be doc[-1] and silently wrap around to the LAST
    # token, so a text starting with a number and ending in "$" would match.
    if token.i > 0 and token.like_num and doc[token.i - 1].text == '$':
        print(token.text)

9


In [27]:
# One-time setup: uncomment and run to download the small English model used by
# spacy.load("en_core_web_sm") further down. It is kept commented out so the
# download is not repeated on every run; the recorded output below is from the
# run that installed en_core_web_sm 2.2.5.
#!python -m spacy download en_core_web_sm

Collecting en_core_web_sm==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz (12.0 MB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: en-core-web-sm
  Building wheel for en-core-web-sm (setup.py): started
  Building wheel for en-core-web-sm (setup.py): finished with status 'done'
  Created wheel for en-core-web-sm: filename=en_core_web_sm-2.2.5-py3-none-any.whl size=12011736 sha256=f0bd3fb15b58591facb6bfa4c948ff77f241f5095aaccc95f5245881ea2919c7
  Stored in directory: C:\Users\vlekkala\AppData\Local\Temp\1\pip-ephem-wheel-cache-at2od37_\wheels\b5\94\56\596daa677d7e91038cbddfcf32b591d0c915a1b3a3e3d3c79d
Successfully built en-core-web-sm
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-2.2.5
[+] Download and installation successful
You can now load the model via spacy.load('en_core_web_sm')



In [34]:
# Part-of-speech tagging with spaCy
# Replace `nlp` with the downloaded en_core_web_sm pipeline; the cells that
# follow read pos_, tag_, dep_ and named entities from the docs it produces.
nlp = spacy.load("en_core_web_sm")

In [35]:
# For each token: text, coarse POS, fine-grained tag, and the text of its
# syntactic head, separated by single spaces.
doc = nlp("She ate the pizza")
for token in doc:
    print(" ".join((token.text, token.pos_, token.tag_, token.head.text)))

She PRON PRP ate
ate VERB VBD ate
the DET DT pizza
pizza NOUN NN ate


In [36]:
text = "It's official that Apple is the first U.S. public company to reach a $1 Trillion market value"
doc = nlp(text)

# Left-aligned three-column table: token text (12 wide), POS (10), dependency label (10).
for token in doc:
    print("{:<12}{:<10}{:<10}".format(token.text, token.pos_, token.dep_))

It          PRON      nsubj     
's          AUX       ROOT      
official    ADJ       attr      
that        SCONJ     mark      
Apple       PROPN     nsubj     
is          AUX       ccomp     
the         DET       det       
first       ADJ       amod      
U.S.        PROPN     nmod      
public      ADJ       amod      
company     NOUN      attr      
to          PART      aux       
reach       VERB      relcl     
a           DET       det       
$           SYM       quantmod  
1           NUM       compound  
Trillion    NUM       nummod    
market      NOUN      compound  
value       NOUN      dobj      


In [37]:
text = "New IPhone X release date leaked as Apple reveals pre-order by mistake"
doc = nlp(text)

# doc.ents holds the named entities found in the text; show each one with its label.
for ent in doc.ents:
    entity_fields = (ent.text, ent.label_)
    print(*entity_fields)

Apple ORG


In [38]:
# Look up the human-readable description of the 'ORG' entity label.
spacy.explain('ORG')

'Companies, agencies, institutions, etc.'