## Finding words, phrases, names and concepts

In [1]:
from spacy.lang.pt import Portuguese

# Instantiate the Portuguese language class imported above
nlp = Portuguese()
# A bare expression on the last line makes the notebook display the object's repr
nlp

<spacy.lang.pt.Portuguese at 0x7f4815690a90>

In [2]:
# Process a short Portuguese greeting with the nlp object; the result is the
# Doc that the next cell iterates over
doc = nlp("Olá mundo!")

In [3]:
# Iterate over tokens in a Doc: each iteration yields one token, and its
# .text is printed on its own line

for token in doc:
    print(token.text)

Olá
mundo
!


In [4]:
# Build a fresh Doc for this example
doc = nlp("Oi mundo!")

# Index into the Doc to get a single Token (index 1 is the second token)
token = doc[1]

# Get the token text via the .text attribute
print(token.text)

mundo


In [5]:
# Build the Doc for this example
doc = nlp("Oi mundo!")

# A slice from the Doc is a Span object. The end index is exclusive, so
# [1:3] covers token 1 ("mundo") and token 2 ("!"), the last token here.
# (The previous [1:4] ran past the end of the Doc and only produced the
# same text because the out-of-range stop was tolerated.)
span = doc[1:3]

# Get the span text via the .text attribute
print(span.text)

mundo!


In [6]:
# Token attributes, shown side by side for every token of one sentence

doc = nlp("O ingresso custa $50.")

# Position of each token within the Doc, followed by its text
print('Index:   ', [tok.i for tok in doc])
print('Text:    ', [tok.text for tok in doc])

# Per-token flags, one list entry per token in the same order as above
print('is_alpha:', [tok.is_alpha for tok in doc])
print('is_punct:', [tok.is_punct for tok in doc])
print('like_num:', [tok.like_num for tok in doc])

Index:    [0, 1, 2, 3, 4, 5]
Text:     ['O', 'ingresso', 'custa', '$', '50', '.']
is_alpha: [True, True, True, False, False, False]
is_punct: [False, False, False, False, False, True]
like_num: [False, False, False, False, True, False]


### Statistical Models

Statistical models enable spaCy to make predictions in context.

In [7]:
import spacy

# Swap the nlp object for the small English model
nlp = spacy.load('en_core_web_sm')

# Run the model on an English sentence to obtain a Doc
doc = nlp("She ate the pizza")

# One output line per token: its text, a space, then the predicted
# part-of-speech tag
for token in doc:
    print(f"{token.text} {token.pos_}")

She PRON
ate VERB
the DET
pizza NOUN


In [8]:
# Swap the nlp object for the small Portuguese model
nlp = spacy.load('pt_core_news_sm')

# Run the model on a Portuguese sentence to obtain a Doc
doc = nlp("Ela comeu uma pizza enorme")

# One output line per token: its text, a space, then the predicted
# part-of-speech tag
for token in doc:
    print(f"{token.text} {token.pos_}")

Ela PRON
comeu VERB
uma DET
pizza NOUN
enorme ADJ


### Predicting part of speech tags
For each token in the Doc, we can print its text and its `.pos_` attribute, which holds the predicted part-of-speech tag.

In spaCy, attributes that return strings usually end with an underscore (e.g. `token.pos_`), while the same attribute without the underscore (e.g. `token.pos`) returns an integer ID.

In [9]:
# Switch back to the small English model for this example
nlp = spacy.load("en_core_web_sm")

# Process the sentence whose tags we want to inspect
doc = nlp("She ate the pizza")

# .pos_ (with the trailing underscore) is the string form of the predicted tag
for token in doc:
    print(f"{token.text} {token.pos_}")

She PRON
ate VERB
the DET
pizza NOUN


In [10]:
# Switch to the small Portuguese model for this example
nlp = spacy.load('pt_core_news_sm')

# NOTE(review): "cartao" is written without the tilde of "cartão"; it is kept
# as-is because the recorded output below was produced from this spelling.
doc = nlp("Posso comprar ingresso com cartao de débito")

# .pos_ (with the trailing underscore) is the string form of the predicted tag
for token in doc:
    print(f"{token.text} {token.pos_}")

Posso AUX
comprar VERB
ingresso NOUN
com ADP
cartao NOUN
de ADP
débito NOUN
