#### The nlp object

In [1]:
# Import the English language class
from spacy.lang.en import English

# Create the nlp object by instantiating the imported English language class
nlp = English()

#### The Doc object

In [2]:
# Run the raw string through the nlp object to obtain a Doc
doc = nlp("Hello World!")

# A Doc is iterable token by token; emit every token's text on its own line
print("\n".join(token.text for token in doc))

Hello
World
!


#### The Token object

In [3]:
# Index into the Doc to get a single Token (position 1 is the second token)
token = doc[1]

# Get the token text via the .text attribute
print(token.text)

World


#### The Span object

In [4]:
# A slice from the Doc is a Span object. The end index is exclusive, and this
# Doc holds three tokens (indices 0-2), so 1:3 covers "World" and "!" without
# running past the end of the Doc.
span = doc[1:3]

# Get the span text via the .text attribute
print(span.text)

World!


#### Lexical Attributes

In [5]:
# Process a new sentence; note this rebinds `doc`, replacing the earlier "Hello World!" Doc
doc = nlp("It costs $5.")

In [6]:
# Each row pairs a printed label with the Token attribute it reports.
attribute_rows = [
    ('Index:   ', 'i'),
    ('Text:    ', 'text'),
    ('is_alpha:', 'is_alpha'),
    ('is_punct:', 'is_punct'),
    ('like_num:', 'like_num'),
]

# Print one line per attribute, listing its value for every token in the doc
for label, attr in attribute_rows:
    print(label, [getattr(tok, attr) for tok in doc])

Index:    [0, 1, 2, 3, 4]
Text:     ['It', 'costs', '$', '5', '.']
is_alpha: [True, True, False, False, False]
is_punct: [False, False, False, False, True]
like_num: [False, False, False, True, False]


#### Check next token (could use for MD)

In [8]:
from spacy.lang.en import English

nlp = English()

# Process the text
doc = nlp(
    "In 1990, more than 60% of people in East Asia were in extreme poverty. "
    "Now less than 4% are."
)

# Iterate over the tokens in the doc
for token in doc:
    # Only number-like tokens that are followed by another token are candidates.
    # The length check keeps doc[token.i + 1] from raising IndexError when a
    # number happens to be the very last token of the text.
    if token.like_num and token.i + 1 < len(doc):
        # Get the next token in the document
        next_token = doc[token.i + 1]
        # Check if the next token's text equals '%'
        if next_token.text == "%":
            print("Percentage found:", token.text)

Percentage found: 60
Percentage found: 4
