In [27]:
import spacy

In [28]:
# Build a blank English pipeline. Calling this nlp object on a string returns a
# Doc ("document") object, which gives structured access to the processed text.
nlp = spacy.blank(name="en")

In [29]:
# Process a short text, then print every token of the resulting Doc on its own line
doc = nlp("Hello World!")
for position in range(len(doc)):
    # Indexing the Doc with an integer gives the token at that position
    print(doc[position])

Hello
World
!


In [30]:
# Slicing a Doc gives a Span: here the tokens at index 1 and 2 (end index 3 is exclusive)
span = doc[1:3]
print(span.text)

World!


In [31]:
# Index the Doc to get a single token (index 2 is the third token).
# As the last expression of the cell, its value is displayed as the cell output.
doc[2]

!

In [32]:
# Working with lexical attributes: find numbers that are followed by a "%" sign
doc = nlp(
    "In 1990, more than 60% of people in East Asia were in extreme poverty. "
    "Now less than 4% are."
)

# Iterate over the tokens in the doc
for token in doc:
    # Check if the token resembles a number. like_num also accepts forms such as
    # "10.9" or "ten", which is_digit (digits only) would skip.
    # Also require that a following token exists, so the lookahead below cannot
    # raise IndexError when the text ends with a number.
    if token.like_num and token.i + 1 < len(doc):
        # Get the next token in the document
        next_token = doc[token.i + 1]
        # Check if the next token's text equals "%"
        if next_token.text == "%":
            print("Percentage found:", token.text)

Percentage found: 60
Percentage found: 4


In [33]:
# Rebind nlp to the small trained English pipeline, replacing the blank one above.
# NOTE(review): presumably requires the en_core_web_sm package to be installed - confirm.
nlp = spacy.load(name="en_core_web_sm")

In [34]:
# Predict part-of-speech tags and syntactic dependencies
doc = nlp("Mario play video games tonight")
for tok in doc:
    # One row per token: text, POS tag, dependency label, text of the head token
    row = (tok.text, tok.pos_, tok.dep_, tok.head.text)
    print(*row)

Mario PROPN nsubj play
play VERB ROOT play
video NOUN compound games
games NOUN dobj play
tonight NOUN npadvmod play


In [35]:
# Named entity recognition: doc.ents holds the entity spans found in the text
doc = nlp("We are going to order 1$ million dollar oil bottles")
for entity in doc.ents:
    # Print the entity text followed by its label
    print(f"{entity.text} {entity.label_}")

1$ million dollar MONEY


In [36]:
# Rule-based matching
from spacy.matcher import Matcher

# The matcher is created from the vocab of the pipeline that produces the docs
matcher = Matcher(nlp.vocab)

# A single-token pattern: lemma "love" with the POS tag VERB
pattern = [{"LEMMA": "love", "POS": "VERB"}]
matcher.add("LOVE_PATTERN", [pattern])

doc = nlp("Frank loves to travel by bike")
matches = matcher(doc)

# Each match is a (match_id, start, end) triple; start and end slice the doc
for _match_id, start_idx, end_idx in matches:
    print(doc[start_idx:end_idx].text)

loves
