In [4]:
import spacy

In [5]:
# Create a blank English pipeline object; calling it on a string of text
# produces a processed Doc.
nlp = spacy.blank("en")


# The Doc object

In [6]:
# Run a string of text through the nlp object to obtain a Doc
doc = nlp("Hello world!")

# A Doc is iterable: walk its tokens and show each token's text on its own line
print("\n".join(token.text for token in doc))

Hello
world
!


In [9]:
# Index into the Doc to pick out a single token (position 1 is the second token)
token = doc[1]

# Show the token's text, then the number of tokens in the Doc, one per line
print(token.text, len(doc), sep="\n")

world
3


# The Span object


In [12]:
# A slice of the doc is a span object; the end index is exclusive, so this
# span covers the tokens at positions 1 and 2.
span = doc[1:3]
print(span.text)

world!


# Lexical Attributes

In [13]:
doc = nlp("It costs $5.")

# Pair each output label with the token attribute it reports:
#   i        - index of the token within the Doc
#   text     - the token's text
#   is_alpha - alphabetic characters
#   is_punct - punctuation
#   like_num - a number
lexical_rows = (
    ("Index:   ", "i"),
    ("Text:    ", "text"),
    ("is_alpha:", "is_alpha"),
    ("is_punct:", "is_punct"),
    ("like_num:", "like_num"),
)
for label, attr_name in lexical_rows:
    print(label, [getattr(token, attr_name) for token in doc])

Index:    [0, 1, 2, 3, 4]
Text:     ['It', 'costs', '$', '5', '.']
is_alpha: [True, True, False, False, False]
is_punct: [False, False, False, False, True]
like_num: [False, False, False, True, False]


In [14]:
# Example: find number-like tokens that are immediately followed by "%"

nlp = spacy.blank("en")

# Process the text
doc = nlp(
    "In 1990, more than 60% of people in East Asia were in extreme poverty. "
    "Now less than 4% are."
)

# Iterate over the tokens in the doc
for token in doc:
    # Only a number-like token that has a following token is a candidate.
    # The bounds check matters: without it, a text that ends in a number
    # (e.g. "... since 1990") would make doc[token.i + 1] raise IndexError.
    if token.like_num and token.i + 1 < len(doc):
        # Get the next token in the document
        next_token = doc[token.i + 1]
        # Check if the next token's text equals "%"
        if next_token.text == "%":
            print("Percentage found:", token.text)

Percentage found: 60
Percentage found: 4


# Trained pipelines

Trained pipelines are models that enable spaCy to predict linguistic attributes in context:
    - Part-of-speech tags
    - Syntactic dependencies
    - Named entities
They are trained on labeled example texts and can be updated with more examples to fine-tune their predictions.

In [17]:
# Download command for the pipeline package used below:
#   python -m spacy download en_core_web_sm
# Notes on what the pipeline package contains:
#   - Binary weights
#   - Vocabulary
#   - Meta information
#   - Configuration file
nlp = spacy.load("en_core_web_sm")

# Predicting part-of-speech tags: process the text, then show each token's
# text next to its predicted part-of-speech tag
doc = nlp("She ate the pizza")
for token in doc:
    print(f"{token.text} {token.pos_}")


She PRON
ate VERB
the DET
pizza NOUN


In [20]:
# Predicting syntactic dependencies
#   .dep_  -> the predicted dependency label
#   .head  -> the syntactic head token
for token in doc:
    fields = (token.text, token.pos_, token.dep_, token.head.text)
    print(*fields)

She PRON nsubj ate
ate VERB ROOT ate
the DET det pizza
pizza NOUN dobj ate


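| Label | Description          | Example |
|-------|----------------------|---------|
| nsubj | nominal subject      | She     |
| dobj  | direct object        | pizza   |
| det   | determiner (article) | the     |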

In [21]:
# Predicting named entities
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# doc.ents exposes the named entities predicted by the named entity
# recognition model; show each entity's text together with its label
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Apple ORG
U.K. GPE
$1 billion MONEY


# The spacy.explain method

In [26]:
# Ask spacy.explain for a human-readable description of each label
for label in ("GPE", "NNP", "dobj"):
    print(spacy.explain(label))

Countries, cities, states
noun, proper singular
direct object


In [36]:
# f-string alignment demo: "<20" left-aligns x in a 20-character field,
# so y always starts at the same column regardless of x's width
x, y = 10, 12
print(f'hello {x:<20}{y}')

hello 10                  12
