# Chapter 1: Finding words, phrases, names and concepts

## Getting started

In [7]:
# Bring in spaCy's English language class
from spacy.lang.en import English

# Build the English nlp object and run a sample sentence through it
nlp = English()
doc = nlp("This is a sentence.")

# Show the text of the processed document
print(doc.text)

This is a sentence.


In [8]:
# Bring in spaCy's German language class
from spacy.lang.de import German

# Build the German nlp object
nlp = German()

# Run a German greeting ("Kind regards!") through it and show the document text
doc = nlp("Liebe Grüße!")
print(doc.text)

Liebe Grüße!


In [9]:
# Bring in spaCy's Spanish language class
from spacy.lang.es import Spanish

# Build the Spanish nlp object
nlp = Spanish()

# Run a Spanish question ("How are you?") through it and show the document text
doc = nlp("¿Cómo estás?")
print(doc.text)

¿Cómo estás?


## Documents, spans and tokens

In [12]:
# Set up the English nlp object
from spacy.lang.en import English

nlp = English()

# Turn the sample sentence into a Doc
doc = nlp("I like tree kangaroos and narwhals.")

# Index 0 of the Doc is its first token; show that token's text
first_token = doc[0]
print(first_token.text)

I


In [14]:
# Set up the English nlp object and process the sample sentence
from spacy.lang.en import English

nlp = English()
doc = nlp("I like tree kangaroos and narwhals.")

# Token positions 2-3 make up the span "tree kangaroos"
tree_kangaroos = doc[2:4]
# Token positions 2-5 make up "tree kangaroos and narwhals"; the final "." is left out
tree_kangaroos_and_narwhals = doc[2:6]

# Show both spans, shorter one first
print(tree_kangaroos.text)
print(tree_kangaroos_and_narwhals.text)

tree kangaroos
tree kangaroos and narwhals


## Lexical attributes

In [23]:
from spacy.lang.en import English

nlp = English()

# Sample text containing two percentages
doc = nlp(
    "In 1990, more than 60% of people in East Asia were in extreme povetry. "
    "Now less than 4% are."
)

# Report every number-like token that is immediately followed by a "%" token
for i, token in enumerate(doc):
    # A number-like token at the very end of the doc has no successor; the
    # bounds check keeps doc[i + 1] from raising IndexError in that case
    if token.like_num and i + 1 < len(doc):
        next_token = doc[i + 1]
        if next_token.text == "%":
            # The label text is left exactly as recorded in the cell output
            print("Precentage found:", token.text)

Precentage found: 60
Precentage found: 4


## Statistical models

In [2]:
import spacy

# Load spaCy's small English model
nlp = spacy.load("en_core_web_sm")

# Run a short sentence through the model
doc = nlp("She ate the pizza")

# For every token, show its text next to the predicted part-of-speech tag
for token in doc:
    print(f"{token.text} {token.pos_}")

She PRON
ate VERB
the DET
pizza NOUN


In [4]:
# For every token, show its text, part-of-speech tag, dependency label,
# and the text of the token's head
for token in doc:
    print(" ".join((token.text, token.pos_, token.dep_, token.head.text)))

She PRON nsubj ate
ate VERB ROOT ate
the DET det pizza
pizza NOUN dobj ate


![Dependency label scheme](out/img/dependency_label_scheme/dependency_label_scheme.png)

| Label | Description | Example |
|-------|-------------|---------|
| **nsubj** | nominal subject | She |
| **dobj** | direct object | pizza |
| **det** | determiner (article) | the |


In [5]:
# Run a sentence that mentions a company, a place and a sum of money through the model
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# For every predicted entity, show its text followed by its label
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Apple ORG
U.K. GPE
$1 billion MONEY


In [6]:
# Look up the human-readable description of the "GPE" entity label
spacy.explain("GPE")

'Countries, cities, states'

In [7]:
# Look up the human-readable description of the "NNP" tag
spacy.explain("NNP")

'noun, proper singular'

In [8]:
# Look up the human-readable description of the "dobj" dependency label
spacy.explain("dobj")

'direct object'