# Strings to hashes

In [1]:
%%writefile Strings_to_hashes-solution-1.py
# Round-trip the word "cause" through the vocab's string store.
# Assumes `nlp` is provided by the surrounding exercise environment.
string_store = nlp.vocab.strings

# String -> hash
cause_hash = string_store["cause"]
print(cause_hash)

# Hash -> string, using the value obtained above
cause_string = string_store[cause_hash]
print(cause_string)

Writing Strings_to_hashes-solution-1.py


In [2]:
%%writefile Strings_to_hashes-solution-2.py
# Round-trip the label string "PERSON" through the vocab's string store.
# Assumes `nlp` is provided by the surrounding exercise environment.
string_store = nlp.vocab.strings

# Label string -> hash
person_hash = string_store["PERSON"]
print(person_hash)

# Hash -> label string, using the value obtained above
person_string = string_store[person_hash]
print(person_string)

Writing Strings_to_hashes-solution-2.py


# Exercise: Vocab, hashes and lexemes

In [3]:
%%writefile Exercise_Vocab_hashes_lexemes-solution-1.py
# Print the quiz answer with its explanation. The message is split across
# adjacent literals for readability; the emitted text is unchanged.
print(
    "The correct answer is: A \n "
    "Hashes can't be reversed. "
    "To prevent this problem, add the word to the new vocab by processing "
    "a text or looking up the string, or use the same vocab to resolve "
    "the hash back to a string."
)

Writing Exercise_Vocab_hashes_lexemes-solution-1.py


# Creating a Doc

In [4]:
%%writefile Creating_a_Doc-solution-1.py
# The Doc is assembled by hand here rather than produced by calling nlp()
from spacy.tokens import Doc

# Target text: "spaCy is cool!"
# `spaces` runs parallel to `words`: one flag per word saying whether a
# space follows it.
words = ["spaCy", "is", "cool", "!"]
spaces = [True, True, False, False]

# Build the Doc on the existing vocab and show the reconstructed text
doc = Doc(
    nlp.vocab,
    words=words,
    spaces=spaces,
)
print(doc.text)

Writing Creating_a_Doc-solution-1.py


In [5]:
%%writefile Creating_a_Doc-solution-2.py
# The Doc is assembled by hand here rather than produced by calling nlp()
from spacy.tokens import Doc

# Target text: "Go, get started!"
# `spaces` runs parallel to `words`: one flag per word saying whether a
# space follows it (none after "Go", one after the comma, and so on).
words = ["Go", ",", "get", "started", "!"]
spaces = [False, True, True, False, False]

# Build the Doc on the existing vocab and show the reconstructed text
doc = Doc(
    nlp.vocab,
    words=words,
    spaces=spaces,
)
print(doc.text)

Writing Creating_a_Doc-solution-2.py


In [6]:
%%writefile Creating_a_Doc-solution-3.py
# The Doc is assembled by hand here rather than produced by calling nlp()
from spacy.tokens import Doc

# Target text: "Oh, really?!"
# `spaces` runs parallel to `words`; only the comma is followed by a space.
words = ["Oh", ",", "really", "?", "!"]
spaces = [False, True, False, False, False]

# Build the Doc on the existing vocab and show the reconstructed text
doc = Doc(
    nlp.vocab,
    words=words,
    spaces=spaces,
)
print(doc.text)

Writing Creating_a_Doc-solution-3.py


# Docs, spans and entities from scratch

In [7]:
%%writefile Docs_spans_entities_scratch-solution-1.py
# Doc is used below; Span is imported alongside it but not used in this step
from spacy.tokens import Doc, Span

# Tokens of "I like David Bowie" and whether a space follows each one
words = ["I", "like", "David", "Bowie"]
spaces = [True, True, True, False]

# Assemble the Doc on the existing vocab and print its text
doc = Doc(
    nlp.vocab,
    words=words,
    spaces=spaces,
)
print(doc.text)

Writing Docs_spans_entities_scratch-solution-1.py


In [8]:
%%writefile Docs_spans_entities_scratch-solution-2.py
# Both classes are needed: Doc for the text, Span for the labelled slice
from spacy.tokens import Doc, Span

# Assemble "I like David Bowie" by hand on the existing vocab
doc = Doc(
    nlp.vocab,
    words=["I", "like", "David", "Bowie"],
    spaces=[True, True, True, False],
)

# Tokens 2 and 3 ("David Bowie") form the span; the end index 4 is exclusive.
# The span is labelled "PERSON".
span = Span(doc, 2, 4, label="PERSON")
print(span.text, span.label_)

Writing Docs_spans_entities_scratch-solution-2.py


In [9]:
%%writefile Docs_spans_entities_scratch-solution-3.py
# Both classes are needed: Doc for the text, Span for the labelled slice
from spacy.tokens import Doc, Span

# Assemble "I like David Bowie" by hand on the existing vocab
doc = Doc(
    nlp.vocab,
    words=["I", "like", "David", "Bowie"],
    spaces=[True, True, True, False],
)

# Tokens 2 and 3 ("David Bowie") form the span; the end index 4 is exclusive.
# The span is labelled "PERSON".
span = Span(doc, 2, 4, label="PERSON")

# Register the span as the doc's only entity
doc.ents = [span]

# Show each entity as a (text, label) pair
print([(entity.text, entity.label_) for entity in doc.ents])

Writing Docs_spans_entities_scratch-solution-3.py


# Data structures best practices

In [10]:
%%writefile Data_structures_best_practices-solution-1.py
# Print the quiz answer; print's default single-space separator joins the
# two parts into the same line of output as before.
print("The correct answer is:", "B")

Writing Data_structures_best_practices-solution-1.py


In [11]:
%%writefile Data_structures_best_practices-solution-2.py
# Report every proper noun that is immediately followed by a verb.
# Assumes `doc` is an already-processed spaCy Doc supplied by the
# surrounding exercise environment.
for token in doc:
    # Check if the current token is a proper noun
    if token.pos_ == 'PROPN':
        # A proper noun in final position has no following token; without
        # the bounds check, doc[token.i + 1] would raise an IndexError.
        if token.i + 1 < len(doc) and doc[token.i + 1].pos_ == 'VERB':
            print('Found a verb after a proper noun!')

Writing Data_structures_best_practices-solution-2.py


# Inspecting word vectors

In [12]:
%%writefile Inspecting_word_vectors-solution-1.py
# Load the 'en_core_web_md' model and show the resulting pipeline object.
# Assumes `spacy` is already imported by the surrounding environment.
nlp = spacy.load("en_core_web_md")
print(nlp)

# Run the pipeline over a short text
doc = nlp("Two bananas in pyjamas")

# "bananas" is the second token (index 1); fetch and print its vector
bananas_vector = doc[1].vector
print(bananas_vector)

Writing Inspecting_word_vectors-solution-1.py


# Comparing similarities

In [13]:
%%writefile Comparing_similarities-solution-1.py
# Process the two texts to compare.
# Assumes `nlp` is provided by the surrounding exercise environment.
doc1 = nlp("It's a warm summer day")
doc2 = nlp("It's sunny outside")

# Document-level similarity of doc1 against doc2
similarity = doc1.similarity(doc2)
print(similarity)

Writing Comparing_similarities-solution-1.py


In [14]:
%%writefile Comparing_similarities-solution-2.py
# Assumes `nlp` is provided by the surrounding exercise environment.
doc = nlp("TV and books")

# Pick out "TV" (index 0) and "books" (index 2), skipping "and"
token1 = doc[0]
token2 = doc[2]

# Token-level similarity of "TV" against "books"
similarity = token1.similarity(token2)
print(similarity)

Writing Comparing_similarities-solution-2.py


In [15]:
%%writefile Comparing_similarities-solution-3.py
# Assumes `nlp` is provided by the surrounding exercise environment.
doc = nlp(
    "This was a great restaurant. Afterwards, we went to a really nice bar."
)

# Slice out the two phrases to compare (end indices are exclusive):
# tokens 3-4 -> "great restaurant", tokens 12-14 -> "really nice bar"
span1 = doc[3:5]
span2 = doc[12:15]

# Span-level similarity of the first phrase against the second
similarity = span1.similarity(span2)
print(similarity)

Writing Comparing_similarities-solution-3.py


# Debugging patterns

In [16]:
%%writefile Debugging_patterns-solution-1.py
# Token patterns, one dict per token to match.
# pattern1: lowercase form "amazon", then a title-cased proper noun
pattern1 = [
    {"LOWER": "amazon"},
    {"IS_TITLE": True, "POS": "PROPN"},
]
# pattern2: "ad", "-", "free" (case-insensitive on the words), then a noun
pattern2 = [
    {"LOWER": "ad"},
    {"TEXT": "-"},
    {"LOWER": "free"},
    {"POS": "NOUN"},
]

# Build the matcher on the shared vocab and register both patterns.
# Assumes `Matcher`, `nlp` and `doc` come from the surrounding environment.
# NOTE(review): passing None as the second argument looks like the spaCy v2
# add() signature (callback slot) - confirm against the installed version.
matcher = Matcher(nlp.vocab)
matcher.add("PATTERN1", None, pattern1)
matcher.add("PATTERN2", None, pattern2)

# For each match, resolve the match ID back to the pattern name and print
# it with the matched text
for match_id, start, end in matcher(doc):
    print(doc.vocab.strings[match_id], doc[start:end].text)

Writing Debugging_patterns-solution-1.py


# Efficient phrase matching

In [17]:
%%writefile Efficient_phrase_matching-solution-1.py
# Set up a PhraseMatcher on the shared vocab.
# Assumes `nlp`, `COUNTRIES` and `doc` come from the surrounding environment.
from spacy.matcher import PhraseMatcher

matcher = PhraseMatcher(nlp.vocab)

# Turn every country name into a pattern Doc. nlp.pipe processes them as a
# stream, which is the faster version of:
#     [nlp(country) for country in COUNTRIES]
patterns = list(nlp.pipe(COUNTRIES))
matcher.add("COUNTRY", None, *patterns)

# Run the matcher over the test document and print the matched spans
matches = matcher(doc)
print([doc[start:end] for _, start, end in matches])

Writing Efficient_phrase_matching-solution-1.py


# Extracting countries and relationships

In [18]:
%%writefile Extracting_countries_relationships-solution-1.py
# Process the text.
# Assumes `nlp`, `text`, `matcher` and `Span` come from the surrounding
# environment.
doc = nlp(text)

# Turn every match into a "GPE" entity on the doc
for match_id, start, end in matcher(doc):
    # Label the matched token range as "GPE"
    span = Span(doc, start, end, label="GPE")

    # doc.ents is replaced as a whole: existing entities plus the new span
    doc.ents = [*doc.ents, span]

# Show only the "GPE" entities as (text, label) pairs
print([
    (entity.text, entity.label_)
    for entity in doc.ents
    if entity.label_ == "GPE"
])

Writing Extracting_countries_relationships-solution-1.py
