# 1. Strings to Hashes

In [None]:
from spacy.lang.en import English

# Build an English pipeline and run a short text containing "cat" through it.
nlp = English()
doc = nlp("I have a cat")

# The same store is indexed with a string first and with the hash afterwards.
string_store = nlp.vocab.strings

# String -> hash
cat_hash = string_store['cat']
print(cat_hash)

# Hash -> string
cat_string = string_store[cat_hash]
print(cat_string)

In [None]:
# Repeat the string -> hash -> string round trip for the label 'PERSON'.
label_store = nlp.vocab.strings

person_hash = label_store['PERSON']
print(person_hash)

person_string = label_store[person_hash]
print(person_string)

# 2. Creating a Doc

In [None]:
from spacy.lang.en import English
from spacy.tokens import Doc

nlp = English()

# Keep each word next to its entry in `spaces`, then split the pairs back
# into the two parallel lists that Doc takes.
token_specs = [('spaCy', True), ('is', True), ('cool', False), ('!', False)]
words = [word for word, _ in token_specs]
spaces = [has_space for _, has_space in token_specs]

# Assemble the Doc by hand from the shared vocab, the words and the spaces.
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

In [None]:
# Keep each word next to its entry in `spaces`, then split the pairs back
# into the two parallel lists that Doc takes.
token_specs = [
    ('Go', False),
    (',', True),
    ('get', True),
    ('started', False),
    ('!', False),
]
words = [word for word, _ in token_specs]
spaces = [has_space for _, has_space in token_specs]

doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

# 3. Docs, Spans, and Entities from Scratch

In [None]:
from spacy.lang.en import English
from spacy.tokens import Doc, Span

nlp = English()

# Build the Doc by hand from its words and the matching `spaces` entries.
token_specs = [('I', True), ('like', True), ('David', True), ('Bowie', False)]
words = [word for word, _ in token_specs]
spaces = [has_space for _, has_space in token_specs]
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

# Span over token indices 2..3 (end is exclusive), labelled 'PERSON'.
span = Span(doc, start=2, end=4, label='PERSON')
print(span.text, span.label_)

# Register the span as the Doc's only entity and list what the Doc reports.
doc.ents = [span]
entity_pairs = [(ent.text, ent.label_) for ent in doc.ents]
print(entity_pairs)

In [None]:
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Berlin looks like a nice city")

# Report every proper noun that is immediately followed by a verb.
for token in doc:
    if token.pos_ != "PROPN":
        continue
    # The last token has no successor; without this bounds check,
    # doc[token.i + 1] raises IndexError when the text ends in a proper noun.
    next_index = token.i + 1
    if next_index < len(doc) and doc[next_index].pos_ == "VERB":
        print("Found proper noun before a verb: ", token.text)

# 4. Inspecting Word Vectors

In [None]:
# Download the en_core_web_md package that the next cell imports.
# The leading "!" is a notebook shell escape, not Python syntax.
!python3 -m spacy download en_core_web_md

In [None]:
import en_core_web_md

# Load the pipeline through the installed model package.
nlp = en_core_web_md.load()

doc = nlp("Two bananas in pyjamas")

# Index 1 picks the second token of the Doc; print its vector.
bananas_token = doc[1]
bananas_vector = bananas_token.vector
print(bananas_vector)

# 5. Comparing Similarities

In [None]:
# Parse both sentences, then compare the two Docs as a whole.
doc_1, doc_2 = nlp("It's a warm summer day"), nlp("It's sunny outside")

similarity = doc_1.similarity(doc_2)
print(similarity)

In [None]:
doc = nlp("TV and Books")

# Compare the first and third tokens (indices 0 and 2).
token_1 = doc[0]
token_2 = doc[2]

similarity = token_1.similarity(token_2)
print(similarity)

In [None]:
doc = nlp("This was a great restaurant. Afterwards, we went to a really nice bar.")

# Slice two spans out of the Doc by token index (end index is exclusive).
span_1, span_2 = doc[3:5], doc[12:15]

# Show each span on its own line before comparing them.
print(span_1, span_2, sep="\n")

similarity = span_1.similarity(span_2)
print(similarity)

# 6. Debugging Patterns

In [None]:
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")

# Text to search, kept as adjacent string literals that Python joins into
# one string.
article_text = (
    "Twitch Prime, the perks program for Amazon Prime members offering free "
    "loot, games and other benefits, is ditching one of its best features: "
    "ad-free viewing. According to an email sent out to Amazon Prime members "
    "today, ad-free viewing will no longer be included as a part of Twitch "
    "Prime for new members, beginning on September 14. However, members with "
    "existing annual subscriptions will be able to continue to enjoy ad-free "
    "viewing until their subscription comes up for renewal. Those with "
    "monthly subscriptions will have access to ad-free viewing until October 15."
)
doc = nlp(article_text)

# PATTERN_1: a token whose lowercase form is "amazon", followed by a
# title-cased token tagged as a proper noun.
pattern_1 = [{"LOWER": "amazon"}, {"IS_TITLE": True, "POS": "PROPN"}]

# PATTERN_2: "ad", a punctuation token, "free", then a noun.
pattern_2 = [
    {"LOWER": "ad"},
    {"IS_PUNCT": True},
    {"LOWER": "free"},
    {"POS": "NOUN"},
]

matcher = Matcher(nlp.vocab)
# Pass the patterns as a list: the legacy add(name, None, pattern) form is
# rejected by spaCy v3, while the list form is accepted from v2.2.2 onward.
matcher.add("PATTERN_1", [pattern_1])
matcher.add("PATTERN_2", [pattern_2])

# Print each match as its pattern name followed by the matched text.
for match_id, start, end in matcher(doc):
    print(doc.vocab.strings[match_id], doc[start:end].text)