In [1]:
import spacy
from spacy.lang.en import English
from spacy.lang.de import German
from spacy.matcher import Matcher

# Load the `en_core_web_md` pipeline and bind it to `nlp`.
nlp = spacy.load("en_core_web_md")

In [57]:
# Tokenize with a blank English pipeline. It is bound to its own name rather
# than to `nlp`, because rebinding `nlp` here would replace the pipeline
# loaded with `spacy.load` that the following cells call for POS tags and
# lemma-based matching.
nlp_blank = English()
doc = nlp_blank("thx a lot")
[token.text for token in doc]

['thx', 'a', 'lot']

In [32]:
# Pair every token's text with its part-of-speech label.
doc = nlp("thx a lot")
[[tok.text, tok.pos_] for tok in doc]

[['thx', 'INTJ'], ['a', 'DET'], ['lot', 'NOUN']]

In [13]:
# Find the two-token phrase "iPhone X" by comparing lowercase token forms.
matcher = Matcher(nlp.vocab)
doc = nlp("Upcoming iPhone X release date leaked as Apple reveals pre-orders")

# One dict per token to match, in order.
pattern = [{"LOWER": "iphone"}, {"LOWER": "x"}]
# Patterns are passed as a list in the second argument. The legacy
# `add(key, None, pattern)` call form is rejected by spaCy v3.
matcher.add("IPHONE_X_PATTERN", [pattern])

# Each match is a (match_id, start, end) triple; slice the doc to get the text.
matches = matcher(doc)
print("Matches:", [doc[start:end].text for match_id, start, end in matches])

Matches: ['iPhone X']


In [19]:
# Find mentions of "iOS" followed by a version number.
matcher = Matcher(nlp.vocab)
doc = nlp(
    "After making the iOS update you won't notice a radical system-wide "
    "redesign: nothing like the aesthetic upheaval we got with iOS 7. Most of "
    "iOS 11's furniture remains the same as in iOS 10. But you will discover "
    "some tweaks once you delve a little deeper."
)

# First token: lowercase form equals "ios"; second token: consists of digits.
pattern = [{"LOWER": "ios"}, {"IS_DIGIT": True}]

# Patterns are passed as a list in the second argument. The legacy
# `add(key, None, pattern)` call form is rejected by spaCy v3.
matcher.add("IOS_VERSION_PATTERN", [pattern])
matches = matcher(doc)

print("Total matches found: ", len(matches))

# Each match is a (match_id, start, end) triple; slice the doc to get the text.
for match_id, start, end in matches:
    print("Match found: ", doc[start:end].text)

Total matches found:  3
Match found:  iOS 7
Match found:  iOS 11
Match found:  iOS 10


In [11]:
# Find any form of the verb "download" followed by a proper noun.
matcher = Matcher(nlp.vocab)

doc = nlp(
    "i downloaded Fortnite on my laptop and can't open the game at all. Help? "
    "so when I was downloading Minecraft, I got the Windows version where it "
    "is the '.zip' folder and I used the default program to unpack it... do "
    "I also need to download Winzip?"
)

# First token: lemma is "download"; second token: tagged as a proper noun.
pattern = [{"LEMMA": "download"}, {"POS": "PROPN"}]
# Patterns are passed as a list in the second argument. The legacy
# `add(key, None, pattern)` call form is rejected by spaCy v3.
matcher.add("DOWNLOAD_THINGS_PATTERN", [pattern])
matches = matcher(doc)
print("Total matches found:", len(matches))

# Each match is a (match_id, start, end) triple; slice the doc to get the text.
for match_id, start, end in matches:
    print("Match found:", doc[start:end].text)


Total matches found: 3
Match found: downloaded Fortnite
Match found: downloading Minecraft
Match found: download Winzip


In [26]:
# Indexing the vocab's string store with a string yields its integer ID.
string_store = nlp.vocab.strings
string_store["coffee"]

3197928453018144401

In [33]:
# Round-trip "cat" through the string store of the doc's vocab.
doc = nlp("I have a cat")
string_store = doc.vocab.strings

# String -> integer ID
cat_hash = string_store["cat"]
print("Cat hash:", cat_hash)

# Integer ID -> string
cat_string = string_store[cat_hash]
print("Cat str:", cat_string)

Cat hash: 5439657043933447811
Cat str: cat


In [38]:
# Round-trip the label string "PERSON" through the doc's string store.
doc = nlp("David Bowie is a PERSON")
string_store = doc.vocab.strings

# String -> integer ID
person_hash = string_store["PERSON"]
print(person_hash)

# Integer ID -> string
person_str = string_store[person_hash]
print(person_str)

380
PERSON


In [47]:
# Create an English and German nlp object.
# The English one is bound to `nlp_en` rather than `nlp`: rebinding `nlp`
# to a blank pipeline here would change which pipeline the subsequent
# `nlp(...)` / `doc.similarity(...)` cell runs against.
nlp_en = English()
nlp_de = German()

# Get the ID for the string 'Bowie'
bowie_id = nlp_en.vocab.strings["Bowie"]
print(bowie_id)

# Look up the ID for "Bowie" in the vocab
# NOTE(review): this reverse lookup was left disabled by the author; the
# reason is not visible here. Confirm before re-enabling.
# print(nlp_en.vocab.strings[bowie_id])


2644858412616767388


In [11]:
# Run both sentences through the pipeline, then score how similar they are.
doc, doc1 = (
    nlp(text) for text in ("I love cookies", "you hate cake when it rains")
)
doc.similarity(doc1)

0.7619861403753894