In [3]:
from spacy.lang.en import English

In [4]:
# Instantiate the English language class; the resulting `nlp` object is
# called on text below and also exposes the shared vocab via `nlp.vocab`.
nlp = English()

In [5]:
# Run the text through `nlp` to get a Doc. Note the capitalised "Cat" here;
# the string-store lookup further down uses lowercase "cat".
doc = nlp("I have a Cat")

In [7]:
# Index the vocab's string store with a string to obtain its hash value
# (an integer, printed in the next cell).
cat_hash = nlp.vocab.strings["cat"]

In [8]:
# Show the integer hash obtained for "cat"
print(cat_hash)

5439657043933447811


In [9]:
# Reverse lookup: index the string store with the hash to get the string back
cat_string = nlp.vocab.strings[cat_hash]
print(cat_string)

cat


In [10]:
from spacy.tokens import Doc

In [11]:
# Token texts for the Doc that is built by hand in the next cell
words = ["spaCy","is","great","!"]
# One flag per word: True when that word is followed by a space in the text
spaces = [True, True, False, False]

In [12]:
# Build the Doc manually from the token texts and their trailing-space flags.
# (The call was missing its closing parenthesis, which made the cell a
# SyntaxError.)
doc = Doc(nlp.vocab, words=words, spaces=spaces)

In [13]:
# The text is reassembled from the words and their trailing-space flags
print(doc.text)

spaCy is great!


In [17]:
from spacy.tokens import Span

In [18]:
# Token texts, and for each token whether it is followed by a space
words = ["I", "like", "David", "Bowie"]
spaces = [True, True, True, False]

# Create a doc from the words and spaces
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

I like David Bowie


In [19]:
# Span covering tokens 2 and 3 of the doc (the end index 4 is exclusive),
# labelled "PERSON"
span = Span(doc, 2, 4, label="PERSON")

In [20]:
# Print the span's text together with its label as a string (`label_`)
print(span.text, span.label_)

David Bowie PERSON


In [21]:
# Register the span as the doc's only entity, then list every entity as a
# (text, label) pair.
doc.ents = [span]
entity_pairs = [(entity.text, entity.label_) for entity in doc.ents]
print(entity_pairs)

[('David Bowie', 'PERSON')]


In [23]:
import spacy
# Load the "en_core_web_md" pipeline. This rebinds `nlp`, replacing the
# English() instance created earlier in the notebook.
nlp = spacy.load("en_core_web_md")

In [24]:
# Process the example sentence with the loaded pipeline
doc = nlp("Berlin is a nice city")

In [25]:
# Two parallel lists over the doc: each token's text and its
# part-of-speech tag, in document order.
token_text = [tok.text for tok in doc]
pos_tags = [tok.pos_ for tok in doc]

In [26]:
# Look for a proper noun that is directly followed by a verb, using the
# parallel lists built in the previous cell (`token_text` / `pos_tags`).
# zip() pairs every tag with its successor and stops before the last token,
# so there is no lookup past the end of the list when the final token is a
# proper noun.
for index, (pos, next_pos) in enumerate(zip(pos_tags, pos_tags[1:])):
    if pos == "PROPN" and next_pos == "VERB":
        # The text list is named `token_text` (singular); the previous
        # spelling `token_texts` was an undefined name.
        result = token_text[index]
        print("Found proper noun before a verb:", result)

In [27]:
# Process the same sentence again for the token-based version below
doc = nlp("Berlin is a nice city")

In [28]:
# The better way: read the attributes straight from the tokens instead of
# keeping parallel lists of strings.
for token in doc:
    if token.pos_ == "PROPN":
        # Guard the lookahead: the last token has no successor, and
        # doc[token.i + 1] would raise an IndexError there.
        if token.i + 1 < len(doc) and doc[token.i + 1].pos_ == "VERB":
            print("Found proper noun before a verb:", token.text)

In [30]:
## Word vectors and similarity

In [32]:
# Compare two short documents and print their similarity score
doc1, doc2 = nlp("I like fast food"), nlp("i like pizza")
similarity_score = doc1.similarity(doc2)
print(similarity_score)

0.8627204117787385


In [34]:
# Print the word vector of the second token (index 1) of the sentence
doc = nlp("Two bananas in pyjamas")
second_token = doc[1]
print(second_token.vector)

[-2.2009e-01 -3.0322e-02 -7.9859e-02 -4.6279e-01 -3.8600e-01  3.6962e-01
 -7.7178e-01 -1.1529e-01  3.3601e-02  5.6573e-01 -2.4001e-01  4.1833e-01
  1.5049e-01  3.5621e-01 -2.1508e-01 -4.2743e-01  8.1400e-02  3.3916e-01
  2.1637e-01  1.4792e-01  4.5811e-01  2.0966e-01 -3.5706e-01  2.3800e-01
  2.7971e-02 -8.4538e-01  4.1917e-01 -3.9181e-01  4.0434e-04 -1.0662e+00
  1.4591e-01  1.4643e-03  5.1277e-01  2.6072e-01  8.3785e-02  3.0340e-01
  1.8579e-01  5.9999e-02 -4.0270e-01  5.0888e-01 -1.1358e-01 -2.8854e-01
 -2.7068e-01  1.1017e-02 -2.2217e-01  6.9076e-01  3.6459e-02  3.0394e-01
  5.6989e-02  2.2733e-01 -9.9473e-02  1.5165e-01  1.3540e-01 -2.4965e-01
  9.8078e-01 -8.0492e-01  1.9326e-01  3.1128e-01  5.5390e-02 -4.2423e-01
 -1.4082e-02  1.2708e-01  1.8868e-01  5.9777e-02 -2.2215e-01 -8.3950e-01
  9.1987e-02  1.0180e-01 -3.1299e-01  5.5083e-01 -3.0717e-01  4.4201e-01
  1.2666e-01  3.7643e-01  3.2333e-01  9.5673e-02  2.5083e-01 -6.4049e-02
  4.2143e-01 -1.9375e-01  3.8026e-01  7.0883e-03 -2

In [36]:
import spacy
from spacy.matcher import Matcher

# Load the pipeline and process the sample news text
nlp = spacy.load("en_core_web_md")
doc = nlp(
    "Twitch Prime, the perks program for Amazon Prime members offering free "
    "loot, games and other benefits, is ditching one of its best features: "
    "ad-free viewing. According to an email sent out to Amazon Prime members "
    "today, ad-free viewing will no longer be included as a part of Twitch "
    "Prime for new members, beginning on September 14. However, members with "
    "existing annual subscriptions will be able to continue to enjoy ad-free "
    "viewing until their subscription comes up for renewal. Those with "
    "monthly subscriptions will have access to ad-free viewing until October 15."
)

# Create the match patterns (one dict per token to match)
# pattern1: "amazon" (case-insensitive) followed by a title-cased proper noun
pattern1 = [{"LOWER": "amazon"}, {"IS_TITLE": True, "POS": "PROPN"}]
# pattern2: "ad", "-", "free" followed by a noun
pattern2 = [{"LOWER": "ad"}, {"TEXT": "-"}, {"LOWER": "free"}, {"POS": "NOUN"}]

# Initialize the Matcher and add the patterns.
# Patterns are passed as a list: the older `add(name, None, pattern)` call
# style is rejected by spaCy v3, while the list form is accepted by both
# v2.2.2+ and v3.
matcher = Matcher(nlp.vocab)
matcher.add("PATTERN1", [pattern1])
matcher.add("PATTERN2", [pattern2])

# Iterate over the matches
for match_id, start, end in matcher(doc):
    # Print pattern string name and text of matched span
    print(doc.vocab.strings[match_id], doc[start:end].text)

PATTERN1 Amazon Prime
PATTERN2 ad-free viewing
PATTERN1 Amazon Prime
PATTERN2 ad-free viewing
PATTERN2 ad-free viewing
PATTERN2 ad-free viewing
