In [1]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "Taylor Swift performed in Los Angeles on March 3rd, 2023."
doc = nlp(text)

# Print all named entities along with their labels

for ent in doc.ents:
    print(ent.text, ent.label_)


Taylor Swift PERSON
Los Angeles GPE
March 3rd, 2023 DATE


In [2]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "Serena Williams had dinner with Tom Hanks in Paris."
doc = nlp(text)

# prints only the entities of type PERSON

for ent in doc.ents:
    if ent.label_ == "PERSON":
        print(ent.text)


Serena Williams
Tom Hanks


In [3]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "She was running and had run 5 kilometers by 7am."
doc = nlp(text)

# Print each word with its lemma

for token in doc:
    print(f"{token.text:10} ➝ {token.lemma_}")


She        ➝ she
was        ➝ be
running    ➝ run
and        ➝ and
had        ➝ have
run        ➝ run
5          ➝ 5
kilometers ➝ kilometer
by         ➝ by
7          ➝ 7
am         ➝ am
.          ➝ .


In [4]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "This is an example sentence with some stop words."
doc = nlp(text)

# create a list of words that are not stop words

filtered_words = [token.text for token in doc if not token.is_stop]
print(filtered_words)


['example', 'sentence', 'stop', 'words', '.']


In [5]:
import spacy

nlp = spacy.load("en_core_web_sm")

# Mark the word "powerful" as a stop word

# Add custom stop word
nlp.vocab["powerful"].is_stop = True

text = "SpaCy is awesome and powerful."
doc = nlp(text)

stop_words = [token.text for token in doc if token.is_stop]
print(stop_words)


['is', 'and', 'powerful']


In [6]:
import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.load("en_core_web_sm")
matcher = PhraseMatcher(nlp.vocab)

# Use PhraseMatcher to identify the phrase "artificial intelligence" in a sentence and print matches

phrase = ["artificial intelligence", "Artificial Intelligence"]
patterns = [nlp(text) for text in phrase]
matcher.add("AI_PHRASE", patterns)

text = "Artificial Intelligence is the future. I study artificial intelligence."
doc = nlp(text)

matches = matcher(doc)
for match_id, start, end in matches:
    print(doc[start:end].text)


Artificial Intelligence
artificial intelligence


In [7]:
import spacy

#  print each word in a sentence with its POS tag and a human-readable explanation
nlp = spacy.load("en_core_web_sm")
text = "The cat sat on the mat."
doc = nlp(text)

for token in doc:
    print(f"{token.text:10} {token.pos_:10} {spacy.explain(token.pos_)}")

'''
The DET determiner  - a word that introduces a noun and specifies it: the, a, an
cat NOUN noun       - a person, place, thing, or idea: cat, dog, table
sat VERB verb       - an action or state word: sat, eat, sleep
on ADP adposition   - a word showing relationship between words, like time or location: on, in, under
the DET determiner  - same as above: the, a, an
mat NOUN noun       - another thing or object: mat, rug, chair
. PUNCT punctuation - punctuation mark ending the sentence: period, comma, etc.
'''

The        DET        determiner
cat        NOUN       noun
sat        VERB       verb
on         ADP        adposition
the        DET        determiner
mat        NOUN       noun
.          PUNCT      punctuation


'\nThe DET determiner  - a word that introduces a noun and specifies it: the, a, an\ncat NOUN noun       - a person, place, thing, or idea: cat, dog, table\nsat VERB verb       - an action or state word: sat, eat, sleep\non ADP adposition   - a word showing relationship between words, like time or location: on, in, under\nthe DET determiner  - same as above: the, a, an\nmat NOUN noun       - another thing or object: mat, rug, chair\n. PUNCT punctuation - punctuation mark ending the sentence: period, comma, etc.\n'

In [8]:
import spacy
from spacy.language import Language

nlp = spacy.load("en_core_web_sm")

# ^ as a custom sentence separator 

@Language.component("custom_separator")
def custom_separator(doc):
    for token in doc[:-1]:
        if token.text == '^':
            doc[token.i + 1].is_sent_start = True
    return doc

nlp.add_pipe('custom_separator', before='parser')
print(nlp.pipe_names)

text = "SpaCy is great ^ It helps with NLP tasks ^ Really useful."

doc = nlp(text)

for sent in doc.sents:
    print(sent)


['tok2vec', 'tagger', 'custom_separator', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']
SpaCy is great ^
It helps with NLP tasks ^
Really useful.


In [9]:
import spacy
from spacy import displacy

# Ask the user to input a sentence using input(), print each word with its POS tag, and then display it using spacy.displacy.render

nlp = spacy.load("en_core_web_sm")
sentence = input("Enter a sentence: ")
doc = nlp(sentence)

for token in doc:
    print(f"{token.text:10} {token.pos_:10} {spacy.explain(token.pos_)}")

# Display visualization in Jupyter
displacy.render(doc, style="dep", jupyter=True)


Enter a sentence:  the pilot flew the airplane above Israel


the        DET        determiner
pilot      NOUN       noun
flew       VERB       verb
the        DET        determiner
airplane   NOUN       noun
above      ADP        adposition
Israel     PROPN      proper noun
