## NLP Homework (nlphw)
10.9.25

Imports:

In [104]:
import spacy
from spacy.matcher import PhraseMatcher
from spacy import displacy

Load the model

In [106]:
# Load the spaCy pipeline once; every cell below reuses this `nlp` object.
MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(MODEL_NAME)

1. Named Entity Extraction

In [108]:
# Calling nlp(...) on a string yields a spaCy Doc; its .ents attribute
# holds the named entities found in the text.
doc = nlp("Taylor Swift performed in Los Angeles on March 3rd, 2023.")

# One line per entity: the entity text followed by its label.
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

Taylor Swift PERSON
Los Angeles GPE
March 3rd, 2023 DATE


2. Entity Classification

In [110]:
def return_persons_ents(str):
    """Print and return the PERSON entities found in a piece of text.

    Args:
        str: The text to analyse. The parameter name shadows the built-in
            ``str``; it is kept unchanged so existing callers keep working.

    Returns:
        A list of the entity spans whose label is ``"PERSON"``, in the order
        they appear in the text. The list is empty when none are found.
    """
    # nlp(...) returns a spaCy Doc; keep only the entities labelled PERSON.
    persons = [ent for ent in nlp(str).ents if ent.label_ == "PERSON"]

    # Printing is kept so the function still shows its result as before.
    for person in persons:
        print(person)

    return persons

# Bind the result so the notebook does not also echo the returned list.
person_ents = return_persons_ents("Serena Williams had dinner with Tom Hanks in Paris.")

Serena Williams
Tom Hanks


3. Lemmatization

In [137]:
# Lemmatization demo: print every token next to its lemma.
mystring = "She was running and had run 5 kilometers by 7am."

doc = nlp(mystring)
for word in doc:
    print(f"{word.text} → {word.lemma_}")

She → she
was → be
running → run
and → and
had → have
run → run
5 → 5
kilometers → kilometer
by → by
7 → 7
am → am
. → .


4. Stop Word Removal

In [142]:
def return_non_stop_words(str):
    """Return the tokens of a text that are alphabetic and not stop words.

    Args:
        str: The text to process (the name shadows the built-in ``str``).

    Returns:
        A list of tokens, in document order, for which ``is_stop`` is false
        and ``is_alpha`` is true.
    """
    kept = []
    for token in nlp(str):
        # Skip stop words and anything that is not purely alphabetic.
        if token.is_stop or not token.is_alpha:
            continue
        kept.append(token)
    return kept

print(return_non_stop_words("This is an example sentence with some stop words."))

[example, sentence, stop, words]


5. Custom Stop Word

In [145]:
# Register a custom stop word.
# The is_stop flag lives on the vocabulary entry for one exact spelling, so
# flag the common casings too; otherwise "Powerful" or "POWERFUL" would not
# be filtered out.
# NOTE: this mutates the shared `nlp.vocab`, so it also affects every later
# nlp(...) call in this kernel, including re-runs of earlier cells.
CUSTOM_STOP_WORD = "powerful"
for variant in (CUSTOM_STOP_WORD, CUSTOM_STOP_WORD.capitalize(), CUSTOM_STOP_WORD.upper()):
    nlp.vocab[variant].is_stop = True

print(return_non_stop_words("SpaCy is awesome and powerful.")) # We can see 'powerful' was excluded.

[SpaCy, awesome]


6. Phrase Matcher

In [118]:
# Compare on the lower-cased token text so a single pattern matches the
# phrase in any casing, instead of listing each casing by hand.
matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

# make_doc only tokenizes, which is all a LOWER-based pattern needs; it
# avoids running the whole pipeline just to build the pattern.
patterns = [nlp.make_doc("artificial intelligence")]
matcher.add("AI", patterns)

doc = nlp("Artificial Intelligence is the future. I study artificial intelligence.")
matches = matcher(doc)

# Each match is (match_id, start, end); start/end are token indices into doc.
for match_id, start, end in matches:
    print(doc[start:end].text)

Artificial Intelligence
artificial intelligence


7. POS Tagging + Explanation

In [120]:
# nlp(...) returns a spaCy Doc; iterate over it to get the tokens.
doc = nlp("The cat sat on the mat.")

# Size the token column to the longest token, never narrower than the
# original 5 characters, so long tokens do not push the columns out of line.
text_width = max([5, *(len(token.text) for token in doc)])

# Columns: token text, coarse POS tag, human-readable explanation of the tag.
for token in doc:
    print(f"{token.text:{text_width}} {token.pos_:5} {spacy.explain(token.pos_)}")

The   DET   determiner
cat   NOUN  noun
sat   VERB  verb
on    ADP   adposition
the   DET   determiner
mat   NOUN  noun
.     PUNCT punctuation


8. POS Tagging + Displacy Visualization

In [122]:
# Fall back to a fixed example when nothing (or only whitespace) is entered,
# so the cell still produces a meaningful table and parse.
DEFAULT_SENTENCE = "Apple is looking at buying a U.K. startup for $1 billion."
sentence = input("Enter a sentence: ").strip() or DEFAULT_SENTENCE
doc = nlp(sentence)

# Size the token column to the longest token (minimum 5 characters) so that
# tokens such as "looking" or "startup" stay aligned with the others.
text_width = max([5, *(len(token.text) for token in doc)])

# print each word with POS tag and explanation
for token in doc:
    print(f"{token.text:{text_width}} {token.pos_:5} {spacy.explain(token.pos_)}")

# Displacy Visualization: render the dependency parse inline in the notebook
displacy.render(doc, style="dep", jupyter=True)

Enter a sentence:  Apple is looking at buying a U.K. startup for $1 billion.


Apple PROPN proper noun
is    AUX   auxiliary
looking VERB  verb
at    ADP   adposition
buying VERB  verb
a     DET   determiner
U.K.  PROPN proper noun
startup NOUN  noun
for   ADP   adposition
$     SYM   symbol
1     NUM   numeral
billion NUM   numeral
.     PUNCT punctuation
