# spaCy Basics

In [None]:
import spacy

# Load the "en_core_web_lg" pipeline once; the cells below reuse this `nlp` object.
# NOTE: spacy.load() needs the model package to be installed in the kernel's environment.
nlp = spacy.load("en_core_web_lg")

# Run the pipeline on an example sentence; the resulting `doc` is used by the next cells.
sample_text = "Vax'ildan wielded Whisper, a powerful magic dagger."
doc = nlp(sample_text)

In [21]:
# One line per token: surface text, coarse part-of-speech (pos_), fine-grained tag (tag_).
for tok in doc:
    print(f"{tok.text} {tok.pos_} {tok.tag_}")

Vax'ildan PROPN NNP
wielded VERB VBD
Whisper PROPN NNP
, PUNCT ,
a DET DT
powerful ADJ JJ
magic ADJ JJ
dagger NOUN NN
. PUNCT .


# Dependency Parsing

In [25]:
# Render the dependency arcs for the current `doc`; the "distance" option is passed
# through to displaCy's dependency visualizer.
spacy.displacy.render(
    doc,
    style="dep",
    options={"distance": 100},
)

# Lemmatization

In [27]:
# Example sentence containing inflected forms ("was", "running", "forests").
sample_text = "She was running through the forests quickly."
doc = nlp(sample_text)

# Show every token next to its lemma.
for tok in doc:
    print(f"{tok.text} {tok.lemma_}")

She she
was be
running run
through through
the the
forests forest
quickly quickly
. .


# Stop Words

In [28]:
# Example sentence mixing content words with common function words.
sample_text = "These are some example stop words that we want to filter out."
doc = nlp(sample_text)

# Keep the text of each token whose is_stop flag is set, in document order.
stop_words = []
for tok in doc:
    if tok.is_stop:
        stop_words.append(tok.text)
print(stop_words)

['These', 'are', 'some', 'that', 'we', 'to', 'out']


# Part-of-speech Tagging and Parsing

In [30]:
# Short sentence with a simple subject / verb / prepositional-phrase structure.
sample_text = "The cat sat on the mat."
doc = nlp(sample_text)

# Per token: text, part-of-speech, dependency label, and the text of its head token.
for tok in doc:
    governor = tok.head
    print(f"{tok.text} {tok.pos_} {tok.dep_} {governor.text}")


The DET det cat
cat NOUN nsubj sat
sat VERB ROOT sat
on ADP prep sat
the DET det mat
mat NOUN pobj on
. PUNCT punct sat


# Tokenization and Sentence Segmentation

In [31]:
# Three short sentences, including a contraction ("here's"), to exercise both
# the tokenizer and the sentence splitter.
sample_text = "Hello! This is a sample sentence. And here's another one."
doc = nlp(sample_text)

# Tokens, one per line.
print("Tokens:")
for tok in doc:
    print(tok.text)

# Sentences, one per line, as yielded by doc.sents.
print("\nSentences:")
for sentence in doc.sents:
    print(sentence.text)


Tokens:
Hello
!
This
is
a
sample
sentence
.
And
here
's
another
one
.

Sentences:
Hello!
This is a sample sentence.
And here's another one.


# Named Entity Recognition (NER)

In [32]:
# Sentence mentioning a person, a company and a place.
sample_text = "Elon Musk is the CEO of SpaceX, and he lives in California."
doc = nlp(sample_text)

# List each entity in doc.ents together with its label.
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")


Elon Musk PERSON
SpaceX ORG
California GPE


# Word Similarity

In [36]:
# Run each word through the pipeline: word1 and word2 are the results of nlp(),
# i.e. processed one-word documents, not bare vectors.
word1 = nlp("king")
word2 = nlp("queen")

# Compare the two processed words with similarity() and print the raw, unrounded score.
similarity_score = word1.similarity(word2)
print(f"Similarity between 'king' and 'queen': {similarity_score}")


Similarity between 'king' and 'queen': 0.6108841234425123
