In [3]:
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag

# Fetch the NLTK data packages this cell relies on: the "punkt_tab"
# tokenizer data and the English averaged-perceptron tagger.
for resource in ("punkt_tab", "averaged_perceptron_tagger_eng"):
    nltk.download(resource)

# Sentence used for the demonstration
text = "The quick brown fox jumps over the lazy dog."

# Split the sentence into tokens, then label every token with its POS tag
tokens = word_tokenize(text)
tagged = pos_tag(tokens)

# Show the resulting list of (token, tag) pairs
print("POS Tags:", tagged)

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


POS Tags: [('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]


In [7]:
import spacy

# Load the medium English pipeline ("md"), which ships with word vectors;
# "en_core_web_lg" can be used instead if it is installed.
# The model package must already be installed, e.g. via
#   python -m spacy download en_core_web_md
nlp = spacy.load("en_core_web_md")

# Run each example word through the pipeline
word1, word2, word3 = (nlp(term) for term in ("king", "queen", "apple"))

# Score "king" against the other two words, then report both results
king_queen_score = word1.similarity(word2)
king_apple_score = word1.similarity(word3)
print("Similarity between king and queen:", king_queen_score)
print("Similarity between king and apple:", king_apple_score)


Similarity between king and queen: 0.38253092765808105
Similarity between king and apple: 0.21109060943126678


In [6]:
!python -m spacy download en_core_web_md

Collecting en-core-web-md==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl (33.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.5/33.5 MB[0m [31m37.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: en-core-web-md
Successfully installed en-core-web-md-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


🔍 Explanation for Viva — POS Tagging with NLTK
Line	What it Does
word_tokenize(text)	Breaks the sentence into words (tokens).
pos_tag(tokens)	Assigns each word a part-of-speech tag (like noun, verb, adjective, etc.).
nltk.download(...)	Downloads models for tokenization and tagging.
tagged	Holds the result of pos_tag(tokens): a list of (word, tag) tuples like [('The', 'DT'), ('fox', 'NN'), ('jumps', 'VBZ')]

💬 Explanation for Viva — Word Similarity with spaCy
Concept	Description
Word Embeddings	Represent words as high-dimensional vectors capturing semantic meaning.
similarity()	Measures how similar two words are by comparing their vector representations (cosine similarity by default; a higher score means more similar).
spacy.load(...)	Loads a model that contains pre-trained word vectors.