# Import Trained Model

In [1]:
import gensim.downloader as api

# Identifier of the pretrained embedding set to fetch through the gensim downloader
PRETRAINED_MODEL_NAME = "word2vec-google-news-300"

# Download the model and keep the returned object, which is ready for use
model = api.load(PRETRAINED_MODEL_NAME)



# Example of a word as a vector

In [2]:
# Alias used by the cells below when querying the loaded model
word_vectors = model

# Let's look at what the vector embedding of a word looks like
computer_vector = word_vectors['computer']  # Look up the vector for the word 'computer' once
print(computer_vector)        # The raw embedding values
print(computer_vector.shape)  # The shape of the vector (should be 300)

[ 1.07421875e-01 -2.01171875e-01  1.23046875e-01  2.11914062e-01
 -9.13085938e-02  2.16796875e-01 -1.31835938e-01  8.30078125e-02
  2.02148438e-01  4.78515625e-02  3.66210938e-02 -2.45361328e-02
  2.39257812e-02 -1.60156250e-01 -2.61230469e-02  9.71679688e-02
 -6.34765625e-02  1.84570312e-01  1.70898438e-01 -1.63085938e-01
 -1.09375000e-01  1.49414062e-01 -4.65393066e-04  9.61914062e-02
  1.68945312e-01  2.60925293e-03  8.93554688e-02  6.49414062e-02
  3.56445312e-02 -6.93359375e-02 -1.46484375e-01 -1.21093750e-01
 -2.27539062e-01  2.45361328e-02 -1.24511719e-01 -3.18359375e-01
 -2.20703125e-01  1.30859375e-01  3.66210938e-02 -3.63769531e-02
 -1.13281250e-01  1.95312500e-01  9.76562500e-02  1.26953125e-01
  6.59179688e-02  6.93359375e-02  1.02539062e-02  1.75781250e-01
 -1.68945312e-01  1.21307373e-03 -2.98828125e-01 -1.15234375e-01
  5.66406250e-02 -1.77734375e-01 -2.08984375e-01  1.76757812e-01
  2.38037109e-02 -2.57812500e-01 -4.46777344e-02  1.88476562e-01
  5.51757812e-02  5.02929

# Similar Words

## King + Woman - Man = ?

The expected answer here is "Queen". Let's see if the model can find it.

In [3]:
# Example of using most_similar for an analogy query:
# 'king' and 'woman' are passed as positive terms, 'man' as a negative term.
analogy_matches = word_vectors.most_similar(
    positive=['king', 'woman'],
    negative=['man'],
    topn=10,
)
print(analogy_matches)

[('queen', 0.7118192911148071), ('monarch', 0.6189674735069275), ('princess', 0.5902431011199951), ('crown_prince', 0.5499460697174072), ('prince', 0.5377321243286133), ('kings', 0.5236844420433044), ('Queen_Consort', 0.5235945582389832), ('queens', 0.5181134343147278), ('sultan', 0.5098593235015869), ('monarchy', 0.5087411403656006)]


## Let's check the similarity between a few pairs of words

In [4]:
# Word pairs whose similarity score is printed, one score per line, in this order
word_pairs = [
    ('woman', 'man'),
    ('king', 'queen'),
    ('uncle', 'aunt'),
    ('boy', 'girl'),
    ('nephew', 'niece'),
    ('paper', 'water'),
]

for left_word, right_word in word_pairs:
    print(word_vectors.similarity(left_word, right_word))

0.76640123
0.6510956
0.7643474
0.8543272
0.7594367
0.11408084


## Most Similar Words

In [5]:
# Find the 5 words most similar to 'tower'
tower_neighbours = word_vectors.most_similar("tower", topn=5)
print(tower_neighbours)

[('towers', 0.8531749844551086), ('skyscraper', 0.6417425870895386), ('Tower', 0.639177143573761), ('spire', 0.5946877598762512), ('responded_Understood_Atlasjet', 0.5931612849235535)]


## Let's see the distance between word vectors (magnitude of the vector difference)

In [6]:
import numpy as np


def difference_magnitude(vectors, first_word, second_word):
    """Return the magnitude of the difference between two word vectors.

    Args:
        vectors: Object that returns a NumPy vector when indexed by a word
            (here, the loaded model).
        first_word: Word whose vector is subtracted from.
        second_word: Word whose vector is subtracted.

    Returns:
        The Euclidean (L2) norm of ``vectors[first_word] - vectors[second_word]``.
        A smaller value means the two vectors lie closer together.
    """
    return np.linalg.norm(vectors[first_word] - vectors[second_word])


# Word pairs to compare
word_pairs = [
    ('man', 'woman'),
    ('semiconductor', 'earthworm'),
    ('cat', 'kitten'),
]

# Compute and print the magnitude of the vector difference for each pair
for first_word, second_word in word_pairs:
    magnitude = difference_magnitude(model, first_word, second_word)
    print("The magnitude of difference between {} and {} is {:.2f}".format(first_word, second_word, magnitude))

The magnitude of difference between man and woman is 1.73
The magnitude of difference between semiconductor and earthworm is 5.67
The magnitude of difference between cat and kitten is 2.30
