## **Install required libraries**


In [None]:
!pip install torch==2.5.1

In [None]:
!pip install numpy>=2.0.0 -U --pre
!pip install scipy>=1.14.0 -U
!pip install gensim -U

## **Import required libraries**


In [None]:
import gensim
import gensim.downloader as api
from gensim.models import KeyedVectors

import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## **Download and load word2vec-google-news-300 model**


In [None]:

# Download and load the Google News Word2Vec model.
# `return_path=True` asks gensim.downloader for the path of the downloaded file
# rather than a loaded object; the vectors are then read explicitly in binary
# word2vec format into `model`, which every later cell relies on.
print("Attempting to load word2vec-google-news-300 model...")
try:
    model_path = api.load("word2vec-google-news-300", return_path=True)
    model = KeyedVectors.load_word2vec_format(model_path, binary=True)
except Exception as e:
    print(f"An error occurred: {e}")
    print("Please ensure your NumPy and Gensim libraries are up to date.")
    print("Try running: pip install --upgrade numpy gensim")
    # Re-raise instead of swallowing: without `model` the following cells would
    # only fail later with an unrelated-looking NameError.
    raise
else:
    print("Model loaded successfully using gensim.downloader!")

## **Test example - Find the most similar words**


In [None]:
# Smoke test: announce the query, run it against the loaded model, then print
# the raw result returned by `most_similar`.
print("Finding most similar words to 'woman':")
woman_neighbours = model.most_similar('woman')
print(woman_neighbours)

## **Get the array for man**


In [None]:
# Look up the vector the model stores for the word 'man'. As the last expression
# of the cell, the array is shown as the cell output.
model['man']

## **Print how many features (based on the model)**


In [None]:
# The shape of a single word vector gives the number of features per word.
model['man'].shape  # 300 features (300 dimensions)

## **Examples**


In [None]:
# Ask the model for the words it ranks as most similar to 'man'.
model.most_similar('man')

In [None]:
# Same query for the networking acronym 'BGP'.
# NOTE(review): presumably raises KeyError if 'BGP' is not in the model's
# vocabulary - confirm against the loaded vectors.
model.most_similar('BGP')

In [None]:
# Similarity score between the vectors of 'man' and 'woman'
# (presumably cosine similarity - confirm against the gensim documentation).
model.similarity('man', 'woman')

In [None]:
# Ask the model which entry of the list does not fit with the others;
# 'apple' is the only entry that is not a routing-protocol name.
model.doesnt_match(['BGP', 'OSPF', 'ISIS', 'apple'])

In [None]:
# Word-vector arithmetic: start from 'king', subtract 'man', add 'woman',
# then look up the words whose vectors are closest to the resulting vector.
vec = model['king'] - model['man'] + model['woman']
model.most_similar(positive=[vec])

## **Get embeddings (the first 10 features) for a set of words**

Get embeddings limited to the first 10 features.
Create an array with them.
Create a pandas data frame using the features and the words.


In [None]:

# Words to compare: king, queen, woman, girl, man, water
words = ['king', 'queen', 'woman', 'girl', 'man', 'water']

# Number of leading embedding features kept for each word.
n_features = 10

# Build one row per entry of `words` so that the row labels and the rows of the
# array cannot drift apart when the word list is edited.
embeddings = np.array([model[word][:n_features] for word in words])

# One row per word (labelled with the word), one column per kept feature.
df = pd.DataFrame(embeddings, index=words)

## **Generate the heatmap from the DataFrame**


In [None]:
# Create a heatmap of the DataFrame: one row per word, one column per feature.
# An explicit figure/axes pair is used so the title and labels attach to this plot.
fig, ax = plt.subplots(figsize=(18, 16))
heatmap = sns.heatmap(df, cmap='crest', ax=ax)
ax.set(
    title="Word2Vec embedding features by word",
    xlabel="Feature index",
    ylabel="Word",
)
plt.show()