## **Install required libraries**


In [None]:
!pip install torch=2.5.1

In [2]:
!pip install numpy>=2.0.0 -U --pre
!pip install scipy>=1.14.0 -U
!pip install gensim -U

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-aws 0.2.15 requires numpy<2,>=1; python_version < "3.12", but you have numpy 2.3.0rc1 which is incompatible.
tensorflow 2.19.0 requires numpy<2.2.0,>=1.26.0, but you have numpy 2.3.0rc1 which is incompatible.[0m[31m
[0mCollecting gensim
  Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Collecting numpy<2.0,>=1.18.5 (from gensim)
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting scipy<1.14.0,>=1.7.0 (from gensim)
  Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting smart-open>=1.8.1 (from gensim)
  Downloading smart_open-7.1.0-py3-none-any.whl.metadata (24 kB)
Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.ma

## **Import required libraries**


In [None]:
import gensim
import gensim.downloader as api
from gensim.models import KeyedVectors

import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## **Download and load word2vec-google-news-300 model**


In [None]:

# Download (on first use) and load the pretrained Google News Word2Vec vectors.
# The archive is large (about 1.6 GB), so this cell can take a while.
try:
    print("Attempting to load word2vec-google-news-300 model...")
    # return_path=True makes the downloader hand back the file location
    # instead of a loaded model; the vectors are then read explicitly below.
    model_path = api.load("word2vec-google-news-300", return_path=True)
    model = KeyedVectors.load_word2vec_format(model_path, binary=True)
    print("Model loaded successfully using gensim.downloader!")

except Exception as e:
    print(f"An error occurred: {e}")
    print("Please ensure your NumPy and Gensim libraries are up to date.")
    print("Try running: pip install --upgrade numpy gensim")
    # Re-raise so the notebook stops here: without `model`, every later cell
    # would fail with a NameError that hides the real cause.
    raise

Attempting to load word2vec-google-news-300 model...
[--------------------------------------------------] 1.4% 23.4/1662.8MB downloaded

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[==------------------------------------------------] 4.0% 66.9/1662.8MB downloaded

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[===-----------------------------------------------] 6.5% 108.4/1662.8MB downloaded

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Model loaded successfully using gensim.downloader!
Finding most similar words to 'woman':
[('man', 0.7664012908935547), ('girl', 0.7494640946388245), ('teenage_girl', 0.7336829304695129), ('teenager', 0.6317085027694702), ('lady', 0.6288785934448242), ('teenaged_girl', 0.6141784191131592), ('mother', 0.6076306104660034), ('policewoman', 0.6069462299346924), ('boy', 0.5975907444953918), ('Woman', 0.5770983099937439)]


## **Test example - Find the most similar words**


In [None]:
# Smoke test for the loaded vectors: list the nearest neighbours of 'woman'
# as (word, similarity score) pairs.
print("Finding most similar words to 'woman':")
nearest_to_woman = model.most_similar('woman')
print(nearest_to_woman)

## **Get the array for man**


In [5]:
# Look up the embedding stored for the token 'man'; the cell displays the
# resulting NumPy array of floats.
model['man']

array([ 0.32617188,  0.13085938,  0.03466797, -0.08300781,  0.08984375,
       -0.04125977, -0.19824219,  0.00689697,  0.14355469,  0.0019455 ,
        0.02880859, -0.25      , -0.08398438, -0.15136719, -0.10205078,
        0.04077148, -0.09765625,  0.05932617,  0.02978516, -0.10058594,
       -0.13085938,  0.001297  ,  0.02612305, -0.27148438,  0.06396484,
       -0.19140625, -0.078125  ,  0.25976562,  0.375     , -0.04541016,
        0.16210938,  0.13671875, -0.06396484, -0.02062988, -0.09667969,
        0.25390625,  0.24804688, -0.12695312,  0.07177734,  0.3203125 ,
        0.03149414, -0.03857422,  0.21191406, -0.00811768,  0.22265625,
       -0.13476562, -0.07617188,  0.01049805, -0.05175781,  0.03808594,
       -0.13378906,  0.125     ,  0.0559082 , -0.18261719,  0.08154297,
       -0.08447266, -0.07763672, -0.04345703,  0.08105469, -0.01092529,
        0.17480469,  0.30664062, -0.04321289, -0.01416016,  0.09082031,
       -0.00927734, -0.03442383, -0.11523438,  0.12451172, -0.02

## **Print how many features (based on the model)**


In [None]:
# Shape of a single word vector, i.e. the number of features per word.
model['man'].shape # 300 features (300 dimensions)

## **Examples**


In [None]:
# Nearest neighbours of 'man', returned as (word, similarity score) pairs.
model.most_similar('man')

In [None]:
# Same neighbour lookup for a networking acronym.
# NOTE(review): presumably raises KeyError if 'BGP' is not in the model's
# vocabulary — confirm.
model.most_similar('BGP')

In [None]:
# Similarity score between the vectors of 'man' and 'woman'
# (cosine similarity according to the gensim docs — confirm).
model.similarity('man', 'woman')

In [None]:
# Ask the model which word fits least with the rest of the list; three entries
# are routing-protocol names, so 'apple' is presumably the expected answer.
model.doesnt_match(['BGP', 'OSPF', 'ISIS', 'apple'])

In [None]:
# Word-vector arithmetic: start at 'king', remove 'man', add 'woman', then
# look up the words whose vectors are closest to the result.
# NOTE(review): when querying with a raw vector the input words are likely not
# excluded from the results, so 'king' itself may rank first — confirm.
vec = (model['king'] - model['man']) + model['woman']
model.most_similar(positive=[vec])

## **Get embeddings (the first 10 features) for a set of words**

Get embeddings limited to the first 10 features.
Create an array with them.
Create a pandas data frame using the features and the words.


In [None]:

# Words to compare, and how many leading embedding features to keep per word.
words = ['king', 'queen', 'woman', 'girl', 'man', 'water']
N_FEATURES = 10

# Build the rows from `words` itself so the row labels can never drift out of
# sync with the vectors (the list used to be written out twice by hand).
embeddings = np.array([model[word][:N_FEATURES] for word in words])

# One row per word, one column per retained feature.
df = pd.DataFrame(embeddings, index=words)

## **Generate the plot based on the dataframe generated**


In [None]:
# Create a heatmap of the dataframe: one row per index label, one column per
# feature, colour encoding the feature value.
fig, ax = plt.subplots(figsize=(18, 16))
heatmap = sns.heatmap(df, cmap='crest', ax=ax)
# Title and axis labels so the figure is readable on its own.
ax.set_title("Word2Vec embeddings (leading features per word)")
ax.set_xlabel("Feature index")
ax.set_ylabel("Word")
plt.show()