In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

In [5]:
# Install & import
!pip install gensim
from gensim.models import KeyedVectors
import gensim.downloader as api
import numpy as np

# Load vectors
# Location of the pre-trained embedding file inside the Kaggle input mount.
model_path = "/kaggle/input/google-word2vec/GoogleNews-vectors-negative300.bin"

print("Loading pre-trained word vectors...")
# `wv` is the module-level vector store used by the helper functions below.
wv = KeyedVectors.load_word2vec_format(model_path, binary=True)
print("word2vec loaded successfully!")

# Vector arithmetic
def explore(w1, w2, w3, topn=5, vectors=None):
    """Print the words closest to the analogy vector ``w1 - w2 + w3``.

    Args:
        w1, w2, w3: Words combined as ``vectors[w1] - vectors[w2] + vectors[w3]``.
        topn: Maximum number of neighbours to print (default 5).
        vectors: Object supporting ``in``, ``[]`` and ``similar_by_vector``.
            Defaults to the module-level ``wv``.

    Returns:
        None. Results are printed. If any input word is missing from the
        vocabulary, a single error line naming the missing word(s) is printed
        instead and nothing else is output.
    """
    kv = wv if vectors is None else vectors
    words = (w1, w2, w3)

    # Check membership up front: the text of the KeyError raised by the lookup
    # is a full sentence of its own, which made the old message read as garbage.
    missing = [w for w in dict.fromkeys(words) if w not in kv]
    if missing:
        names = ", ".join(repr(w) for w in missing)
        print(f"Error: {names} not found in the vocabulary.")
        return

    vec = kv[w1] - kv[w2] + kv[w3]
    # Over-fetch by the number of input words so that `topn` results remain
    # after the inputs themselves are filtered out.
    candidates = kv.similar_by_vector(vec, topn=topn + len(words))
    res = [(w, s) for w, s in candidates if w not in words]

    print(f"\nWord Relationship: {w1} - {w2} + {w3}")
    print("Most similar words to the result (excluding input words):")
    for w, s in res[:topn]:
        print(f"{w}: {s:.4f}")

# Similarity check
def sim(w1, w2, vectors=None):
    """Print the similarity score between two words.

    Args:
        w1, w2: The words to compare.
        vectors: Object supporting ``in`` and ``similarity(w1, w2)``.
            Defaults to the module-level ``wv``.

    Returns:
        None. The score is printed with four decimals. If either word is
        missing from the vocabulary, an error line naming the missing word(s)
        is printed instead.
    """
    kv = wv if vectors is None else vectors

    # Validate first so the error names the word itself rather than embedding
    # the library's own KeyError sentence inside our message.
    missing = [w for w in dict.fromkeys((w1, w2)) if w not in kv]
    if missing:
        names = ", ".join(repr(w) for w in missing)
        print(f"Error: {names} not found in the vocabulary.")
        return

    print(f"\nSimilarity between '{w1}' and '{w2}': {kv.similarity(w1, w2):.4f}")

# Most similar
def similar(w, topn=5, vectors=None):
    """Print the nearest neighbours of a word.

    Args:
        w: The query word.
        topn: Number of neighbours to print (default 5).
        vectors: Object supporting ``in`` and ``most_similar(word, topn=...)``.
            Defaults to the module-level ``wv``.

    Returns:
        None. Neighbours are printed one per line with four-decimal scores.
        If the word is missing from the vocabulary, only an error line is
        printed.
    """
    kv = wv if vectors is None else vectors

    # Validate before printing anything, so a missing word does not leave a
    # dangling "Most similar words to ..." header above the error.
    if w not in kv:
        print(f"Error: {w!r} not found in the vocabulary.")
        return

    neighbours = kv.most_similar(w, topn=topn)
    print(f"\nMost similar words to '{w}':")
    for w2, s in neighbours:
        print(f"{w2}: {s:.4f}")

# Examples
# Analogy demos: each triple is evaluated as first - second + third.
# NOTE(review): the vocabulary appears to be case-sensitive (the recorded output
# lists both "carrot"-style and "Carrot"-style tokens), so capitalised proper
# nouns such as "Paris" may give cleaner analogies - confirm before changing.
analogy_triples = [
    ("paris", "france", "germany"),
    ("apple", "fruit", "carrot"),
]
for triple in analogy_triples:
    explore(*triple)

# Pairwise similarity demos.
word_pairs = [
    ("cat", "dog"),
    ("computer", "keyboard"),
    ("music", "art"),
]
for pair in word_pairs:
    sim(*pair)

# Nearest-neighbour demos.
for query in ("happy", "sad", "technology"):
    similar(query)

Loading pre-trained word vectors...
word2vec loaded successfully!

Word Relationship: paris - france + germany
Most similar words to the result (excluding input words):
berlin: 0.4838
german: 0.4695
lindsay_lohan: 0.4536
switzerland: 0.4468
heidi: 0.4445

Word Relationship: apple - fruit + carrot
Most similar words to the result (excluding input words):
carrots: 0.5700
proverbial_carrot: 0.4578
Carrot: 0.4159
Twizzler: 0.4074
peppermint_candy: 0.4074

Similarity between 'cat' and 'dog': 0.7609

Similarity between 'computer' and 'keyboard': 0.3964

Similarity between 'music' and 'art': 0.4010

Most similar words to 'happy':
glad: 0.7409
pleased: 0.6632
ecstatic: 0.6627
overjoyed: 0.6599
thrilled: 0.6514

Most similar words to 'sad':
saddening: 0.7273
Sad: 0.6611
saddened: 0.6604
heartbreaking: 0.6574
disheartening: 0.6507

Most similar words to 'technology':
technologies: 0.8332
innovations: 0.6231
technological_innovations: 0.6102
technol: 0.6047
technological_advancement: 0.6036
