### **Libraries**

In [None]:
import heapq
import os
import zipfile

import gensim.downloader as api
import numpy as np
import pandas as pd
import requests
from gensim.models import KeyedVectors

### **Load Word2Vec and GloVe**

In [None]:

def Dow2v():
    """Return the Google News Word2Vec vectors, caching them next to the notebook.

    If ``GoogleNews-vectors-negative300.bin`` already exists in the working
    directory it is loaded directly. Otherwise the model is fetched through
    ``gensim.downloader``, written to that file for later runs, and the
    already-loaded model is returned as-is.

    Returns:
        The loaded word-vector model (the object produced by
        ``api.load`` or ``KeyedVectors.load_word2vec_format``).
    """
    print("Downloading Word2Vec model...")
    word2vec_path = "GoogleNews-vectors-negative300.bin"
    if os.path.exists(word2vec_path):
        return KeyedVectors.load_word2vec_format(word2vec_path, binary=True)

    word2vec = api.load("word2vec-google-news-300")
    # Write to a temporary name first so an interrupted save never leaves a
    # truncated file that the existence check above would later trust.
    tmp_path = word2vec_path + ".tmp"
    word2vec.save_word2vec_format(tmp_path, binary=True)
    os.replace(tmp_path, word2vec_path)
    # The model is already in memory; re-parsing the file we just wrote would
    # only cost time and a second copy of the vectors.
    return word2vec

def Dowglv():
    """Return the 300-dimensional GloVe 6B embeddings, downloading them if needed.

    When ``glove.6B.300d.txt`` is not present in the working directory, the
    ``glove.6B.zip`` archive is downloaded and extracted there. The text file
    is then parsed with ``loadglv``.

    Returns:
        Whatever ``loadglv`` returns for ``glove.6B.300d.txt``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeouts.
    """
    print("Downloading GloVe embeddings...")
    glove_zip_path = "glove.6B.zip"
    glove_txt_path = "glove.6B.300d.txt"
    if not os.path.exists(glove_txt_path):
        url = "http://nlp.stanford.edu/data/glove.6B.zip"
        # Stream the archive to disk in chunks instead of holding the whole
        # download in memory via ``response.content``.
        with requests.get(url, stream=True, timeout=60) as response:
            # Fail loudly on 4xx/5xx rather than saving an error page as a zip.
            response.raise_for_status()
            with open(glove_zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        with zipfile.ZipFile(glove_zip_path, "r") as zip_ref:
            zip_ref.extractall()
    return loadglv(glove_txt_path)

def loadglv(glove_file_path):
    """Parse a GloVe-style text file into a ``{word: vector}`` dictionary.

    Every non-blank line is split on whitespace: the first token is used as
    the word and the remaining tokens are converted to a ``float32`` NumPy
    array. Blank or whitespace-only lines are skipped.

    Args:
        glove_file_path: Path to a UTF-8 encoded embeddings text file.

    Returns:
        dict mapping each word to its ``np.float32`` vector. If a word occurs
        more than once, the last occurrence wins.
    """
    glove_model = {}
    with open(glove_file_path, encoding="utf8") as f:
        for line in f:
            parts = line.split()
            if not parts:
                # A stray empty line (e.g. at end of file) has no word token.
                continue
            glove_model[parts[0]] = np.array(parts[1:], dtype=np.float32)
    return glove_model

# Load both embedding sets once for the rest of the notebook. Each loader only
# downloads when its local file is missing. `glove` is the word -> float32
# vector dict built by `loadglv`; `word2vec` is the model object returned by
# `Dow2v`.
word2vec = Dow2v()
glove = Dowglv()


Downloading Word2Vec model...
Downloading GloVe embeddings...


### **Cosine Similarity**

In [None]:
def cossim(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Args:
        vec1: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        vec2: Second vector of the same length.

    Returns:
        ``dot(vec1, vec2) / (||vec1|| * ||vec2||)``. When either vector has
        zero norm the similarity is undefined; ``0.0`` is returned instead of
        dividing by zero and producing NaN.
    """
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        return 0.0
    return np.dot(vec1, vec2) / denominator

### **Find Analogy**

In [None]:
def findglvanalogy(glove_model, word_a, word_b, word_c, top_n=1):
    """Solve the analogy ``word_a : word_b :: word_c : ?`` by vector arithmetic.

    The target vector is ``vec_b - vec_a + vec_c``. Every other word in
    ``glove_model`` is scored by cosine similarity to it, and the best
    ``top_n`` words are returned.

    Args:
        glove_model: Mapping from word to NumPy vector.
        word_a, word_b, word_c: The three analogy terms; they are excluded
            from the candidates.
        top_n: Number of answers to return.

    Returns:
        List of up to ``top_n`` words, most similar first. ``None`` (after
        printing a message) if any of the three words is not in the model.
        An empty list if the target vector has zero norm, since no cosine
        similarity can be computed against it.
    """
    query_words = (word_a, word_b, word_c)
    if any(word not in glove_model for word in query_words):
        print(f"[GloVe] Missing one or more words: '{word_a}', '{word_b}', '{word_c}'")
        return None

    target_vector = glove_model[word_b] - glove_model[word_a] + glove_model[word_c]
    # The target norm is the same for every candidate, so compute it once.
    target_norm = np.linalg.norm(target_vector)
    if target_norm == 0:
        return []

    excluded = set(query_words)

    def scored_candidates():
        """Yield ``(word, cosine similarity)`` for each eligible word."""
        for word in glove_model:
            if word in excluded:
                continue
            vector = glove_model[word]
            vector_norm = np.linalg.norm(vector)
            if vector_norm == 0:
                # Undefined similarity; a NaN score would corrupt the ranking.
                continue
            yield word, np.dot(target_vector, vector) / (target_norm * vector_norm)

    # Only the best few are needed, so avoid sorting the whole vocabulary.
    best = heapq.nlargest(top_n, scored_candidates(), key=lambda pair: pair[1])
    return [word for word, _ in best]


In [None]:
def findw2vanalogy(model, word_a, word_b, word_c, top_n=1):
    """Answer ``word_a : word_b :: word_c : ?`` using the model's own vector search.

    Args:
        model: Object exposing ``key_to_index``, item lookup by word and
            ``similar_by_vector(vector, topn=...)``.
        word_a, word_b, word_c: The three analogy terms; they are filtered out
            of the answers.
        top_n: Number of answers wanted.

    Returns:
        List of at most ``top_n`` words in the order the model ranked them, or
        ``None`` (after printing a message) when any term is out of vocabulary.
    """
    terms = [word_a, word_b, word_c]
    vocabulary = model.key_to_index
    if any(term not in vocabulary for term in terms):
        print(f"[Word2Vec] Missing one or more words: '{word_a}', '{word_b}', '{word_c}'")
        return None

    first, second, third = (model[term] for term in terms)
    query = second - first + third

    # Ask for three extra neighbours so that dropping the query terms
    # themselves can still leave top_n answers.
    neighbours = model.similar_by_vector(query, topn=top_n + 3)
    answers = [name for name, _score in neighbours if name not in terms]
    return answers[:max(top_n, 0)]

### **Evaluate Analogy**

In [None]:
def evalanalogy(tests, model, model_name, is_glove=False):
    """Run a list of analogy tests against one embedding model.

    Args:
        tests: Iterable of ``(a, b, c, expected)`` string tuples, read as
            ``a : b :: c : expected``.
        model: The embedding model handed to the analogy solver.
        model_name: Label used for the result column (``"<model_name> Result"``).
        is_glove: If true, solve with ``findglvanalogy``; otherwise with
            ``findw2vanalogy``.

    Returns:
        pandas.DataFrame with columns ``Test #``, ``Test``, ``Expected`` and
        ``"<model_name> Result"``. A test whose words are missing, or that
        raises, gets the prediction ``"N/A"``.
    """
    solver = findglvanalogy if is_glove else findw2vanalogy
    results = []
    for i, (a, b, c, expected) in enumerate(tests, 1):
        try:
            # Normalise case for both models. The glove.6B vectors this
            # notebook loads are published as uncased, so skipping this step
            # for GloVe made any capitalised test word an automatic "N/A".
            a, b, c = a.lower(), b.lower(), c.lower()
            expected = expected.lower()

            result = solver(model, a, b, c)
            prediction = result[0] if result else "N/A"
        except Exception as e:
            # Deliberate best-effort: report the failing test and carry on so
            # one bad entry does not abort the whole evaluation.
            print(f"Error in test {i}: {e}")
            prediction = "N/A"
        results.append([i, f"{a} : {b} :: {c} : ?", expected, prediction])
    return pd.DataFrame(results, columns=["Test #", "Test", "Expected", f"{model_name} Result"])


### **Semantic & Syntactic Analogy Tests**

In [None]:
# Analogy tests labelled "semantic". Each tuple is (a, b, c, expected):
# `evalanalogy` unpacks it as the question "a : b :: c : ?" and compares the
# model's answer with `expected`.
semanticTests = [
    ("brother", "sister", "uncle", "aunt"),
    ("roma", "italy", "barcelona", "spain"),
    ("madrid", "spain", "berlin", "germany"),
    ("king", "queen", "emperor", "empress"),
    ("cat", "kitten", "dog", "puppy"),
    ("eyes", "see", "ears", "hear"),
    ("cow", "milk", "bee", "honey"),
    ("yale", "college","harvard", "university"),
    ("morning", "breakfast", "evening", "dinner"),
    ("money", "coin", "gold", "silver")
]

# Analogy tests labelled "syntactic" (word-form pairs such as tiny/tinier or
# sit/sat). Each tuple is (a, b, c, expected): `evalanalogy` unpacks it as the
# question "a : b :: c : ?" and compares the model's answer with `expected`.
syntacticTests = [
    ("tiny", "tinier","loud", "louder"),
    ('blink', 'blinking', 'sleep', 'sleeping'),
    ("quick", "quickly", "slow", "slowly"),
    ("zoom", "zooming", "bounce", "bouncing"),
    ("clean", "cleaner", "dirty", "dirtier"),
    ("rich", "richer", "poor", "poorer"),
    ("sit", "sat", "stand", "stood"),
    ("break", "broken", "take", "taken"),
    ("cook", "cooking", "bake", "baking"),
    ("give", "gave", "receive", "received")
]

### **Results**

In [None]:
# Score both test suites with each model (GloVe first, then Word2Vec).
semanticglv, syntacticglv = (
    evalanalogy(suite, glove, "GloVe", is_glove=True)
    for suite in (semanticTests, syntacticTests)
)
semanticw2v, syntacticw2v = (
    evalanalogy(suite, word2vec, "Word2Vec", is_glove=False)
    for suite in (semanticTests, syntacticTests)
)

# Side-by-side tables: the GloVe frame plus the Word2Vec prediction column.
semantic = semanticglv.assign(**{"Word2Vec Result": semanticw2v["Word2Vec Result"]})
syntactic = syntacticglv.assign(**{"Word2Vec Result": syntacticw2v["Word2Vec Result"]})

for title, table in (
    ("\nResult (Semantic Analogies):", semantic),
    ("\nResult (Syntactic Analogies):", syntactic),
):
    print(title)
    print(table.to_string(index=False))


Result (Semantic Analogies):
 Test #                               Test   Expected GloVe Result Word2Vec Result
      1      brother : sister :: uncle : ?       aunt         aunt            aunt
      2      roma : italy :: barcelona : ?      spain        spain           spain
      3       madrid : spain :: berlin : ?    germany      germany         germany
      4        king : queen :: emperor : ?    empress      empress         empress
      5            cat : kitten :: dog : ?      puppy        puppy           puppy
      6             eyes : see :: ears : ?       hear         hear            hear
      7              cow : milk :: bee : ?      honey        honey           honey
      8      yale : college :: harvard : ? university   university      university
      9 morning : breakfast :: evening : ?     dinner       dinner          dinner
     10           money : coin :: gold : ?     silver       silver          silver

Result (Syntactic Analogies):
 Test #                   

### **Accuracy**

In [None]:
def accuracy(results_df, expected_col='Expected', predicted_col='Result'):
    """Return the percentage of rows whose prediction matches the expected word.

    Values are compared as strings, ignoring case.

    Args:
        results_df: DataFrame holding the expected and predicted columns.
        expected_col: Name of the column with the reference answers.
        predicted_col: Name of the column with the model's answers.

    Returns:
        Accuracy as a float in ``[0, 100]``. An empty frame yields ``0.0``
        instead of raising ``ZeroDivisionError``.
    """
    total = len(results_df)
    if total == 0:
        return 0.0
    correct = sum(
        str(predicted).lower() == str(expected).lower()
        for expected, predicted in zip(results_df[expected_col], results_df[predicted_col])
    )
    return (correct / total) * 100

# Accuracy of each model on each test suite, in percent.
semanticglvacc = accuracy(semanticglv, predicted_col='GloVe Result')
semanticw2vacc = accuracy(semanticw2v, predicted_col='Word2Vec Result')
syntacticglvacc = accuracy(syntacticglv, predicted_col='GloVe Result')
syntacticw2vacc = accuracy(syntacticw2v, predicted_col='Word2Vec Result')

# One row per analogy type: label, GloVe score, Word2Vec score.
summary_data = [
    [label] + [f"{score:.1f}%" for score in scores]
    for label, scores in (
        ("Semantic", (semanticglvacc, semanticw2vacc)),
        ("Syntactic", (syntacticglvacc, syntacticw2vacc)),
    )
]

summary_columns = ["Analogy Type", "GloVe Accuracy", "Word2Vec Accuracy"]
summary_df = pd.DataFrame(summary_data, columns=summary_columns)

print("\n\nAccuracy:")
print(summary_df.to_string(index=False))



Accuracy:
Analogy Type GloVe Accuracy Word2Vec Accuracy
    Semantic         100.0%            100.0%
   Syntactic         100.0%            100.0%
