In [1]:
import sys
import os
import numpy as np
# Make the project's ../src directory importable so the `embeddings` and
# `preprocessing` imports below resolve. The path is relative, so it is
# resolved against the kernel's current working directory.
# The membership check keeps this cell idempotent: re-running it does not
# append a duplicate entry to sys.path.
src_dir = os.path.abspath(os.path.join('..', 'src'))
if src_dir not in sys.path:
    sys.path.append(src_dir)

from embeddings import EmbeddingEngine
from preprocessing import clean_text

Downloading NLTK resources...
Download complete.


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [2]:
# 1. Initialize Engine
# Make sure the path matches where you put the file!
# The path is relative, so it is resolved against the kernel's current working
# directory.
# NOTE(review): the "100d" in the file name suggests 100-dimensional vectors,
# which is what the vectorization check further down expects — keep the two in
# sync if a different GloVe file is used.
glove_path = '../models/glove/glove.6B.100d.txt'
# NOTE(review): presumably this only stores the path and the actual load
# happens in engine.load_model() — confirm in src/embeddings.py.
engine = EmbeddingEngine(glove_path)

In [3]:
# 2. Load Model (Wait for "Model loaded successfully!")
# Prints "Loading GloVe model from <path>..." before the success message.
# Later cells call engine.get_sentence_vector(...) and read engine.model, so
# run this cell first (presumably both depend on the loaded model — verify).
engine.load_model()

Loading GloVe model from ../models/glove/glove.6B.100d.txt...
Model loaded successfully!


In [None]:
# 3. Test Vectorization
# Smoke test of the pipeline on one sample sentence:
# raw text -> clean_text -> tokens -> engine.get_sentence_vector -> vector.
sentence = "Where is my package?"
tokens = clean_text(sentence)

# clean_text can drop words (this sentence is reduced to a single token), so
# make sure something is left to embed before asking for a vector.
assert tokens, f"clean_text removed every token from {sentence!r}"

vector = engine.get_sentence_vector(tokens)

# Enforce the expected shape instead of only stating it in a comment, so the
# notebook fails loudly under Restart & Run All if the embedding size changes.
assert vector.shape == (100,), f"Expected a (100,) sentence vector, got {vector.shape}"

print(f"\nOriginal: {sentence}")
print(f"Tokens: {tokens}")
print(f"Vector Shape: {vector.shape}")
print(f"First 5 numbers of vector: {vector[:5]}")




Original: Where is my package?
Tokens: ['package']
Vector Shape: (100,)
First 5 numbers of vector: [-0.57239  0.73942  0.64488 -0.39172  0.42011]


In [5]:
# 4. Sanity Check (Similarity)
# Let's see if 'refund' is mathematically closer to 'money' than 'package'.
# The metric is cosine similarity, so a HIGHER value means "closer".


def cosine_similarity(vec_a, vec_b):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        vec_a: First vector (array-like of numbers).
        vec_b: Second vector, same length as ``vec_a``.

    Returns:
        float: dot(vec_a, vec_b) / (||vec_a|| * ||vec_b||), or 0.0 when either
        vector has zero norm (avoids a division-by-zero NaN).
    """
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if norm_product == 0:
        return 0.0
    return float(np.dot(vec_a, vec_b) / norm_product)


# Look up the word vectors by indexing engine.model with the word.
# NOTE(review): presumably an out-of-vocabulary word raises KeyError here — confirm.
vec_refund = engine.model['refund']
vec_money = engine.model['money']
vec_package = engine.model['package']

# Named sim_* (not dist_*): these are similarities, not distances.
sim_money = cosine_similarity(vec_refund, vec_money)
sim_package = cosine_similarity(vec_refund, vec_package)

print(f"\nSimilarity (Refund vs Money): {sim_money:.4f}")
print(f"Similarity (Refund vs Package): {sim_package:.4f}")


Similarity (Refund vs Money): 0.5004
Similarity (Refund vs Package): 0.4378
