In [2]:
import numpy as np
import gensim.downloader as api
from scipy.special import softmax

In [3]:
# Load pre-trained GloVe embeddings
print("Loading GloVe embeddings...")
glove_vectors = api.load("glove-wiki-gigaword-300")  # 300-dimensional GloVe embeddings
print("GloVe embeddings loaded.")

Loading GloVe embeddings...
GloVe embeddings loaded.


In [4]:
len(glove_vectors)

400000

In [None]:
if 'the' in glove_vectors:
  #print(glove_vectors['the'])
  print(len(glove_vectors['cat']))
  print(len(glove_vectors['the']))

300
300


In [5]:
if 'cat' in glove_vectors:
  print(glove_vectors['cat'])
  print(len(glove_vectors['cat']))

[-0.29353    0.33247   -0.047372  -0.12247    0.071956  -0.23408
 -0.06238   -0.0037192 -0.39462   -0.69411    0.36731   -0.12141
 -0.044485  -0.15268    0.34864    0.22926    0.54361    0.25215
  0.097972  -0.087305   0.87058   -0.12211   -0.079825   0.28712
 -0.68563   -0.27265    0.22056   -0.75752    0.56293    0.091377
 -0.71004   -0.3142    -0.56826   -0.26684   -0.60102    0.26959
 -0.17992    0.10701   -0.57858    0.38161   -0.67127    0.10927
  0.079426   0.022372  -0.081147   0.011182   0.67089   -0.19094
 -0.33676   -0.48471   -0.35406   -0.15209    0.44503    0.46385
  0.38409    0.045081  -0.59079    0.21763    0.38576   -0.44567
  0.009332   0.442      0.097062   0.38005   -0.11881   -0.42718
 -0.31005   -0.025058   0.12689   -0.13468    0.11976    0.76253
  0.2524    -0.26934    0.068629  -0.10071    0.011066  -0.18532
  0.44983   -0.57507    0.12278   -0.064878   0.044456  -0.020999
 -0.069838  -0.47329   -0.43074    0.39158   -0.047815  -0.93659
 -0.55128   -0.1422    

In [6]:
# Our input sentence
sentence = "The dog chased the cat which was scared"
words = sentence.split()
words

['The', 'dog', 'chased', 'the', 'cat', 'which', 'was', 'scared']

In [7]:
# Step 1: Create word embeddings using GloVe
embedding_dim = 300  # GloVe vectors are 300-dimensional
word_embeddings = np.array([glove_vectors[word.lower()] if word.lower() in glove_vectors else glove_vectors['unk'] for word in words])

In [8]:
# Step 2: Create Query, Key, and Value matrices

W_query = np.random.rand(embedding_dim, embedding_dim)
W_key = np.random.rand(embedding_dim, embedding_dim)
W_value = np.random.rand(embedding_dim, embedding_dim)


In [9]:
W_query

array([[0.10468889, 0.66985945, 0.78306423, ..., 0.59049432, 0.65616668,
        0.50552995],
       [0.6497369 , 0.16131284, 0.16950533, ..., 0.12476515, 0.76795722,
        0.62249402],
       [0.17199248, 0.5605414 , 0.86961772, ..., 0.33535591, 0.65218341,
        0.77357347],
       ...,
       [0.42978207, 0.52834508, 0.44224166, ..., 0.57845809, 0.61938   ,
        0.65871591],
       [0.17675379, 0.76667296, 0.96973226, ..., 0.96413132, 0.25804884,
        0.25553019],
       [0.22106515, 0.55787563, 0.28070566, ..., 0.18544292, 0.76325196,
        0.14542941]])

In [10]:
W_query.shape

(300, 300)

In [11]:
# Step 3: Compute Q, K, V
Q = np.dot(word_embeddings, W_query)
K = np.dot(word_embeddings, W_key)
V = np.dot(word_embeddings, W_value)

In [12]:
Q.shape

(8, 300)

In [15]:
# Step 4: Compute attention scores
attention_scores = np.dot(Q, K.T)

In [16]:
# Step 5: Scale the attention scores
attention_scores /= np.sqrt(embedding_dim)

In [17]:
# Step 6: Apply softmax to get attention weights
attention_weights = softmax(attention_scores, axis=1)

In [18]:
# Step 7: Compute the weighted sum
output = np.dot(attention_weights, V)

In [19]:
output.shape

(8, 300)

In [20]:
# Print results
for i, word in enumerate(words):
    print(f"\nWord: {word}")
    print(f"Top 5 words this word pays attention to:")
    top_attention = sorted(enumerate(attention_weights[i]), key=lambda x: x[1], reverse=True)[:5]
    for idx, weight in top_attention:
        print(f"  {words[idx]}: {weight:.4f}")


Word: The
Top 5 words this word pays attention to:
  The: 0.5000
  the: 0.5000
  chased: 0.0000
  which: 0.0000
  scared: 0.0000

Word: dog
Top 5 words this word pays attention to:
  cat: 1.0000
  dog: 0.0000
  was: 0.0000
  which: 0.0000
  scared: 0.0000

Word: chased
Top 5 words this word pays attention to:
  The: 0.5000
  the: 0.5000
  chased: 0.0000
  which: 0.0000
  scared: 0.0000

Word: the
Top 5 words this word pays attention to:
  The: 0.5000
  the: 0.5000
  chased: 0.0000
  which: 0.0000
  scared: 0.0000

Word: cat
Top 5 words this word pays attention to:
  cat: 1.0000
  dog: 0.0000
  was: 0.0000
  scared: 0.0000
  which: 0.0000

Word: which
Top 5 words this word pays attention to:
  cat: 0.9810
  was: 0.0136
  dog: 0.0033
  which: 0.0020
  scared: 0.0001

Word: was
Top 5 words this word pays attention to:
  cat: 1.0000
  dog: 0.0000
  was: 0.0000
  which: 0.0000
  scared: 0.0000

Word: scared
Top 5 words this word pays attention to:
  was: 0.4775
  cat: 0.2819
  dog: 0.1935


In [21]:
# Analyze relationships
print("\nInteresting relationships:")
for i, word in enumerate(words):
    max_attention = np.argmax(attention_weights[i])
    if i != max_attention:
        print(f"'{word}' pays most attention to '{words[max_attention]}'")


Interesting relationships:
'dog' pays most attention to 'cat'
'chased' pays most attention to 'The'
'the' pays most attention to 'The'
'which' pays most attention to 'cat'
'was' pays most attention to 'cat'
'scared' pays most attention to 'was'
