# Basic GloVe w/ Pretrained Stanford Model

In [2]:
import torch
import torch.nn as nn
import torchtext.vocab as vocab

import warnings
warnings.filterwarnings('ignore')

# Load model

In [3]:
# Load the 100-dimensional vectors from the GloVe "6B" release.
glove = vocab.GloVe(name='6B', dim=100)

# `itos` is the index -> token list, so its length is the vocabulary size.
vocab_size = len(glove.itos)
print('Loaded {} words'.format(vocab_size))

.vector_cache/glove.6B.zip: 862MB [11:01, 1.30MB/s]                               
100%|█████████▉| 398160/400000 [00:16<00:00, 22882.44it/s]

Loaded 400000 words


100%|█████████▉| 398160/400000 [00:30<00:00, 22882.44it/s]

# Sample Tasks

View a sample word embedding

In [6]:
# Resolve the token to its row index, then pull that row out of the vector matrix.
dog_index = glove.stoi["dog"]
glove.vectors[dog_index]

tensor([ 0.3082,  0.3094,  0.5280, -0.9254, -0.7367,  0.6348,  0.4420,  0.1026,
        -0.0914, -0.5661, -0.5327,  0.2013,  0.7704, -0.1398,  0.1373,  1.1128,
         0.8930, -0.1787, -0.0020,  0.5729,  0.5948,  0.5043, -0.2899, -1.3491,
         0.4276,  1.2748, -1.1613, -0.4108,  0.0428,  0.5487,  0.1890,  0.3759,
         0.5803,  0.6697,  0.8116,  0.9386, -0.5100, -0.0701,  0.8282, -0.3535,
         0.2109, -0.2441, -0.1655, -0.7836, -0.4848,  0.3897, -0.8636, -0.0164,
         0.3198, -0.4925, -0.0694,  0.0189, -0.0983,  1.3126, -0.1212, -1.2399,
        -0.0914,  0.3529,  0.6464,  0.0896,  0.7029,  1.1244,  0.3864,  0.5208,
         0.9879,  0.7995, -0.3462,  0.1409,  0.8017,  0.2099, -0.8601, -0.1531,
         0.0745,  0.4082,  0.0192,  0.5159, -0.3443, -0.2453, -0.7798,  0.2743,
         0.2242,  0.2016,  0.0174, -0.0147, -1.0235, -0.3970, -0.0056,  0.3057,
         0.3175,  0.0214,  0.1184, -0.1132,  0.4246,  0.5340, -0.1672, -0.2718,
        -0.6255,  0.1288,  0.6253, -0.52

Find similar words (TODO: not implemented yet; the cell below only inspects the shape of the embedding matrix)

In [16]:
# Dimensions of the full embedding matrix.
glove.vectors.size()

torch.Size([400000, 100])

# Load into Torch embeddings object

In [17]:
# Both names are already imported in the notebook's first cell; repeating the
# imports here is a no-op.
import torch
import torch.nn as nn

In [19]:
# Build the embedding layer directly from the pretrained GloVe matrix.
#
# No `padding_idx` is set: the GloVe vocabulary has no dedicated padding token,
# and row 0 holds the vector of a real word, so declaring it as padding would
# silently zero that word's gradient during training.
#
# `freeze=False` keeps the weights trainable (the default for a plain
# `nn.Embedding`), and `.clone()` gives the layer its own storage so that
# updating the layer never mutates `glove.vectors`.
embeddings = nn.Embedding.from_pretrained(glove.vectors.clone(), freeze=False)

# Display the loaded weight matrix.
embeddings.weight.data

tensor([[-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        [-0.1077,  0.1105,  0.5981,  ..., -0.8316,  0.4529,  0.0826],
        [-0.3398,  0.2094,  0.4635,  ..., -0.2339,  0.4730, -0.0288],
        ...,
        [ 0.3609, -0.1692, -0.3270,  ...,  0.2714, -0.2919,  0.1611],
        [-0.1046, -0.5047, -0.4933,  ...,  0.4253, -0.5125, -0.1705],
        [ 0.2837, -0.6263, -0.4435,  ...,  0.4368, -0.8261, -0.1570]])

In [23]:
sentence = "the quick brown fox jumped over the lazy dog"

# Translate each whitespace-separated token into its GloVe row index.
indexes = list(map(glove.stoi.__getitem__, sentence.split()))

# Feed the indices through the embedding layer as an int64 tensor.
embeddings(torch.tensor(indexes, dtype=torch.long))

tensor([[-0.0382, -0.2449,  0.7281, -0.3996,  0.0832,  0.0440, -0.3914,  0.3344,
         -0.5755,  0.0875,  0.2879, -0.0673,  0.3091, -0.2638, -0.1323, -0.2076,
          0.3340, -0.3385, -0.3174, -0.4834,  0.1464, -0.3730,  0.3458,  0.0520,
          0.4495, -0.4697,  0.0263, -0.5415, -0.1552, -0.1411, -0.0397,  0.2828,
          0.1439,  0.2346, -0.3102,  0.0862,  0.2040,  0.5262,  0.1716, -0.0824,
         -0.7179, -0.4153,  0.2033, -0.1276,  0.4137,  0.5519,  0.5791, -0.3348,
         -0.3656, -0.5486, -0.0629,  0.2658,  0.3020,  0.9977, -0.8048, -3.0243,
          0.0125, -0.3694,  2.2167,  0.7220, -0.2498,  0.9214,  0.0345,  0.4674,
          1.1079, -0.1936, -0.0746,  0.2335, -0.0521, -0.2204,  0.0572, -0.1581,
         -0.3080, -0.4162,  0.3797,  0.1501, -0.5321, -0.2055, -1.2526,  0.0716,
          0.7056,  0.4974, -0.4206,  0.2615, -1.5380, -0.3022, -0.0734, -0.2831,
          0.3710, -0.2522,  0.0162, -0.0171, -0.3898,  0.8742, -0.7257, -0.5106,
         -0.5203, -0.1459,  

In [31]:
# Row index of the token "fox" in the GloVe vocabulary.
fox_index = glove.stoi["fox"]
fox_index

2106

In [33]:
# Sanity check: looking the vector up by token gives the same row as indexing
# the matrix directly with the index printed above.
torch.equal(glove.vectors[glove.stoi["fox"]], glove.vectors[2106])

True