# Word2Vec

1. 주어진 단어들을 word2vec 모델에 들어갈 수 있는 형태로 만든다.
2. CBOW, Skip-gram 모델을 각각 구현한다.
3. 모델을 학습해보고 결과를 확인한다.  


* **Word Embedding - Word2Vec : CBOW(Continuous Bag-of-Words) & Skip-gram**  
  
  
* **CBOW(Continuous Bag-of-Words)**  
주변 단어들을 가지고 중심 단어를 예측하는 방식으로 학습합니다.  
주변 단어들의 one-hot encoding 벡터를 각각 embedding layer에 projection하여 각각의 embedding 벡터를 얻고  
이 embedding들을 element-wise한 덧셈으로 합친 뒤, 다시 linear transformation하여   
예측하고자 하는 중심 단어의 one-hot encoding 벡터와 같은 사이즈의 벡터로 만든 뒤,   
중심 단어의 one-hot encoding 벡터와의 loss를 계산합니다.    
예) A cute puppy is walking in the park. & window size: 2  
Input(주변 단어): "A", "cute", "is", "walking"  
Output(중심 단어): "puppy"    


* **Skip-gram**  
중심 단어를 가지고 주변 단어들을 예측하는 방식으로 학습합니다.   
중심 단어의 one-hot encoding 벡터를 embedding layer에 projection하여 해당 단어의 embedding 벡터를 얻고   
이 벡터를 다시 linear transformation하여 예측하고자 하는 각 주변 단어의 one-hot encoding 벡터와 같은 사이즈의 벡터로 만든 뒤,   
그 주변 단어들의 one-hot encoding 벡터와의 loss를 각각 계산합니다.   
예) A cute puppy is walking in the park. & window size: 2  
Input(중심 단어): "puppy"  
Output(주변 단어): "A", "cute", "is", "walking"  


## 라이브러리

In [1]:
from tqdm import tqdm
from konlpy.tag import Okt
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from collections import defaultdict

import torch
import copy
import numpy as np

## 데이터 전처리

Word2Vec 형식에 맞게 전처리  

In [2]:
# Small in-memory corpus of short Korean review sentences (food, service,
# hygiene, price) that the Word2Vec models are trained on.
train_data = [
  "정말 맛있습니다. 추천합니다.",
  "기대했던 것보단 별로였네요.",
  "다 좋은데 가격이 너무 비싸서 다시 가고 싶다는 생각이 안 드네요.",
  "완전 최고입니다! 재방문 의사 있습니다.",
  "음식도 서비스도 다 만족스러웠습니다.",
  "위생 상태가 좀 별로였습니다. 좀 더 개선되기를 바랍니다.",
  "맛도 좋았고 직원분들 서비스도 너무 친절했습니다.",
  "기념일에 방문했는데 음식도 분위기도 서비스도 다 좋았습니다.",
  "전반적으로 음식이 너무 짰습니다. 저는 별로였네요.",
  "위생에 조금 더 신경 썼으면 좋겠습니다. 조금 불쾌했습니다."       
]

# Words whose learned embeddings are printed in the test section.
test_words = ["음식", "맛", "서비스", "위생", "가격"]

In [3]:
# Okt morphological analyzer from konlpy; used by make_tokenized below.
tokenizer = Okt()

In [5]:
def make_tokenized(data):
    """Tokenize each sentence in ``data`` into a list of stemmed morphemes.

    Uses the module-level ``tokenizer`` (``tokenizer.morphs(..., stem=True)``)
    and shows a tqdm progress bar while iterating.

    Args:
        data: iterable of sentence strings.

    Returns:
        A list with one token list per input sentence, in input order.
    """
    return [tokenizer.morphs(sentence, stem=True) for sentence in tqdm(data)]

In [6]:
# Tokenize the training corpus; the bare name on the last line makes the
# notebook display the result.
train_tokenized = make_tokenized(train_data)
train_tokenized

100%|██████████| 10/10 [00:03<00:00,  2.72it/s]


[['정말', '맛있다', '.', '추천', '하다', '.'],
 ['기대하다', '것', '보단', '별로', '이다', '.'],
 ['다',
  '좋다',
  '가격',
  '이',
  '너무',
  '비싸다',
  '다시',
  '가다',
  '싶다',
  '생각',
  '이',
  '안',
  '드네',
  '요',
  '.'],
 ['완전', '최고', '이다', '!', '재', '방문', '의사', '있다', '.'],
 ['음식', '도', '서비스', '도', '다', '만족스럽다', '.'],
 ['위생',
  '상태',
  '가',
  '좀',
  '별로',
  '이다',
  '.',
  '좀',
  '더',
  '개선',
  '되다',
  '기르다',
  '바라다',
  '.'],
 ['맛', '도', '좋다', '직원', '분들', '서비스', '도', '너무', '친절하다', '.'],
 ['기념일', '에', '방문', '하다', '음식', '도', '분위기', '도', '서비스', '도', '다', '좋다', '.'],
 ['전반', '적', '으로', '음식', '이', '너무', '짜다', '.', '저', '는', '별로', '이다', '.'],
 ['위생', '에', '조금', '더', '신경', '써다', '좋다', '.', '조금', '불쾌하다', '.']]

In [11]:
# Tally how many times each token appears across the whole tokenized corpus.
word_count = defaultdict(int)

for sentence_tokens in tqdm(train_tokenized):
    for tok in sentence_tokens:
        word_count[tok] += 1

100%|██████████| 10/10 [00:00<00:00, 94254.02it/s]


In [12]:
# Replace the count dict with a list of (word, count) pairs, most frequent
# first; ties keep their first-seen order because sorted() is stable.
word_count = sorted(
    word_count.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
print(word_count)

[('.', 14), ('도', 7), ('이다', 4), ('좋다', 4), ('별로', 3), ('다', 3), ('이', 3), ('너무', 3), ('음식', 3), ('서비스', 3), ('하다', 2), ('방문', 2), ('위생', 2), ('좀', 2), ('더', 2), ('에', 2), ('조금', 2), ('정말', 1), ('맛있다', 1), ('추천', 1), ('기대하다', 1), ('것', 1), ('보단', 1), ('가격', 1), ('비싸다', 1), ('다시', 1), ('가다', 1), ('싶다', 1), ('생각', 1), ('안', 1), ('드네', 1), ('요', 1), ('완전', 1), ('최고', 1), ('!', 1), ('재', 1), ('의사', 1), ('있다', 1), ('만족스럽다', 1), ('상태', 1), ('가', 1), ('개선', 1), ('되다', 1), ('기르다', 1), ('바라다', 1), ('맛', 1), ('직원', 1), ('분들', 1), ('친절하다', 1), ('기념일', 1), ('분위기', 1), ('전반', 1), ('적', 1), ('으로', 1), ('짜다', 1), ('저', 1), ('는', 1), ('신경', 1), ('써다', 1), ('불쾌하다', 1)]


In [13]:
# Word-to-index mapping: each word gets the next free integer id, in the
# (descending-frequency) order of word_count.
w2i = {}
for word, _count in tqdm(word_count):
    w2i.setdefault(word, len(w2i))

100%|██████████| 60/60 [00:00<00:00, 544714.81it/s]


In [14]:
# Show the tokenized corpus next to the word-to-index mapping for inspection.
print(train_tokenized)
print(w2i)

[['정말', '맛있다', '.', '추천', '하다', '.'], ['기대하다', '것', '보단', '별로', '이다', '.'], ['다', '좋다', '가격', '이', '너무', '비싸다', '다시', '가다', '싶다', '생각', '이', '안', '드네', '요', '.'], ['완전', '최고', '이다', '!', '재', '방문', '의사', '있다', '.'], ['음식', '도', '서비스', '도', '다', '만족스럽다', '.'], ['위생', '상태', '가', '좀', '별로', '이다', '.', '좀', '더', '개선', '되다', '기르다', '바라다', '.'], ['맛', '도', '좋다', '직원', '분들', '서비스', '도', '너무', '친절하다', '.'], ['기념일', '에', '방문', '하다', '음식', '도', '분위기', '도', '서비스', '도', '다', '좋다', '.'], ['전반', '적', '으로', '음식', '이', '너무', '짜다', '.', '저', '는', '별로', '이다', '.'], ['위생', '에', '조금', '더', '신경', '써다', '좋다', '.', '조금', '불쾌하다', '.']]
{'.': 0, '도': 1, '이다': 2, '좋다': 3, '별로': 4, '다': 5, '이': 6, '너무': 7, '음식': 8, '서비스': 9, '하다': 10, '방문': 11, '위생': 12, '좀': 13, '더': 14, '에': 15, '조금': 16, '정말': 17, '맛있다': 18, '추천': 19, '기대하다': 20, '것': 21, '보단': 22, '가격': 23, '비싸다': 24, '다시': 25, '가다': 26, '싶다': 27, '생각': 28, '안': 29, '드네': 30, '요': 31, '완전': 32, '최고': 33, '!': 34, '재': 35, '의사': 36, '있다': 37, '만족스럽다': 38, '상태

실제 모델에 들어가기 위한 input 을 만들기 위해 `Dataset` 클래스를 정의한다.

In [15]:
# CBOW
class CBOWDataset(Dataset):
    """(context ids, center id) samples for CBOW training.

    Context words are the input and the center word is the target. Tokens
    are converted to ids through the module-level ``w2i`` mapping. A sample
    is emitted only for positions that have ``window_size`` tokens available
    on both sides; positions near the sentence edges are skipped.
    """

    def __init__(self, train_tokenized, window_size=2):
        """Build the sample tensors.

        Args:
            train_tokenized: iterable of token lists (one per sentence).
            window_size: number of context tokens taken on each side.
        """
        contexts = []
        centers = []

        for tokens in tqdm(train_tokenized):
            token_ids = [w2i[token] for token in tokens]
            upper = len(token_ids) - window_size
            for pos, center in enumerate(token_ids):
                # Skip positions without a complete window on both sides.
                if pos < window_size or pos >= upper:
                    continue
                left = token_ids[pos - window_size:pos]
                right = token_ids[pos + 1:pos + window_size + 1]
                contexts.append(left + right)
                centers.append(center)

        # x: (number of samples, 2 * window_size) -- every kept sample has a
        # full window, so all rows have the same length.
        self.x = torch.LongTensor(contexts)
        # y: (number of samples)
        self.y = torch.LongTensor(centers)

    def __len__(self):
        """Return the number of samples."""
        return self.x.size(0)

    def __getitem__(self, idx):
        """Return the ``(context ids, center id)`` pair at ``idx``."""
        return self.x[idx], self.y[idx]

In [17]:
# SkipGram
class SkipGramDataset(Dataset):
    """(center id, context id) samples for Skip-gram training.

    The center word is the input and each surrounding word is a target.
    Tokens are converted to ids through the module-level ``w2i`` mapping.
    Only positions with ``window_size`` tokens available on both sides are
    used; each such position contributes one sample per context word.
    """

    def __init__(self, train_tokenized, window_size=2):
        """Build the sample tensors.

        Args:
            train_tokenized: iterable of token lists (one per sentence).
            window_size: number of context tokens taken on each side.
        """
        centers = []
        targets = []

        for tokens in tqdm(train_tokenized):
            token_ids = [w2i[token] for token in tokens]
            upper = len(token_ids) - window_size
            for pos, center in enumerate(token_ids):
                # Skip positions without a complete window on both sides.
                if pos < window_size or pos >= upper:
                    continue
                left = token_ids[pos - window_size:pos]
                right = token_ids[pos + 1:pos + window_size + 1]
                targets.extend(left + right)
                # Repeat the center id once per context word so that
                # inputs and targets line up one-to-one.
                centers.extend([center] * 2 * window_size)

        self.x = torch.LongTensor(centers)  # (number of samples)
        self.y = torch.LongTensor(targets)  # (number of samples)

    def __len__(self):
        """Return the number of samples."""
        return self.x.size(0)

    def __getitem__(self, idx):
        """Return the ``(center id, context id)`` pair at ``idx``."""
        return self.x[idx], self.y[idx]

|Data type|	dtype|	CPU tensor|	GPU tensor|
|-|-|-|-|
|32-bit floating point	|torch.float32 or torch.float	|torch.FloatTensor|	torch.cuda.FloatTensor|
|64-bit floating point	|torch.float64 or torch.double	|torch.DoubleTensor|	torch.cuda.DoubleTensor|
|16-bit floating point	|torch.float16 or torch.half	|torch.HalfTensor	|torch.cuda.HalfTensor|
|8-bit integer (unsigned)	|torch.uint8	|torch.ByteTensor	|torch.cuda.ByteTensor|
|8-bit integer (signed)	|torch.int8	|torch.CharTensor	|torch.cuda.CharTensor|
|16-bit integer (signed)	|torch.int16 or torch.short	|torch.ShortTensor	|torch.cuda.ShortTensor|
|32-bit integer (signed)	|torch.int32 or torch.int	|torch.IntTensor|	torch.cuda.IntTensor|
|64-bit integer (signed)	|torch.int64 or torch.long	|torch.LongTensor	|torch.cuda.LongTensor|
|Boolean|	torch.bool|	torch.BoolTensor	|torch.cuda.BoolTensor|

각 모델에 맞는 Dataset 객체를 생성

In [18]:
# Build one dataset per model; both use the default window_size of 2.
cbow_set = CBOWDataset(train_tokenized)
skipgram_set = SkipGramDataset(train_tokenized)

100%|██████████| 10/10 [00:00<00:00, 58497.96it/s]
100%|██████████| 10/10 [00:00<00:00, 60611.33it/s]


In [20]:
# Materialize every (context ids, center id) sample for inspection.
print(list(cbow_set))

[(tensor([17, 18, 19, 10]), tensor(0)), (tensor([18,  0, 10,  0]), tensor(19)), (tensor([20, 21,  4,  2]), tensor(22)), (tensor([21, 22,  2,  0]), tensor(4)), (tensor([5, 3, 6, 7]), tensor(23)), (tensor([ 3, 23,  7, 24]), tensor(6)), (tensor([23,  6, 24, 25]), tensor(7)), (tensor([ 6,  7, 25, 26]), tensor(24)), (tensor([ 7, 24, 26, 27]), tensor(25)), (tensor([24, 25, 27, 28]), tensor(26)), (tensor([25, 26, 28,  6]), tensor(27)), (tensor([26, 27,  6, 29]), tensor(28)), (tensor([27, 28, 29, 30]), tensor(6)), (tensor([28,  6, 30, 31]), tensor(29)), (tensor([ 6, 29, 31,  0]), tensor(30)), (tensor([32, 33, 34, 35]), tensor(2)), (tensor([33,  2, 35, 11]), tensor(34)), (tensor([ 2, 34, 11, 36]), tensor(35)), (tensor([34, 35, 36, 37]), tensor(11)), (tensor([35, 11, 37,  0]), tensor(36)), (tensor([8, 1, 1, 5]), tensor(9)), (tensor([ 1,  9,  5, 38]), tensor(1)), (tensor([ 9,  1, 38,  0]), tensor(5)), (tensor([12, 39, 13,  4]), tensor(40)), (tensor([39, 40,  4,  2]), tensor(13)), (tensor([40, 13,

In [21]:
# Materialize every (center id, context id) sample for inspection.
print(list(skipgram_set))

[(tensor(0), tensor(17)), (tensor(0), tensor(18)), (tensor(0), tensor(19)), (tensor(0), tensor(10)), (tensor(19), tensor(18)), (tensor(19), tensor(0)), (tensor(19), tensor(10)), (tensor(19), tensor(0)), (tensor(22), tensor(20)), (tensor(22), tensor(21)), (tensor(22), tensor(4)), (tensor(22), tensor(2)), (tensor(4), tensor(21)), (tensor(4), tensor(22)), (tensor(4), tensor(2)), (tensor(4), tensor(0)), (tensor(23), tensor(5)), (tensor(23), tensor(3)), (tensor(23), tensor(6)), (tensor(23), tensor(7)), (tensor(6), tensor(3)), (tensor(6), tensor(23)), (tensor(6), tensor(7)), (tensor(6), tensor(24)), (tensor(7), tensor(23)), (tensor(7), tensor(6)), (tensor(7), tensor(24)), (tensor(7), tensor(25)), (tensor(24), tensor(6)), (tensor(24), tensor(7)), (tensor(24), tensor(25)), (tensor(24), tensor(26)), (tensor(25), tensor(7)), (tensor(25), tensor(24)), (tensor(25), tensor(26)), (tensor(25), tensor(27)), (tensor(26), tensor(24)), (tensor(26), tensor(25)), (tensor(26), tensor(27)), (tensor(26), tens

## 모델 Class 구현

차례대로 두 가지 Word2Vec 모델을 구현  
* `self.embedding` : `vocab_size` 크기의 one-hot-vector 를 특정 크기의 dim 차원으로 embedding 시키는 layer.
* `self.linear` : 변환된 embedding vector 를 다시 원래 vocab_size 로 바꾸는 layer

`torch.nn.Embedding`  
```py
torch.nn.Embedding(num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None, 
    max_norm: Optional[float] = None, norm_type: float = 2.0, scale_grad_by_freq: bool = False, 
    sparse: bool = False, _weight: Optional[torch.Tensor] = None)
```

num_embeddings = vocab_size (단어 집합의 크기, 즉 서로 다른 단어의 개수)  
embedding_dim = 임베딩 차원  

In [24]:
# CBOW
class CBOW(nn.Module):
    """Continuous Bag-of-Words model.

    Embeds every context word, sums the context embeddings into a single
    vector, and projects that vector to one score per vocabulary word.

    Shape legend: B = batch size, W = window size, d_w = embedding size,
    V = vocabulary size.
    """

    def __init__(self, vocab_size, dim):
        """Create the layers.

        Args:
            vocab_size: number of distinct words (V).
            dim: embedding dimension (d_w).
        """
        super().__init__()
        # Lookup table mapping a word id to a dim-sized vector.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=dim, sparse=True
        )
        # Projects an embedding back to vocabulary-sized scores.
        self.linear = nn.Linear(in_features=dim, out_features=vocab_size)

    def forward(self, x):
        """Return vocabulary scores (B, V) for context ids ``x`` of shape (B, 2W)."""
        context_vectors = self.embedding(x)        # (B, 2W, d_w)
        # Merge the context words into one vector by summing over the 2W axis.
        merged = context_vectors.sum(dim=1)        # (B, d_w)
        return self.linear(merged)                 # (B, V)

In [35]:
# SkipGram
class SkipGram(nn.Module):
    """Skip-gram model.

    Embeds the center word and projects its embedding to one score per
    vocabulary word; the scores are used to predict a surrounding word.

    Shape legend: B = batch size, d_w = embedding size, V = vocabulary size.
    """

    def __init__(self, vocab_size, dim):
        """Create the layers.

        Args:
            vocab_size: number of distinct words (V).
            dim: embedding dimension (d_w).
        """
        super().__init__()
        # Lookup table mapping a word id to a dim-sized vector.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=dim, sparse=True
        )
        # Projects an embedding back to vocabulary-sized scores.
        self.linear = nn.Linear(in_features=dim, out_features=vocab_size)

    def forward(self, x):
        """Return vocabulary scores (B, V) for center-word ids ``x`` of shape (B)."""
        center_vectors = self.embedding(x)   # (B, d_w)
        return self.linear(center_vectors)   # (B, V)

In [49]:
# Vocabulary size: number of distinct tokens in the mapping.
len(w2i)

60

In [36]:
# Both models cover the full vocabulary and use 256-dimensional embeddings.
cbow = CBOW(vocab_size = len(w2i), dim=256)
skipgram = SkipGram(vocab_size = len(w2i), dim=256)

In [37]:
# Display the CBOW module structure.
cbow

CBOW(
  (embedding): Embedding(60, 256, sparse=True)
  (linear): Linear(in_features=256, out_features=60, bias=True)
)

In [38]:
# Display the SkipGram module structure.
skipgram

SkipGram(
  (embedding): Embedding(60, 256, sparse=True)
  (linear): Linear(in_features=256, out_features=60, bias=True)
)

## 모델 학습

다음과 같이 hyperparameter 를 세팅하고 DataLoader 객체를 만듭니다.

In [39]:
# Training hyperparameters shared by both models.
batch_size = 4
learning_rate = 5e-4
num_epochs = 5
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# No shuffle argument is passed, so batches are drawn in dataset order.
cbow_loader = DataLoader(cbow_set, batch_size=batch_size)
skipgram_loader = DataLoader(skipgram_set, batch_size=batch_size)

In [48]:
# Inspect the first batch: [context ids (B, 2W), center ids (B)].
# Note: this iterates the entire loader just to pick the first element.
list(cbow_loader)[0]

[tensor([[17, 18, 19, 10],
         [18,  0, 10,  0],
         [20, 21,  4,  2],
         [21, 22,  2,  0]]),
 tensor([ 0, 19, 22,  4])]

### CBOW 모델 학습

In [59]:
# Train the CBOW model with plain SGD, minimizing the cross-entropy between
# the predicted vocabulary scores and the center-word index.
cbow.train()
cbow = cbow.to(device)
optim = torch.optim.SGD(cbow.parameters(), lr = learning_rate)
loss_function = nn.CrossEntropyLoss()

for e in range(1, num_epochs+1):
    print('#' * 50)
    print(f'Epoch : {e}')
    for batch in tqdm(cbow_loader):
        x, y = batch
        x, y = x.to(device), y.to(device) # x: (B, 2W) context ids, y: (B) center ids
        output = cbow(x) # (B, V)
        
        optim.zero_grad() # clear gradients left over from the previous step
        loss = loss_function(output, y)
        loss.backward()
        optim.step()
        
        print(f"Train loss: {loss.item()}") # loss of this single batch
print('Finished.')

100%|██████████| 16/16 [00:00<00:00, 1010.40it/s]
100%|██████████| 16/16 [00:00<00:00, 1065.88it/s]
100%|██████████| 16/16 [00:00<00:00, 1114.51it/s]
100%|██████████| 16/16 [00:00<00:00, 1080.78it/s]
100%|██████████| 16/16 [00:00<00:00, 1053.38it/s]

##################################################
Epoch : 1
Train loss: 4.150973796844482
Train loss: 4.4851579666137695
Train loss: 3.2803590297698975
Train loss: 5.0054097175598145
Train loss: 3.5689611434936523
Train loss: 3.519221782684326
Train loss: 3.880596160888672
Train loss: 4.828391075134277
Train loss: 3.9648523330688477
Train loss: 4.016818046569824
Train loss: 3.8289847373962402
Train loss: 3.7736568450927734
Train loss: 3.3764405250549316
Train loss: 4.224390506744385
Train loss: 4.186812400817871
Train loss: 3.449195623397827
##################################################
Epoch : 2
Train loss: 3.9793829917907715
Train loss: 4.3450446128845215
Train loss: 3.1698169708251953
Train loss: 4.8731489181518555
Train loss: 3.4398107528686523
Train loss: 3.2897748947143555
Train loss: 3.7204971313476562
Train loss: 4.703166484832764
Train loss: 3.8560996055603027
Train loss: 3.8547139167785645
Train loss: 3.679326057434082
Train loss: 3.448248863220215
Train loss: 3.2470986




### SkipGram 모델 학습

In [62]:
# Train the Skip-gram model with plain SGD, minimizing the cross-entropy
# between the predicted vocabulary scores and the context-word index.
# Note: `optim` and `loss_function` are rebound here, replacing the objects
# created for CBOW training.
skipgram.train()
skipgram = skipgram.to(device)
optim = torch.optim.SGD(skipgram.parameters(), lr = learning_rate)
loss_function = nn.CrossEntropyLoss()

for e in range(1, num_epochs+1):
    print("#" * 50)
    print(f'Epoch: {e}')
    for batch in tqdm(skipgram_loader):
        x, y = batch
        x, y = x.to(device), y.to(device) # x: (B) center ids, y: (B) context ids
        output = skipgram(x) # (B, V)
        
        optim.zero_grad() # clear gradients left over from the previous step
        loss = loss_function(output, y)
        loss.backward()
        optim.step()
        
        print(f'Train loss: {loss.item()}') # loss of this single batch
print('Finished')

100%|██████████| 64/64 [00:00<00:00, 1106.55it/s]
100%|██████████| 64/64 [00:00<00:00, 1142.25it/s]
100%|██████████| 64/64 [00:00<00:00, 1277.78it/s]
  0%|          | 0/64 [00:00<?, ?it/s]

##################################################
Epoch: 1
Train loss: 4.303864479064941
Train loss: 3.3176536560058594
Train loss: 3.8682799339294434
Train loss: 3.900390386581421
Train loss: 4.208705425262451
Train loss: 4.317439556121826
Train loss: 3.7482657432556152
Train loss: 3.698716640472412
Train loss: 3.760134696960449
Train loss: 4.153445720672607
Train loss: 3.867478370666504
Train loss: 3.8022804260253906
Train loss: 3.6768083572387695
Train loss: 4.228381156921387
Train loss: 3.671078681945801
Train loss: 4.348445415496826
Train loss: 4.288251876831055
Train loss: 3.7952089309692383
Train loss: 3.8651225566864014
Train loss: 3.3352270126342773
Train loss: 3.2646682262420654
Train loss: 3.5230817794799805
Train loss: 3.406064987182617
Train loss: 3.3334126472473145
Train loss: 4.002089977264404
Train loss: 3.348836898803711
Train loss: 4.337854862213135
Train loss: 4.288518905639648
Train loss: 4.1280436515808105
Train loss: 4.527017116546631
Train loss: 3.70891332626342

100%|██████████| 64/64 [00:00<00:00, 1378.78it/s]
100%|██████████| 64/64 [00:00<00:00, 1326.79it/s]

Train loss: 3.205048084259033
Train loss: 2.6956186294555664
Train loss: 3.368621587753296
Train loss: 3.822558879852295
Train loss: 3.9110732078552246
Train loss: 4.214263916015625
Train loss: 4.072235584259033
Train loss: 3.4612653255462646
Train loss: 3.8458738327026367
Train loss: 3.9547572135925293
Train loss: 3.971832513809204
Train loss: 3.5116024017333984
Train loss: 3.459056854248047
Train loss: 4.054853916168213
Train loss: 4.012203216552734
Train loss: 4.024623870849609
Train loss: 3.7975587844848633
Train loss: 3.9030022621154785
Train loss: 3.593550205230713
Train loss: 3.5295205116271973
##################################################
Epoch: 5
Train loss: 4.202300548553467
Train loss: 3.135711193084717
Train loss: 3.7529470920562744
Train loss: 3.7249362468719482
Train loss: 4.09536075592041
Train loss: 4.193469524383545
Train loss: 3.626194477081299
Train loss: 3.5746116638183594
Train loss: 3.6570582389831543
Train loss: 4.044397354125977
Train loss: 3.73263096809387




## 테스트

학습된 각 모델을 이용하여 test 단어들의 word embedding 을 확인

In [43]:
# Print the learned CBOW embedding vector of every test word.
for word in test_words:
    # Wrap the word id in a 1-element tensor so the lookup returns shape (1, d_w).
    input_id = torch.LongTensor([w2i[word]]).to(device)
    emb = cbow.embedding(input_id)
    
    print(f'Word: {word}')
    print(emb.squeeze(0)) # drop the leading size-1 dimension -> (d_w)

Word: 음식
tensor([ 0.8414, -0.0927,  1.1790, -1.0684,  0.1599, -1.6787,  1.9308,  0.3619,
         1.5416,  0.0210, -0.9105,  0.4918, -0.6280,  1.6087, -1.6134, -0.0077,
         1.0283,  0.1557,  1.1997,  1.2250, -0.3184,  0.1810,  0.5651,  1.2415,
        -0.5179,  1.3614, -0.4105,  0.3008,  0.2566,  1.5908,  0.2766,  0.4073,
        -0.2811,  0.8772,  1.7411, -1.5562,  2.3794,  0.3100,  0.8502, -1.8186,
         0.0955, -1.6632, -0.4454,  2.1830, -0.1331, -0.1485,  2.5633, -1.6486,
        -0.9019, -0.0630,  2.0024,  2.4727, -0.7274, -1.0861, -0.4472, -1.0492,
        -0.0955, -1.0498, -0.7079,  0.2181,  0.2404, -0.0777, -0.5834, -0.9832,
        -0.2822,  0.6947, -1.3775, -0.1527, -0.3475,  1.9124, -0.5776,  1.5515,
        -0.8664, -1.0768, -0.5148,  2.4771, -0.0880,  1.9284,  0.4005, -0.3472,
        -0.5746, -0.5362, -0.9103, -1.3664, -0.9100, -0.6991, -0.8970,  0.4311,
         0.0430, -0.7363, -0.2752, -1.8620,  0.5329, -2.3359, -2.2895,  0.4180,
        -0.5917, -0.5738,  0.07

In [44]:
# `emb` is whatever the preceding loop left behind (the embedding of the last
# test word, assuming the cells ran in order). Python's built-in max()
# iterates over the tensor's elements and prints the largest component.
print(max(emb.squeeze(0)))

tensor(2.7312, grad_fn=<UnbindBackward>)


In [45]:
# Print the learned Skip-gram embedding vector of every test word.
for word in test_words:
    # Wrap the word id in a 1-element tensor so the lookup returns shape (1, d_w).
    input_id = torch.LongTensor([w2i[word]]).to(device)
    emb = skipgram.embedding(input_id)
    
    print(f'Word: {word}')
    print(emb.squeeze(0)) # drop the leading size-1 dimension -> (d_w)

Word: 음식
tensor([-2.1371e+00,  2.8544e+00, -1.0220e+00,  2.8298e+00, -8.0401e-01,
         1.1058e-01,  1.5712e+00,  9.4139e-01,  1.0837e+00, -3.1570e-01,
         4.9930e-01, -1.1790e+00,  9.7276e-01, -1.1045e+00, -6.5034e-01,
        -7.3888e-01,  6.2121e-01, -5.2914e-02, -1.9994e+00,  3.7437e-01,
        -4.0234e-01, -8.7200e-01,  9.3793e-01,  4.9113e-01,  4.1820e-01,
        -8.8620e-01,  1.1313e+00, -5.6042e-01, -1.0576e+00, -2.0703e+00,
         1.0570e+00,  3.2552e-01,  7.7578e-01,  1.3018e+00,  8.8480e-01,
        -1.3476e+00, -1.3771e+00, -1.8304e-01, -2.2765e-01,  8.9935e-01,
         1.3754e-01, -4.1829e-01,  1.2732e+00,  1.1498e+00, -3.4014e-01,
         5.3145e-01,  1.2951e+00, -4.2927e-01, -1.2027e+00,  3.2702e-01,
        -1.1742e+00,  1.6044e+00, -9.9670e-02,  1.4488e-01,  1.6443e-01,
         4.7610e-01, -1.3051e+00, -2.4355e+00,  7.9859e-01,  2.8453e-02,
         5.5095e-01,  1.1815e+00,  1.7551e-01, -1.9217e-01, -8.6884e-01,
         2.6721e-01, -6.0254e-01,  8.8265e