# torch `nn.Embedding`

In [2]:
import nltk

# Fetch the NLTK resources up front; packages that are already up to date are
# not downloaded again. The value of the last call is what the cell displays.
nltk.download('punkt')      # presumably the tokenizer data used by word_tokenize — confirm
nltk.download('punkt_tab')  # presumably the companion tokenizer data — confirm
nltk.download('stopwords')  # NOTE(review): no stopword filtering appears in the visible cells — confirm this is still needed

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Playdata\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Playdata\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Playdata\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## 사전학습된 임베딩을 사용하지 않는 경우

In [3]:
sentences = [
    'nice great best amazing',  # positive example
    'stop lies',                # negative / critical example
    'pitiful nerd',             # negative example
    'excellent work',           # positive example
    'supreme quality',          # positive example
    'bad',                      # negative example
    'highly respectable'        # positive example
]                               # input sentences for the classifier (list[str])
labels = [1, 0, 0, 1, 1, 0, 1]  # binary label for each sentence, in order (1 = positive, 0 = negative)

In [None]:
# 토큰화
from nltk.tokenize import word_tokenize

# Split every sentence into its word tokens -> list[list[str]].
tokenized_sentences = list(map(word_tokenize, sentences))
tokenized_sentences

[['nice', 'great', 'best', 'amazing'],
 ['stop', 'lies'],
 ['pitiful', 'nerd'],
 ['excellent', 'work'],
 ['supreme', 'quality'],
 ['bad'],
 ['highly', 'respectable']]

In [None]:
# 단어 사전 생성 + 정수 인코딩
from collections import Counter

# Flatten the tokenized sentences into one token stream and count occurrences.
tokens = [token for sent in tokenized_sentences for token in sent]
word_counts = Counter(tokens)
print(word_counts)  # token -> frequency

# Build the word -> id mapping. Ids 0 and 1 are reserved for the special tokens
# (<PAD> fills short sequences, <UNK> replaces out-of-vocabulary words).
# Ids are handed out to the *unique* words in first-seen order. Enumerating the
# raw token stream instead would let a repeated word overwrite its id with a
# later position, leaving gaps so that the largest id exceeds len(word_to_index)
# and nn.Embedding(num_embeddings=vocab_size) fails with an index error.
word_to_index = {'<PAD>': 0, '<UNK>': 1}
for word in word_counts:
    # setdefault keeps the reserved ids even if a special token appears in the data
    word_to_index.setdefault(word, len(word_to_index))
print(word_to_index)  # already ordered by id (special tokens first)

vocab_size = len(word_to_index)  # vocabulary size including the special tokens
assert vocab_size == max(word_to_index.values()) + 1, "word ids must be contiguous"
vocab_size

Counter({'nice': 1, 'great': 1, 'best': 1, 'amazing': 1, 'stop': 1, 'lies': 1, 'pitiful': 1, 'nerd': 1, 'excellent': 1, 'work': 1, 'supreme': 1, 'quality': 1, 'bad': 1, 'highly': 1, 'respectable': 1})
{'<PAD>': 0, '<UNK>': 1, 'nice': 2, 'great': 3, 'best': 4, 'amazing': 5, 'stop': 6, 'lies': 7, 'pitiful': 8, 'nerd': 9, 'excellent': 10, 'work': 11, 'supreme': 12, 'quality': 13, 'bad': 14, 'highly': 15, 'respectable': 16}


17

In [None]:
def texts_to_sequences(sentences, word_to_index):
    """Integer-encode tokenized sentences with a word -> id vocabulary.

    Args:
        sentences: iterable of token lists (list[list[str]]).
        word_to_index: mapping from word to integer id. It must contain the
            '<UNK>' key whenever a token outside the vocabulary is encountered.

    Returns:
        list[list[int]]: one id list per sentence, in input order; tokens that
        are not in the vocabulary are encoded with the '<UNK>' id.
    """
    return [
        [
            # The '<UNK>' lookup only happens for unknown tokens.
            word_to_index[tok] if tok in word_to_index else word_to_index['<UNK>']
            for tok in token_list
        ]
        for token_list in sentences
    ]

# Encode the tokenized sentences with the vocabulary; the result is displayed as the cell output.
sequences = texts_to_sequences(tokenized_sentences, word_to_index)
sequences

[[2, 3, 4, 5], [6, 7], [8, 9], [10, 11], [12, 13], [14], [15, 16]]

In [8]:
import numpy as np

def pad_sequences(sequences, maxlen):
    """Right-pad or truncate integer sequences to a fixed length.

    Args:
        sequences: list of integer id sequences of varying length.
        maxlen: number of columns in the result.

    Returns:
        Integer array of shape (len(sequences), maxlen). Each row holds the
        first `maxlen` ids of the matching sequence, followed by zeros (the
        <PAD> id) when the sequence is shorter.
    """
    batch = np.zeros((len(sequences), maxlen), dtype=int)

    # Iterating a 2-D array yields row views, so writing into `row` fills `batch`.
    for row, ids in zip(batch, sequences):
        kept = ids[:maxlen]       # drop anything beyond maxlen
        row[:len(kept)] = kept    # the remaining slots stay 0

    return batch

padded_sequences = pad_sequences(sequences, maxlen=4)  # pad/truncate every sentence to 4 ids
padded_sequences  # array of shape (number of sentences, 4)

array([[ 2,  3,  4,  5],
       [ 6,  7,  0,  0],
       [ 8,  9,  0,  0],
       [10, 11,  0,  0],
       [12, 13,  0,  0],
       [14,  0,  0,  0],
       [15, 16,  0,  0]])

In [9]:
padded_sequences.shape  # (number of sentences, maxlen)

(7, 4)

In [None]:
# Pytorch 텍스트 분류 모델 : Embedding + RNN + Linear로 이진 분류(logit) 출력
import torch
import torch.nn as nn           # 신경망 레이어
import torch.optim as optim     # optimizers (parameter-update algorithms such as Adam), not activation functions
from torch.utils.data import DataLoader, TensorDataset  # 배치 로더 / 데이터셋 유틸

class SimpleNet(nn.Module):
    """Binary text classifier: Embedding -> single-layer RNN -> Linear.

    Takes integer token ids and returns one raw logit per sequence (no sigmoid
    is applied here).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size):
        """Build the three layers.

        Args:
            vocab_size: number of rows in the embedding table.
            embedding_dim: width of each embedding vector (also the RNN input size).
            hidden_size: width of the RNN hidden state.
        """
        super().__init__()
        # Token id -> dense vector lookup; id 0 is declared as the padding slot.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        # batch_first=True: inputs are laid out as (batch, seq_len, embedding_dim).
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_size, batch_first=True)
        # Maps the final hidden state to a single logit.
        self.out = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return logits of shape (batch, 1) for token ids `x` of shape (batch, seq_len)."""
        token_vectors = self.embedding(x)          # (batch, seq_len, embedding_dim)
        _, last_hidden = self.rnn(token_vectors)   # last_hidden: (1, batch, hidden_size) for this single-layer RNN
        # NOTE(review): the final hidden state is taken after every position has
        # been consumed, padding positions included — confirm this is intended.
        return self.out(last_hidden[-1])           # (batch, hidden_size) -> (batch, 1)
    
# Seed PyTorch's global RNG so the random weight initialisation (and any later
# shuffling that draws from the same RNG) is reproducible on Restart & Run All.
torch.manual_seed(42)

embedding_dim = 100  # width of each word vector
model = SimpleNet(vocab_size, embedding_dim, hidden_size=16)  # vocabulary size / embedding width / RNN hidden width
model

SimpleNet(
  (embedding): Embedding(17, 100, padding_idx=0)
  (rnn): RNN(100, 16, batch_first=True)
  (out): Linear(in_features=16, out_features=1, bias=True)
)

In [13]:
%pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
from torchinfo import summary  # 모델 구조를 표 형태로 요약

summary(model)  # tabulate the model's layers with per-layer and total parameter counts

Layer (type:depth-idx)                   Param #
SimpleNet                                --
├─Embedding: 1-1                         1,700
├─RNN: 1-2                               1,888
├─Linear: 1-3                            17
Total params: 3,605
Trainable params: 3,605
Non-trainable params: 0

In [None]:
# 임베딩 가중치 확인 : 학습 전/후 Embedding 테이블과 단어별 벡터 조회
import pandas as pd

# 학습 전 임베딩 벡터
# Embedding table before training: one row per vocabulary id, one column per
# embedding dimension. detach() returns a gradient-free view of the weights and
# is preferred over the legacy `.data` attribute, whose in-place modifications
# autograd cannot detect.
wv = model.embedding.weight.detach()
print(wv.shape)  # (vocab_size, embedding_dim)

# Label each row with its word. word_to_index is ordered by id, so its keys
# line up with the rows of the table.
vocab = list(word_to_index)
pd.DataFrame(wv.cpu().numpy(), index=vocab)

torch.Size([17, 100])


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
<PAD>,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
<UNK>,-0.503153,1.122852,-0.58911,-1.338089,0.537808,-0.930078,0.17486,-0.660207,0.922226,2.106441,...,-0.911769,0.12299,0.896133,0.324314,-0.060996,0.750533,0.698772,-0.087897,0.447661,-0.242309
nice,-1.100964,0.893685,0.193806,-1.329189,1.789114,-0.522488,0.262018,0.498728,-1.204775,0.857129,...,0.413068,1.536693,1.346791,0.896332,-0.065012,-1.482238,-1.226632,0.135369,1.297657,-0.205065
great,-2.082712,0.616099,-0.777128,1.908266,0.761909,1.869433,0.292291,-0.936683,-0.55584,2.168294,...,-0.371885,1.953568,0.1835,-0.158066,0.563312,-0.49219,-0.465123,-0.9582,0.576206,1.370161
best,-0.392523,0.098091,-2.461212,1.324039,0.626163,0.212848,-0.234914,-0.497381,0.345082,-1.305001,...,1.404677,1.085973,1.415279,-0.354888,0.088245,-0.147179,1.300656,1.109785,-1.24671,0.649416
amazing,-1.052736,0.373408,-0.122459,-1.373757,-0.713679,-0.755578,-0.649324,0.685076,-1.486379,0.652252,...,0.893645,0.57371,0.211526,-0.341499,-1.023671,1.03308,-0.349468,0.4929,-0.180742,0.708267
stop,-0.497697,-0.235544,-0.375438,1.334417,-1.592178,-1.880756,-0.395881,2.018922,-0.983122,0.782248,...,-0.044324,-1.229873,0.401176,-0.847899,0.147723,-0.908863,-1.022872,0.476816,1.072323,-1.172453
lies,0.024769,-0.456872,-0.664279,1.604704,0.140491,0.072229,-0.196021,2.606885,2.34544,-0.094659,...,0.120991,-0.098169,-0.863077,-0.096738,-0.500837,0.618213,0.010688,-1.475451,0.053425,-1.066209
pitiful,0.877789,1.218326,-0.743894,-0.279423,0.816912,-1.1059,-0.822485,0.553456,2.524921,1.436808,...,0.746145,-1.203568,0.519214,-0.210401,-0.593928,1.15735,1.680863,1.440359,0.477093,0.506625
nerd,-1.140655,-0.458999,-2.165972,1.278016,-1.035399,-1.723518,-0.620779,-0.368372,0.688099,-0.453923,...,-0.65656,-0.213419,0.089783,0.813107,1.426436,1.67497,-0.45608,-1.502486,-1.498011,-0.500809


In [None]:
# PyTorch training setup: convert data to tensors -> build a DataLoader -> define loss and optimizer
X = torch.tensor(padded_sequences, dtype=torch.long)      # padded token ids as a LongTensor
y = torch.tensor(labels, dtype=torch.float).unsqueeze(1)  # labels as float, reshaped (N,) -> (N, 1) to match the model's (N, 1) logits

dataset = TensorDataset(X, y)  # pairs each input row with its label
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)  # shuffled mini-batches of up to 2 samples

criterion = nn.BCEWithLogitsLoss()  # binary cross-entropy computed on raw logits (the sigmoid is applied inside the loss)
optimizer = optim.Adam(model.parameters(), lr=0.005)  # Adam over all parameters of `model`

BCEWithLogitsLoss를 사용할 때에는 모델 출력이 Sigmoid를 거치지 않은 logit이어야 한다. (손실 함수 내부에서 Sigmoid를 적용하므로, 모델에서 Sigmoid를 한 번 더 적용하면 안 된다.)

In [None]:
# Training loop: 20 epochs of mini-batch updates, printing the mean batch loss per epoch.
# Switch to training mode explicitly: if this cell is re-run after the evaluation
# cell has called model.eval(), the model would otherwise keep training in eval mode.
model.train()

for epoch in range(20):
    epoch_loss = 0.0    # running sum of the batch losses in this epoch

    for x_batch, y_batch in dataloader:    # one mini-batch of (token ids, labels)
        optimizer.zero_grad()              # clear gradients left over from the previous batch
        output = model(x_batch)            # forward pass -> logits
        loss = criterion(output, y_batch)  # loss between logits and labels
        loss.backward()                    # back-propagate to compute gradients
        optimizer.step()                   # update the parameters

        epoch_loss += loss.item()  # accumulate the batch loss as a Python float

    print(f"Epoch {epoch + 1}: Loss {epoch_loss / len(dataloader)}")  # mean loss over the batches of this epoch

Epoch 1: Loss 0.7636233568191528
Epoch 2: Loss 0.6141544878482819
Epoch 3: Loss 0.5330058261752129
Epoch 4: Loss 0.4667740762233734
Epoch 5: Loss 0.38759031891822815
Epoch 6: Loss 0.31159811466932297
Epoch 7: Loss 0.2405405081808567
Epoch 8: Loss 0.17771075293421745
Epoch 9: Loss 0.13561180606484413
Epoch 10: Loss 0.11030002310872078
Epoch 11: Loss 0.08654980733990669
Epoch 12: Loss 0.06717688962817192
Epoch 13: Loss 0.054683173075318336
Epoch 14: Loss 0.04753232840448618
Epoch 15: Loss 0.04142040014266968
Epoch 16: Loss 0.033912552054971457
Epoch 17: Loss 0.02996383048593998
Epoch 18: Loss 0.026607134845107794
Epoch 19: Loss 0.024000450037419796
Epoch 20: Loss 0.022583614569157362


In [None]:
# Evaluation: turn the trained model's logits into probabilities, threshold them
# into 0/1 predictions and print them next to the true labels.
model.eval()                          # evaluation mode
with torch.no_grad():                 # no gradient tracking needed for inference
    output = model(X)                 # logits for every sample
    prob = output.sigmoid()           # logits -> probabilities in [0, 1]
    pred = prob.ge(0.5).to(torch.int32)  # 1 where the probability is at least 0.5, else 0

print(labels)
print(pred.detach().squeeze().numpy())  # predictions as a 1-D numpy array

[1, 0, 0, 1, 1, 0, 1]
[1 0 0 1 1 0 1]


## 사전학습된 임베딩을 사용하는 경우

In [22]:
from gensim.models import KeyedVectors

# Load pretrained word vectors stored in binary word2vec format; the path is
# relative, so the file must be present in the notebook's working directory.
model_wv = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)
model_wv.vectors.shape  # (number of pretrained words, vector dimension)

(3000000, 300)

In [None]:
# Prepare the matrix that will hold the pretrained vectors for our vocabulary:
# one all-zero row per vocabulary id, as wide as the pretrained vectors.
matrix_shape = (len(word_to_index), model_wv.vectors.shape[1])
print(matrix_shape[0])  # vocabulary size

embedding_matrix = np.zeros(matrix_shape)
print(embedding_matrix.shape)

17
(17, 300)


In [32]:
# 사전학습 임베딩 매핑 : 내 단어사전을 GoogleNews 벡터로 채워 embedding_matrix 구성
# model_wv.key_to_index['bad']  # 'bad'의 내부 인덱스 확인 (706)
# model_wv.vectors[240]         # 특정 인덱스 벡터 직접 조회

def get_word_embedding(word):
    """Return the pretrained vector for `word`, or None if `model_wv` does not contain it."""
    return model_wv[word] if word in model_wv else None
    
# get_word_embedding('bad')

# Copy the pretrained vector of every vocabulary word into its row of
# embedding_matrix. Rows of the special tokens (ids 0 and 1) and of words the
# pretrained model does not know are left as zeros.
for word, index in word_to_index.items():
    if index < 2:                      # skip <PAD> / <UNK>
        continue
    emb = get_word_embedding(word)     # pretrained vector, or None when unknown
    if emb is None:
        continue
    embedding_matrix[index] = emb

In [33]:
pd.DataFrame(embedding_matrix, index=word_to_index.keys())  # show the matrix with one labelled row per vocabulary word

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
<PAD>,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
<UNK>,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
nice,0.158203,0.105957,-0.189453,0.386719,0.083496,-0.267578,0.083496,0.113281,-0.104004,0.178711,...,-0.085449,0.189453,-0.146484,0.134766,-0.040771,0.032715,0.089355,-0.267578,0.008362,-0.213867
great,0.071777,0.208008,-0.028442,0.178711,0.132812,-0.099609,0.096191,-0.116699,-0.008545,0.148438,...,-0.011475,0.064453,-0.289062,-0.048096,-0.199219,-0.071289,0.064453,-0.167969,-0.020874,-0.142578
best,-0.126953,0.021973,0.287109,0.15332,0.12793,0.032715,-0.115723,-0.029541,0.15332,0.011292,...,0.006439,-0.033936,-0.166016,-0.016846,-0.048584,-0.022827,-0.152344,-0.101562,-0.090332,0.088379
amazing,0.07373,0.004059,-0.135742,0.022095,0.180664,-0.046631,0.224609,-0.229492,-0.040039,0.225586,...,0.018433,-0.02124,-0.25,-0.020142,-0.310547,-0.207031,-0.006317,-0.141602,-0.150391,-0.137695
stop,-0.057861,0.013184,0.115234,0.069824,-0.306641,-0.044678,0.048584,0.152344,0.073242,-0.100098,...,0.100098,0.171875,-0.113281,0.064453,-0.115723,0.048096,-0.004822,0.086426,0.029907,0.007812
lies,0.149414,-0.012817,0.328125,0.025513,0.017334,0.19043,0.188477,-0.143555,-0.09082,0.206055,...,-0.308594,0.183594,-0.202148,0.031494,-0.164062,-0.201172,0.080078,-0.105469,0.149414,0.157227
pitiful,0.269531,0.253906,-0.020996,0.060303,-0.010925,0.217773,0.139648,-0.057617,0.3125,0.253906,...,-0.063477,0.132812,-0.094238,0.089355,-0.06543,-0.016235,-0.10791,-0.072266,-0.094238,0.028809
nerd,0.265625,-0.207031,-0.026611,0.419922,-0.208984,0.390625,0.164062,0.063965,0.149414,-0.0177,...,0.21582,0.125,-0.227539,-0.310547,-0.112793,-0.09668,0.255859,0.124023,-0.030273,0.082031


In [34]:
# Pytorch 텍스트 분류 모델 : Embedding + RNN + Linear로 이진 분류(logit) 출력
import torch
import torch.nn as nn           # 신경망 레이어
import torch.optim as optim     # optimizers (parameter-update algorithms such as Adam), not activation functions
from torch.utils.data import DataLoader, TensorDataset  # 배치 로더 / 데이터셋 유틸

class SimpleNet(nn.Module):
    """Binary text classifier whose embedding table starts from pretrained vectors.

    Pipeline: token ids -> Embedding -> single-layer RNN -> Linear -> one raw
    logit per sequence (no sigmoid is applied here).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size, pretrained_embeddings=None):
        """Build the layers and initialise the embedding from pretrained vectors.

        Args:
            vocab_size: number of rows expected in the embedding table.
            embedding_dim: width of each embedding vector (also the RNN input size).
            hidden_size: width of the RNN hidden state.
            pretrained_embeddings: optional array-like of shape
                (vocab_size, embedding_dim) used as the initial embedding
                weights. When omitted, the notebook-level `embedding_matrix`
                is used, so existing three-argument calls keep working.

        Raises:
            ValueError: if the pretrained matrix does not have shape
                (vocab_size, embedding_dim).
        """
        super().__init__()

        if pretrained_embeddings is None:
            # Fallback: the matrix built in an earlier notebook cell.
            pretrained_embeddings = embedding_matrix

        # torch.tensor copies the data, so training never mutates the source matrix.
        weights = torch.tensor(pretrained_embeddings, dtype=torch.float)
        if tuple(weights.shape) != (vocab_size, embedding_dim):
            raise ValueError(
                f"pretrained embeddings have shape {tuple(weights.shape)}, "
                f"expected ({vocab_size}, {embedding_dim})"
            )

        # freeze=False keeps the vectors trainable; padding_idx=0 marks id 0 as the padding slot.
        self.embedding = nn.Embedding.from_pretrained(weights, freeze=False, padding_idx=0)
        # batch_first=True: inputs are laid out as (batch, seq_len, embedding_dim).
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
        # Maps the final hidden state to a single logit.
        self.out = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return logits of shape (batch, 1) for token ids `x` of shape (batch, seq_len)."""
        embedded = self.embedding(x)     # (batch, seq_len) -> (batch, seq_len, embedding_dim)
        _, h_n = self.rnn(embedded)      # h_n: (1, batch, hidden_size) for this single-layer RNN
        return self.out(h_n[-1])         # (batch, hidden_size) -> (batch, 1)
    
# Seed PyTorch's global RNG so the random RNN/Linear initialisation (and any
# later shuffling that draws from the same RNG) is reproducible on Restart & Run All.
torch.manual_seed(42)

embedding_dim = model_wv.vectors.shape[1]  # use the pretrained vector width as the embedding width
model = SimpleNet(vocab_size, embedding_dim, hidden_size=16)  # vocabulary size / embedding width / RNN hidden width
print(model)

criterion = nn.BCEWithLogitsLoss()  # binary cross-entropy computed on raw logits (the sigmoid is applied inside the loss)
optimizer = optim.Adam(model.parameters(), lr=0.005)  # fresh Adam optimizer bound to the new model's parameters

SimpleNet(
  (embedding): Embedding(17, 300, padding_idx=0)
  (rnn): RNN(300, 16, batch_first=True)
  (out): Linear(in_features=16, out_features=1, bias=True)
)


In [35]:
# Training loop: 20 epochs of mini-batch updates, printing the mean batch loss per epoch.
# Reuses the `dataloader` built in the earlier training-setup cell.
# Switch to training mode explicitly: if this cell is re-run after the evaluation
# cell has called model.eval(), the model would otherwise keep training in eval mode.
model.train()

for epoch in range(20):
    epoch_loss = 0.0    # running sum of the batch losses in this epoch

    for x_batch, y_batch in dataloader:    # one mini-batch of (token ids, labels)
        optimizer.zero_grad()              # clear gradients left over from the previous batch
        output = model(x_batch)            # forward pass -> logits
        loss = criterion(output, y_batch)  # loss between logits and labels
        loss.backward()                    # back-propagate to compute gradients
        optimizer.step()                   # update the parameters

        epoch_loss += loss.item()  # accumulate the batch loss as a Python float

    print(f"Epoch {epoch + 1}: Loss {epoch_loss / len(dataloader)}")  # mean loss over the batches of this epoch

Epoch 1: Loss 0.6975708156824112
Epoch 2: Loss 0.5997142046689987
Epoch 3: Loss 0.4891001582145691
Epoch 4: Loss 0.38117462396621704
Epoch 5: Loss 0.2806808315217495
Epoch 6: Loss 0.20620886608958244
Epoch 7: Loss 0.15090016275644302
Epoch 8: Loss 0.1125807985663414
Epoch 9: Loss 0.0887848399579525
Epoch 10: Loss 0.06743127293884754
Epoch 11: Loss 0.053572106175124645
Epoch 12: Loss 0.04420152306556702
Epoch 13: Loss 0.03629125561565161
Epoch 14: Loss 0.03122431505471468
Epoch 15: Loss 0.026289566420018673
Epoch 16: Loss 0.02287012478336692
Epoch 17: Loss 0.020185789559036493
Epoch 18: Loss 0.018032597843557596
Epoch 19: Loss 0.016256834845989943
Epoch 20: Loss 0.014796118019148707


In [36]:
# Evaluation: turn the trained model's logits into probabilities, threshold them
# into 0/1 predictions and print them next to the true labels.
# Reuses the input tensor `X` built in the earlier training-setup cell.
model.eval()                          # evaluation mode
with torch.no_grad():                 # no gradient tracking needed for inference
    output = model(X)                 # logits for every sample
    prob = output.sigmoid()           # logits -> probabilities in [0, 1]
    pred = prob.ge(0.5).to(torch.int32)  # 1 where the probability is at least 0.5, else 0

print(labels)
print(pred.detach().squeeze().numpy())  # predictions as a 1-D numpy array

[1, 0, 0, 1, 1, 0, 1]
[1 0 0 1 1 0 1]


사전학습 임베딩을 사용했을 때에도 학습 데이터가 올바르게 분류되는지 확인한다.  
만약 틀린 샘플이 있다면, 해당 문장에서 사전학습 모델에 없는 OOV 단어(임베딩이 0 벡터로 남아 있는 단어)의 비중이 큰지 확인해 봐야 한다.