In [1]:
import torch
from torch import nn

In [2]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

Using cuda device


In [3]:
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right (window size)
EMDEDDING_DIM = 100  # word vector에 dimension
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split() 
# split은 공백을 기준으로 토큰화 시키는 것
print(raw_text)
print(len(raw_text))

['We', 'are', 'about', 'to', 'study', 'the', 'idea', 'of', 'a', 'computational', 'process.', 'Computational', 'processes', 'are', 'abstract', 'beings', 'that', 'inhabit', 'computers.', 'As', 'they', 'evolve,', 'processes', 'manipulate', 'other', 'abstract', 'things', 'called', 'data.', 'The', 'evolution', 'of', 'a', 'process', 'is', 'directed', 'by', 'a', 'pattern', 'of', 'rules', 'called', 'a', 'program.', 'People', 'create', 'programs', 'to', 'direct', 'processes.', 'In', 'effect,', 'we', 'conjure', 'the', 'spirits', 'of', 'the', 'computer', 'with', 'our', 'spells.']
62


In [4]:
# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text) # set은 중복 없애줌
print(vocab)
vocab_size = len(vocab)
print(vocab_size)

#dictionary 구조이고, key를 word로 value값를 idx로
word_to_idx = {word:idx for idx, word in enumerate(vocab)}
print('word_to_idx:',word_to_idx)
# word 를 index해주는거 
# word_to_idx = {}
# for idx, word in enumerate(vocab):
#   word_to_idx[word] = idx # key 값을 [word] 그에 해당하는 값을 idx

idx_to_word = {idx:word for idx, word in enumerate(vocab)} 
print('idx_to_word:',idx_to_word)

data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))
  
# 앞뒤에 2개 단어 를 

{'called', 'evolve,', 'they', 'we', 'programs', 'computational', 'program.', 'spells.', 'data.', 'of', 'to', 'create', 'by', 'process.', 'conjure', 'rules', 'The', 'things', 'are', 'effect,', 'study', 'direct', 'about', 'process', 'idea', 'pattern', 'computers.', 'evolution', 'our', 'that', 'directed', 'People', 'abstract', 'We', 'spirits', 'is', 'In', 'the', 'with', 'other', 'a', 'As', 'manipulate', 'processes.', 'computer', 'Computational', 'inhabit', 'processes', 'beings'}
49
word_to_idx: {'called': 0, 'evolve,': 1, 'they': 2, 'we': 3, 'programs': 4, 'computational': 5, 'program.': 6, 'spells.': 7, 'data.': 8, 'of': 9, 'to': 10, 'create': 11, 'by': 12, 'process.': 13, 'conjure': 14, 'rules': 15, 'The': 16, 'things': 17, 'are': 18, 'effect,': 19, 'study': 20, 'direct': 21, 'about': 22, 'process': 23, 'idea': 24, 'pattern': 25, 'computers.': 26, 'evolution': 27, 'our': 28, 'that': 29, 'directed': 30, 'People': 31, 'abstract': 32, 'We': 33, 'spirits': 34, 'is': 35, 'In': 36, 'the': 37,

In [5]:
print(data)

[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea'), (['the', 'idea', 'a', 'computational'], 'of'), (['idea', 'of', 'computational', 'process.'], 'a'), (['of', 'a', 'process.', 'Computational'], 'computational'), (['a', 'computational', 'Computational', 'processes'], 'process.'), (['computational', 'process.', 'processes', 'are'], 'Computational'), (['process.', 'Computational', 'are', 'abstract'], 'processes'), (['Computational', 'processes', 'abstract', 'beings'], 'are'), (['processes', 'are', 'beings', 'that'], 'abstract'), (['are', 'abstract', 'that', 'inhabit'], 'beings'), (['abstract', 'beings', 'inhabit', 'computers.'], 'that'), (['beings', 'that', 'computers.', 'As'], 'inhabit'), (['that', 'inhabit', 'As', 'they'], 'computers.'), (['inhabit', 'computers.', 'they', 'evolve,'], 'As'), (['computers.', 'As', 'evolve,', 'processes']

In [30]:
# data에 있는 context(ex:['We', 'are', 'to', 'study'])을 index화 해주고 torch.tensor로 바꿔주겠다
def make_context_vector(context, word_to_idx):
    # context에 있는 w를 word_to_idx에서 가져와서 idxs 담겠다.
    idxs = [word_to_idx[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)
    # torch.long은 보통 인데스 나타낼때 사용하는 data type

    # word_to_idx = {'word':i, ...}
con = ['We', 'are', 'to', 'study']
make_context_vector(con, word_to_idx)


tensor([33, 18, 10, 20])

In [26]:
class CBOW(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()
 
        #out: 1 x emdedding_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)

        #out: 1 x vocab_size
        self.linear = nn.Linear(embedding_dim, vocab_size)
        self.activation_function = nn.LogSoftmax(dim = -1)
        # LogSoftmax 는 softmax값에 log 취한것, neumaric stablity때문

    def forward(self, inputs):  #input으로 context가 idx로 변환 tensor가 옴
        individual_embeds = self.embeddings(inputs) # 4*100 (4: 2*CONTEXT_SIZE)
        # torch.Size([4, 100])
        # embddings을 lookup한 값들 총 4개의 1*100 array가 있겠죠
        embeds = (1/4)*(individual_embeds[0] + individual_embeds[1] + individual_embeds[2] + individual_embeds[3]) # 100*1 위에 4개를 평균 낸것 
        embeds = embeds.view(1,-1) # 1*100 형태를 reshape해주는것
        z = self.linear(embeds)
        log_y_hat = self.activation_function(z)
        return log_y_hat

    def get_word_emdedding(self, word):
        word = torch.tensor([word_to_idx[word]],dtype=torch.long)
        return self.embeddings(word).view(1,-1)

In [48]:
model = CBOW(vocab_size, EMDEDDING_DIM).to(device)

loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# TRAINING
for epoch in range(55):
    total_loss = 0

    for context, target in data:
        context_vector = make_context_vector(context, word_to_idx).to(device)  
        y = torch.tensor([word_to_idx[target]]).to(device)
        
        log_y_hat = model(context_vector)

        total_loss += loss_function(log_y_hat, y)

    #optimize at the end of each epoch
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

In [49]:
#TESTING
context = ['We','are','to','study']
context1 = ['are','about','study','the']
context2 = ['Computational','processes','abstract','beings']
context_vector = make_context_vector(context, word_to_idx).to(device)
context_vector1 = make_context_vector(context1, word_to_idx).to(device)
context_vector2 = make_context_vector(context2, word_to_idx).to(device)


new_y_hat1 = model(context_vector1)
new_y_hat = model(context_vector)
new_y_hat2 = model(context_vector2)
#Print result
print(f'Raw text: {" ".join(raw_text)}\n')
print(f'Context: {context}\n')
print(f'Prediction: {idx_to_word[torch.argmax(new_y_hat[0]).item()]}')
print('\n')
print(f'Context: {context1}\n')
print(f'Prediction: {idx_to_word[torch.argmax(new_y_hat1[0]).item()]}')
print('\n')
print(f'Context: {context2}\n')
print(f'Prediction: {idx_to_word[torch.argmax(new_y_hat2[0]).item()]}')


Raw text: We are about to study the idea of a computational process. Computational processes are abstract beings that inhabit computers. As they evolve, processes manipulate other abstract things called data. The evolution of a process is directed by a pattern of rules called a program. People create programs to direct processes. In effect, we conjure the spirits of the computer with our spells.

Context: ['We', 'are', 'to', 'study']

Prediction: about


Context: ['are', 'about', 'study', 'the']

Prediction: to


Context: ['Computational', 'processes', 'abstract', 'beings']

Prediction: are
