In [5]:
import numpy as np

# Build a 7x3 integer matrix holding 0..20 in row-major order; it plays the
# role of a small weight matrix in the indexing examples.
W = np.arange(7 * 3).reshape((7, 3))
W

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20]])

In [6]:
# Basic slicing: take column 1 from every row, giving a 1-D array.
W[:,1]

array([ 1,  4,  7, 10, 13, 16, 19])

In [8]:
# Same slice for column 2 (the last column of W).
W[:,2]

array([ 2,  5,  8, 11, 14, 17, 20])

In [12]:
# Row 1, with the column axis spelled out explicitly as a full slice.
W[1,:]

array([3, 4, 5])

In [13]:
# Indexing with a single integer selects the same row as W[1,:].
W[1]

array([3, 4, 5])

In [6]:
# Row 5 of W.
W[5]

array([15, 16, 17])

In [8]:
# Indexing with an integer array pulls several rows at once, in the order
# given; the repeated index 1 returns row 1 twice.
idx = np.array([1,3,6,1])
W[idx]

array([[ 3,  4,  5],
       [ 9, 10, 11],
       [18, 19, 20],
       [ 3,  4,  5]])

In [10]:
# embedding 계층 구현

class Embedding:
    """Embedding layer: selects rows of a weight matrix by integer index.

    Attributes:
        params: one-element list holding the weight matrix ``W``.
        grads: one-element list holding a gradient buffer shaped like ``W``.
        idx: the indices passed to the most recent ``forward`` call.
    """

    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.idx = None

    def forward(self, idx):
        """Return the rows ``W[idx]`` and remember ``idx`` for ``backward``."""
        W, = self.params
        self.idx = idx
        out = W[self.idx]
        return out

    def backward(self, dout):
        """Accumulate ``dout`` into the gradient rows selected in ``forward``.

        The gradient buffer is cleared first, then row ``dout[i]`` is added to
        row ``self.idx[i]`` of the buffer. Nothing is propagated further, so
        the method returns ``None``.
        """
        dW, = self.grads
        dW[...] = 0
        # A plain assignment `dW[self.idx] = dout` would overwrite instead of
        # summing when the same index appears more than once. np.add.at is
        # unbuffered, so repeated indices are accumulated correctly. It must
        # be called exactly once: it already handles every index in self.idx.
        np.add.at(dW, self.idx, dout)
        return None

In [1]:
# Embedding 계층과 Dot 계층 합치기 

class EmbeddingDot:
    """Embedding lookup followed by a row-wise dot product with ``h``.

    Attributes:
        embed: the wrapped ``Embedding`` layer built from ``W``.
        params, grads: the same list objects as ``embed.params`` / ``embed.grads``.
        cache: ``(h, target_W)`` saved by ``forward`` for use in ``backward``.
    """

    def __init__(self, W):
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None

    def forward(self, h, idx):
        """Return the dot product of each row of ``h`` with the row looked up by ``idx``."""
        target_W = self.embed.forward(idx)
        # Element-wise product summed along axis 1 is a per-row inner product.
        out = np.sum(h * target_W, axis=1)

        self.cache = (h, target_W)
        return out

    def backward(self, dout):
        """Back-propagate ``dout`` and return the gradient with respect to ``h``.

        Must be called after ``forward``, which fills ``self.cache``.
        """
        # Restore the operands saved by forward(); without this the names
        # h / target_W are undefined here.
        h, target_W = self.cache
        # Turn dout into a column so it broadcasts across each row.
        dout = dout.reshape(dout.shape[0], 1)

        dtarget_W = h * dout
        self.embed.backward(dtarget_W)
        dh = target_W * dout
        return dh


In [4]:
import numpy as np
# With an integer argument, one value is drawn as if from np.arange(7).
np.random.choice(7)

1

In [5]:
# Same call with a larger range: one value drawn as if from np.arange(10).
np.random.choice(10)

3

In [6]:
# Randomly sample a single element from words
words = ['you', 'say', 'goodbye', 'I', 'hello', '.']
np.random.choice(words)

'say'

In [7]:
# Randomly sample 5 elements (duplicates allowed)
np.random.choice(words, size=5)

array(['I', 'I', 'I', '.', 'you'], dtype='<U7')

In [8]:
# Randomly sample 5 elements (no duplicates)
np.random.choice(words, size=5, replace=False)

array(['you', 'goodbye', '.', 'say', 'I'], dtype='<U7')

In [9]:
# Sample according to a probability distribution (one weight per entry of words)
p = [0.5, 0.1, 0.05, 0.2, 0.05, 0.1]
np.random.choice(words, p=p)

'you'

In [10]:
# Raise each probability to the 0.75 power, then renormalise so the values
# sum to 1 again; the smallest probability gets a larger share afterwards.
p = [0.7, 0.29, 0.01]
new_p = np.asarray(p) ** 0.75
new_p = new_p / new_p.sum()
print(new_p)

[0.64196878 0.33150408 0.02652714]


In [14]:
import sys
sys.path.append('..')
from common.np import *  # import numpy as np
from common.layers import Embedding, SigmoidWithLoss
import collections


class UnigramSampler:
    """Draws negative word ids from a power-smoothed unigram distribution.

    Word frequencies are counted over ``corpus``, raised to ``power`` and
    normalised into ``word_p``. Ids are looked up as ``0 .. vocab_size - 1``,
    where ``vocab_size`` is the number of distinct ids seen in ``corpus``.
    """

    def __init__(self, corpus, power, sample_size):
        self.sample_size = sample_size

        # Frequency of every word id that occurs in the corpus.
        frequencies = collections.Counter(word_id for word_id in corpus)
        self.vocab_size = len(frequencies)

        # Dense frequency vector indexed by word id (ids never seen count as 0).
        raw = np.array([frequencies[i] for i in range(self.vocab_size)],
                       dtype=np.float64)

        # Smooth with the exponent, then normalise to a probability vector.
        smoothed = np.power(raw, power)
        self.word_p = smoothed / np.sum(smoothed)

    def get_negative_sample(self, target):
        """Return an array of shape ``(len(target), sample_size)`` of sampled ids."""
        batch_size = target.shape[0]

        if GPU:
            # When computing on the GPU (cupy), speed takes priority:
            # the negative samples may include the target itself.
            return np.random.choice(self.vocab_size,
                                    size=(batch_size, self.sample_size),
                                    replace=True, p=self.word_p)

        negative_sample = np.zeros((batch_size, self.sample_size), dtype=np.int32)
        for row, target_idx in enumerate(target):
            # Zero out the target's probability so it cannot be drawn, then
            # renormalise before sampling without replacement.
            p = self.word_p.copy()
            p[target_idx] = 0
            p /= p.sum()
            negative_sample[row, :] = np.random.choice(
                self.vocab_size, size=self.sample_size, replace=False, p=p)

        return negative_sample


In [15]:
# Toy corpus of word ids, and the targets to draw negative samples for.
corpus = np.array([0, 1, 2, 3, 4, 1, 2, 3])
target = np.array([1, 3, 0])
power, sample_size = 0.75, 2

# One row of negative samples is produced per target.
sampler = UnigramSampler(corpus, power, sample_size)
negative_sample = sampler.get_negative_sample(target)
print(negative_sample)

[[0 4]
 [0 4]
 [1 2]]


In [16]:
class NegativeSamplingLoss:
    """Loss over one positive example plus ``sample_size`` sampled negatives.

    One ``EmbeddingDot`` / ``SigmoidWithLoss`` pair is created per example:
    index 0 handles the positive target, indices 1.. handle the negatives.

    Attributes:
        sample_size: number of negative samples per target.
        sampler: ``UnigramSampler`` built from ``corpus`` and ``power``.
        loss_layers, embed_dot_layers: the ``sample_size + 1`` layer pairs.
        params, grads: concatenation of every ``EmbeddingDot`` layer's lists.
    """

    def __init__(self, W, corpus, power=0.75, sample_size=5):
        self.sample_size = sample_size
        self.sampler = UnigramSampler(corpus, power, sample_size)
        # +1 for the positive example.
        self.loss_layers = [SigmoidWithLoss() for _ in range(sample_size + 1)]
        self.embed_dot_layers = [EmbeddingDot(W) for _ in range(sample_size + 1)]

        self.params, self.grads = [], []
        for layer in self.embed_dot_layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, h, target):
        """Return the summed loss of the positive and all negative examples."""
        batch_size = target.shape[0]
        negative_sample = self.sampler.get_negative_sample(target)

        # Forward pass for the positive example (label 1).
        score = self.embed_dot_layers[0].forward(h, target)
        correct_label = np.ones(batch_size, dtype=np.int32)
        loss = self.loss_layers[0].forward(score, correct_label)

        # Forward pass for the negative examples (label 0), one column of
        # negative_sample per layer pair.
        negative_label = np.zeros(batch_size, dtype=np.int32)
        for i in range(self.sample_size):
            negative_target = negative_sample[:, i]
            score = self.embed_dot_layers[i + 1].forward(h, negative_target)
            loss += self.loss_layers[i + 1].forward(score, negative_label)
        return loss

    def backward(self, dout=1):
        """Back-propagate through every layer pair and return the summed gradient for ``h``."""
        dh = 0
        for l0, l1 in zip(self.loss_layers, self.embed_dot_layers):
            dscore = l0.backward(dout)
            dh += l1.backward(dscore)

        return dh

In [1]:
# Two lists
names = ["Min-Sik", "John", "Alice"]
scores = [85, 90, 88]

# Use zip to iterate over both lists at the same time
for name, score in zip(names, scores):
    print(f"{name}의 점수는 {score}점입니다.")


Min-Sik의 점수는 85점입니다.
John의 점수는 90점입니다.
Alice의 점수는 88점입니다.


In [9]:
# CBOW 구현
import sys
sys.path.append('..')
import numpy as np
from common.layers import Embedding
from ch04.negative_sampling_layer import NegativeSamplingLoss

class CBOW:
    """CBOW model: averaged context embeddings fed into a negative-sampling loss.

    Attributes:
        in_layers: ``2 * window_size`` ``Embedding`` layers sharing ``W_in``.
        ns_loss: ``NegativeSamplingLoss`` built on ``W_out`` and ``corpus``.
        params, grads: concatenation of every layer's parameter/gradient lists.
        word_vecs: the input weight matrix ``W_in``.
    """

    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        V, H = vocab_size, hidden_size

        # Initialise the weights with small random values (float32).
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(V, H).astype('f')

        # Create the layers: one input embedding per context position.
        self.in_layers = [Embedding(W_in) for _ in range(2 * window_size)]
        self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

        # Collect all weights and gradients into flat lists.
        layers = self.in_layers + [self.ns_loss]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Keep the distributed word representations on the instance.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for a batch; column ``i`` of ``contexts`` feeds input layer ``i``."""
        h = 0
        for i, layer in enumerate(self.in_layers):
            h += layer.forward(contexts[:, i])
        # Average the context embeddings.
        h *= 1 / len(self.in_layers)
        loss = self.ns_loss.forward(h, target)

        return loss

    def backward(self, dout=1):
        """Back-propagate ``dout`` through the loss and every input layer."""
        dout = self.ns_loss.backward(dout)
        # Gradient of the averaging step in forward().
        dout *= 1 / len(self.in_layers)
        # Iterate the instance's input layers; `layers` exists only as a
        # local variable inside __init__.
        for layer in self.in_layers:
            layer.backward(dout)
        return None

In [None]:
# CBOW 모델 학습 코드 
import sys
sys.path.append('..')
import numpy as np
from common import config
# To run on the GPU (requires CuPy), uncomment the line below.
# ===============
# config.GPU = True
# ===============
import pickle
from common.trainer import Trainer
from common.optimizer import Adam
from cbow import CBOW
from common.util import create_contexts_target, to_cpu, to_gpu
from dataset import ptb

# Hyperparameters
window_size = 5
hidden_size = 100
batch_size = 100
max_epoch = 10

# Load the data
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size)
if config.GPU:
    contexts, target = to_gpu(contexts), to_gpu(target)

# Build the model
model = CBOW(vocab_size, hidden_size, window_size, corpus)
optimizer = Adam()
trainer = Trainer(model, optimizer)

# Start training
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

# Grab the data needed for later use: the learned word vectors
# (passed through to_cpu when running on the GPU).
word_vecs = model.word_vecs
if config.GPU:
    word_vecs = to_cpu(word_vecs)
