<a href="https://colab.research.google.com/github/junieberry/DL-fromScratch2/blob/main/improvedCBOW.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
cd /content/drive/MyDrive/밑시딥/deep-learning-from-scratch-2/ch04

/content/drive/MyDrive/밑시딥/deep-learning-from-scratch-2/ch04


In [None]:
import sys
sys.path.append('..')
import numpy as np
from negative_sampling_layer import UnigramSampler

## Embedding

In [None]:
class Embedding:
  """Embedding layer: selects rows of a weight matrix by index.

  Attributes:
    params: One-element list holding the weight matrix ``W``.
    grads: One-element list holding a zero-initialised gradient buffer
      created with ``np.zeros_like(W)``.
    idx: Indices given to the most recent ``forward`` call
      (``None`` until ``forward`` has been called).
  """

  def __init__(self, W):
    self.params = [W]
    self.grads = [np.zeros_like(W)]
    self.idx = None

  def forward(self, idx):
    """Return ``W[idx]`` and remember ``idx`` for the backward pass."""
    W, = self.params  ## single-element unpacking
    self.idx = idx
    return W[idx]

  def backward(self, dout):
    """Accumulate ``dout`` into the gradient rows chosen in ``forward``.

    The gradient buffer is cleared in place first, so the array object
    stored in ``grads`` stays the same. Returns ``None``.
    """
    dW, = self.grads
    dW[...] = 0

    ## np.add.at performs an unbuffered in-place add, so an index that
    ## appears several times in self.idx receives the sum of all of its
    ## corresponding rows of dout.
    np.add.at(dW, self.idx, dout)
    return None


In [None]:
class EmbeddingDot:
  """Embedding lookup followed by a row-wise dot product with ``h``.

  Attributes:
    embed: The wrapped ``Embedding`` layer built from ``W``.
    params: The same list object as ``embed.params``.
    grads: The same list object as ``embed.grads``.
    cache: ``(h, target_W)`` stored by ``forward`` for use in ``backward``
      (``None`` until ``forward`` has been called).
  """

  def __init__(self, W):
    self.embed = Embedding(W)
    self.params = self.embed.params
    self.grads = self.embed.grads
    self.cache = None  ## holds the forward-pass values needed by backward

  def forward(self, h, idx):
    """Return, for every row, the dot product of ``h`` and the looked-up row.

    Args:
      h: Hidden-layer activations; assumed to be 2-D with one row per
        sample, since the product is summed over ``axis=1``.
      idx: Indices passed to the embedding lookup, one per row of ``h``.
    """
    target_W = self.embed.forward(idx)
    ## Element-wise product summed over axis 1 == per-row dot product.
    out = np.sum(target_W * h, axis=1)

    self.cache = (h, target_W)
    return out

  def backward(self, dout):
    """Propagate ``dout`` to the embedding rows and return the gradient for ``h``."""
    h, target_W = self.cache
    ## Make dout a column so that it broadcasts across each row.
    dout = dout.reshape(dout.shape[0], 1)

    dtarget_W = dout * h
    self.embed.backward(dtarget_W)
    dh = dout * target_W
    return dh


In [None]:
## Single-element unpacking versus plain assignment
W = np.arange(21).reshape(7, 3)
params = [W]
(w,) = params  ## unpacking: w is the array stored in the list
print(w)
w = params     ## plain assignment: w is the list itself
print(w)


## Summing over everything, over axis 0, and over axis 1
a = np.array([[0, 1, 2], [9, 10, 11], [3, 4, 5]])

print(a.sum())
print(a.sum(axis=0))
print(a.sum(axis=1))

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]
 [15 16 17]
 [18 19 20]]
[array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20]])]
45
[12 15 18]
[ 3 30 12]


## Negative sampling

In [None]:
from common.layers import SigmoidWithLoss

class NegativeSamplingLoss:
  """Summed loss over one positive target and ``sample_size`` negatives.

  Holds ``sample_size + 1`` pairs of (``EmbeddingDot``, ``SigmoidWithLoss``)
  layers. Pair 0 scores the positive target with label 1; the remaining
  pairs score targets drawn from a ``UnigramSampler`` with label 0.

  Attributes:
    params: Concatenation of the ``params`` lists of every EmbeddingDot layer.
    grads: Concatenation of the ``grads`` lists of every EmbeddingDot layer.
  """

  def __init__(self, W, corpus, power=0.75, sample_size=5):
    self.sample_size = sample_size
    self.sampler = UnigramSampler(corpus, power, sample_size)

    ## Layer 0 serves the positive sample, layers 1.. serve the negatives.
    n_layers = sample_size + 1
    self.loss_layers = [SigmoidWithLoss() for _ in range(n_layers)]
    self.embed_dot_layers = [EmbeddingDot(W) for _ in range(n_layers)]

    self.params = [p for dot in self.embed_dot_layers for p in dot.params]
    self.grads = [g for dot in self.embed_dot_layers for g in dot.grads]

  def forward(self, h, target):
    """Return the positive-sample loss plus the loss of every negative sample."""
    batch_size = target.shape[0]
    negative_sample = self.sampler.get_negative_sample(target)

    positive_label = np.ones(batch_size, dtype=np.int32)
    negative_label = np.zeros(batch_size, dtype=np.int32)

    ## Positive sample
    score = self.embed_dot_layers[0].forward(h, target)
    loss = self.loss_layers[0].forward(score, positive_label)

    ## Negative samples: column k-1 of the sampled ids goes to layer pair k
    for k in range(1, self.sample_size + 1):
      score = self.embed_dot_layers[k].forward(h, negative_sample[:, k - 1])
      loss += self.loss_layers[k].forward(score, negative_label)
    return loss

  def backward(self, dout=1):
    """Run every layer pair backwards and return the summed gradient for ``h``."""
    dh = 0
    for loss_layer, dot_layer in zip(self.loss_layers, self.embed_dot_layers):
      dh += dot_layer.backward(loss_layer.backward(dout))
    return dh

In [None]:

## UnigramSampler example

corpus = np.array([0,1,2,3,4,1,2,3])
power=0.75
sample_size=2

## power = exponent applied to the probability distribution (default 0.75)
## sample_size = number of negative samples to draw
sampler=UnigramSampler(corpus, power, sample_size)
## target = the positive examples
target =np.array([1,3,0])
negative_sample = sampler.get_negative_sample(target)
print(negative_sample)

[[2 4]
 [2 0]
 [3 1]]


In [None]:


## Sample one integer at random from 0-9
print(np. random.choice(10))

## Sample one element at random from word
word=['a','b','c','d','e','f']
print(np.random.choice(word))

## Sample two elements at random from word (duplicates allowed)
print(np.random.choice(word, size=2))

## Sample two elements at random from word (no duplicates)
print(np.random.choice(word, size=2, replace=False))

## Sample according to the probability distribution p
p = [0.5, 0.1, 0.05, 0.2, 0.05, 0.1]
print(np.random.choice(word,p=p))


7
b
['d' 'd']
['c' 'a']
e


## CBOW

In [None]:
from common.layers import MatMul, SoftmaxWithLoss

class CBOW:
  """CBOW model: averaged context embeddings fed to a negative-sampling loss.

  Args:
    vocab_size: Number of rows of both weight matrices.
    hidden_size: Number of columns of both weight matrices.
    window_size: Context size on each side; ``2 * window_size`` input
      Embedding layers are created, all sharing ``W_in``.
    corpus: Passed through to ``NegativeSamplingLoss``.

  Attributes:
    params: ``params`` of every input layer followed by those of the loss.
    grads: ``grads`` of every input layer followed by those of the loss.
    word_vecs: The input-side weight matrix ``W_in``.
  """

  def __init__(self, vocab_size, hidden_size, window_size, corpus):
    V, H = vocab_size, hidden_size

    ## Initialise the weights as 32-bit floats.
    ## Both matrices are (V, H): the output-side matrix is also consumed
    ## through an Embedding lookup, which indexes rows by word id, so it
    ## needs one row per vocabulary entry and one column per hidden unit.
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(V, H).astype('f')

    ## One input Embedding layer per context position.
    self.in_layers = [Embedding(W_in) for _ in range(2 * window_size)]
    self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

    ## Collect every weight and gradient in flat lists.
    layers = self.in_layers + [self.ns_loss]
    self.params, self.grads = [], []
    for layer in layers:
      self.params += layer.params
      self.grads += layer.grads

    ## Keep the distributed word representations accessible.
    self.word_vecs = W_in

  def forward(self, contexts, target):
    """Average the context embeddings and return the negative-sampling loss.

    Column ``i`` of ``contexts`` is fed to input layer ``i``.
    """
    h = 0
    for i, layer in enumerate(self.in_layers):
      h += layer.forward(contexts[:, i])
    h *= 1 / len(self.in_layers)
    loss = self.ns_loss.forward(h, target)
    return loss

  def backward(self, dout=1):
    """Back-propagate through the loss and every input layer; returns ``None``."""
    dout = self.ns_loss.backward(dout)
    ## Gradient of the averaging step in forward.
    dout *= 1 / len(self.in_layers)
    for layer in self.in_layers:
      layer.backward(dout)
    return None

In [None]:
import sys
sys.path.append('..')
from common import config

# config.GPU = True

import pickle
from common.trainer import Trainer
from common.optimizer import Adam
from common.util import create_contexts_target, to_cpu, to_gpu
from dataset import ptb

## Hyperparameters
window_size = 5
hidden_size = 1000
batch_size = 100
max_epoch = 10

## Load the PTB training split and build (contexts, target) pairs
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size)
# if config.GPU:
#   contexts, target = to_gpu(contexts), to_gpu(target)

## Model, optimizer and trainer
model = CBOW(vocab_size, hidden_size, window_size, corpus)
optimizer = Adam()
trainer = Trainer(model, optimizer)

## Training
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

## Save the word vectors (as float16) together with both vocabulary maps
word_vecs = to_cpu(model.word_vecs) if config.GPU else model.word_vecs
params = {
  'word_vecs': word_vecs.astype(np.float16),
  'word_to_id': word_to_id,
  'id_to_word': id_to_word,
}
pkl_file = 'CBOW_params.pkl'
with open(pkl_file, 'wb') as f:
  ## HIGHEST_PROTOCOL is what a negative protocol number selects
  pickle.dump(params, f, pickle.HIGHEST_PROTOCOL)