# TensorFlow Text Generation x RNN
* TensorFlow Tutorial을 참고하여 Custom RNN 모델을 작성
* TF 1.15 적용
* 셰익스피어 데이터셋을 이용해 텍스트 생성 문제
    https://www.tensorflow.org/tutorials/text/text_generation?hl=ko

In [5]:
import tensorflow as tf
import numpy as np
import os
import time

## 1. 데이터 로드

In [6]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [7]:
# 문자열 디코딩
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# 텍스트 길이
print ('텍스트의 길이: {}자'.format(len(text)))

텍스트의 길이: 1115394자


In [8]:
# 텍스트의 처음 250자를 살펴봅니다
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [9]:
# 파일의 고유 문자수를 출력합니다.
vocab = sorted(set(text))
print ('고유 문자수 {}개'.format(len(vocab)))

고유 문자수 65개


## 2. 텍스트 처리
    - 텍스트 벡터화 : 문자를 수치화해야 한다.
    - 두개의 lookup table을 만들어 문자=>숫자 맵핑, 숫자=>문자 맵핑

In [10]:
# 고유 문자(vocab)에서 인덱스로 맵핑
# {'a':0, 'b':1, ...}
char2idx = {u: i for i, u in enumerate(vocab)}
# ['a', 'b', ...]
idx2char = np.array(vocab)
# text => int(고유번호) 맵핑
text_as_int = np.array([char2idx[c] for c in text])

In [11]:
# 각 문자에 대한 정수 표현
print('{')
for char, _ in zip(char2idx, range(20)):
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '$' :   3,
  '&' :   4,
  "'" :   5,
  ',' :   6,
  '-' :   7,
  '.' :   8,
  '3' :   9,
  ':' :  10,
  ';' :  11,
  '?' :  12,
  'A' :  13,
  'B' :  14,
  'C' :  15,
  'D' :  16,
  'E' :  17,
  'F' :  18,
  'G' :  19,
  ...
}


## 3. 훈련 샘플과 타깃 생성 (tf.dataset)

In [12]:
# text를 샘플 시퀀스로 나눈다. seq_lenth 길이로
# 텍스트를 seq_length + 1 개의 청크로 나눈다.
# ex. input: "Hello", seq_length: 4 => 입력 시퀀스 "Hell", 타깃 시퀀스 "ello"

# 먼저 텍스트 벡터를 문자 인덱스의 스트림으로 변환
# seq_length: 입력에 대해 원하는 문장의 최대 길이
seq_length = 100
examples_per_epoch = len(text)//seq_length

# 훈련 샘플 / 타깃 생성
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# for i in char_dataset.take(4):
#     print(idx2char[i.numpy()])

In [13]:
print(text_as_int.shape)
print(examples_per_epoch)

(1115394,)
11153


In [14]:
iterator = char_dataset.make_initializable_iterator()
el = iterator.get_next()

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_initializable_iterator(dataset)`.


In [15]:
with tf.Session() as sess:
    sess.run(iterator.initializer)
    for i in range(5):
        print(idx2char[sess.run(el)])

F
i
r
s
t


In [16]:
# batch 생성 (배치 수 : examples_per_epoch)
seq = char_dataset.batch(seq_length+1, drop_remainder=True)

In [17]:
print(seq)

<DatasetV1Adapter shapes: (101,), types: tf.int64>


In [18]:
iterator = seq.make_initializable_iterator()
el_seq = iterator.get_next()
with tf.Session() as sess:
    sess.run(iterator.initializer)
    for i in range(5):
        item = sess.run(el_seq)
        print(repr(''.join(idx2char[item])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [19]:
# 배치 개수 확인하기
iterator = seq.make_initializable_iterator()
el_seq = iterator.get_next()

n_batches_seq = len(text)//(seq_length+1)
print(n_batches_seq)

with tf.Session() as sess:
    sess.run(iterator.initializer)
    for i in range(n_batches_seq):
        item = sess.run(el_seq)
    print("alive! %d" % i)

11043
alive! 11042


In [20]:
# chunk 생성
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = seq.map(split_input_target)

In [21]:
dataset

<DatasetV1Adapter shapes: ((100,), (100,)), types: (tf.int64, tf.int64)>

In [22]:
iterator = dataset.make_initializable_iterator()
get_next = iterator.get_next()
with tf.Session() as sess:
    sess.run(iterator.initializer)
    input_ex, target_ex = sess.run(get_next)
    print(repr(''.join(idx2char[input_ex])))
    print(repr(''.join(idx2char[target_ex])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [23]:
get_next

(<tf.Tensor 'IteratorGetNext_3:0' shape=(100,) dtype=int64>,
 <tf.Tensor 'IteratorGetNext_3:1' shape=(100,) dtype=int64>)

### Time Step
각 char에 매치되는 idx는 하나의 time step  
time step 0의 입력으로 모델은 F의 idx 를 받고 다음 문자로 i의 idx를 예측  
RNN은 현재 입력 문자 F 외에도 이전 time step의 context를 고려

In [24]:
for i, (input_idx, target_idx) in enumerate(zip(input_ex[:5], target_ex[:5])):
    print("{:4d}단계".format(i))
    print("  입력: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  예상 출력: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

   0단계
  입력: 18 ('F')
  예상 출력: 47 ('i')
   1단계
  입력: 47 ('i')
  예상 출력: 56 ('r')
   2단계
  입력: 56 ('r')
  예상 출력: 57 ('s')
   3단계
  입력: 57 ('s')
  예상 출력: 58 ('t')
   4단계
  입력: 58 ('t')
  예상 출력: 1 (' ')


훈련 배치 생성

In [25]:
BATCH_SIZE = 64
# 데이터셋을 섞을 버퍼의 크기 (전체 시퀀스를 메모리에서 섞지 않음)
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [26]:
dataset

<DatasetV1Adapter shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [27]:
# 배치 개수 확인하기
iterator = dataset.make_initializable_iterator()
get_next = iterator.get_next()

n_batches_dataset = n_batches_seq//64
print(n_batches_dataset)

with tf.Session() as sess:
    sess.run(iterator.initializer)
    for i in range(n_batches_dataset):
        item = sess.run(get_next)
    print("alive! %d" % i)

172
alive! 171


## 4. 모델 설계
![img.png](https://tensorflow.org/tutorials/text/images/text_generation_training.png)

In [28]:
# hyperparameters
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024

## A. 케라스를 이용한 모델

### Build Model

In [None]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim,
                                  batch_input_shape=[batch_size, None]),
        tf.keras.layers.LSTM(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

In [None]:
model = build_model(vocab_size=len(vocab),
                  embedding_dim=embedding_dim,
                  rnn_units=rnn_units,
                  batch_size=BATCH_SIZE)

In [None]:
# run model
iterator_tf = dataset.make_initializable_iterator()
get_next_tf = iterator_tf.get_next()

with tf.Session() as sess:
    sess.run(iterator_tf.initializer)
    input_ex, target_ex = sess.run(get_next_tf)
    
    example_batch_predictions = model(input_ex)
    print(example_batch_predictions.shape, "# (배치 크기, 시퀀스 길이, 어휘 사전 크기)")

In [None]:
model.summary()

In [None]:
example_batch_predictions

In [None]:
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
    sampled_indices = tf.squeeze(sampled_indices,axis=-1)
    sampled_indices = sampled_indices.eval()

In [None]:
sampled_indices

In [None]:
print("입력: \n", repr("".join(idx2char[input_ex[0]])))
print()
print("예측된 다음 문자: \n", repr("".join(idx2char[sampled_indices])))

### Optimizer & Loss function
이 모델은 로짓을 반환하기 때문에 from_logits 플래그를 설정해야 합니다.

In [None]:
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    example_batch_loss  = loss(target_ex, example_batch_predictions)
    print("예측 배열 크기(shape): ", example_batch_predictions.shape, " # (배치 크기, 시퀀스 길이, 어휘 사전 크기")
    print("스칼라 손실:          ", example_batch_loss.eval().mean())

In [None]:
model.compile('adam', loss=loss)
# model.compile('rmsprop', 'mse', target_tensors=[iterator.get_next()])

### Training

In [None]:
EPOCHS=10
# 체크포인트가 저장될 디렉토리
# checkpoint_dir = './training_checkpoints'
# # 체크포인트 파일 이름
# checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
#     filepath=checkpoint_prefix,
#     save_weights_only=True)

history = model.fit(dataset, epochs=EPOCHS)  #, callbacks=[checkpoint_callback])

In [None]:
# state = np.zeros((sample_x.shape[0], HIDDEN_SIZE))
    
# _total_loss, _ = sess.run([total_loss, train],
#                                feed_dict={rnn_input: sample_x,
#                                           rnn_output: sample_y,
#                                           initial_state: state})
# print(f'[Epoch:{epoch+1}] loss: {_total_loss:<7.4}', end='\r')

## B. NumPy를 이용한 모델
- Reference
    - https://victorzhou.com/blog/intro-to-rnns/
    - https://ratsgo.github.io/natural%20language%20processing/2017/03/09/rnnlstm/

### 4.1 Embedding

In [52]:
def char2tensor(char):
    tensor = np.zeros((vocab_size, ))
    tensor[char2idx[char]] = 1
    return tensor

def line2tensor(line):
    tensor = np.zeros((len(line), vocab_size, ))
    for li, letter in enumerate(line):
        tensor[li][char2idx[char]] = 1
    return tensor

def idx2tensor(idx):
    tensor = np.zeros((vocab_size, ))
    tensor[idx] = 1
    return tensor

def idxs2tensor(idxs):
    tensor = np.zeros((len(idxs), vocab_size, ))
    for li, letter in enumerate(idxs):
        tensor[li][letter] = 1
    return tensor

In [53]:
# a = [0, 1, 0, 0, ...]
char2tensor('a')

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [73]:
iterator_dataset = dataset.make_one_shot_iterator()        # 한번에 배치 만들어줌
get_next_dataset = iterator_dataset.get_next()

with tf.Session() as sess:
    input_ex_batch, target_ex_batch = sess.run(get_next_dataset)
            
    # BATCH_SIZE 만큼
    for i, (input_ex, target_ex) in enumerate(zip(input_ex_batch, target_ex_batch)):
        # tensor는 한 문장 (100, 65, 1)
        input_tensor = idxs2tensor(input_ex)
        target_tensor = idxs2tensor(target_ex)

In [55]:
get_next_dataset

(<tf.Tensor 'IteratorGetNext_6:0' shape=(64, 100) dtype=int64>,
 <tf.Tensor 'IteratorGetNext_6:1' shape=(64, 100) dtype=int64>)

In [56]:
input_ex_batch

array([[53, 44,  0, ..., 52, 43, 43],
       [61, 47, 58, ..., 15, 21, 26],
       [47, 57,  1, ..., 40, 43, 39],
       ...,
       [46, 47, 57, ...,  1, 57, 46],
       [ 1, 39, 58, ..., 53,  1, 15],
       [ 1, 46, 39, ..., 52,  2,  1]])

In [57]:
input_tensor = idxs2tensor(input_ex)
print(input_tensor.shape)
# print(input_tensor)

(100, 65)


In [125]:
np.where(input_tensor==1)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
        51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
        68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
        85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]),
 array([50, 47, 52, 45, 40, 56, 53, 49, 43,  6,  0, 13, 52, 42,  1, 57, 58,
        56, 39, 47, 45, 46, 58,  1, 39, 51,  1, 52, 53, 58, 46, 47, 52, 45,
        10,  1, 40, 59, 58,  1, 61, 46, 39, 58, 43,  5, 43, 56,  1, 21,  1,
        40, 43,  6,  0, 26, 53, 56,  1, 21,  1, 52, 53, 56,  1, 39, 52, 63,
         1, 51, 39, 52,  1, 58, 46, 39, 58,  1, 40, 59, 58,  1, 51, 39, 52,
         1, 47, 57,  0, 35, 47, 58, 46,  1, 52, 53, 58, 46, 47, 52]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 

### 4.2 Simple RNN Cell
- Many-to-many
- Tx = Ty (fixed-length)
![image](https://stanford.edu/~shervine/teaching/cs-230/illustrations/rnn-many-to-many-same-ltr.png?2790431b32050b34b80011afead1f232)

In [267]:
from src.layers import tanh, softmax

class RNNCell:    
    def __init__(self
                , input_dim=None
                , output_dim=None
                , hidden_size=32
                , initial_state=None
                , activation=tanh
                , seed=1
                ):
        '''
        ================ parameters =================
        input  : inputs
        shape  : (sequence_length, embedding_size, )
        =============================================
        output : y 
        shape  : (inputs.shape[0]  # sequence_length
                , output_dim       # embedding_size
                , )
        '''

        # init
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_size = hidden_size
        
        if output_dim is None:
            self.output_dim=hidden_size
        if initial_state is None:
            self.initial_state = np.zeros((hidden_size, ))
        else:
            self.initial_state = np.reshape(initial_state, (hidden_size, ))

        np.random.seed(seed)
        # weights
        self.Wxh = np.random.normal(size=(hidden_size, input_dim)).round(3)
        self.Whh = np.random.normal(size=(hidden_size, hidden_size)).round(3)
        self.Why = np.random.normal(size=(output_dim,  hidden_size)).round(3)
        # bias
        self.bh = np.zeros((hidden_size, )).round(3)
        self.by = np.zeros((output_dim,  )).round(3)
        self.activation = activation
        
        # back prop
        self.last_inputs = None
        self.last_hs = {0: initial_state}
        
    def forward(self, inputs):
        # tanh를 적용하는 순간 ht가 1로 수렴한다. (30 넘는 값)
        # word embedding이 필요한듯
        # 초기화 문제? yes ... normal(음수 필요), glorot_uniform(?)
    
        ht = self.initial_state # hidden state
        outputs = np.zeros((inputs.shape[0], self.output_dim)) # (100, 65, )
        states = np.zeros((inputs.shape[0], self.hidden_size))
        
        # back prop
        self.last_inputs = inputs
        self.last_hs = {0: ht}
        
        for i, xt in enumerate(inputs): # 모든 글자에 대해 (input: 'hello', xt: 'h')
            # hidden state
            ht = self.activation(self.Wxh @ xt + self.Whh @ ht + self.bh)
            self.last_hs[i+1] = ht
            # predicted output y
            y = (self.Why @ ht) + self.by
            outputs[i] = y
            states[i] = ht
        
        return outputs, states  # 100개의 y, ht
    
    def backprop(self, dy, learning_rate=1e-2):
        # dy : (output_dim x 1)
        time = len(self.last_inputs)
        
        # calculate dL/dWhy & dL/dby
        # (output_dim x hidden_size) = (output_dim x 1) * (1 x hidden_size)
        dWhy = np.expand_dims(dy, 1) @ np.expand_dims(self.last_hs[time], 0)
        dby = dy
        # init
        dWhh = np.zeros_like(self.Whh)
        dWxh = np.zeros_like(self.Wxh)
        dbh = np.zeros_like(self.bh)
        # (hidden_size x ) = (hidden_size x output_dim) x (output_dim x )
        dh = self.Why.T @ dy
        
        for t in reversed(range(time)):
            
            last_h = np.expand_dims(self.last_hs[t], 0)
            last_input = np.expand_dims(self.last_inputs[t], 0)
#             print( last_h.shape )
#             print( last_input.shape )
        
            dh_raw = None
            if self.activation == tanh:
                dh_raw = (1 - self.last_hs[t+1]**2)
            elif self.activation == sigmoid:
                dh_raw = (1 - self.last_hs[t+1]) * self.last_hs[t+1]
            else: 
                return
            # (hs, ) * (hs, )
            dh_raw *= dh
            
            # dh_raw : (hs, )
            dbh += dh_raw
            # (hs x hs) = (hs x 1) x (1 x hs)
            dWhh += np.expand_dims(dh_raw, 1) @ last_h
            # (hs x input_dim) = (hs x 1) x (1 x input_dim)
            dWxh += np.expand_dims(dh_raw, 1) @ last_input
            dh = self.Whh @ dh_raw  # ??
        
        # Update weights and biases using GD
        self.Whh -= learning_rate * dWhh
        self.Wxh -= learning_rate * dWxh
        self.Why -= learning_rate * dWhy
        self.bh -= learning_rate * dbh
        self.by -= learning_rate * dby

In [268]:
# 문장 하나에 대한 테스트 (100, 65)
output_dim = 65
hidden_size = 128
hidden_state = np.zeros((hidden_size, ))

rnn = RNNCell(input_dim=vocab_size  # feature 수
           , output_dim=output_dim
           , hidden_size=hidden_size
           , initial_state=hidden_state)

output, hidden_state = rnn.forward(input_tensor)

print("======== output ========")
print("shape: %s" % str(output.shape))
print(np.argmax(output, 1))
# print([np.argmax(item, axis=0) for item in output])

shape: (100, 65)
[48 10 60 20 61 58 43 10 38 54 60 13 11 43  6 47 31 58 13 57 61 48 38 58
 54 62  9 54 11 32 32 27 20  3 48 51 50  2 11 60 11 51 29 57 58 21 36 49
 33 36  2  8  5 37 60 60 59 36 57 57 18 50 50 32  7 43 62 59 45 63 27 36
 60 47 26 49 55 59 63 55 36 54 31 37 30  9 28 27 33 17  1 58 46 21 21 18
 31 18 11  6]


In [269]:
# BACKPROP
print("======== backprop ========")
# calculate dL/dy : Loss에 대한 y의 gradient
dLdy = sum(output - target_tensor)
# dLdy[target_tensor] -= 1  # ??
rnn.backprop(dLdy)
print("done....")

done....


### 4.3 Training Model

In [270]:
def cross_entropy(y_hat, y):
    return - np.log([item[y[i]] for (i, item) in enumerate(y_hat)])

In [278]:
from src.layers import softmax

# hyperparameters
input_dim = vocab_size
output_dim = vocab_size  # embedding_size = 65
hidden_size = vocab_size
epochs = 1
n_batches = n_batches_dataset
backprop = False
# init
hidden_state = np.zeros((hidden_size, ))
max_output = np.zeros((BATCH_SIZE, seq_length, ), int)
losses = list()
# dataset
iterator_dataset = dataset.make_initializable_iterator() # get_next로 하나씩 가져옴
# iterator_dataset = dataset.make_one_shot_iterator()        # 한번에 배치 만들어줌
get_next_dataset = iterator_dataset.get_next()

with tf.Session() as sess:
    
    for e in range(epochs):
        
        sess.run(iterator_dataset.initializer)
        num_correct = 0
        loss = 0.
        
        rnn = RNNCell(input_dim=input_dim
               , output_dim=output_dim
               , hidden_size=hidden_size
               , initial_state=hidden_state)
            
        for n in range(n_batches):
            
            # load dataset batch
            # input_ex_batch: (n_batches, batch_size, seq_length)
            input_ex_batch, target_ex_batch = sess.run(get_next_dataset)
            
            
            for i, (input_ex, target_ex) in enumerate(zip(input_ex_batch, target_ex_batch)): 
                
                # tensor는 한 문장 (100, 65, 1)
                input_tensor = idxs2tensor(input_ex)
                
                # FORWARD
                outputs, states = rnn.forward(input_tensor)
                outputs = softmax(outputs)
                # (100, 65, 1) 중에 가장 가능성 높은 것만 선택 -> (100, )
                # argmax default(flatten), axis 0(행), axis 1(열)
                max_output[i] = np.argmax(outputs, 1)

                # loss & accuracy -- working
                # MSE
                # E(y_true, y_pred) = 1/n * sum(y_pred - y_pred)
#                 loss += 1/seq_length * [item[target_ex[i]] - 1 for (i, item) in enumerate(outputs)]
                loss += sum(cross_entropy(outputs, target_ex)) / seq_length
                num_correct += np.sum(max_output[i] == target_ex)

                # BACKPROP
                if backprop:
                    # calculate dL/dy : Loss에 대한 y의 gradient
                    dLdy = (outputs - target_tensor)[-1]
                    rnn.backprop(dLdy)

        print("EPOCH %d" % e)
        print("loss %.3f" % loss)
        print("accuracy %.3f" % (num_correct / (n_batches * BATCH_SIZE * seq_length)))
        print()

EPOCH 0
loss 312300.408
accuracy 0.016



In [279]:
# np.log([out[target_ex[i]] for (i, out) in enumerate(outputs)])

In [280]:
outputs.shape

(100, 65)

In [284]:
ans = ""
for item in max_output[0]:
    ans += idx2char[item]
ans

"i3zF!GnfGi!L&GnykMUoy!zra$ecNA!duSPASDTZSb$$T$\nqazaGduhR jvpSrO.cpd3jrdq;bSRRO,IW-NSo!RVR$RNN.OfIlG'"

번외 테스트
- array sequence
- short string ("hello" => "ello!")

In [285]:
from sklearn.preprocessing import MinMaxScaler

# scaling
scaler = MinMaxScaler()

# 5x4 data
input_test = np.array([ 
           [1,2,3,4]
         , [2,3,4,5]
         , [3,4,5,9]
         , [4,5,9,3]
         , [5,9,3,1] ])

target_test = np.array([
           [2,3,4,5]
         , [3,4,5,9]
         , [4,5,9,3]
         , [5,9,3,1]
         , [9,3,1,1] ])

In [286]:
input_test = scaler.fit_transform(input_test)
input_test

array([[0.        , 0.        , 0.        , 0.375     ],
       [0.25      , 0.14285714, 0.16666667, 0.5       ],
       [0.5       , 0.28571429, 0.33333333, 1.        ],
       [0.75      , 0.42857143, 1.        , 0.25      ],
       [1.        , 1.        , 0.        , 0.        ]])

In [287]:
target_test = scaler.fit_transform(target_test)
target_test

array([[0.        , 0.        , 0.375     , 0.5       ],
       [0.14285714, 0.16666667, 0.5       , 1.        ],
       [0.28571429, 0.33333333, 1.        , 0.25      ],
       [0.42857143, 1.        , 0.25      , 0.        ],
       [1.        , 0.        , 0.        , 0.        ]])

In [291]:
# hyperparameters
input_dim = 1
output_dim = 1 #4
hidden_size = 1 #3
epochs = 1
# init
hidden_state = np.zeros((hidden_size, ))
# max_output = np.zeros((BATCH_SIZE, seq_length, ), int)
losses = list()

with tf.Session() as sess:
    
    rnn_test = RNNCell(input_dim=input_dim
                   , output_dim=output_dim
                   , hidden_size=hidden_size
                   , initial_state=hidden_state)
    
    for e in range(epochs):
        
        loss = 0
        num_correct = 0
        
        for i, (inputs, targets) in enumerate(zip(input_test, target_test)):

            # FORWARD
            outputs, states = rnn_test.forward(inputs)

            # loss & accuracy -- working
            # E(y_true, y_pred) = -y_true * log(y_pred)  
            #                   => -sum( E (y_true, y_pred) )
            loss += .5 * sum([(out[0][0] - targets[j])**2 for j, out in enumerate(outputs)])
            num_correct += np.sum(outputs == targets)
            
            # BACKPROP
            # calculate dL/dy : Loss에 대한 y의 gradient
            
            print(f"output:{outputs}")
            print(f"target:{targets}")
            
            print(f"output:{outputs[-1]}")
            print(f"target:{targets[-1]}")
            
            dLdy = outputs[-1] - targets[-1]
            rnn_test.backprop(dLdy)

        print("loss %.3f" % loss)
        print("accuracy %.3f" % (num_correct / (i+1)))
        print()

ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)

In [292]:
outputs

array([[1.37490094e-12, 2.20687376e-13, 2.11974007e-11, ...,
        7.52281899e-15, 3.64914921e-11, 7.00585116e-14],
       [5.65793220e-14, 2.01931417e-10, 2.65762810e-10, ...,
        1.52667898e-10, 7.88839626e-14, 2.37505521e-07],
       [9.56872016e-15, 1.43173296e-10, 1.46095722e-08, ...,
        5.49537259e-16, 5.74108245e-09, 1.25803372e-10],
       ...,
       [1.73762316e-08, 1.96644774e-14, 8.11756036e-12, ...,
        5.01853863e-18, 5.20082673e-09, 4.74739202e-11],
       [1.56023482e-14, 6.72361408e-08, 9.39449015e-15, ...,
        1.78614663e-10, 7.03859159e-19, 4.93649767e-12],
       [9.48138002e-12, 1.06062815e-11, 2.99409159e-14, ...,
        2.17395820e-13, 8.86660691e-13, 1.86445926e-11]])

## C. TensorFlow를 이용한 모델