In [1]:
import numpy as np

# Number of timesteps in the input sequence
timesteps = 100
# Dimensionality of the input feature space
input_features = 32
# Dimensionality of the output feature space
output_features = 64

# Random input sequence, one row per timestep: (timesteps, input_features)
inputs = np.random.random((timesteps, input_features))

# Initial state: an all-zero vector
state_t = np.zeros((output_features,))

# Randomly initialised parameters: input weights W, recurrent weights U, bias b
W = np.random.random((output_features, input_features))
U = np.random.random((output_features, output_features))
b = np.random.random((output_features, ))

successive_outputs = []

for input_t in inputs:
    # Contribution of the current input and of the previous state
    input_term = W.dot(input_t)
    recurrent_term = U.dot(state_t)
    # Combine both with the bias and squash through tanh
    output_t = np.tanh(input_term + recurrent_term + b)
    successive_outputs += [output_t]
    # This timestep's output is the state seen by the next timestep
    state_t = output_t

# Final output: a 2D tensor of shape (timesteps, output_features)
final_output_sequence = np.stack(successive_outputs, axis=0)

In [3]:
W, W.shape

(array([[0.83517397, 0.16119646, 0.85267148, ..., 0.63245986, 0.1791181 ,
         0.7725546 ],
        [0.18068833, 0.66596178, 0.59673225, ..., 0.41689787, 0.98662133,
         0.3690927 ],
        [0.5858957 , 0.12694827, 0.85531232, ..., 0.76941192, 0.49228704,
         0.27612495],
        ...,
        [0.59045437, 0.66490014, 0.49433664, ..., 0.85170889, 0.25125989,
         0.57779365],
        [0.80463066, 0.61489048, 0.3247619 , ..., 0.91803868, 0.9284634 ,
         0.75466422],
        [0.59371342, 0.55129162, 0.85726098, ..., 0.77591821, 0.44188222,
         0.65531992]]),
 (64, 32))

In [4]:
len(W[0])

32

In [5]:
inputs, inputs.shape

(array([[0.68332689, 0.26877599, 0.71641477, ..., 0.51105412, 0.84465145,
         0.52471106],
        [0.54742386, 0.52258646, 0.52116589, ..., 0.18313729, 0.40149681,
         0.26247227],
        [0.53272843, 0.42567461, 0.93242042, ..., 0.71091917, 0.29542666,
         0.17511571],
        ...,
        [0.73951361, 0.12644467, 0.51074846, ..., 0.6891985 , 0.04314439,
         0.62952236],
        [0.45752691, 0.10785195, 0.17904153, ..., 0.99050963, 0.28407869,
         0.08454627],
        [0.12326639, 0.05758784, 0.80997825, ..., 0.83268207, 0.77702384,
         0.0205653 ]]),
 (100, 32))

In [6]:
len(inputs[0])

32

In [8]:
# Input contribution for the first timestep: W times the first input row
i = W.dot(inputs[0])

In [10]:
i.shape, inputs[0].shape

((64,), (32,))

In [11]:
inputs[0]

array([0.68332689, 0.26877599, 0.71641477, 0.03283676, 0.41309121,
       0.70628209, 0.88247382, 0.7108023 , 0.96945409, 0.18985963,
       0.722335  , 0.80728414, 0.55288248, 0.25819481, 0.32546263,
       0.69419279, 0.61720801, 0.05321311, 0.46804393, 0.40919045,
       0.94335757, 0.49730263, 0.38001989, 0.56421185, 0.13585384,
       0.72227158, 0.41593894, 0.88857563, 0.95060307, 0.51105412,
       0.84465145, 0.52471106])

In [12]:
i[0]

8.834915150787275

In [13]:
i  # 64 차원의 벡터

array([ 8.83491515, 11.55752321, 10.01411815,  8.5137758 ,  7.70835746,
        7.26686396,  9.46845283,  9.447725  ,  8.7960813 ,  7.72351804,
        8.42707716,  9.48733133,  7.7348807 ,  9.23370699,  7.82164123,
        9.41837301,  7.69425522,  8.78456866,  8.51813932,  9.85597795,
        8.86473189,  9.10171614,  9.32379754,  9.64709005, 10.08715702,
        8.8487719 ,  9.20073921,  7.17968146,  7.3433291 ,  9.20172147,
        8.34146958,  9.28289182,  7.70920841,  8.1881492 ,  9.31642225,
        8.95722161,  7.77094691,  8.5090749 ,  8.5489809 , 10.33892524,
        8.51330402,  9.40113692,  7.65962074,  9.29443024,  7.29696802,
        8.29991986,  8.9368344 ,  8.87065539,  8.49616132,  9.67546549,
        8.16373112,  8.66205465, 10.2142733 ,  9.21684408,  9.56705742,
        8.49236525,  8.72377941,  6.90130715,  7.90870445,  9.1756249 ,
        8.49781064,  7.98166427,  9.74418733,  8.23641767])

In [14]:
U.shape, state_t.shape

((64, 64), (64,))

In [15]:
# Recurrent contribution: U times the current state.
# NOTE(review): if the RNN loop cell has already run, state_t is the last
# timestep's output rather than the zero initial state, so u is non-zero.
u = U.dot(state_t)

In [16]:
u.shape

(64,)

In [17]:
u[0]

35.35078793165137

In [18]:
b.shape

(64,)

In [19]:
b[0]

0.040992439343406684

In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras import preprocessing

# Number of words used as features (the 10,000 most frequent words)
max_features = 10000
# Length of each text: how many word indices are kept per review
maxlen = 20

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Convert the lists of word indices into 2D integer tensors of shape
# (samples, maxlen). pad_sequences() can be told where to pad; only maxlen
# is passed here, so its defaults apply.
x_train, x_test = (
    preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [21]:
# Reload the IMDB splits with the vocabulary capped at max_features.
# This rebinds x_train/x_test, replacing any padded arrays assigned earlier.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

In [22]:
# Preview the last `maxlen` word indices of the first three reviews.
# Slicing replaces the manual counter/break, so the notebook-global `i`
# (used earlier for the W·x vector) is no longer overwritten by this cell.
for sample in x_train[:3]:
    print(sample[-maxlen:])

[65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
[23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95]
[1352, 13, 191, 79, 638, 89, 2, 14, 9, 8, 106, 607, 624, 35, 534, 6, 227, 7, 129, 113]


In [23]:
# Convert the lists of word indices into 2D integer tensors of shape
# (samples, maxlen). pad_sequences() can be told where to pad; only maxlen
# is passed here, so its defaults apply.
x_train, x_test = [
    preprocessing.sequence.pad_sequences(sequences, maxlen=maxlen)
    for sequences in (x_train, x_test)
]

In [24]:
x_train.shape

(25000, 20)

In [25]:
x_train

array([[  65,   16,   38, ...,   19,  178,   32],
       [  23,    4, 1690, ...,   16,  145,   95],
       [1352,   13,  191, ...,    7,  129,  113],
       ...,
       [  11, 1818, 7561, ...,    4, 3586,    2],
       [  92,  401,  728, ...,   12,    9,   23],
       [ 764,   40,    4, ...,  204,  131,    9]], dtype=int32)

In [26]:
from keras.preprocessing.text import Tokenizer

# Two toy sentences used to fit the tokenizer
samples = [
    "The cat sat on the mat.",
    "The dog ate my homework.",
]

# char_level=True makes every character a token; num_words=100 is the
# vocabulary-size limit passed to the tokenizer.
tokenizer = Tokenizer(
    num_words=100,
    char_level=True,
)
tokenizer.fit_on_texts(samples)

In [27]:
tokenizer.word_index

{' ': 1,
 't': 2,
 'e': 3,
 'h': 4,
 'a': 5,
 'o': 6,
 'm': 7,
 '.': 8,
 'c': 9,
 's': 10,
 'n': 11,
 'd': 12,
 'g': 13,
 'y': 14,
 'w': 15,
 'r': 16,
 'k': 17}

In [28]:
tokenizer.texts_to_sequences(samples)

[[2, 4, 3, 1, 9, 5, 2, 1, 10, 5, 2, 1, 6, 11, 1, 2, 4, 3, 1, 7, 5, 2, 8],
 [2,
  4,
  3,
  1,
  12,
  6,
  13,
  1,
  5,
  2,
  3,
  1,
  7,
  14,
  1,
  4,
  6,
  7,
  3,
  15,
  6,
  16,
  17,
  8]]

In [30]:
# Encode each sample as one fixed-length binary row (mode='binary').
# Despite the variable name, a row is multi-hot: it has a 1 at every token
# index that occurs in the sample, so order and counts are not represented.
one_hot = tokenizer.texts_to_matrix(samples, mode='binary')

In [31]:
len(one_hot[0])

100

In [32]:
one_hot[0]

array([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [8]:
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense

# Binary classifier: Embedding -> LSTM -> single sigmoid unit.
model = Sequential([
    # 10000-entry vocabulary, 32-dim embeddings, sequences of length 20
    Embedding(10000, 32, input_length=20),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])

In [9]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 20, 32)            320000    
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 328,353
Trainable params: 328,353
Non-trainable params: 0
_________________________________________________________________


Number of parameters in an LSTM layer (four weight sets: the input, forget, and output gates plus the cell candidate):  
params = 4 * ((size_of_input + 1) * size_of_output + size_of_output^2)  

8320 = 4 * ((32 + 1) * 32 + 32^2)

In [10]:
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense

# Same Embedding -> LSTM -> sigmoid classifier, but with 64-dim embeddings,
# to see how the LSTM parameter count depends on its input size.
model = Sequential([
    Embedding(10000, 64, input_length=20),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 20, 64)            640000    
_________________________________________________________________
lstm_3 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
Total params: 652,449
Trainable params: 652,449
Non-trainable params: 0
_________________________________________________________________


In [11]:
4 * ((64+1)*32 + 32**2)

12416

12416 = 4 * ((64+1) * 32 + 32^2)

In [4]:
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, SimpleRNN

# Binary classifier: Embedding -> SimpleRNN -> single sigmoid unit.
# No input_length is given to the Embedding layer.
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])

In [5]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 32)          320000    
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 32)                2080      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 322,113
Trainable params: 322,113
Non-trainable params: 0
_________________________________________________________________


2080 = 32 * (32 + 32 + 1)

In [12]:
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, SimpleRNN

# Same Embedding -> SimpleRNN -> sigmoid classifier, but with 64-dim
# embeddings, to see how the SimpleRNN parameter count depends on input size.
model = Sequential([
    Embedding(10000, 64),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, None, 64)          640000    
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 32)                3104      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 643,137
Trainable params: 643,137
Non-trainable params: 0
_________________________________________________________________


In [14]:
32 * (64 + 32 + 1)

3104

Number of parameters in a SimpleRNN layer  

params = num_units * (num_features + num_units + 1)  
i.e. output_size * (input_size + output_size + 1)  

3104 = 32 * (64 + 32 + 1)

In [7]:
import tensorflow as tf
tf.random.normal([32, 10, 8])

<tf.Tensor: shape=(32, 10, 8), dtype=float32, numpy=
array([[[ 7.14666069e-01,  1.11679542e+00, -2.05651641e-01, ...,
         -7.90959671e-02, -8.05340350e-01,  7.74299264e-01],
        [ 1.53350234e+00,  7.46777892e-01,  1.06387007e+00, ...,
          3.08109254e-01,  1.19671106e-01, -5.40174782e-01],
        [-1.27242529e+00, -6.10901892e-01,  9.98392403e-01, ...,
         -7.80037701e-01, -2.11879686e-02, -3.04433405e-01],
        ...,
        [ 7.04427302e-01, -5.44988990e-01,  7.55060971e-01, ...,
          2.30370736e+00, -5.12110353e-01,  3.01460666e-03],
        [ 8.39543641e-01,  1.92880362e-01, -2.47096467e+00, ...,
         -1.20919406e+00,  1.24776244e+00, -2.09685832e-01],
        [-1.28268230e+00,  4.96025860e-01, -4.08567578e-01, ...,
         -1.34108770e+00,  1.00390327e+00,  1.70109078e-01]],

       [[-6.29415885e-02, -1.16104162e+00,  1.02791417e+00, ...,
          2.80491412e-02,  2.49254793e-01, -5.81165731e-01],
        [ 3.35045457e-02,  1.87612307e+00, -1.8637