<a href="https://colab.research.google.com/github/marinba2200/DL_test/blob/main/1216_3_RNN_Shapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Import library

In [None]:
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Things you should automatically know and have memorized
*   N = number of samples
*   T = sequence length (時間上)
*   D = number of input features (空間上)
*   M = number of hidden units (RNN變形會用到) 參考RNN投影片15,16
*   K = number of output units (RNN變形會用到)

In [None]:
# Make some data
# Problem dimensions used by every cell below (single-sample case).
N = 1 # number of samples
T = 10 # number of inputs, i.e. sequence length (whether the input and output T must match depends on the kind of sequential data)
D = 3 # number of features in each input step (original note: D=2 for a binary-classification output; for text with 10,000 distinct words D=10,000, because the input is one-hot encoded and so D equals the number of word types)
M = 5 # number of neurons (hidden units) in each layer
K = 2 # number of neurons in the final Dense layer

In [None]:
# Build the network: (T, D) sequence input -> SimpleRNN with M units -> Dense with K units
i = Input(shape=(T, D))
rnn_out = SimpleRNN(M)(i) # summary() reports this as (None, M): one hidden vector per sample
x = Dense(K)(rnn_out)

# Wrap the input/output tensors into a model that can be called with predict()
model = Model(inputs=i, outputs=x)

In [None]:
# Get the output
# Random input batch with one row per sample, one column block per time step.
X = np.random.randn(N, T, D)
Yhat = model.predict(X) # the size of the final output is determined by K
print(Yhat)

[[-0.9151237 -1.7989362]]


In [None]:
# See if we can replicate this output
# Get the weights first
model.summary()

# Parameter counts reported by summary():
#   SimpleRNN: (M + D + 1) * M = ((5+3)+1)*5 = 45  (hidden->hidden, input->hidden and bias, per hidden unit)
#   Dense:     (M + 1) * K     = (5+1)*2     = 12  (kernel and bias, per output unit)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 5)                 45        
_________________________________________________________________
dense (Dense)                (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [None]:
# See what's returned
# layers[1] is the SimpleRNN layer (layers[0] is the input layer);
# get_weights() returns a list of NumPy arrays.
model.layers[1].get_weights()

[array([[ 0.4778772 , -0.32349855,  0.07877702, -0.35469264,  0.6788556 ],
        [-0.53654456, -0.86100554,  0.4016326 , -0.7120358 , -0.03198963],
        [-0.39228415,  0.5323381 ,  0.49781328,  0.20075423,  0.17226177]],
       dtype=float32),
 array([[ 0.84640265, -0.03395028,  0.2248157 ,  0.47656727,  0.06922216],
        [ 0.2682192 , -0.14302172, -0.77366066, -0.04106214, -0.5544066 ],
        [ 0.17540893,  0.7945424 ,  0.24006842, -0.305359  , -0.4325012 ],
        [-0.36929578, -0.13228607,  0.31786987,  0.58826184, -0.6316865 ],
        [ 0.21098228, -0.5741038 ,  0.43844408, -0.57610893, -0.31899318]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [None]:
# Inspect the shapes of the three SimpleRNN weight arrays.
#   a: input  -> hidden weights
#   b: hidden -> hidden weights
#   c: bias term (vector of length M)
a, b, c = model.layers[1].get_weights()
print(*(w.shape for w in (a, b, c))) # the two weight matrices come first, the bias last

(3, 5) (5, 5) (5,)


In [None]:
# SimpleRNN layer: Wx = input->hidden weights, Wh = hidden->hidden weights, bh = hidden bias.
Wx, Wh, bh = model.layers[1].get_weights()
# Dense layer: Wo = hidden->output weights, bo = output bias.
Wo, bo = model.layers[2].get_weights()

In [None]:
# Reproduce model.predict(X) by hand for the single sample in X.
#   h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh), starting from a zero hidden state
#   y_t = h_t . Wo + bo
# Sizes come from M and K instead of the literals 5 and 2, so this cell keeps
# working when the constants in the data cell are changed.
h_last = np.zeros(M) # initial hidden state
x = X[0] # the one and only sample
Yhats = [] # where we store the outputs

for t in range(T):
  h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)
  y = h.dot(Wo) + bo # we only care about this value on the last iteration
  Yhats.append(y)

  # important: assign h to h_last
  h_last = h

# print the final output
Yhats = np.reshape(Yhats, (-1, K)) # one row per time step, K columns
print(Yhats.shape)
print(Yhats[-1]) # only the last time step corresponds to the model's output

(10, 2)
[-0.91512377 -1.79893616]


In [None]:
# Reference output from model.predict, for comparison with the manual computation.
print(Yhat)

[[-0.9151237 -1.7989362]]


### Exercise:
* Calculate the output for multiple samples at once (N > 1)

In [None]:
# Make some data
# Same dimensions as before, but now with several samples.
N = 4 # number of samples
T = 10 # sequence length
D = 3 # number of input features
M = 5 # number of hidden units
K = 2 # number of output units

In [None]:
# Build the network: (T, D) sequence input -> SimpleRNN with M units -> Dense with K units
i = Input(shape=(T, D))
rnn_out = SimpleRNN(M)(i) # summary() reports this as (None, M): one hidden vector per sample
x = Dense(K)(rnn_out)

# Wrap the input/output tensors into a model that can be called with predict()
model = Model(inputs=i, outputs=x)

In [None]:
# Get the output
# Random input batch of N sequences; the prediction has one row per sample.
X = np.random.randn(N, T, D)
Yhat = model.predict(X)
print(Yhat)

[[ 0.9076325   0.19478619]
 [ 0.43865803  0.28718203]
 [-0.43363115  0.18769965]
 [ 0.3015159  -0.18593629]]


In [None]:
# See if we can replicate this output
# Get the weights first
model.summary()

# Parameter counts reported by summary():
#   SimpleRNN: (M + D + 1) * M = ((5+3)+1)*5 = 45  (hidden->hidden, input->hidden and bias, per hidden unit)
#   Dense:     (M + 1) * K     = (5+1)*2     = 12  (kernel and bias, per output unit)

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5)                 45        
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Inspect the shapes of the three SimpleRNN weight arrays.
#   a: input  -> hidden weights
#   b: hidden -> hidden weights
#   c: bias term (vector of length M)
a, b, c = model.layers[1].get_weights()
print(*(w.shape for w in (a, b, c)))

(3, 5) (5, 5) (5,)


In [None]:
# SimpleRNN layer: Wx = input->hidden weights, Wh = hidden->hidden weights, bh = hidden bias.
Wx, Wh, bh = model.layers[1].get_weights()
# Dense layer: Wo = hidden->output weights, bo = output bias.
Wo, bo = model.layers[2].get_weights()

In [None]:
# Reproduce model.predict(X) by hand for all N samples.
#   h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
#   y   = h_T . Wo + bo   (only the last time step feeds the Dense layer)
Yhats = [] # where we store the outputs

for i in range(N):
  x = X[i]

  # Each sample is an independent sequence, so it must start from a zero
  # hidden state. Initialising h_last once outside this loop would leak the
  # previous sample's final state into the next one, and only the first row
  # would then agree with model.predict.
  h_last = np.zeros(M) # initial hidden state for this sample

  for t in range(T):
    h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)

    # important: assign h to h_last
    h_last = h

  # we only care about the output at the last time step
  y = h_last.dot(Wo) + bo
  Yhats.append(y)


# print the final output
Yhats = np.reshape(Yhats, (-1, K)) # one row per sample, K columns
print(Yhats.shape)
print(Yhats)

(4, 2)
[[ 0.90763253  0.19478625]
 [ 0.43422689  0.28975614]
 [-0.43354553  0.18817008]
 [ 0.29575148 -0.19132254]]


In [None]:
# Reference output from model.predict, for comparison with the manual computation.
print(Yhat)

[[ 0.9076325   0.19478619]
 [ 0.43865803  0.28718203]
 [-0.43363115  0.18769965]
 [ 0.3015159  -0.18593629]]


### Exercise

In [None]:
# Make some data
N = 4 # number of samples
T = 10 # sequence length
D = 3 # number of input features
M = 5 # number of hidden units
K = 2 # number of output units

In [None]:
# Build the network: (T, D) sequence input -> SimpleRNN with M units -> Dense with K units
i = Input(shape=(T, D))
rnn_out = SimpleRNN(M)(i)
x = Dense(K)(rnn_out)

# Wrap the input/output tensors into a model that can be called with predict()
model = Model(inputs=i, outputs=x)

In [None]:
# Get the output
# Random input batch of N sequences, passed through the model in one call.
X = np.random.randn(N, T, D)
Yhat = model.predict(X)
print(Yhat)

In [None]:
# See if we can replicate this output
# Get the weights first
model.summary()

# Expected parameter counts:
#   SimpleRNN: (M + D + 1) * M = ((5+3)+1)*5 = 45
#   Dense:     (M + 1) * K     = (5+1)*2     = 12

In [None]:
# Inspect the shapes of the three SimpleRNN weight arrays.
#   a: input  -> hidden weights
#   b: hidden -> hidden weights
#   c: bias term (vector of length M)
a, b, c = model.layers[1].get_weights()
print(*(w.shape for w in (a, b, c)))

In [None]:
# SimpleRNN layer: Wx = input->hidden weights, Wh = hidden->hidden weights, bh = hidden bias.
Wx, Wh, bh = model.layers[1].get_weights()
# Dense layer: Wo = hidden->output weights, bo = output bias.
Wo, bo = model.layers[2].get_weights()

In [None]:
# Reproduce model.predict(X) by hand for all N samples.
#   h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
#   y   = h_T . Wo + bo   (only the last time step feeds the Dense layer)
Yhats = [] # where we store the outputs

for i in range(N): # loop over input samples
  x = X[i]

  # Each sample is an independent sequence, so it must start from a zero
  # hidden state. Initialising h_last once outside this loop would carry the
  # previous sample's final state into the next one and break the match with
  # model.predict for every sample after the first.
  h_last = np.zeros(M) # initial hidden state for this sample

  for t in range(T): # loop over time steps
    h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)

    # important: assign h to h_last
    h_last = h

  # we only care about the output at the last time step
  y = h_last.dot(Wo) + bo
  Yhats.append(y)


# print the final output
Yhats = np.reshape(Yhats, (-1, K)) # one row per sample, K columns
print(Yhats.shape)
print(Yhats)

In [None]:
# Reference output from model.predict, for comparison with the manual computation.
print(Yhat)