# RNN初探

### 因為在深度學習的應用，常常會有Python和套件版本相依性的問題存在，因此我們這邊提出確認版本的方法，未來電腦程式執行有問題時可以依此確認過!

In [2]:
# Print the installed TensorFlow version so dependency problems can be checked later.
import tensorflow
print(tensorflow.__version__)

2.8.2


In [3]:
# Display the version of the Keras API that ships inside TensorFlow.
# The bare expression on the last line is what the notebook echoes as cell output.
import tensorflow.keras as ks
ks.__version__

'2.8.0'

In [4]:
# Run `python --version` in a shell (IPython `!` syntax) to show the interpreter version.
!python --version

Python 3.7.13


## 載入資料、準備資料

In [5]:
# Load MNIST through tf.keras, the same Keras API every other cell in this
# notebook imports from, instead of the standalone `keras` package.
from tensorflow.keras.datasets import mnist

# load_data() returns (images, labels) pairs for the training and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

備好資料

In [7]:
from tensorflow.keras.utils import to_categorical

# Shape the inputs as (samples, 28, 28) and normalise the pixel values into the
# 0-1 range. Casting to float32 before dividing avoids the float64 arrays that
# a bare `/ 255.` would produce (twice the memory) and matches the float32
# preprocessing used for the CNN later in this notebook.
X_train = X_train.reshape(-1, 28, 28).astype('float32') / 255.
X_test = X_test.reshape(-1, 28, 28).astype('float32') / 255.

# One-hot encode the labels into 10 classes.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

----
## 試試看基本的RNN類神經網路

In [9]:
from tensorflow.keras import models
from tensorflow.keras import layers

# Baseline network: a single SimpleRNN layer feeding a 10-way softmax classifier.
model = models.Sequential([
    layers.SimpleRNN(
        units=50,
        # With the TensorFlow backend, the batch size inside batch_input_shape
        # has to be None; otherwise model.evaluate() raises an error.
        batch_input_shape=(None, 28, 28),
        unroll=True,
    ),
    layers.Dense(
        units=10,
        activation='softmax',
        kernel_initializer='normal',
    ),
])


In [10]:
# Print the layer table (output shapes and parameter counts) of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 50)                3950      
                                                                 
 dense (Dense)               (None, 10)                510       
                                                                 
Total params: 4,460
Trainable params: 4,460
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Compile: pick the loss function, the optimizer and the metric used to judge results.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [12]:
# Train on the training split for 5 epochs using mini-batches of 64 samples.
model.fit(x=X_train, y=y_train, batch_size=64, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2a7c1923788>

In [13]:
# Now evaluate the trained model on the test data.
# With metrics=['accuracy'] compiled in, evaluate() returns the loss followed by the accuracy.
test_loss, test_acc = model.evaluate(X_test, y_test)



### 為何這麼差？因為我們設計得太簡單了，層數非常少

----
## 再來一次

In [1]:
# Import MNIST from tf.keras, the same Keras API used for the model cells,
# instead of the standalone `keras` package.
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Shape the inputs as (samples, 28, 28) and normalise the pixel values into the
# 0-1 range. Casting to float32 before dividing avoids the float64 arrays that
# a bare `/ 255.` would produce (twice the memory) and matches the float32
# preprocessing used for the CNN later in this notebook.
X_train = X_train.reshape(-1, 28, 28).astype('float32') / 255.
X_test = X_test.reshape(-1, 28, 28).astype('float32') / 255.

# One-hot encode the labels into 10 classes.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)



In [2]:
# Build the RNN

from tensorflow.keras import models
from tensorflow.keras import layers

# Same SimpleRNN front end as before, now followed by a 64-unit ReLU layer
# ahead of the 10-way softmax output.
model = models.Sequential([
    layers.SimpleRNN(
        units=50,
        # With the TensorFlow backend, the batch size inside batch_input_shape
        # has to be None; otherwise model.evaluate() raises an error.
        batch_input_shape=(None, 28, 28),
        unroll=True,
    ),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=10, activation='softmax'),
])

In [3]:
# Print the layer table (output shapes and parameter counts) of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 50)                3950      
                                                                 
 dense (Dense)               (None, 64)                3264      
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 7,864
Trainable params: 7,864
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Compile the model, then train it.

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
# 5 epochs, mini-batches of 64 samples.
model.fit(x=X_train, y=y_train, batch_size=64, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x18222583348>

In [5]:
# Now evaluate the trained model on the test data.
# With metrics=['accuracy'] compiled in, evaluate() returns the loss followed by the accuracy.
test_loss, test_acc = model.evaluate(X_test, y_test)



### 只是好了些… 實際上我們也不怎麼直接使用RNN處理問題，因為效果都不好

## 試試LSTM

In [22]:
# Per-sample input shape: every dimension of X_train after the sample axis.
model_input_shape = X_train.shape[1:]

model = models.Sequential()

# Two stacked LSTM layers. The first returns its full output sequence so the
# second LSTM has a sequence to consume; the second returns only its last output.
model.add(layers.LSTM(16, input_shape=model_input_shape, return_sequences=True))
model.add(layers.LSTM(16, return_sequences=False))

# The Dense layers are left auto-named. A hard-coded name such as 'dense_1' can
# clash with the automatically generated name of the next Dense layer, depending
# on how many Dense layers were created earlier in the session, and Sequential
# rejects duplicate layer names with a ValueError.
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 28, 16)            2880      
                                                                 
 lstm_1 (LSTM)               (None, 16)                2112      
                                                                 
 dense_1 (Dense)             (None, 64)                1088      
                                                                 
 dense_3 (Dense)             (None, 10)                650       
                                                                 
Total params: 6,730
Trainable params: 6,730
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Compile the model, then train it.

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
# 5 epochs, mini-batches of 64 samples.
model.fit(x=X_train, y=y_train, batch_size=64, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2a7b64c4ac8>

## ---- 以下是之前的NN & CNN參考比較用 ----

In [None]:
# In older versions the imports looked like this
#from keras import models
#from keras import layers
#from keras.datasets import mnist
#from keras.utils import to_categorical


In [6]:
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

In [11]:
# Build a small CNN
from tensorflow.keras import layers
from tensorflow.keras import models

# Convolutional feature extractor: three 3x3 Conv2D layers with 2x2 max pooling
# after the first two. Input is a 28x28 image with a single channel.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
])

In [12]:
# Print the layer table (output shapes and parameter counts) of the model so far.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 5, 5, 64)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3, 64)          36928     
                                                                 
Total params: 55,744
Trainable params: 55,744
Non-trainable params: 0
_________________________________________________________________

In [13]:
# Add the densely connected classifier on top; the feature maps must be flattened first.

model.add(layers.Flatten())
model.add(layers.Dense(units=64, activation='relu'))
model.add(layers.Dense(units=10, activation='softmax'))

In [14]:
# Print the layer table (output shapes and parameter counts) of the complete model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 5, 5, 64)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3, 64)          36928     
                                                                 
 flatten (Flatten)           (None, 576)              

In [15]:
#- Load the data
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

#- Prepare the images
# Add a trailing channel axis for Conv2D and scale pixel values into 0-1.
# reshape is a NumPy array method; -1 lets NumPy infer the number of samples,
# so the split sizes (60000 / 10000) are not hard-coded.
train_images = train_images.reshape((-1, 28, 28, 1))
train_images = train_images.astype('float32') / 255

test_images = test_images.reshape((-1, 28, 28, 1))
test_images = test_images.astype('float32') / 255

#- Prepare the labels
# One-hot encode; num_classes is given explicitly, matching the other
# preprocessing cells, so the width never depends on which labels are present.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

In [16]:
# Compile the model, then train it.

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
# 5 epochs, mini-batches of 64 samples.
model.fit(x=train_images, y=train_labels, batch_size=64, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1e48130d208>

In [17]:
# Now evaluate the trained model on the test data.
# With metrics=['accuracy'] compiled in, evaluate() returns the loss followed by the accuracy.
test_loss, test_acc = model.evaluate(test_images, test_labels)



### 相較之前的單純密集連接層NN的model，準確度提昇不少