In [1]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Conv2D

In [2]:
from tensorflow.keras.datasets import mnist

In [3]:
# Fetch the MNIST train/test splits; each split unpacks into an (inputs, labels) pair.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Dense+TimeDistributed 

In [34]:
# Start from an empty Sequential container; its single layer is attached in a later cell.
model =  Sequential()

In [35]:
# Wrap a 4-unit Dense layer so it is applied to every timestep of a
# (3 timesteps, 784 flattened pixels) input.
layer = TimeDistributed(
    Dense(4),
    input_shape=(3, 28 * 28),
)

In [36]:
# Attach the TimeDistributed(Dense) wrapper as the model's only layer.
model.add(layer)

In [37]:
# Print the layer table to read off the output shape and parameter count.
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_3 (TimeDist (None, 3, 4)              3140      
Total params: 3,140
Trainable params: 3,140
Non-trainable params: 0
_________________________________________________________________


# Dense

In [38]:
# Baseline without TimeDistributed: the same 4-unit Dense layer on one
# flattened 784-pixel image, used to compare parameter counts.
model2 = Sequential(
    layers=[
        Dense(4, input_shape=(28 * 28,)),
    ]
)

In [39]:
# Print the layer table for the plain Dense baseline.
model2.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 4)                 3140      
Total params: 3,140
Trainable params: 3,140
Non-trainable params: 0
_________________________________________________________________


> - TimeDistributed가 있으나 없으나 파라미터 수는 같다 (784×4 + 4 = 3,140).
> - 즉, 3개의 timestep이 하나의 Dense weight를 공유한다.
> - timestep마다 따로 적용되지만, 모두 같은 weight로 예측한다.

## 비교

In [40]:
import numpy as np

In [50]:
# First three images, each flattened to 784 values and grouped into
# sequences of three timesteps (-1 lets NumPy infer the sequence count).
X_train[:3].reshape((-1, 3, 28 * 28))

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]], dtype=uint8)

In [51]:
# Run the TimeDistributed(Dense) model on the first three images,
# grouped into sequences of three flattened frames.
model.predict(
    X_train[:3].reshape((-1, 3, 28 * 28))
)

array([[[   4.0725327 ,   -0.87196636,  119.48971   ,   99.18028   ],
        [  30.89159   ,   -0.9095249 ,   39.8052    ,  -11.999203  ],
        [ -29.977325  ,   54.415005  ,  109.15031   , -155.94592   ]]],
      dtype=float32)

In [52]:
# First six images, flattened and grouped into sequences of three
# timesteps (-1 lets NumPy infer the sequence count).
X_train[:6].reshape((-1, 3, 28 * 28))

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]], dtype=uint8)

In [53]:
# Same model, now on the first six images grouped three per sequence.
model.predict(
    X_train[:6].reshape((-1, 3, 28 * 28))
)

array([[[   4.0725327 ,   -0.87196636,  119.48971   ,   99.18028   ],
        [  30.89159   ,   -0.9095249 ,   39.8052    ,  -11.999203  ],
        [ -29.977325  ,   54.415005  ,  109.15031   , -155.94592   ]],

       [[ 201.62799   ,   62.516117  ,   42.595688  , -104.630875  ],
        [   2.8831635 ,  219.55298   ,  -48.420544  ,  -59.729992  ],
        [ 249.03983   ,  148.95279   ,   35.20711   ,   12.046921  ]]],
      dtype=float32)

In [49]:
# First nine images, flattened and grouped into sequences of three
# timesteps (-1 lets NumPy infer the sequence count).
X_train[:9].reshape((-1, 3, 28 * 28))

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]], dtype=uint8)

In [46]:
# Same model, now on the first nine images grouped three per sequence.
model.predict(
    X_train[:9].reshape((-1, 3, 28 * 28))
)

array([[[   4.0725327 ,   -0.87196636,  119.48971   ,   99.18028   ],
        [  30.89159   ,   -0.9095249 ,   39.8052    ,  -11.999203  ],
        [ -29.977325  ,   54.415005  ,  109.15031   , -155.94592   ]],

       [[ 201.62799   ,   62.516117  ,   42.595688  , -104.630875  ],
        [   2.8831635 ,  219.55298   ,  -48.420544  ,  -59.729992  ],
        [ 249.03983   ,  148.95279   ,   35.20711   ,   12.046921  ]],

       [[ -37.482246  ,  178.3122    ,  -38.719997  ,   23.836891  ],
        [ 184.95084   ,  115.91388   ,   60.163788  ,   81.592155  ],
        [   1.0323296 ,  112.012314  ,   -7.83025   ,  -14.694199  ]]],
      dtype=float32)

> - TimeDistributed는 몇 개가 들어올지 모를 때 유용하다. (?)
>     - RCNN에서 쓰는 이유라고 함...?
>     - 하지만 TimeDistributed도 input shape을 정해야 하는데?
>     - TODO: timestep 축은 `input_shape=(None, 28*28)`처럼 `None`으로 둘 수 있는 것으로 보인다 (확인 필요).

# Conv2D+TimeDistributed 

In [60]:
# One Conv2D (3 filters, 3x3 kernel) applied to each of the 3 frames of a
# (3, 28, 28, 1) input through TimeDistributed.
model3 = Sequential(
    [
        TimeDistributed(
            Conv2D(3, (3, 3)),
            input_shape=(3, 28, 28, 1),
        ),
    ]
)

In [61]:
# Print the layer table for the TimeDistributed(Conv2D) model.
model3.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_10 (TimeDis (None, 3, 26, 26, 3)      30        
Total params: 30
Trainable params: 30
Non-trainable params: 0
_________________________________________________________________


# Conv2D

In [63]:
# Baseline without TimeDistributed: the same Conv2D (3 filters, 3x3 kernel)
# on a single (28, 28, 1) image.
# Sequential is documented as taking a *list* of layers. Passing a bare layer
# only works where the constructor coerces it, so wrap it in a list to match
# how model2 is built and to stay portable across Keras versions.
model4 = Sequential([Conv2D(3, (3, 3), input_shape=(28, 28, 1))])

In [64]:
# Print the layer table for the plain Conv2D baseline.
model4.summary()

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 26, 26, 3)         30        
Total params: 30
Trainable params: 30
Non-trainable params: 0
_________________________________________________________________


> - Conv2D layer를 써도 TimeDistributed가 있을 때나 없을 때나 파라미터 수는 같다 (3×3×1×3 + 3 = 30).

## 비교

In [78]:
# Nine images grouped into sequences of three single-channel 28x28 frames;
# -1 lets NumPy infer the number of sequences (3 here).
pred3 = model3.predict(
    X_train[:9].reshape((-1, 3, 28, 28, 1))
)

In [79]:
# Shape of the TimeDistributed(Conv2D) predictions.
pred3.shape

(3, 3, 26, 26, 3)

In [80]:
# The same nine images as independent single-channel 28x28 samples;
# -1 lets NumPy infer the batch size (9 here).
pred4 = model4.predict(
    X_train[:9].reshape((-1, 28, 28, 1))
)

In [81]:
# Shape of the plain Conv2D predictions, for comparison with pred3.
pred4.shape

(9, 26, 26, 3)

> 같은 이미지 9장을 넣었을 때,
> - TimeDistributed를 쓰면 3장씩 묶인 결과물 3개가 되고
> - TimeDistributed를 안 쓰면 9개 결과물이 된다.
> - 여기서 개수는 출력의 첫 번째 축(batch 축) 크기를 말한다: (3, 3, 26, 26, 3) vs (9, 26, 26, 3).
> - 한 모델 안에서는 각 이미지에 적용되는 weight가 같다. (TimeDistributed를 쓰는 이유)