# keras SimpleRNN Data Input Format : 1차원 데이터를 SimpleRNN에 3차원으로 input 하기

- [0) 데이터 준비](#데이터-준비) <br>
- [1) 방법 1](#방법-1)<br>
- [2) 방법 2](#방법-2) <br>
    - [첫 번째 방법과 두 번째 방법의 적용 차이](#첫-번째-방법과-두-번째-방법의-적용-차이) <br>
- [3) 방법 3](#방법-3)<br>
- [4) 방법 4](#방법-4)<br>



## 데이터 준비

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, SimpleRNN

In [None]:
TIMESTEPS = 3  # window length: how many consecutive values form one input sample

# Toy 1-D sequence: the 26 lowercase letters 'a'..'z'.
sample = np.array(list('abcdefghijklmnopqrstuvwxyz'))

# Split without a gap: the test slice starts exactly where the train slice
# stops. Starting it at 16 would silently drop the letter at index 15 ('p').
split_at = 15
train = sample[:split_at]   # 'a'..'o'
test = sample[split_at:]    # 'p'..'z'

print(train, train.shape)
print(test, test.shape)

## 방법 1
https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html

In [None]:
# Pad the tail of each sequence with TIMESTEPS extra copies of its last element.
# NOTE: this rebinds train/test, so re-running the cell pads them again.
train_tail = np.repeat(train[-1], TIMESTEPS)
test_tail = np.repeat(test[-1], TIMESTEPS)
train = np.concatenate((train, train_tail))
test = np.concatenate((test, test_tail))

print(train)
print(test)

In [None]:
# Turn the 1-D sequence into a 2-D array of sliding windows.
def convertToMatrix(data, TIMESTEPS):
    """Slice a sequence into sliding windows and their next-step targets.

    Args:
        data: Sequence of observations along axis 0 (NumPy array or list).
        TIMESTEPS: Number of consecutive observations per window.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays where ``X[i] == data[i:i + TIMESTEPS]``
        and ``Y[i] == data[i + TIMESTEPS]``. There are
        ``max(len(data) - TIMESTEPS, 0)`` rows; when there are none, ``X`` still
        has shape ``(0, TIMESTEPS)`` so a later 3-D reshape does not fail.
    """
    # Accept plain lists as well as arrays.
    data = np.asarray(data)
    n_windows = max(len(data) - TIMESTEPS, 0)
    item_shape = data.shape[1:]

    if n_windows == 0:
        # Keep the window axis even when the input is too short for one window.
        X = np.empty((0, TIMESTEPS) + item_shape, dtype=data.dtype)
        Y = np.empty((0,) + item_shape, dtype=data.dtype)
        return X, Y

    X = np.stack([data[start:start + TIMESTEPS] for start in range(n_windows)])
    # Copy so the targets do not alias the caller's array.
    Y = data[TIMESTEPS:].copy()
    return X, Y

trainX, trainY = convertToMatrix(train, TIMESTEPS)
testX, testY = convertToMatrix(test, TIMESTEPS)

# Sanity check: print the shapes explicitly (a bare expression is not displayed
# unless it is the last statement of the cell), then every (window, target)
# pair. Iterating the arrays avoids hard-coding the number of rows.
print(trainX.shape, trainY.shape)
for window, target in zip(trainX, trainY):
    print('trainX :', window, ' trainY : ', target)

In [None]:
# Make the 2-D window arrays 3-D by inserting a length-1 axis in the middle.
trainX = np.expand_dims(trainX, axis=1)
testX = np.expand_dims(testX, axis=1)
trainX.shape  # ordered as (sample size, 1, timestep)

## 방법 2
https://frhyme.github.io/machine-learning/simple_rnn/

In [None]:
def maker(data, timesteps=None):
    """Build sliding-window inputs and next-step targets, already 3-D.

    Args:
        data: 1-D sequence of observations (NumPy array or list).
        timesteps: Window length. When omitted, the module-level ``TIMESTEPS``
            is used, so existing ``maker(data)`` calls keep working.

    Returns:
        A tuple ``(xs, ys)``. ``xs`` has shape ``(n, timesteps, 1)`` with
        ``xs[k, :, 0] == data[k:k + timesteps]``; ``ys`` has shape ``(n, 1)``
        with ``ys[k, 0] == data[k + timesteps]``, where
        ``n = max(len(data) - timesteps, 0)``.
    """
    if timesteps is None:
        timesteps = TIMESTEPS

    data = np.asarray(data)
    n_windows = max(len(data) - timesteps, 0)

    # Each window holds the `timesteps` values that precede its target.
    windows = [data[start:start + timesteps] for start in range(n_windows)]
    xs = np.array(windows, dtype=data.dtype).reshape(n_windows, timesteps, 1)
    ys = data[timesteps:].reshape(n_windows, 1).copy()

    return xs, ys

# Build the windowed inputs and targets from whatever `train` currently holds.
# NOTE(review): if the Method 1 padding cell ran before this one, `train` still
# carries the extra repeated tail elements -- confirm that is intended.
trainX, trainY = maker(data=train)

### 첫 번째 방법과 두 번째 방법의 적용 차이

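첫 번째 방법 : `model.add(SimpleRNN(units = 128, input_shape = (1, TIMESTEPS), activation="relu"))` <br>
두 번째 방법 : `model.add(SimpleRNN(units = 128, input_shape = (TIMESTEPS, 1), activation="relu"))` <br>
Keras에서 `input_shape`는 `(timesteps, features)` 순서이므로, 첫 번째 방법은 길이가 1인 시퀀스에 TIMESTEPS개의 feature를 넣는 것이고, 두 번째 방법은 길이가 TIMESTEPS인 시퀀스에 feature를 1개씩 넣는 것이다.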

## 방법 3
https://3months.tistory.com/168 

In [None]:
import copy 

# Reset to the unpadded split. The test slice starts exactly where the train
# slice stops; starting it at 16 would silently drop the letter at index 15.
train = sample[:15]  # 1-D array of the letters 'a' through 'o'
test = sample[15:]   # the remaining letters, 'p' through 'z'

def method3(arraydata, timesteps=None):
    """Build sliding windows by stacking shifted copies of the series.

    Each column of ``xs`` is the series shifted by one more step, so row ``k``
    holds ``arraydata[k:k + timesteps]`` in chronological order (oldest value
    first), matching the window order produced by the other methods in this
    file.

    Args:
        arraydata: 1-D sequence of observations (NumPy array or list).
        timesteps: Window length. When omitted, the module-level ``TIMESTEPS``
            is used, so existing ``method3(data)`` calls keep working.

    Returns:
        A tuple ``(xs, ys)``: ``xs`` has shape ``(n, timesteps)`` and ``ys`` has
        shape ``(n,)`` with ``ys[k] == arraydata[k + timesteps]``, where
        ``n = max(len(arraydata) - timesteps, 0)``.

    Raises:
        ValueError: If the window length is smaller than 1.
    """
    if timesteps is None:
        timesteps = TIMESTEPS
    if timesteps < 1:
        raise ValueError("timesteps must be at least 1")

    series = np.asarray(arraydata)
    n_windows = max(len(series) - timesteps, 0)

    # Shifted views of the series, trimmed to the rows that have a full window.
    # Slicing (rather than rolling and blanking the wrapped-around cells with
    # None) also works for integer data, which cannot store None.
    shifted = [series[lag:lag + n_windows] for lag in range(timesteps)]
    xs = np.stack(shifted, axis=1)

    # The target of each window is the value right after it.
    ys = series[timesteps:].copy()

    return xs, ys


In [None]:
trainX, trainY = method3(train)

# Append a trailing feature axis: (samples, TIMESTEPS) -> (samples, TIMESTEPS, 1).
trainX = np.reshape(trainX, (len(trainX), TIMESTEPS, 1))

trainX.shape  # (12, 3, 1) for the 15-letter train array and TIMESTEPS = 3

## 방법 4
https://datascienceschool.net/view-notebook/1d93b9dc6c624fbaa6af2ce9290e2479/

In [None]:
from scipy.linalg import toeplitz

In [None]:
# Number of windows that have a real target: a window starting at i needs
# train[i + TIMESTEPS] to exist.
n_windows = train.shape[0] - TIMESTEPS

# First column of the Toeplitz matrix: the last sample, then padding. It has
# exactly n_windows entries, so no row ends up with padding in its target
# column. The padding uses train's own dtype so string data is not mixed with
# float zeros.
garo = np.concatenate(
    ([train[-1]], np.zeros(max(n_windows - 1, 0), dtype=train.dtype))
)
sero = train[::-1]  # first row: the data indexed backwards, last element first

# np.fliplr reverses the element order within every row, so row i of S reads
# train[i], train[i + 1], ... followed by padding once the data runs out.
S = np.fliplr(toeplitz(garo, sero))

# Inspect a sample
print(S[:TIMESTEPS, :TIMESTEPS])
print(S.shape)

In [None]:
# Split every row of S into its input window and its target.
trainY = S[:, TIMESTEPS]    # the column right after the window
trainX = S[:, :TIMESTEPS]   # the first TIMESTEPS columns

# Append a trailing axis of size 1 -> (rows, TIMESTEPS, 1).
trainX = np.reshape(trainX, (trainX.shape[0], TIMESTEPS, 1))