In [1]:
import numpy as np

# Number of timesteps in the input sequence
timesteps = 100
# Dimensionality of the input feature space
input_features = 32
# Dimensionality of the output feature space
output_features = 64

# Random input data, one row per timestep
inputs = np.random.random(size=(timesteps, input_features))

# Initial state: an all-zero vector
state_t = np.zeros(output_features)

# Random parameters: W is applied to the input, U to the previous state, b is the bias
W = np.random.random(size=(output_features, input_features))
U = np.random.random(size=(output_features, output_features))
b = np.random.random(size=output_features)

successive_outputs = []

for input_t in inputs:
    # Combine the current input with the previous state, then squash with tanh
    pre_activation = W.dot(input_t) + U.dot(state_t) + b
    output_t = np.tanh(pre_activation)
    successive_outputs.append(output_t)
    # This step's output is the state seen by the next step
    state_t = output_t

# Final output: a 2D tensor of shape (timesteps, output_features)
final_output_sequence = np.stack(successive_outputs)

In [3]:
W, W.shape

(array([[0.83517397, 0.16119646, 0.85267148, ..., 0.63245986, 0.1791181 ,
         0.7725546 ],
        [0.18068833, 0.66596178, 0.59673225, ..., 0.41689787, 0.98662133,
         0.3690927 ],
        [0.5858957 , 0.12694827, 0.85531232, ..., 0.76941192, 0.49228704,
         0.27612495],
        ...,
        [0.59045437, 0.66490014, 0.49433664, ..., 0.85170889, 0.25125989,
         0.57779365],
        [0.80463066, 0.61489048, 0.3247619 , ..., 0.91803868, 0.9284634 ,
         0.75466422],
        [0.59371342, 0.55129162, 0.85726098, ..., 0.77591821, 0.44188222,
         0.65531992]]),
 (64, 32))

In [4]:
len(W[0])

32

In [5]:
inputs, inputs.shape

(array([[0.68332689, 0.26877599, 0.71641477, ..., 0.51105412, 0.84465145,
         0.52471106],
        [0.54742386, 0.52258646, 0.52116589, ..., 0.18313729, 0.40149681,
         0.26247227],
        [0.53272843, 0.42567461, 0.93242042, ..., 0.71091917, 0.29542666,
         0.17511571],
        ...,
        [0.73951361, 0.12644467, 0.51074846, ..., 0.6891985 , 0.04314439,
         0.62952236],
        [0.45752691, 0.10785195, 0.17904153, ..., 0.99050963, 0.28407869,
         0.08454627],
        [0.12326639, 0.05758784, 0.80997825, ..., 0.83268207, 0.77702384,
         0.0205653 ]]),
 (100, 32))

In [6]:
len(inputs[0])

32

In [8]:
i = np.dot(W, inputs[0])

In [10]:
i.shape, inputs[0].shape

((64,), (32,))

In [11]:
inputs[0]

array([0.68332689, 0.26877599, 0.71641477, 0.03283676, 0.41309121,
       0.70628209, 0.88247382, 0.7108023 , 0.96945409, 0.18985963,
       0.722335  , 0.80728414, 0.55288248, 0.25819481, 0.32546263,
       0.69419279, 0.61720801, 0.05321311, 0.46804393, 0.40919045,
       0.94335757, 0.49730263, 0.38001989, 0.56421185, 0.13585384,
       0.72227158, 0.41593894, 0.88857563, 0.95060307, 0.51105412,
       0.84465145, 0.52471106])

In [12]:
i[0]

8.834915150787275

In [13]:
i  # 64 차원의 벡터

array([ 8.83491515, 11.55752321, 10.01411815,  8.5137758 ,  7.70835746,
        7.26686396,  9.46845283,  9.447725  ,  8.7960813 ,  7.72351804,
        8.42707716,  9.48733133,  7.7348807 ,  9.23370699,  7.82164123,
        9.41837301,  7.69425522,  8.78456866,  8.51813932,  9.85597795,
        8.86473189,  9.10171614,  9.32379754,  9.64709005, 10.08715702,
        8.8487719 ,  9.20073921,  7.17968146,  7.3433291 ,  9.20172147,
        8.34146958,  9.28289182,  7.70920841,  8.1881492 ,  9.31642225,
        8.95722161,  7.77094691,  8.5090749 ,  8.5489809 , 10.33892524,
        8.51330402,  9.40113692,  7.65962074,  9.29443024,  7.29696802,
        8.29991986,  8.9368344 ,  8.87065539,  8.49616132,  9.67546549,
        8.16373112,  8.66205465, 10.2142733 ,  9.21684408,  9.56705742,
        8.49236525,  8.72377941,  6.90130715,  7.90870445,  9.1756249 ,
        8.49781064,  7.98166427,  9.74418733,  8.23641767])

In [14]:
U.shape, state_t.shape

((64, 64), (64,))

In [15]:
u = np.dot(U, state_t)

In [16]:
u.shape

(64,)

In [17]:
u[0]

35.35078793165137

In [18]:
b.shape

(64,)

In [19]:
b[0]

0.040992439343406684

In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras import preprocessing

# Vocabulary size: keep only the 10,000 most frequent words as features
max_features = 10000
# Sequence length: number of words kept from each text
maxlen = 20

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Convert the lists into 2D integer tensors of shape (samples, maxlen).
# pad_sequences() also lets the caller choose where the padding is inserted.
x_train, x_test = (
    preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [21]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

In [22]:
# Print the last 20 word indices of the first three samples.
# Slicing replaces the manual counter/break and leaves the notebook-level
# variable `i` (used by other cells) untouched.
for sample in x_train[:3]:
    print(sample[-20:])

[65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
[23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95]
[1352, 13, 191, 79, 638, 89, 2, 14, 9, 8, 106, 607, 624, 35, 534, 6, 227, 7, 129, 113]


In [23]:
# 리스트를 (samples, maxlen) 크기의 2D 정수 텐서로 변환
# pad_sequences() 는 패딩을 넣을 위치를 지정 가능
x_train = preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)

In [24]:
x_train.shape

(25000, 20)

In [25]:
x_train

array([[  65,   16,   38, ...,   19,  178,   32],
       [  23,    4, 1690, ...,   16,  145,   95],
       [1352,   13,  191, ...,    7,  129,  113],
       ...,
       [  11, 1818, 7561, ...,    4, 3586,    2],
       [  92,  401,  728, ...,   12,    9,   23],
       [ 764,   40,    4, ...,  204,  131,    9]], dtype=int32)

In [26]:
from keras.preprocessing.text import Tokenizer

samples = ["The cat sat on the mat.", "The dog ate my homework."]

tokenizer = Tokenizer(num_words=100, char_level=True)
tokenizer.fit_on_texts(samples)

In [27]:
tokenizer.word_index

{' ': 1,
 't': 2,
 'e': 3,
 'h': 4,
 'a': 5,
 'o': 6,
 'm': 7,
 '.': 8,
 'c': 9,
 's': 10,
 'n': 11,
 'd': 12,
 'g': 13,
 'y': 14,
 'w': 15,
 'r': 16,
 'k': 17}

In [28]:
tokenizer.texts_to_sequences(samples)

[[2, 4, 3, 1, 9, 5, 2, 1, 10, 5, 2, 1, 6, 11, 1, 2, 4, 3, 1, 7, 5, 2, 8],
 [2,
  4,
  3,
  1,
  12,
  6,
  13,
  1,
  5,
  2,
  3,
  1,
  7,
  14,
  1,
  4,
  6,
  7,
  3,
  15,
  6,
  16,
  17,
  8]]

In [30]:
one_hot = tokenizer.texts_to_matrix(samples, mode='binary')

In [31]:
len(one_hot[0])

100

In [32]:
one_hot[0]

array([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [8]:
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense

# Embedding (32-dim, 20 tokens per input) -> LSTM(32) -> single sigmoid unit,
# declared as one layer list instead of successive add() calls
model = Sequential([
    Embedding(10000, 32, input_length=20),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])

In [9]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 20, 32)            320000    
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 328,353
Trainable params: 328,353
Non-trainable params: 0
_________________________________________________________________


LSTM layer's parameter count  
params = 4 * ((size_of_input + 1) * size_of_output + size_of_output^2)  

8320 = 4 * ((32+1)*32 + 32 ^ 2)

In [10]:
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense

# Embedding (64-dim, 20 tokens per input) -> LSTM(32) -> single sigmoid unit
model = Sequential([
    Embedding(10000, 64, input_length=20),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 20, 64)            640000    
_________________________________________________________________
lstm_3 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
Total params: 652,449
Trainable params: 652,449
Non-trainable params: 0
_________________________________________________________________


In [11]:
4 * ((64+1)*32 + 32**2)

12416

12416 = 4 * ((64+1) * 32 + 32^2)

In [4]:
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, SimpleRNN

# Embedding (32-dim) -> SimpleRNN(32) -> single sigmoid unit
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])

In [5]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 32)          320000    
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 32)                2080      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 322,113
Trainable params: 322,113
Non-trainable params: 0
_________________________________________________________________


2080 = 32 * (32 + 32 + 1)

In [12]:
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, SimpleRNN

# Embedding (64-dim) -> SimpleRNN(32) -> single sigmoid unit
model = Sequential([
    Embedding(10000, 64),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, None, 64)          640000    
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 32)                3104      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 643,137
Trainable params: 643,137
Non-trainable params: 0
_________________________________________________________________


In [14]:
32 * (64 + 32 + 1)

3104

SimpleRNN layer's parameter count  

num_units * (num_feature + num_units + 1)  
output_size * (input_size + output_size + 1)  

3104 = 32 * (64 + 32 + 1)

In [7]:
import tensorflow as tf
tf.random.normal([32, 10, 8])

<tf.Tensor: shape=(32, 10, 8), dtype=float32, numpy=
array([[[ 7.14666069e-01,  1.11679542e+00, -2.05651641e-01, ...,
         -7.90959671e-02, -8.05340350e-01,  7.74299264e-01],
        [ 1.53350234e+00,  7.46777892e-01,  1.06387007e+00, ...,
          3.08109254e-01,  1.19671106e-01, -5.40174782e-01],
        [-1.27242529e+00, -6.10901892e-01,  9.98392403e-01, ...,
         -7.80037701e-01, -2.11879686e-02, -3.04433405e-01],
        ...,
        [ 7.04427302e-01, -5.44988990e-01,  7.55060971e-01, ...,
          2.30370736e+00, -5.12110353e-01,  3.01460666e-03],
        [ 8.39543641e-01,  1.92880362e-01, -2.47096467e+00, ...,
         -1.20919406e+00,  1.24776244e+00, -2.09685832e-01],
        [-1.28268230e+00,  4.96025860e-01, -4.08567578e-01, ...,
         -1.34108770e+00,  1.00390327e+00,  1.70109078e-01]],

       [[-6.29415885e-02, -1.16104162e+00,  1.02791417e+00, ...,
          2.80491412e-02,  2.49254793e-01, -5.81165731e-01],
        [ 3.35045457e-02,  1.87612307e+00, -1.8637

In [2]:
from keras.models import Sequential
from keras.layers import GRU, Embedding, Dense, SimpleRNN

# Embedding (64-dim) -> GRU(32) -> single sigmoid unit
model = Sequential([
    Embedding(10000, 64),
    GRU(32),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 64)          640000    
_________________________________________________________________
gru (GRU)                    (None, 32)                9408      
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 649,441
Trainable params: 649,441
Non-trainable params: 0
_________________________________________________________________


In [4]:
from keras.models import Sequential
from keras.layers import GRU, Embedding, Dense, SimpleRNN

# Embedding (128-dim) -> GRU(64) returning its full output sequence -> sigmoid unit
model = Sequential([
    Embedding(10000, 128),
    GRU(64, return_sequences=True),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, None, 128)         1280000   
_________________________________________________________________
gru_2 (GRU)                  (None, None, 64)          37248     
_________________________________________________________________
dense_2 (Dense)              (None, None, 1)           65        
Total params: 1,317,313
Trainable params: 1,317,313
Non-trainable params: 0
_________________________________________________________________


In [12]:
(64 * 128 + 64 + 64 * 64 + 64) * 3

37248

In [10]:
from keras.models import Sequential
from keras.layers import GRU, Embedding, Dense, SimpleRNN

# Embedding (128-dim) -> GRU(64) with reset_after=False, returning its full
# output sequence -> sigmoid unit
model = Sequential([
    Embedding(10000, 128),
    GRU(64, return_sequences=True, reset_after=False),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 128)         1280000   
_________________________________________________________________
gru (GRU)                    (None, None, 64)          37056     
_________________________________________________________________
dense (Dense)                (None, None, 1)           65        
Total params: 1,317,121
Trainable params: 1,317,121
Non-trainable params: 0
_________________________________________________________________


In [11]:
(64 * 128 + 64 * 64 + 64) * 3

37056

In [17]:
import tensorflow as tf
import numpy as np

In [16]:
tf.random.set_seed(0)

In [20]:
layer = tf.keras.layers.Dropout(.5, input_shape=(2,))
data = np.arange(10).reshape(5, 2).astype(np.float32)

In [21]:
data

array([[0., 1.],
       [2., 3.],
       [4., 5.],
       [6., 7.],
       [8., 9.]], dtype=float32)

In [22]:
outputs = layer(data, training=True)

In [23]:
outputs

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[ 0.,  0.],
       [ 4.,  6.],
       [ 0., 10.],
       [ 0., 14.],
       [16.,  0.]], dtype=float32)>

In [25]:
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop

# Bidirectional wrapper around GRU(32) on 1000-feature sequences of any
# length, followed by a single linear output unit
model = Sequential([
    layers.Bidirectional(layers.GRU(32), input_shape=(None, 1000)),
    layers.Dense(1),
])

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 64)                198528    
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 198,593
Trainable params: 198,593
Non-trainable params: 0
_________________________________________________________________


In [31]:
((32 * 1000 + 32 + 32 * 32 + 32) * 3) * 2

198528

In [28]:
198528 // 2

99264

In [29]:
32 * 1000 + 32 + 32 * 32 + 32

33088

In [30]:
(32 * 1000 + 32 + 32 * 32 + 32) * 3

99264

In [43]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop

# Two Conv1D layers (32 filters, window 5) with max pooling in between,
# then a GRU with input and recurrent dropout, then one linear output unit
model = Sequential([
    layers.Conv1D(32, 5, activation='relu', input_shape=(None, 1000)),
    layers.MaxPooling1D(3),
    layers.Conv1D(32, 5, activation='relu'),
    layers.GRU(32, dropout=0.1, recurrent_dropout=0.5),
    layers.Dense(1),
])

model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_6 (Conv1D)            (None, None, 32)          160032    
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, None, 32)          0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, None, 32)          5152      
_________________________________________________________________
gru_6 (GRU)                  (None, 32)                6336      
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 33        
Total params: 171,553
Trainable params: 171,553
Non-trainable params: 0
_________________________________________________________________


In [44]:
2272 - 32

2240

In [45]:
2240 // 5

448

In [47]:
448 / 32

14.0

Conv1D 층의 파라미터 수 = bias 수(= filter 수) + (filter 수) * (윈도우 크기) * (입력의 차원 수)

In [62]:
32 + 32 * 5 * 14

2272

In [40]:
# Parameter count of the GRU(32) layer fed by the 32-channel Conv1D output:
# (input kernel 32*32 + bias 32 + recurrent kernel 32*32 + recurrent bias 32)
# for each of the 3 gate blocks. The "* 3" was missing from the expression,
# which on its own gives 2112 rather than the 6336 shown in the summary.
(32 * 32 + 32 + 32 * 32 + 32) * 3

6336

In [37]:
# 1D window size: 7 (each filter looks at 7 consecutive timesteps)
# 32: number of filters
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop

# Embedding -> Conv1D -> max pooling -> Conv1D -> global max pooling -> linear output
model = Sequential([
    layers.Embedding(10000, 128, input_length=500),
    layers.Conv1D(32, 7, activation='relu'),
    layers.MaxPooling1D(5),
    layers.Conv1D(32, 7, activation='relu'),
    layers.GlobalMaxPooling1D(),
    layers.Dense(1),
])

model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 500, 128)          1280000   
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 494, 32)           28704     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 98, 32)            0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 92, 32)            7200      
_________________________________________________________________
global_max_pooling1d (Global (None, 32)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 1,315,937
Trainable params: 1,315,937
Non-trainable params: 0
____________________________________________

In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop

# Embedding -> Conv1D -> max pooling -> Conv1D -> Flatten -> linear output
model = Sequential([
    layers.Embedding(10000, 128, input_length=500),
    layers.Conv1D(32, 7, activation='relu'),
    layers.MaxPooling1D(5),
    layers.Conv1D(32, 7, activation='relu'),
    layers.Flatten(),
    layers.Dense(1),
])

model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 500, 128)          1280000   
_________________________________________________________________
conv1d (Conv1D)              (None, 494, 32)           28704     
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 98, 32)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 92, 32)            7200      
_________________________________________________________________
flatten (Flatten)            (None, 2944)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 2945      
Total params: 1,318,849
Trainable params: 1,318,849
Non-trainable params: 0
____________________________________________

In [36]:
32 * 128 * 7 + 32

28704

In [38]:
32 * 92

2944

온도 예측 데이터 타임스텝 확인

In [48]:
import os

data_dir = './datasets/jena_climate'
fname = os.path.join(data_dir, 'jena_climate_2009_2016.csv')

with open(fname, 'r') as f:
    data = f.read()

# splitlines() plus the blank-line filter keeps a trailing newline (or a
# stray empty row) from becoming an empty record, which could not be parsed
# into a row of floats afterwards.
lines = [line for line in data.splitlines() if line.strip()]
# First line holds the column names; the remaining lines are the records.
header = lines[0].split(',')
lines = lines[1:]

In [49]:
import numpy as np

# Parse every record into floats, dropping the first column of each line
n_columns = len(header) - 1
float_data = np.zeros((len(lines), n_columns))
for i, line in enumerate(lines):
    fields = line.split(',')
    float_data[i] = [float(field) for field in fields[1:]]

# Standardise each column in place, using the mean and standard deviation
# of the first 200000 rows only
mean = float_data[:200000].mean(axis=0)
float_data -= mean
std = float_data[:200000].std(axis=0)
float_data /= std

In [50]:
def generator(data, lookback, delay, min_index, max_index,
             shuffle=False, batch_size=128, step=6):
    """Yield (samples, targets) batches of windowed timeseries data, forever.

    Args:
        data: 2D array with one row per timestep and one column per feature.
        lookback: number of rows before the reference row that each input
            window spans.
        delay: number of rows after the reference row at which the target
            is read.
        min_index: first row of `data` this generator may use.
        max_index: upper bound (exclusive) on the reference rows; None
            means len(data) - delay - 1.
        shuffle: if True, reference rows are drawn at random (with
            replacement); otherwise they are taken in order, restarting
            from the beginning when fewer than batch_size rows remain.
        batch_size: number of reference rows per batch.
        step: sampling period inside the window (every `step`-th row).

    Yields:
        samples: array of shape (len(rows), window_len, data.shape[-1]),
            where window_len is the number of rows in
            range(row - lookback, row, step).
        targets: array of shape (len(rows),) holding column 1 of the row
            `delay` steps after each reference row (the temperature column
            in the climate data used in this notebook).
    """
    if max_index is None:
        max_index = len(data) - delay - 1
    # Size the window from the same range() used to pick the rows. This is
    # lookback // step when step divides lookback and one more otherwise;
    # using lookback // step directly made the assignment below fail with a
    # shape mismatch in the non-divisible case.
    window_len = len(range(0, lookback, step))
    i = min_index + lookback
    while True:
        if shuffle:
            rows = np.random.randint(
                min_index + lookback, max_index, size=batch_size
            )
        else:
            # Wrap around once a full batch no longer fits before max_index
            if i + batch_size >= max_index:
                i = min_index + lookback
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)

        samples = np.zeros((len(rows),
                            window_len,
                            data.shape[-1]))
        targets = np.zeros((len(rows),))
        for j, row in enumerate(rows):
            indices = range(row - lookback, row, step)
            samples[j] = data[indices]
            targets[j] = data[row + delay][1]
        yield samples, targets

In [51]:
lookback = 1440
step = 6
delay = 144
batch_size = 128

# Training batches are drawn at random from the first 200000 rows
train_gen = generator(float_data,
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=200000,
                      shuffle=True,
                      step=step,
                      batch_size=batch_size)

# Validation and test batches are read sequentially (shuffle=False) so that
# drawing val_steps / test_steps batches walks through the range instead of
# sampling rows with replacement, which would repeat some rows and miss others.
val_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=200001,
                    max_index=300000,
                    shuffle=False,
                    step=step,
                    batch_size=batch_size)

test_gen = generator(float_data,
                     lookback=lookback,
                     delay=delay,
                     min_index=300001,
                     max_index=None,
                     shuffle=False,
                     step=step,
                     batch_size=batch_size)

# Number of batches to draw from val_gen to go through the validation range
val_steps = (300000 - 200001 - lookback) // batch_size

# Number of batches to draw from test_gen to go through the test range
test_steps = (len(float_data) - 300001 - lookback) // batch_size

In [53]:
sample = next(train_gen)
len(sample) # 데이터, 타겟

2

In [57]:
sample[0]

array([[[ 1.40130108, -0.80738206, -0.90652231, ...,  0.67613738,
          0.39923698, -1.93211861],
        [ 1.47558973, -0.88193728, -0.98582319, ...,  1.46355715,
          2.5445401 , -1.74831297],
        [ 1.52393631, -0.96101099, -1.06847482, ...,  2.10780969,
          1.68641885, -1.78999252],
        ...,
        [-0.7082608 , -0.18382933, -0.12803334, ..., -0.32603324,
         -0.29584123, -1.92657673],
        [-0.62100111, -0.29905103, -0.24977695, ...,  0.45487893,
          0.50650214, -1.8603051 ],
        [-0.51015772, -0.3577915 , -0.31567487, ..., -0.75553493,
         -0.61763669,  1.54575654]],

       [[-0.34271346,  1.29823804,  1.31501938, ...,  2.25748452,
          3.03795981,  0.63596481],
        [-0.31559221,  1.27112705,  1.28597962, ...,  1.59370917,
          1.53195703,  0.8507126 ],
        [-0.33681754,  1.21125695,  1.2279001 , ...,  1.54815597,
          1.74219673,  0.64520128],
        ...,
        [ 0.50276215,  1.42136711,  1.36863125, ...,  

In [58]:
sample[1]

array([-0.64132726,  1.01357266, -0.0121264 , -0.32955089, -0.40975423,
       -1.15756566, -0.26290304,  0.70066667, -1.51791588, -0.10588524,
       -0.21884769,  0.51540825,  1.42136711,  0.1674839 ,  0.27253898,
       -0.3024399 , -0.98473311, -0.75203046, -0.41540235,  1.29371954,
        0.25220574, -0.26968079, -1.29199098, -0.82545605,  0.86672147,
        0.24542799,  0.31433508,  0.33014983,  0.76279602, -0.79269694,
       -1.49758264, -0.03358927, -1.16660266, -0.34762488,  0.2736686 ,
       -0.54079067, -1.24002825, -0.01551527,  1.10507225,  0.95031369,
       -1.47499015, -0.62099402, -0.63793838,  0.41826054,  0.798944  ,
       -1.65347082, -0.41879123,  0.62272258, -0.64471613, -0.57016091,
       -0.92486301, -2.86442831,  0.32676095,  2.29004837,  2.21549315,
        1.76929146,  0.77296264, -0.11605186,  1.10055375, -0.01890415,
       -1.46708278, -0.04149664, -0.58371641, -0.7034566 ,  0.89948058,
       -2.57524443, -0.46171696, -0.69103074,  1.00679491,  0.35

In [59]:
len(sample[0]), len(sample[1])

(128, 128)

In [60]:
sample[0].shape

(128, 240, 14)

In [67]:
float_data.shape

(420451, 14)

In [70]:
idx = range(0-1440, 0, 3)
idx

range(-1440, 0, 3)

In [72]:
list(idx)[:5]

[-1440, -1437, -1434, -1431, -1428]

In [73]:
len(list(idx))

480

In [76]:
float_data[idx]

array([[ 1.56520779, -1.32023159, -1.42477034, ...,  0.51995494,
         0.30055304,  0.17875983],
       [ 1.56049105, -1.2886021 , -1.39461367, ...,  0.70867538,
         0.33487789,  0.0806224 ],
       [ 1.53690735, -1.29086135, -1.39461367, ...,  0.24663569,
        -0.10276394,  0.21686025],
       ...,
       [ 1.36356716, -1.5280825 , -1.61576261, ..., -0.87917936,
        -0.91368852,  0.08754975],
       [ 1.32347487, -1.46934203, -1.55433235, ..., -1.03536179,
        -1.1024752 ,  0.30460666],
       [ 1.28927851, -1.38236094, -1.46721307, ..., -0.69696652,
        -0.67341457,  0.45008097]])

In [77]:
float_data[144][1]  # 섭씨 온도

-1.5450268701517724

In [79]:
float_data[-3]

array([ 1.28927851, -1.38236094, -1.46721307, -1.76651024, -0.4450241 ,
       -1.11104091, -1.43326305, -0.52391257, -1.43500165, -1.44052643,
        1.66810153, -0.69696652, -0.67341457,  0.45008097])

In [78]:
float_data[0]

array([ 0.90014748, -1.93135845, -1.98211036, -1.86280029,  1.07285236,
       -1.30742164, -1.47375773, -0.79868641, -1.4762674 , -1.47815522,
        2.12375056, -0.72950452, -0.78067973, -0.27613603])

In [None]:
# Excerpt of the batch-building step from the body of generator(), kept for
# reference. It relies on rows, data, lookback, step and delay being defined.
# The original paste mixed indentation levels and could not be parsed.
samples = np.zeros((len(rows),
                    lookback // step,
                    data.shape[-1]))
targets = np.zeros((len(rows),))
for j, row in enumerate(rows):
    indices = range(row - lookback, row, step)
    samples[j] = data[indices]
    targets[j] = data[row + delay][1]

In [68]:
float_data[0]

array([ 0.90014748, -1.93135845, -1.98211036, -1.86280029,  1.07285236,
       -1.30742164, -1.47375773, -0.79868641, -1.4762674 , -1.47815522,
        2.12375056, -0.72950452, -0.78067973, -0.27613603])

In [63]:
# Window settings with a sampling period of 3 rows
lookback, step, delay, batch_size = 1440, 3, 144, 128

train_gen = generator(
    float_data,
    lookback=lookback, delay=delay,
    min_index=0, max_index=200000,
    shuffle=True, step=step, batch_size=batch_size,
)

# Pull a single batch to inspect its shapes
sample1 = next(train_gen)

In [64]:
sample1[0].shape

(128, 480, 14)

In [66]:
sample1[1].shape

(128,)

In [65]:
3 * 480

1440

In [None]:
import numpy as np
import keras

class DataGenerator(keras.utils.Sequence):
    """Serve batches of samples stored as .npy files, with one-hot labels.

    Each sample ID is loaded from '<data_dir>/<ID>.npy' and looked up in
    `labels` to get its integer class.
    """

    def __init__(self, list_IDs, labels, batch_size=32, dim=(32,32,32), n_channels=1,
                 n_classes=10, shuffle=True, data_dir='data'):
        """Store the configuration and build the first epoch's index order.

        Args:
            list_IDs: sequence of sample IDs.
            labels: object indexable by sample ID, giving an integer class.
            batch_size: number of samples per batch.
            dim: shape of one sample, excluding the channel axis.
            n_channels: size of the trailing channel axis.
            n_classes: number of classes used for the one-hot encoding.
            shuffle: whether to reshuffle the sample order at each epoch end.
            data_dir: directory holding the '<ID>.npy' files (default 'data',
                the location that used to be hard-coded).
        """
        # Let the Keras base class initialise its own state first.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.data_dir = data_dir
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Build and return batch number `index` as (X, y)."""
        # Positions (into list_IDs) that belong to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into sample IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Load the samples and their labels
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when `shuffle` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given IDs into X of shape (n_samples, *dim, n_channels)
        and return it with the one-hot encoded labels."""
        # Allocate for the IDs actually requested, so a short batch never
        # returns uninitialised rows from np.empty.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty(n_samples, dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # Store sample; the loaded array must fit shape (*dim, n_channels)
            X[i,] = np.load(f'{self.data_dir}/{ID}.npy')

            # Store class
            y[i] = self.labels[ID]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)