<a href="https://colab.research.google.com/github/minzzii-kim/machine-learing/blob/main/tensorflow_note_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# [문제 5 - 시계열 데이터]
# 패턴의 일부가 주어졌을때 미래 패턴을 예측하는것
# ex) 주가예측

# Conv1D Layer를 통해 특성추출


In [9]:
import csv
import tensorflow as tf
import numpy as np
import urllib

from tensorflow.keras.layers import Conv1D, Dense, LSTM, Lambda
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import Huber



In [10]:
url = 'https://storage.googleapis.com/download.tensorflow.org/data/Sunspots.csv'
urllib.request.urlretrieve(url, 'sunspots.csv')

with open('sunspots.csv') as f:
  reader = csv.reader(f, delimiter=',')
  next(reader) #header skip
  i=0
  for row in reader:
    print(row)
    i+=1
    if i==10:
      break



['0', '1749-01-31', '96.7']
['1', '1749-02-28', '104.3']
['2', '1749-03-31', '116.7']
['3', '1749-04-30', '92.8']
['4', '1749-05-31', '141.7']
['5', '1749-06-30', '139.2']
['6', '1749-07-31', '158.0']
['7', '1749-08-31', '110.5']
['8', '1749-09-30', '126.5']
['9', '1749-10-31', '125.8']


In [11]:
# 데이터셋 만들기
l_ts = []
l_sp = []

with open('sunspots.csv') as f:
  reader = csv.reader(f, delimiter=',')
  next(reader)
  for row in reader:
    l_ts.append(int(row[0])) #typecasting!!
    l_sp.append(float(row[2]))

# list to numpy array 
arr_ts = np.array(l_ts)
arr_sp = np.array(l_sp)

print(len(arr_ts)) #3235

3235


In [12]:
# Train/Valid 셋트 구성
# x -> timestamp, y-> sunspots

TRAIN_SIZE=3000
train_x = arr_ts[:TRAIN_SIZE]
train_y = arr_sp[:TRAIN_SIZE]

valid_x = arr_ts[TRAIN_SIZE:]
valid_y = arr_sp[TRAIN_SIZE:]



In [13]:
# [Window Dataset Loader]

window_size = 30
batch_size = 32
shuffle_size=1000

# 주어짐
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    series = tf.expand_dims(series, axis=-1) # 1D->2D
    ds = tf.data.Dataset.from_tensor_slices(series) #array, list를 tf dataset 자료구조로 바꿈
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    ds = ds.shuffle(shuffle_buffer)
    ds = ds.map(lambda w: (w[:-1], w[1:])) # x,y를 튜플로
    return ds.batch(batch_size).prefetch(1)

In [14]:
train_ds = windowed_dataset(train_x, 
                            window_size=window_size,
                            batch_size=batch_size,
                            shuffle_buffer=shuffle_size
                            )
valid_ds = windowed_dataset(valid_x, 
                            window_size=window_size,
                            batch_size=batch_size,
                            shuffle_buffer=shuffle_size
                            )

In [16]:
# 모델정의
model = Sequential([
  Conv1D(60,
        kernel_size=5,
        padding='causal', 
        activation='relu',
        input_shape=[None,1]),
  LSTM(60, return_sequences=True),
  LSTM(60, return_sequences=True),
  Dense(32, activation='relu'),
  Dense(16, activation='relu'),
  Dense(1),
  Lambda(lambda x: x*400)       
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, None, 60)          360       
                                                                 
 lstm (LSTM)                 (None, None, 60)          29040     
                                                                 
 lstm_1 (LSTM)               (None, None, 60)          29040     
                                                                 
 dense (Dense)               (None, None, 32)          1952      
                                                                 
 dense_1 (Dense)             (None, None, 16)          528       
                                                                 
 dense_2 (Dense)             (None, None, 1)           17        
                                                                 
 lambda (Lambda)             (None, None, 1)           0

In [18]:
# 모델생성
optimizer = SGD(learning_rate=1e-5, momentum=0.9)
loss=Huber()

model.compile(optimizer=optimizer, loss=loss, metrics=['mae'])

In [20]:
#학습

checkpoint_path = 'my_checkpoint.ckpt'
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_mae',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

model.fit(train_ds, epochs=100, validation_data=(valid_ds), callbacks=[checkpoint])
model.load_weights(checkpoint_path)
model.evaluate(valid_ds)

Epoch 1/100
     92/Unknown - 4s 47ms/step - loss: 814.5408 - mae: 815.0410
Epoch 00001: val_mae improved from inf to 1243.67712, saving model to my_checkpoint.ckpt
Epoch 2/100
Epoch 00002: val_mae did not improve from 1243.67712
Epoch 3/100
Epoch 00003: val_mae did not improve from 1243.67712
Epoch 4/100
Epoch 00004: val_mae did not improve from 1243.67712
Epoch 5/100
Epoch 00005: val_mae did not improve from 1243.67712
Epoch 6/100
Epoch 00006: val_mae did not improve from 1243.67712
Epoch 7/100
Epoch 00007: val_mae did not improve from 1243.67712
Epoch 8/100
Epoch 00008: val_mae did not improve from 1243.67712
Epoch 9/100
Epoch 00009: val_mae did not improve from 1243.67712
Epoch 10/100
Epoch 00010: val_mae did not improve from 1243.67712
Epoch 11/100
Epoch 00011: val_mae did not improve from 1243.67712
Epoch 12/100
Epoch 00012: val_mae did not improve from 1243.67712
Epoch 13/100
Epoch 00013: val_mae did not improve from 1243.67712
Epoch 14/100
Epoch 00014: val_mae did not improve f

[1243.1771240234375, 1243.6771240234375]