In [None]:
cp -r ~/datasets/meetup/backup/CrudeOil ../

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
print(tf.__version__)

# Load Data

In [None]:
# Read the CSV from the CrudeOil directory one level above the notebook
# (presumably daily CL=F quotes, judging by the file name — confirm against the data source).
data = pd.read_csv("../CrudeOil/CL=F_daily.csv")
# Display (rows, columns) as the cell output.
data.shape

In [None]:
# Preview the first rows to see which columns were parsed.
data.head()

# Remove NaN

In [None]:
# Plot the Close column against the row index, before any NaN filtering.
data.Close.plot()

In [None]:
# Number of rows whose Close value is missing.
data["Close"].isna().sum()

In [None]:
# Keep only the rows with a Close value, then pick the dates of exactly those rows
# so that `series` and `time_str` share the same index.
series = data["Close"].dropna()
time_str = data["Date"].loc[series.index]
print(series.shape)

# Define Parameters

In [None]:
# Number of consecutive observations given to the model as one input window.
window_size = 14
# Each sample spans window_size + delay points; the target is the last point of that
# span, i.e. `delay` steps after the final input value.
delay = 10

# Split Train/Valid

In [None]:
# Fraction of the cleaned series that goes into the training split.
train_ratio = 0.95
# Compute the split position on the NaN-free series: `series` and `time_str` are
# shorter than `data` once missing rows are dropped, so len(data) would push the
# split towards the end (or past it when many rows are missing).
split_time = int(len(series)*train_ratio)
print("split_time_str: ", time_str.iloc[split_time])

In [None]:
# All slices are positional, hence the explicit .iloc.
x_train = series.iloc[:split_time]
# A window holds window_size + delay points with the target at its last position.
# Starting (window_size + delay - 1) points before the split makes the first
# validation target series.iloc[split_time]; starting one point earlier would reuse
# the last training value as a validation target.
x_valid = series.iloc[split_time-(window_size+delay-1):]
# Seed window for the forecast: the most recent window_size observations.
x_forcast = series.iloc[-window_size:]
print(x_train.shape, x_valid.shape, x_forcast.shape)

In [None]:
# Draw the three slices on one axis, each against its own index, so the overlap
# between them is visible.
plt.plot(x_train.index, x_train.values, label="train")
plt.plot(x_valid.index, x_valid.values, label="valid")
plt.plot(x_forcast.index, x_forcast.values, label="forcast")
plt.legend()

# Prepare Windowed Data

In [None]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer, delay=1, train=True):
    """Turn a 1-D series into batched (input window, target) pairs.

    Every sample is cut from a run of ``window_size + delay`` consecutive values:
    the first ``window_size`` values form the model input and the final value of
    the run is the target. Runs are taken with a shift of one element and
    incomplete runs at the end are dropped.

    Args:
        series: 1-D sequence of values passed to ``from_tensor_slices``.
        window_size: number of leading values of each run used as input.
        batch_size: number of samples per emitted batch.
        shuffle_buffer: shuffle buffer size; only used when ``train`` is true.
        delay: number of extra values after the input window; the last one is the target.
        train: shuffle the samples when true, keep them in order otherwise.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs, targets)`` batches.
    """
    span = window_size + delay
    windows = (
        tf.data.Dataset.from_tensor_slices(series)
        .window(span, shift=1, drop_remainder=True)
        # Each window is itself a dataset; batching it by its full length
        # flattens it into a single tensor of `span` values.
        .flat_map(lambda chunk: chunk.batch(span))
    )
    if train:
        windows = windows.shuffle(shuffle_buffer)
    pairs = windows.map(lambda chunk: (chunk[:window_size], chunk[-1]))
    return pairs.batch(batch_size).prefetch(1)

In [None]:
# Reset Keras' global state so re-running the notebook starts from a clean slate.
tf.keras.backend.clear_session()
# Seed TensorFlow and NumPy with the same fixed value for repeatable runs.
tf.random.set_seed(51)
np.random.seed(51)

In [None]:
# Batch size shared by both datasets, so the value lives in exactly one place.
batch_size = 128
shuffle_buffer_size = 1000

# Dates of the targets produced by windowed_dataset: the target of the first window
# sits at position window_size + delay - 1 of its slice, and every later position
# is the target of exactly one window. Deriving the dates from the slices keeps
# their lengths equal to the number of samples in each dataset.
time_str_train = time_str.loc[x_train.index[window_size+delay-1:]]
time_str_valid = time_str.loc[x_valid.index[window_size+delay-1:]]
print(time_str_train.shape, time_str_valid.shape)

train_set = windowed_dataset(x_train, window_size, batch_size=batch_size, shuffle_buffer=shuffle_buffer_size, delay=delay)
# train=False keeps the validation windows in chronological order.
valid_set = windowed_dataset(x_valid, window_size, batch_size=batch_size, shuffle_buffer=shuffle_buffer_size, delay=delay, train=False)

In [None]:
def check_data(dataset):
    """Print the shapes of the first batch and the total number of records.

    Args:
        dataset: iterable of ``(x, y)`` batches whose elements expose ``.shape``;
            the leading dimension of ``x`` is counted as the batch's record count.
    """
    total = 0
    first_batch = True
    for x, y in dataset:
        if first_batch:
            print("x.shape: ", x.shape, ", y.shape: ", y.shape)
            first_batch = False
        total += x.shape[0]
    print("#records: ", total)
    
# Sanity-check both pipelines: first-batch shapes and the total number of samples.
check_data(train_set)
check_data(valid_set)

# Train Model

In [None]:
model = tf.keras.models.Sequential([
  # Add a trailing feature axis: (batch, time) -> (batch, time, 1).
  tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                      input_shape=[None]),
  # Scale the inputs down by a fixed factor of 100 before the trainable layers.
  tf.keras.layers.Lambda(lambda x: x / 100.0),
  # --- add your code here!! ---
  # The placeholder is a comment rather than a string literal: a bare string inside
  # the layer list is not a valid layer. Whatever goes here must collapse the time
  # axis so that Dense(1) receives (batch, features). The LSTM below is only a
  # minimal baseline that lets the notebook run end-to-end; replace or extend it
  # with your own layers.
  tf.keras.layers.LSTM(32),
  tf.keras.layers.Dense(1),
  # Undo the input scaling, then drop the size-1 output axis: (batch, 1) -> (batch,).
  tf.keras.layers.Lambda(lambda x: x * 100.0),
  tf.keras.layers.Lambda(lambda x: tf.squeeze(x, axis=-1) ),
])

In [None]:
# Learning-rate sweep: start at 1e-4 and multiply by 10 every 20 epochs, so the
# loss can be inspected as a function of the learning rate.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-4 * 10**(epoch / 20))
# `learning_rate` is the supported argument name; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9)
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

history = model.fit(train_set, validation_data=valid_set, epochs=20, callbacks=[lr_schedule])

# Validation

In [None]:
y_true = []
y_pred = []

# Collect targets and predictions batch by batch. valid_set is built with
# train=False, so the concatenated lists stay in the order of the series.
for x,y in valid_set:
    y_hat = model.predict(x)
    y_true.extend(y.numpy())
    # Accumulate rather than overwrite, so predictions of every batch are kept
    # and y_pred ends up with the same length as y_true.
    y_pred.extend(y_hat)

# Index labels of the targets: the first window's target sits at position
# window_size + delay - 1 of x_valid.
time_index = list(x_valid[window_size+delay-1:].index)
plt.plot(time_index, y_true, label="valid_true",c="orange")
plt.plot(time_index, y_pred, label="valid_pred",c="red")
plt.legend()

In [None]:
# Display the date strings associated with the validation period.
time_str_valid

# Forecast

In [None]:
forcast_days = 200
# Seed the rolling forecast with the last window_size observed values.
forcast_data = list(x_forcast.values)
time_index = list(x_forcast.index)

# Roll the window forward one step at a time, feeding each prediction back in.
# NOTE(review): the model is trained to predict `delay` steps past the input window,
# while this loop treats every prediction as the very next step — confirm intended.
for day in range(forcast_days):
    x = np.array(forcast_data[day:day+window_size])[np.newaxis]
    y_hat = model.predict(x)
    # Keep the prediction as a float: int() would truncate the fractional part and
    # feed that rounding error into every later window.
    forcast_data.append(float(np.ravel(y_hat)[0]))
    # The index is integer-based, so the next step is simply the last label + 1.
    time_index.append(time_index[-1]+1)


plt.plot(x_train, label="train")
plt.plot(x_valid, label="valid")
plt.plot(time_index, forcast_data, label="forcast_pred")
plt.legend()


In [None]:
# Same plot as above, zoomed in on the tail of the series.
plt.plot(x_train, label="train")
plt.plot(x_valid, label="valid")
# Show only the forcast_days predicted points, without the observed seed window.
plt.plot(time_index[-forcast_days:], forcast_data[-forcast_days:], label="forcast_pred")
# Hard-coded left limit: only index positions from 2300 onwards are displayed.
plt.xlim([2300,None])
plt.legend()  