In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
tf.keras.backend.set_floatx('float64')

# Load Data

In [2]:
df = pd.read_csv('training_set.csv')
df.shape

(74278, 7)

In [3]:
df.head()

Unnamed: 0,Time,Weekday,Open,High,Low,Close,Volume
0,170 05:00:00,0,1.12053,1.12079,1.1205,1.12067,302.690002
1,170 05:10:00,0,1.12066,1.12074,1.12051,1.1207,486.690001
2,170 05:20:00,0,1.1207,1.12071,1.12065,1.1207,212.12
3,170 05:30:00,0,1.1207,1.12072,1.1205,1.12061,811.989999
4,170 05:40:00,0,1.1206,1.12079,1.12027,1.12029,502.870001


In [4]:
data = df[['Close']]

# Preprocessing

In [5]:
from sklearn.preprocessing import StandardScaler

In [6]:
ss = StandardScaler()
data = ss.fit_transform(data)

In [7]:
data.shape

(74278, 1)

# Create Dataset

In [8]:
window = 10 #hyperparam
n_output_timestamp = 1 #hyperparam

In [9]:
X, Y = [], []
for i in range(len(data)-window-n_output_timestamp):
    X.append( data[i:i+window, :] )
    Y.append( data[i+window:i+window+n_output_timestamp, 0] )
X = np.array(X)    
Y = np.array(Y)  
X.shape, Y.shape

((74267, 10, 1), (74267, 1))

In [10]:
dataset = tf.data.Dataset.from_tensor_slices((X, Y)).shuffle(1024).batch(1024)

for x, y in dataset:
    print(x.numpy().shape, y.numpy().shape)
    break
    
x, y = next(iter(dataset))
print(x.numpy().shape, y.numpy().shape)

(1024, 10, 1) (1024, 1)
(1024, 10, 1) (1024, 1)


# Build Model

In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [12]:
model(x).shape

TensorShape([1024, 1])

In [13]:
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.Adam(3e-2),
              metrics=['mse'])

In [14]:
history = model.fit(dataset, epochs=1)



In [19]:
history = model.fit(dataset, epochs=1)#0.0019



In [23]:
model.save_weights('model/model_lstm_one_layer_epo2_0.0019.h5')

In [24]:
history = model.fit(dataset, epochs=1)



In [28]:
model.save_weights('model/model_lstm_one_layer_epo3_0.0006.h5')

In [63]:
model.load_weights('model/model_lstm_one_layer_epo3_0.0006.h5')

In [64]:
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.SGD(3e-3),
              metrics=['mse'])

In [65]:
history = model.fit(dataset, epochs=1,)



In [69]:
history = model.fit(dataset, epochs=1,)



In [73]:
model.save_weights('model/model_lstm_one_layer_epo5_0.0005.h5')

In [75]:
history = model.fit(dataset, epochs=1,)



In [79]:
model.save_weights('model/model_lstm_one_layer_epo6_0.0005.h5')

In [98]:
model.load_weights('model/model_lstm_one_layer_epo6_0.0005.h5')

# Predict Test & Save Out

In [99]:
# pred test
n_pred_timestamp = 30
data_ = np.array(list(data.flatten()) + [np.nan]*n_pred_timestamp)
for i in range(len(data)-window-n_output_timestamp, 
               len(data)-window-n_output_timestamp+n_pred_timestamp):
    X_test = data_[None, i:i+window, None]
    assert(np.isnan(X_test).sum()==0 )
    #print(i, X_test.shape)
    y_test = model(X_test)
    assert(y_test.shape==(1,1) )
    data_[i+window+1] = float(y_test)


In [100]:
pred = data_[-30:]
pred_ = ss.inverse_transform(pred)
pred_

array([1.12622769, 1.1261066 , 1.12591331, 1.12572281, 1.12554361,
       1.12539017, 1.12522876, 1.12505063, 1.12487547, 1.12470406,
       1.12453488, 1.12436324, 1.12418678, 1.12401131, 1.12383878,
       1.12366637, 1.12349213, 1.123317  , 1.12314291, 1.12296974,
       1.12279632, 1.12262241, 1.12244871, 1.12227572, 1.12210325,
       1.12193094, 1.12175888, 1.12158742, 1.12141668, 1.12124657])

In [101]:
op_df = pd.read_csv('sample_submission.csv')
op_df['Close'] = pred_
op_df.to_csv('submit/my_submission_lstm.csv', index=False) 