In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
tf.keras.backend.set_floatx('float64')

# Load Data

In [2]:
df = pd.read_csv('training_set.csv')
df.shape

(74278, 7)

In [3]:
df.head()

Unnamed: 0,Time,Weekday,Open,High,Low,Close,Volume
0,170 05:00:00,0,1.12053,1.12079,1.1205,1.12067,302.690002
1,170 05:10:00,0,1.12066,1.12074,1.12051,1.1207,486.690001
2,170 05:20:00,0,1.1207,1.12071,1.12065,1.1207,212.12
3,170 05:30:00,0,1.1207,1.12072,1.1205,1.12061,811.989999
4,170 05:40:00,0,1.1206,1.12079,1.12027,1.12029,502.870001


In [4]:
data = df[['Close']]

# Preprocessing

In [5]:
from sklearn.preprocessing import StandardScaler

In [6]:
ss = StandardScaler()
data = ss.fit_transform(data)

In [7]:
data.shape

(74278, 1)

# Create Dataset

In [250]:
window = 10 #hyperparam
n_output_timestamp = 1 #hyperparam

In [251]:
X, Y = [], []
for i in range(len(data)-window-n_output_timestamp):
    X.append( data[i:i+window, :] )
    Y.append( data[i+window:i+window+n_output_timestamp, 0] )
X = np.array(X)    
Y = np.array(Y)  
X.shape, Y.shape

((74267, 10, 1), (74267, 1))

In [252]:
dataset = tf.data.Dataset.from_tensor_slices((X, Y)).shuffle(1024).batch(1024)

for x, y in dataset:
    print(x.numpy().shape, y.numpy().shape)
    break
    
x, y = next(iter(dataset))
print(x.numpy().shape, y.numpy().shape)

(1024, 10, 1) (1024, 1)
(1024, 10, 1) (1024, 1)


# Build Model

In [257]:
model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(64, 5, 2, activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1),
])

In [258]:
model(x).shape

TensorShape([1024, 1])

In [259]:
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.Adam(1e-2),
              metrics=['mse'])

In [260]:
history = model.fit(dataset, epochs=1,) #0.0016



In [265]:
model.save_weights('model_1dCNN_epo1_0.0016.h5')

In [296]:
model.load_weights('model_1dCNN_epo1_0.0016.h5')

In [297]:
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.SGD(1e-1),
              metrics=['mse'])

In [298]:
history = model.fit(dataset, epochs=1,) #0.0014



In [302]:
model.save_weights('model_1dCNN_epo2_0.0014.h5')

In [320]:
model.load_weights('model_1dCNN_epo2_0.0014.h5')

In [321]:
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.SGD(1e-2),
              metrics=['mse'])

In [322]:
history = model.fit(dataset, epochs=1,) 



In [326]:
history = model.fit(dataset, epochs=3,) #0.0022

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [330]:
history = model.fit(dataset, epochs=3,) #0.0010

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [334]:
model.save_weights('model_1dCNN_epo9_0.0010.h5')

In [404]:
model.load_weights('model_1dCNN_epo9_0.0010.h5')

# Predict Test & Save Out

In [410]:
# pred test
n_pred_timestamp = 30
data_ = np.array(list(data.flatten()) + [np.nan]*n_pred_timestamp)
for i in range(len(data)-window-n_output_timestamp, 
               len(data)-window-n_output_timestamp+n_pred_timestamp):
    X_test = data_[None, i:i+window, None]
    assert(np.isnan(X_test).sum()==0 )
    #print(i, X_test.shape)
    y_test = model(X_test)
    assert(y_test.shape==(1,1) )
    data_[i+window+1] = float(y_test)


In [411]:
pred = data_[-30:]
pred_ = ss.inverse_transform(pred)
pred_

array([1.12484168, 1.12485839, 1.12480041, 1.12464811, 1.12437306,
       1.12417243, 1.12380252, 1.12347339, 1.12314359, 1.12292459,
       1.12262859, 1.12238928, 1.12216042, 1.12191717, 1.12164915,
       1.12140101, 1.12114995, 1.12091742, 1.12069019, 1.1204807 ,
       1.12027082, 1.120067  , 1.11986654, 1.11967284, 1.11948144,
       1.11929867, 1.11912059, 1.11894913, 1.11878153, 1.11861944])

In [412]:
op_df = pd.read_csv('sample_submission.csv')
op_df['Close'] = pred_
op_df.to_csv('submit/my_submission_1dCNN.csv', index=False)