In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
tf.keras.backend.set_floatx('float64')

# Load Data

In [2]:
df = pd.read_csv('training_set.csv')
df.shape

(74278, 7)

In [3]:
df.head()

Unnamed: 0,Time,Weekday,Open,High,Low,Close,Volume
0,170 05:00:00,0,1.12053,1.12079,1.1205,1.12067,302.690002
1,170 05:10:00,0,1.12066,1.12074,1.12051,1.1207,486.690001
2,170 05:20:00,0,1.1207,1.12071,1.12065,1.1207,212.12
3,170 05:30:00,0,1.1207,1.12072,1.1205,1.12061,811.989999
4,170 05:40:00,0,1.1206,1.12079,1.12027,1.12029,502.870001


In [4]:
data = df[['Close']]

# Preprocessing

In [5]:
from sklearn.preprocessing import StandardScaler

In [6]:
ss = StandardScaler()
data = ss.fit_transform(data)

In [7]:
data.shape

(74278, 1)

# Create Dataset

In [8]:
window = 30 #hyperparam
n_output_timestamp = 30 #hyperparam

In [9]:
X, Y = [], []
for i in range(len(data)-window-n_output_timestamp):
    X.append( data[i:i+window, :] )
    Y.append( data[i+window:i+window+n_output_timestamp, :] )
X = np.array(X)    
Y = np.array(Y)  
X.shape, Y.shape

((74218, 30, 1), (74218, 30, 1))

In [10]:
dataset = tf.data.Dataset.from_tensor_slices((X, Y)).shuffle(1024).batch(1024)

for x, y in dataset:
    print(x.numpy().shape, y.numpy().shape)
    break
    
x, y = next(iter(dataset))
print(x.numpy().shape, y.numpy().shape)

(1024, 30, 1) (1024, 30, 1)
(1024, 30, 1) (1024, 30, 1)


# Build Model

In [11]:
from typing import Tuple

In [12]:
class Encoder(tf.keras.Model):

    def __init__(self, n_input_feature: int, enc_units: int) -> None:
        super(Encoder, self).__init__()
        self.enc_units = enc_units
        self.rnn = tf.keras.layers.GRU(self.enc_units, return_sequences=True, return_state=True)

    def call(self, x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
        """
        batch_size=B, n_input_timestamp=T_in
        n_input_feature=F_in, enc_units=H_enc
        input:
            x.shape : (B, T_in, F_in)
        return:
            output.shape : (B, T_in, H_enc)
            state.shape : (B, H_enc)
        """
        output, state = self.rnn(x)
        return output, state
    
# try
#e = Encoder(6,100)
#enc_output, enc_hidden =  e(x)
#x.shape, enc_output.shape, enc_hidden.shape

In [13]:
class BahdanauAttention(tf.keras.Model):

    def __init__(self, units: int) -> None:
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query: tf.Tensor, values: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
        """
        batch_size=B, n_input_timestamp=T_in
        enc_units=H_enc, dec_units=H_dec, units=U
        input:
            query.shape : (B, H_dec)
            values.shape : (B, T_in, H_enc)
        return:
            context_vector.shape : (B, H_enc)
            attention_weights: (B, T_in, 1)
        """
        hidden_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights
    
# try    
#att = BahdanauAttention(70)
#dec_hidden = enc_hidden[:, :90] #init
#context_vector, attention_weights = att(dec_hidden, enc_output)
#enc_hidden.shape, dec_hidden.shape, enc_output.shape, context_vector.shape, attention_weights.shape

In [14]:
class Decoder(tf.keras.Model):

    def __init__(self, n_output_feature: int, dec_units: int) -> None:
        super(Decoder, self).__init__()
        self.dec_units = dec_units
        self.rnn = tf.keras.layers.GRU(self.dec_units, return_sequences=True, return_state=True)
        self.fc = tf.keras.layers.Dense(n_output_feature)
        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x: tf.Tensor, hidden: tf.Tensor, enc_output: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
        """
        batch_size=B, n_output_timestamp=T_out:=1, n_input_timestamp=T_in
        n_output_feature=F_out, 
        enc_units=H_enc, dec_units=units=H_dec, 
        input:
            x.shape : (B, T_out=1, F_out)
            hidden.shape : (B, H_dec)
            enc_output.shape : (B, T_in, H_enc)
        return:
            x.shape : (B, T_out=1, F_out)
            state.shape : (B, H_dec)
            attention_weights.shape : (B, T_in, 1)
        """
        context_vector, attention_weights = self.attention(hidden, enc_output)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)# (B, 1, H + F_out)
        output, state = self.rnn(x) # (B, T_out=1, H), (B, H)
        #output = tf.reshape(output, (-1, output.shape[2])) # (B, H)
        x = self.fc(output)# (B, T_out=1, F_out)
        return x, state, attention_weights
# try    
#d = Decoder(1,80)
#dec_input = y[:,0:1,:]
#predictions, dec_hidden, _ = d(dec_input, dec_hidden, enc_output)
#predictions.shape, dec_hidden.shape

In [15]:
class Seq2Seq(tf.keras.Model):
    def __init__(self, 
                 n_input_timestamp: int, 
                 n_input_feature: int, 
                 enc_units: int,
                 n_output_timestamp: int, 
                 n_output_feature: int, 
                 dec_units: int) -> None:
        super(Seq2Seq, self).__init__()
        assert(n_input_feature==n_output_feature)
        self.n_input_timestamp = n_input_timestamp
        self.n_output_timestamp = n_output_timestamp
        self.n_input_feature = n_input_feature
        self.n_output_feature = n_output_feature
        #self.fc_enc2dec = tf.keras.layers.Dense(n_output_feature) # enc2dec_hidden
        self.enc = Encoder(n_input_feature, enc_units)
        self.dec = Decoder(n_output_feature, dec_units)
        
    def call(self, source, target=None, test_mode=False):
        """
        input:
            source.shape : (B, T_in, F_in)
            target.shape : (B, T_out, F_out)
        return:
            pred_target : (B, T_out, F_out)
        """
        enc_output, enc_hidden =  self.enc(source)
        #dec_hidden = self.self.fc_enc2dec(enc_hidden) 
        dec_hidden = enc_hidden #init
        dec_input = source[:,-1:,:] #init
        pred_target = []
        if test_mode or (target is None) :   
            for t in range(self.n_output_timestamp):
                predictions, dec_hidden, _ = self.dec(dec_input, dec_hidden, enc_output)
                dec_input = predictions
                pred_target += [predictions]
        else:
            for t in range(self.n_output_timestamp):
                predictions, dec_hidden, _ = self.dec(dec_input, dec_hidden, enc_output)
                dec_input = target[:,t:t+1,:] #(B,1,F)  # using teacher forcing
                pred_target += [predictions]
   
        return tf.concat(pred_target, 1)        
# try                
#s2s = Seq2Seq(200,6,100,30,6,100)    
#tgt = s2s(x,y, test_mode=0)
#tgt.shape

In [16]:
@tf.function
def train_step(model:tf.keras.Model, src: tf.Tensor, tgt: tf.Tensor):
    loss = 0
    with tf.GradientTape() as tape:
        tgt_ = model(src, tgt, test_mode=0)
        loss = tf.reduce_mean(tf.abs(tgt-tgt_))
        
    variables = model.trainable_variables 
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return loss    

In [17]:
model = Seq2Seq(window,X.shape[-1],100,n_output_timestamp,Y.shape[-1],100)  

In [18]:
# try run (without backward)
for x, y in dataset:
    tgt = model(x,y, test_mode=0)
    break

In [19]:
def train(model, epochs=1):
    for epoch in range(epochs):
        loss_ = []
        for bn, (x, y) in enumerate(dataset):
            loss = train_step(model, x, y)
            loss_.append(loss.numpy())
            if bn%5 ==0 : print(f"[{epoch}][{bn}]", loss.numpy())
        print(f"[{epoch}]", np.mean(loss_))

In [20]:
model.load_weights('model/model_seq2seq_epo8_0.00128.h5') 

# Predict Test & Save Out

In [21]:
# pred test
n_pred_timestamp = 30
data_ = np.array(list(data.flatten()) + [np.nan]*n_pred_timestamp)
for i in range(len(data)-window-n_output_timestamp, 
               len(data)-window-n_output_timestamp+n_pred_timestamp):
    X_test = data_[None, i:i+window, None]
    assert(np.isnan(X_test).sum()==0 )
    print(i, X_test.shape)
    y_test = model(X_test)
    print(i, y_test[0,:,0].shape)
    data_[-30:] = y_test[0,:,0]
    break


74218 (1, 30, 1)
74218 (30,)


In [22]:
pred = data_[-30:].reshape((-1,1))
pred_ = ss.inverse_transform(pred)
pred_.shape

(30, 1)

In [23]:
op_df = pd.read_csv('sample_submission.csv')
op_df['Close'] = pred_
op_df.to_csv('submit/my_submission_seq2seq.csv', index=False) # mse: 0.0007