In [None]:

import pandas as pd
import json
import numpy as np
import time

# Load the raw quote snapshots from disk into a plain dict.
# JSON text is UTF-8 by specification, so the encoding is pinned here instead
# of relying on the platform's default text encoding.
with open('data.json', encoding='utf-8') as f:
    data = json.load(f)

# Select matplotlib's Qt backend: figures open in separate interactive
# windows instead of being rendered inline in the notebook.
%matplotlib qt


In [None]:
# Reading to dataframe
# Creating additional columns

# One record per entry of `data`: the dict key (used as the timestamp) followed
# by the bid/ask quotes of both assets.
data_list = [
    (k, v['assetA']['bid'], v['assetA']['ask'], v['assetB']['bid'], v['assetB']['ask'])
    for k, v in data.items()
]
df = pd.DataFrame(
    data_list,
    columns=['timestamp', 'assetA_bid', 'assetA_ask', 'assetB_bid', 'assetB_ask'],
)

# Gap between consecutive timestamps (in the unit of the timestamp keys).
# The first row has no predecessor, so its delta is NaN.
df['delta_t'] = df['timestamp'].astype('uint64').diff()

# Update frequency is the reciprocal of the gap. Computed on the whole column
# at once rather than with a row-wise apply(), which invokes a Python lambda
# per row for the same result.
df['trade_freq'] = 1 / df['delta_t']

# Smooth the frequency over the last 15 updates; the first rows are NaN until
# a full window is available.
df['trade_freq_avg'] = df['trade_freq'].rolling(window=15).mean()

# Keep the timestamp available as a regular column after it becomes the index.
df['ts'] = df['timestamp']
df = df.set_index('timestamp')

In [None]:
# Model description
# Inputs:
#       asset bid history
#       asset ask history
#       common timestamps for each bid-ask pair
#       2D array of [n x 3]
# Outputs:
#       asset bid future [k samples]
#       asset ask future [k samples]
#       2D array of [k x 2]


In [None]:

# training

length = 90  # number of consecutive past samples in each input window

# getting the data
# Build supervised samples from the first column of df_scaled:
#   hist[i]   = rows i .. i+length-1   (look-back window)
#   target[i] = row  i+length          (the value right after that window)
# NOTE(review): the sample count is derived from len(df), as before; this
# presumes df_scaled has the same number of rows as df -- confirm.
n_windows = len(df) - length
if n_windows <= 0:
    raise ValueError(
        f"need more than {length} rows to build a window, got {len(df)}")

# Only column 0 is used downstream, so slice it out once instead of
# materialising every column of every window.
# NOTE(review): assumes df_scaled holds floating-point columns -- confirm.
series = df_scaled.values[:, 0]

# All windows are gathered with a single integer-array index. Calling
# df_scaled.shift(-x) once per window copies the whole frame every time,
# which is quadratic in the number of rows.
window_idx = np.arange(n_windows)[:, None] + np.arange(length)[None, :]
hist = series[window_idx]
target = series[length:length + n_windows]

# Final layouts: hist is (samples, length, 1), target is (samples, 1).
target = target.reshape(-1, 1)
hist = hist.reshape(hist.shape + (1,))
print(hist.shape)
print(target.shape)


# getting training and test array
# NOTE(review): the split points are derived from len(df), while hist/target
# hold len(df) - length samples, so the test share ends up smaller than
# (100 - training_set_ratio) percent of the samples -- confirm this is intended.
df_len = len(df)
training_set_ratio = 75  # percentage of df_len assigned to the training set

# Multiply before the floor division. `df_len//100*ratio` first rounds df_len
# down to a multiple of 100, which misplaces the boundary and yields 0 (an
# empty training set) whenever df_len < 100.
split_idx = df_len * training_set_ratio // 100
X_train, X_test = hist[:split_idx], hist[split_idx:]
y_train, y_test = target[:split_idx], target[split_idx:]

# Second evaluation slice: the middle half of the series.
# NOTE(review): with training_set_ratio = 75 this range lies entirely inside
# the training range, so it is not held-out data.
mid_start, mid_stop = df_len // 4, df_len * 3 // 4
X2_test = hist[mid_start:mid_stop]
y2_test = target[mid_start:mid_stop]

# building LSTM
# Reuse a previously saved model when it can be loaded; otherwise build a
# three-layer LSTM regressor, train it, save it and plot the training loss.
# NOTE(review): `tf`, `models`, `layers` and `plt` are not imported in the
# cells visible here; they must be provided by an earlier cell -- confirm.
try:
    model = models.load_model('saved_model.model')
except Exception as load_err:
    # `except Exception` keeps the train-on-failure fallback but no longer
    # swallows KeyboardInterrupt/SystemExit, and the reason is reported
    # instead of being silently discarded.
    print(f'Could not load saved model ({type(load_err).__name__}: {load_err}); '
          'training a new one.')

    model = tf.keras.Sequential()
    # The input window size follows `length` so the network always matches the
    # windows built above; each sample is (length, 1).
    model.add(layers.LSTM(units=32, return_sequences=True,
                          input_shape=(length, 1), dropout=0.2))
    model.add(layers.LSTM(units=32, return_sequences=True, dropout=0.2))
    # Last recurrent layer returns only its final state for the Dense head.
    model.add(layers.LSTM(units=32, dropout=0.2))
    model.add(layers.Dense(units=1))
    model.summary()

    model.compile(optimizer='adam', loss='mean_squared_error')
    print(tf.__version__)
    history = model.fit(X_train, y_train, epochs=20, batch_size=32)

    model.save('saved_model.model')

    # Training-loss curve, one point per epoch (epochs numbered from 1).
    loss = history.history['loss']
    epoch_count = range(1, len(loss) + 1)
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.plot(epoch_count, loss, 'r--')
    ax.legend(['Training Loss'])
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    plt.show()

