In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import os
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
#os.environ["CUDA_VISIBLE_DEVICES"] = ""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from os import path, getcwd
import zipfile


#tf.config.threading.set_inter_op_parallelism_threads(2)
#tf.config.threading.set_intra_op_parallelism_threads(2)
# detect and init the TPU
# tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)

# instantiate a distribution strategy
# tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

# --- Training hyper-parameters ---
epochs = 50
batch_size = 128  # 16 * tpu_strategy.num_replicas_in_sync
window_size = 2 * batch_size  # must be a multiple of batch_size
validation_size = 8192 * batch_size  # must be a multiple of batch_size
test_size = 8192 * batch_size  # must be a multiple of batch_size
ma_periods = 14  # Simple Moving Average periods length

# --- Data selection / reproducibility ---
ticker = 'btcusd'  # Your data file name without extension
start_date = '2014-09-17'
seed = 42  # An arbitrary value to make sure your seed is the same

# --- Output artefacts (all names share the same run tag) ---
_run_tag = f"{ticker}-{batch_size}-{window_size}-{ma_periods}"
model_path = f"/kaggle/working/models/{_run_tag}"
model_lite_path = f"{model_path}/model.tflite"
scaler_path = f'/kaggle/working/scalers/{_run_tag}.bin'

# --- Input datasets ---
_dataset_dir = '/kaggle/input/btcusdt-datasets'
full_time_series_path = "/kaggle/input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2020-09-14.csv"
train_time_series_path = f'{_dataset_dir}/{ticker}-train.csv'
validate_time_series_path = f'{_dataset_dir}/{ticker}-validate.csv'
test_time_series_path = f'{_dataset_dir}/{ticker}-test.csv'

# Input data files are available in the read-only "../input/" directory.
# The helpers below build sliding-window samples and zip the working directory.
def get_train(values, window_size):
    """Build sliding-window training samples from a univariate series.

    Sample ``k`` is ``values[k:k + window_size]`` and its target is the
    element that immediately follows the window, ``values[k + window_size]``.

    Args:
        values: array-like of shape ``(n,)`` or ``(n, 1)``.
        window_size: number of consecutive observations per sample.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(max(n - window_size, 0), window_size, 1)`` and ``y`` holds the
        matching targets (same trailing shape as ``values``).
    """
    series = np.asarray(values)
    n_samples = len(series) - window_size
    if n_samples <= 0:
        # Not enough data for a single window: return correctly shaped empty
        # arrays instead of failing on the shape of an empty 1-D array.
        X = np.empty((0, window_size, 1), dtype=series.dtype)
    else:
        X = np.asarray([series[end - window_size:end]
                        for end in range(window_size, len(series))])
        X = X.reshape(n_samples, window_size, 1)
    # Copy so the targets never alias the caller's input array.
    y = series[window_size:].copy()
    print(f"X {X.shape}, y {y.shape}")
    return X, y

def get_val(values, window_size):
    """Build sliding-window validation samples from a univariate series.

    Sample ``k`` is ``values[k:k + window_size]``; its target is the element
    right after the window, so the targets are ``values[window_size:]``.

    Args:
        values: array-like of shape ``(n,)`` or ``(n, 1)``.
        window_size: number of consecutive observations per sample.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(max(n - window_size, 0), window_size, 1)`` and ``y`` is the slice
        of ``values`` holding the matching targets.
    """
    series = np.asarray(values)
    n_samples = len(series) - window_size
    if n_samples <= 0:
        # Too short for even one window: return empty, correctly shaped
        # arrays rather than failing on the shape of an empty 1-D array.
        X = np.empty((0, window_size, 1), dtype=series.dtype)
    else:
        X = np.asarray([series[end - window_size:end]
                        for end in range(window_size, len(series))])
        X = X.reshape(n_samples, window_size, 1)
    # Explicit start index: a negative-length slice such as values[-0:] would
    # return the whole series when there are zero windows.
    y = series[window_size:]
    print(f"X {X.shape}, y {y.shape}")
    return X, y

def zipdir(path, ziph):
    """Add every file found under ``path`` to an open zip archive.

    Files whose name ends with "zip" are skipped. Each file is written under
    the path produced by joining its walk directory and file name.

    Args:
        path: root directory to walk recursively.
        ziph: writable ``zipfile.ZipFile`` handle.
    """
    for folder, _subdirs, filenames in os.walk(path):
        keep = (name for name in filenames if not name.endswith("zip"))
        for name in keep:
            ziph.write(os.path.join(folder, name))


# Create the output folders for saved models and scalers if they are missing.
for _out_dir in ("/kaggle/working/models", "/kaggle/working/scalers"):
    os.makedirs(_out_dir, exist_ok=True)

Begin training

In [None]:

# Seed NumPy's and TensorFlow's global random generators with the configured
# `seed` so that repeated runs start from the same random state.
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Load the training split, indexed by its parsed 'Timestamp' column.
df = pd.read_csv(
    train_time_series_path,
    index_col=['Timestamp'],
    parse_dates=['Timestamp'],
    dayfirst=True,
)
df

In [None]:
# One panel per training column: (axes, column / title, y-axis label).
fig = plt.figure(figsize=(24, 18))
ax1, ax2, ax3 = fig.subplots(3)
_panels = (
    (ax1, 'HLAvg', 'High-Low Average'),
    (ax2, 'MA', 'MA'),
    (ax3, 'Returns', 'Returns'),
)
for _ax, _column, _ylabel in _panels:
    _ax.set_title(_column)
    _ax.set(xlabel='Timestamp', ylabel=_ylabel)
    _ax.plot(df[_column])

In [None]:
# Fit a MinMaxScaler (default settings) on the training 'Returns' column and
# keep the scaled values. The double-bracket selection yields a 2-D array,
# which is the layout fit_transform is given here.
scaler = MinMaxScaler()
train_values = scaler.fit_transform(df[['Returns']].values)

In [None]:
# Visual check of the scaled training series.
fig = plt.figure(figsize=(24, 8))
ax1 = fig.subplots(1)
ax1.set(title='Returns MinMax Scaled', xlabel='Sample', ylabel='Scaled Returns')
ax1.plot(train_values)

In [None]:
# Turn the scaled training series into (samples, window_size, 1) inputs X and
# their next-step targets y.
X, y = get_train(train_values, window_size)


In [None]:
# Load only the columns needed for validation, indexed by parsed 'Timestamp'.
df_val = pd.read_csv(
    validate_time_series_path,
    usecols=['Timestamp', 'Returns'],
    index_col=['Timestamp'],
    parse_dates=['Timestamp'],
    dayfirst=True,
)
# Scale with the scaler already fitted on the training data (transform only).
df_val['Scaled'] = scaler.transform(df_val[['Returns']].values)
X_val, y_val = get_val(df_val['Scaled'].values, window_size)

In [None]:
# instantiating the model in the strategy scope creates the model on the TPU
# with tpu_strategy.scope():
if path.exists(model_path):
    # Resume from the model saved by a previous run; it already contains its
    # layers, so nothing must be appended to it.
    print("using existing model")
    model = load_model(model_path)
else:
    # Fresh network: one LSTM layer over (window, 1) inputs, dropout, and a
    # single-unit dense output.
    print("using a new model")
    model = Sequential()
    model.add(LSTM(76, input_shape=(X.shape[1], 1), return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(1))

# (Re)compile in both cases so that fit() always has a loss and an optimizer.
optimizer = tf.keras.optimizers.Adam()
model.compile(loss="mse", optimizer=optimizer)
# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()

In [None]:
# Train on the windowed training set and evaluate on the validation windows
# after every epoch; samples are fed in their original order (no shuffling).
history = model.fit(
    X,
    y,
    validation_data=(X_val, y_val),
    epochs=epochs,
    batch_size=batch_size,
    shuffle=False,
    verbose=1,
)

In [None]:
# Export the model as a TensorFlow SavedModel with a fixed-shape serving
# signature, then persist the fitted scaler next to it.
run_model = tf.function(lambda x: model(x))
# The signature's sequence length must equal the window length the model was
# built with (window_size); a different hard-coded length is rejected when the
# function is traced.
concrete_func = run_model.get_concrete_function(
    tf.TensorSpec([batch_size, window_size, 1], model.inputs[0].dtype))
model.save(model_path, save_format="tf", signatures=concrete_func)
joblib.dump(scaler, scaler_path)

In [None]:
# Plot training vs. validation loss, leaving out the first epochs so the
# initial large losses do not flatten the rest of the curve.
skip_epochs = 7
train_loss = history.history['loss']
val_loss = history.history['val_loss']
if len(train_loss) <= skip_epochs:
    # Short run: skipping would leave nothing to plot, so show everything.
    skip_epochs = 0
# 1-based epoch numbers, so the x-axis shows the real epoch of each point
# instead of restarting at 0 after the skipped ones.
epoch_axis = range(skip_epochs + 1, len(train_loss) + 1)

fig = plt.figure(figsize=(12, 8))
ax1 = fig.subplots(1)
ax1.set_title('Model Loss')
ax1.set(xlabel='Epoch', ylabel='Loss')
ax1.plot(epoch_axis, train_loss[skip_epochs:], label='Train Loss')
ax1.plot(epoch_axis, val_loss[skip_epochs:], label='Val Loss')
ax1.legend()

In [None]:
# Convert the SavedModel to TensorFlow Lite and write it to the configured
# model_lite_path (inside the saved-model folder) rather than to a hard-coded
# name relative to the current directory.
os.chdir('/kaggle/working')  # later cells resolve 'results.zip' against cwd
converter = tf.lite.TFLiteConverter.from_saved_model(model_path)
tfmodel = converter.convert()
with open(model_lite_path, 'wb') as f:
    f.write(tfmodel)

In [None]:
# Bundle everything under /kaggle/working (except existing zip files) into
# results.zip in the current working directory.
zip_path = path.join(getcwd(), 'results.zip')
if path.exists(zip_path):
    os.remove(zip_path)

# The context manager closes (and finalises) the archive even if adding a
# file fails part-way through.
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipdir('/kaggle/working', zipf)