In [10]:
import pandas as pd 
import os, glob
import matplotlib.pyplot as plt 
import numpy as np
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [11]:
df = pd.concat((pd.read_csv(f, engine="python") for f in glob.glob(r"gemini_BTCUSD_20[12][90]*.csv")))
df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d %H:%M:%S")
df = df.drop(["Symbol", "Unix Timestamp"], axis=1)
df = df.resample("1T", on="Date").agg({"Open": "first", "Close": "last", "Volume": np.sum, "High": np.max, "Low": np.min})

# df = df.drop("Date", axis=1)
df["diff"] = np.log10(((df["Close"] - df["Open"])/df["Close"])*100.values)
df["inc"] = df["diff"].apply(lambda x: 1 if x > 0 else 0)
df.drop
df.head()
df.dropna()
df.dtypes

  import sys
  import sys


Open      float64
Close     float64
Volume    float64
High      float64
Low       float64
diff      float64
inc         int64
dtype: object

In [12]:
def split_dataset(dataset, split=2/3):
    train_size = int(len(dataset) * split)
    test_size = len(dataset) - train_size
    print(train_size, test_size)
    train = dataset[:train_size]
    test = dataset[train_size:]
    return train, test

In [13]:
df = df.drop(["Volume", "Low", "High", "Open", "Close"], axis=1)
train, test = split_dataset(df);
print(train.head())
y = train["diff"]
len(train)
train_generator = TimeseriesGenerator(train.to_numpy(), train["diff"], length=30, batch_size=128)
test_generator = TimeseriesGenerator(test.to_numpy(), test["diff"], length=30, batch_size=128)
print(len(train_generator))
print(train_generator[1][0][0])
print(train_generator[1][1][0])

749762 374881
                         diff  inc
Date                              
2019-01-01 00:00:00 -3.144369    0
2019-01-01 00:01:00       NaN    0
2019-01-01 00:02:00       NaN    0
2019-01-01 00:03:00 -3.334173    0
2019-01-01 00:04:00      -inf    0
5858
[[       -inf  0.        ]
 [       -inf  0.        ]
 [-3.83345406  0.        ]
 [       -inf  0.        ]
 [       -inf  0.        ]
 [       -inf  0.        ]
 [       -inf  0.        ]
 [       -inf  0.        ]
 [        nan  0.        ]
 [        nan  0.        ]
 [        nan  0.        ]
 [        nan  0.        ]
 [-3.26258803  0.        ]
 [        nan  0.        ]
 [       -inf  0.        ]
 [        nan  0.        ]
 [       -inf  0.        ]
 [       -inf  0.        ]
 [        nan  0.        ]
 [-2.14576418  0.        ]
 [        nan  0.        ]
 [       -inf  0.        ]
 [-2.96542272  0.        ]
 [       -inf  0.        ]
 [        nan  0.        ]
 [       -inf  0.        ]
 [        nan  0.        ]
 [-3.93

In [14]:
from tensorflow.keras import models
from tensorflow.keras.layers import Input, Dense, Bidirectional, LSTM, BatchNormalization, Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError


model = models.Sequential()
model.add(Bidirectional(LSTM(units=128, return_sequences=True)))
model.add(LSTM(units=64))
model.add(Dropout(.1))
model.add(Dense(64, activation="relu"))
model.add(Dense(32, activation="linear"))
model.add(Dense(16, activation="linear"))
model.add(Dense(1, activation="linear"))
model.compile("adam", MeanSquaredError())
model.fit(train_generator, epochs=1)
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, None, 256)         134144    
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                82176     
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 64)                4160      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 1

In [15]:
predictions = model.predict(test_generator)

In [16]:
# plt.hist(predictions)
predictions

array([[nan],
       [nan],
       [nan],
       ...,
       [nan],
       [nan],
       [nan]], dtype=float32)