In [2]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dropout, LSTM, Activation, Dense
from keras.optimizers import SGD, Adam
from keras.callbacks import EarlyStopping
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [5]:
data = pd.read_csv('bitcoin.csv', index_col=0).drop(['time_period_start', 'time_period_end', 'time_open', 'time_close'], axis=1)
data

Unnamed: 0,price_open,price_high,price_low,price_close,volume_traded,trades_count
0,7165.72,7165.72,7165.71,7165.71,0.021841,2
1,7168.30,7168.30,7168.30,7168.30,1.000000,2
2,7170.50,7170.50,7170.50,7170.50,0.002000,1
3,7169.20,7169.20,7169.20,7169.20,0.004000,2
4,7169.20,7169.20,7169.20,7169.20,0.002000,1
...,...,...,...,...,...,...
9995,7179.50,7179.50,7179.50,7179.50,0.013325,1
9996,7181.67,7181.67,7181.67,7181.67,0.013364,1
9997,7179.50,7179.50,7179.50,7179.50,0.001526,1
9998,7182.12,7182.12,7182.12,7182.12,0.013437,1


## Lookback Window

In order to capture the past as features, we create the below function. It takes as input some number of seconds to look back, defaulted to 60 seconds.

Question: Is this a good feature set? Can you find a better one?

In [13]:
def lookback(dataset, timesteps = 10):
    # this uses the shift method of pandas dataframes to shift all of the columns down one row
    # and then append to the original dataset
    data = dataset
    for i in range(1, timesteps):
        step_back = dataset.shift(i).reset_index()
        step_back.columns = ['index'] + [f'{column}_{i}' for column in dataset.columns if column != 'index']
        data = data.reset_index().merge(step_back, on='index', ).drop('index', axis=1)
        
    return data.dropna()
        
features = lookback(data)
features

Unnamed: 0,price_open,price_high,price_low,price_close,volume_traded,trades_count,price_open_1,price_high_1,price_low_1,price_close_1,...,price_low_8,price_close_8,volume_traded_8,trades_count_8,price_open_9,price_high_9,price_low_9,price_close_9,volume_traded_9,trades_count_9
9,7165.72,7165.72,7165.72,7165.72,0.002777,1,7165.72,7165.72,7165.72,7165.72,...,7168.30,7168.30,1.000000,2.0,7165.72,7165.72,7165.71,7165.71,0.021841,2.0
10,7165.72,7165.72,7164.90,7164.90,0.074787,2,7165.72,7165.72,7165.72,7165.72,...,7170.50,7170.50,0.002000,1.0,7168.30,7168.30,7168.30,7168.30,1.000000,2.0
11,7163.80,7163.80,7163.80,7163.80,0.300000,1,7165.72,7165.72,7164.90,7164.90,...,7169.20,7169.20,0.004000,2.0,7170.50,7170.50,7170.50,7170.50,0.002000,1.0
12,7165.72,7165.72,7165.72,7165.72,0.200297,3,7163.80,7163.80,7163.80,7163.80,...,7169.20,7169.20,0.002000,1.0,7169.20,7169.20,7169.20,7169.20,0.004000,2.0
13,7165.72,7170.79,7165.72,7165.72,0.036206,3,7165.72,7165.72,7165.72,7165.72,...,7165.72,7169.20,0.075433,3.0,7169.20,7169.20,7169.20,7169.20,0.002000,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,7179.50,7179.50,7179.50,7179.50,0.013325,1,7179.04,7179.04,7178.23,7178.23,...,7180.99,7180.99,0.006652,1.0,7180.99,7180.99,7180.99,7180.99,0.006652,1.0
9996,7181.67,7181.67,7181.67,7181.67,0.013364,1,7179.50,7179.50,7179.50,7179.50,...,7180.99,7180.99,0.136530,1.0,7180.99,7180.99,7180.99,7180.99,0.006652,1.0
9997,7179.50,7179.50,7179.50,7179.50,0.001526,1,7181.67,7181.67,7181.67,7181.67,...,7182.12,7182.12,0.014822,1.0,7180.99,7180.99,7180.99,7180.99,0.136530,1.0
9998,7182.12,7182.12,7182.12,7182.12,0.013437,1,7179.50,7179.50,7179.50,7179.50,...,7176.56,7176.56,0.000010,1.0,7182.12,7182.12,7182.12,7182.12,0.014822,1.0


In [14]:
features.columns

Index(['price_open', 'price_high', 'price_low', 'price_close', 'volume_traded',
       'trades_count', 'price_open_1', 'price_high_1', 'price_low_1',
       'price_close_1', 'volume_traded_1', 'trades_count_1', 'price_open_2',
       'price_high_2', 'price_low_2', 'price_close_2', 'volume_traded_2',
       'trades_count_2', 'price_open_3', 'price_high_3', 'price_low_3',
       'price_close_3', 'volume_traded_3', 'trades_count_3', 'price_open_4',
       'price_high_4', 'price_low_4', 'price_close_4', 'volume_traded_4',
       'trades_count_4', 'price_open_5', 'price_high_5', 'price_low_5',
       'price_close_5', 'volume_traded_5', 'trades_count_5', 'price_open_6',
       'price_high_6', 'price_low_6', 'price_close_6', 'volume_traded_6',
       'trades_count_6', 'price_open_7', 'price_high_7', 'price_low_7',
       'price_close_7', 'volume_traded_7', 'trades_count_7', 'price_open_8',
       'price_high_8', 'price_low_8', 'price_close_8', 'volume_traded_8',
       'trades_count_8', 'pric

In [15]:
### split data into features and target
target = features['price_high'].values
features = features.drop('price_high', axis=1).values
features

array([[7.1657200e+03, 7.1657200e+03, 7.1657200e+03, ..., 7.1657100e+03,
        2.1841180e-02, 2.0000000e+00],
       [7.1657200e+03, 7.1649000e+03, 7.1649000e+03, ..., 7.1683000e+03,
        1.0000000e+00, 2.0000000e+00],
       [7.1638000e+03, 7.1638000e+03, 7.1638000e+03, ..., 7.1705000e+03,
        2.0000000e-03, 1.0000000e+00],
       ...,
       [7.1795000e+03, 7.1795000e+03, 7.1795000e+03, ..., 7.1809900e+03,
        1.3652963e-01, 1.0000000e+00],
       [7.1821200e+03, 7.1821200e+03, 7.1821200e+03, ..., 7.1821200e+03,
        1.4822130e-02, 1.0000000e+00],
       [7.1765600e+03, 7.1765600e+03, 7.1765600e+03, ..., 7.1765600e+03,
        1.0000000e-05, 1.0000000e+00]])

In [16]:
model = Sequential()
model.add(Dense(32, input_dim=features.shape[1]))
model.add(Dense(16))
model.add(Dense(1, activation='relu'))

model.compile(
    loss='mse',
    optimizer=Adam(lr=0.01), # is this the best optimizer/learning rate?
    metrics=['mean_squared_error', 'mean_absolute_error'] # does accuracy make sense in this context?
)

## callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    restore_best_weights=True
)


history = model.fit(
    features,
    target,
    validation_split=.3,
    epochs=20,
    verbose=1
)

  super().__init__(name, **kwargs)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [17]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 32)                1920      
                                                                 
 dense_4 (Dense)             (None, 16)                528       
                                                                 
 dense_5 (Dense)             (None, 1)                 17        
                                                                 
Total params: 2,465
Trainable params: 2,465
Non-trainable params: 0
_________________________________________________________________


Below we calculate predictions and root mean square error. Can we easily improve this RMSE?

In [18]:
## prediction
predictions = model.predict(features)
predictions



array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)

In [19]:
rmse = np.sqrt(np.mean(np.square((target.reshape(-1, 1) - predictions))))

In [20]:
rmse

7188.874310224613

## Save Model

The last thing we'll do is save the model for use in the API.

In [21]:
model.save('model.h5')