In [64]:
from keras.models import Sequential
from keras.layers import Dropout, LSTM, Activation, Dense
from keras.optimizers import SGD, Adam
from keras.callbacks import EarlyStopping
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [65]:
data = pd.read_csv('bitcoin.csv').drop(['time_period_start', 'time_period_end', 'time_open', 'time_close'], axis=1)
data

Unnamed: 0,price_open,price_high,price_low,price_close,volume_traded,trades_count
0,7165.72,7165.72,7165.71,7165.71,0.021841,2
1,7168.30,7168.30,7168.30,7168.30,1.000000,2
2,7170.50,7170.50,7170.50,7170.50,0.002000,1
3,7169.20,7169.20,7169.20,7169.20,0.004000,2
4,7169.20,7169.20,7169.20,7169.20,0.002000,1
...,...,...,...,...,...,...
9995,7179.50,7179.50,7179.50,7179.50,0.013325,1
9996,7181.67,7181.67,7181.67,7181.67,0.013364,1
9997,7179.50,7179.50,7179.50,7179.50,0.001526,1
9998,7182.12,7182.12,7182.12,7182.12,0.013437,1


## Lookback Window

In order to capture the past as features, we create the below function. It takes as input some number of seconds to look back, defaulted to 60 seconds.

Question: Is this a good feature set? Can you find a better one?

In [66]:
def lookback(dataset, timesteps = 60):
    # this uses the shift method of pandas dataframes to shift all of the columns down one row
    # and then append to the original dataset
    data = dataset
    for i in range(1, timesteps):
        step_back = dataset.shift(i).reset_index()
        step_back.columns = ['index'] + [f'{column}_-{i}' for column in dataset.columns if column != 'index']
        data = data.reset_index().merge(step_back, on='index', ).drop('index', axis=1)
        
    return data.dropna()
        
features = lookback(data)

In [67]:
### split data into features and target
target = features['price_high'].values
features = features.drop('price_high', axis=1).values

In [68]:
model = Sequential()
model.add(Dense(32, input_dim=features.shape[1]))
model.add(Dense(16))
model.add(Dense(1, activation='relu'))

model.compile(
    loss='mse',
    optimizer=Adam(lr=0.01), # is this the best optimizer/learning rate?
    metrics=['mean_squared_error', 'mean_absolute_error'] # does accuracy make sense in this context?
)

## callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    restore_best_weights=True
)


history = model.fit(
    features,
    target,
    validation_split=.3,
    epochs=20,
    verbose=1
)

Train on 6958 samples, validate on 2983 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [69]:
model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_37 (Dense)             (None, 32)                11520     
_________________________________________________________________
dense_38 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_39 (Dense)             (None, 1)                 17        
Total params: 12,065
Trainable params: 12,065
Non-trainable params: 0
_________________________________________________________________


Below we calculate predictions and root mean square error. Can we easily improve this RMSE?

In [70]:
## prediction
predictions = model.predict(features)

In [71]:
rmse = np.sqrt(np.mean(np.square((target.reshape(-1, 1) - predictions))))

In [72]:
rmse

7191.088757578544

## Save Model

The last thing we'll do is save the model for use in the API.

In [73]:
model.save('model.h5')