# ML to predict Bitcoin prices

+ Data from: https://coinmarketcap.com/api/
+ Adapted from: https://dashee87.github.io/data%20science/deep%20learning/python/another-keras-tutorial-for-neural-network-beginners/
+ Adapted from: https://dashee87.github.io/deep%20learning/python/predicting-cryptocurrency-prices-with-deep-learning/
+ Adapted from: https://github.com/dashee87/blogScripts/blob/master/Jupyter/2017-11-20-predicting-cryptocurrency-prices-with-deep-learning.ipynb

## Acquire and prepare data

In [1]:
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import numpy as np

# Download the daily Bitcoin history table from CoinMarketCap, covering
# 2016-01-01 up to today's date. The table holds the columns "Date", "Open",
# "High", "Low", "Close", "Volume" and "Market Cap".
history_url = (
    "https://coinmarketcap.com/currencies/bitcoin/historical-data/"
    "?start=20160101&end=" + time.strftime("%Y%m%d")
)
# read_html returns a list of tables; the first one is used.
bitcoin_market_info = pd.read_html(history_url)[0]

# Parse the "Date" strings into datetime values.
parsed_dates = pd.to_datetime(bitcoin_market_info['Date'])
bitcoin_market_info = bitcoin_market_info.assign(Date=parsed_dates)

# Store "Volume" as 64-bit integers.
bitcoin_market_info['Volume'] = bitcoin_market_info['Volume'].astype('int64')

# Preview the first few rows.
bitcoin_market_info.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,2018-04-15,7999.33,8338.42,7999.33,8329.11,5244480000,135812000000
1,2018-04-14,7874.67,8140.71,7846.0,7986.24,5191430000,133682000000
2,2018-04-13,7901.09,8183.96,7758.93,7895.96,7764460000,134114000000
3,2018-04-12,6955.38,7899.23,6806.51,7889.25,8906250000,118048000000
4,2018-04-11,6843.47,6968.32,6817.59,6968.32,4641890000,116126000000


In [2]:
# Prefix every column except the first one with "bt_" so the Bitcoin
# metrics are clearly labelled; the first column keeps its name.
original_names = list(bitcoin_market_info.columns)
prefixed_names = ['bt_' + name for name in original_names[1:]]
bitcoin_market_info.columns = [original_names[0]] + prefixed_names

In [3]:
# Add two derived features for each coin prefix:
#   close_off_high = 2 * (High - Close) / (High - Low) - 1
#       -1 when the close equals the day's high, +1 when it equals the low.
#   volatility     = (High - Low) / Open
#       the day's price range relative to the opening price.
for prefix in ['bt_']:
    derived_columns = {
        prefix + 'close_off_high': lambda frame: (
            2 * (frame[prefix + 'High'] - frame[prefix + 'Close'])
            / (frame[prefix + 'High'] - frame[prefix + 'Low']) - 1
        ),
        prefix + 'volatility': lambda frame: (
            (frame[prefix + 'High'] - frame[prefix + 'Low'])
            / (frame[prefix + 'Open'])
        ),
    }
    # The lambdas read `prefix` when called, so they are applied inside the
    # same loop iteration that created them.
    bitcoin_market_info = bitcoin_market_info.assign(**derived_columns)
bitcoin_market_info.head()

Unnamed: 0,Date,bt_Open,bt_High,bt_Low,bt_Close,bt_Volume,bt_Market Cap,bt_close_off_high,bt_volatility
0,2018-04-15,7999.33,8338.42,7999.33,8329.11,5244480000,135812000000,-0.945088,0.04239
1,2018-04-14,7874.67,8140.71,7846.0,7986.24,5191430000,133682000000,0.048285,0.037425
2,2018-04-13,7901.09,8183.96,7758.93,7895.96,7764460000,134114000000,0.355198,0.053794
3,2018-04-12,6955.38,7899.23,6806.51,7889.25,8906250000,118048000000,-0.981734,0.157104
4,2018-04-11,6843.47,6968.32,6817.59,6968.32,4641890000,116126000000,-1.0,0.022025


In [4]:
# Keep only "Date" plus the four model features; this leaves out the
# "Open", "High", "Low" and "Market Cap" columns.
feature_names = ['Close', 'Volume', 'close_off_high', 'volatility']
kept_columns = ['Date']
for prefix in ['bt_']:
    kept_columns.extend(prefix + feature for feature in feature_names)
model_data = bitcoin_market_info[kept_columns]

# Order the rows by date so they run from oldest to newest.
model_data = model_data.sort_values(by='Date')
model_data.head()

Unnamed: 0,Date,bt_Close,bt_Volume,bt_close_off_high,bt_volatility
835,2016-01-01,434.33,36278900,-0.560641,0.020292
834,2016-01-02,433.44,30096600,0.250597,0.009641
833,2016-01-03,430.01,39633800,-0.173865,0.020827
832,2016-01-04,433.09,38477500,-0.474265,0.012649
831,2016-01-05,431.96,34522600,-0.013333,0.010391


In [5]:
# Split the data chronologically: rows dated before split_date form the
# training set, rows on or after it form the test set.
split_date = '2017-06-01'
training_set = model_data[model_data['Date'] < split_date]
test_set = model_data[model_data['Date'] >= split_date]

# "Date" is no longer needed once the split is done. The column is dropped by
# keyword: passing the axis positionally (drop('Date', 1)) was deprecated in
# pandas 1.3 and raises a TypeError from pandas 2.0 onwards.
training_set = training_set.drop(columns='Date')
test_set = test_set.drop(columns='Date')

In [6]:
# Number of consecutive days that make up one model input window.
window_len = 1

# Names of the columns that get rescaled within each window.
norm_cols = []
for prefix in ['bt_']:
    for metric in ['Close', 'Volume']:
        norm_cols.append(prefix + metric)

In [7]:
# Build one model input per window of `window_len` consecutive training rows.
# Inside each window, every column in norm_cols is expressed as the relative
# change from the window's first row (value / first value - 1).
LSTM_training_inputs = []
n_training_windows = len(training_set) - window_len
for start in range(n_training_windows):
    window = training_set[start:start + window_len].copy()
    for name in norm_cols:
        first_value = window[name].iloc[0]
        window.loc[:, name] = window[name] / first_value - 1
    LSTM_training_inputs.append(window)

In [8]:
# Build the test windows the same way as the training windows: slide a
# `window_len`-row window over test_set and rescale the norm_cols columns
# relative to the first row of each window.
LSTM_test_inputs = []
for offset in range(len(test_set) - window_len):
    day_window = test_set[offset:offset + window_len].copy()
    for column in norm_cols:
        baseline = day_window[column].iloc[0]
        day_window.loc[:, column] = day_window[column] / baseline - 1
    LSTM_test_inputs.append(day_window)

In [9]:
# Turn each list of window DataFrames into a single NumPy array: every
# window is converted on its own, then the results are stacked.
LSTM_training_inputs = np.array([np.array(window) for window in LSTM_training_inputs])

LSTM_test_inputs = np.array([np.array(window) for window in LSTM_test_inputs])

## Build model

In [10]:
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout

def build_model(inputs, output_size, neurons, activ_func="linear",
                dropout=0.25, loss="mae", optimizer="adam"):
    """Build and compile an LSTM -> Dropout -> Dense -> Activation model.

    Background
    ----------
    The model uses LSTM (Long Short Term Memory), an efficient,
    gradient-based model introduced by Hochreiter & Schmidhuber in 1997
    [ Ref: http://www.bioinf.jku.at/publications/older/2604.pdf ].

    Recurrent Neural Networks (RNNs) attempt to address the memory
    limitations of traditional neural networks by adding loops within them,
    allowing information to persist
    [ Ref: http://colah.github.io/posts/2015-08-Understanding-LSTMs/ ].
    A reasonable analogy is to envision a recurrent neural network as
    numerous copies of the same network, each passing a message to its
    successor. This chain-like nature resembles the behaviour of sequences
    and lists, making RNNs a natural architecture for such data.
    Unfortunately, RNNs are burdened with the problem of handling long-term
    dependencies: as the gap between the relevant past data and the point
    where it is needed grows, the RNN becomes unable to learn to connect
    the information.

    In theory, RNNs are absolutely capable of handling this issue; in
    practice, LSTM is the type of RNN that does. LSTMs are very efficient on
    a large variety of problems, including timeline data
    [ Ref: https://dashee87.github.io/deep%20learning/python/predicting-cryptocurrency-prices-with-deep-learning/ ],
    and are now widely used. LSTMs have an additional loop that learns what
    data to forget and what data to remember. LSTM models still have the
    chain-like structure, but with four different layers communicating in a
    particular way.

    Parameters
    ----------
    inputs : array-like with a ``shape`` attribute
        Model inputs. Only ``inputs.shape[1]`` and ``inputs.shape[2]`` are
        read; they define the ``input_shape`` of the LSTM layer.
    output_size : int
        Number of units in the final Dense layer.
    neurons : int
        Number of units in the LSTM layer.
    activ_func : str
        Activation applied to the Dense layer's output.
    dropout : float
        Rate passed to the Dropout layer.
    loss : str
        Loss passed to ``model.compile``.
    optimizer : str
        Optimizer passed to ``model.compile``.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(LSTM(neurons, input_shape=(inputs.shape[1], inputs.shape[2])))
    model.add(Dropout(dropout))
    # `units` is a keyword argument; without the "=" this line does not parse.
    model.add(Dense(units=output_size))
    model.add(Activation(activ_func))

    # NOTE(review): 'accuracy' is kept so the training log keeps its "acc"
    # column, but it is a classification metric and carries little meaning
    # for this regression output.
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    return model

Using TensorFlow backend.


## Train model

In [11]:
# Seed NumPy's random number generator.
# NOTE(review): this seeds NumPy only; whether it makes the Keras training
# run fully reproducible depends on the backend — confirm.
np.random.seed(202)

# Number of days ahead to predict (1 = the next day's closing price).
# Increase or decrease it for a longer or shorter prediction horizon.
pred_range = 1

# Create the (still untrained) network.
bt_model = build_model(LSTM_training_inputs, output_size=pred_range, neurons=20)

# Targets: the next `pred_range` closing prices, expressed as the relative
# change from the close at the start of the preceding input window.
training_closes = training_set['bt_Close']
LSTM_training_outputs = np.array([
    training_closes[day:day + pred_range].values
    / training_closes.values[day - window_len] - 1
    for day in range(window_len, len(training_closes) - pred_range)
])

# Fit the model. The last `pred_range` input windows are left out because
# no target values exist for them.
bt_history = bt_model.fit(
    LSTM_training_inputs[:-pred_range],
    LSTM_training_outputs,
    epochs=50,
    batch_size=1,
    verbose=2,
    shuffle=True,
)

Epoch 1/50
 - 2s - loss: 0.0237 - acc: 0.0039
Epoch 2/50
 - 1s - loss: 0.0221 - acc: 0.0039
Epoch 3/50
 - 1s - loss: 0.0212 - acc: 0.0039
Epoch 4/50
 - 1s - loss: 0.0206 - acc: 0.0039
Epoch 5/50
 - 1s - loss: 0.0197 - acc: 0.0039
Epoch 6/50
 - 1s - loss: 0.0193 - acc: 0.0039
Epoch 7/50
 - 1s - loss: 0.0189 - acc: 0.0039
Epoch 8/50
 - 1s - loss: 0.0190 - acc: 0.0039
Epoch 9/50
 - 1s - loss: 0.0191 - acc: 0.0039
Epoch 10/50
 - 1s - loss: 0.0190 - acc: 0.0039
Epoch 11/50
 - 1s - loss: 0.0189 - acc: 0.0039
Epoch 12/50
 - 1s - loss: 0.0187 - acc: 0.0039
Epoch 13/50
 - 1s - loss: 0.0188 - acc: 0.0039
Epoch 14/50
 - 1s - loss: 0.0187 - acc: 0.0039
Epoch 15/50
 - 1s - loss: 0.0187 - acc: 0.0039
Epoch 16/50
 - 1s - loss: 0.0186 - acc: 0.0039
Epoch 17/50
 - 1s - loss: 0.0187 - acc: 0.0039
Epoch 18/50
 - 1s - loss: 0.0187 - acc: 0.0039
Epoch 19/50
 - 1s - loss: 0.0187 - acc: 0.0039
Epoch 20/50
 - 1s - loss: 0.0186 - acc: 0.0039
Epoch 21/50
 - 1s - loss: 0.0187 - acc: 0.0039
Epoch 22/50
 - 1s - lo

## Test Model

In [12]:
# Turn the model's predicted relative changes on the test set back into
# prices: (predicted change + 1) * the close the change is relative to.
predicted_changes = bt_model.predict(LSTM_test_inputs)[:-pred_range][::pred_range]
n_predictions = int(np.ceil((len(LSTM_test_inputs) - pred_range) / float(pred_range)))
reference_closes = test_set['bt_Close'].values[:-(window_len + pred_range)][::pred_range]
bt_pred_prices = (predicted_changes + 1) * reference_closes.reshape(n_predictions, 1)

In [13]:
# Display the predicted prices.
bt_pred_prices

array([[  2420.87523726],
       [  2498.3180671 ],
       [  2530.39093067],
       [  2527.30436095],
       [  2698.81200848],
       [  2888.31603498],
       [  2755.76144165],
       [  2817.38315081],
       [  2841.72178347],
       [  2961.1617296 ],
       [  2972.06514549],
       [  2700.02694033],
       [  2733.09453651],
       [  2541.7100044 ],
       [  2482.81034155],
       [  2530.34635477],
       [  2669.84167233],
       [  2568.7428891 ],
       [  2605.74060831],
       [  2736.00925127],
       [  2707.00936456],
       [  2715.69194144],
       [  2755.07047006],
       [  2633.42174293],
       [  2607.7787235 ],
       [  2500.10886571],
       [  2564.98524812],
       [  2586.30327764],
       [  2552.57418069],
       [  2499.86874567],
       [  2452.04510087],
       [  2516.68936976],
       [  2575.63992042],
       [  2612.5491624 ],
       [  2612.19279611],
       [  2616.87674781],
       [  2568.35068901],
       [  2580.40371504],
       [  25

## Acquire and prepare yesterday's data

In [14]:
# Get the most recent market data.
# NOTE(review): the date range is hard-coded to 2018-04-15..2018-04-16;
# update it (or derive it from the current date) when re-running.
bitcoin_market_new = pd.read_html("https://coinmarketcap.com/currencies/bitcoin/historical-data/?start=20180415&end=20180416")[0]

# Convert the date strings to datetime values. The dates must be read from
# the newly downloaded frame: taking them from bitcoin_market_info would
# align that frame's dates by row index and attach the wrong dates here.
bitcoin_market_new = bitcoin_market_new.assign(Date=pd.to_datetime(bitcoin_market_new['Date']))

# Convert "Volume" to an integer.
bitcoin_market_new['Volume'] = bitcoin_market_new['Volume'].astype('int64')

# Rename columns: every column except the first gets the "bt_" prefix.
bitcoin_market_new.columns = [bitcoin_market_new.columns[0]] + ['bt_' + i for i in bitcoin_market_new.columns[1:]]

# Create the "close off high" and "volatility" columns.
# "close off high" is 2 * (High - Close) / (High - Low) - 1: the position of
# the close between the day's high (-1) and low (+1).
# "volatility" is (High - Low) / Open.
for coins in ['bt_']:
    kwargs = {coins + 'close_off_high': lambda x: 2 * (x[coins + 'High'] - x[coins + 'Close']) / (x[coins + 'High'] - x[coins + 'Low']) - 1,
              coins + 'volatility': lambda x: (x[coins + 'High'] - x[coins + 'Low']) / (x[coins + 'Open'])}
    bitcoin_market_new = bitcoin_market_new.assign(**kwargs)

# Keep "Date" plus the model features; this removes the columns "Open",
# "High", "Low" and "Market Cap".
m_data = bitcoin_market_new[['Date'] + [coin + metric for coin in ['bt_']
                                        for metric in ['Close', 'Volume', 'close_off_high', 'volatility']]]

# Order the rows by date, oldest first.
m_data = m_data.sort_values(by='Date')

# Remove the "Date" column; it is no longer needed. The keyword form is used
# because the positional axis argument (drop('Date', 1)) raises a TypeError
# from pandas 2.0 onwards.
predict_set = m_data.drop(columns='Date')

window_len = 1
norm_cols = [coin + metric for coin in ['bt_'] for metric in ['Close', 'Volume']]

# Prepare the prediction inputs. Every row starts a window here, including
# the last one.
LSTM_predict_inputs = []
for i in range(len(predict_set)):
    temp_set = predict_set[i:(i + window_len)].copy()
    for col in norm_cols:
        # Rescale relative to the first row of the window.
        temp_set.loc[:, col] = temp_set[col] / temp_set[col].iloc[0] - 1
    LSTM_predict_inputs.append(temp_set)

# Convert to numpy arrays.
LSTM_predict_inputs = np.array([np.array(window) for window in LSTM_predict_inputs])

## Predict today's price

In [15]:
# Convert the predicted relative change back into a price:
# (predicted change + 1) * the close it is relative to.
# reshape(-1, 1) produces one row per entry of predict_set, so the result is
# the same as reshape(1, 1) for a single row and the expression no longer
# fails when predict_set holds more than one row.
(bt_model.predict(LSTM_predict_inputs) + 1) * predict_set['bt_Close'].values.reshape(-1, 1)

array([[ 8361.02402598]])