# ML to predict Bitcoin prices

+ Data from: https://coinmarketcap.com/api/
+ Adapted from: https://dashee87.github.io/data%20science/deep%20learning/python/another-keras-tutorial-for-neural-network-beginners/
+ Adapted from: https://dashee87.github.io/deep%20learning/python/predicting-cryptocurrency-prices-with-deep-learning/
+ Adapted from: https://github.com/dashee87/blogScripts/blob/master/Jupyter/2017-11-20-predicting-cryptocurrency-prices-with-deep-learning.ipynb

## Acquire and prepare data

In [1]:
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import numpy as np

# Get bitcoin market info: "Date", "Open", "High", "Low", "Close", "Volume", "Market Cap". Starting from 01/01/2016.
bitcoin_market_info = pd.read_html("https://coinmarketcap.com/currencies/bitcoin/historical-data/?start=20160101&end=20180419")[0]#+time.strftime("%Y%m%d"))[0]

# Convert the date string to the datetime format.
bitcoin_market_info = bitcoin_market_info.assign(Date=pd.to_datetime(bitcoin_market_info['Date']))

# Convert"Volume" to an integer.
bitcoin_market_info['Volume'] = bitcoin_market_info['Volume'].astype('int64')

# Look at the first few rows.
bitcoin_market_info.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,2018-04-19,8159.27,8298.69,8138.78,8294.31,7063210000,138591000000
1,2018-04-18,7944.43,8197.8,7886.01,8163.42,6529910000,134926000000
2,2018-04-17,8071.66,8285.96,7881.72,7902.09,6900880000,137070000000
3,2018-04-16,8337.57,8371.15,7925.73,8058.67,5631310000,141571000000
4,2018-04-15,7999.33,8338.42,7999.33,8329.11,5244480000,135812000000


In [2]:
# Rename columns.
bitcoin_market_info.columns =[bitcoin_market_info.columns[0]]+['bt_'+i for i in bitcoin_market_info.columns[1:]]

In [3]:
model_data = bitcoin_market_info

# Create "Supply" column, to display the circulating supply - i.e. the amount of bitcoins that currently exist. 
# This is important as the supply or exclusivity should directly affect prices or future prices. 
for coins in ['bt_']: 
    kwargs = { coins+'Supply': lambda x: (x[coins+'Market Cap'])/(x[coins+'Close']) }
    bitcoin_market_info = bitcoin_market_info.assign(**kwargs)
bitcoin_market_info.head()

Unnamed: 0,Date,bt_Open,bt_High,bt_Low,bt_Close,bt_Volume,bt_Market Cap,bt_Supply
0,2018-04-19,8159.27,8298.69,8138.78,8294.31,7063210000,138591000000,16709170.0
1,2018-04-18,7944.43,8197.8,7886.01,8163.42,6529910000,134926000000,16528120.0
2,2018-04-17,8071.66,8285.96,7881.72,7902.09,6900880000,137070000000,17346040.0
3,2018-04-16,8337.57,8371.15,7925.73,8058.67,5631310000,141571000000,17567540.0
4,2018-04-15,7999.33,8338.42,7999.33,8329.11,5244480000,135812000000,16305700.0


In [4]:
# Only keep columns "Open", "High", "Low", "Close", "Supply".
model_data = bitcoin_market_info[['Date']+[coin+metric for coin in ['bt_'] 
                                   for metric in ['Open', 'High', 'Low', 'Close', 'Volume', 'Supply']]]

# Reverse the data frame so that the row represent the right time frame.
model_data = model_data.sort_values(by='Date')
model_data.head()

Unnamed: 0,Date,bt_Open,bt_High,bt_Low,bt_Close,bt_Volume,bt_Supply
839,2016-01-01,430.72,436.25,427.51,434.33,36278900,14904630.0
838,2016-01-02,434.62,436.06,431.87,433.44,30096600,15073900.0
837,2016-01-03,433.58,433.74,424.71,430.01,39633800,15161280.0
836,2016-01-04,430.06,434.52,429.08,433.09,38477500,14934960.0
835,2016-01-05,433.07,434.18,429.68,431.96,34522600,15083290.0


In [5]:
# Split the data into training and test sets. 
split_date = '2017-06-01'
#split_date = '2018-04-18'
#training_set, test_set = model_data[model_data['Date']<split_date], model_data[model_data['Date']>=split_date]
training_set = model_data
test_set = model_data[model_data['Date']>=split_date]

# Remove "Date" columns. We are now finished with them.
training_set = training_set.drop('Date', 1)
test_set = test_set.drop('Date', 1)

In [6]:
# We only want the window length to be one day.
window_len = 1

In [7]:
# Prepare training inputs.
LSTM_training_inputs = []
for i in range(len(training_set)-window_len):
    temp_set = training_set[i:(i+window_len)].copy()
    LSTM_training_inputs.append(temp_set)

In [8]:
# Prepare test inputs.
LSTM_test_inputs = []
for i in range(len(test_set)-window_len):
    temp_set = test_set[i:(i+window_len)].copy()
    LSTM_test_inputs.append(temp_set)

In [9]:
# Convert to numpy arrays.
LSTM_training_inputs = [np.array(LSTM_training_input) for LSTM_training_input in LSTM_training_inputs]
LSTM_training_inputs = np.array(LSTM_training_inputs)

LSTM_test_inputs = [np.array(LSTM_test_inputs) for LSTM_test_inputs in LSTM_test_inputs]
LSTM_test_inputs = np.array(LSTM_test_inputs)

## Build model

In [10]:
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout

def build_model(inputs, output_size, neurons, activ_func="linear",
                dropout=0.25, loss="mae", optimizer="adam"):
    model = Sequential()
    
    # Use LSTM(Long Short Term Memory) an efficient, gradient-based Model introduced by Hochreiter & Schmidhuber in 1997
    # [ Ref: http://www.bioinf.jku.at/publications/older/2604.pdf ]. 
    
    # Recurrent Neural Networks attempt to address memory issues in traditional neural networks by adding loops within them, 
    # allowing information to persist [ Ref: http://colah.github.io/posts/2015-08-Understanding-LSTMs/ ]. A resonable analogy, 
    # is to envision recurrent neural network as numerous copies of the same network, each passing a message to a parent. 
    # This chain-like nature resembles the behaviour of sequences and lists, making them naturally suited to the 
    # architecture of a neural network. Unfortunately, RNNs are burdened with the problem of hadling long-term dependencies. 
    # As the neural network grows, gaps between past relevant data grows, and the RNN model becomes unable to learn to 
    # connect the information.
    
    # In theory, RNNs are absolutely capable of handling this issue. In fact, some are. Long Short Term Memory is an extension 
    # of or type of RNN that is capable. LSTM is very efficient on a large variety of problems, including timeline data 
    # [ Ref: https://dashee87.github.io/deep%20learning/python/predicting-cryptocurrency-prices-with-deep-learning/ ], 
    # and are now widely used. LSTMs have another loop learning what data to forget and what data to remember. LSTM models 
    # still have this chain like structure, but with four different layers communicating in a certain way.
    
    model.add(LSTM(neurons, return_sequences=True, input_shape=(inputs.shape[1], inputs.shape[2])))
    # model.add(LSTM(neurons, return_sequences=True))
    model.add(LSTM(neurons))
    
    model.add(Dropout(dropout))
    model.add(Dense(units=output_size))
    model.add(Activation(activ_func))

    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    return model

Using TensorFlow backend.


## Train model

In [11]:
# Random seed for reproducibility.
np.random.seed(202)

# Attempt to predict the closing price for the next day.
# Change this value if you want to make longer/shorter prediction, i.e. number of days.
pred_range = 1

# Initialise model architecture.
bt_model = build_model(LSTM_training_inputs, output_size=pred_range, neurons = 20)

# Model output is next price.
LSTM_training_outputs = []

for i in range(window_len, len(training_set['bt_Close'])-pred_range):
    LSTM_training_outputs.append((training_set['bt_Close'][i:i+pred_range].values/
                                  training_set['bt_Close'].values[i-window_len])-1)
LSTM_training_outputs = np.array(LSTM_training_outputs)

# Train model with training set.
bt_history = bt_model.fit(LSTM_training_inputs[:-pred_range], LSTM_training_outputs, 
                            epochs=25, batch_size=1, verbose=2, shuffle=True)

Epoch 1/25
 - 4s - loss: 0.0428 - acc: 0.0024
Epoch 2/25
 - 2s - loss: 0.0341 - acc: 0.0024
Epoch 3/25
 - 2s - loss: 0.0308 - acc: 0.0024
Epoch 4/25
 - 2s - loss: 0.0299 - acc: 0.0024
Epoch 5/25
 - 2s - loss: 0.0289 - acc: 0.0024
Epoch 6/25
 - 2s - loss: 0.0283 - acc: 0.0024
Epoch 7/25
 - 2s - loss: 0.0281 - acc: 0.0024
Epoch 8/25
 - 3s - loss: 0.0279 - acc: 0.0024
Epoch 9/25
 - 2s - loss: 0.0278 - acc: 0.0024
Epoch 10/25
 - 2s - loss: 0.0278 - acc: 0.0024
Epoch 11/25
 - 2s - loss: 0.0279 - acc: 0.0024
Epoch 12/25
 - 2s - loss: 0.0278 - acc: 0.0024
Epoch 13/25
 - 2s - loss: 0.0276 - acc: 0.0024
Epoch 14/25
 - 2s - loss: 0.0276 - acc: 0.0024
Epoch 15/25
 - 2s - loss: 0.0277 - acc: 0.0024
Epoch 16/25
 - 2s - loss: 0.0276 - acc: 0.0024
Epoch 17/25
 - 2s - loss: 0.0277 - acc: 0.0024
Epoch 18/25
 - 2s - loss: 0.0276 - acc: 0.0024
Epoch 19/25
 - 2s - loss: 0.0275 - acc: 0.0024
Epoch 20/25
 - 2s - loss: 0.0277 - acc: 0.0024
Epoch 21/25
 - 2s - loss: 0.0277 - acc: 0.0024
Epoch 22/25
 - 2s - lo

## Test Model

In [12]:
bt_pred_prices = ((bt_model.predict(LSTM_test_inputs)[:-pred_range][::pred_range]+1)*\
                   test_set['bt_Close'].values[:-(window_len + pred_range)][::pred_range].reshape(int(np.ceil((len(LSTM_test_inputs)-pred_range)/float(pred_range))),1))

In [13]:
bt_pred_prices

array([[  2409.34821811],
       [  2490.06740709],
       [  2516.88374854],
       [  2513.34159   ],
       [  2688.44829722],
       [  2864.94585199],
       [  2733.82594963],
       [  2807.33074227],
       [  2825.53183372],
       [  2949.50738243],
       [  2959.91372389],
       [  2661.25172406],
       [  2718.67671793],
       [  2507.89827293],
       [  2466.08279125],
       [  2520.09570585],
       [  2657.49943748],
       [  2549.84383388],
       [  2591.17902288],
       [  2723.44962647],
       [  2690.73969356],
       [  2707.05963867],
       [  2746.58372401],
       [  2610.31068141],
       [  2590.98890703],
       [  2479.96124856],
       [  2554.00637047],
       [  2576.35999241],
       [  2540.86836438],
       [  2482.35270587],
       [  2436.03448029],
       [  2507.99833391],
       [  2565.62344973],
       [  2603.22636434],
       [  2603.57657776],
       [  2610.15058385],
       [  2520.19576683],
       [  2572.90788875],
       [  25

## Acquire and prepare yesterday's data

In [14]:
# Get market for yesterday.
bitcoin_market_new = pd.read_html("https://coinmarketcap.com/currencies/bitcoin/historical-data/?start=20180420&end=20180420")[0]

# Convert the date string to the datetime format.
bitcoin_market_new = bitcoin_market_new.assign(Date=pd.to_datetime(bitcoin_market_info['Date']))

# Convert "Volume" to an integer.
bitcoin_market_new['Volume'] = bitcoin_market_new['Volume'].astype('int64')

# Rename columns.
bitcoin_market_new.columns =[bitcoin_market_new.columns[0]]+['bt_'+i for i in bitcoin_market_new.columns[1:]]

for coins in ['bt_']: 
    kwargs = { coins+'Supply': lambda x: (x[coins+'Market Cap'])/(x[coins+'Close']) }
    bitcoin_market_new = bitcoin_market_new.assign(**kwargs)
bitcoin_market_new.head()

# Only keep columns "Open", "High", "Low", "Close", "Supply".
m_data = bitcoin_market_new[['Date']+[coin+metric for coin in ['bt_'] 
                                   for metric in ['Open', 'High', 'Low', 'Close', 'Volume', 'Supply']]]

# Reverse the data frame so that the row represent the right time frame.
m_data = m_data.sort_values(by='Date')
m_data.head()

# Remove "Date" columns. We are now finished with them.
predict_set = m_data.drop('Date', 1)

window_len = 1

# Prepare test inputs.
LSTM_predict_inputs = []
for i in range(len(predict_set)):#-window_len):
    temp_set = predict_set[i:(i+window_len)].copy()
    LSTM_predict_inputs.append(temp_set)

# Convert to numpy arrays.
LSTM_predict_inputs = [np.array(LSTM_predict_inputs) for LSTM_predict_inputs in LSTM_predict_inputs]
LSTM_predict_inputs = np.array(LSTM_predict_inputs)

## Predict today's price

In [15]:
(bt_model.predict(LSTM_predict_inputs, batch_size=1)+1)*predict_set['bt_Close'].values.reshape(1,1)

array([[ 8851.22379364]])

## Evaluate prediction

In [16]:
# TODO: get data of day predicted and data of the prior day
# TODO: prepare inputs and outputs
# TODO: train/fit data