In [3]:
import json
import requests
from keras.models import Sequential
from keras.layers import Input, Activation, Dense, Dropout, LSTM, Bidirectional
from keras.layers.recurrent import LSTM
from keras.models import Model
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error
import os
%matplotlib inline

In [21]:
path_data = os.path.join(os.getcwd(),"data")

coin_dataframes = {}

def convert_comma_int(field):
    try:
        return int(field.replace(',', ''))
    except ValueError:
        return None
    
for fn in os.listdir(path_data):
    #if "bitcoin_cache" in fn:
     #   continue
    if fn.startswith("coin_"):
        coin_name = fn.split("_")[1]
        df = pd.read_csv(os.path.join(path_data, fn), parse_dates=["Date"])
        #df['Marketcap'] = df['Marketcap'].map(convert_comma_int)
        coin_dataframes[coin_name] = df.sort_values('Date')

In [22]:
coin_dataframes.keys()

dict_keys(['Aave.csv', 'BinanceCoin.csv', 'Bitcoin.csv', 'Cardano.csv', 'ChainLink.csv', 'Cosmos.csv', 'CryptocomCoin.csv', 'Dogecoin.csv', 'EOS.csv', 'Ethereum.csv', 'Iota.csv', 'Litecoin.csv', 'Monero.csv', 'NEM.csv', 'Polkadot.csv', 'Solana.csv', 'Stellar.csv', 'Tether.csv', 'Tron.csv', 'Uniswap.csv', 'USDCoin.csv', 'WrappedBitcoin.csv', 'XRP.csv'])

In [27]:
coin_dataframes['BinanceCoin.csv'].head()

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,1,Binance Coin,BNB,2017-07-26 23:59:59,0.109013,0.099266,0.105893,0.105138,200395.0,10513800.0
1,2,Binance Coin,BNB,2017-07-27 23:59:59,0.108479,0.100888,0.105108,0.107737,344499.0,10773700.0
2,3,Binance Coin,BNB,2017-07-28 23:59:59,0.109019,0.101473,0.107632,0.104067,342568.0,10406700.0
3,4,Binance Coin,BNB,2017-07-29 23:59:59,0.111264,0.101108,0.104782,0.107811,340218.0,10781100.0
4,5,Binance Coin,BNB,2017-07-30 23:59:59,0.108138,0.103162,0.107935,0.106414,224261.0,10641400.0


# Compute relative growth and other relative values

We add these values as new columns to the dataframes:

In [29]:
def add_relative_columns(df):
    day_diff = df['Close'] - df['Open']
    df['rel_close'] = day_diff / df['Open']
    df['high_low_ratio'] = df['High'] / df['Low']
    df['rel_high'] = df['High'] / df['Close']
    df['rel_low'] = df['Low'] / df['Close']
    
    
for df in coin_dataframes.values():
    add_relative_columns(df)


## Create historical training data

The history tables will have values for the last 10 days for each day.

In [30]:
def create_history_frames(coin_dataframes):
    history_frames = {}
    for coin_name, df in coin_dataframes.items():
        history_frames[coin_name], x_cols = create_history_frame(df)
    return history_frames, x_cols
        

def create_history_frame(df):
    feature_cols = ['rel_close', 'rel_high', 'rel_low', 'high_low_ratio']
    y_col = ['rel_close']
    x_cols = []
    days = 10
    history = df[['Date'] + y_col].copy()
    for n in range(1, days+1):
        for feat_col in feature_cols:
            colname = '{}_{}'.format(feat_col, n)
            history[colname] = df[feat_col].shift(n)
            x_cols.append(colname)
    history = history[days:]
    return history, x_cols

y_col = 'rel_close'
coin_history, x_cols = create_history_frames(coin_dataframes)

# Define model

We will train a separate model for each currency. The models' architecture  identical.

In [35]:
def create_model():
    input_layer = Input(batch_shape=(None, len(x_cols), 1))
    layer = Bidirectional(LSTM(128, return_sequences=True))(input_layer)
    layer = Bidirectional(LSTM(128))(layer)
    out = Dense(1, activation="sigmoid")(layer)
    m = Model(inputs=input_layer, outputs=out)
    m.compile("rmsprop", loss='mean_squared_error')
    return m

def create_train_test_mtx(history):
    X = history[x_cols].to_numpy()
    y = history[y_col].to_numpy()
    X = X.reshape(X.shape[0], X.shape[1], 1)
    rand_mtx = np.random.permutation(X.shape[0])
    train_split = int(X.shape[0] * 0.9)
    train_indices = rand_mtx[:train_split]
    test_indices = rand_mtx[train_split:]

    X_train = X[train_indices]
    X_test = X[test_indices]
    y_train = y[train_indices]
    y_test = y[test_indices]
    
    return X_train, X_test, y_train, y_test

def train_model(model, X, y):
    ea = EarlyStopping(monitor='val_loss', patience=2)
    val_loss = model.fit(X, y, epochs=500, batch_size=64, callbacks=[ea], verbose=1, validation_split=.1)
    return val_loss

## Train a model for each currency

We save RMSE as well as the predictions on each test set.

In [36]:
rmse = {}
pred = {}
test = {}

for coin_name, history in coin_history.items():
    model = create_model()
    X_train, X_test, y_train, y_test = create_train_test_mtx(history)
    train_model(model, X_train, y_train)
    test[coin_name] = y_test
    
    # run prediction on test set
    pred[coin_name] = model.predict(X_test)
    # compute test loss
    rmse[coin_name] = np.sqrt(np.mean((pred[coin_name] - y_test)**2))
    print(coin_name, rmse[coin_name])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Aave.csv 0.09636017471429305
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
BinanceCoin.csv 0.06727292641398559
Epoch 1/500


KeyboardInterrupt: 

## Do our models predict the signum of the value change correctly?

In [None]:
pred_sign = {coin_name: np.sign(pred[coin_name]) * np.sign(test[coin_name]) for coin_name in pred.keys()}
for coin, val in sorted(pred_sign.items()):
    cnt = np.unique(pred_sign[coin], return_counts=True)[1]
    print("[{}] pos/neg change guessed correctly: {}, incorrectly: {}, correct%: {}".format(
        coin, cnt[0], cnt[1], cnt[0]/ (cnt[0]+cnt[1]) * 100))

## Did we guess anything useful at all?

In [None]:
pred_sign = {coin_name: np.sign(pred[coin_name]) for coin_name in pred.keys()}
for coin, val in sorted(pred_sign.items()):
    e, cnt = np.unique(val, return_counts=True)
    print("[{}] guesses: {}".format(coin, dict(zip(e, cnt))))