In [64]:
import pandas as pd
import numpy as np

from keras import Input
from keras.engine import Model
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
from keras.layers import Concatenate, concatenate
from keras.callbacks import TensorBoard

In [65]:
# features is a list of strings of feature names 

def build_model(features, data_length, label_length):
    """Build and compile a multi-input LSTM regression model.

    Parameters
    ----------
    features : list of str
        Feature names; one model input is created per name and the name is
        used as the name of that input layer.
    data_length : int
        Number of timesteps per sample; every input has shape (data_length, 1).
    label_length : int
        Number of units in the final linear Dense layer.

    Returns
    -------
    A compiled Keras `Model` (optimizer 'rmsprop', loss 'mse') that takes one
    array per feature, in the order given by `features`.
    """
    # One univariate sequence input per feature.
    feature_inputs = [
        Input(shape=(data_length, 1), name=feature_name)
        for feature_name in features
    ]

    # Each input gets its own 64-unit LSTM; return_sequences=False keeps only
    # the output of the last timestep.
    encoded = [
        LSTM(64, return_sequences=False)(sequence_input)
        for sequence_input in feature_inputs
    ]

    # Merge the per-feature encodings and project them onto the label vector.
    merged = concatenate(encoded)
    prediction = Dense(
        label_length,
        activation='linear',
        name='weighted_average_output',
    )(merged)

    model = Model(inputs=feature_inputs, outputs=[prediction])
    model.compile(optimizer='rmsprop', loss='mse')
    return model
        
# Number of timesteps per sample window; the split/reshape cell uses this as
# the window length when reshaping the series.
data_length = 10
# Number of target values per sample.
# NOTE(review): presumably meant to be passed as build_model's `label_length`
# argument (the width of the output Dense layer) — confirm at the call site.
label_length = 10

In [114]:
from sklearn.preprocessing import MinMaxScaler

# Load the raw master table.
# NOTE(review): absolute, machine-specific path — consider a configurable
# data directory so the notebook runs on other machines.
master_df = pd.read_csv('C:/Users/Shoya/surf/data/master_df.csv', encoding='latin1')

# Keep only the columns of interest, drop incomplete rows, and add
# identifier-friendly aliases for the two awkwardly named columns.
# Building the frame in a single chain that ends in .assign (which returns a
# new frame) avoids assigning columns onto a frame derived from a slice of
# master_df, which triggers SettingWithCopyWarning.
df = (
    master_df[['Timestamp', 'Close', 'Volume_(BTC)', 'Volume_(Currency)', 'Date(UTC)',
               'Bitcoin (Adj.Overlap)', 'Close Price % Change',
               'Close Price % Change (Abs)', 'Is Spike']]
    .dropna()
    .assign(
        Volume_BTC=lambda frame: frame['Volume_(BTC)'],
        Bitcoin_Adj=lambda frame: frame['Bitcoin (Adj.Overlap)'],
    )
)

cols = ['Volume_BTC', 'Bitcoin_Adj', 'Close']

# Make the series stationary by taking log differences along the time axis.
# The diff drops the first observation, so data_array (and df_scaled below)
# has one row fewer than df.
data_array = np.diff(np.log(df[cols]), axis=0)

# Min-max scale every column.
# NOTE(review): the scaler is fitted on all rows, before any train/test
# split — if these values are used for modelling, fit on the training part
# only to avoid leaking test-set statistics.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data_array)

df_scaled = pd.DataFrame(data_scaled, columns=cols)
df_scaled.head()




Unnamed: 0,Volume_BTC,Bitcoin_Adj,Close
0,0.27289,0.453625,0.566575
1,0.776791,0.47197,0.484557
2,0.463316,0.439996,0.538331
3,0.725079,0.529463,0.520715
4,0.210661,0.416611,0.566098


In [None]:
# Build lagged copies of the series so past observations can serve as model inputs.

# Number of lag steps; passed as `n_in` to series_to_supervised below.
# NOTE(review): the name assumes hourly rows — confirm the sampling interval of the data.
n_hours = 3
# Helper that frames a time series as a supervised-learning table.

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted columns.

    Parameters
    ----------
    data : list or array-like
        Observations ordered in time. A flat sequence is treated as a single
        variable; a 2-D input has one row per step and one column per variable.
    n_in : int, default 1
        Number of lagged steps (t-n_in ... t-1) to include.
    n_out : int, default 1
        Number of steps from t onwards (t ... t+n_out-1) to include.
    dropnan : bool, default True
        Drop the rows that contain NaN (including those introduced by shifting).

    Returns
    -------
    pandas.DataFrame
        Columns are named 'var<j>(t-<i>)', 'var<j>(t)' and 'var<j>(t+<i>)',
        with <j> the 1-based variable index.
    """
    frame = pd.DataFrame(data)
    # Take the variable count from the constructed frame so that flat lists,
    # lists of rows, and arrays are all handled the same way.
    n_vars = frame.shape[1]

    shifted, names = [], []

    # Input sequence (t-n_in, ..., t-1).
    for lag in range(n_in, 0, -1):
        shifted.append(frame.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]

    # Forecast sequence (t, t+1, ..., t+n_out-1).
    for step in range(n_out):
        shifted.append(frame.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]

    # Put it all together side by side.
    agg = pd.concat(shifted, axis=1)
    agg.columns = names

    # Shifting leaves NaN at the edges; drop those incomplete rows on request.
    if dropnan:
        agg = agg.dropna()
    return agg



# Frame the scaled series with n_hours lagged steps and one current step.
# `data_scaled` is the MinMaxScaler output from the scaling cell; the name
# `scaled` previously used here is not defined anywhere in the notebook.
reframed = series_to_supervised(data_scaled, n_hours, 1)
print(reframed.shape)

In [None]:
# Split each series into train/test parts and reshape them into
# non-overlapping windows of `data_length` steps to feed into the RNN.

def _to_windows(values, n_steps, add_feature_axis=True):
    """Reshape a 1-D array into consecutive windows of `n_steps` values.

    Returns shape (n_windows, n_steps, 1) when `add_feature_axis` is true
    (the layout of the model inputs), otherwise (n_windows, n_steps).
    The length of `values` must be a multiple of `n_steps`.
    """
    n_windows = values.shape[0] // n_steps
    if add_feature_axis:
        return np.reshape(values, (n_windows, n_steps, 1))
    return np.reshape(values, (n_windows, n_steps))


# NOTE(review): these read the raw columns of `df`, not the log-differenced,
# min-max scaled values in `df_scaled` — confirm this is intended.
X_timestamp = df['Timestamp'].values
X_volume = df['Volume_BTC'].values
X_trends = df['Bitcoin_Adj'].values

Y_price = df['Close'].values

# Both boundaries are rounded down to a multiple of the window length so
# that each part reshapes cleanly; the window length comes from
# `data_length` rather than a repeated literal so the two cannot drift apart.
n_obs = len(X_timestamp)
train_size = (int(n_obs * 0.85) // data_length) * data_length
test_size_index = (n_obs // data_length) * data_length

# X arrays have shape (samples, timesteps, dimension) with
# timesteps == data_length and dimension == 1.
X_train_timestamp = _to_windows(X_timestamp[:train_size], data_length)
X_train_volume = _to_windows(X_volume[:train_size], data_length)
X_train_trends = _to_windows(X_trends[:train_size], data_length)

X_test_timestamp = _to_windows(X_timestamp[train_size:test_size_index], data_length)
X_test_volume = _to_windows(X_volume[train_size:test_size_index], data_length)
X_test_trends = _to_windows(X_trends[train_size:test_size_index], data_length)

# Targets are 2-D (samples, data_length): no trailing feature axis.
Y_train_price = _to_windows(Y_price[:train_size], data_length, add_feature_axis=False)
Y_test_price = _to_windows(Y_price[train_size:test_size_index], data_length, add_feature_axis=False)

In [116]:
# One model input is created per entry, in this order; the arrays passed to
# fit() below must follow the same order.
features = ['Volume_BTC', 'Bitcoin_Adj'] # NEED TO ADD PREVIOUS PRICES!

# Use the notebook-level window constants instead of repeating literal 10s,
# so the model's input/output sizes stay in sync with the reshaped arrays.
rnn = build_model(features, data_length, label_length)

tensorboard_callback = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)

# The timestamp series is deliberately not fed to the model.
train_inputs = [X_train_volume, X_train_trends]
validation_inputs = [X_test_volume, X_test_trends]

rnn.fit(
    train_inputs,
    [Y_train_price],
    validation_data=(validation_inputs, [Y_test_price]),
    epochs=2,
    batch_size=32,
    callbacks=[tensorboard_callback],
    verbose=2
)


Train on 2386 samples, validate on 421 samples
Epoch 1/2
2s - loss: 1798766.9733 - val_loss: 112379228.9976
Epoch 2/2
1s - loss: 1783870.5390 - val_loss: 112229987.3064


<keras.callbacks.History at 0x2ed044ebf98>

In [68]:
# Evaluate on the held-out windows. The model is built with two inputs
# (volume and trends), so exactly those two arrays are passed, in the same
# order as in fit(); also passing the timestamp array would not match the
# model's inputs.
score = rnn.evaluate(
    [
        X_test_volume,
        X_test_trends
    ],
    [
        Y_test_price
    ],
    batch_size=32)



In [None]:
# TODO: reframe X and y as a supervised-learning problem (see the online article on this).
# TODO: also scale the data to the range 0 to 1.
# TODO: remove seasonality.
# TODO: turn the timestamp into a categorical input instead of feeding the raw timestamp.