In [1]:
import pandas as pd
import numpy as np

from keras import Input
from keras.engine import Model
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
from keras.layers import Concatenate, concatenate
from keras.callbacks import TensorBoard

Using TensorFlow backend.


In [14]:

# features is a list of strings of feature names 

def build_model(features, data_length, label_length):
    
    inputs_list = [] 
    for feature_name in features:
        inputs_list.append((Input(shape=(data_length,1), name=feature_name)))
    
    layers = [] 
    for i, input_name in enumerate(inputs_list): 
        layers.append(LSTM(64, return_sequences=False)(inputs_list[i]) )
        
    output = concatenate(layers) 
    output = Dense(label_length, activation='linear', name='IsSpike')(output)
    
    model = Model(
        inputs = inputs_list,
        outputs = [output]
    )
    
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model    

In [5]:
from sklearn.preprocessing import MinMaxScaler

master_df = pd.read_csv('C:/Users/Shoya/surf/data/master_df.csv', encoding='latin1')
df = master_df[['Timestamp', 'Close', 'Volume_(BTC)', 'Volume_(Currency)', 'Date(UTC)', 'Bitcoin (Adj.Overlap)', 
               'Close Price % Change', 'Close Price % Change (Abs)', 'Is Spike']]

# lag price 
df['Price_lagged'] = df['Close'].shift(1)

df = df.dropna()

df['Volume_BTC'] = df['Volume_(BTC)']
df['Bitcoin_Adj'] = df['Bitcoin (Adj.Overlap)']

cols = ['Volume_BTC','Bitcoin_Adj', 'Close', 'Price_lagged']

# Stationalize Data by taking log differences
data_array = np.diff(np.log(df[cols]), axis=0)

# Min-Max Scale 

scalers = {}
datas = [] 

df_scaled = pd.DataFrame(columns=cols)

for i in range(len(cols)): 
    scalers[cols[i]] = MinMaxScaler()
    #print('data', data_array[:,i])
    
    col_data = data_array[:,i]
    col_data = np.reshape(col_data, (len(col_data), 1))
    
    data = scalers[cols[i]].fit_transform( col_data )  #:, np.newaxis
    #print('scaled', data)
    data = np.reshape(data, (1, len(data)))
    df_scaled[cols[i]] = data[0]
    
df_scaled['Is Spike'] = df['Is Spike']
df_scaled.dropna(inplace=True)
display(df_scaled.head())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Volume_BTC,Bitcoin_Adj,Close,Price_lagged,Is Spike
1,0.776791,0.47197,0.484557,0.566575,1.0
2,0.463316,0.439996,0.538331,0.484557,0.0
3,0.725079,0.529463,0.520715,0.538331,-1.0
4,0.210661,0.416611,0.566098,0.520715,0.0
5,0.594148,0.445509,0.568881,0.566098,0.0


In [7]:
data_length = 10
label_length = 10

# split and reshape data to feed into RNN

# X_timestamp = df_scaled['Timestamp'].values
X_volume = df_scaled['Volume_BTC'].values
X_trends = df_scaled['Bitcoin_Adj'].values
X_lagged_price = df_scaled['Price_lagged'].values

Y_is_spike = df_scaled['Is Spike'].values 

train_size = int(len(X_volume) * 0.85)
train_size = int(train_size/10) * 10 

test_size_index = int(len(X_volume)/10)*10

# X_train_timestamp, X_test_timestamp = X_timestamp[:train_size], X_timestamp[train_size:test_size_index ]
X_train_volume, X_test_volume = X_volume[:train_size], X_volume[train_size:test_size_index ]
X_train_trends, X_test_trends = X_trends[:train_size], X_trends[train_size:test_size_index ]
X_train_lagged_price, X_test_lagged_price = X_lagged_price[:train_size], X_lagged_price[train_size:test_size_index ]

Y_train_is_spike, Y_test_is_spike = Y_is_spike[:train_size], Y_is_spike[train_size:test_size_index ]


# X.shape is (samples, timesteps, dimension) 
# timestemps is 15, samples is just however many nobs there are (but it doesn't matter, so it should be None)



# X_train_timestamp = np.reshape(X_train_timestamp, (int(X_train_timestamp.shape[0]/data_length),data_length,1) ) 
X_train_volume = np.reshape(X_train_volume, (int(X_train_volume.shape[0]/data_length),data_length,1) ) 
X_train_trends = np.reshape(X_train_trends, (int(X_train_trends.shape[0]/data_length),data_length,1) ) 
X_train_lagged_price = np.reshape(X_train_lagged_price, (int(X_train_lagged_price.shape[0]/data_length), data_length, 1))

# X_test_timestamp = np.reshape(X_test_timestamp, (int(X_test_timestamp.shape[0]/data_length),data_length,1) ) 
X_test_volume = np.reshape(X_test_volume, (int(X_test_volume.shape[0]/data_length),data_length,1) ) 
X_test_trends = np.reshape(X_test_trends, (int(X_test_trends.shape[0]/data_length),data_length,1) )  
X_test_lagged_price = np.reshape(X_test_lagged_price, (int(X_test_lagged_price.shape[0]/data_length),data_length,1))


# Don't need the 1 for the third dimension for Y's??


Y_train_is_spike = np.reshape(Y_train_is_spike, (int(Y_train_is_spike.shape[0]/data_length),  data_length) ) 
Y_test_is_spike = np.reshape(Y_test_is_spike, (int(Y_test_is_spike.shape[0]/data_length),  data_length) )

In [15]:
features = ['Volume_BTC', 'Bitcoin_Adj', 'Price_lagged']

rnn = build_model(features, 10, 10) 

tensorboard_callback = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)

history = rnn.fit(
    [
        #X_train_timestamp,
        X_train_volume,
        X_train_trends,
        X_train_lagged_price
    ],
    [
        Y_train_is_spike
    ]
    ,
    validation_data=(
        [
            #X_test_timestamp,
            X_test_volume,
            X_test_trends,
            X_test_lagged_price
        ],
        [
            Y_test_is_spike
        ]),
    epochs=10,
    batch_size=32,
    callbacks=[
      tensorboard_callback
    ],
    verbose=1
)

Train on 2386 samples, validate on 421 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
score = rnn.evaluate(
    [
        #X_test_timestamp,
        X_test_volume,
        X_test_trends,
        X_test_lagged_price
    ],
    [
        Y_test_is_spike
    ])

print("Accuracy: %.2f%%" % (score[1]*100))



In [19]:
yhat = rnn.predict( 
    [
        #X_test_timestamp,
        X_test_volume,
        X_test_trends,
        X_test_lagged_price
    ],
    verbose=0
)

display(yhat)

array([[ 0.13758324,  0.15045621, -0.1352286 , ...,  0.14248075,
        -0.16064788,  0.13012886],
       [ 0.13171619,  0.14954735, -0.13293265, ...,  0.13843442,
        -0.15661025,  0.13136587],
       [ 0.13703728,  0.14961395, -0.13495894, ...,  0.14360675,
        -0.16229413,  0.13116887],
       ..., 
       [ 0.13307448,  0.15346955, -0.13808365, ...,  0.14424242,
        -0.16202313,  0.13140647],
       [ 0.13495244,  0.14864312, -0.13214175, ...,  0.14069733,
        -0.15901351,  0.13167071],
       [ 0.12900235,  0.14685658, -0.13127899, ...,  0.13757789,
        -0.15631311,  0.12945868]], dtype=float32)

In [None]:
# Need to actually categorize output