In [1]:
%matplotlib notebook
import json
import numpy as np
import os
import pandas as pd
import sys
import matplotlib.pyplot as plt

if sys.version_info[0] == 3:
    from urllib.request import urlopen
else:
    # Not Python 3 - today, it is most likely to be Python 2
    # But note that this might need an update when Python 4
    # might be around one day
    from urllib import urlopen


In [2]:
# connect to poloniex's API
CURRENCIES = ['USDT_BTC', 'USDT_LTC', 'USDT_ETH', 'USDT_XRP']
url = 'https://poloniex.com/public?command=returnChartData&currencyPair=$C&start=1356998100&end=9999999999&period=300'
urls = [url.replace('$C', c) for c in CURRENCIES]

for i, c in enumerate(CURRENCIES):
    with urlopen(urls[i]) as url:
        r = url.read()
        d = json.loads(r.decode())
        df = pd.DataFrame(d)
        #print(df.columns)
        df.to_pickle('data/poloniex/' + c + '.pkl')
        print('Successfully downloaded', c)

Successfully downloaded USDT_BTC
Successfully downloaded USDT_LTC
Successfully downloaded USDT_ETH
Successfully downloaded USDT_XRP


In [47]:
#btc_df = pd.read_pickle('data/poloniex/USDT_BTC.pkl')
btc_df = pd.read_pickle('data/coinmarketcap/bitcoin.pkl')
class PastSampler:

    def __init__(self, N, K, sliding_window = True):
        self.K = K
        self.N = N
        self.sliding_window = sliding_window
 
    def transform(self, A):
        M = self.N + self.K     #Number of samples per row (sample + target)
        #indexes
        if self.sliding_window:
            I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1)
        else:
            if A.shape[0]%M == 0:
                I = np.arange(M)+np.arange(0,A.shape[0],M).reshape(-1,1)
                
            else:
                I = np.arange(M)+np.arange(0,A.shape[0] -M,M).reshape(-1,1)
            
        print(I)
        print(I.shape)
        
        B = A[I].reshape(-1, M * A.shape[1], A.shape[2])
        ci = self.N * A.shape[1]    #Number of features per sample
        return B[:, :ci], B[:, ci:] #Sample matrix, Target matrix

In [49]:
#poloniex
#btc_df.head()
#btc_df = btc_df.drop(columns=['date'])

#btc_df.head()
btc_df = btc_df.drop(columns=['time', 'time_readable', 'price_btc', 'market_cap']).query('volume_usd>0')
btc_df.head()

Unnamed: 0,price_usd,volume_usd
112,735.35,62752900
113,734.98,62589500
114,730.98,62060000
115,731.45,62093000
116,731.13,62042500


In [50]:
import sklearn.preprocessing as prep
scaler = prep.MinMaxScaler()
#print(btc_df.as_matrix().shape)
#print(btc_df.as_matrix(), '#')
#print('#', np.array(scaler.fit_transform(btc_df))[:,None,:], '#')
#print(btc_df.shape)
original_A = np.array(btc_df)[:,None,:]
A = np.array(scaler.fit_transform(btc_df))[:,None,:]
print(original_A.shape)
print(A.shape)
#print(prep.MinMaxScaler().fit_transform(A.reshape(-1,8)))
#print(A[1,0,:])
#print(scaler.inverse_transform(A.reshape(-1,8)), '#')
#print(scaler.get_params())


(438788, 1, 2)
(438788, 1, 2)


In [51]:
NPS, NFS = 256, 32         #Number of past and future samples
ps = PastSampler(NPS, NFS, sliding_window=True)
datas, labels = ps.transform(A)
print(datas.shape, labels.shape)

labels = labels[:,:,0].reshape(-1, NFS, 1)
print(labels.shape)

[[     0      1      2 ...    285    286    287]
 [     1      2      3 ...    286    287    288]
 [     2      3      4 ...    287    288    289]
 ...
 [438498 438499 438500 ... 438783 438784 438785]
 [438499 438500 438501 ... 438784 438785 438786]
 [438500 438501 438502 ... 438785 438786 438787]]
(438501, 288)
(438501, 256, 2) (438501, 32, 2)
(438501, 32, 1)


In [52]:
from sklearn.utils import shuffle
print(datas.shape, labels.shape)
#datas, labels = shuffle(datas, labels)
print(datas.shape, labels.shape)

(438501, 256, 2) (438501, 32, 1)
(438501, 256, 2) (438501, 32, 1)


In [53]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten,Reshape
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import np_utils
from keras.layers import CuDNNLSTM, LSTM, LeakyReLU
from keras.callbacks import CSVLogger, ModelCheckpoint

step_size = datas.shape[1]
units = 50
second_units = 30
batch_size = 512
nb_features = datas.shape[2]
epochs = 5
output_size = NFS
#split training validation
training_size = int(0.95 * datas.shape[0])
training_datas = datas[:training_size,:]
training_labels = labels[:training_size,:,0]
validation_datas = datas[training_size:,:]
validation_labels = labels[training_size:,:,0]


#build model
#if CuDNNLSTM is not working, use LSTM
model = Sequential()
model.add(CuDNNLSTM(units=units, input_shape=(step_size,nb_features),return_sequences=True))
model.add(Dropout(0.5))
model.add(CuDNNLSTM(units=second_units, return_sequences=False))
model.add(Dropout(0.4))
model.add(Dense(output_size))
model.add(LeakyReLU())
model.compile(loss='mse', optimizer='adam')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_8 (CuDNNLSTM)     (None, 256, 50)           10800     
_________________________________________________________________
dropout_8 (Dropout)          (None, 256, 50)           0         
_________________________________________________________________
cu_dnnlstm_9 (CuDNNLSTM)     (None, 30)                9840      
_________________________________________________________________
dropout_9 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 32)                992       
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 32)                0         
Total params: 21,632
Trainable params: 21,632
Non-trainable params: 0
_________________________________________________________________


In [54]:
output_file_name = 'nn_models/logger'
model.fit(
    training_datas, 
    training_labels, 
    batch_size=batch_size,
    validation_split=0.15,
    #validation_data=(validation_datas,validation_labels), 
    epochs=epochs,
    verbose=2,
    callbacks=[
        CSVLogger(output_file_name+'.csv', append=True)#,
        #ModelCheckpoint('nn_models/'+output_file_name+'-{epoch:02d}-{val_loss:.5f}.hdf5', verbose=1)
    ])

Train on 354088 samples, validate on 62487 samples
Epoch 1/5
 - 73s - loss: 3.2810e-05 - val_loss: 0.0028
Epoch 2/5
 - 76s - loss: 8.5948e-06 - val_loss: 0.0023
Epoch 3/5
 - 78s - loss: 7.3416e-06 - val_loss: 0.0025
Epoch 4/5
 - 79s - loss: 6.5956e-06 - val_loss: 0.0029
Epoch 5/5
 - 79s - loss: 6.4860e-06 - val_loss: 0.0042


<keras.callbacks.History at 0x2024a2d6f98>

In [55]:
#model.save('nn_models/btc_' + str(epochs) + '_epochs.h5')
model.save('nn_models/btc_cm_' + str(epochs) + '_epochs.h5')

In [162]:
original_A.reshape(-1,8)[:,0].shape

(319870,)

In [59]:
scaler2 = prep.MinMaxScaler()
scaler2.fit_transform(original_A.reshape(-1,nb_features)[:,0].reshape(-1,1))
vd = validation_datas[0:len(validation_datas):NPS,:,:]
print(vd.shape)
predicted = model.predict(vd)

vd = scaler.inverse_transform(vd.reshape(-1,nb_features))

(86, 256, 2)


In [58]:
predict = predicted.reshape(-1,1)
truth = validation_labels[0:len(validation_labels):NPS,:].reshape(-1,1)

predict = scaler2.inverse_transform(predict)
truth = scaler2.inverse_transform(truth)

plt.figure(figsize=(8,6))
plt.plot(truth, label = 'Actual')
plt.plot(predict, 'r', label='Predicted')
plt.legend(loc='upper left')
plt.show()

<IPython.core.display.Javascript object>

In [43]:
score = model.evaluate(validation_datas,validation_labels,batch_size=32,verbose=1)



In [44]:
print(score)

0.002749334807991933
