In [None]:
%%time
# Import libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from keras.models import Sequential
from keras.layers import LSTM,Dense,Dropout,AveragePooling1D,Reshape

from sklearn.metrics import mean_absolute_error

In [None]:
%%time
# Load the raw trading-history CSV into a DataFrame and preview its last rows
csv_path = '../input/historical-data-on-the-trading-of-cryptocurrencies/crypto_tradinds.csv'
data = pd.read_csv(csv_path)
data.tail()

In [None]:
%%time
#Select Bitcoin Data
# .loc with an explicit .copy() makes btc_data an independent frame, so it can be
# modified later without SettingWithCopyWarning and without any link back to `data`.
btc_data = data.loc[data['ticker']=='BTC'].copy()
btc_data.tail()

In [None]:
%%time
#Check data: count the distinct values in every column of the BTC subset
btc_data.nunique()

In [None]:
%%time
#List the distinct values that occur in the 'price_btc' column of the BTC subset
btc_data['price_btc'].unique()

In [None]:
%%time
#Check value 0 in 'price_btc'
# The mask is built from btc_data itself: a mask taken from the full `data` frame has a
# different length/index and only works because pandas reindexes it (with a warning).
btc_data_0 = btc_data[btc_data['price_btc']==0]
btc_data_0.tail()

The previous cell returns only one row. A value of 0 in the 'price_btc' column for BTC must be an error in the dataset.

In [None]:
%%time
#Drop columns with at most 2 distinct values: the constant columns and 'price_btc'
#(whose only extra value is the single erroneous 0 found above)
unique_counts = btc_data.nunique()  # computed once instead of twice
drop_columns_list = unique_counts[unique_counts<=2].index
# Rebind to a new frame instead of dropping in place: no SettingWithCopyWarning on a
# filtered frame, and re-running the cell stays safe.
btc_data = btc_data.drop(columns=drop_columns_list)

In [None]:
%%time
#Plot the full Bitcoin price history: USD price against trade date
price_trace = go.Scatter(x=btc_data['trade_date'], y=btc_data['price_usd'])
fig = go.Figure(data=price_trace)
fig.show()

In [None]:
%%time
# fix random seed for reproducibility
# This seeds NumPy's global random generator only.
# NOTE(review): the Keras backend presumably keeps its own random state, so model
# training may still differ between runs — confirm and seed it too if needed.
np.random.seed(42)

In [None]:
%%time
def data_preproc_and_split(data,n,valid_size=100):
    """Build sliding-window samples from a price history and split them into train/validation parts.

    One sample is created for every start position ``i`` in ``1 .. len(data)-n-2``:

    * features - for each of the ``n`` rows ``i .. i+n-1`` the pair
      (``price_usd`` divided by the base price, minus 1; ``volume`` divided by ``market_cap``);
    * target   - ``price_usd`` of row ``i+n+1`` divided by the base price, minus 1
      (row ``i+n``, directly after the window, is not used);
    * base price - ``price_usd`` of row ``i-1``; it is returned so that relative
      predictions can be converted back to absolute prices.

    Rows are addressed by position, so ``data`` may carry any index (for example the
    labels inherited from a larger frame it was filtered from). ``data`` is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'price_usd', 'volume', 'market_cap' and 'trade_date'.
        Presumably ordered chronologically - the function does not sort it.
    n : int
        Window length (number of consecutive rows per sample).
    valid_size : int, default 100
        Number of trailing samples put into the validation part. If fewer samples
        exist, all of them go to validation and the training part is empty.

    Returns
    -------
    x_train, x_valid : numpy.ndarray
        float64 arrays of shape (samples, n, 2); the last axis is (relative price, volume ratio).
    y_train, y_valid : pandas.DataFrame
        Single float column 'price' with the relative targets.
    target : pandas.DataFrame
        Columns 'date' and 'price' (relative target) for all samples.
    pred_convert : pandas.DataFrame
        Columns 'date' and 'price' (base price) for all samples.
        All returned frames are indexed by the start position ``i``.
    """
    # Positional addressing: a filtered frame keeps its parent's labels, which need not be 0..len-1
    data = data.reset_index(drop=True)
    price = data['price_usd'].to_numpy(dtype=float)
    volume_ratio = (data['volume']/data['market_cap']).to_numpy(dtype=float)
    dates = data['trade_date'].to_numpy()

    # Start positions of all windows (empty when the history is too short)
    starts = np.arange(1, max(len(data)-n-1, 1))
    base_price = price[starts-1]
    # window[k, j] is the row used for timestep j of sample k
    window = starts[:, None] + np.arange(n)[None, :]
    target_pos = starts + n + 1

    # Built in one vectorised step; growing a DataFrame cell by cell is quadratic
    # and leaves object-dtype columns behind
    features = np.stack([price[window]/base_price[:, None]-1, volume_ratio[window]], axis=2)
    target = pd.DataFrame({'date': dates[target_pos],
                           'price': price[target_pos]/base_price-1}, index=starts)
    # Start prices, kept to convert prediction results back to valid prices
    pred_convert = pd.DataFrame({'date': dates[target_pos],
                                 'price': base_price}, index=starts)

    # Data split: the last `valid_size` samples form the validation part
    split = max(len(starts)-valid_size, 0)
    x_train = features[:split]
    x_valid = features[split:]
    # drop() returns new frames, so `target` keeps its 'date' column
    y_train = target.iloc[:split].drop(columns=['date'])
    y_valid = target.iloc[split:].drop(columns=['date'])
    return x_train,x_valid,y_train,y_valid,target,pred_convert

In [None]:
%%time
#model
def model_gen(x,y,n,e=75,v=0):
    """Build, compile and fit a stacked-LSTM regression model, then return it.

    Parameters
    ----------
    x : training inputs; the first layer declares input_shape=(n, 2).
    y : training targets (the network ends in a single linear unit).
    n : number of timesteps per sample.
    e : number of training epochs (default 75).
    v : Keras verbosity level passed to fit (default 0).

    Returns
    -------
    The fitted Sequential model.
    """
    layers = [
        LSTM(32, return_sequences=True, input_shape=(n, 2)),  # passes the full sequence on to the next LSTM
        LSTM(64),
        Dropout(0.35),
        Dense(128, activation='relu'),
        Dense(1),
    ]
    network = Sequential(layers)
    network.compile(loss='mse', optimizer='adam')
    # shuffle=False: batches are fed in the order of the samples
    network.fit(x, y, epochs=e, verbose=v, shuffle=False)
    return network

In [None]:
%%time
#Tuning, step 1: find the best number of epochs for a fixed timeframe size
timeframe = 15
#Create the necessary datasets from the start data (preprocessing and split)
x_train,x_valid,y_train,y_valid,target,pred_convert = data_preproc_and_split(btc_data,timeframe)
#Real rows the converted predictions are compared with (last 100 rows)
real_tail = btc_data.iloc[btc_data.shape[0]-100:]
y_pred = {}
mae = {'before_convert': {}, 'after_convert': {}}
R = range(25,130,25)
for epochs in R:
    #Train a fresh model silently and predict the validation samples
    model = model_gen(x_train,y_train,timeframe,epochs,0)
    preds = model.predict(x_valid)
    #Store the predictions in a frame aligned with the validation targets, plus their dates
    result = y_pred[epochs] = pd.DataFrame(preds, index=y_valid.index, columns = ['price'])
    result['date'] = target['date']
    #Mean absolute error on the relative values, before price conversion
    mae['before_convert'][epochs] = mean_absolute_error(y_valid['price'],result['price'])
    #Convert the relative predictions to valid prices, then measure the error again
    result['price'] = pred_convert['price']*(result['price']+1)
    mae['after_convert'][epochs] = mean_absolute_error(real_tail['price_usd'],result['price'])

#Plot the real prices together with the prediction of every run
fig = go.Figure()
fig.add_trace(go.Scatter(x=real_tail['trade_date'], y=real_tail['price_usd'], name='Real price'))
for epochs in R:
    run = y_pred[epochs]
    fig.add_trace(go.Scatter(x=run['date'], y=run['price'], name='Epoch = ' + str(epochs)))
fig.show()
#Report both errors for every run
for epochs in R:
    print(''.join([
        'Epoch = ', str(epochs),
        '. Mean absolute error before price convertation: ', str(mae['before_convert'][epochs]),
        '. Mean absolute error after price convertation: ', str(mae['after_convert'][epochs]),
        '.',
    ]))

In [None]:
%%time
#Tuning, step 2: find the best timeframe size for a fixed number of epochs (75)
#Real rows the converted predictions are compared with (last 100 rows)
real_tail = btc_data.iloc[btc_data.shape[0]-100:]
y_pred = {}
mae = {'before_convert': {}, 'after_convert': {}}
R = range(10,31,5)
for size in R:
    #Rebuild the datasets for this timeframe size (preprocessing and split)
    x_train,x_valid,y_train,y_valid,target,pred_convert = data_preproc_and_split(btc_data,size)
    #Train a fresh model silently and predict the validation samples
    model = model_gen(x_train,y_train,size,75,0)
    preds = model.predict(x_valid)
    #Store the predictions in a frame aligned with the validation targets, plus their dates
    result = y_pred[size] = pd.DataFrame(preds, index=y_valid.index, columns = ['price'])
    result['date'] = target['date']
    #Mean absolute error on the relative values, before price conversion
    mae['before_convert'][size] = mean_absolute_error(y_valid['price'],result['price'])
    #Convert the relative predictions to valid prices, then measure the error again
    result['price'] = pred_convert['price']*(result['price']+1)
    mae['after_convert'][size] = mean_absolute_error(real_tail['price_usd'],result['price'])

#Plot the real prices together with the prediction of every run
fig = go.Figure()
fig.add_trace(go.Scatter(x=real_tail['trade_date'], y=real_tail['price_usd'], name='Real price'))
for size in R:
    run = y_pred[size]
    fig.add_trace(go.Scatter(x=run['date'], y=run['price'], name='TimeFrame Size = ' + str(size)))
fig.show()
#Report both errors for every run
for size in R:
    print(''.join([
        'TimeFrame Size = ', str(size),
        '. Mean absolute error before price convertation: ', str(mae['before_convert'][size]),
        '. Mean absolute error after price convertation: ', str(mae['after_convert'][size]),
        '.',
    ]))

In [None]:
%%time
#Plot the final result: real prices of the last 100 rows against the prediction stored under key 15
real_tail = btc_data.iloc[btc_data.shape[0]-100:]
final_pred = y_pred[15]
fig = go.Figure()
for xs, ys, label in (
    (real_tail['trade_date'], real_tail['price_usd'], 'Real price'),
    (final_pred['date'], final_pred['price'], 'Predict price'),
):
    fig.add_trace(go.Scatter(x=xs, y=ys, name=label))
fig.show()