In [None]:
import yfinance as yf
from pandas_datareader import data as pdr
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
import statsmodels.api as sm
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.layers import LSTM
from sklearn.metrics import mean_squared_error
# NOTE(review): per the yfinance docs this patches pandas_datareader so that the
# pdr.get_data_yahoo call used below is served by yfinance — confirm against the
# installed yfinance version.
yf.pdr_override()

In [None]:
# Sample window and the assets to download.
start = datetime(2020, 1, 1)
end = datetime(2023, 2, 16)
tickers = ['BTC-USD', 'ETH-USD', 'BNB-USD']

# Keep the full (price field, ticker) column MultiIndex: the feature-engineering
# cell selects columns such as ('Adj Close', 'BTC-USD'), which no longer exist
# once a single field has been sliced out of the download.
# auto_adjust=False is passed explicitly so that the 'Adj Close' field is
# returned regardless of the installed yfinance default.
stk_data = pdr.get_data_yahoo(
    tickers,
    start=start,
    end=end,
    interval='1d',
    auto_adjust=False,
)

# Preview the download (the previous `stk_data.pct` is not a DataFrame
# attribute and raised AttributeError).
stk_data.head()

In [None]:
# Horizon, in rows of stk_data, used for both the target and the base features.
return_period = 5

# Log of the BTC adjusted close; reused for the target and the lagged features.
btc_log_price = np.log(stk_data.loc[:, ('Adj Close', 'BTC-USD')])

# Target: log-price change over the NEXT return_period rows
# (diff looks back, shift(-return_period) moves it onto the earlier row).
Y = btc_log_price.diff(return_period).shift(-return_period)
Y.name = Y.name[-1] + '_pred'

# Features 1: trailing return_period log-price change of the other two assets.
X1 = np.log(stk_data.loc[:, ('Adj Close', ('ETH-USD', 'BNB-USD'))]).diff(return_period)
X1.columns = X1.columns.droplevel()

# Features 2: trailing BTC log-price change over 1x, 3x, 6x and 12x return_period.
btc_lag_windows = [return_period, return_period*3, return_period*6, return_period*12]
btc_lagged_changes = [btc_log_price.diff(window) for window in btc_lag_windows]
X2 = pd.concat(btc_lagged_changes, axis=1).dropna()
X2.columns = ['BTC-USD', 'BTC-USD_3DT', 'BTC-USD_6DT', 'BTC-USD_12DT']

X = pd.concat([X1, X2], axis=1)

# Align target and features, drop incomplete rows, then keep every
# return_period-th row.
dataset = pd.concat([Y, X], axis=1).dropna().iloc[::return_period, :]
Y = dataset.loc[:, Y.name]
X = dataset.loc[:, X.columns]

In [None]:
# One histogram per dataset column, each with its own axes ranges.
hist_options = dict(
    bins=50,
    sharex=False,
    sharey=False,
    xlabelsize=1,
    ylabelsize=1,
    figsize=(12, 12),
)
dataset.hist(**hist_options)
plt.show()

In [None]:
# One density curve per dataset column, laid out on a 4x4 grid of subplots.
density_options = dict(
    kind='density',
    subplots=True,
    layout=(4, 4),
    sharex=True,
    legend=True,
    fontsize=1,
    figsize=(15, 15),
)
dataset.plot(**density_options)
plt.show()

In [None]:
# Pairwise correlation between the target and every feature column.
correlation = dataset.corr()

# Annotated heatmap of the correlation matrix.
plt.figure(figsize=(15, 15))
plt.title('Correlation Matrix')
sns.heatmap(
    correlation,
    vmax=1,
    square=True,
    annot=True,
    cmap='cubehelix',
)

In [None]:
# Decompose the target series using a period of 52 observations.
res = sm.tsa.seasonal_decompose(Y, period=52)

# Draw the decomposition panels on a 15 x 8 inch figure.
fig = res.plot()
fig.set_size_inches(15, 8)
plt.show()

In [None]:
# Score every feature in X against the target Y with f_regression.
bestfeatures = SelectKBest(score_func=f_regression, k=5)
fit = bestfeatures.fit(X, Y)

# Put the feature names and their scores side by side in one table.
dfcolumns = pd.DataFrame(X.columns)
dfscores = pd.DataFrame(fit.scores_)
featureScores = pd.concat([dfcolumns, dfscores], axis=1)
featureScores.columns = ['Specs', 'Score']

# Highest-scoring features first (at most 10 rows), indexed by feature name.
featureScores.nlargest(10, 'Score').set_index('Specs')

In [None]:
# Ordered split: the first (1 - validation_size) share of rows is used for
# training, the remaining rows for testing. No shuffling.
validation_size = 0.2
train_size = int(len(X) * (1-validation_size))

X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
Y_train, Y_test = Y.iloc[:train_size], Y.iloc[train_size:]

In [None]:
# Number of consecutive rows fed to the LSTM for each prediction.
seq_len = 2

n_train = X_train.shape[0]
n_total = X.shape[0]
X_train_values = np.array(X_train)
X_all_values = np.array(X)

# The first seq_len-1 training targets have no complete window, so drop them.
# Every test target keeps a window by borrowing the last training rows.
Y_train_LSTM, Y_test_LSTM = np.array(Y_train)[seq_len-1:], np.array(Y_test)

# Arrays are (samples, seq_len, features).
X_train_LSTM = np.zeros((n_train+1-seq_len, seq_len, X_train.shape[1]))
X_test_LSTM = np.zeros((X_test.shape[0], seq_len, X.shape[1]))

for i in range(seq_len):
    # Step i of every training window: rows i .. i + (n_train - seq_len).
    X_train_LSTM[:, i, :] = X_train_values[i:n_train+i+1-seq_len, :]
    # Step i of every test window. The start offset is n_train + i + 1 - seq_len
    # so that the window for the first test target ends on the first test row;
    # the previous `n_train + i - 1` start was only correct for seq_len == 2 and
    # produced a shape mismatch for any other value.
    X_test_LSTM[:, i, :] = X_all_values[n_train+i+1-seq_len:n_total+i+1-seq_len, :]

In [None]:
def create_LSTMmodel(neurons=12, learn_rate = 0.01, momentum=0):
    """Build and compile a single-layer LSTM regressor.

    The input shape is taken from the module-level ``X_train_LSTM`` array
    (its second and third dimensions), so that array must exist before this
    function is called.

    Parameters
    ----------
    neurons : int
        Number of units in the LSTM layer.
    learn_rate : float
        Learning rate passed to the SGD optimizer.
    momentum : float
        Momentum passed to the SGD optimizer.

    Returns
    -------
    A compiled ``Sequential`` model with one LSTM layer followed by a
    single-unit Dense output, trained against mean squared error.

    Notes
    -----
    Earlier revisions accepted these arguments but ignored them (the layer was
    fixed at 50 units and the model was compiled with ``'adam'``). They are now
    honoured, so results differ from runs made with the old definition.
    """
    model = Sequential()
    model.add(LSTM(neurons, input_shape=(X_train_LSTM.shape[1], X_train_LSTM.shape[2])))
    model.add(Dense(1))
    # SGD is the optimizer that both learn_rate and momentum apply to.
    optimizer = SGD(learning_rate=learn_rate, momentum=momentum)
    model.compile(loss='mse', optimizer=optimizer)
    return model
# Build the network, then train it in row order (shuffle=False) while tracking
# the loss on the test windows after every epoch.
LSTMModel = create_LSTMmodel(neurons=12, learn_rate=0.01, momentum=0)
LSTMModel_fit = LSTMModel.fit(
    X_train_LSTM,
    Y_train_LSTM,
    validation_data=(X_test_LSTM, Y_test_LSTM),
    epochs=1000,
    batch_size=72,
    verbose=1,
    shuffle=False,
)

In [None]:
# Training loss (solid) against validation loss (dashed), one point per epoch.
history = LSTMModel_fit.history
plt.plot(history['loss'], label='train')
plt.plot(history['val_loss'], '--', label='test')
plt.legend()
plt.show()

In [None]:
# Mean squared error on the training windows.
train_predictions = LSTMModel.predict(X_train_LSTM)
error_Training_LSTM = mean_squared_error(Y_train_LSTM, train_predictions)

# Mean squared error on the test windows; `predicted` is reused by later cells.
predicted = LSTMModel.predict(X_test_LSTM)
error_Test_LSTM = mean_squared_error(Y_test, predicted)

In [None]:
# Cumulative sum of the predicted values, indexed like the test targets.
# Y_test.index is used directly rather than round-tripping through
# reset_index()['Date'], which raised KeyError whenever the index was not
# literally named 'Date'.
Y_pred = pd.DataFrame(predicted, index=Y_test.index).cumsum()
Y_pred

In [None]:
# Cumulative sum of the realised test targets, as a one-column frame.
Y_real = Y_test.to_frame().cumsum()
Y_real

In [None]:
# Realised versus predicted cumulative values over the test period.
fig, ax = plt.subplots()
ax.plot(Y_real, label='Real')
ax.plot(Y_pred, label='Predicted')

# Title, legend and axis labels
ax.set_title('Comparison of Returns')
ax.legend(loc='upper left')
ax.set_xlabel('Date')
ax.set_ylabel('Returns')

# Render the figure
plt.show()