In [120]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None
import tensorflow 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Bidirectional, TimeDistributed, InputLayer
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import seaborn as sns
from functools import reduce
from phik import resources, report
tensorflow.random.set_seed(15)


In [121]:
close = pd.read_csv("data/close.csv").add_suffix('_close').rename(columns={'Date_close':'Date'})
adj_close = pd.read_csv("data/adj_close.csv").add_suffix('_adj_close').rename(columns={'Date_adj_close':'Date'})
high = pd.read_csv("data/high.csv").add_suffix('_high').rename(columns={'Date_high':'Date'})
low = pd.read_csv("data/low.csv").add_suffix('_low').rename(columns={'Date_low':'Date'})
open1 = pd.read_csv("data/open.csv").add_suffix('_open').rename(columns={'Date_open':'Date'})
volume = pd.read_csv("data/volume.csv").add_suffix('_vol').rename(columns={'Date_vol':'Date'})

## Parsing and concactenating the data to desired format


In [122]:
dfs = [close, adj_close, high, low, open1, volume]
df_merged = reduce(lambda  left,right: pd.merge(left,right,on=['Date'],
                                            how='outer'), dfs)

def get_coin(coin,df = df_merged):
    re = '^' + coin 
    new = df.filter(regex=re,axis=1)
    new.columns  = new.columns.str.lstrip(coin + '_').dropna()
    new['Date'] = df.loc[:,'Date']
    new['Date'] = pd.to_datetime(new['Date'])
    new = new.dropna()
    new.name = coin
    return new

In [123]:
#original dataset parsing 
ADA = get_coin('ADA-USD')
ATOM = get_coin('ATOM-USD')
AVAX = get_coin('AVAX-USD')
AXS = get_coin('AXS-USD') 
BTC = get_coin('BTC-USD') 
ETH = get_coin('ETH-USD')
LINK = get_coin('LINK-USD')
LUNA1 = get_coin('LUNA1-USD') 
MATIC = get_coin('MATIC-USD') 
SOL = get_coin('SOL-USD') 

In [124]:
AVAX=AVAX.loc[~((AVAX['Date'] == '2020-07-13') | (AVAX['Date'] == '2020-07-14'))]


## Auxiliary functions to run our model



In [125]:
def df_to_X_y(df, past_days,future_days= 1):
    X = []
    y = []
    for i in range(past_days, len(df) - future_days +1):
        X.append(df[i - past_days:i, 0:df.shape[1]])
        y.append(df[i + future_days - 1:i + future_days, 0])

    X, y = np.array(X), np.array(y)
    
    #print('X shape == {}.'.format(X.shape))
    #print('y shape == {}.'.format(y.shape))
    return X,y
    


In [126]:
def choose_sequential_model(model_name,trainX,trainY, metric):
    model = Sequential()
    if model_name == 'LSTM':
        model.add(LSTM(64, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
        model.add(LSTM(32, activation='relu', return_sequences=False))
        model.add(Dropout(0.2))
        model.add(Dense(trainY.shape[1]))
    else:
        model.add(GRU(64, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
        model.add(GRU(32, activation='relu', return_sequences=False))
        model.add(Dropout(0.2))
        model.add(Dense(trainY.shape[1]))
    
    model.compile(optimizer='adam', loss='mse',metrics = metric)
    #model.build(input_shape=(None,trainX.shape[1], trainX.shape[2]))

    #model.summary()
    return model


In [127]:
def run_fit_model( model_name, df):
    #split into train and test
    train_size = int(len(df)*0.85)
    test_size = len(df) - train_size
    train, test = df[0:train_size,:],df[train_size:len(df)]
    #define input shape
    trainX,trainY = df_to_X_y(train,20)
    testX,testY = df_to_X_y(test,20)
        
    #define metrics to use in our model 
    metric = ['mse','mae',RootMeanSquaredError()]
    model = choose_sequential_model(model_name,trainX,trainY,metric)
    history = model.fit(trainX, trainY, epochs=600, batch_size=32, validation_data=(testX,testY), verbose=0)
    return trainX, trainY, testX, testY, history,model
    



In [128]:
def plot_metrics(history):
    plt.plot(history.history['val_root_mean_squared_error'], label='Val RMSE')
    plt.plot(history.history['val_mae'], label='Val MAE')
    plt.plot(history.history['val_mse'], label='Val MSE')
    plt.legend()

In [129]:
def mean_calc(dic,metric):
    filtered_vals = dic.get(metric)
    average = sum(filtered_vals) / len(filtered_vals)
    print(metric + ' ----> ' + str(average))

## Final function, plots and final output 

In [130]:

def coin_predictions(coin, model_n, multivariable='IND', metric_details=0):
    tensorflow.random.set_seed(15)
    indicators = ['rsi','macd']
    #if multivariable is set to 0, the function makes the predictions based on only the closing price variable
    if multivariable == 'CLOSE':
        df_training = coin.close.to_frame()

    #if multivariable is set to 1, the function makes the predictions based only on the OHLC variables
    elif multivariable == 'OHLC' and (indicators not in coin.columns.to_list()):
        cols = coin.drop(['adj_close','vol', 'Date'], axis = 1).columns
        df_training = coin[cols].astype(float)

    #if multivariable is set to 2, the function makes the predictions based on  the financial indicator variables
    elif multivariable == 'IND':
       cols = coin.drop(['adj_close','vol', 'Date','high', 'low', 'open'], axis = 1).columns
       df_training = coin[cols].astype(float)
    
    #else, predictions are based on only financial indicator variables  + OHLC
    else:
        cols = coin.drop(['adj_close','vol', 'Date'], axis = 1).columns
        df_training = coin[cols].astype(float)
    
    
    
    #slice only the last 365 days to use as prediction data
    df_training = df_training.tail(365)
    
    #scale the training dataset
    scaler = MinMaxScaler()
    scaler = scaler.fit(df_training)
    df_training_scaled = scaler.transform(df_training)

    #run and fit our prediction model, where the model is stored in the variable model and the fit is stored in the history variable
    trainX, trainY, testX, testY, history, model = run_fit_model(model_n, df_training_scaled)

    #h_to_plot = history

    #if metric_details is set to 1, details about the performance metrics are shown and plotted 
    if metric_details == 1:
        itermetrics = ['val_root_mean_squared_error','val_mae','val_mse']
        for i in itermetrics:
            mean_calc(history.history,i)
    
        plot_metrics(history)

    #generate date range for predtiction using the 20th previous predictions to predict the next day  
    train_dates = pd.to_datetime(coin.Date.tail(365))
    n_days_for_prediction = 20
    predict_period_dates = pd.date_range(list(train_dates)[-19], periods=n_days_for_prediction).tolist()

    forecast_dates = []
    for time_i in predict_period_dates:
        forecast_dates.append(time_i.date())

    #make the prediction 
    prediction = model.predict(testX[-n_days_for_prediction:])
    #inverse scale the obtained results
    prediction_copies = np.repeat(prediction, df_training.shape[1], axis=-1)
    
    #create a new dataframe with the prediction
    if multivariable == 'CLOSE':
        y_pred_future = scaler.inverse_transform(prediction_copies)[:,0]
        df_forecast = pd.DataFrame({'Date':np.array(forecast_dates), 'close':y_pred_future})

        

    elif multivariable == 'OHLC':
        y_pred_future = scaler.inverse_transform(prediction_copies)
        df_forecast = pd.DataFrame({'Date':np.array(forecast_dates), 'close':y_pred_future[:,0],  'high':y_pred_future[:,1],  'low':y_pred_future[:,2],  'open':y_pred_future[:,3]})

        
    
    elif multivariable == 'IND':
        y_pred_future = scaler.inverse_transform(prediction_copies)
        df_forecast = pd.DataFrame({'Date':np.array(forecast_dates), 'close':y_pred_future[:,0],  'rsi':y_pred_future[:,1],  'macd':y_pred_future[:,2]})
        
   

    else:
        y_pred_future = scaler.inverse_transform(prediction_copies)
        df_forecast = pd.DataFrame({'Date':np.array(forecast_dates), 'close':y_pred_future[:,0],  'high':y_pred_future[:,1],  'low':y_pred_future[:,2],  'open':y_pred_future[:,3]
        , 'rsi':y_pred_future[:,4], 'macd':y_pred_future[:,5]})

    
    df_forecast['Date']=pd.to_datetime(df_forecast['Date'])
    
    original = coin.tail(365)
    original['Date']=pd.to_datetime(original['Date'])



   

    return df_forecast, original 





## Predictions

In [131]:
ADA_I_1 = pd.read_csv("data_updated/ADA_IND.csv")
ADA_I_1.name = 'ADA-USD'
ATOM_I_1 = pd.read_csv("data_updated/ATOM_IND.csv")
ATOM_I_1.name = 'ATOM-USD'
AVAX_I_1 = pd.read_csv("data_updated/AVAX_IND.csv")
AVAX_I_1.name = 'AVAX-USD'
AXS_I_1 = pd.read_csv("data_updated/AXS_IND.csv")
AXS_I_1.name = 'AXS-USD'
BTC_I_1 = pd.read_csv("data_updated/BTC_IND.csv")
BTC_I_1.name = 'BTC-USD'
ETH_I_1 = pd.read_csv("data_updated/ETH_IND.csv")
ETH_I_1.name = 'ETH-USD'
LINK_I_1 = pd.read_csv("data_updated/LINK_IND.csv")
LINK_I_1.name = 'LINK-USD'
LUNA1_I_1 = pd.read_csv("data_updated/LUNA1_IND.csv")
LUNA1_I_1.name = 'LUNA1-USD'
MATIC_I_1 = pd.read_csv("data_updated/MATIC_IND.csv")
MATIC_I_1.name = 'MATIC-USD'
SOL_I_1 = pd.read_csv("data_updated/SOL_IND.csv")
SOL_I_1.name = 'SOL-USD'


In [132]:
def repeatRows(d, n=1):
    return pd.concat([d]*n)

In [133]:
def final_output(n_days):  
    coins_I = [ADA_I_1,ATOM_I_1,AVAX_I_1,AXS_I_1, BTC_I_1,ETH_I_1, LINK_I_1,LUNA1_I_1, MATIC_I_1, SOL_I_1 ]
    for i in range(n_days):
        for coin in range(len(coins_I)):
            coin_name = coins_I[coin].name
            forecast, original = coin_predictions(coins_I[coin],'GRU')
            new_data = pd.concat([original,repeatRows(original[-1:], 1)])
            new_data.iloc[-1,[0,1,7,8]] = [forecast['Date'].iloc[-1],forecast['close'].iloc[-1],forecast['rsi'].iloc[-1],forecast['macd'].iloc[-1]]
            coins_I[coin] = new_data
            print('- ' + coin_name + ': ', forecast['close'].iloc[-1])

In [134]:
final_output(2)

- ADA-USD:  0.8217862
- ATOM-USD:  16.769114
- AVAX-USD:  55.15369
