# Prections using the different models

In [2]:
# Import Libraries
import numpy as np
import pandas as pd
#import hvplot.pandas
import tensorflow as tf
from pathlib import Path

# API import
import requests
import json
import pickle as pk

In [3]:
#Import Data for predictions: In this case we are using the X_test as start data for our predictions
#All data from API
df = pd.read_csv("./Export/Test-data/DataSet.csv", index_col='Date', infer_datetime_format=True)


#Adapted X data for the different models
X_test_rnn = pd.read_csv("./Export/Test-data/X_test.csv")
X_test_reg = X_test_rnn.iloc[:,-1]

# Output data y_test
y_test = pd.read_csv("./Export/Test-data/y_test.csv")

In [4]:
# Scale data for RNN
#Import X_scaler from pickle
filename_x_scaler = "./Export/X_scaler_rnn"
infile = open(filename_x_scaler,'rb')
X_scaler = pk.load(infile)
infile.close()

# Scale the testing sets
X_test_rnn = X_scaler.transform(X_test_rnn)
# Reshape X_test
X_test_rnn = X_test_rnn.reshape((X_test_rnn.shape[0], X_test_rnn.shape[1], 1))


#Import y_scaler from pickle
filename_y_scaler = "./Export/Y_scaler_rnn"
infile = open(filename_y_scaler,'rb')
Y_scaler = pk.load(infile)
infile.close()

# Scale the testing sets
y_test_rnn = Y_scaler.transform(y_test)

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


# Load and predict LSTM model

In [5]:
# Load RNN model to predict values
from tensorflow.keras.models import model_from_json

# load json and create model
file_path = Path("./Export/rnn_model.json")
with open(file_path, "r") as json_file:
    rnn_model = json_file.read()
rnn_loaded = model_from_json(rnn_model)

# load weights into new model
file_path = "./Export/rnn_model.h5"
rnn_loaded.load_weights(file_path)

In [7]:
# Prediction with RNN
prediction_rnn = rnn_loaded.predict(X_test_rnn)
predicted_prices = Y_scaler.inverse_transform(prediction_rnn)
real_prices = Y_scaler.inverse_transform(y_test_rnn.reshape(-1, 1))

# Create a DataFrame of Real and Predicted values
outcome = pd.DataFrame({
    "Real_Closing": real_prices.ravel(),
    "Predicted_LSTM": predicted_prices.ravel()
}, index = df.index[-len(real_prices): ]) 

# Plot the real vs predicted values as a line chart
#display(outcome.tail(),
#        outcome.hvplot(title=f"Actual Vs. Predicted Prices"))

In [8]:
# Signal Strategy for RNN
# Return for real price and prediction
outcome["real_return"] = outcome["Real_Closing"].pct_change()
outcome["LSTM_return"] = outcome["Predicted_LSTM"].pct_change()

#Determine a strategy for the RNN signal
# There is a lag in the signals, the RNN signal has always 1-2 periods delay. The signal will react with this delay (alias: reddit signal)
# When both signal go in the same direction , then keep it.
# When signals go in different directions (up vs down), then take the direction of the real price
outcome.loc[(outcome["real_return"] >= 0.0), "signal_real"] = 1
outcome.loc[(outcome["real_return"] < 0.0), "signal_real"] = 0
outcome.loc[(outcome["LSTM_return"] >= 0.0), "signal_nn"] = 1
outcome.loc[(outcome["LSTM_return"] < 0.0), "signal_nn"] = 0
outcome["delta_signal"] = outcome["signal_real"] - outcome["signal_nn"]

outcome["signal_LSTM"] = 0.0
outcome.loc[(outcome["signal_real"] == outcome["signal_LSTM"]), "signal_LSTM"] = outcome["signal_real"]
outcome.loc[(outcome["delta_signal"] == -1), "signal_LSTM"] = 0
outcome.loc[(outcome["delta_signal"] == 1), "signal_LSTM"] = 1

# outcome['Entry/Exit'] = outcome['signal'].diff()

outcome.tail()

Unnamed: 0_level_0,Real_Closing,Predicted_LSTM,real_return,LSTM_return,signal_real,signal_nn,delta_signal,signal_LSTM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-25 20:00:00,36875.0,40980.875,-0.005314,0.000794,0.0,1.0,-1.0,0.0
2022-01-25 21:00:00,36557.3,40949.761719,-0.008616,-0.000759,0.0,0.0,0.0,0.0
2022-01-25 22:00:00,36780.9,40821.949219,0.006116,-0.003121,1.0,0.0,1.0,1.0
2022-01-25 23:00:00,36956.9,40755.816406,0.004785,-0.00162,1.0,0.0,1.0,1.0
2022-01-26 00:00:00,36841.7,40755.171875,-0.003117,-1.6e-05,0.0,0.0,0.0,0.0


# Load and predict Linear Regression model

In [9]:
#Import y_scaler from pickle
filename_LinReg = "./Export/LinReg"
infile = open(filename_LinReg,'rb')
LinReg = pk.load(infile)
infile.close()

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [10]:
# Prediction with Linear regression
X_reg = X_test_reg.to_frame("Closing_price")
predictions = np.array( LinReg.predict(X_reg)).reshape(-1, 1)

# Add Prediction to Dataframe
outcome["Predicted_LinReg"] = predictions


In [12]:
# Signal Strategy for Time Serie
outcome["LinReg_return"] = outcome["Predicted_LinReg"].pct_change()

# Create a new column in the trading_df called signal setting its value to zero. 
# Create the signal to buy when return > 0 and sell when return < 1
outcome["signal_LinReg"] = 0.0
outcome.loc[(outcome["LinReg_return"] >= 0), "signal_LinReg"] = 1
outcome.loc[(outcome["LinReg_return"] < 0), "signal_LinReg"] = 0

#display(outcome[["Real_Closing", "Predicted_LinReg"]].tail(),
#        outcome[["Predicted_LinReg", "Real_Closing"]].hvplot(title=f"Actual Vs. Predicted Prices"))

# Merge signals from different models to an unique signal

In [127]:
# Strategy : 0 sell, 1 hold,  2 buy (on signals)
predictions_df = outcome.drop(columns=[ 'Predicted_LSTM', 'real_return', 'LSTM_return',
       'signal_real', 'signal_nn', 'delta_signal',
       'Predicted_LinReg', 'LinReg_return' ])

predictions_df["cum_signal"] = predictions_df["signal_LSTM"] + predictions_df["signal_LinReg"]

In [128]:
# Create column with difference (delta) of the cumulative signal 
# The first value of the delta column is hardcoded as 1 to provide the ability to buy if the signal advises this at time zero
predictions_df["delta_signal"] = predictions_df["cum_signal"].diff()

# Set the first delta value to 1 to allow for a buy if applicable since at the start of the time series there must be a buy first
predictions_df.at["2022-01-07 14:00:00",'delta_signal'] = 1


In [129]:
# Create the buy/sell switch column and set to NaN as a placeholder for now.
predictions_df["Action_Switch"] = np.nan

# Create the "Action" column to document the FINAL OUTPUT action (i.e., buy, sell, or hold) that should be taken based on the signals. 
# Initially set to NaN as placeholder for now.
predictions_df["Action"] = np.nan

predictions_df

Unnamed: 0_level_0,Real_Closing,signal_LSTM,signal_LinReg,cum_signal,delta_signal,Action_Switch,Action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-01-07 14:00:00,41898.5,0.0,0.0,0.0,1.0,,
2022-01-07 15:00:00,41317.1,0.0,1.0,1.0,1.0,,
2022-01-07 16:00:00,42143.9,1.0,0.0,1.0,0.0,,
2022-01-07 17:00:00,41995.1,0.0,1.0,1.0,0.0,,
2022-01-07 18:00:00,41700.1,0.0,0.0,0.0,-1.0,,
...,...,...,...,...,...,...,...
2022-01-25 20:00:00,36875.0,0.0,0.0,0.0,-1.0,,
2022-01-25 21:00:00,36557.3,0.0,0.0,0.0,0.0,,
2022-01-25 22:00:00,36780.9,1.0,0.0,1.0,1.0,,
2022-01-25 23:00:00,36956.9,1.0,1.0,2.0,1.0,,


In [130]:
# Define function to determine what action to take while also minimizing the number of buys & sells (i.e., once a buy happens, 
# another buy can only occur after a sell occurs).
def buy_sell_action(df):

    # Create buy and sell switch components to compare to the Action Switch in each row
    can_sell_switch = 0
    can_buy_switch = 1

    # Set a variable for the switch that can be updated after each run through the if statements to determine how the buy/sell switch should change
    # Initially set switch to 1 (i.e., buy) at time point zero because a buy must occur first after the start of the time series.
    # Action Switch = 1 means "can buy", and Action Switch = 0 means "can sell" as defined above.
    action_switch = 1
    
    # Run through if statements based on the conditions to determine whether the action should be a buy (2), sell (0), or hold (1)
    for i in range(len(predictions_df)):
    
        predictions_df["Action_Switch"][i] = action_switch
    
        # First look at the condition when a sell is possible according to the switch
        if predictions_df["Action_Switch"][i] == can_sell_switch:
            if predictions_df["delta_signal"][i] < 0 and predictions_df["cum_signal"][i] == 0:
                predictions_df["Action"][i] = 0
        
                # Change the buy/sell switch to a "can buy" since a sell has now occurred
                action_switch = can_buy_switch
    
            else:
                predictions_df["Action"][i] = 1

        # Second look at the condition when a buy is possible according to the switch
        if predictions_df["Action_Switch"][i] == can_buy_switch:
            if predictions_df["delta_signal"][i] > 0 and predictions_df["cum_signal"][i] == 2:
                predictions_df["Action"][i] = 2
                
                # Change the buy/sell switch to a "can sell" since a buy has now occurred
                action_switch = can_sell_switch

            else:
                predictions_df["Action"][i] = 1
   
        # Finally look at the condition when a hold is needed due to no change in the signal from the previous time point
        if predictions_df["delta_signal"][i] == 0:
            predictions_df["Action"][i] = 1   
        
    return df.head(50)

In [131]:
buy_sell_action(predictions_df)

Unnamed: 0_level_0,Real_Closing,signal_LSTM,signal_LinReg,cum_signal,delta_signal,Action_Switch,Action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-01-07 14:00:00,41898.5,0.0,0.0,0.0,1.0,1.0,-99.0
2022-01-07 15:00:00,41317.1,0.0,1.0,1.0,1.0,1.0,-99.0
2022-01-07 16:00:00,42143.9,1.0,0.0,1.0,0.0,1.0,1000.0
2022-01-07 17:00:00,41995.1,0.0,1.0,1.0,0.0,1.0,1000.0
2022-01-07 18:00:00,41700.1,0.0,0.0,0.0,-1.0,1.0,-99.0
2022-01-07 19:00:00,41772.8,1.0,0.0,1.0,1.0,1.0,-99.0
2022-01-07 20:00:00,41916.3,1.0,1.0,2.0,1.0,1.0,2.0
2022-01-07 21:00:00,41880.7,0.0,1.0,1.0,-1.0,0.0,99.0
2022-01-07 22:00:00,41539.6,0.0,0.0,0.0,-1.0,0.0,0.0
2022-01-07 23:00:00,41561.4,1.0,0.0,1.0,1.0,1.0,-99.0


In [None]:
# Test signal
predictions_df['signal'] = predictions_df["cum_signal"]
predictions_df.head()

In [15]:
#Library of functions for the backtesting
from Backtesting import BacktestingSimulation

initial_capital= float(100000)
share_size=500

cumul = BacktestingSimulation(predictions_df, 'cum_signal', 'BTC', initial_capital, share_size)
LSTM = BacktestingSimulation(predictions_df, 'signal_LSTM', 'BTC', initial_capital, share_size)
LinReg = BacktestingSimulation(predictions_df, 'signal_LinReg', 'BTC', initial_capital, share_size)

portfolio_cumul = cumul.portfolio_returns()
portfolio_LSTM = LSTM.portfolio_returns()
portfolio_LinReg =LinReg.portfolio_returns()

a = LSTM.visual_portfolio()
b = LinReg.visual_portfolio()
c = cumul.visual_portfolio()


#display(portfolio_cumul,
#        b * a * c
#       )

ModuleNotFoundError: No module named 'hvplot'

In [12]:
# # Determine a function that will be able to calculate the  return for different strategies
# #Function needs a dataframe with a feature called 'signal' and an other one with the closing prices

# def portfolio_returns ( initial_capital , share_size , signal_df , name_signal ) :
#     # Create a New Dataframe to hold the the return information
#     portfolio_returns = pd.DataFrame(signal_df['Real_Closing'])
    
#      # Buy a 500 share position when the dual moving average crossover Signal equals 1
#     # Otherwise, `Position` should be zero (sell)
#     portfolio_returns['Position'] = share_size * signal_df[name_signal]

#     # Determine the points in time where a 500 share position is bought or sold
#     portfolio_returns['Entry/Exit Position'] = portfolio_returns['Position'].diff()

#     # Multiply the close price by the number of shares held, or the Position
#     portfolio_returns['Portfolio Holdings'] = signal_df['Real_Closing'] * portfolio_returns['Position']

#     # Subtract the amount of either the cost or proceeds of the trade from the initial capital invested
#     portfolio_returns['Portfolio Cash'] = initial_capital - (signal_df['Real_Closing'] * portfolio_returns['Entry/Exit Position']).cumsum() 

#      # Calculate the total portfolio value by adding the portfolio cash to the portfolio holdings (or investments)
#     portfolio_returns['Portfolio Total'] = portfolio_returns['Portfolio Cash'] + portfolio_returns['Portfolio Holdings']

#      # Calculate the portfolio daily returns
#     portfolio_returns['Portfolio Daily Returns'] = portfolio_returns['Portfolio Total'].pct_change()

#      # Calculate the portfolio cumulative returns
#     portfolio_returns['Portfolio Cumulative Returns'] = (1 + portfolio_returns['Portfolio Daily Returns']).cumprod() - 1
    
#     return portfolio_returns.dropna()


In [13]:
# # Define a function that make a time dependant vizualisation of the potfolio using the return of function 'portfolio_returns'
# def viz_portfolio(portfolio_returns, name_signal):
#     # Visualize exit position relative to total portfolio value
#     exit = portfolio_returns[portfolio_returns['Entry/Exit Position'] == -1.0]['Portfolio Total'].hvplot.scatter(
#         color='yellow',
#         marker='v',
#         legend=False,
#         ylabel='Total Portfolio Value',
#         width=1000,
#         height=400
#     )

#     # Visualize entry position relative to total portfolio value
#     entry = portfolio_returns[portfolio_returns['Entry/Exit Position'] == 1.0]['Portfolio Total'].hvplot.scatter(
#         color='purple',
#         marker='^',
#         ylabel='Total Portfolio Value',
#         width=1000,
#         height=400
#     )

#     # Visualize the value of the total portfolio
#     total_portfolio_value = portfolio_returns[['Portfolio Total']].hvplot(
#         ylabel='Total Portfolio Value',
#         xlabel='Date',
#         width=1000,
#         height=400,
#         label = name_signal
#     )

#     # Overlay the plots
#     portfolio_entry_exit_plot = total_portfolio_value * entry * exit
#     portfolio_entry_exit_plot.opts(
#         title="Total Portfolio Value",
#         yformatter='%.0f'
#     )
    
#     return portfolio_entry_exit_plot

# Backtesting
1. Consider de investement ratios
2. Calculate the actual return over the test periode

### Investement ratio's

In [14]:
# def investement_ratios (portfolio_returns, name_signal):
#     # Create a list for the column name
#     columns = [f"Backtest {name_signal}"]

#     # Create a list holding the names of the new evaluation metrics
#     metrics = [
#         "Annualized Return",
#         "Cumulative Returns",
#         "Annual Volatility",
#         "Sharpe Ratio",
#         "Sortino Ratio"]

#     # Initialize the DataFrame with index set to the evaluation metrics and the column
#     portfolio_evaluation_df = pd.DataFrame(index=metrics, columns=columns)

#     # Calculate annualized return
#     portfolio_evaluation_df.loc["Annualized Return"] = (
#         portfolio_returns["Portfolio Daily Returns"].mean() * 365
#     )

#      # Calculate cumulative return
#     portfolio_evaluation_df.loc["Cumulative Returns"] = portfolio_returns["Portfolio Cumulative Returns"][-1]

#      # Calculate annual volatility
#     portfolio_evaluation_df.loc["Annual Volatility"] = (
#         portfolio_returns["Portfolio Daily Returns"].std() * np.sqrt(365)
#     )

#      # Calculate Sharpe ratio
#     portfolio_evaluation_df.loc["Sharpe Ratio"] = (
#         portfolio_returns["Portfolio Daily Returns"].mean() * 365) / (
#         portfolio_returns["Portfolio Daily Returns"].std() * np.sqrt(365)
#     )

    
#     # Calculate Sharpe ratio Sortino ratio:
#     #First create a DataFrame that contains the Portfolio Daily Returns column and the downside return values
#     sortino_ratio_df = portfolio_returns[["Portfolio Daily Returns"]].copy()
#     sortino_ratio_df.loc[:,"Downside Returns"] = 0

#     # Second Find Portfolio Daily Returns values less than 0,
#     # square those values, and add them to the Downside Returns column
#     sortino_ratio_df.loc[sortino_ratio_df["Portfolio Daily Returns"] < 0,
#                          "Downside Returns"] = sortino_ratio_df["Portfolio Daily Returns"]**2

#     # Then Calculate the annualized return value and the annualized downside standard deviation value
#     annualized_return = (
#         sortino_ratio_df["Portfolio Daily Returns"].mean() * 365)

#     downside_standard_deviation = (
#         np.sqrt(sortino_ratio_df["Downside Returns"].mean()) * np.sqrt(365))

#     # Finally do the last calculation for the sortino_ratio
#     sortino_ratio = annualized_return/downside_standard_deviation

#     # Add the Sortino ratio to the evaluation DataFrame
#     portfolio_evaluation_df.loc["Sortino Ratio"] = sortino_ratio
    
#     return portfolio_evaluation_df

In [15]:
# Compare strategy ratio's
cumul_ratio = cumul.investement_ratios()
LSTM_ratio = LSTM.investement_ratios()
LinReg_ratio = LinReg.investement_ratios()

# concat all ratio's to one dataframe
all_ratios = pd.concat([cumul_ratio, LSTM_ratio, LinReg_ratio], axis=1, join='inner')
all_ratios

Unnamed: 0,Backtest cum_signal,Backtest signal_LSTM,Backtest signal_LinReg
Annualized Return,-3.725383,9.375072,6.009301
Cumulative Returns,-9.6145,-1.846,-7.7685
Annual Volatility,13.735627,6.113555,14.748953
Sharpe Ratio,-0.27122,1.533489,0.407439
Sortino Ratio,-0.322391,4.829705,0.750229


### Investement return

In [16]:
# Initialize trade evaluation DataFrame with columns
def trade_evaluation(portfolio_returns, ticker):
    trade_evaluation_df = pd.DataFrame(
                columns=[
                    "CURRENCY",
                    "Entry Date",
                    "Exit Date",
                    "Shares",
                    "Entry Share Price",
                    "Exit Share Price",
                    "Entry Portfolio Holding",
                    "Exit Portfolio Holding",
                    "Profit/Loss"]
            )

        # Loop through signal DataFrame
        # If `Entry/Exit` is 1, set entry trade metrics
        # Else if `Entry/Exit` is -1, set exit trade metrics and calculate profit
        # Then append the record to the trade evaluation DataFrame
    for index, row in portfolio_returns.iterrows():
        if row["Entry/Exit Position"] == 500:
            entry_date = index
            entry_portfolio_holding = row["Portfolio Holdings"]
            share_size = row["Entry/Exit Position"]
            entry_share_price = row["Real_Closing"]

        elif row["Entry/Exit Position"] == -500:
            exit_date = index
            exit_portfolio_holding = abs(row["Real_Closing"] * row["Entry/Exit Position"])
            exit_share_price = row["Real_Closing"]
            profit_loss = exit_portfolio_holding - entry_portfolio_holding
            trade_evaluation_df = trade_evaluation_df.append(
                        {
                            "CURRENCY": ticker,
                            "Entry Date": entry_date,
                            "Exit Date": exit_date,
                            "Shares": share_size,
                            "Entry Share Price": entry_share_price,
                            "Exit Share Price": exit_share_price,
                            "Entry Portfolio Holding": entry_portfolio_holding,
                            "Exit Portfolio Holding": exit_portfolio_holding,
                            "Profit/Loss": profit_loss
                        },
                        ignore_index=True)

            # Print the DataFrame
    return trade_evaluation_df

In [15]:
trade_cumul = cumul.trade_evaluation()
#trade_LSTM = LSTM.trade_evaluation()
#trade_LinReg = LinReg.trade_evaluation()

# all_trades = pd.concat([trade_cumul['Portfolio Cumulative Returns'], trade_LSTM['Portfolio Cumulative Returns'], trade_LinReg['Portfolio Cumulative Returns']], axis=1, join='inner')
# all_trades

UnboundLocalError: local variable 'entry_portfolio_holding' referenced before assignment