<a href="https://colab.research.google.com/github/manuelrucci7/deep-learning-course/blob/main/colab/FinanceAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Data Retrieval

In [None]:
%%capture
# !pip install pandas_ta

In [None]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
# import pandas_ta as ta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.model_selection import train_test_split

In [None]:
# Yahoo Finance ticker for ENI as listed on the Milan exchange
ticker_symbol = "ENI.MI"


def get_yahoo_finance_data(symbol=ticker_symbol, interval="5m", period="5d"):
    """Download the price history of one ticker through yfinance.

    Args:
        symbol: Yahoo Finance ticker. The default is the value ``ticker_symbol``
            had when this function was defined.
        interval: Bar size forwarded to ``Ticker.history``.
        period: Look-back period forwarded to ``Ticker.history``.

    Returns:
        The object returned by ``yf.Ticker(symbol).history(...)``.
    """
    return yf.Ticker(symbol).history(interval=interval, period=period)

In [None]:
# Retrieve 5-minute bars; period="max" asks for the maximum period rather than
# the function's 5-day default.
# Interval values listed for yfinance:
#  [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo]
time_series_data = get_yahoo_finance_data(interval="5m", period="max")
# Daily-bar alternative (the CSV index column is then expected to be "Date", see the
# commented-out read_csv variants further down):
#time_series_data = get_yahoo_finance_data(interval="1d", period="5y")


In [None]:
# Fields that come back with the download (one value per bar unless noted):
#   Open         - price at the start of the interval
#   High         - highest price reached during the interval
#   Low          - lowest price reached during the interval
#   Close        - price at the end of the interval
#   Volume       - number of shares traded during the interval
#   Dividends    - dividend payments made during the period
#   Stock Splits - stock-split information for the period
# Anything beyond these basic fields has to be derived from them.
time_series_data.columns.tolist()

['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']

In [None]:
# Technical indicators that Alpha Vantage offers out of the box.
# Each entry below gives the indicator's abbreviation, full name and a short description.
# Note: yfinance does not return these indicators; they have to be calculated manually from the OHLCV data.
# 1. SMA - Simple Moving Average: This is the average of the closing prices over a specific period. It helps to smooth out price data and identify general trends.
# 2. EMA - Exponential Moving Average: Similar to the SMA, but gives more weight to recent prices, making it more responsive to new information.
# 3. MACD - Moving Average Convergence Divergence: A trend-following momentum indicator that shows the relationship between two moving averages, typically used to identify changes in momentum.
# 4. STOCH - Stochastic Oscillator: A momentum indicator that compares the closing price to a range of prices over a certain period. It is used to identify potential reversal points.
# 5. RSI - Relative Strength Index: A momentum oscillator that measures the speed and change of price movements. It helps identify overbought or oversold conditions.
# 6. BBANDS - Bollinger Bands: These bands consist of a middle band (SMA) and two outer bands. They help measure market volatility and identify overbought or oversold conditions.
# 7. ADX - Average Directional Movement Index: Used to quantify the strength of a trend, whether up or down, but not the direction.
# 8. CCI - Commodity Channel Index: Used to identify cyclical trends, and it compares the current price to its average price over a specific period to spot overbought or oversold conditions.
# 9. ATR - Average True Range: A volatility indicator that measures the degree of price variation, helping to determine the level of price uncertainty.
# 10. OBV - On Balance Volume: A volume-based indicator that measures buying and selling pressure. It is used to predict price movements based on volume changes.
# 11. WILLR - Williams %R: A momentum indicator that shows the level of the closing price relative to the highest high for a look-back period, identifying overbought and oversold levels.
# 12. AROON - Aroon Indicator: Helps identify the beginning of a new trend and the strength of that trend by measuring how long it has been since the highest high or lowest low.
# 13. MFI - Money Flow Index: A momentum indicator that uses price and volume data to identify overbought or oversold conditions, similar to RSI but includes volume.
# 14. MAV - Moving Average Envelopes: These are lines placed above and below a moving average. The envelopes help identify overbought and oversold conditions by using percentage-based thresholds.
# 15. TRIX - Triple Exponential Average: This indicator shows the rate of change in a triple-smoothed exponential moving average, used to filter out insignificant price movements.

In [None]:
# Function to calculate various indicators
def calculate_indicators(df_inp):
    """Return a copy of ``df_inp`` extended with technical-indicator columns.

    Args:
        df_inp: DataFrame with at least 'High', 'Low', 'Close' and 'Volume'
            columns, ordered oldest row first. It is not modified.

    Returns:
        A new DataFrame holding the input columns plus, in this order:
        SMA_20, EMA_20, EMA_12, EMA_26, MACD, MACD_Signal, MACD_Hist, Low_14,
        High_14, STOCH_k, STOCH_d, RSI_14, BB_middle, BB_upper, BB_lower, +DM,
        -DM, TR, +DI, -DI, DX, ADX, TP, CCI, ATR, OBV, WILLR, AROON_Up,
        AROON_Down, MF, Positive_MF, Negative_MF, MFI, Upper_Envelope,
        Lower_Envelope, TRIX.
        Rolling indicators are NaN until their window is full.
    """
    df = df_inp.copy()
    close, high, low, volume = df['Close'], df['High'], df['Low'], df['Volume']
    prev_close = close.shift(1)

    # Simple Moving Average (SMA)
    df['SMA_20'] = close.rolling(window=20).mean()

    # Exponential Moving Average (EMA)
    df['EMA_20'] = close.ewm(span=20, adjust=False).mean()

    # Moving Average Convergence Divergence (MACD)
    df['EMA_12'] = close.ewm(span=12, adjust=False).mean()
    df['EMA_26'] = close.ewm(span=26, adjust=False).mean()
    df['MACD'] = df['EMA_12'] - df['EMA_26']
    df['MACD_Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
    df['MACD_Hist'] = df['MACD'] - df['MACD_Signal']

    # Stochastic Oscillator (STOCH)
    df['Low_14'] = low.rolling(window=14).min()
    df['High_14'] = high.rolling(window=14).max()
    range_14 = df['High_14'] - df['Low_14']
    df['STOCH_k'] = 100 * ((close - df['Low_14']) / range_14)
    df['STOCH_d'] = df['STOCH_k'].rolling(window=3).mean()

    # Relative Strength Index (RSI), using simple rolling means of gains and losses
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI_14'] = 100 - (100 / (1 + rs))

    # Bollinger Bands (BBANDS): the 20-bar std is computed once and reused for both bands
    close_std_20 = close.rolling(window=20).std()
    df['BB_middle'] = df['SMA_20']
    df['BB_upper'] = df['BB_middle'] + 2 * close_std_20
    df['BB_lower'] = df['BB_middle'] - 2 * close_std_20

    # Average Directional Movement Index (ADX)
    up_move = high.diff()        # High - previous High
    down_move = -low.diff()      # previous Low - Low
    # A directional move only counts when it is the larger of the two AND positive.
    # Without the "> 0" test an inside bar (lower high, higher low) yields a negative DM.
    df['+DM'] = np.where((up_move > down_move) & (up_move > 0), up_move, 0.0)
    df['-DM'] = np.where((down_move > up_move) & (down_move > 0), down_move, 0.0)
    df['TR'] = np.maximum(high - low,
                          np.maximum(abs(high - prev_close), abs(low - prev_close)))
    df['+DI'] = 100 * (df['+DM'] / df['TR']).rolling(window=14).mean()
    df['-DI'] = 100 * (df['-DM'] / df['TR']).rolling(window=14).mean()
    df['DX'] = (abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI'])) * 100
    df['ADX'] = df['DX'].rolling(window=14).mean()

    # Commodity Channel Index (CCI)
    # NOTE: the denominator uses the rolling standard deviation of the typical price.
    df['TP'] = (high + low + close) / 3
    tp_window = df['TP'].rolling(window=20)
    df['CCI'] = (df['TP'] - tp_window.mean()) / (0.015 * tp_window.std())

    # Average True Range (ATR)
    df['ATR'] = df['TR'].rolling(window=14).mean()

    # On Balance Volume (OBV): add volume on up-closes, subtract on down-closes
    df['OBV'] = (np.where(close > prev_close, volume,
                          np.where(close < prev_close, -volume, 0))).cumsum()

    # Williams %R
    df['WILLR'] = (df['High_14'] - close) / range_14 * -100

    # Aroon Indicator: 100 * (period - bars since the extreme) / period.
    # The window holds period + 1 bars so "bars since" spans 0..period and the
    # result spans 0..100; reversing the window makes argmax/argmin pick the most
    # recent occurrence of the extreme and return its age directly.
    aroon_period = 25
    aroon_window = aroon_period + 1
    df['AROON_Up'] = high.rolling(window=aroon_window).apply(
        lambda w: (aroon_period - w[::-1].argmax()) / aroon_period * 100, raw=True)
    df['AROON_Down'] = low.rolling(window=aroon_window).apply(
        lambda w: (aroon_period - w[::-1].argmin()) / aroon_period * 100, raw=True)

    # Money Flow Index (MFI); money flow is classified by the direction of the close
    df['MF'] = df['TP'] * volume
    df['Positive_MF'] = np.where(close > prev_close, df['MF'], 0)
    df['Negative_MF'] = np.where(close < prev_close, df['MF'], 0)
    money_ratio = df['Positive_MF'].rolling(window=14).sum() / df['Negative_MF'].rolling(window=14).sum()
    df['MFI'] = 100 - (100 / (1 + money_ratio))

    # Moving Average Envelopes (MAV): SMA_20 +/- 5%
    df['Upper_Envelope'] = df['SMA_20'] * 1.05
    df['Lower_Envelope'] = df['SMA_20'] * 0.95

    # Triple Exponential Average (TRIX): percent change of a triple-smoothed EMA
    triple_ema = close
    for _ in range(3):
        triple_ema = triple_ema.ewm(span=15, adjust=False).mean()
    df['TRIX'] = triple_ema.pct_change() * 100

    return df

In [None]:
def generate_plotly_figure(df, plot_optional_indicators=False):
    """Show a stacked Plotly figure of the price candles and their indicators.

    Args:
        df: DataFrame with 'Open', 'High', 'Low', 'Close' and the indicator
            columns produced by ``calculate_indicators`` (SMA_20, EMA_20,
            BB_upper, BB_lower, RSI_14, MACD, MACD_Signal, STOCH_k, STOCH_d and,
            for the optional rows, ADX, CCI, ATR, WILLR, AROON_Up, AROON_Down, OBV).
        plot_optional_indicators: When True, also draw the optional indicators
            on row 6 and OBV on row 7.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # One row per panel; only seven panels exist, so no empty trailing row is allocated.
    fig = make_subplots(
        rows=7, cols=1, shared_xaxes=True, vertical_spacing=0.02,
        subplot_titles=(
            'Candlestick & Moving Averages',
            'Bollinger Bands',
            'RSI',
            'MACD',
            'Stochastic Oscillator',
            'Optional Indicators',
            'OBV' if plot_optional_indicators else ''),
        row_heights=[0.4, 0.15, 0.1, 0.1, 0.1, 0.1, 0.1])

    # Price candles (row 1)
    fig.add_trace(go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
        name='Candlestick'
    ), row=1, col=1)

    # Every remaining trace is a plain line: (row, column, legend name, line style)
    line_specs = [
        (1, 'SMA_20', 'SMA 20', dict(color='orange')),
        (1, 'EMA_20', 'EMA 20', dict(color='magenta')),
        (2, 'BB_upper', 'Upper Bollinger Band', dict(color='grey', dash='dash')),
        (2, 'BB_lower', 'Lower Bollinger Band', dict(color='grey', dash='dash')),
        (3, 'RSI_14', 'RSI 14', dict(color='blue')),
        (4, 'MACD', 'MACD', dict(color='green')),
        (4, 'MACD_Signal', 'MACD Signal', dict(color='red')),
        (5, 'STOCH_k', 'Stochastic %K', dict(color='purple')),
        (5, 'STOCH_d', 'Stochastic %D', dict(color='darkblue')),
    ]
    if plot_optional_indicators:
        line_specs += [
            (6, 'ADX', 'ADX', dict(color='brown')),
            (6, 'CCI', 'CCI', dict(color='cyan')),
            (6, 'ATR', 'ATR', dict(color='black')),
            (6, 'WILLR', 'Williams %R', dict(color='darkred')),
            # Column names must match calculate_indicators ('AROON_*', upper case)
            (6, 'AROON_Up', 'Aroon Up', dict(color='teal')),
            (6, 'AROON_Down', 'Aroon Down', dict(color='navy')),
            (7, 'OBV', 'OBV', dict(color='darkgreen')),
        ]

    for row, column, label, line_style in line_specs:
        fig.add_trace(go.Scatter(
            x=df.index, y=df[column],
            mode='lines', name=label,
            line=line_style
        ), row=row, col=1)

    fig.update_layout(
        title=f'Price and Indicators for {df.index.name}',
        xaxis_title='Date',
        yaxis_title='Price',
        template='plotly_white',
        hovermode='x',
        height=1600,
        # The candlestick trace enables a range slider that would overlap the rows below it
        xaxis_rangeslider_visible=False
    )

    fig.show()


In [None]:
# Modelling frame: the raw download with incomplete rows removed.
# (Use calculate_indicators(time_series_data) instead to add the indicator columns.)
df = time_series_data.dropna()

features_inp = ['Open', 'High', 'Low', 'Volume', 'Dividends', 'Stock Splits']
features_out = ['Close']
X, y = df[features_inp], df[features_out]

# shuffle=False keeps the rows in their original order; 20% of them go to the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [None]:
# Re-join inputs and target of each split column-wise, then write one CSV per split
# (the index is written as the first CSV column).
df_train, df_test = (
    pd.concat(split, axis=1)
    for split in ((X_train, y_train), (X_test, y_test))
)
df_train.to_csv("data_train.csv")
df_test.to_csv("data_test.csv")

## Train a Model

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

In [None]:
# Dataset definition
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset pairing an input window with the targets that follow it.

    Sample ``i`` is ``(x[i : i + encoder_length],
    y[i + encoder_length : i + encoder_length + prediction_length])``.

    Args:
        df_x: Input rows; anything with ``.copy()``, ``len()`` and row slicing
            that ``torch.tensor`` accepts (the notebook passes 2-D numpy arrays).
        df_y: Target rows, same length as ``df_x``.
        encoder_length: Number of consecutive input rows per sample.
        prediction_length: Number of target rows that follow the input window.
    """

    def __init__(self, df_x, df_y, encoder_length, prediction_length):
        self.x = df_x.copy()
        self.y = df_y.copy()
        # check if self.x and self.y has the same length
        assert len(self.x) == len(self.y), "Input and output data must have the same length"
        self.encoder_length = encoder_length
        self.prediction_length = prediction_length

    def __len__(self):
        # The last valid window starts at n - encoder_length - prediction_length,
        # hence the +1. Clamp at 0 so inputs that are too short give an empty dataset
        # rather than a negative length.
        return max(0, len(self.x) - self.encoder_length - self.prediction_length + 1)

    def __getitem__(self, idx):
        """Return ``(x, y)`` float32 tensors for window ``idx``; ``y`` is flattened to 1-D."""
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        if not 0 <= idx < n_windows:
            # Without this check, slicing would silently return truncated windows.
            raise IndexError(f"window index {idx} out of range for {n_windows} windows")
        split = idx + self.encoder_length
        x = self.x[idx:split]
        y = self.y[split:split + self.prediction_length]
        x = torch.tensor(x, dtype=torch.float32)
        # Flatten [prediction_length, 1] -> [prediction_length]; assumes a single target column
        y = torch.tensor(y, dtype=torch.float32).flatten()
        return x, y

In [None]:
# Define LSTM Model
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head on the last layer's final hidden state.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced by the linear head.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout passed to ``nn.LSTM``.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=2, dropout=0.1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map a batch-first input sequence to ``output_size`` values per sample."""
        _outputs, (hidden_states, _cell_states) = self.lstm(x)
        top_layer_state = hidden_states[-1]  # final hidden state of the last LSTM layer
        return self.fc(top_layer_state)

In [None]:
# Load the training split written by the data-retrieval section.
# The first CSV column is the saved index; it is parsed as timestamps here.
df_train = pd.read_csv("data_train.csv", parse_dates=["Datetime"])
# Variant for files whose index column is named "Date":
#df_train = pd.read_csv("data_train.csv", parse_dates=["Date"])

# Params
# Inputs are every column except the target and the timestamp column.
features_inp = [col for col in df_train.keys() if col not in ['Close', 'Datetime','Date']]
features_out = ["Close"]
max_encoder_length = 60     # rows fed to the model per sample
max_prediction_length = 10  # future rows predicted per sample

# Define Model
input_size = len(features_inp)
hidden_size = 16
output_size = max_prediction_length  # one output per predicted step
model = LSTMModel(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Extract plain numpy arrays for the dataset
df_train_x = df_train[features_inp].values
df_train_y = df_train[features_out].values

# Scale inputs and target to [0, 1] with MinMaxScaler (despite the "_std" suffix of the
# variable names, this is min-max scaling, not standardisation).
# Both scalers are fitted on the training data only and are reused by the test and
# backtest cells through the names input_scaler / output_scaler.
from sklearn.preprocessing import MinMaxScaler
input_scaler = MinMaxScaler(feature_range=(0, 1))
df_train_x_std = input_scaler.fit_transform(df_train_x)
output_scaler = MinMaxScaler(feature_range=(0, 1))
df_train_y_std = output_scaler.fit_transform(df_train_y)

# Sliding-window dataset over the scaled arrays, served in shuffled batches of 64
dataset = TimeSeriesDataset(df_train_x_std, df_train_y_std, max_encoder_length, max_prediction_length)
train_dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
# Shape check (the dataset flattens each target window, so y has no trailing axis):
#for x_batch, y_batch in train_dataloader:
#    print(x_batch.shape, y_batch.shape)  # torch.Size([64, 60, 6]) torch.Size([64, 10])
#    break

In [None]:
# Training configuration: number of passes over the data and the device to run on
epochs = 150
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

for epoch in range(epochs):
    model.train()
    batch_losses = []
    for x_batch, y_batch in train_dataloader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        loss = criterion(model(x_batch), y_batch)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Report the mean batch loss of this epoch
    epoch_loss = sum(batch_losses)
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_dataloader)}")

# Persist the trained weights for the backtest section
torch.save(model.state_dict(), "lstm_model.pth")

Epoch 1/150, Loss: 0.2628481458380537
Epoch 2/150, Loss: 0.24355970576124372
Epoch 3/150, Loss: 0.2229422648560326
Epoch 4/150, Loss: 0.19392499665044388
Epoch 5/150, Loss: 0.15235482746700071
Epoch 6/150, Loss: 0.09316301943277414
Epoch 7/150, Loss: 0.053112414999390546
Epoch 8/150, Loss: 0.03878532743678903
Epoch 9/150, Loss: 0.030220871228935582
Epoch 10/150, Loss: 0.023373999791044108
Epoch 11/150, Loss: 0.017286616430248855
Epoch 12/150, Loss: 0.013509242031020377
Epoch 13/150, Loss: 0.011359612975831865
Epoch 14/150, Loss: 0.00985013069760687
Epoch 15/150, Loss: 0.008801835993269705
Epoch 16/150, Loss: 0.007849878859969805
Epoch 17/150, Loss: 0.007182331474603347
Epoch 18/150, Loss: 0.006603640512967447
Epoch 19/150, Loss: 0.006169699255447343
Epoch 20/150, Loss: 0.005729682563435075
Epoch 21/150, Loss: 0.00541244136204697
Epoch 22/150, Loss: 0.005150890048101263
Epoch 23/150, Loss: 0.0048663516326825015
Epoch 24/150, Loss: 0.004642139557959899
Epoch 25/150, Loss: 0.0044503877255

## Test the model

In [None]:
# Run the trained model over the test split and plot its one-step-ahead forecast
# against the actual close.
df_test = pd.read_csv("data_test.csv", parse_dates=["Datetime"])
#df_test = pd.read_csv("data_train.csv", parse_dates=["Datetime"])

model.eval()
df_test_x = df_test[features_inp]
df_test_y = df_test[features_out]

# Scale every input row once with the scaler fitted on the training data,
# instead of re-scaling each overlapping window inside the loop.
x_scaled = input_scaler.transform(df_test_x.values)

y_pred_list = []
y_true_list = []

# A prediction for row idx needs the max_encoder_length rows before it,
# so the first max_encoder_length rows have no prediction.
with torch.no_grad():  # inference only, no gradient bookkeeping needed
    for idx in range(max_encoder_length, len(df_test)):
        x = torch.tensor(x_scaled[idx - max_encoder_length:idx], dtype=torch.float32).unsqueeze(0)
        x = x.to(device)
        y_pred = model(x)
        # Output 0 is the first step after the window, i.e. row idx. The later outputs
        # are forecasts for rows idx+1 .. idx+9 and must not be compared with row idx.
        y_pred_next = y_pred[0, 0].cpu().numpy()
        y_pred_next = output_scaler.inverse_transform(y_pred_next.reshape(-1, 1))[0][0]
        y_pred_list.append(y_pred_next)
        y_true_list.append(df_test_y.iloc[idx].values[0])

# x values for exactly the rows that have a prediction
plot_dates = df_test["Datetime"].iloc[max_encoder_length:]

fig = go.Figure()
fig.add_trace(go.Scatter(x=plot_dates, y=y_pred_list, mode='lines', name='Predicted'))
fig.add_trace(go.Scatter(x=plot_dates, y=y_true_list, mode='lines', name='Actual'))
fig.update_layout(title='Prediction vs Actual', xaxis_title='Date', yaxis_title='Price')
fig.show()



## Test using Backtrader

In [None]:
%%capture
!pip install backtrader

In [None]:
import backtrader as bt
import torch
import pandas as pd

In [None]:
# Reload the test split for the backtest
df_test = pd.read_csv("data_test.csv", parse_dates=["Datetime"])

# Column layout assumed here: timestamp first, target ('Close') last, inputs in between.
column_names = list(df_test.columns)
feature_inp = column_names[1:-1]
feature_out = column_names[-1:]

print(f"input_feature: {feature_inp}")
print(f"output_feature: {feature_out}")
print(f"input_size: {len(feature_inp)}, output_size: {len(feature_out)}")

input_feature: ['Open', 'High', 'Low', 'Volume', 'Dividends', 'Stock Splits']
output_feature: ['Close']
input_size: 6, output_size: 1


In [None]:
# Rebuild the network (requires the LSTMModel class from the training section) with the
# same sizes that were used for training, then restore the saved weights.
input_size = len(feature_inp)  # one input per feature column
hidden_size = 16
output_size = 10  # max_prediction_length
tft = LSTMModel(input_size, hidden_size, output_size)

saved_weights = torch.load("lstm_model.pth", weights_only=True)
tft.load_state_dict(saved_weights)
tft.eval()

LSTMModel(
  (lstm): LSTM(6, 16, num_layers=2, batch_first=True, dropout=0.1)
  (fc): Linear(in_features=16, out_features=10, bias=True)
)

In [None]:
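# TODO: save the fitted scalers in the training section and load them here.
# Until then, the backtest below reuses `input_scaler` and `output_scaler` from the training cells.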

In [None]:
class TFTStrategy(bt.Strategy):
    """Long-only strategy that trades on the model's forecast of the next close.

    On every bar with enough history the last ``max_encoder_length`` rows of the
    module-level frame ``df`` are scaled with ``input_scaler``, fed to the
    module-level model ``tft``, and the first forecast step is mapped back to a
    price with ``output_scaler``. All four names must exist before the backtest runs.

    Params:
        stake_size: Number of shares per order.
        threshold: Relative move the forecast must exceed to trigger an order
            (0.01 means the forecast must differ from the current close by more than 1%).
    """
    params = (
        ('stake_size', 100),  # Number of shares to buy/sell
        ('threshold', 0.01),  # Minimum relative forecast move that triggers a trade
    )

    def __init__(self):
        # Initialize relevant data variables
        self.data_close = self.datas[0].close
        self.last_prediction = None
        self.num_fails = 0  # Number of closed trades that lost money

        # Per-bar series collected for plotting
        self.portfolio_value_list = []
        self.datetime_list = []

        # Time and close price of each bar on which an order was issued
        self.buy_dates = []
        self.buy_prices = []
        self.sell_dates = []
        self.sell_prices = []

    def notify_trade(self, trade):
        """Print the result of each closed trade and count the losing ones."""
        if trade.status == trade.Closed:
            print(f"Trade Closed: P/L = {trade.pnlcomm}")
            if trade.pnlcomm < 0:
                self.num_fails += 1

    def next(self):
        """Forecast the next close, trade on it, and record the portfolio value."""
        bars_seen = len(self)
        if bars_seen >= max_encoder_length:  # Ensure enough historical data for prediction
            # The most recent max_encoder_length rows, current bar included
            window = df.iloc[bars_seen - max_encoder_length:bars_seen]
            feature_columns = [col for col in window.columns if col not in ("Datetime", "Close")]

            # Same scaling as at training time
            scaled_window = input_scaler.transform(window[feature_columns].values)
            input_tensor = torch.tensor(scaled_window, dtype=torch.float32).unsqueeze(0)

            with torch.no_grad():
                prediction = tft(input_tensor)

            # The model outputs several future steps; only the first one (next bar) is used.
            scaled_forecast = prediction[0, 0].item()
            # Give the scaler a plain nested list rather than a torch tensor
            self.last_prediction = output_scaler.inverse_transform([[scaled_forecast]])[0][0]

            current_price = self.data_close[0]
            threshold = self.params.threshold

            if self.last_prediction > current_price * (1 + threshold):
                if not self.position:  # Buy if no open position
                    self.buy(size=self.params.stake_size)
                    print(f"Buying at {current_price}")
                    self.buy_dates.append(self.data.datetime.datetime(0))
                    self.buy_prices.append(current_price)
            elif self.last_prediction < current_price * (1 - threshold):
                if self.position:  # Sell if there is an open position
                    self.sell(size=self.params.stake_size)
                    print(f"Selling at {current_price}")
                    self.sell_dates.append(self.data.datetime.datetime(0))
                    self.sell_prices.append(current_price)

        # Recorded on every bar, including the warm-up bars without a forecast
        self.portfolio_value_list.append(self.broker.getvalue())
        self.datetime_list.append(self.data.datetime.datetime(0))

# Bars for the backtest: the test split, indexed by timestamp, without incomplete rows
#df_test = pd.read_csv("data_train.csv", parse_dates=["Datetime"])
df_test = pd.read_csv("data_test.csv", parse_dates=["Datetime"])
df = df_test.set_index('Datetime').dropna()
# For files whose index column is named "Date": df = df_test.set_index('Date').dropna()

# Wrap the frame as a Backtrader data feed
data = bt.feeds.PandasData(dataname=df)

# Engine with a starting cash of 10000 and a 0.1% commission per trade
initial_capital = 10000
cerebro = bt.Cerebro()
cerebro.broker.set_cash(initial_capital)
cerebro.broker.setcommission(commission=0.001)
cerebro.adddata(data)
cerebro.addstrategy(TFTStrategy)

print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())

# cerebro.run() returns the strategy instances; kept to read the collected series later
strategy = cerebro.run()

print('Ending Portfolio Value: %.2f' % cerebro.broker.getvalue())


Starting Portfolio Value: 10000.00
Ending Portfolio Value: 10000.00


In [None]:
st = strategy[0]

# One row per processed bar: portfolio value next to the bar's OHLC prices
results_df = pd.DataFrame({
    'datetime': st.datetime_list,
    'portfolio_value': st.portfolio_value_list,
    'Open': df['Open'].values,
    'High': df['High'].values,
    'Low': df['Low'].values,
    'Close': df['Close'].values
})

# Prices and portfolio value differ by orders of magnitude, so they get separate
# y axes: prices on the left (primary), portfolio value on the right (secondary).
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Candlestick trace for the OHLC data
fig.add_trace(go.Candlestick(
    x=results_df['datetime'],
    open=results_df['Open'],
    high=results_df['High'],
    low=results_df['Low'],
    close=results_df['Close'],
    name='Candlestick',
    increasing_line_color='cyan',
    decreasing_line_color='gray',
), secondary_y=False)

# Portfolio value line
fig.add_trace(go.Scatter(
    x=results_df['datetime'],
    y=results_df['portfolio_value'],
    mode='lines',
    name='Portfolio Value',
    line=dict(color='orange', width=2),
), secondary_y=True)

# Buy / sell markers, drawn at the price recorded when the order was issued
fig.add_trace(go.Scatter(
    x=st.buy_dates, y=st.buy_prices,
    mode='markers', name='Buy',
    marker=dict(symbol='triangle-up', color='green', size=10),
    text=['Buy at {:.2f}'.format(price) for price in st.buy_prices],
), secondary_y=False)

fig.add_trace(go.Scatter(
    x=st.sell_dates, y=st.sell_prices,
    mode='markers', name='Sell',
    marker=dict(symbol='triangle-down', color='red', size=10),
    text=['Sell at {:.2f}'.format(price) for price in st.sell_prices],
), secondary_y=False)

fig.update_layout(
    title='Backtrader Portfolio Value with Buy and Sell Actions and Candlestick Chart',
    xaxis_title='Date',
    legend_title='Legend',
    template='plotly_dark',
    xaxis_rangeslider_visible=False  # Hide the default range slider for a cleaner view
)
fig.update_yaxes(title_text='Price', secondary_y=False)
fig.update_yaxes(title_text='Portfolio Value', secondary_y=True)

# Dotted vertical line at every transaction, spanning the full price range.
# The range is computed once rather than on every loop iteration.
price_floor = results_df['Low'].min()
price_ceiling = results_df['High'].max()
for trade_dates, line_color in ((st.buy_dates, 'green'), (st.sell_dates, 'red')):
    for trade_date in trade_dates:
        fig.add_shape(
            type='line',
            xref='x', yref='y',  # anchor to the price (primary) axis
            x0=trade_date,
            y0=price_floor,
            x1=trade_date,
            y1=price_ceiling,
            line=dict(color=line_color, width=1, dash='dot'),
            opacity=0.3,
        )

# Show the figure
fig.show()
