<a href="https://colab.research.google.com/github/manuelrucci7/anomalib/blob/main/FinanceAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Data Retrieval

In [67]:
%%capture
# !pip install pandas_ta

In [1]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
# import pandas_ta as ta
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Define the symbol for ENI
ticker_symbol = "ENI.MI"  # Use the Yahoo Finance symbol for ENI listed in Milan

# Function to retrieve time series data from Yahoo Finance
def get_yahoo_finance_data(symbol=ticker_symbol, interval="5m", period="5d"):
    ticker = yf.Ticker(symbol)
    data = ticker.history(interval=interval, period=period)
    return data

In [3]:
# Retrieve 5-minute interval data for the last 5 days
time_series_data = get_yahoo_finance_data(interval="5m", period="1mo")

In [4]:
# Indicator available
# List of indicators available from Yahoo Finance using yfinance library
# Note: Yahoo Finance provides basic stock data, and yfinance helps retrieve the following:
# 1. Open Price - The opening price of the stock for a given interval.
# 2. Close Price - The closing price of the stock for a given interval.
# 3. High Price - The highest price reached during a given interval.
# 4. Low Price - The lowest price reached during a given interval.
# 5. Volume - The number of shares traded during a given interval.
# 6. Dividends - Dividend payments made by the stock during a specified period.
# 7. Stock Splits - Information about stock splits during the specified period.
list(time_series_data.keys())

['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']

In [5]:
# Get this indicator available by default in alpha_vantage
# List of all available indicators with comments
# Note: These indicators are not directly available from Yahoo Finance through yfinance. You would need to calculate them manually.
# 1. SMA - Simple Moving Average: This is the average of the closing prices over a specific period. It helps to smooth out price data and identify general trends.
# 2. EMA - Exponential Moving Average: Similar to the SMA, but gives more weight to recent prices, making it more responsive to new information.
# 3. MACD - Moving Average Convergence Divergence: A trend-following momentum indicator that shows the relationship between two moving averages, typically used to identify changes in momentum.
# 4. STOCH - Stochastic Oscillator: A momentum indicator that compares the closing price to a range of prices over a certain period. It is used to identify potential reversal points.
# 5. RSI - Relative Strength Index: A momentum oscillator that measures the speed and change of price movements. It helps identify overbought or oversold conditions.
# 6. BBANDS - Bollinger Bands: These bands consist of a middle band (SMA) and two outer bands. They help measure market volatility and identify overbought or oversold conditions.
# 7. ADX - Average Directional Movement Index: Used to quantify the strength of a trend, whether up or down, but not the direction.
# 8. CCI - Commodity Channel Index: Used to identify cyclical trends, and it compares the current price to its average price over a specific period to spot overbought or oversold conditions.
# 9. ATR - Average True Range: A volatility indicator that measures the degree of price variation, helping to determine the level of price uncertainty.
# 10. OBV - On Balance Volume: A volume-based indicator that measures buying and selling pressure. It is used to predict price movements based on volume changes.
# 11. WILLR - Williams %R: A momentum indicator that shows the level of the closing price relative to the highest high for a look-back period, identifying overbought and oversold levels.
# 12. AROON - Aroon Indicator: Helps identify the beginning of a new trend and the strength of that trend by measuring how long it has been since the highest high or lowest low.
# 13. MFI - Money Flow Index: A momentum indicator that uses price and volume data to identify overbought or oversold conditions, similar to RSI but includes volume.
# 14. MAV - Moving Average Envelopes: These are lines placed above and below a moving average. The envelopes help identify overbought and oversold conditions by using percentage-based thresholds.
# 15. TRIX - Triple Exponential Average: This indicator shows the rate of change in a triple-smoothed exponential moving average, used to filter out insignificant price movements.

In [6]:
# Function to calculate various indicators
def calculate_indicators(df):
    # Simple Moving Average (SMA)
    df['SMA_20'] = df['Close'].rolling(window=20).mean()

    # Exponential Moving Average (EMA)
    df['EMA_20'] = df['Close'].ewm(span=20, adjust=False).mean()

    # Moving Average Convergence Divergence (MACD)
    df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
    df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = df['EMA_12'] - df['EMA_26']
    df['MACD_Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
    df['MACD_Hist'] = df['MACD'] - df['MACD_Signal']

    # Stochastic Oscillator (STOCH)
    df['Low_14'] = df['Low'].rolling(window=14).min()
    df['High_14'] = df['High'].rolling(window=14).max()
    df['STOCH_k'] = 100 * ((df['Close'] - df['Low_14']) / (df['High_14'] - df['Low_14']))
    df['STOCH_d'] = df['STOCH_k'].rolling(window=3).mean()

    # Relative Strength Index (RSI)
    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI_14'] = 100 - (100 / (1 + rs))

    # Bollinger Bands (BBANDS)
    df['BB_middle'] = df['SMA_20']
    df['BB_upper'] = df['BB_middle'] + 2 * df['Close'].rolling(window=20).std()
    df['BB_lower'] = df['BB_middle'] - 2 * df['Close'].rolling(window=20).std()

    # Average Directional Movement Index (ADX)
    df['+DM'] = np.where((df['High'] - df['High'].shift(1)) > (df['Low'].shift(1) - df['Low']),
                         (df['High'] - df['High'].shift(1)), 0)
    df['-DM'] = np.where((df['Low'].shift(1) - df['Low']) > (df['High'] - df['High'].shift(1)),
                         (df['Low'].shift(1) - df['Low']), 0)
    df['TR'] = np.maximum((df['High'] - df['Low']),
                          np.maximum(abs(df['High'] - df['Close'].shift(1)), abs(df['Low'] - df['Close'].shift(1))))
    df['+DI'] = 100 * (df['+DM'] / df['TR']).rolling(window=14).mean()
    df['-DI'] = 100 * (df['-DM'] / df['TR']).rolling(window=14).mean()
    df['DX'] = (abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI'])) * 100
    df['ADX'] = df['DX'].rolling(window=14).mean()

    # Commodity Channel Index (CCI)
    df['TP'] = (df['High'] + df['Low'] + df['Close']) / 3
    df['CCI'] = (df['TP'] - df['TP'].rolling(window=20).mean()) / (0.015 * df['TP'].rolling(window=20).std())

    # Average True Range (ATR)
    df['ATR'] = df['TR'].rolling(window=14).mean()

    # On Balance Volume (OBV)
    df['OBV'] = (np.where(df['Close'] > df['Close'].shift(1), df['Volume'],
                          np.where(df['Close'] < df['Close'].shift(1), -df['Volume'], 0))).cumsum()

    # Williams %R
    df['WILLR'] = (df['High_14'] - df['Close']) / (df['High_14'] - df['Low_14']) * -100

    # Aroon Indicator
    df['AROON_Up'] = df['High'].rolling(window=25).apply(lambda x: x.argmax() / 25 * 100, raw=True)
    df['AROON_Down'] = df['Low'].rolling(window=25).apply(lambda x: x.argmin() / 25 * 100, raw=True)

    # Money Flow Index (MFI)
    df['MF'] = (df['High'] + df['Low'] + df['Close']) / 3 * df['Volume']
    df['Positive_MF'] = np.where(df['Close'] > df['Close'].shift(1), df['MF'], 0)
    df['Negative_MF'] = np.where(df['Close'] < df['Close'].shift(1), df['MF'], 0)
    df['MFI'] = 100 - (100 / (1 + (df['Positive_MF'].rolling(window=14).sum() / df['Negative_MF'].rolling(window=14).sum())))

    # Moving Average Envelopes (MAV)
    df['Upper_Envelope'] = df['SMA_20'] * 1.05
    df['Lower_Envelope'] = df['SMA_20'] * 0.95

    # Triple Exponential Average (TRIX)
    df['TRIX'] = df['Close'].ewm(span=15, adjust=False).mean().ewm(span=15, adjust=False).mean().ewm(span=15, adjust=False).mean().pct_change() * 100

    return df

In [7]:
def generate_plotly_figure(df, plot_optional_indicators=False):
    # Initialize figure with subplots for better separation of indicators

    # Create subplots with different rows for better visualization
    fig = make_subplots(rows=8, cols=1, shared_xaxes=True,
                        vertical_spacing=0.02, subplot_titles=(
                            'Candlestick & Moving Averages',
                            'Bollinger Bands',
                            'RSI',
                            'MACD',
                            'Stochastic Oscillator',
                            'Optional Indicators',
                            'OBV' if plot_optional_indicators else ''),
                        row_heights=[0.4, 0.15, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])

    # Plot main price line using Candlestick chart (row 1)
    fig.add_trace(go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
        name='Candlestick'
    ), row=1, col=1)

    # Add Moving Averages to row 1
    fig.add_trace(go.Scatter(
        x=df.index, y=df['SMA_20'],
        mode='lines', name='SMA 20',
        line=dict(color='orange')
    ), row=1, col=1)

    fig.add_trace(go.Scatter(
        x=df.index, y=df['EMA_20'],
        mode='lines', name='EMA 20',
        line=dict(color='magenta')
    ), row=1, col=1)

    # Add Bollinger Bands to row 2 for visualizing market volatility
    fig.add_trace(go.Scatter(
        x=df.index, y=df['BB_upper'],
        mode='lines', name='Upper Bollinger Band',
        line=dict(color='grey', dash='dash')
    ), row=2, col=1)

    fig.add_trace(go.Scatter(
        x=df.index, y=df['BB_lower'],
        mode='lines', name='Lower Bollinger Band',
        line=dict(color='grey', dash='dash')
    ), row=2, col=1)

    # Add RSI indicator to row 3
    fig.add_trace(go.Scatter(
        x=df.index, y=df['RSI_14'],
        mode='lines', name='RSI 14',
        line=dict(color='blue')
    ), row=3, col=1)

    # Add MACD and Signal Line to row 4
    fig.add_trace(go.Scatter(
        x=df.index, y=df['MACD'],
        mode='lines', name='MACD',
        line=dict(color='green')
    ), row=4, col=1)

    fig.add_trace(go.Scatter(
        x=df.index, y=df['MACD_Signal'],
        mode='lines', name='MACD Signal',
        line=dict(color='red')
    ), row=4, col=1)

    # Add Stochastic Oscillator to row 5
    fig.add_trace(go.Scatter(
        x=df.index, y=df['STOCH_k'],
        mode='lines', name='Stochastic %K',
        line=dict(color='purple')
    ), row=5, col=1)

    fig.add_trace(go.Scatter(
        x=df.index, y=df['STOCH_d'],
        mode='lines', name='Stochastic %D',
        line=dict(color='darkblue')
    ), row=5, col=1)

    # Add optional indicators if requested (row 6)
    if plot_optional_indicators:
        # Add ADX
        fig.add_trace(go.Scatter(
            x=df.index, y=df['ADX'],
            mode='lines', name='ADX',
            line=dict(color='brown')
        ), row=6, col=1)

        # Add CCI
        fig.add_trace(go.Scatter(
            x=df.index, y=df['CCI'],
            mode='lines', name='CCI',
            line=dict(color='cyan')
        ), row=6, col=1)

        # Add ATR
        fig.add_trace(go.Scatter(
            x=df.index, y=df['ATR'],
            mode='lines', name='ATR',
            line=dict(color='black')
        ), row=6, col=1)

        # Add Williams %R
        fig.add_trace(go.Scatter(
            x=df.index, y=df['WILLR'],
            mode='lines', name='Williams %R',
            line=dict(color='darkred')
        ), row=6, col=1)

        # Add Aroon Up and Down
        fig.add_trace(go.Scatter(
            x=df.index, y=df['Aroon_Up'],
            mode='lines', name='Aroon Up',
            line=dict(color='teal')
        ), row=6, col=1)

        fig.add_trace(go.Scatter(
            x=df.index, y=df['Aroon_Down'],
            mode='lines', name='Aroon Down',
            line=dict(color='navy')
        ), row=6, col=1)

        # Add OBV to its own row (row 7)
        fig.add_trace(go.Scatter(
            x=df.index, y=df['OBV'],
            mode='lines', name='OBV',
            line=dict(color='darkgreen')
        ), row=7, col=1)

    # Update layout for better visualization
    fig.update_layout(
        title=f'Price and Indicators for {df.index.name}',
        xaxis_title='Date',
        yaxis_title='Price',
        template='plotly_white',
        hovermode='x',
        height=1600
    )

    # Show the figure
    fig.show()


In [8]:
df_np = calculate_indicators(time_series_data)
df_np.to_csv("data.csv")
# print keys
print(list(df_np.keys()))
#generate_plotly_figure(df_np, plot_optional_indicators=False)
df_np

['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits', 'SMA_20', 'EMA_20', 'EMA_12', 'EMA_26', 'MACD', 'MACD_Signal', 'MACD_Hist', 'Low_14', 'High_14', 'STOCH_k', 'STOCH_d', 'RSI_14', 'BB_middle', 'BB_upper', 'BB_lower', '+DM', '-DM', 'TR', '+DI', '-DI', 'DX', 'ADX', 'TP', 'CCI', 'ATR', 'OBV', 'WILLR', 'AROON_Up', 'AROON_Down', 'MF', 'Positive_MF', 'Negative_MF', 'MFI', 'Upper_Envelope', 'Lower_Envelope', 'TRIX']


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,SMA_20,EMA_20,EMA_12,...,WILLR,AROON_Up,AROON_Down,MF,Positive_MF,Negative_MF,MFI,Upper_Envelope,Lower_Envelope,TRIX
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-09-26 09:00:00+02:00,13.700,13.754,13.656,13.726,0,0.0,0.0,,13.726000,13.726000,...,,,,0.000000e+00,0.000000e+00,0.000000e+00,,,,
2024-09-26 09:05:00+02:00,13.716,13.736,13.634,13.646,917038,0.0,0.0,,13.718381,13.713692,...,,,,1.253774e+07,0.000000e+00,1.253774e+07,,,,-0.001138
2024-09-26 09:10:00+02:00,13.650,13.650,13.596,13.614,946078,0.0,0.0,,13.708440,13.698355,...,,,,1.288558e+07,0.000000e+00,1.288558e+07,,,,-0.003444
2024-09-26 09:15:00+02:00,13.612,13.658,13.592,13.652,852756,0.0,0.0,,13.703065,13.691223,...,,,,1.162648e+07,1.162648e+07,0.000000e+00,,,,-0.005884
2024-09-26 09:20:00+02:00,13.650,13.656,13.624,13.642,484824,0.0,0.0,,13.697249,13.683651,...,,,,6.613323e+06,0.000000e+00,6.613323e+06,,,,-0.008442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-25 17:05:00+02:00,14.480,14.480,14.478,14.478,36085,0.0,0.0,14.4759,14.470845,14.475333,...,-22.727962,20.0,4.0,5.224627e+05,0.000000e+00,5.224627e+05,46.232752,15.199695,13.752105,0.011212
2024-10-25 17:10:00+02:00,14.476,14.476,14.476,14.476,140658,0.0,0.0,14.4738,14.471336,14.475436,...,-31.579079,16.0,0.0,2.036165e+06,0.000000e+00,2.036165e+06,43.368846,15.197490,13.750110,0.010619
2024-10-25 17:15:00+02:00,14.474,14.478,14.464,14.472,80067,0.0,0.0,14.4726,14.471399,14.474907,...,-42.104603,12.0,36.0,1.158676e+06,0.000000e+00,1.158676e+06,36.129271,15.196230,13.748970,0.009969
2024-10-25 17:20:00+02:00,14.476,14.476,14.464,14.466,185054,0.0,0.0,14.4716,14.470885,14.473537,...,-57.895397,8.0,32.0,2.677485e+06,0.000000e+00,2.677485e+06,33.004588,15.195180,13.748020,0.009208


## Train a Model

In [9]:
%%capture
!pip install pytorch_forecasting

In [14]:
import pandas as pd
import torch
from lightning.pytorch import Trainer
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_lightning import seed_everything
from pytorch_forecasting.metrics import QuantileLoss

# Load your DataFrame (assuming it is already preprocessed with all indicators)
df = pd.read_csv("data.csv", parse_dates=["Datetime"])
df['time_idx'] = df.index  # Create a time index for pytorch-forecasting
# Add a 'symbol' column if it doesn't exist. Replace 'your_symbol' with the actual symbol of your data.
# This assumes all data belongs to one symbol. If not, adjust as necessary.
df['symbol'] = 'ENI'
# Preprocessing: Drop rows with missing values in relevant columns
relevant_columns = [
    "Close", "SMA_20", "EMA_20", "MACD", "RSI_14", "BB_middle", "ADX",
    "CCI", "ATR", "OBV", "WILLR", "AROON_Up", "AROON_Down", "TRIX",
    "Upper_Envelope", "Lower_Envelope", "Open", "High", "Low", "Volume" # Add these for known reals
]
df.dropna(subset=relevant_columns, inplace=True)

# Defining the training dataset
max_encoder_length = 60  # How far back we look in history
max_prediction_length = 10  # How far we want to predict into the future

# Define the TimeSeriesDataSet
training_cutoff = df["time_idx"].max() - max_prediction_length


training = TimeSeriesDataSet(
    df[df.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="Close",  # Predicting future 'Close' price
    group_ids=["symbol"],  # Assuming each row has a symbol, modify accordingly if it doesn't
    min_encoder_length=max_encoder_length // 2,  # allow for incomplete sequences
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["symbol"],
    static_reals=[
        "SMA_20", "EMA_20", "MACD", "RSI_14", "BB_middle", "ADX",
        "CCI", "ATR", "OBV", "WILLR", "AROON_Up", "AROON_Down", "TRIX",
        "Upper_Envelope", "Lower_Envelope",
    ],  # These features are expected to be static
    time_varying_known_reals=["time_idx", "Open", "High", "Low", "Volume"],
    time_varying_unknown_reals=[
        "Close" # Target now varies with time
    ],
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Create dataloaders for model
batch_size = 64
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)

# Define the Temporal Fusion Transformer model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # QuantileLoss has 7 outputs
    loss=QuantileLoss(),
    log_interval=10,  # logging every 10 batches
    reduce_on_plateau_patience=4,
)

# Train the model
trainer = Trainer(max_epochs=30, accelerator="gpu", devices=1 if torch.cuda.is_available() else None)
trainer.fit(tft, train_dataloader)

# Save the model
torch.save(tft.state_dict(), "tft_model.pth")


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/loc

Training: |          | 0/? [00:00<?, ?it/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/training_epoch_loop.py:389: ReduceLROnPlateau conditioned on metric val_loss which is not available but strict is set to `False`. Skipping learning rate update.
INFO: 
Detected KeyboardInterrupt, attempting graceful shutdown ...
INFO:lightning.pytorch.utilities.rank_zero:
Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [42]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# Load DataFrame and preprocess as in your original code
df = pd.read_csv("data.csv", parse_dates=["Datetime"])
df['time_idx'] = df.index
df['symbol'] = 'ENI'
relevant_columns = [
    "Close", "SMA_20", "EMA_20", "MACD", "RSI_14", "BB_middle", "ADX",
    "CCI", "ATR", "OBV", "WILLR", "AROON_Up", "AROON_Down", "TRIX",
    "Upper_Envelope", "Lower_Envelope", "Open", "High", "Low", "Volume"
]
df.dropna(subset=relevant_columns, inplace=True)

# Dataset definition
class TimeSeriesDataset(Dataset):
    def __init__(self, data, encoder_length, prediction_length):
        self.data = data
        self.encoder_length = encoder_length
        self.prediction_length = prediction_length

    def __len__(self):
        return len(self.data) - self.encoder_length - self.prediction_length

    def __getitem__(self, idx):
        x = self.data[idx:idx + self.encoder_length].values
        y = self.data[idx + self.encoder_length: idx + self.encoder_length + self.prediction_length]["Close"].values
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

max_encoder_length = 60
max_prediction_length = 10
dataset = TimeSeriesDataset(df[relevant_columns], max_encoder_length, max_prediction_length)
train_dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2, dropout=0.1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(hn[-1])

# Initialize and train the model
input_size = len(relevant_columns)
hidden_size = 16
output_size = max_prediction_length
print(input_size, hidden_size, output_size)
model = LSTMModel(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 50
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for x_batch, y_batch in train_dataloader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        y_pred = model(x_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_dataloader)}")

# Save the model
torch.save(model.state_dict(), "lstm_model.pth")


20 16 10
Epoch 1/50, Loss: 199.3386665793026
Epoch 2/50, Loss: 187.42171164119944
Epoch 3/50, Loss: 160.57506785673255
Epoch 4/50, Loss: 135.98120364020852
Epoch 5/50, Loss: 117.5359313067268
Epoch 6/50, Loss: 102.70658335966222
Epoch 7/50, Loss: 90.12737767836627
Epoch 8/50, Loss: 79.18830871582031
Epoch 9/50, Loss: 69.5441827213063
Epoch 10/50, Loss: 61.03028106689453
Epoch 11/50, Loss: 53.44017971263212
Epoch 12/50, Loss: 46.688966638901654
Epoch 13/50, Loss: 40.69755542979521
Epoch 14/50, Loss: 35.38048755421358
Epoch 15/50, Loss: 30.644047737121582
Epoch 16/50, Loss: 26.47228504629696
Epoch 17/50, Loss: 22.78271417056813
Epoch 18/50, Loss: 19.530703993404614
Epoch 19/50, Loss: 16.677303454455206
Epoch 20/50, Loss: 14.195116267484778
Epoch 21/50, Loss: 12.032103678759407
Epoch 22/50, Loss: 10.161740218891817
Epoch 23/50, Loss: 8.536899005665498
Epoch 24/50, Loss: 7.142809054430793
Epoch 25/50, Loss: 5.957270453957951
Epoch 26/50, Loss: 4.9407252704395965
Epoch 27/50, Loss: 4.083019

## Test using Backtrader

In [17]:
%%capture
!pip install backtrader

In [68]:
import backtrader as bt
import torch
import pandas as pd

# Assuming you already have a trained LSTM model class named `LSTMModel`
# Load the trained model
input_size = 20  # Based on relevant columns (update to your input size if needed)
hidden_size = 16
output_size = 10  # max_prediction_length
tft = LSTMModel(input_size, hidden_size, output_size)
tft.load_state_dict(torch.load("lstm_model.pth"))
tft.eval()

# Backtrader strategy class
class TFTStrategy(bt.Strategy):
    params = (
        ('stake_size', 1),  # Number of shares to buy/sell
    )

    def __init__(self):
        # Load initial data
        self.data_close = self.datas[0].close
        self.last_prediction = None
        self.num_fails = 0  # Tracking failed trades

    def notify_trade(self, trade):
        if trade.status == trade.Closed:
            print(f"Trade Closed: P/L = {trade.pnlcomm}")
            if trade.pnlcomm < 0:
                self.num_fails += 1

    def next(self):
        if len(self) >= max_encoder_length:  # Ensure enough historical data for prediction
            # Prepare data for prediction
            input_data = df.iloc[len(self) - max_encoder_length:len(self)].copy()

            # Get this column for running
            relevant_columns = [
                "Close", "SMA_20", "EMA_20", "MACD", "RSI_14", "BB_middle", "ADX",
                "CCI", "ATR", "OBV", "WILLR", "AROON_Up", "AROON_Down", "TRIX",
                "Upper_Envelope", "Lower_Envelope", "Open", "High", "Low", "Volume"
            ]
            input_data = input_data[relevant_columns]
            #print(input_data)
            input_tensor = torch.tensor(input_data.values, dtype=torch.float32).unsqueeze(0)

            # Predict next value using the trained model
            with torch.no_grad():
                prediction = tft(input_tensor)

            # Get the predicted closing price (it predicts the next 10 values)
            self.last_prediction = prediction[0, 0].item()

            # Example trading logic based on predicted price movement
            current_price = self.data_close[0]
            print(f"Prediction: {self.last_prediction}, Current Close: {current_price}, Cash: {self.broker.get_cash()}, Position Size: {self.position.size if self.position else 0}")

            # Example threshold to trigger trades
            if self.last_prediction > current_price * 1.005:
                if not self.position:
                    self.buy(size=self.params.stake_size)
                    #print(f"Buying at {current_price}")
            elif self.last_prediction < current_price * 0.995:
                if self.position:
                    self.sell(size=self.params.stake_size)
                    #print(f"Selling at {current_price}")

# Load historical data
df = pd.read_csv("data.csv")  # Load your historical data
df['datetime'] = pd.to_datetime(df['Datetime'])  # Assuming you have a datetime column
df.set_index('datetime', inplace=True)

# Fill any missing values
df.fillna(method='ffill', inplace=True)
df.dropna(inplace=True)

# Backtrader data feed
data = bt.feeds.PandasData(dataname=df)

# Initialize cerebro
cerebro = bt.Cerebro()

# Set the initial capital for testing
initial_capital = 10000  # Set initial capital to $10,000
cerebro.broker.set_cash(initial_capital)

# Set commission (optional)
cerebro.broker.setcommission(commission=0.001)  # Set commission to 0.1%

# Add data to cerebro
cerebro.adddata(data)

# Add strategy
cerebro.addstrategy(TFTStrategy)

# Print starting cash
print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())

# Run backtesting
cerebro.run()

# Print ending cash
print('Ending Portfolio Value: %.2f' % cerebro.broker.getvalue())


  tft.load_state_dict(torch.load("lstm_model.pth"))
  df.fillna(method='ffill', inplace=True)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
2024-10-08 11:10:00+02:00  14.326  14.3155  14.334449 -0.022984  42.105131   
2024-10-08 11:15:00+02:00  14.326  14.3164  14.333645 -0.021017  50.793603   
2024-10-08 11:20:00+02:00  14.344  14.3201  14.334631 -0.017801  60.294241   
2024-10-08 11:25:00+02:00  14.350  14.3224  14.336095 -0.014599  60.294520   
2024-10-08 11:30:00+02:00  14.352  14.3244  14.337609 -0.011765  63.636561   
2024-10-08 11:35:00+02:00  14.346  14.3261  14.338409 -0.009889  60.869505   
2024-10-08 11:40:00+02:00  14.360  14.3266  14.340465 -0.007190  73.134105   
2024-10-08 11:45:00+02:00  14.356  14.3282  14.341944 -0.005313  69.230826   
2024-10-08 11:50:00+02:00  14.368  14.3308  14.344426 -0.002824  67.741861   
2024-10-08 11:55:00+02:00  14.362  14.3328  14.346100 -0.001320  61.667024   
2024-10-08 12:00:00+02:00  14.358  14.3349  14.347233 -0.000446  67.272680   
2024-10-08 12:05:00+02:00  14.362  14.3372  14.348639  0.000563  79.591678   

KeyboardInterrupt: 

In [58]:
# Print ending cash
print('Ending Portfolio Value: %.2f' % cerebro.broker.getvalue())

# Plot the results
cerebro.plot()

plt.show()

Ending Portfolio Value: 10000.68
