In [657]:
import pandas as pd
import numpy as np
import yaml
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, MinMaxScaler, PowerTransformer, RobustScaler
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import talib
import optuna
import pickle
import torch
from torch.utils.data import Dataset, DataLoader
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting.data.encoders import NaNLabelEncoder
from pytorch_lightning import Trainer
import torch.nn as nn

In [658]:
def load_yaml(file):
    with open(file, 'r') as f:
        config = yaml.safe_load(f)
    return config

In [659]:
config = load_yaml('../config_Transformer.yaml')

In [660]:
#trading_data = pd.read_excel(config['data_excel_path'], sheet_name='Data_Basic')
#trading_data.count()
all_trading_data_dfs = []
sheet_names = ['5minData12-17-2019']
for sheet in sheet_names:
    temp_df = pd.read_excel(config['all_data_excel_path'], sheet_name=sheet)
    all_trading_data_dfs.append(temp_df)
all_trading_data = pd.concat(all_trading_data_dfs, ignore_index=True)

# Load 5 min data
all_trading_data['Date'] = pd.to_datetime(all_trading_data['Date'])


In [661]:
all_trading_data.head()

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume
0,2019-12-17 09:30:00,SPY,319.92,320.02,319.71,319.81,546833
1,2019-12-17 09:35:00,SPY,319.81,320.25,319.81,320.0,995662
2,2019-12-17 09:40:00,SPY,320.0,320.02,319.72,319.78,641910
3,2019-12-17 09:45:00,SPY,319.78,319.83,319.58,319.63,540248
4,2019-12-17 09:50:00,SPY,319.63,319.755,319.59,319.755,445601


In [662]:
##### Set the Date Start and End for the filtering of trading data

train_start_date = pd.to_datetime(config['train_start_date'])
train_end_date = pd.to_datetime(config['train_end_date'])

trading_data_raw = all_trading_data[(all_trading_data['Date'] >= train_start_date) & (all_trading_data['Date'] <= train_end_date)]
trading_data_raw = trading_data_raw.reset_index(drop=True)
trading_data_raw.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    780 non-null    datetime64[ns]
 1   Symbol  780 non-null    object        
 2   Open    780 non-null    float64       
 3   High    780 non-null    float64       
 4   Low     780 non-null    float64       
 5   Close   780 non-null    float64       
 6   Volume  780 non-null    int64         
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 42.8+ KB


In [663]:
trading_data_raw.drop(columns=['Symbol'], inplace=True)

In [664]:
trading_data_diff = trading_data_raw.copy()
if config['do_relative_diff']:
    trading_data_diff.iloc[:, 1:] = trading_data_diff.iloc[:, 1:].diff()
    trading_data_diff.iloc[0, 1:] = 0 
trading_data_diff.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2023-04-17 09:30:00,412.37,412.605,412.23,412.56,1068090
1,2023-04-17 09:35:00,412.56,413.075,412.46,412.81,1010302
2,2023-04-17 09:40:00,412.83,413.19,412.73,412.89,611624
3,2023-04-17 09:45:00,412.9,412.99,412.37,412.52,625857
4,2023-04-17 09:50:00,412.53,412.975,412.5187,412.6999,577084


In [665]:
trading_data_raw.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2023-04-17 09:30:00,412.37,412.605,412.23,412.56,1068090
1,2023-04-17 09:35:00,412.56,413.075,412.46,412.81,1010302
2,2023-04-17 09:40:00,412.83,413.19,412.73,412.89,611624
3,2023-04-17 09:45:00,412.9,412.99,412.37,412.52,625857
4,2023-04-17 09:50:00,412.53,412.975,412.5187,412.6999,577084


In [666]:
n_bars = config['n_past']
m_bars = config['m_future']

In [667]:
#dataset = StockDataset( df=trading_data_raw, n_past=n_bars, m_future=m_bars)
#print("Dataset size : ", len(dataset))

In [668]:
#feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
#feature_columns = config['feature_columns'].split(',')

feature_columns2 = ['Open', 'High', 'Low', 'Volume']
fourier_lookback_window = config['fourier_lookback_window']
fourier_n_components = config['fourier_n_components']

In [669]:
def calculate_bollinger_bands(data, window_size, num_std_dev):
    rolling_mean = data['Close'].rolling(window=window_size).mean()
    rolling_std = data['Close'].rolling(window=window_size).std()
    data['BOLLBU'] = rolling_mean + (rolling_std * num_std_dev)
    data['BOLLBM'] = rolling_mean
    data['BOLLBL'] = rolling_mean - (rolling_std * num_std_dev)
    
    return data

def calculate_donchn_bands(data, window_size):
    data['DONCH_U'] = data['High'].rolling(window=window_size).max()
    data['DONCH_L'] = data['Low'].rolling(window=window_size).min()
    
    return data
    
def calculate_tchr(data):
    period = config['tchr_period']
    retracement = config['tchr_retracement']
    adj = config['tchr_adj']
    range = config['tchr_range']

    if range == 'highlow':
        data['TCHR_U'] = talib.MAX(data['High'], timeperiod=period) + adj
        data['TCHR_L'] = talib.MIN(data['Low'], timeperiod=period) - adj
    elif range == 'close':
        data['TCHR_U'] = talib.MAX(data['Close'], timeperiod=period) + adj
        data['TCHR_L'] = talib.MIN(data['Close'], timeperiod=period) - adj
    
    #calculate retracement
    if retracement == "long":
        data['TCHR'] = (data['Close'] - data['TCHR_L']) / (data['TCHR_U'] - data['TCHR_L'])
    elif retracement == "short":
        data['TCHR'] = (data['TCHR_U'] - data['Close']) / (data['TCHR_U'] - data['TCHR_L'])
    
    return data
    
def calculate_adwm(data):
    period = config['adwm_period']
    data['Previous_Close']= data['Close'].shift(1)
    data['TRH'] = data[['High', 'Previous_Close']].max(axis=1)
    data['TRL'] = data[['Low', 'Previous_Close']].min(axis=1)

    data['ADWM_AD'] = 0.0
    data.loc[data['Close'] > data['Previous_Close'], 'ADWM_AD'] = (data['Close'] - data['TRL'])
    data.loc[data['Close'] < data['Previous_Close'], 'ADWM_AD'] = (data['Close'] - data['TRH'])

    data['ADWM'] = data['ADWM_AD']

    data['ADWMMA'] = data['ADWM'].rolling(window=period).mean()

    return data

def calculate_si(row, prev_row, limit):
    if pd.isna(prev_row['Close']):
        return 0
    c = row['Close']
    c_prev = prev_row['Close']
    o = row['Open']
    o_prev = prev_row['Open']
    return (50 * ((c - c_prev) + (0.5 * (c - o)) + (0.25 * (c_prev - o_prev))) / limit)

def calculate_WASI(data):
    wasi_limit = config['wasi_limit']
    data['SI'] = data.apply(lambda row: calculate_si(row, data.shift(1).loc[row.name], wasi_limit), axis=1)
    data['WASI'] = data['SI']
    return data

def calculate_ATR(data):
    atr_period = config['atr_period']
    atr_ma = config['atr_ma']
    data['ATR'] = talib.ATR(data['High'], data['Low'], data['Close'], timeperiod=atr_period)
    data['ADJATR'] = talib.SMA(data['ATR'], timeperiod=atr_ma)
    return data


def compute_fourier_df(value_series, n_components=10):
    fft_result = np.fft.fft(value_series)
    real = fft_result.real[:n_components]
    imag = fft_result.imag[:n_components]
    mag = np.abs(fft_result)[:n_components]

    return real, imag, mag

def get_fourier_columns():
    return [f'fourier_real_{j+2}' for j in range(fourier_n_components-1)] + [f'fourier_imag_{j+2}' for j in range(fourier_n_components-1)] + [f'fourier_mag_{j+2}' for j in range(fourier_n_components-1)]   
    

In [670]:
def add_new_features_df(data):
    fourier_lookback_window = config['fourier_lookback_window']
    fourier_n_components = config['fourier_n_components']

    #data['Date'] = pd.to_datetime(data['Date'])
    #print(f"bolband period : {config['bolband_period']}")
    bolband_period = config['bolband_period']
    bolband_width = config['bolband_width']
    upper, middle, lower = talib.BBANDS(data['Close'], timeperiod=bolband_period, nbdevup=bolband_width, nbdevdn=bolband_width, matype=0)
    data['BOLLBU'] = upper
    data['BOLLBM'] = middle
    data['BOLLBL'] = lower
    #data = calculate_bollinger_bands(data, int(config['bolband_period']), int(config['bolband_width']))

    # Calculate DONCHN Bands
    donchn_period = config['donchn_period']
    data['DONUP'] = talib.MAX(data['High'], timeperiod=donchn_period)

    data['DONLOW'] = talib.MIN(data['Low'], timeperiod=donchn_period)

    data['DONMID'] = (data['DONLOW'] + data['DONUP']) / 2

    data['MA20'] = talib.SMA(data['Close'], timeperiod=20)

    data['MA50'] = talib.SMA(data['Close'], timeperiod=50)

    data['MA100'] = talib.SMA(data['Close'], timeperiod=100)

    data['EMA20'] = talib.EMA(data['Close'], timeperiod=20)

    # Calculate the pivot points
    data['PVPT'] = (data['High'] + data['Low'] + data['Close']) / 3
    data['PVPTR1'] = (2 * data['PVPT']) - data['Low']

    data['PVPTR2'] = data['PVPT'] + data['High'] - data['Low']

    data['PVPTR3'] = data['High'] + 2 * (data['PVPT'] - data['Low'])

    data['PVPTS1'] = (2 * data['PVPT']) - data['High']

    data['PVPTS2'] = data['PVPT'] + data['High'] - data['Low']

    data['PVPTS3'] = data['Low'] - 2 * (data['High'] - data['PVPT'])

    data = calculate_tchr(data)

    data = calculate_adwm(data)

    data = calculate_WASI(data)

    volume_ma_period = config['volume_ma_period']

    data['VOLMA'] = talib.SMA(data['Volume'], timeperiod=volume_ma_period)

    data = calculate_ATR(data)

    data['DayofWeek'] = data['Date'].dt.dayofweek

    data['DayofWeek'] = data['DayofWeek'].astype('category')

    # Add fourier columns to the df
    for i in range(fourier_n_components):
        data[f'fourier_real_{i+1}'] = np.nan
        data[f'fourier_imag_{i+1}'] = np.nan
        data[f'fourier_mag_{i+1}'] = np.nan

    features = []
    epsilon = 1e-5
    #print(f"fourier window - 1 : {fourier_lookback_window - 1}")
    for i in range(len(data)):
        if i >= fourier_lookback_window - 1:
            #print("entered point 1")
            close_window = data['Close'].iloc[i - fourier_lookback_window + 1: i + 1].values
            real, imag, mag = compute_fourier_df(close_window, n_components=fourier_n_components)

            for j in range(fourier_n_components):
                data.iloc[i, data.columns.get_loc(f'fourier_real_{j+1}')] = real[j]
                data.iloc[i, data.columns.get_loc(f'fourier_imag_{j+1}')] = imag[j]
                data.iloc[i, data.columns.get_loc(f'fourier_mag_{j+1}')] = mag[j]
                #print(data.loc[i, 'Date'])
    


    # Add the time of day feature to the trading data

    # Define max time of day in minutes
    MAX_TIME_MINUTES = 1440
    data['Minutes_Passed'] = (data['Date'].dt.hour* 60) + data['Date'].dt.minute
    print(f"number of na s : {data['Date'].isna().sum()}")
    data['TimeOfDay_Group'] = (data['Minutes_Passed'] // 5).astype('int')
    data['Sine_TimeOfDay'] = np.sin(2 * np.pi * data['TimeOfDay_Group'] / MAX_TIME_MINUTES)
    data['Cosine_TimeOfDay'] = np.cos(2 * np.pi * data['TimeOfDay_Group'] / MAX_TIME_MINUTES)
    data['Take_Profit_Level'] = (data['Close'] * config['atr_multiplier'] * data['ADJATR'])
    #print(f"Last row after adding features : ")
    #print(data.tail())
    return data


In [671]:
def get_all_feature_columns():
    fourier_features = get_fourier_columns()
    robust_features = config['robust_scale_features'].split(',')
    return robust_features + fourier_features

In [672]:

def get_features(data, inference=False, scalers={}):
    
    # Define global variables

    stand_features = config['stand_scale_features'].split(',')
    stand_features = [x for x in stand_features if x.strip()]
    robust_features = config['robust_scale_features'].split(',')
    robust_features = [x for x in robust_features if x.strip()]
    fourier_columns = get_fourier_columns()
    # need to use min max scaler for g1
    stand_features = stand_features + fourier_columns

    real_cols = [f'fourier_real_{j+2}' for j in range(fourier_n_components-1)]
    imag_cols = [f'fourier_imag_{j+2}' for j in range(fourier_n_components-1)]
    mag_cols = [f'fourier_mag_{j+2}' for j in range(fourier_n_components-1)]

    if inference:
        scaler = scalers['fourier_minmax']
        real_min, real_max = scaler['real_min'], scaler['real_max']
        imag_min, imag_max = scaler['imag_min'], scaler['imag_max']
        mag_min, mag_max = scaler['mag_min'], scaler['mag_max']

    else:
        real_min, real_max = data[real_cols].min().min(), data[real_cols].max().max()
        imag_min, imag_max = data[imag_cols].min().min(), data[imag_cols].max().max()
        mag_min, mag_max = data[mag_cols].min().min(), data[mag_cols].max().max()
        fourier_min_max = {
                    'real_min': real_min, 'real_max': real_max,
                    'imag_min': imag_min, 'imag_max': imag_max,
                    'mag_min': mag_min,'mag_max': mag_max,
                    }
        with open(f'../{config['fourier_minmax_path']}', 'wb') as f:
            pickle.dump(fourier_min_max, f)
    
    
    data[real_cols] = (data[real_cols] - real_min) / (real_max - real_min)
    data[imag_cols] = (data[imag_cols] - imag_min) / (imag_max - imag_min)
    data[mag_cols] = (data[mag_cols] - mag_min) / (mag_max - mag_min)

    # Apply standard scaler to g2
    #print("Before standard scaler")
    if inference and 'stand' in scalers.keys():
        scaler = scalers['stand']
        data[stand_features] = scaler.transform(data[stand_features])
    else:
        scaler = StandardScaler()
        data[stand_features] = scaler.fit_transform(data[stand_features])
        #print("After standard scaler")

        with open(f'../{config['stand_scaler_path']}', 'wb') as f:
            pickle.dump(scaler, f)
    

    # Apply robust scaler
    if inference:
        scaler = scalers['robust']
        data[robust_features] = scaler.transform(data[robust_features])

    else:
        scaler = RobustScaler()
        data[robust_features] = scaler.fit_transform(data[robust_features])

        with open(f'../{config['robust_scaler_path']}', 'wb') as f:
            pickle.dump(scaler, f)
  

    return data


In [673]:
'''
trading_data = trading_data_diff.copy()
if config['scaler_type'] == 'MinMax':
    MainScaler = MinMaxScaler()
elif config['scaler_type'] == 'Standard':
    MainScaler = StandardScaler()
elif config['scaler_type'] == 'Robust':
    MainScaler = RobustScaler()
if config['use_scaler']:
    trading_data[feature_columns] = MainScaler.fit_transform(trading_data[feature_columns])
trading_data.head()
'''
trading_data_raw = trading_data_diff.copy()

#### Add additional features

In [674]:
trading_data_raw = add_new_features_df(trading_data_raw)

number of na s : 0


#### Scale and Process the features

In [675]:
trading_data = trading_data_raw.copy()
trading_data = get_features(trading_data)
trading_data.dropna(inplace=True)

In [676]:
trading_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,BOLLBU,BOLLBM,BOLLBL,DONUP,...,fourier_imag_9,fourier_mag_9,fourier_real_10,fourier_imag_10,fourier_mag_10,Minutes_Passed,TimeOfDay_Group,Sine_TimeOfDay,Cosine_TimeOfDay,Take_Profit_Level
99,2023-04-18 11:15:00,0.391605,0.388241,0.386241,0.387252,-0.48712,0.808401,0.570362,0.369065,0.920609,...,0.210548,-0.62304,0.45729,-0.272984,-1.139074,675,135,-0.462267,0.426996,3.698701
100,2023-04-18 11:20:00,0.394713,0.344932,0.348554,0.316703,0.154669,0.742088,0.536586,0.365882,0.920609,...,-0.029615,-0.713889,0.68188,-0.055649,-1.043548,680,136,-0.435173,0.403819,3.723128
101,2023-04-18 11:25:00,0.326338,0.406803,0.370339,0.44863,-0.419102,0.634149,0.505921,0.410284,0.875034,...,-0.232365,-0.855574,0.716025,0.215039,-0.88169,685,137,-0.408158,0.380491,3.752511
102,2023-04-18 11:30:00,0.463089,0.484142,0.466813,0.488474,-0.100506,0.55055,0.48382,0.448033,0.783885,...,-0.344423,-0.988309,0.51778,0.458522,-0.729355,690,138,-0.381223,0.357014,3.772116
103,2023-04-18 11:35:00,0.498831,0.50425,0.30514,0.264558,0.285023,0.501837,0.457606,0.442838,0.61222,...,-0.360586,-1.020047,0.068554,0.572158,-0.68092,695,139,-0.354369,0.333387,3.78857


#### Prepare trading dataset

In [677]:
def prepare_data(df):
    df['time_idx'] = range(len(df))
    df['stock_id'] = 0

    target = 'Close'
    features = get_all_feature_columns()
    print(f"Features : {features}")
    assert df[target].isna().sum() == 0, "Target column contains NaN values"
    print("Min time_idx:", df["time_idx"].min())
    print("Max time_idx:", df["time_idx"].max())

    # create a new dataset using the TimeSeriesDataSet
    dataset = TimeSeriesDataSet(
        df,
        time_idx="time_idx",
        target=target,
        group_ids=['stock_id'],
        max_encoder_length=n_bars,
        max_prediction_length=m_bars,
        static_categoricals=[],
        time_varying_known_reals=['time_idx'],
        time_varying_unknown_reals=features,
        target_normalizer=None,
        add_relative_time_idx=True,
        add_target_scales=False,
        add_encoder_length=False
    )
    
    return dataset

In [678]:
tft_timeseries_dataset = prepare_data(trading_data)

Features : ['Open', 'High', 'Low', 'Close', 'BOLLBU', 'BOLLBM', 'BOLLBL', 'DONUP', 'DONMID', 'DONLOW', 'MA20', 'MA50', 'MA100', 'EMA20', 'PVPTR1', 'PVPTR2', 'PVPTR3', 'PVPT', 'PVPTS1', 'PVPTS2', 'PVPTS3', 'Sine_TimeOfDay', 'Cosine_TimeOfDay', 'ADWM', 'ADWMMA', 'WASI', 'ADJATR', 'TCHR', 'Volume', 'VOLMA', 'fourier_real_2', 'fourier_real_3', 'fourier_real_4', 'fourier_real_5', 'fourier_real_6', 'fourier_real_7', 'fourier_real_8', 'fourier_real_9', 'fourier_real_10', 'fourier_imag_2', 'fourier_imag_3', 'fourier_imag_4', 'fourier_imag_5', 'fourier_imag_6', 'fourier_imag_7', 'fourier_imag_8', 'fourier_imag_9', 'fourier_imag_10', 'fourier_mag_2', 'fourier_mag_3', 'fourier_mag_4', 'fourier_mag_5', 'fourier_mag_6', 'fourier_mag_7', 'fourier_mag_8', 'fourier_mag_9', 'fourier_mag_10']
Min time_idx: 0
Max time_idx: 680


In [679]:
class StockPriceTFT(nn.Module):
    def __init__(self, dataset):
        super().__init__()
        self.tft = TemporalFusionTransformer.from_dataset(
            dataset,
            learning_rate=0.01,
            hidden_size=64,
            attention_head_size=4,
            dropout=0.2,
            loss=None
        )
        self.loss_fn = QuantileLoss()
    
    def forward(self, x):
        return self.tft(x)
    
    def training_step(self, batch, batch_idx):
        x,y = batch
        y_pred = self.tft(x)
        loss = self.loss_fn(y_pred, y)
        self.log("train_loss", loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y= batch
        y_pred = self.tft(x)
        loss = self.loss_fn(y_pred, y)
        self.log("val_loss", loss)
        return loss
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.01)
        return optimizer

In [693]:
def train_model(dataset, n_epochs=20,batch_size=64):
    train_dataloader = dataset.to_dataloader(train=True, batch_size=batch_size, num_workers=2)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = StockPriceTFT(dataset).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = QuantileLoss()
    
    model.train()

    ##Training loop
    for epoch in range(n_epochs):
        total_loss = 0
        for batch in train_dataloader:
            optimizer.zero_grad()
            # Move batch to GPU if available
            x,y = batch
            y = y[0]
            x  = {key: value.to(device) for key, value in x.items()}
            y = y.to(device)
            #batch = {key: value.to("cuda" if torch.cuda.is_available() else "cpu") for key, value in batch.items()}

            output = model.tft(x)
            #output = output[0]
            output = output[0].squeeze(-1)
            print(f"Outpput shape : {output.shape} , y shape : {y.shape}")
            loss = criterion(output[0], y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        avg_loss = total_loss / len(train_dataloader)
        print(f"Epoch {epoch+1}/{n_epochs}, Loss: {avg_loss:.4f}")

    print("Model trained successfully")
    return model

#### Train the Model

In [694]:
model = train_model(tft_timeseries_dataset)


Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.


Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.



Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Epoch 1/20, Loss: 0.9398
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 24])
Outpput shape : torch.Size([64, 24]) , y shape : torch.Size([64, 

In [700]:
bolband_period = config['bolband_period']
donchn_period = config['donchn_period']
max_MA_period = 100
max_EMA_period = 20
tchr_period = config['tchr_period']
adwm_period = config['adwm_period']
atr_period = config['atr_period']
volume_ma_period = config['volume_ma_period']
print(f" Fourier window : {fourier_lookback_window}")
max_window = max(bolband_period, 
                 donchn_period, 
                 max_MA_period, 
                 max_EMA_period,
                 tchr_period,
                 adwm_period,
                 atr_period,
                 volume_ma_period, fourier_lookback_window)


 Fourier window : 100


In [701]:


def predict_stock_price(model, t_dataset, df_input, scaler):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    feature_columns = get_all_feature_columns()
    robust_features = config['robust_scale_features'].split(',')
    last_actual_close = df_input.iloc[-1]['Close']
    n_bars = config['n_past']
    m_bars = config['m_future']
    print(f"Last actual close : {last_actual_close}")
    '''
    future_dates = pd.date_range(df_input['Date'].iloc[-1], periods=m_bars+1, freq='5min')[1:]
    df_future = pd.DataFrame({'Date': future_dates})
    df_input = pd.concat([df_input, df_future], ignore_index=True)
    '''
    #Relative scaling
    df_input_diff = df_input.copy()
    if config['do_relative_diff']:
        df_input_diff.iloc[:,1:] = df_input_diff.iloc[:,1:].diff()
        df_input_diff.iloc[0, 1:] = 0
    #Scaling
    df_scaled = df_input_diff.copy()
    if config['use_scaler']:
        df_scaled[feature_columns] = scaler.transform(df_scaled[feature_columns])
    df_scaled['time_idx'] = range(len(df_scaled))
    df_scaled['stock_id'] = 0
    '''
    for col in feature_columns:
        df_scaled.loc[df_scaled.index[-m_bars:], col] = -9999
    df_scaled[feature_columns].iloc[-m_bars:] = -9999
    '''
    #print(df_scaled)
    #print(df_scaled.head())
    '''
    prediction_dataset = TimeSeriesDataSet(
        df_scaled,
        time_idx="time_idx",
        target="Close",
        group_ids=['stock_id'],
        static_categoricals=[],
        max_encoder_length=n_bars,
        min_encoder_length=n_bars,
        max_prediction_length=m_bars,
        min_prediction_length=m_bars,
        time_varying_known_reals=['time_idx'],
        time_varying_unknown_reals=feature_columns,
        target_normalizer=None,
        add_relative_time_idx=True,
        add_target_scales=False,
        add_encoder_length=False
    )

    #Create dataloader for inference
    prediction_dataloader = prediction_dataset.to_dataloader(
        train=False, 
        batch_size=1, 
        num_workers=0,

    )
    '''
    prediction_dataset = TimeSeriesDataSet.from_dataset(t_dataset, df_scaled, predict=True)
    predictions = model.tft.predict(prediction_dataset, mode="prediction")
    '''
    with torch.no_grad():
        for batch in prediction_dataloader:
            x, _ = batch
            x = {key: value.to(device) for key, value in x.items()}
            pred = model.predict(x)
            
            pred = pred[0].cpu().numpy()
            if config['use_mean_or_median'] == 'median':
                val_pred = pred[0][:,2]
            elif config['use_mean_or_median'] == 'mean':
                val_pred = pred[0][:,5]

            #print(f"Prediction initial : {pred}")

            predictions.append(val_pred)

    '''
    pred_np = predictions[0].numpy()
    print("Shape : ", pred_np.shape)
    pred_df = pd.DataFrame(pred_np, columns=['Close'])
    #pred_df = predictions[0].output_predictions.to_dataframe()
    print(f"Predictions shape : {pred_df.shape}")
    print(pred_df.head())
    #print(f"Predictions size : {len(predictions)}")
    #print("Predictions array : ")
    #print(predictions)
    #predictions = np.array(predictions).reshape(-1,1)
    #predictions = predictions[-m_bars:]
    close_idx = robust_features.index('Close')
    
    #scaled_placeholder = np.zeros((predictions.shape[0], len(robust_features)))
    #scaled_placeholder[:, close_idx] = predictions[:, 0]
    placeholder_df = pd.DataFrame(np.zeros((pred_df.shape[0], len(robust_features))), columns=robust_features)
    #if config['use_scaler']:
    #    inverse_transformed = scaler.inverse_transform(scaled_placeholder)
    #else:
    #    inverse_transformed = scaler.inverse_transform(scaled_placeholder)
    
    placeholder_df['Close'] = pred_df['Close']
    print(f"Placeholder shape : {placeholder_df.shape}")
    df_output = placeholder_df.copy()
    df_output[robust_features] = scaler.inverse_transform(placeholder_df[robust_features])    
    #df_output = pd.DataFrame(inverse_transformed, columns=robust_features)
    print(df_output['Close'])
    print(f"Output shape : {df_output.shape}")
    if config['do_relative_diff']:
        df_output['Close'] = df_output['Close'] + last_actual_close
    else:
        pred_close_first = df_output.iloc[0]['Close']
        df_output['Close'] = df_output['Close'] - pred_close_first
    #df_output = df_output.cumsum()


    return df_output

In [702]:
def load_scalers():
    with open(f'../{config['robust_scaler_path']}', 'rb') as f:
        robust_scaler = pickle.load(f)

    with open(f'../{config['fourier_minmax_path']}', 'rb') as f:
        fourier_minmax = pickle.load(f)
    scalers = {
        'fourier_minmax': fourier_minmax, 
        'robust': robust_scaler
    }
    return scalers

In [703]:
def predict_data_from_point(model, start_predict_date_str, plot_graph=True):
    pred_start_time = pd.to_datetime(start_predict_date_str)
    pred_start_idx = all_trading_data.index[all_trading_data['Date'] >= pred_start_time][0]
    pred_before_extend = config['pred_before_extend']
    pred_after_extend = config['pred_after_extend']
    start_idx = max(0, pred_start_idx - n_bars - max_window - pred_before_extend)
    end_idx= min(len(all_trading_data), pred_start_idx + m_bars + pred_after_extend)
    trading_data_inference = all_trading_data.loc[start_idx:end_idx].copy()
    trading_data_inference.drop(columns=['Symbol'], inplace=True)
    # Take the first m_bars rows of trading_data_inference
    print(f"Clipping : {pred_start_idx} - {n_bars} - {max_window} = {pred_start_idx - n_bars - max_window}")
    inference_data = all_trading_data.loc[pred_start_idx - n_bars - m_bars - max_window: pred_start_idx].copy()
    #print(inference_data.head())
    inference_data = add_new_features_df(inference_data)
    scalers = load_scalers()
    inference_data = get_features(inference_data, inference=True, scalers=scalers)
    inference_data.dropna(inplace=True)
    predicted_stock_price = predict_stock_price(model, tft_timeseries_dataset, inference_data, scalers['robust'])
    last_index = inference_data.index[-1]
    last_close = trading_data_inference['Close'].loc[last_index]

    print(f"Last index : {last_index} , last close : {last_close}")
    actual_data = trading_data_inference
    print('actual data shape : ', actual_data.shape)
    predicted_stock_price['Close'] = predicted_stock_price['Close'] + last_close
    #actual_next_predicted = trading_data_inference.iloc[last_index + 1: last_index + 1 + predicted_stock_price.shape[0]]
    actual_next_predicted = trading_data_inference.loc[(last_index + 1): (last_index + 1 + predicted_stock_price.shape[0])]
    print(f"{last_index + 1} : {last_index + 1 + predicted_stock_price.shape[0]}")
    if plot_graph:
        fig = go.Figure()

        fig.add_trace(go.Candlestick(
            x=actual_data["Date"],
            open=actual_data["Open"],
            high=actual_data["High"],
            low=actual_data["Low"],
            close=actual_data["Close"],
            name="Actual Price"
        ))

        # Overlay the Predicted Close Prices as a Red Line
        fig.add_trace(go.Scatter(
            x=actual_next_predicted["Date"],
            y=predicted_stock_price["Close"],
            mode='lines+markers',
            name="Predicted Close",
            line=dict(color='blue', width=2)  # Red line for predictions
        ))

        # Layout Settings
        fig.update_layout(
            title="Stock Price Prediction vs Actual Data",
            xaxis_title="Date",
            yaxis_title="Price",
            xaxis_rangeslider_visible=False,
            yaxis=dict(fixedrange=False), 
            xaxis=dict(type='category')
        )

        # Show the chart
        fig.show()

    return predicted_stock_price
        

In [704]:
pred_prices = predict_data_from_point(model, '2023-05-04 13:25:00', plot_graph=True)

Clipping : 66167 - 200 - 100 = 65867
number of na s : 0
Last actual close : -1.6863027774795516



If predicting, no randomization should be possible - setting stop_randomization=True


The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



IndexError: index 3 is out of bounds for dimension 1 with size 1

In [None]:
inf_start_date = pd.to_datetime(config['inf_start_date'])
inf_end_date = pd.to_datetime(config['inf_end_date'])
pred_start_time = pd.to_datetime(config['pred_start_time'])
pred_start_idx = all_trading_data.index[all_trading_data['Date'] >= pred_start_time][0]
pred_before_extend = config['pred_before_extend']
pred_after_extend = config['pred_after_extend']
start_idx = max(0, pred_start_idx - n_bars - m_bars - pred_before_extend)
end_idx= min(len(all_trading_data), pred_start_idx + m_bars + pred_after_extend)


trading_data_inference = all_trading_data.loc[start_idx:end_idx].copy()
trading_data_inference.drop(columns=['Symbol'], inplace=True)
#trading_data_inference.head()

In [None]:
# Take the first m_bars rows of trading_data_inference
inference_data = all_trading_data.loc[pred_start_idx - n_bars: pred_start_idx + 1].copy()

In [None]:
inference_data.drop(columns=['Symbol'], inplace=True)
inference_data.iloc[:,1:].head()
#inference_data.shape

Unnamed: 0,Open,High,Low,Close,Volume
65872,414.29,414.6,414.21,414.4112,507912
65873,414.41,414.8599,414.35,414.8,547436
65874,414.8,415.07,414.7,415.0,547849
65875,415.0,415.12,414.8826,415.05,439700
65876,415.05,415.27,414.99,415.165,580052


In [None]:


#print(inference_data)

# Pass the inference_data to the predict_stock_price function
predicted_stock_price = predict_stock_price(model, inference_data, MainScaler)

#print(f'Shape : {predicted_stock_price.shape}')
# Display the predicted stock prices
#print(predicted_stock_price)

Last actual close : 415.795
Predictions size : 3



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




In [None]:
print(f"Trading data inference shape : {trading_data_inference.shape}")
print(f"Starting index : {trading_data_inference.index[0]} and Last index : {trading_data_inference.index[-1]}")

Trading data inference shape : (369, 6)
Starting index : 65730 and Last index : 66098


#### Show predictions

In [None]:
last_index = inference_data.index[-1]
print(f"Last index : {last_index}")
no_bars_extend = 200
actual_data = trading_data_inference.loc[:last_index+no_bars_extend]
print('actual data shape : ', actual_data.shape)
#actual_next_predicted = trading_data_inference.iloc[last_index + 1: last_index + 1 + predicted_stock_price.shape[0]]
actual_next_predicted = trading_data_inference.loc[(last_index + 1): (last_index + 1 + predicted_stock_price.shape[0])]
print(f"{last_index + 1} : {last_index + 1 + predicted_stock_price.shape[0]}")
#actual_next_predicted.head()

Last index : 65957
actual data shape :  (369, 6)
65958 : 66000


In [None]:
fig = go.Figure()

fig.add_trace(go.Candlestick(
    x=actual_data["Date"],
    open=actual_data["Open"],
    high=actual_data["High"],
    low=actual_data["Low"],
    close=actual_data["Close"],
    name="Actual Price"
))

# Overlay the Predicted Close Prices as a Red Line
fig.add_trace(go.Scatter(
    x=actual_next_predicted["Date"],
    y=predicted_stock_price["Close"],
    mode='lines+markers',
    name="Predicted Close",
    line=dict(color='blue', width=2)  # Red line for predictions
))

# Layout Settings
fig.update_layout(
    title="Stock Price Prediction vs Actual Data",
    xaxis_title="Date",
    yaxis_title="Price",
    xaxis_rangeslider_visible=False,
    yaxis=dict(fixedrange=False), 
    xaxis=dict(type='category')
)

# Show the chart
fig.show()

In [299]:
##### CReate the objective function for optuna to tune tree parameters

def objective(trial):
    params = {
        'tree_method' : trial.suggest_categorical('tree_method', ['approx', 'hist']) , 
        'gamma': trial.suggest_float('gamma', 1e-2, 10),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_weight': trial.suggest_float('min_child_weight', 1, 250),
        'subsample': trial.suggest_float('subsample', 0.1, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'lambda': trial.suggest_float('lambda', 0.1, 25), 
        'alpha': trial.suggest_float('alpha', 0.001, 10),
    }
    params.update(base_params)
    #thresholds = [trial.suggest_float(f'threshold_{i}', 0.1, 0.9) for i in ]
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, f'valid-{metric}')

    xgb_model = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boosting_rounds, 
                          evals=[(dtrain, 'train'),(dvalid, 'valid')],
                          early_stopping_rounds=50,
                          verbose_eval=0,
                          callbacks=[pruning_callback])
    trial.set_user_attr('best_iteration', xgb_model.best_iteration)
    #xgb.XGBClassifier(**params, random_state=46, early_stopping_rounds=30, objective='multi:softprob', lambda_=config['lam'], alpha=config['alpha'], n_estimators=100)
    #xgb_model.fit(X_train, Y_train, eval_set=[(X_train, Y_train), (X_valid, Y_valid)])

    y_pred = xgb_model.predict(dvalid)
    rmse_buy = mean_squared_error(y_valid['BuyPower'], y_pred[:, 0]) ** 0.5
    rmse_sell = mean_squared_error(y_valid['SellPower'], y_pred[:, 1]) ** 0.5

    return (rmse_buy + rmse_sell) / 2

In [179]:

study = optuna.create_study(direction='minimize') # for metric auc its maximuze, and for mlogloss its minimie

study.optimize(objective, n_trials=50)

# Get the best parameters
print(f" Best parameters: {study.best_params}")
print(f" Best Accuracy: {study.best_value}")

[I 2025-02-09 03:12:21,215] A new study created in memory with name: no-name-ec0ef680-8d96-444a-bef7-42e1ed2a215d
[W 2025-02-09 03:12:21,239] Trial 0 failed with parameters: {'tree_method': 'hist', 'gamma': 0.6138627990390165, 'max_depth': 9, 'min_child_weight': 134.81627696184316, 'subsample': 0.804895871577925, 'colsample_bytree': 0.927952421015343, 'lambda': 11.219359993750993, 'alpha': 6.245163446513805} because of the following error: NameError("name 'base_params' is not defined").
Traceback (most recent call last):
  File "c:\Projects\Trading\Uns_SPY_Trading\venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\coool\AppData\Local\Temp\ipykernel_13860\3346073159.py", line 14, in objective
    params.update(base_params)
                  ^^^^^^^^^^^
NameError: name 'base_params' is not defined
[W 2025-02-09 03:12:21,265] Trial 0 failed with value None.


NameError: name 'base_params' is not defined

In [111]:
best_params = study.best_params
best_params

{'tree_method': 'approx',
 'gamma': 0.016898585123857512,
 'max_depth': 5,
 'min_child_weight': 13.027880153141155,
 'subsample': 0.7344303043839889,
 'colsample_bytree': 0.6887500122227045,
 'lambda': 15.572006313152583,
 'alpha': 5.325786812240595}

In [112]:
low_learning_rate = 0.01

params = {}
params.update(base_params)
params.update(study.best_params)
params['learning_rate'] = low_learning_rate

model_stage2 = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_boosting_rounds, 
                         evals=[(dtrain, 'train'), (dvalid, 'valid')], 
                         early_stopping_rounds=50,
                         verbose_eval=0)

In [113]:
model_stage2.best_iteration # got the best iteration from stage 2 training

9999

In [114]:
model_final = xgb.train(params=params, dtrain=dtrain_valid,
                        num_boost_round = model_stage2.best_iteration,
                        verbose_eval=0)

In [115]:
model_path = f'../{config['model_save_name']}'
model_final.save_model(model_path)

In [116]:
y_pred_test = model_final.predict(dtest)

y_pred_buy = y_pred_test[:, 0]

y_pred_sell = y_pred_test[:, 1]

metrics = {
    "RMSE (Buy Power)": mean_squared_error( y_test['BuyPower'], y_pred_buy),
    "RMSE (Sell Power)": mean_squared_error(y_test['SellPower'], y_pred_sell),
}

df_metrics = pd.DataFrame(metrics.items(), columns=["Metric", "Value"])

print(df_metrics)


              Metric     Value
0   RMSE (Buy Power)  0.170721
1  RMSE (Sell Power)  0.183691


In [214]:
## Load the new data to run inference
#new_data = pd.read_excel(config['data_excel_path'], sheet_name='TestNew')
new_data_start_date = pd.to_datetime(config['inf_start_date'])
new_data_end_date = pd.to_datetime(config['inf_end_date'])
new_data = all_trading_data[(all_trading_data['Date'] >= new_data_start_date) & (all_trading_data['Date'] <= new_data_end_date)]
new_data = new_data.reset_index(drop=True)

In [215]:
new_data.shape

(9672, 7)

In [119]:
bolband_period = config['bolband_period']
donchn_period = config['donchn_period']
max_MA_period = 100
max_EMA_period = 20
tchr_period = config['tchr_period']
adwm_period = config['adwm_period']
atr_period = config['atr_period']
volume_ma_period = config['volume_ma_period']
print(f" Fourier window : {fourier_lookback_window}")
max_window = max(bolband_period, 
                 donchn_period, 
                 max_MA_period, 
                 max_EMA_period,
                 tchr_period,
                 adwm_period,
                 atr_period,
                 volume_ma_period, fourier_lookback_window)


 Fourier window : 100


In [177]:
def process_data(raw_data):
    #### Loading the scalers for inferencing
    with open(f'../{config['robust_scaler_path']}', 'rb') as f:
        robust_scaler = pickle.load(f)

    # Load standard scaler
    with open(f'../{config['stand_scaler_path']}', 'rb') as f:
        stand_scaler = pickle.load(f)

    with open(f'../{config['fourier_minmax_path']}', 'rb') as f:
        fourier_minmax = pickle.load(f)

    with open(f'../{config['label_scales_pickle']}', 'rb') as f:
        label_scales = pickle.load(f)
    
    scalers = {'fourier_minmax': fourier_minmax, 
               'stand': stand_scaler,
               'robust': robust_scaler}

    trading_signals = pd.DataFrame(columns=['Date','High', 'Low', 'Open', 'Close', 'BuyPower', 'SellPower', 'Signal', 'Take_Profit_Level'])

                    
    # This is the dataframe to which we will be adding the rows live
    historical_data = pd.DataFrame(columns=['Date','High', 'Low', 'Open', 'Close', 'Volume'])
    numeric_columns = ['High', 'Low', 'Open', 'Close', 'Volume'] 
    historical_data[numeric_columns] = historical_data[numeric_columns].apply(pd.to_numeric)
    predictions = []
    for index,row in raw_data.iterrows():
        #print(f'volume : {row['Volume']}')
        if index % 100 == 0:
            print(f'Index : {index}')
        new_row = pd.DataFrame({'Date': [row['Date']], 'High': [row['High']], 'Low': [row['Low']], 'Open': [row['Open']], 'Close': [row['Close']], 'Volume': [row['Volume']]})
        signal_row = pd.DataFrame({'Date': [row['Date']], 'High': [row['High']], 'Low': [row['Low']], 'Open': [row['Open']], 'Close': [row['Close']], 'Signal': ['N']})
        trading_signals = pd.concat([trading_signals, signal_row], ignore_index=True)
        historical_data = pd.concat([historical_data, new_row], ignore_index=True)
        historical_data['Volume'] = historical_data['Volume'].astype(int)
        #print(historical_data['Volume'])
        #print(f'historical data length : {len(historical_data)}')
        if len(historical_data) > max_window:
            #print(f"At index : {index}")
            #print(f"length of historical data : {len(historical_data)} , so splicing")
            historical_data = historical_data.iloc[-max_window:].reset_index(drop=True)
            #print(f"Now lenngth : {len(historical_data)} and max window : {max_window}")
            
        if len(historical_data) >= max_window:
            updated_data = add_new_features_df(historical_data.copy())
            #print("Historical data 1 : ")
            #print(historical_data[['Volume']].tail())
            #last_row_to_print = historical_data.iloc[[-1]]
            #for column, value in last_row_to_print.items():
            #    print(f'{column} : {value}')
            #updated_data.info()
            inf_features = get_features(updated_data.copy(), inference=True, scalers=scalers)
            #print("Historical data 2 : ")
            #print(historical_data[['Volume']].tail())
            last_row_features = inf_features.iloc[[-1]]
            last_row_features = last_row_features[all_feature_columns]
            if last_row_features.isna().any().any():
                print(f'The inference row at index : {index} contains na')
                #last_row_to_print = updated_data.iloc[[-1]]
                #for column, value in last_row_to_print.items():
                #    print(f'{column} : {value}')
                print(historical_data[['Volume']].tail())
                break
                continue
        
            last_row_dm = xgb.DMatrix(last_row_features, enable_categorical=True)
            prediction = model_final.predict(last_row_dm)
 
            buy_power, sell_power = prediction[0]
            if config['label_power_transform']:
                pred_df = pd.DataFrame([[buy_power, sell_power]], columns=['BuyPower', 'SellPower'])
                pred_original_df = pd.DataFrame(label_scales['PT'].inverse_transform(pred_df), columns=pred_df.columns)
                buy_power = pred_original_df['BuyPower'].iloc[0]
                sell_power = pred_original_df['SellPower'].iloc[0]
            #print(f'Buy Power : {buy_power}, Sell Power : {sell_power}')

            #predicted_class_index = np.argmax(prob_prediction, axis=1)
            trading_signals.loc[trading_signals.index[-1], 'BuyPower'] = buy_power
            trading_signals.loc[trading_signals.index[-1], 'SellPower'] = sell_power
            trading_signals.loc[trading_signals.index[-1], 'Take_Profit_Level'] = updated_data['Take_Profit_Level'].iloc[-1]
            #predictions.append(prediction[0])
        
        
    return trading_signals

            

        
    

In [240]:
def calculate_trade(trading_signals):
    trade_positions = []
    trade_enter_buy = config['trade_enter_buy']
    trade_enter_sell = config['trade_enter_sell']
    trades = []
    balance = 0
    profit_amount = 10
    profit_count = 0
    loss_count = 0
    buy_count = 0
    sell_count = 0
    loss_amount = profit_amount * config['risk']
    positions = pd.DataFrame(columns=['Entry', 'EntryDate', 'Exit', 'ExitDate', 'Profit', 'Type'])
                    
    for index,row in trading_signals.iterrows():
        buy_power = row['BuyPower']
        sell_power = row['SellPower']
        #print(f'Buy Power : {buy_power}, Sell Power : {sell_power}')

        #predicted_class_index = np.argmax(prob_prediction, axis=1)          #predictions.append(prediction[0])
        for trade in trades:
            if (trade['Type'] == 'B' and row['High'] >= trade['TakeProfit']) or (trade['Type'] == 'S' and row['Low'] <= trade['TakeProfit']):
                trade['Active'] = 'N'
                balance += (trade['Profit'] * profit_amount)
                profit_count += 1
                print(f"Profit, New Balance : {balance}")
                
                pos_row = pd.DataFrame({'Entry': [trade['Entry']], 'EntryDate': [trade['EntryDate']], 'Exit': [row['Close']], 'ExitDate': [row['Date']] , 'Profit': [True], 'Type': [trade['Type']]})
                positions = pd.concat([positions, pos_row], ignore_index=True)
            elif (trade['Type'] == 'B' and row['Low'] <= trade['StopLoss']) or (trade['Type'] == 'S' and row['High'] >= trade['StopLoss']):
                trade['Active'] = 'N'
                balance -= (trade['Profit'] * loss_amount)
                loss_count += 1
                print(f"Loss, New Balance : {balance}")
                pos_row = pd.DataFrame({'Entry': [trade['Entry']], 'EntryDate': [trade['EntryDate']], 'Exit': [row['Close']], 'ExitDate': [row['Date']] , 'Profit': [False], 'Type': [trade['Type']]})
                positions = pd.concat([positions, pos_row], ignore_index=True)
        
        
        if len(trades) == 0:
            if buy_power > sell_power and buy_power > trade_enter_buy:
                take_profit = row['Take_Profit_Level'] 
                trades.append({"Type": "B", 
                            "TakeProfit": row['Close'] + take_profit,
                            "StopLoss": row['Close'] - (take_profit * config['risk']),
                            "Profit": take_profit,
                            "Active": "Y",
                            'Entry': row['Close'],
                            'EntryDate': row['Date']
                            })
                buy_count += 1
            elif sell_power > buy_power and sell_power > trade_enter_sell:
                take_profit = row['Take_Profit_Level'] 
                trades.append({"Type": "S", 
                            "TakeProfit": row['Close'] - take_profit,
                            "Profit": take_profit,
                            "StopLoss": row['Close'] + (take_profit * config['risk']),
                            "Active": "Y",
                            'Entry': row['Close'],
                            'EntryDate': row['Date']
                            })
                sell_count += 1
        else:
            if buy_power > sell_power and buy_power > trade_enter_buy and trades[0]['Type'] == 'S':
                trade['Active'] = 'N'
                profit = (trade['Entry'] - row['Close']) * profit_amount
                balance += profit 
                if profit < 0:
                    loss_count += 1
                    print(f"Loss, New Balance : {balance}")
                else:
                    profit_count += 1
                    print(f"Profit, New Balance : {balance}")
                pos_row = pd.DataFrame({'Entry': [trade['Entry']], 'EntryDate': [trade['EntryDate']], 'Exit': [row['Close']], 'ExitDate': [row['Date']] , 'Profit': [True] if profit > 0 else [False], 'Type': [trade['Type']]})
                positions = pd.concat([positions, pos_row], ignore_index=True)

            elif sell_power > buy_power and sell_power > trade_enter_sell and trades[0]['Type'] == 'B':
                trade['Active'] = 'N'
                profit = (row['Close'] - trade['Entry']) * profit_amount
                balance += profit
                if profit < 0:
                    loss_count += 1
                    print(f"Loss, New Balance : {balance}")
                else:
                    profit_count += 1
                    print(f"Profit, New Balance : {balance}")
                pos_row = pd.DataFrame({'Entry': [trade['Entry']], 'EntryDate': [trade['EntryDate']], 'Exit': [row['Close']], 'ExitDate': [row['Date']] , 'Profit': [True] if profit > 0 else [False], 'Type': [trade['Type']]})
                positions = pd.concat([positions, pos_row], ignore_index=True)
            
        # Filter out all trades that are not active
        trades = [trade for trade in trades if trade['Active'] == 'Y']
        
        
    print(f"Final Balance : {balance} \n Profit count : {profit_count} \n Loss count : {loss_count}")
    print(f"Buy count : {buy_count} \n Sell count : {sell_count}")
    
    return positions

            

        
    

In [179]:
new_data.isna().any(axis=1).sum()

np.int64(0)

In [180]:
print(volume_ma_period)

10


In [181]:
new_data.head()

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume
0,2024-01-02 09:30:00,SPY,472.16,472.8,472.05,472.67,2339778
1,2024-01-02 09:35:00,SPY,472.67,472.74,471.88,471.92,1574945
2,2024-01-02 09:40:00,SPY,471.92,472.1,471.71,471.8,1634708
3,2024-01-02 09:45:00,SPY,471.79,472.09,471.39,471.39,1398881
4,2024-01-02 09:50:00,SPY,471.395,471.95,471.36,471.42,1396561


In [216]:
tr_signals = process_data(new_data)


Index : 0



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Index : 100
Index : 200
Index : 300
Index : 400
Index : 500
Index : 600
Index : 700
Index : 800
Index : 900
Index : 1000
Index : 1100
Index : 1200
Index : 1300
Index : 1400
Index : 1500
Index : 1600
Index : 1700
Index : 1800
Index : 1900
Index : 2000
Index : 2100
Index : 2200
Index : 2300
Index : 2400
Index : 2500
Index : 2600
Index : 2700
Index : 2800
Index : 2900
Index : 3000
Index : 3100
Index : 3200
Index : 3300
Index : 3400
Index : 3500
Index : 3600
Index : 3700
Index : 3800
Index : 3900
Index : 4000
Index : 4100
Index : 4200
Index : 4300
Index : 4400
Index : 4500
Index : 4600
Index : 4700
Index : 4800
Index : 4900
Index : 5000
Index : 5100
Index : 5200
Index : 5300
Index : 5400
Index : 5500
Index : 5600
Index : 5700
Index : 5800
Index : 5900
Index : 6000
Index : 6100
Index : 6200
Index : 6300
Index : 6400
Index : 6500
Index : 6600
Index : 6700
Index : 6800
Index : 6900
Index : 7000
Index : 7100
Index : 7200
Index : 7300
Index : 7400
Index : 7500
Index : 7600
Index : 7700
Index : 

In [235]:
tr_signals.head()

Unnamed: 0,Date,High,Low,Open,Close,BuyPower,SellPower,Signal,Take_Profit_Level
0,2024-01-02 09:30:00,472.8,472.05,472.16,472.67,,,N,
1,2024-01-02 09:35:00,472.74,471.88,472.67,471.92,,,N,
2,2024-01-02 09:40:00,472.1,471.71,471.92,471.8,,,N,
3,2024-01-02 09:45:00,472.09,471.39,471.79,471.39,,,N,
4,2024-01-02 09:50:00,471.95,471.36,471.395,471.42,,,N,


In [241]:
positions_df = calculate_trade(tr_signals.copy())
#tr_signals.tail()

Loss, New Balance : -20.333652349436655
Profit, New Balance : 13.154045049564651
Loss, New Balance : -0.7684314407878077
Profit, New Balance : 3.031568559212147
Loss, New Balance : -12.573250703418111
Profit, New Balance : 35.36689420871904
Loss, New Balance : 33.46689420871906
Loss, New Balance : 28.766894208719357
Loss, New Balance : 9.618887768097057
Profit, New Balance : 40.56888776809676
Profit, New Balance : 64.76888776809692
Loss, New Balance : 39.79796161378347
Loss, New Balance : 19.501150280472707
Loss, New Balance : -3.1802325622942647
Profit, New Balance : 23.319767437705508
Profit, New Balance : 69.4805911336955
Loss, New Balance : 67.63059113369548
Profit, New Balance : 96.59784094512905
Loss, New Balance : 96.39784094512923
Loss, New Balance : 73.02598372430924
Profit, New Balance : 127.80152487403589
Profit, New Balance : 182.40152487403626
Loss, New Balance : 159.93926609603386
Profit, New Balance : 172.23926609603348
Profit, New Balance : 224.03498483443295
Loss, New 


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Loss, New Balance : 200.94098575116868
Loss, New Balance : 184.1409857511686
Loss, New Balance : 167.6909857511688
Profit, New Balance : 178.1909857511689
Loss, New Balance : 155.0572333506921
Loss, New Balance : 137.05723335069197
Profit, New Balance : 181.75723335069225
Loss, New Balance : 174.4472333506923
Profit, New Balance : 189.59723335069216
Profit, New Balance : 246.29846509459978
Profit, New Balance : 254.29746509459957
Loss, New Balance : 232.88446509459956
Profit, New Balance : 252.28446509459954
Profit, New Balance : 264.68446509459966
Profit, New Balance : 278.5844650945995
Loss, New Balance : 262.1977864817229
Loss, New Balance : 246.50601850932446
Loss, New Balance : 225.9779192741696
Profit, New Balance : 251.2965358483725
Profit, New Balance : 284.4138124344548
Loss, New Balance : 281.3138124344548
Profit, New Balance : 302.56481243445455
Loss, New Balance : 264.5947131379397
Profit, New Balance : 308.040923371587
Loss, New Balance : 293.09092337158694
Profit, New Bal

In [230]:
def compute_trade_signal(row):
    if row['BuyPower'] > config['trade_enter_buy'] and row['BuyPower'] > row['SellPower']:
        return 'B'
    elif row['SellPower'] > config['trade_enter_sell'] and row['SellPower'] > row['BuyPower']:
        return 'S'
    else:
        return 'N'
    
tr_signals['Signal'] = tr_signals.apply(compute_trade_signal, axis=1)

In [244]:
buy_signals = tr_signals[tr_signals['Signal'] == 'B']
sell_signals = tr_signals[tr_signals['Signal']== 'S']
#trading_data['Date'] = pd.to_datetime(trading_data['Date'])
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, 
                    row_heights=[0.7, 0.3], vertical_spacing=0.1,
                    subplot_titles=("CandleStick Chart", "Buy/Sell Signals"))
fig.add_trace(
    go.Candlestick(
        x=tr_signals['Date'], 
        open=tr_signals['Open'], 
        high=tr_signals['High'], 
        low=tr_signals['Low'], 
        close=tr_signals['Close'],
        name='OHLC'
        ), row=1, col=1
)
'''
fig.add_trace(
    go.Scatter(
        x=buy_signals['Date'], 
        y=buy_signals['Low'], 
        mode='markers', 
        name='Buy Signal', 
        marker=dict(color='blue', size=10)))

fig.add_trace(
    go.Scatter(
        x=sell_signals['Date'], 
        y=sell_signals['High'], 
        mode='markers', 
        name='Sell Signal', 
        marker=dict(color='yellow', size=10)))
'''
for index, row in positions_df.iterrows():
    if row['Type'] == 'B':
        entry_symbol = 'triangle-up'
        entry_color = 'blue'
        exit_symbol = 'triangle-down'
        exit_color ='green' if row['Profit'] else 'red'
    elif row['Type'] == 'S':
        entry_symbol = 'triangle-down'
        entry_color = 'yellow'
        exit_symbol = 'triangle-up'
        exit_color = 'green' if row['Profit'] else 'red'
    
    fig.add_trace(
        go.Scatter(
            x=[row['EntryDate']],
            y=[row['Entry']],
            mode='markers',
            marker=dict(symbol=entry_symbol, size=10, color=entry_color),
            name=f'Entry {row['Type']}'
        ))
    
    fig.add_trace(
        go.Scatter(
            x=[row['ExitDate']],
            y=[row['Exit']],
            mode='markers',
            marker=dict(symbol=exit_symbol, size=10, color=exit_color),
            name=f'Exit {row["Type"]}'
        ))
    
fig.add_trace(
    go.Scatter(
        x=tr_signals['Date'],
        y=tr_signals['BuyPower'],
        mode='lines',
        name='Buy Signal',
        line=dict(color='green', width=2)
    ), row=2, col=1
)

fig.add_trace(
    go.Scatter(
        x=tr_signals['Date'],
        y=tr_signals['SellPower'],
        mode='lines',
        name='Sell Signal',
        line=dict(color='red', width=2)
    ), row=2, col=1
)
   

fig.update_layout(
    title='CandleStick chart with Buy Sell Signals',
    xaxis=dict(type="date", 
               rangebreaks=[
                   dict(bounds=["sat", "mon"]),
                   dict(bounds=[16,9.5], pattern='hour')
               ]              
            ),
    xaxis_rangeslider_visible=False,
    height=600,
    hovermode='x unified'
)



fig.update_xaxes(matches='x')

fig.show()