In [1]:
from finance_analysis import FinancialInstrument as fi
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import product

class Stock_Dataset(fi):
    def __init__(self,windows: list,horizons: list,intercross: bool,lookover: bool,**kwargs):
        """
        This class creates multiple datasets with varying windows and varying horizons so that multiple datasets
        can be created quickly.
        :param windows: List of windows to be produced. i.e if window is 7 then number of prices in X would be 7
        :param horizons: List of horizons to be produced. i.e. if horizon is 1 then Y will have price movement for 1 day.
        :param intercross: if true: eg. if windows = [5,7] and horizon = [2,3] then dataset produced would have
        [(5,2),(5,3),(7,2),(7,3)] windows and horizon pair respectively.
        else: the window horizon pair would be [(5,2),(7,3)]
        :param lookover: if true then single day windows would have prices for that day only not for the previous day where as otherwise
        they would have prices from previous day too. It is specifically for intra-day trading where we might not want previous day's prices to
        affect the model.
        :param kwargs: keyword arguments for Financial Instrument class which are ticker, start, end and interval specifically.
        """
        super().__init__(**kwargs)
        self.windows = windows
        self.horizons = horizons
        self.lookover = lookover
        self.intercross = intercross

    def create_dataset(self):
        self.dataset = {}
        if self.intercross:
            wh_list = list(product(self.windows,self.horizons))
        else:
            wh_list = [(i,j) for i,j in zip(self.windows,self.horizons)]

        for (i,j) in wh_list:
            tmp_dataset = self.create_windows_horizons(i,j)
            self.dataset[f'data_{i}_{j}'] = tmp_dataset


    def w_h(self,data,window,horizon):
        data['position'] = data.log_returns.diff(-horizon).apply(lambda x: 1 if x>=0 else -1)
        data.dropna(inplace=True)
        for i in range(1,window+1):
            col = f't-{i}'
            data[col] = data.log_returns.shift(i)

        data.dropna(inplace=True)
        return data.copy()

    def create_windows_horizons(self,window,horizon):
        if self.lookover:
            grp = self.data.groupby(pd.Grouper(freq='D'))
            data_f = None
            for (t,val) in grp:
                val_2 = val.copy()
                tmp_data = self.w_h(val_2,window,horizon)
                if data_f is None:
                    data_f = tmp_data
                else:
                    data_f = pd.concat([data_f,tmp_data])

        else:
            data_f = self.w_h(self.data.copy(),window,horizon)

        return data_f




In [31]:
tata_data = Stock_Dataset(windows=[5,7,10],horizons=[1,3],intercross=True,lookover=True,ticker='TATAMOTORS.NS',start='2023-01-01',end=pd.Timestamp.today(),interval='15m')

[*********************100%***********************]  1 of 1 completed


In [32]:
tata_data.create_dataset()

In [33]:
tata_data.dataset

{'data_5_1':                                 price  log_returns  position       t-1  \
 Datetime                                                                 
 2023-01-02 10:45:00+05:30  393.899994    -0.001522      -1.0  0.001014   
 2023-01-02 11:00:00+05:30  393.600006    -0.000762      -1.0 -0.001522   
 2023-01-02 11:15:00+05:30  393.750000     0.000381       1.0 -0.000762   
 2023-01-02 11:30:00+05:30  393.700012    -0.000127      -1.0  0.000381   
 2023-01-02 11:45:00+05:30  393.799988     0.000254      -1.0 -0.000127   
 ...                               ...          ...       ...       ...   
 2023-02-24 14:15:00+05:30  428.399994     0.000233      -1.0 -0.002681   
 2023-02-24 14:30:00+05:30  428.700012     0.000700       1.0  0.000233   
 2023-02-24 14:45:00+05:30  427.950012    -0.001751      -1.0  0.000700   
 2023-02-24 15:00:00+05:30  427.799988    -0.000351      -1.0 -0.001751   
 2023-02-24 15:15:00+05:30  427.750000    -0.000117      -1.0 -0.000351   
 
           

                                price  log_returns       t-1       t-2  \
Datetime                                                                 
2023-01-03 10:00:00+05:30  397.000000    -0.000378  0.000378  0.001386   
2023-01-03 10:15:00+05:30  396.299988    -0.001765 -0.000378  0.000378   
2023-01-03 10:30:00+05:30  395.950012    -0.000883 -0.001765 -0.000378   
2023-01-03 10:45:00+05:30  394.899994    -0.002655 -0.000883 -0.001765   
2023-01-03 11:00:00+05:30  394.350006    -0.001394 -0.002655 -0.000883   
2023-01-03 11:15:00+05:30  394.700012     0.000887 -0.001394 -0.002655   
2023-01-03 11:30:00+05:30  394.700012     0.000000  0.000887 -0.001394   
2023-01-03 11:45:00+05:30  394.600006    -0.000253  0.000000  0.000887   
2023-01-03 12:00:00+05:30  394.149994    -0.001141 -0.000253  0.000000   
2023-01-03 12:15:00+05:30  393.750000    -0.001015 -0.001141 -0.000253   
2023-01-03 12:30:00+05:30  393.500000    -0.000635 -0.001015 -0.001141   
2023-01-03 12:45:00+05:30  393.450012 