In [21]:
from finance_analysis import FinancialInstrument as fi
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import product

class Stock_Dataset(fi):
    """Build lagged-return datasets for several (window, horizon) pairs."""

    def __init__(self,windows: list,horizons: list,intercross: bool,lookover: bool,**kwargs):
        """
        Store the window/horizon configuration and initialise the underlying
        FinancialInstrument.

        :param windows: List of window sizes. A window of 7 produces 7 lagged
        feature columns ('t-1' ... 't-7') per row.
        :param horizons: List of horizons. A horizon of h labels each row with
        the direction of the price change over the previous h rows.
        :param intercross: If True, every window is paired with every horizon,
        e.g. windows=[5,7] and horizons=[2,3] give (5,2),(5,3),(7,2),(7,3).
        If False, they are paired positionally, giving (5,2),(7,3); surplus
        entries of the longer list are ignored (zip semantics).
        :param lookover: If True, lags and labels are computed separately for
        each calendar day, so no row uses values from a previous day (intended
        for intra-day data). If False, they are computed over the whole series.
        :param kwargs: Keyword arguments forwarded to FinancialInstrument
        (ticker, start, end and interval).
        """
        super().__init__(**kwargs)
        self.windows = windows
        self.horizons = horizons
        self.lookover = lookover
        self.intercross = intercross

    def create_dataset(self):
        """
        Build one dataset per (window, horizon) pair and store them in
        ``self.dataset`` under the key ``'data_<window>_<horizon>'``.

        :return: The ``self.dataset`` dictionary (also kept on the instance).
        """
        self.dataset = {}
        if self.intercross:
            wh_list = list(product(self.windows,self.horizons))
        else:
            wh_list = list(zip(self.windows,self.horizons))

        for window,horizon in wh_list:
            self.dataset[f'data_{window}_{horizon}'] = self.create_windows_horizons(window,horizon)

        return self.dataset

    def w_h(self,data,window,horizon):
        """
        Add the direction label and the lagged-return features to a frame.

        :param data: DataFrame with ``price`` and ``log_returns`` columns.
        It is not modified; the work is done on a copy.
        :param window: Number of lagged ``log_returns`` columns to add
        ('t-1' ... 't-<window>').
        :param horizon: Number of rows to look back when computing the label.
        :return: New DataFrame with an integer ``position`` column (1 if the
        price did not fall over the last ``horizon`` rows, otherwise -1) and the
        lag columns, with every row containing a missing value removed.
        """
        data = data.copy()

        horizon_return = np.log(data.price / data.price.shift(horizon))
        direction = horizon_return.ge(0).map({True: 1, False: -1})
        # Rows without a price `horizon` rows earlier have no defined direction.
        # Keep them as NaN so dropna removes them instead of labelling them -1.
        data['position'] = direction.where(horizon_return.notna())
        data.dropna(inplace=True)
        data['position'] = data['position'].astype('int64')

        # NOTE(review): assuming log_returns is the one-row log return of price,
        # the label for horizon > 1 spans the same rows as the lags
        # t-1 ... t-(horizon-1) - confirm this overlap is intended.
        for lag in range(1,window+1):
            data[f't-{lag}'] = data.log_returns.shift(lag)

        data.dropna(inplace=True)
        return data

    def create_windows_horizons(self,window,horizon):
        """
        Build the dataset for a single (window, horizon) pair.

        :param window: Number of lag columns, passed to ``w_h``.
        :param horizon: Label look-back, passed to ``w_h``.
        :return: DataFrame produced by ``w_h``. With ``lookover`` set it is the
        concatenation of the per-day results; None if ``self.data`` yields no
        daily groups at all.
        """
        if not self.lookover:
            return self.w_h(self.data,window,horizon)

        daily_frames = [
            self.w_h(day_data,window,horizon)
            for _,day_data in self.data.groupby(pd.Grouper(freq='D'))
        ]
        if not daily_frames:
            return None

        # Daily grouping also yields days without rows; leave those empty
        # results out, but keep one so the columns survive if every day is empty.
        non_empty = [frame for frame in daily_frames if not frame.empty]
        return pd.concat(non_empty or daily_frames[:1])



In [46]:
# TATAMOTORS.NS dataset: one 10-lag window with a 3-row horizon, paired
# positionally and computed over the whole series (no per-day split).
tata_data = Stock_Dataset(
    windows=[10],
    horizons=[3],
    intercross=False,
    lookover=False,
    ticker='TATAMOTORS.NS',
    start='2018-01-01',
    end=pd.Timestamp.today(),
    interval='1D',
)

[*********************100%***********************]  1 of 1 completed


In [22]:
# INFY.NS dataset: one 10-lag window with a 3-row horizon, paired
# positionally and computed over the whole series (no per-day split).
infy_data = Stock_Dataset(
    windows=[10],
    horizons=[3],
    intercross=False,
    lookover=False,
    ticker='INFY.NS',
    start='2018-01-01',
    end=pd.Timestamp.today(),
    interval='1D',
)

[*********************100%***********************]  1 of 1 completed


In [44]:
# Sum of the non-negative 3-row log returns of INFY, taking every third
# return so the sampled periods do not overlap.
(
    np.log(infy_data.data.price / infy_data.data.price.shift(3))
    .dropna()
    .iloc[::3]
    .clip(lower=0)
    .sum()
)

5.303444152856573

In [58]:
# Every third 3-row log return of TATAMOTORS, so the sampled periods
# do not overlap.
(
    np.log(tata_data.data.price / tata_data.data.price.shift(3))
    .dropna()
    .iloc[::3]
)

Date
2018-01-04 00:00:00+05:30    0.012875
2018-01-09 00:00:00+05:30    0.017522
2018-01-12 00:00:00+05:30   -0.001029
2018-01-17 00:00:00+05:30   -0.032908
2018-01-22 00:00:00+05:30   -0.002486
                               ...   
2023-02-14 00:00:00+05:30    0.008663
2023-02-17 00:00:00+05:30   -0.001477
2023-02-22 00:00:00+05:30   -0.024042
2023-02-27 00:00:00+05:30   -0.027144
2023-03-02 00:00:00+05:30    0.005964
Name: price, Length: 426, dtype: float64

In [89]:
# Compare the summed positive log returns of INFY on three staggered
# 3-calendar-day grids, starting from the first, second and third row.
resampled_first = infy_data.data.price.resample('3D').first()
resampled_last = infy_data.data.price.iloc[2:].resample('3D').first()
resampled_mid = infy_data.data.price.iloc[1:].resample('3D').first()


def funct(x):
    """Return x when it is non-negative, otherwise 0 (NaN also maps to 0)."""
    return x if x>=0 else 0


log_1 = np.log(resampled_first / resampled_first.shift(1))
log_2 = np.log(resampled_mid/ resampled_mid.shift(1))
log_3 = np.log(resampled_last / resampled_last.shift(1))
# Use log_1 here: the name `log` is not defined anywhere in this notebook,
# so the cell only ran against leftover kernel state.
print(log_1.apply(funct).sum())
print(log_2.apply(funct).sum())
print(log_3.apply(funct).sum())

8.931071044530674
6.030765331238655
5.873857402438013


In [85]:
# Display the 3-day resampled INFY price series that starts from the third row.
resampled_last

Date
2018-01-03 00:00:00+05:30    433.899994
2018-01-06 00:00:00+05:30    433.549988
2018-01-09 00:00:00+05:30    437.549988
2018-01-12 00:00:00+05:30    437.100006
2018-01-15 00:00:00+05:30    432.350006
                                ...    
2023-02-18 00:00:00+05:30    443.000000
2023-02-21 00:00:00+05:30    436.500000
2023-02-24 00:00:00+05:30    427.750000
2023-02-27 00:00:00+05:30    417.950012
2023-03-02 00:00:00+05:30    420.450012
Freq: 3D, Name: price, Length: 629, dtype: float64

In [72]:
# First five entries of every third row of the TATAMOTORS price lagged by 3 rows.
print(
    tata_data.data.price
    .shift(3)
    .iloc[::3]
    .head()
)

Date
2018-01-01 00:00:00+05:30           NaN
2018-01-04 00:00:00+05:30    424.450012
2018-01-09 00:00:00+05:30    429.950012
2018-01-12 00:00:00+05:30    437.549988
2018-01-17 00:00:00+05:30    437.100006
Name: price, dtype: float64
