In [15]:
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots

In [280]:
class DataLoader:
    '''
    Time-stepped view over a 2-D array of per-day prices.

    Rows of the array are time steps and columns are instruments. The loader
    keeps a cursor ``t`` (the "current" time step) that the accessor methods
    read from and that ``stepTime`` / ``goToTime`` / ``goToEnd`` move.

    Attributes:
        data:   array of shape (nt, nins) holding the selected price columns
        nt:     number of time steps (rows of ``data``)
        nins:   number of instruments (columns of ``data``)
        stocks: original column index of every column kept in ``data``
        t:      current time step; ``t == nt`` means "past the last row"
    '''

    def __init__(self, data, stocks = None):
        '''
        data:   2-D array-like of prices, one row PER DAY, one column per instrument
        stocks: optional sequence of integer column indices to keep (in that order);
                None keeps every column

        raises ValueError if data is not 2-D or stocks holds an invalid column index
        '''
        data = np.asarray(data)
        if data.ndim != 2:
            raise ValueError("'data' must be a 2-D array of shape (time steps, instruments).")

        if stocks is None:
            self.data = data
            self.stocks = range(data.shape[1])
        else:
            stocks = list(stocks)
            if not stocks:
                raise ValueError("'stocks' must contain at least one column index.")
            ncols = data.shape[1]
            # validate everything up front, before any column is sliced, so a bad
            # index can never silently produce an empty or wrong column
            if not all(isinstance(s, (int, np.integer)) and 0 <= s < ncols for s in stocks):
                raise ValueError("All elements in 'stocks' must be valid integer column indices.")
            # plain ints so the list is treated as positions, never as a boolean mask
            self.data = data[:, [int(s) for s in stocks]]
            self.stocks = stocks

        self.nt, self.nins = self.data.shape
        self.t = 0 # set current time step to 0

    @staticmethod
    def _priceReturns(prices, log):
        '''
        returns of each row of prices relative to the row before it
        (one output row fewer than the input); log selects log returns
        '''
        # division by zero / log of non-positive values yield inf or nan instead of warnings
        with np.errstate(divide = 'ignore', invalid = 'ignore'):
            ratio = prices[1:] / prices[:-1]
            return np.log(ratio) if log else ratio - 1

    def resetTime(self):
        '''
        resets time step back to start
        '''
        self.t = 0

    def goToTime(self, i):
        '''
        puts current time at time step i

        raises IndexError if i is not an integer or not in [0, nt)
        '''
        isInteger = isinstance(i, (int, np.integer)) and not isinstance(i, bool)
        if isInteger and 0 <= i < self.nt:
            self.t = int(i)
        else:
            raise IndexError('Either i not an int or out of bounds')

    def currentTime(self):
        '''
        returns current time step
        '''
        return self.t

    def stepTime(self, history = True, step = 1):
        '''
        returns prices as of the current time step, then advances the time by step

        history: True returns every row up to and including the current one,
                 False returns only the current row
        step:    number of time steps to advance afterwards

        raises IndexError once the current time is past the last row
        '''
        if self.t >= self.nt: # check to see if end of data
            raise IndexError('No more data')
        if history:
            prices = self.data[:self.t + 1]
        else:
            prices = self.data[self.t]
        self.t += step
        return prices

    def lookBack(self, t):
        '''
        returns the t rows of prices before the current time step
        (fewer if the start of the data is reached; the current row is excluded)
        '''
        start = max(0, self.t - t) # start at t timesteps before
        return self.data[start:self.t]

    def currentPrices(self):
        '''
        returns current prices at this timestep

        raises IndexError once the current time is past the last row
        '''
        if self.t >= self.nt:
            raise IndexError('No more data')
        return self.data[self.t]

    def getReturns(self, log = True):
        '''
        returns today's return relative to yesterday for every instrument
        (log return if log, otherwise today/yesterday - 1)

        at the first time step there is no yesterday, so all-NaN is returned;
        raises IndexError once the current time is past the last row
        '''
        if self.t <= 0:
            return np.full(self.nins, np.nan, dtype=float)
        if self.t >= self.nt: # same error as currentPrices instead of a raw numpy one
            raise IndexError('No more data')

        return self._priceReturns(self.data[self.t - 1:self.t + 1], log)[0]

    def lookBackReturns(self, t_input, log = True):
        '''
        computes the returns of every day from t_input days ago up to the current day

        t_input is capped at the current time step. One row is returned per day
        while the current time is on a valid row; after goToEnd the slice ends at
        the last row, so one row fewer comes back.

        raises ValueError if the (capped) lookback is not at least one time step
        '''
        t = min(self.t, t_input) # ensuring t is not bigger than self.t
        if t <= 0:
            raise ValueError('cannot lookback 0 timesteps')

        return self._priceReturns(self.data[self.t - t:self.t + 1], log)

    def returnsToNow(self, log = True):
        '''
        computes returns of every day from the start of the data until today
        '''
        return self._priceReturns(self.data[:self.t + 1], log)

    def goToEnd(self):
        '''
        moves timestep to the end (one past the last row)
        '''
        self.t = self.nt

    def getLagFeatures(self, stocknum, lag = 1, returns = True):
        '''
        builds a lagged-feature table for one stock

        stocknum: stock number as given at initialisation (original column index)
        lag:      highest lag to include; lags 1..lag are produced
        returns:  True builds the table from log returns up to now,
                  False from prices up to now

        Column 0 is the response (the chosen stock's own series). It is followed
        by lag columns per instrument - the chosen stock included - ordered
        instrument by instrument, so lag l of instrument k sits in column
        1 + k*lag + (l - 1). Rows without enough history are NaN.
        With lag < 1 only the response series is returned (1-D).

        raises ValueError if stocknum was not part of the initialisation dataset
        '''
        try:
            i = self.stocks.index(stocknum) # changes stock num to index to make it easier
        except ValueError as err:
            raise ValueError('stock number should be part of initialisation dataset!') from err

        if returns:
            series = self.returnsToNow().T
        else:
            series = self.data[:self.t + 1].T # transposing to get prices across
        if not np.issubdtype(series.dtype, np.inexact):
            series = series.astype(float) # NaN padding cannot be stored in an integer array

        y = series[i] # response
        if lag < 1:
            return y

        # allocate the whole table once instead of re-stacking it for every column
        features = np.full((y.shape[0], 1 + self.nins * lag), np.nan, dtype=series.dtype)
        features[:, 0] = y
        col = 1
        for stock in range(self.nins):
            for l in range(1, lag + 1):
                features[l:, col] = series[stock][:-l] # value from l steps earlier
                col += 1
        return features

In [282]:
# Load the raw price matrix from the whitespace-delimited text file.
StockData = np.loadtxt(fname='../Data/StockData.txt')
# Show it transposed (the file's columns become the rows of the displayed frame).
pd.DataFrame(data=StockData.transpose())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1240,1241,1242,1243,1244,1245,1246,1247,1248,1249
0,13.46,13.48,13.47,13.53,13.64,13.62,13.67,13.79,13.76,13.67,...,8.7,8.54,8.49,8.52,8.59,8.43,8.39,8.33,8.25,8.26
1,71.65,72.1,72.35,72.51,71.99,72.35,72.22,71.37,71.02,71.17,...,68.9,69.1,69.69,69.36,69.61,69.65,70.14,69.68,70.15,70.93
2,48.46,48.52,48.48,48.42,48.4,48.41,48.39,48.41,48.4,48.41,...,43.51,43.53,43.53,43.49,43.44,43.46,43.42,43.38,43.35,43.34
3,50.52,50.5,50.62,50.75,50.65,50.52,50.55,50.57,50.57,50.52,...,39.76,39.9,39.86,39.88,39.94,40.0,40.02,40.03,39.92,39.95
4,52.1,52.06,51.8,51.66,51.97,51.93,51.84,52.19,52.13,52.34,...,53.99,54.07,53.47,53.61,53.55,53.51,53.54,53.85,53.88,54.0
5,13.0,12.95,12.79,12.66,12.62,12.52,12.56,12.47,12.45,12.48,...,4.96,5.01,5.03,5.0,5.02,5.01,5.01,5.03,5.03,5.05
6,18.98,18.95,18.98,18.96,18.89,18.93,18.87,18.92,18.94,18.88,...,15.1,15.1,15.09,15.04,14.98,14.94,14.91,14.9,14.91,14.87
7,47.71,47.84,47.98,48.74,48.88,48.5,47.99,48.03,47.53,47.51,...,60.82,61.27,59.82,58.89,59.52,59.55,59.24,58.88,59.56,59.57
8,69.49,69.73,69.6,69.54,69.68,69.81,69.76,69.76,69.64,69.53,...,66.31,66.39,66.2,66.12,65.94,65.8,65.67,65.55,65.39,65.48
9,49.96,49.93,49.33,49.67,49.46,49.53,49.99,49.83,50.02,50.44,...,70.2,69.07,68.32,67.84,68.21,68.21,67.2,67.52,67.5,67.36


In [286]:
# Wrap every column of the price matrix and move the cursor past the last row,
# so the lag table below is built from the full history.
data = DataLoader(data=StockData, stocks=None)
data.goToEnd()
# Response plus lag-1 return features for stock number 0.
pd.DataFrame(data=data.getLagFeatures(stocknum=0))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,50
0,0.001485,,,,,,,,,,...,,,,,,,,,,
1,-0.000742,0.001485,0.006261,0.001237,-0.000396,-0.000768,-0.003854,-0.001582,0.002721,0.003448,...,-0.003683,0.003759,-0.001384,0.014653,-0.001909,-0.000760,-0.002639,0.001380,-0.003271,-0.000178
2,0.004444,-0.000742,0.003461,-0.000825,0.002373,-0.005007,-0.012432,0.001582,0.002922,-0.001866,...,-0.001231,0.006589,0.004147,-0.012958,0.006800,-0.001713,0.005675,0.003303,-0.005750,-0.003215
3,0.008097,0.004444,0.002209,-0.001238,0.002565,-0.002706,-0.010216,-0.001054,0.015716,-0.000862,...,0.003381,-0.000355,-0.006920,-0.005265,0.001354,-0.001716,-0.001213,0.000549,0.009020,0.004284
4,-0.001467,0.008097,-0.007197,-0.000413,-0.001972,0.005983,-0.003165,-0.003699,0.002868,0.002011,...,0.001533,0.000000,-0.002782,0.004756,0.002433,0.000763,0.007459,0.000275,-0.017707,-0.004284
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1244,-0.018802,0.008182,0.003598,-0.001150,0.001503,-0.001120,0.003992,-0.003997,0.010641,-0.002726,...,-0.006562,0.000271,-0.001369,-0.001741,0.009211,-0.003673,-0.001565,0.001004,-0.006936,-0.005513
1245,-0.004756,-0.018802,0.000574,0.000460,0.001501,-0.000747,-0.001994,-0.002674,0.000504,-0.002125,...,-0.003103,0.005273,-0.002743,-0.021137,-0.004266,0.000000,0.004558,0.002338,0.015663,0.004448
1246,-0.007177,-0.004756,0.007011,-0.000921,0.000500,0.000560,0.000000,-0.002010,-0.005219,-0.001978,...,0.005811,0.001348,-0.008276,0.023748,-0.007923,-0.001842,0.000390,0.001000,-0.005530,-0.000888
1247,-0.009650,-0.007177,-0.006580,-0.000922,0.000250,0.005773,0.003984,-0.000671,-0.006096,-0.001829,...,-0.001546,0.002153,0.001384,0.009085,0.010551,-0.001845,0.008664,0.001997,-0.001513,-0.000889
