In [234]:

import numpy as np
np.set_printoptions(formatter={'float_kind':"{:-.3e}".format})
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt

import core as co
import hist_data as hd
hd.set_hist_data(data_count=None)

In [360]:


class DataSource:
    def __init__(self, data, scalers, step=1):
        
        self.data = list(data)
        self.step = step
        self.scalers = scalers
        self.feature_count = 2
        self.end_index = None
        self.data_count = None
        self.features = None
        self.targets = None
        self.indexes = None
 
        self.fit_data()

    def fit_data(self):
        return
        # opens = []
        # volumes = []

        # for i in range(0, len(self.data)):  
        #     open = 0
        #     volume = 0
        #     if not i + step < len(self.data):
        #         break
        #     for k in range(step):
        #         val = self.data[i + k]
        #         open += (val[1][0][0] + val[1][1][0]) / 2
        #         volume += val[2]              

        #     opens.append(open / self.step) #.reshape(-1, 1)
        #     volumes.append(volume / self.step) #.reshape(-1, 1)

        # opens = np.array(opens).reshape(-1, 1)
        # volumes = np.log(np.array(volumes).reshape(-1, 1) + 10.0)

        # self.fit_transform([opens, volumes])

    def feature_names(self):
        return [str(i) for i in range(20)]
        # Change accordingly
    
    def target_names(self):
        return [str(i) for i in range(20)]
        # Change accordingly   

    def fit(self, data):
        for i in range(len(data)):
            if data[i] is not None:
                data[i] = self.scalers[i].fit(data[i])
        return data

    def fit_transform(self, data):
        for i in range(len(data)):
            if data[i] is not None:
                data[i] = self.scalers[i].fit_transform(data[i])
        return data
    
    def transform(self, data, index=None):
        if index is None:
            for i in range(len(data)):
                if data[i] is not None:
                    data[i] = self.scalers[i].transform(data[i])
        else:
            data = self.scalers[index].transform(data)
        return data
    
    def inverse_transform(self, data, index=None):
        if index is None:
            for i in range(len(data)):
                if data[i] is not None:
                    data[i] = self.scalers[i].inverse_x(data[i])
        else:
            data = self.scalers[index].inverse_x(data)        
        return data
    
    def len(self):
        return len(self.indexes)
    
    def get_data__(self):
        # Change accordingly   
        return (
            self.features, 
            self.targets, 
            self.indexes)
    
    def get_data(self, end_index, data_count, future_count):
        """
        Returns amount of data, typically called by a ``Context Sequencer`` 
        or ``Predictor``.

        Parameters:
        -----------
        end_index : Index of oldest historical data used, including targeting.
        data_count : Index of the earliest historical data used is 
            `end_index - data_count` (possibly lowered in order to meet 
            constrictions.)
        future_count : Historical data index difference between training
            and prediction data

        Returns:
        --------

        (features, targets, indexes) : Tuple of numpy arrays containing
            data relevant (yet not formatted) for nn modeling.
        """
        self.end_index = end_index
        self.data_count = data_count
        self.feature_count = future_count

        feature_data_count = self.data_count - self.future_count

        if feature_data_count % self.step != 0:
            feature_data_count = (feature_data_count // self.step + 1) \
                * self.step
        begin = self.end_index - feature_data_count - future_count
        if begin + 1 < 0 or self.end_index + 1 > len(self.data):
            raise Exception(f'''
ERROR
The wanted data range is 
({begin + 1}, {self.end_index + 1})
while the maximal is
({0}, {len(self.data)})
''')

        return self.get_data__(
            end_index, data_count, future_count)
        
    def report(self, verbose=False):
        index_count = self.indexes[-1] - self.indexes[0]
        print(f'''
wanted end index: ({self.end_index})
wanted data count {self.data_count}
index range: ({self.indexes[0]}, {self.indexes[-1]})
index count: {self.indexes[-1] - self.indexes[0]}''')
        if index_count != self.data_count:
            print('Real data count is adjusted to constrictions.')
        else:
            print()
        if verbose:
            print(f'''
features:
{self.features[:3]}
targets:
{self.targets[:3]}
indexes:
{self.indexes[:3]}
''')

    def plot(self):
        targets = self.targets.transpose()
        for i in range(len(targets)):
            plt.plot(self.indexes, targets[i], label=self.target_names()[i])
        features = self.features.transpose()
        for i in range(len(features)):
            plt.plot(self.indexes, features[i], label=self.feature_names()[i])
        plt.legend()
        plt.show()



class ForexDataSource(DataSource):
    def fit_data(self):
        opens = []
        volumes = []

        for i in range(0, len(self.data)):  
            open = 0
            volume = 0
            if not i + self.step < len(self.data):
                break
            for k in range(self.step):
                val = self.data[i + k]
                open += (val[1][0][0] + val[1][1][0]) / 2
                volume += val[2]              

            opens.append(open / self.step) #.reshape(-1, 1)
            volumes.append(volume / self.step) #.reshape(-1, 1)

        opens = np.array(opens).reshape(-1, 1)
        volumes = np.log(np.array(volumes).reshape(-1, 1) + 10.0)

        self.fit_transform([opens, volumes])

    def feature_names(self):
        return ('opens', 'volumes')
    
    def target_names(self):
        return ('opens target',)
        
    def get_data__(self):
        def helper(i):
            val = self.data[i]
            return (val[1][0][0] + val[1][1][0]) / 2, val[2]

        indexes = [] 
        opens = []
        volumes = []
        targets = []
        for i in range(self.begin, self.end_index, self.step):
            index = 0
            open = 0
            volume = 0
            target = 0
            index = i + self.step - 1
            for k in range(self.step):
                _open, _volume = helper(i + k)
                open += _open
                volume += _volume
                _open, _volume = helper(i + k + self.future_count)
                target += _open

            indexes.append(index)
            opens.append(open / self.step)
            volumes.append(volume / self.step)
            targets.append(target / self.step)

        opens = np.array(opens).reshape(-1, 1)
        volumes = np.log(np.array(volumes).reshape(-1, 1) + 10.0)
        opens, volumes = self.transform([opens, volumes])

        targets = np.array(targets).reshape(-1, 1)
        targets = self.transform(targets, index=0)

        self.features = np.concatenate((opens, volumes), axis=1)
        self.targets = targets
        self.indexes = np.array(indexes).reshape(-1, 1)
        return (
            self.features, 
            self.targets, 
            self.indexes)

#     def get_data(self, end_index, data_count, future_count):
#         """
#         Returns amount of data, typically called by a ``Context Sequencer`` 
#         or ``Predictor``.

#         Parameters:
#         -----------
#         end_index : Index of oldest historical data used, including targeting.
#         data_count : Index of the earliest historical data used is 
#             `end_index - data_count` (possibly lowered in order to meet 
#             constrictions.)
#         future_count : Historical data index difference between training
#             and prediction data

#         Returns:
#         --------

#         (features, targets, indexes) : Tuple of numpy arrays containing
#             data relevant (yet not formatted) for nn modeling.
#         """
#         self.end_index = end_index
#         self.data_count = data_count

#         feature_data_count = self.data_count - future_count

#         if feature_data_count % self.step != 0:
#             feature_data_count = (feature_data_count // self.step + 1) \
#                 * self.step
#         begin = self.end_index - feature_data_count - future_count
#         if begin + 1 < 0 or self.end_index + 1 > len(self.data):
#             raise Exception(f'''
# ERROR
# The wanted data range is 
# ({begin + 1}, {self.end_index + 1})
# while the maximal is
# ({0}, {len(self.data)})
# ''')
        


#         def helper(i):
#             val = self.data[i]
#             return (val[1][0][0] + val[1][1][0]) / 2, val[2]

#         indexes = [] 
#         opens = []
#         volumes = []
#         targets = []
#         for i in range(begin, self.end_index, self.step):
#             index = 0
#             open = 0
#             volume = 0
#             target = 0
#             index = i + self.step - 1
#             for k in range(self.step):
#                 _open, _volume = helper(i + k)
#                 open += _open
#                 volume += _volume
#                 _open, _volume = helper(i + k + future_count)
#                 target += _open

#             indexes.append(index)
#             opens.append(open / self.step)
#             volumes.append(volume / self.step)
#             targets.append(target / self.step)

#         opens = np.array(opens).reshape(-1, 1)
#         volumes = np.log(np.array(volumes).reshape(-1, 1) + 10.0)
#         opens, volumes = self.transform([opens, volumes])

#         targets = np.array(targets).reshape(-1, 1)
#         targets = self.transform(targets, index=0)

#         self.features = np.concatenate((opens, volumes), axis=1)
#         self.targets = targets
#         self.indexes = np.array(indexes).reshape(-1, 1)


#         return (
#             self.features, 
#             self.targets, 
#             self.indexes
#         )


In [361]:
# ds = ForexDataSource(
#     hd.DICT_DATA.values(), (StandardScaler(), StandardScaler()), 3)

ds = ForexDataSource(
    hd.DICT_DATA.values(), (MinMaxScaler(), MinMaxScaler()), 3)

features, targets, indexes = ds.get_data(
    end_index=4000, data_count=98, future_count=10)
ds.report()
ds.plot()


AttributeError: 'ForexDataSource' object has no attribute 'future_count'

In [325]:
print(f'''
wanted end range: ({1000})
index range: ({indexes[0]}, {indexes[-1]})
targets.shape: {targets.shape}
indexes.shape: {indexes.shape}
features.shape: {features.shape}
''')


wanted end range: (1000)
index range: ([3904], [4000])
targets.shape: (33, 1)
indexes.shape: (33, 1)
features.shape: (33, 2)



In [212]:
class ContextSequencer:
    def __init__(self, 
                data_source, 
                seq_len=5,  
                future_len=5, 
                end_day=0):
        self.data_source = data_source
        self.seq_len = seq_len
        self.future_len = future_len
        self.first_trained_index = end_day * co.config.PERIOD * 60 * 24
        self.trained_indexes = set()
        self.last_trained_index = None

    def create_sequences(self, end_index, seq_len, data_count):
        """Lists sequences of ``data`` items, ``seq_len`` long, ``data_count`` 
        of them, ending - not including - ``end_index`` index of ``data``. Each next sequence is shifted by 1 from the previous.
        """        

        step = self.data_source.step
        _features, _targets, _indexes = self.data_source.get_data(
                            end_index, 
                            data_count + seq_len * step + self.future_len)
        features = []
        targets = []
        indexes = []
        for i in range(data_count):
            features.append(
                _features[i: (i + seq_len)].flatten()
            )
            indexes.append(
                _indexes[i + seq_len + self.future_len - 1]
            )
            targets.append(
                _targets[i + seq_len + self.future_len - 1]
            )

        features = np.array(features)
        targets = np.array(targets)
        indexes = np.array(indexes) 

        return (
            features[~np.isnan(features)], 
            targets[~np.isnan(targets)], 
            indexes[~np.isnan(indexes)]
            )

In [213]:
cs = ContextSequencer(
    ForexDataSource(
        hd.DICT_DATA.values(), 
        (StandardScaler(), StandardScaler()),
        step = 1
        ))

features, targets, indexes = cs.create_sequences(end_index=1000, seq_len=3, data_count=3000)
print(features[:10])
# print(targets[:10])
# print(indexes[:10])

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


IndexError: index 7 is out of bounds for axis 0 with size 0