In [None]:
# Third-party imports.
import numpy as np
# Print NumPy floats in scientific notation with three decimals (e.g. 2.884e-01).
np.set_printoptions(formatter={'float_kind':"{:-.3e}".format})
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Presumably project-local modules (not visible in this notebook).
import core as co
import hist_data as hd
# NOTE(review): presumably loads the historical records that later cells read
# from hd.DICT_DATA, with data_count=None meaning "no limit" -- confirm in hist_data.
hd.set_hist_data(data_count=None)

In [165]:
class ForexDataSource:
    """Scaled open-price / volume series built from raw history records.

    Records are aggregated in consecutive, non-overlapping groups of ``step``
    items. Every complete group contributes ``step`` rows: ``step - 1``
    placeholder rows (``None`` in ``indexes``, NaN in the numeric columns)
    followed by one row holding the group's mean open, its mean volume and
    the period index of the group's last record. A trailing group shorter
    than ``step`` is dropped.

    Each record ``val`` is read as follows:

    * ``val[0]`` is floor-divided by ``co.config.PERIOD`` to get the index;
    * ``val[1][0][0]`` and ``val[1][1][0]`` are averaged into the open value
      (presumably bid and ask open prices -- TODO confirm against hist_data);
    * ``val[2]`` is accumulated as the volume.

    Attributes:
        step: group size used for the aggregation.
        scalers: sequence of scaler objects; ``scalers[0]`` is fitted on the
            opens and ``scalers[1]`` on the volumes.
        feature_count: number of feature columns (fixed at 2).
        indexes: list with one entry per row (``int`` or ``None``).
        targets: copy of the scaled opens column.
        features: scaled opens and volumes concatenated along axis 1.
    """

    def __init__(self, data, scalers, step=1):
        """Aggregate ``data`` in groups of ``step`` and fit/apply the scalers.

        Args:
            data: iterable of raw records (see the class docstring).
            scalers: indexable pair of objects exposing ``fit_transform`` and
                ``inverse_transform`` (e.g. sklearn scalers).
            step: number of consecutive records averaged into one row.

        Raises:
            ValueError: if ``step`` is smaller than 1.
        """
        if step < 1:
            raise ValueError("step must be a positive integer")

        data = list(data)
        self.step = step
        self.scalers = scalers
        self.feature_count = 2

        self.indexes = []

        opens = []
        volumes = []
        # The range stops before any trailing group shorter than ``step``.
        for start in range(0, len(data) - step + 1, step):
            index = 0
            open_sum = 0
            volume_sum = 0
            for offset in range(step):
                val = data[start + offset]
                # The last record of the group decides the row index.
                index = int(val[0] // co.config.PERIOD)
                open_sum += (val[1][0][0] + val[1][1][0]) / 2
                volume_sum += val[2]
                if offset < step - 1:
                    # Placeholder rows keep one row per raw record. NaN (not
                    # None) keeps the numeric columns as float arrays.
                    self.indexes.append(None)
                    opens.append(np.nan)
                    volumes.append(np.nan)

            self.indexes.append(index)
            opens.append(open_sum / step)
            volumes.append(volume_sum / step)

        opens, volumes = self.fit_transform([
            np.array(opens, dtype=float).reshape(-1, 1),
            np.array(volumes, dtype=float).reshape(-1, 1),
        ])
        self.targets = opens.copy()
        self.features = np.concatenate((opens, volumes), axis=1)

    def fit_transform(self, data):
        """Fit ``scalers[i]`` on ``data[i]`` and replace it with the scaled array.

        ``data`` is modified in place and also returned.
        """
        for i in range(len(data)):
            data[i] = self.scalers[i].fit_transform(data[i])
        return data

    def inverse_transform(self, data, index):
        """Undo the scaling of ``data`` using ``scalers[index]``."""
        # Scalers expose ``inverse_transform``; the previous ``inverse_x``
        # call does not exist on sklearn scalers and raised AttributeError.
        return self.scalers[index].inverse_transform(data)

    def len(self):
        """Return the number of rows (placeholder rows included)."""
        return len(self.indexes)

    def __len__(self):
        """Support the built-in ``len()``; same value as :meth:`len`."""
        return self.len()

    def get_data(self, end_index, data_count):
        """Return the rows in ``[end_index - data_count - 1, end_index)``.

        Note that this yields ``data_count + 1`` rows, not ``data_count``.
        NOTE(review): confirm whether the extra leading row is intended.

        When the window would start before row 0 (and ``end_index`` is not
        negative) it is clamped to start at row 0, so fewer rows come back
        instead of a wrapped-around slice. Negative ``end_index`` values keep
        ordinary Python slice semantics.

        Returns:
            Tuple ``(features, targets, indexes)`` of NumPy arrays.
        """
        begin = end_index - data_count - 1
        if begin < 0 <= end_index:
            # A negative start would wrap around to the end of the series.
            begin = 0
        window = slice(begin, end_index)
        return (
            np.array(self.features[window]),
            np.array(self.targets[window]),
            np.array(self.indexes[window]))



In [169]:
# Build a data source that averages the history in groups of 3 records
# (step=3), with one StandardScaler for opens and one for volumes.
ds = ForexDataSource(
    hd.DICT_DATA.values(), (StandardScaler(), StandardScaler()), 3)

# get_data(1000, 100) slices rows [899, 1000), i.e. 101 rows.
features, targets, indexes = ds.get_data(1000, 100)
# With step > 1 only the last row of each group carries a value; the
# placeholder rows are filtered out here with np.isfinite.
print('targets:\n', targets[np.isfinite(targets)])
print(indexes.shape)
# Disabled: `indexes` mixes ints and None, so it is an object array and
# np.isfinite is presumably unsupported for it -- TODO confirm.
# print('indexes:\n', indexes[np.isfinite(indexes)])
seq = features[0 : 20]
# print(seq)
# Flatten the first 20 (open, volume) rows and show only the filled values.
fl = seq.flatten()
print(fl[np.isfinite(fl)])

targets:
 [2.884e-01 2.696e-01 2.587e-01 2.368e-01 2.650e-01 2.444e-01 2.226e-01
 2.195e-01 1.658e-01 1.370e-01 5.087e-02 1.808e-02 1.153e-01 1.771e-01
 2.172e-01 2.053e-01 3.148e-01 3.180e-01 2.858e-01 2.413e-01 1.339e-01
 1.828e-01 3.263e-01 2.721e-01 2.087e-01 1.464e-01 2.311e-01 3.944e-01
 5.367e-01 5.705e-01 3.625e-01 -8.400e-02 -2.533e-01 -3.816e-01]
(101,)
[2.884e-01 1.927e+00 2.696e-01 1.648e+00 2.587e-01 1.346e+00 2.368e-01
 1.551e+00 2.650e-01 1.193e+00 2.444e-01 1.195e+00 2.226e-01 1.005e+00]


In [None]:
class ContextSequencer:
    """Cuts sliding feature windows and future targets out of a data source.

    The data source only needs a ``get_data(end_index, data_count)`` method
    returning ``(features, targets, indexes)``.
    """

    def __init__(self,
                 data_source,
                 seq_len=5,
                 future_len=5,
                 end_day=0):
        """Store the configuration and reset the training bookkeeping.

        Args:
            data_source: object providing ``get_data``.
            seq_len: stored on the instance; ``create_sequences`` takes its
                own ``seq_len`` argument and does not read this one.
            future_len: distance, in rows, from the end of a window to its
                target row.
            end_day: multiplied by ``co.config.PERIOD * 60 * 24`` to obtain
                ``first_trained_index``.
        """
        self.data_source = data_source
        self.seq_len = seq_len
        self.future_len = future_len
        self.trained_indexes = set()
        self.last_trained_index = None
        self.first_trained_index = end_day * co.config.PERIOD * 60 * 24

    def create_sequences(self, end_index, seq_len, data_count, step=1):
        """Build ``data_count`` consecutive windows from the data source.

        Asks the data source for ``data_count + seq_len + future_len`` rows
        ending before ``end_index``. Window ``i`` is rows ``i`` to
        ``i + seq_len - 1`` of that slice, flattened. Its target and index
        come from row ``i + seq_len + future_len - 1``. Successive windows
        are shifted by one row.

        ``step`` is accepted but not used.

        NOTE(review): the last target row used is
        ``data_count + seq_len + future_len - 2`` of the returned slice, so
        any rows the source returns beyond that are ignored -- confirm the
        intended alignment with ``end_index``.

        Returns:
            ``(features, targets, y_indexes)``: two NumPy arrays and a list.
        """
        rows_needed = data_count + seq_len + self.future_len
        src_features, src_targets, src_indexes = self.data_source.get_data(
            end_index, rows_needed)

        # Offset from the start of a window to the row holding its target.
        target_shift = seq_len + self.future_len - 1
        starts = range(data_count)

        features = [
            src_features[start: start + seq_len].flatten() for start in starts
        ]
        y_indexes = [src_indexes[start + target_shift] for start in starts]
        targets = [src_targets[start + target_shift][0] for start in starts]

        return np.array(features), np.array(targets), y_indexes

In [None]:
# Sequencer over an un-aggregated data source (ForexDataSource default step=1),
# using the ContextSequencer defaults (future_len=5).
cs = ContextSequencer(
    ForexDataSource(hd.DICT_DATA.values(), (StandardScaler(), StandardScaler())))

# Rebinds the notebook-level features/targets/indexes names.
# Each feature row is seq_len * 2 flattened values (open, volume per step).
features, targets, indexes = cs.create_sequences(end_index=1000, seq_len=3, data_count=100)
# Show the last window, its target value and the target's period index.
print(features[-1])
print(targets[-1])
print(indexes[-1])

[1.093e-01 4.194e+00 -2.251e-01 6.056e+00 -1.361e-01 8.092e+00]
-0.38475342033816684
27880691
