In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from trading_tools import data_frame
from tqdm import tqdm
import time




In [2]:
# Asset identifier; it is interpolated into the CSV filename loaded below.
# Swap in the commented alternative to load a different instrument.
ASSET = "AUDCAD_otc" 
# ASSET = "EURUSD_otc" 
# Load the asset's CSV through the project helper `data_frame.load_csv`.
# Later cells read the 'high', 'low' and 'close' columns of this frame.
# NOTE(review): the "0005s" / "0001s" filename suffix presumably encodes the
# sampling interval (5 s vs 1 s) — confirm against the files in `actives/`.
df = data_frame.load_csv(f"actives/ACTIVO-{ASSET}-0005s.csv")
# df = data_frame.load_csv(f"actives/ACTIVO-{ASSET}-0001s.csv")

In [3]:
def check_conditions_vectorized(df, window_size):
    """Flag rows that start a strictly monotonic high/low run confirmed by a later close.

    A row ``t`` is labelled ``1.0`` when, over the ``window_size`` rows
    ``t .. t + window_size - 1``, either

    * both ``high`` and ``low`` are strictly decreasing and
      ``close[t + window_size] < close[t]``, or
    * both ``high`` and ``low`` are strictly increasing and
      ``close[t + window_size] > close[t]``.

    Otherwise the row is labelled ``0.0``.  The label therefore looks
    ``window_size`` rows into the future.  With ``window_size == 1`` there are
    no consecutive pairs to compare, so only the close comparison decides.

    Args:
        df: DataFrame with ``'high'``, ``'low'`` and ``'close'`` columns.
        window_size: Positive number of rows in the run.

    Returns:
        A float ``numpy.ndarray`` of length ``len(df)``.  The last
        ``window_size`` entries (all of them if the frame is shorter than
        that) are ``NaN`` because their future rows do not exist.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.  Such values used to
            produce an all-NaN or otherwise meaningless array without any
            error, because ``[:-0]`` is an empty slice.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size!r}")

    n_rows = len(df)
    close_now = df['close'].values

    # Row i of each stack is the series shifted i steps into the future, so
    # column t holds the values at t, t+1, ..., t+window_size-1.
    high_shifted = np.array([df['high'].shift(-i).values for i in range(window_size)])
    low_shifted = np.array([df['low'].shift(-i).values for i in range(window_size)])
    close_shifted = df['close'].shift(-window_size).values

    # Comparisons against the NaN padding introduced by shift() evaluate to
    # False, so incomplete windows can never satisfy a condition.
    highs_falling = np.all(high_shifted[1:] < high_shifted[:-1], axis=0)
    lows_falling = np.all(low_shifted[1:] < low_shifted[:-1], axis=0)
    close_lower = close_shifted < close_now

    highs_rising = np.all(high_shifted[1:] > high_shifted[:-1], axis=0)
    lows_rising = np.all(low_shifted[1:] > low_shifted[:-1], axis=0)
    close_higher = close_shifted > close_now

    conditions_down = highs_falling & lows_falling & close_lower
    conditions_up = highs_rising & lows_rising & close_higher

    # Only the first n_rows - window_size rows have a complete look-ahead;
    # everything after stays NaN so callers can drop it.
    labels = np.full(n_rows, np.nan)
    n_labelled = max(n_rows - window_size, 0)
    labels[:n_labelled] = (conditions_down | conditions_up)[:n_labelled].astype(int)

    return labels


In [4]:
# Look-ahead horizon (in rows) used to build the sequence-quality label.
window_size = 6

# Name of the label column. Both constants hold the same string; SEQ_LABEL is
# the one used by the rest of the notebook.
QUALITY_LABEL = f'seq_quality_{window_size}'
SEQ_LABEL = QUALITY_LABEL

# Time the vectorized labelling, including writing the column into df.
start_time = time.time()
df[SEQ_LABEL] = check_conditions_vectorized(df, window_size)
time_check_conditions_vectorized = time.time() - start_time
print(f"Time for check_conditions_vectorized: {time_check_conditions_vectorized:.2f} seconds")

# The labeller leaves NaN in the trailing rows that have no complete
# look-ahead; drop them (and any other row containing NaN) in place.
# NOTE: this mutates df, so re-running the cell trims further rows each time;
# re-run the loading cell first for a clean result.
df.dropna(inplace=True)

# Class distribution of the label.
count_values = df[SEQ_LABEL].value_counts()
print(f"Conteo de valores en la columna {SEQ_LABEL}: {count_values}")


Time for check_conditions_vectorized: 0.12 seconds
Conteo de valores en la columna seq_quality_6: seq_quality_6
0.0    878644
1.0     31072
Name: count, dtype: int64


In [5]:
from sklearn.model_selection import train_test_split

# Column-name -> position lookup and column count of the labelled frame.
# At this point num_features still counts the label column.
column_indices = {name: i for i, name in enumerate(df.columns)}
num_features = df.shape[1]

features_columns = df.drop(columns=[SEQ_LABEL]).columns.copy()

# Full (unbalanced) feature matrix and label vector.
X_all, y_all = df.drop(columns=[SEQ_LABEL]).values, df[SEQ_LABEL].values

# Row positions of each class, found with a vectorized scan instead of a
# Python-level loop over every label.
zero_indices = np.flatnonzero(y_all == 0)
one_indices = np.flatnonzero(y_all == 1)

# Size of the minority class; both classes are down-sampled to it.
min_class_size = min(len(zero_indices), len(one_indices))

# Draw the same number of rows from each class, reproducibly.
np.random.seed(42)
zero_sample = np.random.choice(zero_indices, min_class_size, replace=False)
one_sample = np.random.choice(one_indices, min_class_size, replace=False)

# Positions of the balanced subset (all sampled zeros, then all sampled ones).
balanced_indices = np.concatenate([zero_sample, one_sample])

# NOTE(review): random sampling plus the shuffled splits below discard the
# original row order, while WindowGenerator later builds windows from
# consecutive rows of the resulting frames. Confirm that windows made of
# non-adjacent rows are really intended.
X_balanced = X_all[balanced_indices]
y_balanced = y_all[balanced_indices]
print(len(balanced_indices))

# 70 % training; the remaining 30 % is held out for validation + test.
X_train, X_temp, y_train, y_temp = train_test_split(X_balanced, y_balanced, test_size=0.3, random_state=42, shuffle=True)

# Split the held-out part into validation and test sets (test_size=0.333).
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.333, random_state=16, shuffle=True)

# Report sizes and verify the class balance of the training set.
print(f"Total zeros: {len(zero_indices)}, Total ones: {len(one_indices)}")
print(f"Training data size: {len(X_train)}, Training target size: {len(y_train)}")
print(f"Validation data size: {len(X_val)}, Validation target size: {len(y_val)}")
print(f"Testing data size: {len(X_test)}, Testing target size: {len(y_test)}")
print(f"Class balance in training set: {np.sum(np.array(y_train) == 0)} zeros and {np.sum(np.array(y_train) == 1)} ones")

def create_df(features, targets, feature_labels, target_label):
    """Assemble a DataFrame from a feature matrix plus one target column.

    This is a plain module-level function. It must not be wrapped in
    ``@staticmethod``: outside a class body that decorator yields a
    ``staticmethod`` object, which is not callable before Python 3.10.

    Args:
        features: 2-D array-like (rows x features) passed to ``pd.DataFrame``.
        targets: 1-D array-like with one target value per row.
        feature_labels: Column names for ``features``, in order.
        target_label: Name of the column that receives ``targets``; it is
            appended after the feature columns.

    Returns:
        A new ``pd.DataFrame`` with the feature columns followed by the
        target column.
    """
    out_df = pd.DataFrame(data=features, columns=feature_labels)
    out_df[target_label] = targets
    return out_df
    
# Rebuild one labelled frame per split, in train / validation / test order.
train_df, val_df, test_df = (
    create_df(split_X, split_y, features_columns, SEQ_LABEL)
    for split_X, split_y in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)


62144
Total zeros: 878644, Total ones: 31072
Training data size: 43500, Training target size: 43500
Validation data size: 12435, Validation target size: 12435
Testing data size: 6209, Testing target size: 6209
Class balance in training set: 21804 zeros and 21696 ones


In [6]:
class WindowGenerator():
    """Turn the train/validation/test frames into batched fixed-width windows.

    Each window covers ``input_width`` consecutive rows of a frame. The model
    inputs are the ``features_columns`` of every row in the window; the labels
    are the ``label_columns`` taken from the window's last row.
    """

    def __init__(self, input_width, label_columns, features_columns,
                 train_df=train_df, val_df=val_df, test_df=test_df,
                  batch_size=32):
        """Store the frames and derive the window geometry.

        Args:
            input_width: Number of consecutive rows per window.
            label_columns: Column names used as labels.
            features_columns: Column names used as model inputs.
            train_df, val_df, test_df: Source frames. The defaults are the
                module-level frames as they existed when this class statement
                was executed, so re-run this cell after regenerating them.
            batch_size: Number of windows per batch.
        """
        # Window geometry: the whole window is input, there is no label offset.
        self.input_width = input_width
        self.total_window_size = input_width
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        # Column name -> position in the arrays produced from the frames
        # (positions are read from the training frame).
        self.column_indices = {
            column: position for position, column in enumerate(train_df.columns)
        }
        self.label_columns = label_columns
        self.features_columns = features_columns

        self.batch_size = batch_size
        # Flag kept for the optional per-window normalisation; split_window
        # does not consult it at the moment.
        self.normalize_inputs = False

    def __repr__(self):
        """Return a multi-line summary of the window configuration."""
        summary_lines = (
            f'Total window size: {self.total_window_size}',
            f'Input indices: {min(self.input_indices)} - {max(self.input_indices)}',
            f'Label column name(s): {self.label_columns}',
            f'Feature column name(s): {self.features_columns}',
        )
        return '\n'.join(summary_lines)

    @tf.function
    def normalize(self, inputs):
        """Express every window relative to its own per-feature mean.

        Each element of ``inputs`` along the first axis is mapped to
        ``(sequence - mean) / mean``, with the mean taken over axis 0 of that
        element. A zero mean makes the division produce inf/NaN.
        """
        def _relative_to_mean(sequence):
            window_mean = tf.reduce_mean(sequence, axis=0, keepdims=True)
            return (sequence - window_mean) / window_mean

        return tf.map_fn(_relative_to_mean, inputs, fn_output_signature=tf.float32)

    def split_window(self, features):
        """Split a batch of windows into ``(inputs, labels)``.

        Args:
            features: Tensor indexed as (batch, time, column).

        Returns:
            ``inputs`` with shape (batch, input_width, n_features) and
            ``labels`` with shape (batch, n_labels), the labels being read
            from the last time step of each window.
        """
        window = features[:, self.input_slice, :]

        feature_positions = [self.column_indices[name] for name in self.features_columns]
        label_positions = [self.column_indices[name] for name in self.label_columns]

        inputs = tf.stack([window[:, :, pos] for pos in feature_positions], axis=-1)
        labels = tf.stack([window[:, -1, pos] for pos in label_positions], axis=-1)

        # Normalisation is currently disabled:
        # if self.normalize_inputs:
        #     inputs = self.normalize(tf.cast(inputs, tf.float32))  # cast inputs to float32

        # Slicing loses static shape information; restore it explicitly.
        inputs.set_shape([None, self.input_width, len(self.features_columns)])
        labels.set_shape([None, len(self.label_columns)])

        return inputs, labels

    def make_dataset(self, data):
        """Build a batched, unshuffled dataset of ``(inputs, labels)`` from a frame."""
        windows = keras.preprocessing.timeseries_dataset_from_array(
            data=data.to_numpy(),
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=False,
            batch_size=self.batch_size
        )
        return windows.map(self.split_window)

    @property
    def train(self):
        """Dataset built from the training frame (rebuilt on every access)."""
        return self.make_dataset(self.train_df)

    @property
    def val(self):
        """Dataset built from the validation frame (rebuilt on every access)."""
        return self.make_dataset(self.val_df)

    @property
    def test(self):
        """Dataset built from the test frame (rebuilt on every access)."""
        return self.make_dataset(self.test_df)

    @property
    def example(self):
        """First training batch, fetched once and cached on the instance."""
        cached = getattr(self, '_example', None)
        if cached is not None:
            return cached
        # No cached batch yet: take the first one from the training dataset
        # and keep it for the next call.
        first_batch = next(iter(self.train))
        self._example = first_batch
        return first_batch


In [7]:
# Columns the model predicts.
label_columns = [SEQ_LABEL]

# Every remaining column of df is a model input.
# To train on a subset instead: features_columns = ['open', "close"]
features_columns = [col for col in df.columns if col not in label_columns]

num_labels = len(label_columns)
num_features = len(features_columns)

# Number of consecutive rows in each input window.
input_width = 120

window = WindowGenerator(
    input_width=input_width,
    label_columns=label_columns,
    features_columns=features_columns,
    batch_size=2048,
)

window

Total window size: 120
Input indices: 0 - 119
Label column name(s): ['seq_quality_6']
Feature column name(s): ['open', 'high', 'low', 'close']

In [10]:
# Reset Keras' global state so re-running the model cells starts from a clean graph.
keras.backend.clear_session()

# Number of hidden units in each stacked GRU layer; the same list is used for
# both the encoder and the decoder.
layers = [35, 35] 

learning_rate = 0.01
decay = 0 # Learning-rate decay passed to the optimiser (0 disables it)

# Adam from the legacy optimiser namespace; "sgd" (stochastic gradient descent)
# would be another option.
optimiser = keras.optimizers.legacy.Adam(learning_rate=learning_rate, decay=decay) 

# Dimensionality of the encoder input at each time step: the number of feature columns.
num_input_features = num_features 
# Dimensionality of the model output at each time step: the number of label columns.
num_output_features = num_labels 
# The input and output dimensionalities do not have to match.
# For instance, three input signals (consumer confidence, inflation and house
# prices) could be used to predict future house prices only.

# Mean squared error. Other loss functions are possible, see the Keras documentation.
# NOTE(review): the label column holds 0/1 values, so a classification loss
# such as binary cross-entropy may be a better fit — confirm the intent.
loss = "mse" 

# Regularisation is switched off; set `regulariser` to enable it.
lambda_regulariser = 0.000001 # Will not be used if regulariser is None
regulariser = None # Possible regulariser: keras.regularizers.l2(lambda_regulariser)

# Training-loop settings.
# NOTE(review): none of these three values is consumed in the cells visible
# here (the WindowGenerator above was given batch_size=2048 as a literal);
# check that batch_size * steps_per_epoch matches the dataset size wherever
# the model is fitted.
batch_size = 2048
steps_per_epoch = 200
epochs = 15

input_sequence_length = input_width # Length of the sequence used by the encoder
target_sequence_length = window_size # Length of the sequence predicted by the decoder
num_steps_to_predict = 20 # Length to use when testing the model


In [11]:


# Encoder input: sequences of any length with num_input_features values per time step.
encoder_inputs = keras.layers.Input(shape=(None, num_input_features))

# Build one GRU cell per entry in `layers`; the RNN layer below stacks them
# into a single multi-layer recurrent layer.
encoder_cells = []
for hidden_neurons in layers:
    encoder_cells.append(keras.layers.GRUCell(hidden_neurons,
                                              kernel_regularizer=regulariser,
                                              recurrent_regularizer=regulariser,
                                              bias_regularizer=regulariser))

# return_state=True makes the layer return its output followed by the final
# state of every stacked cell.
encoder = keras.layers.RNN(encoder_cells, return_state=True)

encoder_outputs_and_states = encoder(encoder_inputs)

# Discard the encoder output (element 0) and keep only the states.
# The encoder's job is to produce a state summarising the input sequence;
# that state is what seeds the decoder.
encoder_states = encoder_outputs_and_states[1:]

In [13]:
# Decoder input placeholder: sequences of any length with one value per time step.
# NOTE(review): the original comment said this input is fed with zeros by a
# `random_sine` helper from a `utils` module; neither is visible in this
# notebook, so confirm what is actually passed as the decoder input.
decoder_inputs = keras.layers.Input(shape=(None, 1))

# One GRU cell per entry in `layers`, mirroring the encoder so the encoder
# states can be used as the decoder's initial states.
decoder_cells = []
for hidden_neurons in layers:
    decoder_cells.append(keras.layers.GRUCell(hidden_neurons,
                                              kernel_regularizer=regulariser,
                                              recurrent_regularizer=regulariser,
                                              bias_regularizer=regulariser))

# return_sequences=True yields an output for every decoder time step.
decoder = keras.layers.RNN(decoder_cells, return_sequences=True, return_state=True)

# Set the initial state of the decoder to the final state of the encoder.
# This is the fundamental part of the encoder-decoder architecture.
decoder_outputs_and_states = decoder(decoder_inputs, initial_state=encoder_states)

# Keep only the decoder output sequence (element 0), not the states.
decoder_outputs = decoder_outputs_and_states[0]

# Project every time step to num_output_features values with a linear Dense
# layer. tanh is the default GRU activation in Keras, so the linear layer is
# what allows predictions outside the [-1, 1] range.
decoder_dense = keras.layers.Dense(num_output_features,
                                   activation='linear',
                                   kernel_regularizer=regulariser,
                                   bias_regularizer=regulariser)

decoder_outputs = decoder_dense(decoder_outputs)

In [14]:
# Assemble the encoder-decoder with the Keras functional API: the model takes
# two inputs (encoder sequence, decoder sequence) and returns the projected
# decoder outputs.
# Functional API guide: https://keras.io/guides/functional_api/
# NOTE(review): WindowGenerator datasets yield a single `inputs` tensor per
# batch, whereas this model expects [encoder_inputs, decoder_inputs]; check
# how the decoder input is supplied where the model is fitted.
model = keras.models.Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)
model.compile(optimizer=optimiser, loss=loss)