In [1]:
import pandas as pd
import ta 
import optuna 
import time
import numpy as np
from multiprocessing import Pool
from itertools import combinations, chain 
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
def powerset(s):
    return chain.from_iterable(combinations(s,r) for r in range(1,len(s)+1))

In [3]:
def file_features(data, ds_type: str):
    data1=pd.DataFrame()
    #Calcular indicadores tecnicos
    cmf_data = ta.volume.ChaikinMoneyFlowIndicator(data.High, data.Low, data.Close, data.Volume, window = 14)
    rsi_data = ta.momentum.RSIIndicator(data.Close, window=14)
    
    data1["CMF"] = cmf_data.chaikin_money_flow()
    data1["RSI"] = rsi_data.rsi()
    # Calcular la volatilidad
    data1['Volatility'] = data['High'] - data['Low']
    data1['Close_Lag0'] = data['Close']
    # Calcular las tendencias
    for i in range(1, 5 + 1):
        data1[f'Close_Lag{i}'] = data['Close'].shift(i)
    #Variable ded respuesta
    if ds_type == "buy":
        data1['Response'] = (data['Close'] < data['Close'].shift(-10))
    else:
        data1['Response'] = (data['Close'] > data['Close'].shift(-10))
    
    data1 = data1.drop(data1.index[:30])
    data1 = data1.drop(data1.index[-30:])
    data1.reset_index(drop=True, inplace=True)
    
    return data1

In [4]:
def objective_dnn_1(trial, data, threshold=0.5):
    # Dividir los datos en conjuntos de entrenamiento y prueba
    X = data.iloc[:, :-1]
    # Selecciona la variable objetivo
    y = data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    # Definir los parámetros a optimizar
    num_layers = trial.suggest_int('num_layers', 1, 5)
    num_units = [trial.suggest_int(f'num_units_layer_{i}', 32, 256) for i in range(num_layers)]
    activations = [trial.suggest_categorical(f'activation_layer_{i}', ['relu', 'sigmoid', 'tanh']) for i in range(num_layers)]
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
    # Crear el modelo de red neuronal
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=(X_train.shape[1],)))
    for i in range(num_layers):
        model.add(tf.keras.layers.Dense(units=num_units[i], activation=activations[i]))
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
    # Compilar el modelo
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    # Definir Early Stopping para evitar overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    # Entrenar el modelo
    model.fit(X_train, y_train, epochs=30, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
    # Calcular las predicciones en el conjunto de prueba
    y_pred = model.predict(X_test)
    # Redondear las predicciones si superan el umbral
    y_pred_rounded = (y_pred > threshold).astype('int32')
    # Calcular la precisión en el conjunto de prueba
    accuracy = accuracy_score(y_test, y_pred_rounded)
    return accuracy

In [5]:
def objective_dnn_2(trial, data, threshold=0.5):
    # Dividir los datos en conjuntos de entrenamiento y prueba
    X = data.iloc[:, :-1]
    # Selecciona la variable objetivo
    y = data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    # Definir los parámetros a optimizar
    num_layers = trial.suggest_int('num_layers', 1, 6)
    num_units = [trial.suggest_int(f'num_units_layer_{i}', 64, 256) for i in range(num_layers)]
    activations = [trial.suggest_categorical(f'activation_layer_{i}', ['relu', 'sigmoid', 'selu']) for i in range(num_layers)]
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
    # Crear el modelo de red neuronal
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=(X_train.shape[1],)))
    for i in range(num_layers):
        model.add(tf.keras.layers.Dense(units=num_units[i], activation=activations[i]))
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
    # Compilar el modelo
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    # Definir Early Stopping para evitar overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    # Entrenar el modelo
    model.fit(X_train, y_train, epochs=30, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
    # Calcular las predicciones en el conjunto de prueba
    y_pred = model.predict(X_test)
    # Redondear las predicciones si superan el umbral
    y_pred_rounded = (y_pred > threshold).astype('int32')
    # Calcular la precisión en el conjunto de prueba
    accuracy = accuracy_score(y_test, y_pred_rounded)
    return accuracy

In [6]:
def objective_dnn_3(trial, data, threshold=0.5):
    # Dividir los datos en conjuntos de entrenamiento y prueba
    X = data.iloc[:, :-1]
    # Selecciona la variable objetivo
    y = data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    # Definir los parámetros a optimizar
    num_layers = trial.suggest_int('num_layers', 2, 6)
    num_units = [trial.suggest_int(f'num_units_layer_{i}', 64, 512) for i in range(num_layers)]
    activations = [trial.suggest_categorical(f'activation_layer_{i}', ['relu', 'elu', 'selu']) for i in range(num_layers)]
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    # Crear el modelo de red neuronal
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=(X_train.shape[1],)))
    for i in range(num_layers):
        model.add(tf.keras.layers.Dense(units=num_units[i], activation=activations[i]))
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
    # Compilar el modelo
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    # Definir Early Stopping para evitar overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    # Entrenar el modelo
    model.fit(X_train, y_train, epochs=30, batch_size=64, validation_split=0.2, callbacks=[early_stopping])
    # Calcular las predicciones en el conjunto de prueba
    y_pred = model.predict(X_test)
    # Redondear las predicciones si superan el umbral
    y_pred_rounded = (y_pred > threshold).astype('int32')
    # Calcular la precisión en el conjunto de prueba
    accuracy = accuracy_score(y_test, y_pred_rounded)
    return accuracy

In [7]:
def optimize_params_dnn_1(data):
    # Crear un estudio Optuna para la optimización
    study = optuna.create_study(direction='maximize')
    # Función objetivo con el dataset como parámetro fijo
    objective_fn = lambda trial: objective_dnn_1(trial, data)
    # Ejecutar la optimización
    study.optimize(objective_fn, n_trials=20)
    # Obtener los mejores parámetros
    best_params = study.best_params
    best_accuracy = study.best_value
    return best_params, best_accuracy

In [8]:
def optimize_params_dnn_2(data):
    # Crear un estudio Optuna para la optimización
    study = optuna.create_study(direction='maximize')
    # Función objetivo con el dataset como parámetro fijo
    objective_fn = lambda trial: objective_dnn_2(trial, data)
    # Ejecutar la optimización
    study.optimize(objective_fn, n_trials=20)
    # Obtener los mejores parámetros
    best_params = study.best_params
    best_accuracy = study.best_value
    return best_params, best_accuracy

In [9]:
def optimize_params_dnn_3(data):
    # Crear un estudio Optuna para la optimización
    study = optuna.create_study(direction='maximize')
    # Función objetivo con el dataset como parámetro fijo
    objective_fn = lambda trial: objective_dnn_3(trial, data)
    # Ejecutar la optimización
    study.optimize(objective_fn, n_trials=20)
    # Obtener los mejores parámetros
    best_params = study.best_params
    best_accuracy = study.best_value
    return best_params, best_accuracy

In [10]:
def optimize_params(data):
    # Optimización de DNN 1
    best_params_dnn_1, best_accuracy_dnn_1 = optimize_params_dnn_1(data)
    print("Mejores parámetros de DNN 1:", best_params_dnn_1)    
    # Optimización de DNN 2
    best_params_dnn_2, best_accuracy_dnn_2 = optimize_params_dnn_2(data)
    print("Mejores parámetros de DNN 2:", best_params_dnn_2)    
    # Optimización de DNN 3
    best_params_dnn_3, best_accuracy_dnn_3 = optimize_params_dnn_3(data)
    print("Mejores parámetros de DNN 3:", best_params_dnn_3)
    return best_params_dnn_1, best_params_dnn_2, best_params_dnn_3

In [11]:
def params(data: str):
    data_1d_train = pd.read_csv(data)
    data_1d_train = data_1d_train.dropna()
    dataresult_long_1d_train = file_features(data_1d_train, ds_type="buy")
    dataresult_short_1d_train = file_features(data_1d_train, ds_type="sell")
    best_params_dnn1_long, best_params_dnn2_long, best_params_dnn3_long = optimize_params(dataresult_long_1d_train)
    best_params_dnn1_short, best_params_dnn2_short, best_params_dnn3_short = optimize_params(dataresult_short_1d_train)
    
    return best_params_dnn1_long, best_params_dnn2_long, best_params_dnn3_long, best_params_dnn1_short, best_params_dnn2_short, best_params_dnn3_short

In [12]:
dnn1_params_1d_long, dnn2_params_1d_long, dnn3_params_1d_long, dnn1_params_1d_short, dnn2_params_1d_short, dnn3_params_1d_short = params("aapl_1d_train.csv")

In [13]:
dnn1_params_1h_long, dnn2_params_1h_long, dnn3_params_1h_long, dnn1_params_1h_short, dnn2_params_1h_short, dnn3_params_1h_short = params("aapl_1h_train.csv")

[I 2024-04-09 21:44:35,132] A new study created in memory with name: no-name-2276d61a-5d46-4a16-9490-aa38d6796fb1
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:37,361] Trial 0 finished with value: 0.765625 and parameters: {'num_layers': 4, 'num_units_layer_0': 155, 'num_units_layer_1': 86, 'num_units_layer_2': 91, 'num_units_layer_3': 154, 'activation_layer_0': 'sigmoid', 'activation_layer_1': 'relu', 'activation_layer_2': 'relu', 'activation_layer_3': 'tanh', 'learning_rate': 0.005511010424661961}. Best is trial 0 with value: 0.765625.


Epoch 1/5


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:38,840] Trial 1 finished with value: 0.421875 and parameters: {'num_layers': 1, 'num_units_layer_0': 100, 'activation_layer_0': 'tanh', 'learning_rate': 0.0001788644971700735}. Best is trial 0 with value: 0.765625.
[I 2024-04-09 21:44:38,840] A new study created in memory with name: no-name-015ab260-b421-4b9a-8071-466124292928


Mejores parámetros de DNN 1: {'num_layers': 4, 'num_units_layer_0': 155, 'num_units_layer_1': 86, 'num_units_layer_2': 91, 'num_units_layer_3': 154, 'activation_layer_0': 'sigmoid', 'activation_layer_1': 'relu', 'activation_layer_2': 'relu', 'activation_layer_3': 'tanh', 'learning_rate': 0.005511010424661961}
Epoch 1/5


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:40,347] Trial 0 finished with value: 0.296875 and parameters: {'num_layers': 2, 'num_units_layer_0': 243, 'num_units_layer_1': 98, 'activation_layer_0': 'selu', 'activation_layer_1': 'selu', 'learning_rate': 0.00010975358159141193}. Best is trial 0 with value: 0.296875.


Epoch 1/5


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:41,854] Trial 1 finished with value: 0.765625 and parameters: {'num_layers': 2, 'num_units_layer_0': 98, 'num_units_layer_1': 65, 'activation_layer_0': 'selu', 'activation_layer_1': 'relu', 'learning_rate': 0.01920628843877236}. Best is trial 1 with value: 0.765625.
[I 2024-04-09 21:44:41,857] A new study created in memory with name: no-name-08dbaef1-12e1-4344-bf30-b3775be52f24


Mejores parámetros de DNN 2: {'num_layers': 2, 'num_units_layer_0': 98, 'num_units_layer_1': 65, 'activation_layer_0': 'selu', 'activation_layer_1': 'relu', 'learning_rate': 0.01920628843877236}
Epoch 1/5


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:43,507] Trial 0 finished with value: 0.234375 and parameters: {'num_layers': 3, 'num_units_layer_0': 424, 'num_units_layer_1': 186, 'num_units_layer_2': 423, 'activation_layer_0': 'relu', 'activation_layer_1': 'selu', 'activation_layer_2': 'elu', 'learning_rate': 0.0014259664976853138}. Best is trial 0 with value: 0.234375.


Epoch 1/5


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:45,966] Trial 1 finished with value: 0.234375 and parameters: {'num_layers': 5, 'num_units_layer_0': 314, 'num_units_layer_1': 96, 'num_units_layer_2': 132, 'num_units_layer_3': 434, 'num_units_layer_4': 196, 'activation_layer_0': 'relu', 'activation_layer_1': 'selu', 'activation_layer_2': 'selu', 'activation_layer_3': 'selu', 'activation_layer_4': 'selu', 'learning_rate': 0.0020037637981947594}. Best is trial 0 with value: 0.234375.
[I 2024-04-09 21:44:45,967] A new study created in memory with name: no-name-4a3fdcbc-9fda-4380-9961-f35fe12ba714


Mejores parámetros de DNN 3: {'num_layers': 3, 'num_units_layer_0': 424, 'num_units_layer_1': 186, 'num_units_layer_2': 423, 'activation_layer_0': 'relu', 'activation_layer_1': 'selu', 'activation_layer_2': 'elu', 'learning_rate': 0.0014259664976853138}
Epoch 1/5


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:48,202] Trial 0 finished with value: 0.21875 and parameters: {'num_layers': 5, 'num_units_layer_0': 162, 'num_units_layer_1': 100, 'num_units_layer_2': 198, 'num_units_layer_3': 240, 'num_units_layer_4': 51, 'activation_layer_0': 'tanh', 'activation_layer_1': 'relu', 'activation_layer_2': 'relu', 'activation_layer_3': 'tanh', 'activation_layer_4': 'tanh', 'learning_rate': 0.00449320591078656}. Best is trial 0 with value: 0.21875.


Epoch 1/5


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:49,987] Trial 1 finished with value: 0.21875 and parameters: {'num_layers': 3, 'num_units_layer_0': 164, 'num_units_layer_1': 98, 'num_units_layer_2': 131, 'activation_layer_0': 'sigmoid', 'activation_layer_1': 'relu', 'activation_layer_2': 'relu', 'learning_rate': 0.00016452315735045173}. Best is trial 0 with value: 0.21875.
[I 2024-04-09 21:44:49,987] A new study created in memory with name: no-name-5d0c93cb-658f-454e-bbdf-e4bf7771878e


Mejores parámetros de DNN 1: {'num_layers': 5, 'num_units_layer_0': 162, 'num_units_layer_1': 100, 'num_units_layer_2': 198, 'num_units_layer_3': 240, 'num_units_layer_4': 51, 'activation_layer_0': 'tanh', 'activation_layer_1': 'relu', 'activation_layer_2': 'relu', 'activation_layer_3': 'tanh', 'activation_layer_4': 'tanh', 'learning_rate': 0.00449320591078656}


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:52,111] Trial 0 finished with value: 0.421875 and parameters: {'num_layers': 4, 'num_units_layer_0': 249, 'num_units_layer_1': 131, 'num_units_layer_2': 68, 'num_units_layer_3': 101, 'activation_layer_0': 'relu', 'activation_layer_1': 'sigmoid', 'activation_layer_2': 'relu', 'activation_layer_3': 'relu', 'learning_rate': 0.0002490905120224322}. Best is trial 0 with value: 0.421875.


Epoch 1/5


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:54,717] Trial 1 finished with value: 0.21875 and parameters: {'num_layers': 6, 'num_units_layer_0': 68, 'num_units_layer_1': 249, 'num_units_layer_2': 111, 'num_units_layer_3': 211, 'num_units_layer_4': 242, 'num_units_layer_5': 202, 'activation_layer_0': 'selu', 'activation_layer_1': 'relu', 'activation_layer_2': 'relu', 'activation_layer_3': 'sigmoid', 'activation_layer_4': 'selu', 'activation_layer_5': 'selu', 'learning_rate': 0.0008014733276600091}. Best is trial 0 with value: 0.421875.
[I 2024-04-09 21:44:54,734] A new study created in memory with name: no-name-219981f7-95e6-498b-839a-6f822f0fc3b2


Mejores parámetros de DNN 2: {'num_layers': 4, 'num_units_layer_0': 249, 'num_units_layer_1': 131, 'num_units_layer_2': 68, 'num_units_layer_3': 101, 'activation_layer_0': 'relu', 'activation_layer_1': 'sigmoid', 'activation_layer_2': 'relu', 'activation_layer_3': 'relu', 'learning_rate': 0.0002490905120224322}


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:44:59,023] Trial 0 finished with value: 0.265625 and parameters: {'num_layers': 4, 'num_units_layer_0': 166, 'num_units_layer_1': 332, 'num_units_layer_2': 507, 'num_units_layer_3': 87, 'activation_layer_0': 'elu', 'activation_layer_1': 'relu', 'activation_layer_2': 'selu', 'activation_layer_3': 'relu', 'learning_rate': 1.0141650181481786e-05}. Best is trial 0 with value: 0.265625.


Epoch 1/5


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:45:01,377] Trial 1 finished with value: 0.78125 and parameters: {'num_layers': 5, 'num_units_layer_0': 120, 'num_units_layer_1': 194, 'num_units_layer_2': 254, 'num_units_layer_3': 196, 'num_units_layer_4': 111, 'activation_layer_0': 'relu', 'activation_layer_1': 'elu', 'activation_layer_2': 'relu', 'activation_layer_3': 'elu', 'activation_layer_4': 'relu', 'learning_rate': 1.4451941039038466e-05}. Best is trial 1 with value: 0.78125.


Mejores parámetros de DNN 3: {'num_layers': 5, 'num_units_layer_0': 120, 'num_units_layer_1': 194, 'num_units_layer_2': 254, 'num_units_layer_3': 196, 'num_units_layer_4': 111, 'activation_layer_0': 'relu', 'activation_layer_1': 'elu', 'activation_layer_2': 'relu', 'activation_layer_3': 'elu', 'activation_layer_4': 'relu', 'learning_rate': 1.4451941039038466e-05}


In [14]:
dnn1_params_1m_long, dnn2_params_1m_long, dnn3_params_1m_long, dnn1_params_1m_short, dnn2_params_1m_short, dnn3_params_1m_short = params("aapl_1m_train.csv")

In [15]:
dnn1_params_5m_long, dnn2_params_5m_long, dnn3_params_5m_long, dnn1_params_5m_short, dnn2_params_5m_short, dnn3_params_5m_short = params("aapl_5m_train.csv")

In [16]:
#Una vez actualizados los parametros optimos .....

In [17]:
def create_and_compile_dnn_model(input_shape, params):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=(input_shape,)))
    for i in range(params['num_layers']):
        units = params[f'num_units_layer_{i}']
        activation = params[f'activation_layer_{i}']
        model.add(tf.keras.layers.Dense(units=units, activation=activation))
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=params['learning_rate']),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model

In [18]:
def buy_signals(data, best_dnn1_params, best_dnn2_params, best_dnn3_params):
    buy_signals = pd.DataFrame()
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    input_shape = X.shape[1]
    threshold = 0.5

    # Crear y compilar modelo DNN1
    best_dnn1_model = create_and_compile_dnn_model(input_shape, best_dnn1_params)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    best_dnn1_model.fit(X, y, epochs=30, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
    y_pred_dnn1 = best_dnn1_model.predict(X)
    y_pred_rounded_dnn1 = (y_pred_dnn1 > threshold).astype('int32')
    
    # Crear y compilar modelo DNN2
    best_dnn2_model = create_and_compile_dnn_model(input_shape, best_dnn2_params)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    best_dnn2_model.fit(X, y, epochs=30, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
    y_pred_dnn2 = best_dnn2_model.predict(X)
    y_pred_rounded_dnn2 = (y_pred_dnn2 > threshold).astype('int32')
    
    # Crear y compilar modelo DNN3
    best_dnn3_model = create_and_compile_dnn_model(input_shape, best_dnn3_params)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    best_dnn3_model.fit(X, y, epochs=30, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
    y_pred_dnn3 = best_dnn3_model.predict(X)
    y_pred_rounded_dnn3 = (y_pred_dnn3 > threshold).astype('int32')

    # Agregar las predicciones como nuevas columnas al conjunto de datos original
    buy_signals['predicciones_dnn1'] = pd.Series(y_pred_rounded_dnn1.flatten())
    buy_signals['predicciones_dnn2'] = pd.Series(y_pred_rounded_dnn2.flatten())
    buy_signals['predicciones_dnn3'] = pd.Series(y_pred_rounded_dnn3.flatten())

    return buy_signals

In [19]:
def sell_signals(data, best_dnn1_params, best_dnn2_params, best_dnn3_params):
    sell_signals = pd.DataFrame()
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    input_shape = X.shape[1]
    threshold = 0.5

    # Crear y compilar modelo DNN1
    best_dnn1_model = create_and_compile_dnn_model(input_shape, best_dnn1_params)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    best_dnn1_model.fit(X, y, epochs=30, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
    y_pred_dnn1 = best_dnn1_model.predict(X)
    y_pred_rounded_dnn1 = (y_pred_dnn1 > threshold).astype('int32')
    
    # Crear y compilar modelo DNN2
    best_dnn2_model = create_and_compile_dnn_model(input_shape, best_dnn2_params)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    best_dnn2_model.fit(X, y, epochs=30, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
    y_pred_dnn2 = best_dnn2_model.predict(X)
    y_pred_rounded_dnn2 = (y_pred_dnn2 > threshold).astype('int32')
    
    # Crear y compilar modelo DNN3
    best_dnn3_model = create_and_compile_dnn_model(input_shape, best_dnn3_params)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    best_dnn3_model.fit(X, y, epochs=30, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
    y_pred_dnn3 = best_dnn3_model.predict(X)
    y_pred_rounded_dnn3 = (y_pred_dnn3 > threshold).astype('int32')

    # Agregar las predicciones como nuevas columnas al conjunto de datos original
    sell_signals['predicciones_dnn1'] = pd.Series(y_pred_rounded_dnn1.flatten())
    sell_signals['predicciones_dnn2'] = pd.Series(y_pred_rounded_dnn2.flatten())
    sell_signals['predicciones_dnn3'] = pd.Series(y_pred_rounded_dnn3.flatten())

    return sell_signals

In [20]:
def backtest(data, buy_signals, sell_signals, stop_loss, take_profit, n_shares):
            history = []
            active_operations = []
            cash = 1_000_000
            com = 1.25 / 100

            for i, row in data.iterrows():
                # close active operation
                active_op_temp = []
                for operation in active_operations:
                    if operation["stop_loss"] > row.Close:
                        cash += (row.Close * operation["n_shares"]) * (1 - com)
                    elif operation["take_profit"] < row.Close:
                        cash += (row.Close * operation["n_shares"]) * (1 - com)
                    else:
                        active_op_temp.append(operation)
                active_operations = active_op_temp

                # check if we have enough cash
                if cash < (row.Close * (1 + com)):
                    asset_vals = sum([operation["n_shares"] * row.Close for operation in active_operations])
                    portfolio_value = cash + asset_vals
                    continue

                # Apply buy signals
                if buy_signals.loc[i].any():
                    active_operations.append({
                        "bought": row.Close,
                        "n_shares": n_shares,
                        "stop_loss": row.Close * stop_loss,
                        "take_profit": row.Close * take_profit
                    })

                    cash -= row.Close * (1 + com) * n_shares

                # Apply sell signals
                if sell_signals.loc[i].any():
                    active_op_temp = []
                    for operation in active_operations:
                        if operation["take_profit"] < row.Close or operation["stop_loss"] > row.Close:
                            cash += (row.Close * operation["n_shares"]) * (1 - com)
                        else:
                            active_op_temp.append(operation)
                    active_operations = active_op_temp

                asset_vals = sum([operation["n_shares"] * row.Close for operation in active_operations])
                portfolio_value = cash + asset_vals

            return portfolio_value

In [21]:
def optimize(trial, strategy, data):
    portfolio_value = 0

    stop_loss = trial.suggest_float("stop_loss", 0.80, 0.90)
    take_profit = trial.suggest_float("take_profit", 1.01, 1.10)
    n_shares = trial.suggest_int("n_shares", 20, 50)

    strat_params = {}

    buy_signals = pd.DataFrame()
    sell_signals = pd.DataFrame()

    if "dnn1" in strategy:
        buy_signals["dnn1"] = global_buy_signals["predicciones_dnn1"]
        sell_signals["dnn1"] = global_sell_signals["predicciones_dnn1"]
        
    if "dnn2" in strategy:
        buy_signals["dnn2"] = global_buy_signals["predicciones_dnn2"]
        sell_signals["dnn2"] = global_sell_signals["predicciones_dnn2"]
        
    if "dnn3" in strategy:
        buy_signals["dnn3"] = global_buy_signals["predicciones_dnn3"]
        sell_signals["dnn3"] = global_sell_signals["predicciones_dnn3"]
    
    return backtest(data, buy_signals, sell_signals, stop_loss, take_profit, n_shares)

In [22]:
def optimize_file(data):
    data = data.drop(data.index[:30])
    data = data.drop(data.index[-30:])
    data.reset_index(drop=True, inplace=True)
    strategies = list(powerset(["dnn1", "dnn2", "dnn3"]))
    best_strat = None
    best_val = -1
    best_params = None

    for strat in strategies:
        study = optuna.create_study(direction="maximize")
        study.optimize(lambda x: optimize(x, strat, data), n_trials=20)
        value = study.best_value
        if value > best_val:
            best_val = value
            best_strat = strat
            best_params = study.best_params
    print(study.best_value)
    print(best_strat)
    print(best_params)

    return {"file": data,
            "strat": best_strat,
            "value": best_val,
            "params": best_params}

In [23]:
data_1d_test = pd.read_csv("aapl_1d_test.csv")
data_1d_test = data_1d_test.dropna()
dataresult_long_1d_test = file_features(data_1d_test, ds_type="buy")
dataresult_short_1d_test = file_features(data_1d_test, ds_type="sell")
global_buy_signals = buy_signals(dataresult_long_1d_test, dnn1_params_1d_long, dnn2_params_1d_long, dnn3_params_1d_long)
global_sell_signals = sell_signals(dataresult_short_1d_test, dnn1_params_1d_short, dnn2_params_1d_short, dnn3_params_1d_short)
file_1d_test = optimize_file(data_1d_test)

In [24]:
data_1h_test = pd.read_csv("aapl_1h_test.csv")
data_1h_test = data_1h_test.dropna()
dataresult_long_1h_test = file_features(data_1h_test, ds_type="buy")
dataresult_short_1h_test = file_features(data_1h_test, ds_type="sell")
global_buy_signals = buy_signals(dataresult_long_1h_test, dnn1_params_1h_long, dnn2_params_1h_long, dnn3_params_1h_long)
global_sell_signals = sell_signals(dataresult_short_1h_test, dnn1_params_1h_short, dnn2_params_1h_short, dnn3_params_1h_short)
file_1h_test = optimize_file(data_1h_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-04-09 21:45:26,379] A new study created in memory with name: no-name-b4ae5b57-3585-4e47-bc03-22d6acd93812
[I 2024-04-09 21:45:32,955] Trial 0 finished with value: 425447.2666882039 and parameters: {'stop_loss': 0.8537642483871418, 'take_profit': 1.019527590911823, 'n_shares': 48}. Best is trial 0 with value: 425447.2666882039.
[I 2024-04-09 21:45:45,083] Trial 1 finished with value: 637778.4338516494 and parameters: {'stop_loss': 0.8853651582600753, 'take_profit': 1.048248129696119, 'n_shares': 27}. Best is trial 1 with value: 637778.4338516494.
[I 2024-04-09 21:45:45,087] A new study created in memory with name: no-name-6a4b9fd5-a577-4357-bec3-8a1edd2285aa
[I 2024-04-09 21:45:46,313] Trial 0 finished with value: 1000000.0 and parameters: {'stop_loss': 0.8932428009927589, 'take_profit': 1.088822405044012, 'n_shares': 39}. Best is trial 0 with value: 1000000.0.
[I 2024-04-09 21:45:47,334] Trial 1 finished with value: 1000000.0 and parameters: {'stop_loss': 0.8022676154455103, 't

838428.8239197981
('dnn2',)
{'stop_loss': 0.8932428009927589, 'take_profit': 1.088822405044012, 'n_shares': 39}


In [25]:
data_1m_test = pd.read_csv("aapl_1m_test.csv")
data_1m_test = data_1m_test.dropna()
dataresult_long_1m_test = file_features(data_1m_test, ds_type="buy")
dataresult_short_1m_test = file_features(data_1m_test, ds_type="sell")
global_buy_signals = buy_signals(dataresult_long_1m_test, dnn1_params_1m_long, dnn2_params_1m_long, dnn3_params_1m_long)
global_sell_signals = sell_signals(dataresult_short_1m_test, dnn1_params_1m_short, dnn2_params_1m_short, dnn3_params_1m_short)
file_1m_test = optimize_file(data_1m_test)

In [26]:
data_5m_test = pd.read_csv("aapl_5m_test.csv")
data_5m_test = data_5m_test.dropna()
dataresult_long_5m_test = file_features(data_5m_test, ds_type="buy")
dataresult_short_5m_test = file_features(data_5m_test, ds_type="sell")
global_buy_signals = buy_signals(dataresult_long_5m_test, dnn1_params_5m_long, dnn2_params_5m_long, dnn3_params_5m_long)
global_sell_signals = sell_signals(dataresult_short_5m_test, dnn1_params_5m_short, dnn2_params_5m_short, dnn3_params_5m_short)
file_5m_test = optimize_file(data_5m_test)