In [25]:
import pandas as pd
import ta 
import optuna 
import time
import numpy as np
from multiprocessing import Pool
from itertools import combinations, chain 
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

In [26]:
def powerset(s):
    return chain.from_iterable(combinations(s,r) for r in range(1,len(s)+1))

In [27]:
def file_features(data, ds_type: str):
    data1=pd.DataFrame()
    #Calcular indicadores tecnicos
    cmf_data = ta.volume.ChaikinMoneyFlowIndicator(data.High, data.Low, data.Close, data.Volume, window = 14)
    rsi_data = ta.momentum.RSIIndicator(data.Close, window=14)
    
    data1["CMF"] = cmf_data.chaikin_money_flow()
    data1["RSI"] = rsi_data.rsi()
    # Calcular la volatilidad
    data1['Volatility'] = data['High'] - data['Low']
    data1['Close_Lag0'] = data['Close']
    # Calcular las tendencias
    for i in range(1, 5 + 1):
        data1[f'Close_Lag{i}'] = data['Close'].shift(i)
    #Variable ded respuesta
    if ds_type == "buy":
        data1['Response'] = (data['Close'] < data['Close'].shift(-10))
    else:
        data1['Response'] = (data['Close'] > data['Close'].shift(-10))
    
    data1 = data1.drop(data1.index[:30])
    data1 = data1.drop(data1.index[-30:])
    data1.reset_index(drop=True, inplace=True)
    
    return data1

In [28]:
def objective_log_regresor(trial, data):
    # Dividir los datos en conjuntos de entrenamiento y prueba
    X = data.iloc[:, :-1]
    # Selecciona la variable objetivo
    y = data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
    # Definir los parámetros a optimizar
    penalty = trial.suggest_categorical('penalty', ['l1', 'l2'])
    C = trial.suggest_loguniform('C', 0.001, 1000)
    solver = trial.suggest_categorical('solver', ['liblinear', 'saga'])

    # Crear el modelo de regresión logística con los parámetros sugeridos
    model = LogisticRegression(penalty=penalty, C=C, solver=solver, max_iter=10_000, random_state=123)
    # Entrenar el modelo
    model.fit(X_train, y_train)
    # Calcular la precisión en el conjunto de prueba
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

In [29]:
def objective_svm(trial, data):
    # Dividir los datos en conjuntos de entrenamiento y prueba
    X = data.iloc[:, :-1]
    # Selecciona la variable objetivo
    y = data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Definir los parámetros a optimizar
    C = trial.suggest_loguniform('C', 0.001, 1000)
    kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])
    if kernel == 'poly':
        degree = trial.suggest_int('degree', 2, 5)
    else:
        degree = 3  # Valor predeterminado si el kernel no es 'poly'
    gamma = trial.suggest_categorical('gamma', ['scale', 'auto']) if kernel in ['rbf', 'poly', 'sigmoid'] else 'scale'
    # Crear el modelo SVM con los parámetros sugeridos
    model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma, max_iter=10_000, random_state=123)
    # Entrenar el modelo
    model.fit(X_train, y_train)
    # Calcular la precisión en el conjunto de prueba
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

In [30]:
def objective_xgboost(trial, data):
    data = data.copy()
    # Dividir los datos en conjuntos de entrenamiento y prueba
    X = data.iloc[:, :-1]
    # Selecciona la variable objetivo
    y = data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    # Definir los parámetros a optimizar
    n_estimators = trial.suggest_int('n_estimators', 100, 1000, step=100)
    max_depth = trial.suggest_int('max_depth', 3, 10)
    learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
    subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
    colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
    # Crear el modelo XGBoost con los parámetros sugeridos
    model = XGBClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=learning_rate,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        random_state=123,
        max_iter=10_000
    )
    # Entrenar el modelo
    model.fit(X_train, y_train)
    # Calcular la precisión en el conjunto de prueba
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

In [31]:
def optimize_params_log_regresor(data):
    # Crear un estudio Optuna para la optimización
    study = optuna.create_study(direction='maximize')
    
    # Función objetivo con el dataset como parámetro fijo
    objective_fn = lambda trial: objective_log_regresor(trial, data)
    
    # Ejecutar la optimización
    study.optimize(objective_fn, n_trials=20)

    # Obtener los mejores parámetros
    best_params = study.best_params
    best_accuracy = study.best_value

    return best_params, best_accuracy

In [32]:
def optimize_params_svm(data):
    # Crear un estudio Optuna para la optimización
    study = optuna.create_study(direction='maximize')
    
    # Función objetivo con el dataset como parámetro fijo
    objective_fn = lambda trial: objective_svm(trial, data)
    
    # Ejecutar la optimización
    study.optimize(objective_fn, n_trials=20)

    # Obtener los mejores parámetros
    best_params = study.best_params
    best_accuracy = study.best_value

    return best_params, best_accuracy

In [33]:
def optimize_params_xgboost(data):
    # Crear un estudio Optuna para la optimización
    study = optuna.create_study(direction='maximize')
    
    # Función objetivo con el dataset como parámetro fijo
    objective_fn = lambda trial: objective_xgboost(trial, data)
    
    # Ejecutar la optimización
    study.optimize(objective_fn, n_trials=20)

    # Obtener los mejores parámetros
    best_params = study.best_params
    best_accuracy = study.best_value

    return best_params, best_accuracy

In [34]:
def optimize_params(data):
    # Optimización de regresión logística
    best_params_lr, best_accuracy_lr = optimize_params_log_regresor(data)
    print("Mejores parámetros de regresión logística:", best_params_lr)
    # Optimización de SVM
    best_params_svm, best_accuracy_svm = optimize_params_svm(data)
    print("Mejores parámetros de SVM:", best_params_svm)
    # Optimización de XGBoost
    best_params_xgb, best_accuracy_xgb = optimize_params_xgboost(data)
    print("Mejores parámetros de XGBoost:", best_params_xgb)
    return best_params_svm, best_params_lr, best_params_xgb

In [35]:
def params(data: str):
    data_1d_train = pd.read_csv(data)
    data_1d_train = data_1d_train.dropna()
    dataresult_long_1d_train = file_features(data_1d_train, ds_type="buy")
    dataresult_short_1d_train = file_features(data_1d_train, ds_type="sell")
    best_params_svm_long, best_params_lr_long, best_params_xgb_long = optimize_params(dataresult_long_1d_train)
    best_params_svm_short, best_params_lr_short, best_params_xgb_short = optimize_params(dataresult_short_1d_train)
    
    return best_params_lr_long, best_params_svm_long, best_params_xgb_long, best_params_lr_short, best_params_svm_short, best_params_xgb_short

In [36]:
logistic_params_1d_long, svm_params_1d_long, xgboost_params_1d_long, logistic_params_1d_short, svm_params_1d_short, xgboost_params_1d_short = params("data/aapl_1d_train.csv")

[I 2024-04-09 22:32:37,383] A new study created in memory with name: no-name-6d2e4eb2-5f14-4133-8b40-c4a65089e954
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:32:41,582] Trial 0 finished with value: 0.6564593301435406 and parameters: {'penalty': 'l2', 'C': 0.1796765744160107, 'solver': 'saga'}. Best is trial 0 with value: 0.6564593301435406.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:32:41,926] Trial 1 finished with value: 0.6593301435406699 and parameters: {'penalty': 'l1', 'C': 0.010968139046945382, 'solver': 'saga'}. Best is trial 1 with value: 0.6593301435406699.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:32:41,942] Trial 2 finished with value: 0.6593301435406699 and parameters: {'penalty': 'l2', 'C': 821.2808174255085, 'solver': 'liblinear'}. Best is trial 1 with value: 0.6593301435406699.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:32:45,486] Trial 3 finished with value: 0.6564593301435406 an

Mejores parámetros de regresión logística: {'penalty': 'l1', 'C': 0.010968139046945382, 'solver': 'saga'}


[I 2024-04-09 22:33:11,160] Trial 0 finished with value: 0.41913875598086126 and parameters: {'C': 0.061414378859970746, 'kernel': 'linear'}. Best is trial 0 with value: 0.41913875598086126.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:33:11,698] Trial 1 finished with value: 0.6593301435406699 and parameters: {'C': 0.43326595131065515, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 1 with value: 0.6593301435406699.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:33:12,550] Trial 2 finished with value: 0.6593301435406699 and parameters: {'C': 0.45061736358005255, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 1 with value: 0.6593301435406699.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:33:13,016] Trial 3 finished with value: 0.6593301435406699 and parameters: {'C': 0.009704285874560488, 'kernel': 'linear'}. Best is trial 1 with value: 0.6593301435406699.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2

Mejores parámetros de SVM: {'C': 0.43326595131065515, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}


[I 2024-04-09 22:33:22,325] Trial 0 finished with value: 0.5492822966507177 and parameters: {'n_estimators': 600, 'max_depth': 5, 'learning_rate': 0.12433422150235085, 'subsample': 0.9, 'colsample_bytree': 0.7}. Best is trial 0 with value: 0.5492822966507177.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter" } are not used.

[I 2024-04-09 22:33:24,329] Trial 1 finished with value: 0.55311004784689 and parameters: {'n_estimators': 1000, 'max_depth': 10, 'learning_rate': 0.01505270856303083, 'subsample': 0.7, 'colsample_bytree': 1.0}. Best is trial 1 with value: 0.55311004784689.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_by

Mejores parámetros de XGBoost: {'n_estimators': 200, 'max_depth': 4, 'learning_rate': 0.01737000818203036, 'subsample': 0.6, 'colsample_bytree': 0.8}


[I 2024-04-09 22:33:32,335] Trial 1 finished with value: 0.6602870813397129 and parameters: {'penalty': 'l1', 'C': 36.32196574239224, 'solver': 'liblinear'}. Best is trial 0 with value: 0.662200956937799.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:33:36,433] Trial 2 finished with value: 0.6583732057416268 and parameters: {'penalty': 'l1', 'C': 5.351811048229829, 'solver': 'saga'}. Best is trial 0 with value: 0.662200956937799.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:33:36,449] Trial 3 finished with value: 0.6574162679425838 and parameters: {'penalty': 'l2', 'C': 0.021415818023240857, 'solver': 'liblinear'}. Best is trial 0 with value: 0.662200956937799.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:33:37,769] Trial 4 finished with value: 0.6602870813397129 and parameters: {'penalty': 'l1', 'C': 2.097341312469804, 'solver': 'liblinear'}. Best is trial 0 with value: 0.662200956937799.
  C = trial.suggest_loguniform('C',

Mejores parámetros de regresión logística: {'penalty': 'l2', 'C': 0.6431546383805156, 'solver': 'liblinear'}


[I 2024-04-09 22:33:50,576] Trial 0 finished with value: 0.661244019138756 and parameters: {'C': 0.0035454674239493874, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 0 with value: 0.661244019138756.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:33:50,921] Trial 1 finished with value: 0.5014354066985646 and parameters: {'C': 0.39203213125879516, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 0 with value: 0.661244019138756.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:33:51,443] Trial 2 finished with value: 0.661244019138756 and parameters: {'C': 0.008510351060590953, 'kernel': 'poly', 'degree': 4, 'gamma': 'scale'}. Best is trial 0 with value: 0.661244019138756.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:33:51,977] Trial 3 finished with value: 0.661244019138756 and parameters: {'C': 0.0017184873841547049, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 0 with value: 0.661244019138756.
  C = trial.suggest_l

Mejores parámetros de SVM: {'C': 0.0035454674239493874, 'kernel': 'rbf', 'gamma': 'scale'}


[I 2024-04-09 22:34:00,554] Trial 0 finished with value: 0.5645933014354066 and parameters: {'n_estimators': 300, 'max_depth': 4, 'learning_rate': 0.06013607443118158, 'subsample': 0.9, 'colsample_bytree': 1.0}. Best is trial 0 with value: 0.5645933014354066.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter" } are not used.

[I 2024-04-09 22:34:01,023] Trial 1 finished with value: 0.5464114832535886 and parameters: {'n_estimators': 900, 'max_depth': 3, 'learning_rate': 0.10992730127435824, 'subsample': 0.8, 'colsample_bytree': 0.8}. Best is trial 0 with value: 0.5645933014354066.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_

Mejores parámetros de XGBoost: {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.030090107781624707, 'subsample': 0.9, 'colsample_bytree': 0.8}


In [37]:
logistic_params_1h_long, svm_params_1h_long, xgboost_params_1h_long, logistic_params_1h_short, svm_params_1h_short, xgboost_params_1h_short = params("data/aapl_1h_train.csv")

[I 2024-04-09 22:34:11,627] A new study created in memory with name: no-name-88813fd5-bc2d-4508-bafc-4a535cd35406
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:11,927] Trial 0 finished with value: 0.3125 and parameters: {'penalty': 'l2', 'C': 5.2064407063007545, 'solver': 'saga'}. Best is trial 0 with value: 0.3125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:12,152] Trial 1 finished with value: 0.3125 and parameters: {'penalty': 'l2', 'C': 72.07770753287781, 'solver': 'saga'}. Best is trial 0 with value: 0.3125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:12,158] Trial 2 finished with value: 0.765625 and parameters: {'penalty': 'l1', 'C': 0.0021323714279392757, 'solver': 'saga'}. Best is trial 2 with value: 0.765625.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:12,853] Trial 3 finished with value: 0.203125 and parameters: {'penalty': 'l1', 'C': 178.49543206972857, 'solver': 'liblinear'}. B

Mejores parámetros de regresión logística: {'penalty': 'l1', 'C': 0.0021323714279392757, 'solver': 'saga'}


[I 2024-04-09 22:34:15,083] Trial 15 finished with value: 0.671875 and parameters: {'C': 39.21074674382456, 'kernel': 'poly', 'degree': 4, 'gamma': 'auto'}. Best is trial 2 with value: 0.703125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:15,104] Trial 16 finished with value: 0.25 and parameters: {'C': 478.91575669173466, 'kernel': 'linear'}. Best is trial 2 with value: 0.703125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:15,118] Trial 17 finished with value: 0.234375 and parameters: {'C': 1.532879727392161, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 2 with value: 0.703125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:15,146] Trial 18 finished with value: 0.625 and parameters: {'C': 6.604752799341769, 'kernel': 'poly', 'degree': 3, 'gamma': 'auto'}. Best is trial 2 with value: 0.703125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:15,159] Trial 19 finished with value: 0.234375 a

Mejores parámetros de SVM: {'C': 0.04584664364591048, 'kernel': 'poly', 'degree': 2, 'gamma': 'auto'}


[I 2024-04-09 22:34:15,444] Trial 0 finished with value: 0.375 and parameters: {'n_estimators': 500, 'max_depth': 9, 'learning_rate': 0.09546149786469105, 'subsample': 0.7, 'colsample_bytree': 0.8}. Best is trial 0 with value: 0.375.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter" } are not used.

[I 2024-04-09 22:34:15,565] Trial 1 finished with value: 0.359375 and parameters: {'n_estimators': 200, 'max_depth': 5, 'learning_rate': 0.013041247657032835, 'subsample': 0.5, 'colsample_bytree': 1.0}. Best is trial 0 with value: 0.375.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter"

Mejores parámetros de XGBoost: {'n_estimators': 900, 'max_depth': 9, 'learning_rate': 0.2220589262776878, 'subsample': 0.5, 'colsample_bytree': 0.7}


[I 2024-04-09 22:34:20,857] Trial 0 finished with value: 0.3125 and parameters: {'penalty': 'l1', 'C': 6.254169900980349, 'solver': 'saga'}. Best is trial 0 with value: 0.3125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:21,556] Trial 1 finished with value: 0.1875 and parameters: {'penalty': 'l1', 'C': 480.4009633378153, 'solver': 'liblinear'}. Best is trial 0 with value: 0.3125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:21,649] Trial 2 finished with value: 0.375 and parameters: {'penalty': 'l2', 'C': 0.044784496339515464, 'solver': 'saga'}. Best is trial 2 with value: 0.375.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:21,667] Trial 3 finished with value: 0.421875 and parameters: {'penalty': 'l2', 'C': 0.0028315415752095535, 'solver': 'saga'}. Best is trial 3 with value: 0.421875.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:22,096] Trial 4 finished with value: 0.296875 and parameters: 

Mejores parámetros de regresión logística: {'penalty': 'l2', 'C': 0.0028315415752095535, 'solver': 'saga'}


[I 2024-04-09 22:34:24,412] Trial 17 finished with value: 0.21875 and parameters: {'C': 40.06753069465152, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 1 with value: 0.78125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:24,425] Trial 18 finished with value: 0.21875 and parameters: {'C': 0.18274359857920905, 'kernel': 'linear'}. Best is trial 1 with value: 0.78125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:24,437] Trial 19 finished with value: 0.21875 and parameters: {'C': 0.005437205263968488, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 1 with value: 0.78125.
[I 2024-04-09 22:34:24,438] A new study created in memory with name: no-name-b43fbfc1-cf19-4d9f-8450-c75d56083c98
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter" 

Mejores parámetros de SVM: {'C': 0.2018232497206618, 'kernel': 'rbf', 'gamma': 'auto'}


[I 2024-04-09 22:34:25,099] Trial 1 finished with value: 0.375 and parameters: {'n_estimators': 1000, 'max_depth': 5, 'learning_rate': 0.018001348182327223, 'subsample': 1.0, 'colsample_bytree': 0.9}. Best is trial 0 with value: 0.390625.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter" } are not used.

[I 2024-04-09 22:34:25,553] Trial 2 finished with value: 0.34375 and parameters: {'n_estimators': 700, 'max_depth': 8, 'learning_rate': 0.033694986470518815, 'subsample': 1.0, 'colsample_bytree': 0.9}. Best is trial 0 with value: 0.390625.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "ma

Mejores parámetros de XGBoost: {'n_estimators': 100, 'max_depth': 10, 'learning_rate': 0.102602018285415, 'subsample': 0.5, 'colsample_bytree': 0.6}


In [38]:
logistic_params_1m_long, svm_params_1m_long, xgboost_params_1m_long, logistic_params_1m_short, svm_params_1m_short, xgboost_params_1m_short = params("data/aapl_1m_train.csv")

[I 2024-04-09 22:34:29,239] A new study created in memory with name: no-name-8caf7def-6613-4832-95ab-fe191fb893fc
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:29,245] Trial 0 finished with value: 0.328125 and parameters: {'penalty': 'l2', 'C': 2.8254439628942936, 'solver': 'liblinear'}. Best is trial 0 with value: 0.328125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:29,252] Trial 1 finished with value: 0.234375 and parameters: {'penalty': 'l1', 'C': 0.0019724770256614936, 'solver': 'liblinear'}. Best is trial 0 with value: 0.328125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:29,946] Trial 2 finished with value: 0.203125 and parameters: {'penalty': 'l1', 'C': 322.9479687374996, 'solver': 'liblinear'}. Best is trial 0 with value: 0.328125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:29,984] Trial 3 finished with value: 0.375 and parameters: {'penalty': 'l2', 'C': 0.009060464431933262, 'so

Mejores parámetros de regresión logística: {'penalty': 'l2', 'C': 0.0015827990076342706, 'solver': 'saga'}


  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:32,249] Trial 19 finished with value: 0.234375 and parameters: {'C': 0.33148860835040184, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 5 with value: 0.6875.
[I 2024-04-09 22:34:32,250] A new study created in memory with name: no-name-4c3e589e-161a-4541-9ef7-8f1228d8a9cd
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter" } are not used.

[I 2024-04-09 22:34:32,328] Trial 0 finished with value: 0.328125 and parameters: {'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.015186455486987863, 'subsample': 0.9, 'colsample_bytree': 0.9}. Best is trial 0 with value: 0.328125.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0,

Mejores parámetros de SVM: {'C': 208.31110635195978, 'kernel': 'rbf', 'gamma': 'auto'}


[I 2024-04-09 22:34:32,590] Trial 1 finished with value: 0.359375 and parameters: {'n_estimators': 600, 'max_depth': 10, 'learning_rate': 0.1895823003911276, 'subsample': 0.8, 'colsample_bytree': 1.0}. Best is trial 1 with value: 0.359375.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter" } are not used.

[I 2024-04-09 22:34:32,897] Trial 2 finished with value: 0.328125 and parameters: {'n_estimators': 1000, 'max_depth': 8, 'learning_rate': 0.3138231930714395, 'subsample': 0.6, 'colsample_bytree': 1.0}. Best is trial 1 with value: 0.359375.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "m

Mejores parámetros de XGBoost: {'n_estimators': 200, 'max_depth': 8, 'learning_rate': 0.016335181464666, 'subsample': 0.6, 'colsample_bytree': 0.5}


[I 2024-04-09 22:34:37,213] Trial 1 finished with value: 0.203125 and parameters: {'penalty': 'l1', 'C': 13.1532840732247, 'solver': 'liblinear'}. Best is trial 0 with value: 0.390625.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:37,252] Trial 2 finished with value: 0.390625 and parameters: {'penalty': 'l2', 'C': 0.008861789686083622, 'solver': 'saga'}. Best is trial 0 with value: 0.390625.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:37,402] Trial 3 finished with value: 0.34375 and parameters: {'penalty': 'l2', 'C': 0.14903357651947263, 'solver': 'saga'}. Best is trial 0 with value: 0.390625.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:37,624] Trial 4 finished with value: 0.28125 and parameters: {'penalty': 'l2', 'C': 3.063017204782627, 'solver': 'saga'}. Best is trial 0 with value: 0.390625.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:37,630] Trial 5 finished with value: 0.25 and paramet

Mejores parámetros de regresión logística: {'penalty': 'l2', 'C': 0.002105767490569824, 'solver': 'saga'}


  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:40,168] Trial 14 finished with value: 0.5 and parameters: {'C': 82.70898411262169, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 5 with value: 0.78125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:40,181] Trial 15 finished with value: 0.234375 and parameters: {'C': 0.4115262010935288, 'kernel': 'linear'}. Best is trial 5 with value: 0.78125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:40,194] Trial 16 finished with value: 0.703125 and parameters: {'C': 9.94327184053376, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.78125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:40,214] Trial 17 finished with value: 0.21875 and parameters: {'C': 547.5935638415561, 'kernel': 'linear'}. Best is trial 5 with value: 0.78125.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:40,228] Trial 18 finished with value: 0.7031

Mejores parámetros de SVM: {'C': 89.33147882881373, 'kernel': 'linear'}


[I 2024-04-09 22:34:40,587] Trial 0 finished with value: 0.3125 and parameters: {'n_estimators': 900, 'max_depth': 9, 'learning_rate': 0.18076739947479364, 'subsample': 1.0, 'colsample_bytree': 1.0}. Best is trial 0 with value: 0.3125.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter" } are not used.

[I 2024-04-09 22:34:40,873] Trial 1 finished with value: 0.34375 and parameters: {'n_estimators': 900, 'max_depth': 3, 'learning_rate': 0.26930560992636277, 'subsample': 1.0, 'colsample_bytree': 1.0}. Best is trial 1 with value: 0.34375.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_ite

Mejores parámetros de XGBoost: {'n_estimators': 400, 'max_depth': 3, 'learning_rate': 0.061659645644687434, 'subsample': 0.8, 'colsample_bytree': 0.7}


In [None]:
logistic_params_5m_long, svm_params_5m_long, xgboost_params_5m_long, logistic_params_5m_short, svm_params_5m_short, xgboost_params_5m_short = params("data/aapl_5m_train.csv")

[I 2024-04-09 22:34:44,582] A new study created in memory with name: no-name-f9ba9170-031e-463c-bbb1-ea6c39e84c3f
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:46,311] Trial 0 finished with value: 0.5003836317135549 and parameters: {'penalty': 'l2', 'C': 0.0018530287394076777, 'solver': 'saga'}. Best is trial 0 with value: 0.5003836317135549.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:50,317] Trial 1 finished with value: 0.4966751918158568 and parameters: {'penalty': 'l2', 'C': 0.5100156334562252, 'solver': 'saga'}. Best is trial 0 with value: 0.5003836317135549.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:54,912] Trial 2 finished with value: 0.4968030690537084 and parameters: {'penalty': 'l1', 'C': 94.37950768512842, 'solver': 'saga'}. Best is trial 0 with value: 0.5003836317135549.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:34:57,588] Trial 3 finished with value: 0.5010230179028133 and pa

Mejores parámetros de regresión logística: {'penalty': 'l2', 'C': 0.002395829495054093, 'solver': 'liblinear'}


[I 2024-04-09 22:41:17,069] Trial 0 finished with value: 0.49923273657289 and parameters: {'C': 0.07823691840138479, 'kernel': 'linear'}. Best is trial 0 with value: 0.49923273657289.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:41:18,901] Trial 1 finished with value: 0.5075447570332481 and parameters: {'C': 1.2674477769826162, 'kernel': 'poly', 'degree': 2, 'gamma': 'auto'}. Best is trial 1 with value: 0.5075447570332481.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:41:20,950] Trial 2 finished with value: 0.5040920716112532 and parameters: {'C': 13.432964266547256, 'kernel': 'linear'}. Best is trial 1 with value: 0.5075447570332481.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:41:25,865] Trial 3 finished with value: 0.5001278772378517 and parameters: {'C': 0.7640505865563899, 'kernel': 'linear'}. Best is trial 1 with value: 0.5075447570332481.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:41:51,492] Tri

Mejores parámetros de SVM: {'C': 0.002176996974479978, 'kernel': 'sigmoid', 'gamma': 'scale'}


[I 2024-04-09 22:47:20,636] Trial 0 finished with value: 0.49373401534526856 and parameters: {'n_estimators': 200, 'max_depth': 4, 'learning_rate': 0.03943694697221585, 'subsample': 0.6, 'colsample_bytree': 0.9}. Best is trial 0 with value: 0.49373401534526856.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsample_bytree', 0.5, 1.0, 0.1)
Parameters: { "max_iter" } are not used.

[I 2024-04-09 22:47:21,236] Trial 1 finished with value: 0.4962915601023018 and parameters: {'n_estimators': 600, 'max_depth': 5, 'learning_rate': 0.08008828956625542, 'subsample': 1.0, 'colsample_bytree': 0.5}. Best is trial 1 with value: 0.4962915601023018.
  learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.5)
  subsample = trial.suggest_discrete_uniform('subsample', 0.5, 1.0, 0.1)
  colsample_bytree = trial.suggest_discrete_uniform('colsampl

Mejores parámetros de XGBoost: {'n_estimators': 1000, 'max_depth': 9, 'learning_rate': 0.014804455005781484, 'subsample': 0.9, 'colsample_bytree': 0.7}


[I 2024-04-09 22:47:52,105] Trial 0 finished with value: 0.4997442455242967 and parameters: {'penalty': 'l1', 'C': 1.504928660446724, 'solver': 'saga'}. Best is trial 0 with value: 0.4997442455242967.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:47:52,211] Trial 1 finished with value: 0.5023017902813299 and parameters: {'penalty': 'l2', 'C': 0.010388708053639678, 'solver': 'liblinear'}. Best is trial 1 with value: 0.5023017902813299.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:47:56,971] Trial 2 finished with value: 0.5010230179028133 and parameters: {'penalty': 'l1', 'C': 0.34170519743426936, 'solver': 'saga'}. Best is trial 1 with value: 0.5023017902813299.
  C = trial.suggest_loguniform('C', 0.001, 1000)
[I 2024-04-09 22:48:01,038] Trial 3 finished with value: 0.49910485933503834 and parameters: {'penalty': 'l2', 'C': 6.2898124437453085, 'solver': 'saga'}. Best is trial 1 with value: 0.5023017902813299.
  C = trial.suggest_loguniform('C', 0

In [None]:
def buy_signals(data, logistic_params, svm_params, xgboost_params):
    buy_signals = pd.DataFrame()
    # Selecciona las características
    X = data.iloc[:, :-1]
    # Selecciona la variable objetivo
    y = data.iloc[:, -1]

    # Crear modelos con los parámetros proporcionados
    logistic_model = LogisticRegression(**logistic_params)
    svm_model = SVC(**svm_params)
    xgboost_model = XGBClassifier(**xgboost_params)

    # Entrenar los modelos con todo el conjunto de datos original
    logistic_model.fit(X, y)
    svm_model.fit(X, y)
    xgboost_model.fit(X, y)

    # Realizar predicciones en el conjunto de datos original
    predictions_lr = logistic_model.predict(X)
    predictions_svm = svm_model.predict(X)
    predictions_xgboost = xgboost_model.predict(X)
    predictions_xgboost_bool = predictions_xgboost.astype(bool)

    # Agregar las predicciones como nuevas columnas al conjunto de datos original
    buy_signals['predicciones_lr'] = predictions_lr
    buy_signals['predicciones_svm'] = predictions_svm
    buy_signals['predicciones_xgboost'] = predictions_xgboost_bool

    return buy_signals

In [None]:
def sell_signals(data, logistic_params, svm_params, xgboost_params):
    sell_signals = pd.DataFrame()
    # Selecciona las características
    X = data.iloc[:, :-1]
    # Selecciona la variable objetivo
    y = data.iloc[:, -1]

    # Crear modelos con los parámetros proporcionados
    logistic_model = LogisticRegression(**logistic_params)
    svm_model = SVC(**svm_params)
    xgboost_model = XGBClassifier(**xgboost_params)

    # Entrenar los modelos con todo el conjunto de datos original
    logistic_model.fit(X, y)
    svm_model.fit(X, y)
    xgboost_model.fit(X, y)

    # Realizar predicciones en el conjunto de datos original
    predictions_lr = logistic_model.predict(X)
    predictions_svm = svm_model.predict(X)
    predictions_xgboost = xgboost_model.predict(X)
    predictions_xgboost_bool = predictions_xgboost.astype(bool)

    # Agregar las predicciones como nuevas columnas al conjunto de datos original
    sell_signals['predicciones_lr'] = predictions_lr
    sell_signals['predicciones_svm'] = predictions_svm
    sell_signals['predicciones_xgboost'] = predictions_xgboost_bool

    return sell_signals

In [None]:
def backtest(data, buy_signals, sell_signals, stop_loss, take_profit, n_shares):
            history = []
            active_operations = []
            cash = 1_000_000
            com = 1.25 / 100

            for i, row in data.iterrows():
                # close active operation
                active_op_temp = []
                for operation in active_operations:
                    if operation["stop_loss"] > row.Close:
                        cash += (row.Close * operation["n_shares"]) * (1 - com)
                    elif operation["take_profit"] < row.Close:
                        cash += (row.Close * operation["n_shares"]) * (1 - com)
                    else:
                        active_op_temp.append(operation)
                active_operations = active_op_temp

                # check if we have enough cash
                if cash < (row.Close * (1 + com)):
                    asset_vals = sum([operation["n_shares"] * row.Close for operation in active_operations])
                    portfolio_value = cash + asset_vals
                    continue

                # Apply buy signals
                if buy_signals.loc[i].any():
                    active_operations.append({
                        "bought": row.Close,
                        "n_shares": n_shares,
                        "stop_loss": row.Close * stop_loss,
                        "take_profit": row.Close * take_profit
                    })

                    cash -= row.Close * (1 + com) * n_shares

                # Apply sell signals
                if sell_signals.loc[i].any():
                    active_op_temp = []
                    for operation in active_operations:
                        if operation["take_profit"] < row.Close or operation["stop_loss"] > row.Close:
                            cash += (row.Close * operation["n_shares"]) * (1 - com)
                        else:
                            active_op_temp.append(operation)
                    active_operations = active_op_temp

                asset_vals = sum([operation["n_shares"] * row.Close for operation in active_operations])
                portfolio_value = cash + asset_vals

            return portfolio_value

In [None]:
def optimize(trial, strategy, data):
    portfolio_value = 0

    stop_loss = trial.suggest_float("stop_loss", 0.80, 0.90)
    take_profit = trial.suggest_float("take_profit", 1.01, 1.10)
    n_shares = trial.suggest_int("n_shares", 20, 50)

    strat_params = {}

    buy_signals = pd.DataFrame()
    sell_signals = pd.DataFrame()

    if "logistic" in strategy:
        buy_signals["logistic"] = global_buy_signals["predicciones_lr"]
        sell_signals["logistic"] = global_sell_signals["predicciones_lr"]
        
    if "svm" in strategy:
        buy_signals["svm"] = global_buy_signals["predicciones_svm"]
        sell_signals["svm"] = global_sell_signals["predicciones_svm"]
        
    if "xg" in strategy:
        buy_signals["xg"] = global_buy_signals["predicciones_xgboost"]
        sell_signals["xg"] = global_sell_signals["predicciones_xgboost"]
    
    return backtest(data, buy_signals, sell_signals, stop_loss, take_profit, n_shares)

In [None]:
def optimize_file(data):
    data = data.drop(data.index[:30])
    data = data.drop(data.index[-30:])
    data.reset_index(drop=True, inplace=True)    
    strategies = list(powerset(["logistic", "svm", "xg"]))
    best_strat = None
    best_val = -1
    best_params = None

    for strat in strategies:
        study = optuna.create_study(direction="maximize")
        study.optimize(lambda x: optimize(x, strat, data), n_trials=20)
        value = study.best_value
        print(f"Best value found so far: {value}")
        if value > best_val:
            best_val = value
            best_strat = strat
            best_params = study.best_params
            print(best_strat)
            print(best_params)

    return {"file": data,
            "strat": best_strat,
            "value": best_val,
            "params": best_params}

In [None]:
#Ahora con los datos de prueba

In [None]:
data_1d_test = pd.read_csv("data/aapl_1d_test.csv")
data_1d_test = data_1d_test.dropna()
dataresult_long_1d_test = file_features(data_1d_test, ds_type="buy")
dataresult_short_1d_test = file_features(data_1d_test, ds_type="sell")
global_buy_signals = buy_signals(dataresult_long_1d_test, logistic_params_1d_long, svm_params_1d_long, xgboost_params_1d_long)
global_sell_signals = sell_signals(dataresult_short_1d_test, logistic_params_1d_short, svm_params_1d_short, xgboost_params_1d_short)
file_1d_test = optimize_file(data_1d_test)

In [None]:
data_1m_test = pd.read_csv("data/aapl_1m_train.csv")
data_1m_test = data_1m_test.dropna()
dataresult_long_1m_test = file_features(data_1m_test, ds_type="buy")
dataresult_short_1m_test = file_features(data_1m_test, ds_type="sell")
global_buy_signals = buy_signals(dataresult_long_1m_test, logistic_params_1m_long, svm_params_1m_long, xgboost_params_1m_long)
global_sell_signals = sell_signals(dataresult_short_1m_test, logistic_params_1m_short, svm_params_1m_short, xgboost_params_1m_short)
file_1m_test = optimize_file(data_1m_test)

In [None]:
data_1h_test = pd.read_csv("data/aapl_1h_test.csv")
data_1h_test = data_1h_test.dropna()
dataresult_long_1h_test = file_features(data_1h_test, ds_type="buy")
dataresult_short_1h_test = file_features(data_1h_test, ds_type="sell")
global_buy_signals = buy_signals(dataresult_long_1h_test, logistic_params_1h_long, svm_params_1h_long, xgboost_params_1h_long)
global_sell_signals = sell_signals(dataresult_short_1h_test, logistic_params_1h_short, svm_params_1h_short, xgboost_params_1h_short)
file_1h_test = optimize_file(data_1h_test)

In [None]:
data_5m_test = pd.read_csv("data/aapl_5m_train.csv")
data_5m_test = data_5m_test.dropna()
dataresult_long_5m_test = file_features(data_5m_test, ds_type="buy")
dataresult_short_5m_test = file_features(data_5m_test, ds_type="sell")
global_buy_signals = buy_signals(dataresult_long_5m_test, logistic_params_5m_long, svm_params_5m_long, xgboost_params_5m_long)
global_sell_signals = sell_signals(dataresult_short_5m_test, logistic_params_5m_short, svm_params_5m_short, xgboost_params_5m_short)
file_5m_test = optimize_file(data_5m_test)