In [1]:
import optuna
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Cargar un dataset de ejemplo
data = datasets.load_breast_cancer()
X = data.data
y = data.target

# Dividir los datos en entrenamiento y validación
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# Definir la función objetivo
def objective(trial):
    # Sugerir hiperparámetros para optimizar
    lr = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)  # Tasa de aprendizaje
    C = trial.suggest_float('C', 1e-5, 10.0, log=True)  # Parámetro de regularización
    
    # Preprocesar los datos (escalado)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_valid_scaled = scaler.transform(X_valid)

    # Definir el modelo con los hiperparámetros sugeridos
    model = LogisticRegression(C=C, solver='lbfgs', max_iter=1000, random_state=42)
    
    # Entrenar el modelo
    model.fit(X_train_scaled, y_train)
    
    # Evaluar el modelo en el conjunto de validación
    y_pred = model.predict(X_valid_scaled)
    accuracy = accuracy_score(y_valid, y_pred)

    # Retornar la métrica que queremos optimizar (en este caso, la precisión)
    return accuracy

# Crear el estudio de Optuna y realizar la optimización
study = optuna.create_study(direction="maximize")  # Optimizamos para maximizar la precisión
study.optimize(objective, n_trials=100)

# Mostrar los mejores hiperparámetros encontrados
print("Best hyperparameters: ", study.best_params)
print("Best accuracy: ", study.best_value)


[I 2024-09-17 11:33:52,442] A new study created in memory with name: no-name-aa719f12-cde5-4de9-b145-03e19617d2eb
[I 2024-09-17 11:33:52,476] Trial 0 finished with value: 0.9736842105263158 and parameters: {'learning_rate': 0.01117316045925188, 'C': 0.8164449685394375}. Best is trial 0 with value: 0.9736842105263158.
[I 2024-09-17 11:33:52,490] Trial 1 finished with value: 0.8070175438596491 and parameters: {'learning_rate': 0.00013772071225645247, 'C': 0.0003518301073700355}. Best is trial 0 with value: 0.9736842105263158.
[I 2024-09-17 11:33:52,507] Trial 2 finished with value: 0.9649122807017544 and parameters: {'learning_rate': 5.9704864078909555e-05, 'C': 0.014183330949101876}. Best is trial 0 with value: 0.9736842105263158.
[I 2024-09-17 11:33:52,676] Trial 3 finished with value: 0.6228070175438597 and parameters: {'learning_rate': 3.212577654264932e-05, 'C': 4.808255069373159e-05}. Best is trial 0 with value: 0.9736842105263158.
[I 2024-09-17 11:33:52,732] Trial 4 finished with 

Best hyperparameters:  {'learning_rate': 0.008006905143994053, 'C': 0.07777173914815366}
Best accuracy:  0.9824561403508771


In [5]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import optuna
from sklearn.metrics import mean_squared_error

# Simulación de datos de series temporales financieras
def generate_synthetic_data(n_series=100, n_timesteps=120, noise=0.1):
    """
    Genera datos sintéticos de series temporales para simular datos financieros.
    Cada serie es una señal sinusoidal con un poco de ruido.
    """
    X = np.zeros((n_series, n_timesteps))
    y = np.zeros((n_series,))
    
    for i in range(n_series):
        t = np.arange(0, n_timesteps)
        X[i, :] = np.sin(2 * np.pi * t / 25) + noise * np.random.randn(n_timesteps)
        y[i] = X[i, -1] + noise * np.random.randn()  # Predecir el último valor de la serie
    
    return X, y

# Generar los datos de entrenamiento y validación
X_train, y_train = generate_synthetic_data(n_series=800, n_timesteps=104)
X_valid, y_valid = generate_synthetic_data(n_series=200, n_timesteps=104)

# Convertir los datos a tensores
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid, dtype=torch.float32)

# Crear DataLoader para alimentar los datos al modelo
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)
valid_loader = DataLoader(TensorDataset(X_valid_tensor, y_valid_tensor), batch_size=32)

# Definir el modelo PatchTST (simplificado)
class PatchTST(nn.Module):
    def __init__(self, input_size, patch_len, hidden_size):
        super(PatchTST, self).__init__()
        self.patch_len = patch_len
        self.hidden_size = hidden_size
        
        # Definir una red simple con capas lineales
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)
    
    def forward(self, x):
        # Aplicar la red
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Función para calcular MAPE
def MAPE(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Función para calcular sMAPE
def sMAPE(y_true, y_pred):
    return 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))

# Función para entrenar el modelo
def train_model(model, optimizer, train_loader, valid_loader, n_epochs=10):
    criterion = nn.MSELoss()  # Usamos el MSE como métrica de error
    for epoch in range(n_epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output.squeeze(), y_batch)
            loss.backward()
            optimizer.step()

    # Evaluación del modelo en el conjunto de validación
    model.eval()
    preds = []
    targets = []
    with torch.no_grad():
        for X_batch, y_batch in valid_loader:
            output = model(X_batch)
            preds.append(output.squeeze().numpy())
            targets.append(y_batch.numpy())
    
    preds = np.concatenate(preds)
    targets = np.concatenate(targets)
    
    mse = mean_squared_error(targets, preds)
    mape = MAPE(targets, preds)
    smape = sMAPE(targets, preds)
    
    return mse, mape, smape

# Función objetivo para Optuna
def objective(trial):
    # Sugerir hiperparámetros
    patch_len = trial.suggest_int('patch_len', 16, 64)  # Longitud del parche
    hidden_size = trial.suggest_int('hidden_size', 16, 128)  # Tamaño de la capa oculta
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)  # Tasa de aprendizaje

    # Crear el modelo PatchTST con los hiperparámetros sugeridos
    model = PatchTST(input_size=104, patch_len=patch_len, hidden_size=hidden_size)
    
    # Optimizador
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    # Entrenar el modelo y obtener el MSE, MAPE y sMAPE en el conjunto de validación
    mse, mape, smape = train_model(model, optimizer, train_loader, valid_loader, n_epochs=10)
    
    # Optimizamos el MSE, pero también podrías optimizar MAPE o sMAPE
    return smape

# Ejecutar la optimización con Optuna
study = optuna.create_study(direction="minimize")  # Queremos minimizar el MSE
study.optimize(objective, n_trials=50)

# Mostrar los mejores hiperparámetros encontrados
print("Best hyperparameters: ", study.best_params)
print("Best validation SMAPE: ", study.best_value)



[I 2024-09-17 11:41:35,260] A new study created in memory with name: no-name-82ce096b-19e4-4638-9f31-57a1dabc3faa
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)  # Tasa de aprendizaje
[I 2024-09-17 11:41:36,030] Trial 0 finished with value: 16.607093811035156 and parameters: {'patch_len': 60, 'hidden_size': 79, 'learning_rate': 6.078138422979291e-05}. Best is trial 0 with value: 16.607093811035156.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)  # Tasa de aprendizaje
[I 2024-09-17 11:41:36,487] Trial 1 finished with value: 16.639289259910583 and parameters: {'patch_len': 20, 'hidden_size': 100, 'learning_rate': 0.005758967690826906}. Best is trial 0 with value: 16.607093811035156.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)  # Tasa de aprendizaje
[I 2024-09-17 11:41:36,947] Trial 2 finished with value: 15.654653310775757 and parameters: {'patch_len': 29, 'hidden_size': 118, 'learning_rate': 0.0010099809854228

Best hyperparameters:  {'patch_len': 19, 'hidden_size': 55, 'learning_rate': 0.0040124225311043375}
Best validation SMAPE:  15.129777789115906
