In [1]:
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, root_mean_squared_error

logical_cores = os.cpu_count()
print(f"Number of logical CPU cores: {logical_cores}")

num_workers = max(1, logical_cores // 2)
print(f"Number of workers set to: {num_workers}")

def is_gpu_available():
    try:
        return torch.cuda.is_available()
    except ImportError:
        return False

gpu_available = is_gpu_available()
print(f"GPU available: {gpu_available}")

Number of logical CPU cores: 16
Number of workers set to: 8
GPU available: True


In [2]:
def load_or_create_ticker_df(csv_file_path):
    """
    Load the existing ticker DataFrame from a CSV file if it exists,
    otherwise create a new DataFrame with predefined column types.
    Ensure the DataFrame has the specified columns, add any missing columns,
    and rearrange the columns in alphabetical order, excluding 'Ticker_Symbol'.

    Args:
    csv_file_path (str): The path to the CSV file.

    Returns:
    pd.DataFrame: The loaded or newly created DataFrame.
    """
    # Define the column types
    column_types = {
        "Ticker_Symbol": str,
        "Best_Cov1D_Classification_Accuracy": float,
        "Best_Cov1D_Classification_Path": str,
        "Best_Cov1D_Regression_RMSE": float,
        "Best_Cov1D_Regression_Path": str,
        "Best_Cov1D_LSTM_Classification_Accuracy": float,
        "Best_Cov1D_LSTM_Classification_Path": str,
        "Best_Cov1D_LSTM_Regression_RMSE": float,
        "Best_Cov1D_LSTM_Regression_Path": str,
        "Best_LSTM_Classification_Accuracy": float,
        "Best_LSTM_Classification_Path": str,
        "Best_LSTM_Regression_RMSE": float,
        "Best_LSTM_Regression_Path": str,
        "Best_XGBClassifier_Classification_Accuracy": float,
        "Best_XGBClassifier_Classification_Path": str,
        "Best_XGBRegressor_Regression_RMSE": float,
        "Best_XGBRegressor_Regression_Path": str
    }


    if os.path.isfile(csv_file_path):
        # Load the existing file into a DataFrame
        ticker_df = pd.read_csv(csv_file_path)
        
        # Ensure all specified columns are present
        for column, dtype in column_types.items():
            if column not in ticker_df.columns:
                ticker_df[column] = pd.Series(dtype=dtype)
        
        # Reorder columns alphabetically, excluding 'Ticker_Symbol'
        columns = ["Ticker_Symbol"] + sorted([col for col in ticker_df.columns if col != "Ticker_Symbol"])
        ticker_df = ticker_df[columns]
    else:
        # Create a new DataFrame with the specified column types
        ticker_df = pd.DataFrame(columns=column_types.keys()).astype(column_types)
    
    return ticker_df

csv_file_path = "../ticker-best-model.csv"
ticker_df = load_or_create_ticker_df(csv_file_path)

In [3]:
os.makedirs('../models/pytorch/conv1d-classification/', exist_ok=True)
os.makedirs('../models/pytorch/conv1d-regression/', exist_ok=True)
os.makedirs('../models/pytorch/lstm-classification/', exist_ok=True)
os.makedirs('../models/pytorch/lstm-regression/', exist_ok=True)
os.makedirs('../models/pytorch/conv1d-classification-lstm-classification/', exist_ok=True)
os.makedirs('../models/pytorch/conv1d-classification-lstm-regression/', exist_ok=True)
os.makedirs('../models/pytorch/conv1d-regression-lstm-classification/', exist_ok=True)
os.makedirs('../models/pytorch/conv1d-regression-lstm-regression/', exist_ok=True)

path = '../data'
ticker_list = []

if os.path.exists(path):
    ticker_list = [os.path.splitext(f)[0] for f in os.listdir(path) if f.endswith('.csv')]


In [4]:
def preprocess_data(df):
    if df.isna().sum().sum() > 0 or df.isin([float('inf'), float('-inf')]).sum().sum() > 0:
        df = df.replace([float('inf'), float('-inf')], float('nan')).dropna()

    df = df.dropna()

    columns_to_drop = [
        'NEXT_DAY_CLOSEPRICE', 'DAILY_CLOSEPRICE_CHANGE', 'CLOSEPRICE_DIRECTION',
        'DAILY_MIDPRICE', 'NEXT_DAY_MIDPRICE', 'DAILY_MIDPRICE_CHANGE', 'MIDPRICE_DIRECTION', 'Date'
    ]
    X = df.drop(columns=columns_to_drop)
    y_classifier = (df['DAILY_CLOSEPRICE_CHANGE'] > 0).astype(int)
    y_regressor = df['DAILY_CLOSEPRICE_CHANGE']

    return X, y_classifier, y_regressor

In [5]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, l2_lambda=0.01, dropout_rate=0.5):
        super(ResidualBlock, self).__init__()
        
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, stride=1, padding='same')
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(dropout_rate)
        
        nn.init.kaiming_normal_(self.conv1.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.conv2.weight, nonlinearity='relu')
        nn.init.zeros_(self.conv1.bias)
        nn.init.zeros_(self.conv2.bias)
        
        self.l2_lambda = l2_lambda

        if in_channels != out_channels:
            self.residual_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1)
        else:
            self.residual_conv = nn.Identity()

    def forward(self, x):
        residual = self.residual_conv(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        out = self.relu(out)
        
        return out

class Conv1DModel(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, num_blocks=1, l2_lambda=0.01, dropout_rate=0.5, classification=True):
        super(Conv1DModel, self).__init__()
        self.blocks = nn.Sequential(
            ResidualBlock(in_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate),
            *[ResidualBlock(out_channels, out_channels, kernel_size, l2_lambda=l2_lambda, dropout_rate=dropout_rate) for _ in range(num_blocks-1)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling for 1D
        self.fc = nn.Linear(out_channels, 2 if classification else 1)
        self.classification = classification

    def forward(self, x):
        out = self.blocks(x)
        out = self.global_avg_pool(out)
        out = out.view(out.size(0), -1)  # Flatten the tensor
        out = self.fc(out)
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

In [6]:
def process_conv1d_classification(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=True).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.CrossEntropyLoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.long).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_accuracy = accuracy_score(target_val.cpu(), val_pred.cpu())
            return val_accuracy  # Return accuracy directly
    
    study = optuna.create_study(direction='maximize')  # Change direction to 'maximize'
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=True).to(device)
    model_path = f'../models/pytorch/conv1d-classification/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Classification_Accuracy'
    path_col = 'Best_Cov1D_Classification_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with accuracy: {study.best_value}")
    
    return ticker_df

In [7]:
def process_conv1d_regression(X, y, gpu_available, ticker, ticker_df, csv_file_path):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy().reshape(-1, 1)
    
    # Reshape X for Conv1D
    NUM_CHANNELS = 1
    X = X.reshape((X.shape[0], NUM_CHANNELS, -1))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    TEST_SIZE = 0.2
    RANDOM_STATE = 42
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    
    def conv1d_objective(trial):
        in_channels = X_train.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        num_blocks = trial.suggest_int('num_blocks', 1, 5)
        l2_lambda = trial.suggest_float('l2_lambda', 1e-5, 1e-2)
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        
        model = Conv1DModel(in_channels, out_channels, kernel_size, num_blocks, l2_lambda, dropout_rate, classification=False).to(device)
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2), weight_decay=l2_lambda)
        criterion = nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.float32).to(device)
        
        model.train()
        EPOCHS = 50
        for epoch in range(EPOCHS):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
        target_val = torch.tensor(y_val, dtype=torch.float32).to(device)
        with torch.no_grad():
            val_output = model(input_val)
            val_pred = val_output.argmax(dim=1)
            val_rmse = root_mean_squared_error(target_val.cpu(), val_pred.cpu())
            return val_rmse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(conv1d_objective, n_trials=100)
    
    best_model = Conv1DModel(X.shape[1], study.best_params['out_channels'], study.best_params['kernel_size'], study.best_params['num_blocks'], study.best_params['l2_lambda'], study.best_params['dropout_rate'], classification=False).to(device)
    model_path = f'../models/pytorch/conv1d-regression/{ticker}.pth'
    
    # Update ticker_df and save the best model
    metric_col = 'Best_Cov1D_Regression_RMSE'
    path_col = 'Best_Cov1D_Regression_Path'
    
    if ticker in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, metric_col].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker, [metric_col, path_col]] = [study.best_value, model_path]
            torch.save(best_model.state_dict(), model_path)
            ticker_df.to_csv(csv_file_path, index=False)
            print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
        else:
            print(f"Previous model RMSE: {current_score} is better for {ticker} than RMSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker], metric_col: [study.best_value], path_col: [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        torch.save(best_model.state_dict(), model_path)
        ticker_df.to_csv(csv_file_path, index=False)
        print(f"Best model for {ticker} saved with RMSE: {study.best_value}")
    
    return ticker_df

In [8]:
for ticker in ticker_list:
    dataframe = pd.read_csv(f"../data/{ticker}.csv")
    X, y_classifier, y_regressor = preprocess_data(dataframe)
    ticker_df = process_conv1d_classification(X, y_classifier, gpu_available, ticker, ticker_df, csv_file_path)
    ticker_df = process_conv1d_regression(X, y_regressor, gpu_available, ticker, ticker_df, csv_file_path)

[I 2024-09-05 19:36:09,358] A new study created in memory with name: no-name-f89b496e-3476-46c6-9770-2e982162a3e6
  return F.conv1d(input, weight, bias, self.stride,
[I 2024-09-05 19:36:11,255] Trial 0 finished with value: 0.6060606060606061 and parameters: {'out_channels': 90, 'kernel_size': 4, 'num_blocks': 3, 'l2_lambda': 0.00938488194533828, 'dropout_rate': 0.19566784636298565, 'lr': 0.006170737203851276}. Best is trial 0 with value: 0.6060606060606061.
[I 2024-09-05 19:36:11,888] Trial 1 finished with value: 0.3939393939393939 and parameters: {'out_channels': 103, 'kernel_size': 7, 'num_blocks': 3, 'l2_lambda': 0.009484191715979648, 'dropout_rate': 0.3170123481473942, 'lr': 0.003630235398214712}. Best is trial 0 with value: 0.6060606060606061.
[I 2024-09-05 19:36:12,374] Trial 2 finished with value: 0.696969696969697 and parameters: {'out_channels': 73, 'kernel_size': 4, 'num_blocks': 3, 'l2_lambda': 0.002114998097442547, 'dropout_rate': 0.2356602801020391, 'lr': 0.001799577388292

Best model for CL=F saved with accuracy: 0.7272727272727273


[I 2024-09-05 19:37:11,267] Trial 0 finished with value: 1.3886516599165026 and parameters: {'out_channels': 60, 'kernel_size': 3, 'num_blocks': 1, 'l2_lambda': 0.0037720234683198493, 'dropout_rate': 0.47739487726435414, 'lr': 0.001337190448802571}. Best is trial 0 with value: 1.3886516599165026.
[I 2024-09-05 19:37:11,905] Trial 1 finished with value: 1.3886516599165026 and parameters: {'out_channels': 50, 'kernel_size': 3, 'num_blocks': 5, 'l2_lambda': 0.005998782788062697, 'dropout_rate': 0.12472895319909894, 'lr': 0.006204676629804238}. Best is trial 0 with value: 1.3886516599165026.
[I 2024-09-05 19:37:12,500] Trial 2 finished with value: 1.3886516599165026 and parameters: {'out_channels': 56, 'kernel_size': 7, 'num_blocks': 4, 'l2_lambda': 0.007375691298647212, 'dropout_rate': 0.14658434170747278, 'lr': 0.0009562214372706208}. Best is trial 0 with value: 1.3886516599165026.
[I 2024-09-05 19:37:13,290] Trial 3 finished with value: 1.3886516599165026 and parameters: {'out_channels'

Best model for CL=F saved with RMSE: 1.3886516599165026


[I 2024-09-05 19:38:08,781] Trial 0 finished with value: 0.5454545454545454 and parameters: {'out_channels': 34, 'kernel_size': 6, 'num_blocks': 1, 'l2_lambda': 0.009480174911929626, 'dropout_rate': 0.4293606422793995, 'lr': 0.004617893774911781}. Best is trial 0 with value: 0.5454545454545454.
[I 2024-09-05 19:38:09,418] Trial 1 finished with value: 0.45454545454545453 and parameters: {'out_channels': 26, 'kernel_size': 5, 'num_blocks': 5, 'l2_lambda': 0.006116043730032924, 'dropout_rate': 0.19815873569403097, 'lr': 0.006365882151998594}. Best is trial 0 with value: 0.5454545454545454.
[I 2024-09-05 19:38:10,416] Trial 2 finished with value: 0.45454545454545453 and parameters: {'out_channels': 91, 'kernel_size': 7, 'num_blocks': 5, 'l2_lambda': 0.0019838959344792577, 'dropout_rate': 0.1566794897336052, 'lr': 0.007643242857355498}. Best is trial 0 with value: 0.5454545454545454.
[I 2024-09-05 19:38:11,077] Trial 3 finished with value: 0.45454545454545453 and parameters: {'out_channels'

Best model for NVDA saved with accuracy: 0.6060606060606061


[I 2024-09-05 19:38:50,359] Trial 0 finished with value: 4.167355508960071 and parameters: {'out_channels': 74, 'kernel_size': 6, 'num_blocks': 4, 'l2_lambda': 0.006123108710094324, 'dropout_rate': 0.1438130465260845, 'lr': 0.009875838117096226}. Best is trial 0 with value: 4.167355508960071.
[I 2024-09-05 19:38:50,868] Trial 1 finished with value: 4.167355508960071 and parameters: {'out_channels': 79, 'kernel_size': 5, 'num_blocks': 3, 'l2_lambda': 0.0026067017004195053, 'dropout_rate': 0.11433277705990173, 'lr': 0.006698920936506528}. Best is trial 0 with value: 4.167355508960071.
[I 2024-09-05 19:38:51,404] Trial 2 finished with value: 4.167355508960071 and parameters: {'out_channels': 43, 'kernel_size': 3, 'num_blocks': 4, 'l2_lambda': 0.004487239534762522, 'dropout_rate': 0.26573389406988, 'lr': 0.0038001567645420033}. Best is trial 0 with value: 4.167355508960071.
[I 2024-09-05 19:38:51,757] Trial 3 finished with value: 4.167355508960071 and parameters: {'out_channels': 75, 'kern

Best model for NVDA saved with RMSE: 4.167355508960071


[I 2024-09-05 19:39:43,289] Trial 0 finished with value: 0.5142857142857142 and parameters: {'out_channels': 33, 'kernel_size': 4, 'num_blocks': 2, 'l2_lambda': 0.00010494191627591149, 'dropout_rate': 0.3651159478586631, 'lr': 0.0015131229746867493}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-05 19:39:43,749] Trial 1 finished with value: 0.4857142857142857 and parameters: {'out_channels': 71, 'kernel_size': 4, 'num_blocks': 3, 'l2_lambda': 0.008802565886574503, 'dropout_rate': 0.4257006812509895, 'lr': 0.008923429297057237}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-05 19:39:43,991] Trial 2 finished with value: 0.42857142857142855 and parameters: {'out_channels': 41, 'kernel_size': 5, 'num_blocks': 1, 'l2_lambda': 0.0009829539217230154, 'dropout_rate': 0.3473195767606292, 'lr': 0.009278376703864678}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-05 19:39:44,934] Trial 3 finished with value: 0.5142857142857142 and parameters: {'out_channels'

Best model for SGDUSD=X saved with accuracy: 0.7428571428571429


[I 2024-09-05 19:40:53,777] Trial 0 finished with value: 0.0016006725511762895 and parameters: {'out_channels': 114, 'kernel_size': 7, 'num_blocks': 2, 'l2_lambda': 0.005547591418410655, 'dropout_rate': 0.2090267487015665, 'lr': 0.0014721290256535877}. Best is trial 0 with value: 0.0016006725511762895.
[I 2024-09-05 19:40:54,800] Trial 1 finished with value: 0.0016006725511762895 and parameters: {'out_channels': 121, 'kernel_size': 3, 'num_blocks': 5, 'l2_lambda': 0.0045739656058043695, 'dropout_rate': 0.30965523120701954, 'lr': 0.006722381518455283}. Best is trial 0 with value: 0.0016006725511762895.
[I 2024-09-05 19:40:55,903] Trial 2 finished with value: 0.0016006725511762895 and parameters: {'out_channels': 101, 'kernel_size': 6, 'num_blocks': 5, 'l2_lambda': 0.005888538639454044, 'dropout_rate': 0.18266863178224582, 'lr': 0.00023525546244806253}. Best is trial 0 with value: 0.0016006725511762895.
[I 2024-09-05 19:40:56,148] Trial 3 finished with value: 0.0016006725511762895 and pa

Best model for SGDUSD=X saved with RMSE: 0.0016006725511762895


[I 2024-09-05 19:42:04,339] Trial 0 finished with value: 0.5142857142857142 and parameters: {'out_channels': 48, 'kernel_size': 7, 'num_blocks': 5, 'l2_lambda': 0.008208250328173836, 'dropout_rate': 0.22306701456352657, 'lr': 0.008056436894204502}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-05 19:42:04,939] Trial 1 finished with value: 0.4857142857142857 and parameters: {'out_channels': 71, 'kernel_size': 6, 'num_blocks': 4, 'l2_lambda': 0.0015962864513771955, 'dropout_rate': 0.3428457699764299, 'lr': 0.0062502796425669286}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-05 19:42:05,164] Trial 2 finished with value: 0.5142857142857142 and parameters: {'out_channels': 88, 'kernel_size': 6, 'num_blocks': 1, 'l2_lambda': 0.0022850555161759295, 'dropout_rate': 0.33972610237677486, 'lr': 0.0052092818262931}. Best is trial 0 with value: 0.5142857142857142.
[I 2024-09-05 19:42:05,679] Trial 3 finished with value: 0.45714285714285713 and parameters: {'out_channels':

Previous model accuracy: 0.6857142857142857 is better for USDSGD=X than accuracy: 0.6285714285714286


[I 2024-09-05 19:43:16,726] Trial 0 finished with value: 0.002848010049792017 and parameters: {'out_channels': 43, 'kernel_size': 3, 'num_blocks': 3, 'l2_lambda': 0.006727748404102566, 'dropout_rate': 0.1673177581815155, 'lr': 0.0067136842774747916}. Best is trial 0 with value: 0.002848010049792017.
[I 2024-09-05 19:43:17,524] Trial 1 finished with value: 0.002848010049792017 and parameters: {'out_channels': 52, 'kernel_size': 6, 'num_blocks': 5, 'l2_lambda': 0.0023500522869028557, 'dropout_rate': 0.4267168519429475, 'lr': 0.009613930228136261}. Best is trial 0 with value: 0.002848010049792017.
[I 2024-09-05 19:43:17,859] Trial 2 finished with value: 0.002848010049792017 and parameters: {'out_channels': 19, 'kernel_size': 5, 'num_blocks': 2, 'l2_lambda': 0.007437917916081833, 'dropout_rate': 0.2889964641933198, 'lr': 0.0002239672112942915}. Best is trial 0 with value: 0.002848010049792017.
[I 2024-09-05 19:43:18,201] Trial 3 finished with value: 0.002848010049792017 and parameters: {'o

Best model for USDSGD=X saved with RMSE: 0.002848010049792017


In [None]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, sequence_length, classification=True):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sequence_length = sequence_length
        self.classification = classification

    def forward(self, x):
        h0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)
        c0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        if self.classification:
            out = F.log_softmax(out, dim=1)
        return out

def process_lstm(X, y, classification, gpu_available, ticker):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')

    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y = y.to_numpy()
  
    # Reshape X for LSTM
    X = X.reshape((X.shape[0], -1, 1))  # Reshape for LSTM: (batch_size, sequence_length, num_features)
    
    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    
    input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
    target_val = torch.tensor(y_val, dtype=torch.long if classification else torch.float32).to(device)
    
    def lstm_objective(trial):
        input_size = X.shape[2]
        hidden_size = trial.suggest_int('hidden_size', 16, 128)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        sequence_length = X.shape[1]
        
        model = LSTMModel(input_size, hidden_size, num_layers, 2 if classification else 1, sequence_length, classification).to(device)
        
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.CrossEntropyLoss() if classification else nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long if classification else torch.float32).to(device)
        
        model.train()
        for epoch in range(10):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        with torch.no_grad():
            val_output = model(input_val)
            val_loss = criterion(val_output, target_val)
            val_accuracy = (val_output.argmax(dim=1) == target_val).float().mean().item()
            val_mse =val_mse = val_loss.item()
            print(f'Trial {trial.number}: Validation Accuracy = {val_accuracy:.4f}, Validation MSE = {val_mse:.4f}, Validation Loss = {val_loss.item():.4f}')
            return 1 - accuracy if classification else mse
    
    study = optuna.create_study(direction='minimize')
    study.optimize(lstm_objective, n_trials=100)
    
    best_model = LSTMModel(X.shape[2], study.best_params['hidden_size'], study.best_params['num_layers'], 2 if classification else 1, X.shape[1], classification).to(device)
    
    # Save the best model
    model_type = 'classification' if classification else 'regression'
    torch.save(best_model.state_dict(), f'../models/pytorch/lstm-{model_type}/{ticker}.pth')
    
    # Print out metrics for the best model
    best_model.eval()
    with torch.no_grad():
        val_output = best_model(input_val)
        val_loss = criterion(val_output, target_val)
        val_accuracy = (val_output.argmax(dim=1) == target_val).float().mean().item()
        val_mse =val_mse = val_loss.item()
        print(f'Validation Accuracy: {val_accuracy:.4f}')
        print(f'Validation MSE: {val_mse:.4f}')
  

In [7]:
class ConvLSTMModel(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, lstm_input_size, lstm_hidden_size, lstm_num_layers, output_size, classification=True):
        super(ConvLSTMModel, self).__init__()
        self.conv1d = nn.Conv1d(in_channels, out_channels, kernel_size)
        self.lstm = nn.LSTM(lstm_input_size, lstm_hidden_size, lstm_num_layers, batch_first=True)
        self.fc = nn.Linear(lstm_hidden_size, output_size)
        self.classification = classification

    def forward(self, x):
        x = self.conv1d(x)
        x = x.permute(0, 2, 1)  # Change shape to (batch_size, sequence_length, num_channels)
        x, _ = self.lstm(x)
        x = self.fc(x[:, -1, :])
        return x

def process_conv1dlstm(X, y_classifier, y_regressor, conv1d_classification, lstm_classification, gpu_available, ticker):
    device = torch.device('cuda' if gpu_available and torch.cuda.is_available() else 'cpu')
    
    # Convert DataFrame to numpy array
    X = X.to_numpy()
    y_classifier = y_classifier.to_numpy()
    y_regressor = y_regressor.to_numpy()
    
    # Reshape X for ConvLSTM
    X = X.reshape((X.shape[0], 1, X.shape[1]))  # Reshape for Conv1D: (batch_size, num_channels, sequence_length)
    
    # Split data into training and validation sets
    y = y_classifier if conv1d_classification else y_regressor
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    
    input_val = torch.tensor(X_val, dtype=torch.float32).to(device)
    target_val = torch.tensor(y_val, dtype=torch.long if conv1d_classification else torch.float32).to(device)

    criterion = nn.CrossEntropyLoss() if conv1d_classification else nn.MSELoss()
    
    def conv_lstm_objective(trial):
        in_channels = X.shape[1]
        out_channels = trial.suggest_int('out_channels', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 7)
        lstm_hidden_size = trial.suggest_int('lstm_hidden_size', 16, 128)
        lstm_num_layers = trial.suggest_int('lstm_num_layers', 1, 3)
        output_size = len(set(y_classifier)) if conv1d_classification else 1  # Number of classes for classification
        
        model = ConvLSTMModel(in_channels, out_channels, kernel_size, out_channels, lstm_hidden_size, lstm_num_layers, output_size, classification=conv1d_classification).to(device)
        
        optimizer = optim.Adam(model.parameters(), lr=trial.suggest_float('lr', 1e-5, 1e-2))
        criterion = nn.CrossEntropyLoss() if conv1d_classification else nn.MSELoss()
        
        input_train = torch.tensor(X_train, dtype=torch.float32).to(device)
        target_train = torch.tensor(y_train, dtype=torch.long if conv1d_classification else torch.float32).to(device)
        
        # Training loop
        model.train()
        for epoch in range(10):
            optimizer.zero_grad()
            output = model(input_train)
            loss = criterion(output, target_train)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        with torch.no_grad():
            val_output = model(input_val)
            val_loss = criterion(val_output, target_val)
            val_accuracy = (val_output.argmax(dim=1) == target_val).float().mean().item()
            val_mse =val_mse = val_loss.item()
            print(f'Trial {trial.number}: Validation Accuracy = {val_accuracy:.4f}, Validation MSE = {val_mse:.4f}, Validation Loss = {val_loss.item():.4f}')
            return val_loss.item()
    
    study = optuna.create_study(direction='minimize')
    study.optimize(conv_lstm_objective, n_trials=100)
    
    best_params = study.best_params
    best_model = ConvLSTMModel(X.shape[1], best_params['out_channels'], best_params['kernel_size'], best_params['out_channels'], best_params['lstm_hidden_size'], best_params['lstm_num_layers'], len(set(y_classifier)) if conv1d_classification else 1, classification=conv1d_classification).to(device)
    
    # Save the best model
    conv_model_type = 'classification' if conv1d_classification else 'regression'
    lstm_model_type = 'classification' if lstm_classification else 'regression'
    torch.save(best_model.state_dict(), f'../models/pytorch/conv1d-{conv_model_type}-lstm-{lstm_model_type}/{ticker}.pth')
    
    # Evaluate the best model on the validation set
    best_model.eval()
    with torch.no_grad():
        val_output = best_model(input_val)
        val_loss = criterion(val_output, target_val)
        val_accuracy = (val_output.argmax(dim=1) == target_val).float().mean().item()
        val_mse =val_mse = val_loss.item()
        print(f'Validation Accuracy: {val_accuracy:.4f}')
        print(f'Validation MSE: {val_mse:.4f}')