In [1]:
import os
import numpy as np
import pandas as pd
import xgboost
import xgboost as xgb
import optuna
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, root_mean_squared_error 
import json


import joblib

logical_cores = os.cpu_count()
print(f"Number of logical CPU cores: {logical_cores}")

num_workers = max(1, logical_cores // 2)
print(f"Number of workers set to: {num_workers}")

def is_gpu_available():
    try:
        import torch
        return torch.cuda.is_available()
    except ImportError:
        return False

gpu_available = is_gpu_available()
print(f"GPU available: {gpu_available}")

print(xgboost.build_info())

Number of logical CPU cores: 16
Number of workers set to: 8
GPU available: True
{'BUILTIN_PREFETCH_PRESENT': False, 'CUDA_VERSION': [12, 4], 'DEBUG': False, 'MM_PREFETCH_PRESENT': True, 'THRUST_VERSION': [2, 3, 2], 'USE_CUDA': True, 'USE_DLOPEN_NCCL': False, 'USE_FEDERATED': False, 'USE_NCCL': False, 'USE_OPENMP': True, 'USE_RMM': False, 'libxgboost': 'C:\\Users\\ng_mi\\Anaconda\\envs\\pytorch-gpu\\Lib\\site-packages\\xgboost\\lib\\xgboost.dll'}


In [2]:
os.makedirs('../feature-importances/xbclassifier', exist_ok=True)
os.makedirs('../feature-importances/xbregressor', exist_ok=True)
os.makedirs('../models/hyperparameters-search-models/xgboost/xbclassifier', exist_ok=True)
os.makedirs('../models/hyperparameters-search-models/xgboost/xbregressor', exist_ok=True)
os.makedirs('../models/trained-models/xgboost/xbclassifier', exist_ok=True)
os.makedirs('../models/trained-models/xgboost/xbregressor', exist_ok=True)
os.makedirs('../models/best-hyperparameters/xgboost/xbclassifier', exist_ok=True)
os.makedirs('../models/best-hyperparameters/xgboost/xbregressor', exist_ok=True)


path = '../data/train'

ticker_list = []

if os.path.exists(path):
    ticker_list = [os.path.splitext(f)[0] for f in os.listdir(path) if f.endswith('.csv')]


In [3]:
def load_or_create_ticker_df(csv_file_path):
    """
    Load the existing ticker DataFrame from a CSV file if it exists,
    otherwise create a new DataFrame with predefined column types.
    Ensure the DataFrame has the specified columns, add any missing columns,
    and rearrange the columns in alphabetical order, excluding 'Ticker_Symbol'.

    Args:
    csv_file_path (str): The path to the CSV file.

    Returns:
    pd.DataFrame: The loaded or newly created DataFrame.
    """
    # Define the column types
    column_types = {
        "Ticker_Symbol": str,
        "Best_Cov1D_Classification_Accuracy": float,
        "Best_Cov1D_Classification_Path": str,
        "Best_Cov1D_Regression_RMSE": float,
        "Best_Cov1D_Regression_Path": str,
        "Best_LSTM_Classification_Accuracy": float,
        "Best_LSTM_Classification_Path": str,
        "Best_LSTM_Regression_RMSE": float,
        "Best_LSTM_Regression_Path": str,
        "Best_Transformer_Classification_Accuracy": float,
        "Best_Transformer_Classification_Path": str,
        "Best_Transformer_Regression_RMSE": float,
        "Best_Transformer_Regression_Path": str,
        "Best_XGBClassifier_Classification_Accuracy": float,
        "Best_XGBClassifier_Classification_Path": str,
        "Best_XGBRegressor_Regression_RMSE": float,
        "Best_XGBRegressor_Regression_Path": str
    }


    if os.path.isfile(csv_file_path):
        # Load the existing file into a DataFrame
        ticker_df = pd.read_csv(csv_file_path)
        
        # Ensure all specified columns are present
        for column, dtype in column_types.items():
            if column not in ticker_df.columns:
                ticker_df[column] = pd.Series(dtype=dtype)
        
        # Reorder columns alphabetically, excluding 'Ticker_Symbol'
        columns = ["Ticker_Symbol"] + sorted([col for col in ticker_df.columns if col != "Ticker_Symbol"])
        ticker_df = ticker_df[columns]
    else:
        # Create a new DataFrame with the specified column types
        ticker_df = pd.DataFrame(columns=column_types.keys()).astype(column_types)
    
    return ticker_df

In [4]:
def delete_hyperparameter_search_model(ticker_symbol):
    csv_path = f'../models/hyperparameters-search-models/ticker-all-models-best-hyperparameters-list.csv'
    xbclassifier_model_path = f'../models/hyperparameters-search-models/xgboost/xbclassifier/{ticker_symbol}.pkl'
    xbregressor_model_path = f'../models/hyperparameters-search-models/xgboost/xbregressor/{ticker_symbol}.pkl'
    xbclassifier_params_path = f'../models/best-hyperparameters/xgboost/xbclassifier/{ticker_symbol}.json'
    xbregressor_params_path = f'../models/best-hyperparameters/xgboost/xbregressor/{ticker_symbol}.json'

    # Delete model files if they exist
    if os.path.isfile(xbclassifier_model_path):
        os.remove(xbclassifier_model_path)
        print(f"Deleted {xbclassifier_model_path}")
    if os.path.isfile(xbregressor_model_path):
        os.remove(xbregressor_model_path)
        print(f"Deleted {xbregressor_model_path}")
    if os.path.isfile(xbclassifier_params_path):
        os.remove(xbclassifier_params_path)
        print(f"Deleted {xbclassifier_params_path}")
    if os.path.isfile(xbregressor_params_path):
        os.remove(xbregressor_params_path)
        print(f"Deleted {xbregressor_params_path}")

    # Delete the ticker_symbol row from the CSV file if it exists
    if os.path.isfile(csv_path):
        ticker_df = pd.read_csv(csv_path)
        if ticker_symbol in ticker_df['Ticker_Symbol'].values:
            ticker_df = ticker_df[ticker_df['Ticker_Symbol'] != ticker_symbol]
            ticker_df.to_csv(csv_path, index=False)
            print(f"Deleted {ticker_symbol} from {csv_path}")
        else:
            print(f"{ticker_symbol} not found in {csv_path}")
    else:
        print(f"{csv_path} does not exist")


In [5]:
def preprocess_data(df):
    if df.isna().sum().sum() > 0 or df.isin([float('inf'), float('-inf')]).sum().sum() > 0:
        df = df.replace([float('inf'), float('-inf')], float('nan')).dropna()

    df = df.dropna()

    columns_to_drop = [
        'NEXT_DAY_CLOSEPRICE', 'DAILY_CLOSEPRICE_CHANGE', 'CLOSEPRICE_DIRECTION',
        'DAILY_MIDPRICE', 'NEXT_DAY_MIDPRICE', 'DAILY_MIDPRICE_CHANGE', 'MIDPRICE_DIRECTION', 'Date'
    ]
    X = df.drop(columns=columns_to_drop)
    y_classifier = (df['DAILY_CLOSEPRICE_CHANGE'] > 0).astype(int)
    y_regressor = df['DAILY_CLOSEPRICE_CHANGE']

    return X, y_classifier, y_regressor

In [6]:
def xbclassifier_hyperparameters_search(X, y, gpu_available, ticker_symbol):
    def objective(trial):
        param = {
            'verbosity': 0,
            'objective': 'binary:logistic',
            'eval_metric': 'logloss',
            'tree_method': 'hist',
            'device': 'cuda' if gpu_available else 'cpu',  
            'use_label_encoder': False,
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'gamma': trial.suggest_float('gamma', 0, 5),
            'lambda': trial.suggest_float('lambda', 1e-8, 1.0, log=True),  # Adjusting range
            'alpha': trial.suggest_float('alpha', 1e-8, 1.0, log=True),     # Adjusting range
            'early_stopping_rounds': 10 
        }

        X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)
        model = XGBClassifier(**param)
        model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], verbose=False)
        preds = model.predict(X_valid)
        accuracy = accuracy_score(y_valid, preds)
        return accuracy

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=100)

    best_model = XGBClassifier(**study.best_params)
    best_model.fit(X, y, eval_set=[(X, y)], verbose=False)

    model_path = f'../models/hyperparameters-search-models/xgboost/xbclassifier/{ticker_symbol}.pkl'
    csv_path = f'../models/hyperparameters-search-models/ticker-all-models-best-hyperparameters-list.csv'
    params_path = f'../models/best-hyperparameters/xgboost/xbclassifier/{ticker_symbol}.json'

    ticker_df = load_or_create_ticker_df(csv_path)

    # Update ticker_df
    if ticker_symbol in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, 'Best_XGBClassifier_Classification_Accuracy'].values[0]
        if pd.isnull(current_score) or study.best_value > current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, ['Best_XGBClassifier_Classification_Accuracy', 'Best_XGBClassifier_Classification_Path']] = [study.best_value, model_path]
            joblib.dump(best_model, model_path)
            ticker_df.to_csv(csv_path, index=False)
            print(f"Model for {ticker_symbol} saved with accuracy: {study.best_value}")

            with open(params_path, 'w') as f:
                json.dump(study.best_params, f)
            print(f"parameters for {ticker_symbol} saved to {params_path}")
            
             # Save feature importances
            feature_importances = pd.DataFrame(best_model.feature_importances_, index=X.columns, columns=['importance'])
            feature_importances = feature_importances.sort_values(by='importance', ascending=False)
            feature_importances.to_csv(f'../feature-importances/xbclassifier/{ticker_symbol}_feature_importances.csv')
            print(f"Feature importances for {ticker_symbol} saved.")
        else:
            print(f"Previous model accuracy: {current_score} is better for {ticker_symbol} than accuracy: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker_symbol], 'Best_XGBClassifier_Classification_Accuracy': [study.best_value], 'Best_XGBClassifier_Classification_Path': [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        joblib.dump(best_model, model_path)
        ticker_df.to_csv(csv_path, index=False)
        print(f"Model for {ticker_symbol} saved with accuracy: {study.best_value}")

        with open(params_path, 'w') as f:
                json.dump(study.best_params, f)
        print(f"Parameters for {ticker_symbol} saved to {params_path}")
        
        # Save feature importances
        feature_importances = pd.DataFrame(best_model.feature_importances_, index=X.columns, columns=['importance'])
        feature_importances = feature_importances.sort_values(by='importance', ascending=False)
        feature_importances.to_csv(f'../feature-importances/xbclassifier/{ticker_symbol}_feature_importances.csv')
        print(f"Feature importances for {ticker_symbol} saved.")

def xbclassifier_resume_training(X, y, gpu_available, ticker_symbol, hyperparameter_search = False):
    hyperparameters_search_model_path = f'../models/hyperparameters-search-models/xgboost/xbclassifier/{ticker_symbol}.pkl'
    full_trained_model_path = f'../models/trained-models/xgboost/xbclassifier/{ticker_symbol}.pkl'
    params_path = f'../models/best-hyperparameters/xgboost/xbclassifier/{ticker_symbol}.json'
    csv_path = f'../models/trained-models/ticker-all-models-full-trained-list.csv'

    if hyperparameter_search:
        xbclassifier_hyperparameters_search(X, y, gpu_available, ticker_symbol)

    # Check if the model exists
    if os.path.exists(hyperparameters_search_model_path) and os.path.exists(params_path):
        print(f"Loaded existing model for {ticker_symbol}.")
        with open(params_path, 'r') as f:
            best_params = json.load(f)
    else:
        print(f"No existing model found for {ticker_symbol}. Initiating hyperparameter search.")
        xbclassifier_hyperparameters_search(X, y, gpu_available, ticker_symbol)
        with open(params_path, 'r') as f:
            best_params = json.load(f)

    # Split the data
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

    param = {
        'verbosity': 0,
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'tree_method': 'hist',
        'device': 'cuda' if gpu_available else 'cpu',  
        'use_label_encoder': False,
        'n_estimators': best_params['n_estimators'],
        'max_depth': best_params['max_depth'],
        'learning_rate': best_params['learning_rate'],
        'subsample': best_params['subsample'],
        'colsample_bytree': best_params['colsample_bytree'],
        'gamma': best_params['gamma'],
        'lambda': best_params['lambda'],
        'alpha': best_params['alpha'],
        'early_stopping_rounds': 10
    }

    best_model = XGBClassifier(**param)
    best_model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], verbose=False)
    preds = best_model.predict(X_valid)
    accuracy = accuracy_score(y_valid, preds)
    
    print(f"Validation accuracy for {ticker_symbol}: {accuracy:.4f}")
                  
    ticker_df = load_or_create_ticker_df(csv_path)

    if ticker_symbol in ticker_df['Ticker_Symbol'].values:
        ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, ['Best_XGBClassifier_Classification_Accuracy', 'Best_XGBClassifier_Classification_Path']] = [accuracy, full_trained_model_path]
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker_symbol], 'Best_XGBClassifier_Classification_Accuracy': [accuracy], 'Best_XGBClassifier_Classification_Path': [full_trained_model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
                             
    joblib.dump(best_model, full_trained_model_path)
    print(f"Model for {ticker_symbol} saved with accuracy: {accuracy:.4f}")
    ticker_df.to_csv(csv_path, index=False)
        
    # Save feature importances
    feature_importances = pd.DataFrame(best_model.feature_importances_, index=X.columns, columns=['importance'])
    feature_importances = feature_importances.sort_values(by='importance', ascending=False)
    feature_importances.to_csv(f'../feature-importances/xbclassifier/{ticker_symbol}_feature_importances.csv')
    print(f"Feature importances for {ticker_symbol} saved.")
   

In [7]:
def xbregressor_hyperparameters_search(X, y, gpu_available, ticker_symbol):
    def objective(trial):
        param = {
            'verbosity': 0,
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'tree_method': 'hist',
            'device': 'cuda' if gpu_available else 'cpu',  
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'gamma': trial.suggest_float('gamma', 0, 5),
            'lambda': trial.suggest_float('lambda', 1e-8, 1.0, log=True),  # Adjusting range
            'alpha': trial.suggest_float('alpha', 1e-8, 1.0, log=True),     # Adjusting range
            'early_stopping_rounds': 10 
        }

        X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)
        model = XGBRegressor(**param)
        model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], verbose=False)
        preds = model.predict(X_valid)
        rmse = root_mean_squared_error (y_valid, preds)
        return rmse

    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=100)

    best_params = study.best_params
    best_params.update(study.best_trial.params)

    best_model = XGBRegressor(**best_params)
    best_model.fit(X, y, eval_set=[(X, y)], verbose=False)

    model_path = f'../models/hyperparameters-search-models/xgboost/xbregressor/{ticker_symbol}.pkl'
    params_path = f'../models/best-hyperparameters/xgboost/xbregressor/{ticker_symbol}.json'
    csv_path = f'../models/hyperparameters-search-models/ticker-all-models-best-hyperparameters-list.csv'

    ticker_df = load_or_create_ticker_df(csv_path)

    # Update ticker_df
    if ticker_symbol in ticker_df['Ticker_Symbol'].values:
        current_score = ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, 'Best_XGBRegressor_Regression_RMSE'].values[0]
        if pd.isnull(current_score) or study.best_value < current_score:
            ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, ['Best_XGBRegressor_Regression_RMSE', 'Best_XGBRegressor_Regression_Path']] = [study.best_value, model_path]
            joblib.dump(best_model, model_path)
            ticker_df.to_csv(csv_path, index=False)
            print(f"Best model for {ticker_symbol} saved with RMSE: {study.best_value}")

            with open(params_path, 'w') as f:
                json.dump(study.best_params, f)
            print(f"parameters for {ticker_symbol} saved to {params_path}")

            # Save feature importances
            feature_importances = pd.DataFrame(best_model.feature_importances_, index=X.columns, columns=['importance'])
            feature_importances = feature_importances.sort_values(by='importance', ascending=False)
            feature_importances.to_csv(f'../feature-importances/xbregressor/{ticker_symbol}_feature_importances.csv')
            print(f"Feature importances for {ticker_symbol} saved.")
        else:
            print(f"Previous model MSE: {current_score} is better for {ticker_symbol} than MSE: {study.best_value}")
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker_symbol], 'Best_XGBRegressor_Regression_RMSE': [study.best_value], 'Best_XGBRegressor_Regression_Path': [model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
        joblib.dump(best_model, model_path)
        ticker_df.to_csv(csv_path, index=False)
        print(f"Best model for {ticker_symbol} saved with RMSE: {study.best_value}")

        with open(params_path, 'w') as f:
                json.dump(study.best_params, f)
        print(f"parameters for {ticker_symbol} saved to {params_path}")
        
        # Save feature importances
        feature_importances = pd.DataFrame(best_model.feature_importances_, index=X.columns, columns=['importance'])
        feature_importances = feature_importances.sort_values(by='importance', ascending=False)
        feature_importances.to_csv(f'../feature-importances/xbregressor/{ticker_symbol}_feature_importances.csv')
        print(f"Feature importances for {ticker_symbol} saved.")

def xbregressor_resume_training(X, y, gpu_available, ticker_symbol, hyperparameter_search = False):
    hyperparameters_search_model_path = f'../models/hyperparameters-search-models/xgboost/xbregressor/{ticker_symbol}.pkl'
    full_trained_model_path = f'../models/trained-models/xgboost/xbregressor/{ticker_symbol}.pkl'
    params_path = f'../models/best-hyperparameters/xgboost/xbregressor/{ticker_symbol}.json'
    csv_path = f'../models/trained-models/ticker-all-models-full-trained-list.csv'

    if hyperparameter_search:
        xbregressor_hyperparameters_search(X, y, gpu_available, ticker_symbol)
        
    # Check if the model exists
    if os.path.exists(hyperparameters_search_model_path) and os.path.exists(params_path):
        print(f"Loaded existing model for {ticker_symbol}.")
        with open(params_path, 'r') as f:
            best_params = json.load(f)
    else:
        print(f"No existing model found for {ticker_symbol}. Initiating hyperparameter search.")
        xbregressor_hyperparameters_search(X, y, gpu_available, ticker_symbol)
        with open(params_path, 'r') as f:
            best_params = json.load(f)

    # Split the data
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

    param = {
        'verbosity': 0,
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'tree_method': 'hist',
        'device': 'cuda' if gpu_available else 'cpu',  
        'use_label_encoder': False,
        'n_estimators': best_params['n_estimators'],
        'max_depth': best_params['max_depth'],
        'learning_rate': best_params['learning_rate'],
        'subsample': best_params['subsample'],
        'colsample_bytree': best_params['colsample_bytree'],
        'gamma': best_params['gamma'],
        'lambda': best_params['lambda'],
        'alpha': best_params['alpha'],
        'early_stopping_rounds': 10
    }

    best_model = XGBRegressor(**param)
    best_model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], verbose=False)
    preds = best_model.predict(X_valid)
    rmse = root_mean_squared_error (y_valid, preds)
    
    print(f"Validation RSME for {ticker_symbol}: {rmse:.4f}")
                  
    ticker_df = load_or_create_ticker_df(csv_path)

    if ticker_symbol in ticker_df['Ticker_Symbol'].values:
        ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, ['Best_XGBRegressor_Regression_RMSE', 'Best_XGBRegressor_Regression_Path']] = [rmse, full_trained_model_path]
    else:
        new_row = pd.DataFrame({'Ticker_Symbol': [ticker_symbol], 'Best_XGBRegressor_Regression_RMSE': [rmse], 'Best_XGBRegressor_Regression_Path': [full_trained_model_path]})
        ticker_df = pd.concat([ticker_df, new_row], ignore_index=True)
                             
    joblib.dump(best_model, full_trained_model_path)
    print(f"Model for {ticker_symbol} saved with RSME: {rmse:.4f}")
    ticker_df.to_csv(csv_path, index=False)
        
    # Save feature importances
    feature_importances = pd.DataFrame(best_model.feature_importances_, index=X.columns, columns=['importance'])
    feature_importances = feature_importances.sort_values(by='importance', ascending=False)
    feature_importances.to_csv(f'../feature-importances/xbregressor/{ticker_symbol}_feature_importances.csv')
    print(f"Feature importances for {ticker_symbol} saved.")

In [8]:
for ticker_symbol in ticker_list:
    dataframe = pd.read_csv(f"../data/train/{ticker_symbol}.csv")
    X, y_classifier, y_regressor = preprocess_data(dataframe)
    xbclassifier_resume_training(X, y_classifier, gpu_available, ticker_symbol)
    xbregressor_resume_training(X, y_regressor, gpu_available, ticker_symbol)


[I 2024-09-09 12:59:37,796] A new study created in memory with name: no-name-c3fe0e1a-f485-4714-81b3-c8b9f622c592
[I 2024-09-09 12:59:37,984] Trial 0 finished with value: 0.5211267605633803 and parameters: {'n_estimators': 450, 'max_depth': 9, 'learning_rate': 0.200063162961013, 'subsample': 0.6085965811906393, 'colsample_bytree': 0.7969015273133331, 'gamma': 0.7779556970402285, 'lambda': 5.516609481015119e-05, 'alpha': 0.16043837412228554}. Best is trial 0 with value: 0.5211267605633803.


No existing model found for CL=F. Initiating hyperparameter search.


[I 2024-09-09 12:59:38,110] Trial 1 finished with value: 0.38028169014084506 and parameters: {'n_estimators': 216, 'max_depth': 6, 'learning_rate': 0.17376826370452012, 'subsample': 0.6567698279199915, 'colsample_bytree': 0.5719887888998484, 'gamma': 0.3014781414635115, 'lambda': 7.071825095353033e-06, 'alpha': 1.8476793171273253e-05}. Best is trial 0 with value: 0.5211267605633803.
[I 2024-09-09 12:59:38,282] Trial 2 finished with value: 0.4647887323943662 and parameters: {'n_estimators': 362, 'max_depth': 6, 'learning_rate': 0.17169982967804495, 'subsample': 0.5215048538951716, 'colsample_bytree': 0.7269834625258454, 'gamma': 4.3848344786747955, 'lambda': 0.8868694196331341, 'alpha': 0.17306082160886557}. Best is trial 0 with value: 0.5211267605633803.
[I 2024-09-09 12:59:38,450] Trial 3 finished with value: 0.4788732394366197 and parameters: {'n_estimators': 901, 'max_depth': 4, 'learning_rate': 0.0758463185792816, 'subsample': 0.7180176906489423, 'colsample_bytree': 0.6290623271893

Model for CL=F saved with accuracy: 0.5915492957746479
Parameters for CL=F saved to ../models/best-hyperparameters/xgboost/xbclassifier/CL=F.json
Feature importances for CL=F saved.


[I 2024-09-09 13:00:04,567] A new study created in memory with name: no-name-6e4d7ef7-bade-48f7-b299-97b3f9f13ddf


Validation accuracy for CL=F: 0.5915
Model for CL=F saved with accuracy: 0.5915
Feature importances for CL=F saved.
No existing model found for CL=F. Initiating hyperparameter search.


[I 2024-09-09 13:00:04,896] Trial 0 finished with value: 1.6554775681191238 and parameters: {'n_estimators': 382, 'max_depth': 9, 'learning_rate': 0.11874604862725154, 'subsample': 0.8174975748159181, 'colsample_bytree': 0.7754772184205447, 'gamma': 1.8527001235619434, 'lambda': 7.395778942728812e-07, 'alpha': 0.00017036270728525445}. Best is trial 0 with value: 1.6554775681191238.
[I 2024-09-09 13:00:05,080] Trial 1 finished with value: 1.6849361702036383 and parameters: {'n_estimators': 640, 'max_depth': 4, 'learning_rate': 0.278951740956326, 'subsample': 0.6528710246369909, 'colsample_bytree': 0.980357196886068, 'gamma': 3.6625027807557826, 'lambda': 0.010536795095044637, 'alpha': 0.48745757034840337}. Best is trial 0 with value: 1.6554775681191238.
[I 2024-09-09 13:00:05,331] Trial 2 finished with value: 1.7170534412962968 and parameters: {'n_estimators': 443, 'max_depth': 10, 'learning_rate': 0.2453440726606533, 'subsample': 0.7995495302373816, 'colsample_bytree': 0.79722639178676

Best model for CL=F saved with RMSE: 1.6044562664750317
parameters for CL=F saved to ../models/best-hyperparameters/xgboost/xbregressor/CL=F.json
Feature importances for CL=F saved.


  ticker_df.loc[ticker_df['Ticker_Symbol'] == ticker_symbol, ['Best_XGBRegressor_Regression_RMSE', 'Best_XGBRegressor_Regression_Path']] = [rmse, full_trained_model_path]
[I 2024-09-09 13:00:31,897] A new study created in memory with name: no-name-177b483e-03d9-404f-91b6-8247f5fc4711


Validation RSME for CL=F: 1.6045
Model for CL=F saved with RSME: 1.6045
Feature importances for CL=F saved.
No existing model found for NVDA. Initiating hyperparameter search.


[I 2024-09-09 13:00:32,219] Trial 0 finished with value: 0.5211267605633803 and parameters: {'n_estimators': 218, 'max_depth': 8, 'learning_rate': 0.24590288863189416, 'subsample': 0.7740931348732378, 'colsample_bytree': 0.5758887701606445, 'gamma': 0.01810524633062638, 'lambda': 0.00015221055798837467, 'alpha': 2.8960534315124262e-08}. Best is trial 0 with value: 0.5211267605633803.
[I 2024-09-09 13:00:32,434] Trial 1 finished with value: 0.5915492957746479 and parameters: {'n_estimators': 671, 'max_depth': 7, 'learning_rate': 0.19171056980214943, 'subsample': 0.6307924553803215, 'colsample_bytree': 0.8584599549167699, 'gamma': 0.8065286226941853, 'lambda': 0.0015085059784785053, 'alpha': 7.284398991691973e-08}. Best is trial 1 with value: 0.5915492957746479.
[I 2024-09-09 13:00:32,694] Trial 2 finished with value: 0.5070422535211268 and parameters: {'n_estimators': 376, 'max_depth': 9, 'learning_rate': 0.16082534830149878, 'subsample': 0.5771853582711985, 'colsample_bytree': 0.864555

Model for NVDA saved with accuracy: 0.676056338028169
Parameters for NVDA saved to ../models/best-hyperparameters/xgboost/xbclassifier/NVDA.json
Feature importances for NVDA saved.


[I 2024-09-09 13:01:03,362] A new study created in memory with name: no-name-256144ed-b915-45a8-bf0f-d8b17ff97cf4


Validation accuracy for NVDA: 0.6761
Model for NVDA saved with accuracy: 0.6761
Feature importances for NVDA saved.
No existing model found for NVDA. Initiating hyperparameter search.


[I 2024-09-09 13:01:03,564] Trial 0 finished with value: 1.6581452468804552 and parameters: {'n_estimators': 159, 'max_depth': 5, 'learning_rate': 0.11555936327851654, 'subsample': 0.8055902062816049, 'colsample_bytree': 0.9100860877703033, 'gamma': 3.192119104190722, 'lambda': 0.00018265622616478477, 'alpha': 0.34311767720344594}. Best is trial 0 with value: 1.6581452468804552.
[I 2024-09-09 13:01:03,788] Trial 1 finished with value: 1.7413812036034229 and parameters: {'n_estimators': 552, 'max_depth': 8, 'learning_rate': 0.19134181517274082, 'subsample': 0.9593062193783977, 'colsample_bytree': 0.906228732862915, 'gamma': 4.945846415554014, 'lambda': 0.00014682520866816226, 'alpha': 0.08775961819416658}. Best is trial 0 with value: 1.6581452468804552.
[I 2024-09-09 13:01:03,970] Trial 2 finished with value: 1.6715337408087172 and parameters: {'n_estimators': 481, 'max_depth': 3, 'learning_rate': 0.017630335627580547, 'subsample': 0.6908559244149033, 'colsample_bytree': 0.6574149816018

Best model for NVDA saved with RMSE: 1.4963245402484802
parameters for NVDA saved to ../models/best-hyperparameters/xgboost/xbregressor/NVDA.json
Feature importances for NVDA saved.


[I 2024-09-09 13:01:32,794] A new study created in memory with name: no-name-c8a44536-ca84-48fd-aca7-3a85056b3a96


Validation RSME for NVDA: 1.4963
Model for NVDA saved with RSME: 1.4963
Feature importances for NVDA saved.
No existing model found for SGDUSD=X. Initiating hyperparameter search.


[I 2024-09-09 13:01:33,126] Trial 0 finished with value: 0.7567567567567568 and parameters: {'n_estimators': 165, 'max_depth': 9, 'learning_rate': 0.20401777157807047, 'subsample': 0.9651963494920709, 'colsample_bytree': 0.7278427735665818, 'gamma': 4.297923963708565, 'lambda': 0.6650622775237347, 'alpha': 0.013585692931142515}. Best is trial 0 with value: 0.7567567567567568.
[I 2024-09-09 13:01:33,544] Trial 1 finished with value: 0.7972972972972973 and parameters: {'n_estimators': 641, 'max_depth': 3, 'learning_rate': 0.21135479962993484, 'subsample': 0.6087057674879741, 'colsample_bytree': 0.9864196409238604, 'gamma': 2.457577584438753, 'lambda': 0.000360303282597293, 'alpha': 0.04041773407843951}. Best is trial 1 with value: 0.7972972972972973.
[I 2024-09-09 13:01:34,963] Trial 2 finished with value: 0.7162162162162162 and parameters: {'n_estimators': 566, 'max_depth': 10, 'learning_rate': 0.012603849352109679, 'subsample': 0.8973726030686184, 'colsample_bytree': 0.9815373656117958

Model for SGDUSD=X saved with accuracy: 0.8108108108108109
Parameters for SGDUSD=X saved to ../models/best-hyperparameters/xgboost/xbclassifier/SGDUSD=X.json
Feature importances for SGDUSD=X saved.


[I 2024-09-09 13:02:18,778] A new study created in memory with name: no-name-cc05b16e-ce1f-4b43-ad24-53f1b3363bd1


Validation accuracy for SGDUSD=X: 0.8108
Model for SGDUSD=X saved with accuracy: 0.8108
Feature importances for SGDUSD=X saved.
No existing model found for SGDUSD=X. Initiating hyperparameter search.


[I 2024-09-09 13:02:18,990] Trial 0 finished with value: 0.002156658818078091 and parameters: {'n_estimators': 348, 'max_depth': 10, 'learning_rate': 0.22003589820582387, 'subsample': 0.7997892071128887, 'colsample_bytree': 0.9456664960770448, 'gamma': 1.3370618374616605, 'lambda': 0.00018013942206518104, 'alpha': 0.000497563528302888}. Best is trial 0 with value: 0.002156658818078091.
[I 2024-09-09 13:02:19,192] Trial 1 finished with value: 0.002156659837624187 and parameters: {'n_estimators': 474, 'max_depth': 8, 'learning_rate': 0.16409566107615042, 'subsample': 0.820415976641162, 'colsample_bytree': 0.6859111491669876, 'gamma': 3.161591630915956, 'lambda': 0.011448052759127119, 'alpha': 2.399624603502392e-05}. Best is trial 0 with value: 0.002156658818078091.
[I 2024-09-09 13:02:19,456] Trial 2 finished with value: 0.0021566588438833943 and parameters: {'n_estimators': 941, 'max_depth': 3, 'learning_rate': 0.22295504910382646, 'subsample': 0.9384065778905324, 'colsample_bytree': 0.

Best model for SGDUSD=X saved with RMSE: 0.0021566588171909457
parameters for SGDUSD=X saved to ../models/best-hyperparameters/xgboost/xbregressor/SGDUSD=X.json
Feature importances for SGDUSD=X saved.
Validation RSME for SGDUSD=X: 0.0022
Model for SGDUSD=X saved with RSME: 0.0022
Feature importances for SGDUSD=X saved.


[I 2024-09-09 13:02:45,584] A new study created in memory with name: no-name-c14cb654-e9b1-4e34-9d9d-c67a69de023e


No existing model found for USDSGD=X. Initiating hyperparameter search.


[I 2024-09-09 13:02:45,847] Trial 0 finished with value: 0.8108108108108109 and parameters: {'n_estimators': 604, 'max_depth': 4, 'learning_rate': 0.1867374615944201, 'subsample': 0.5348742595035411, 'colsample_bytree': 0.9901413262634379, 'gamma': 4.041356194688007, 'lambda': 0.09937144750397124, 'alpha': 0.0034791761713304473}. Best is trial 0 with value: 0.8108108108108109.
[I 2024-09-09 13:02:46,890] Trial 1 finished with value: 0.7432432432432432 and parameters: {'n_estimators': 212, 'max_depth': 9, 'learning_rate': 0.023784908714165737, 'subsample': 0.9270409915651203, 'colsample_bytree': 0.9995939855993026, 'gamma': 3.1225995524190275, 'lambda': 0.11909393700476191, 'alpha': 1.8275950828928852e-06}. Best is trial 0 with value: 0.8108108108108109.
[I 2024-09-09 13:02:47,655] Trial 2 finished with value: 0.6756756756756757 and parameters: {'n_estimators': 978, 'max_depth': 5, 'learning_rate': 0.04533080281784588, 'subsample': 0.5439730781541701, 'colsample_bytree': 0.9096035673591

Model for USDSGD=X saved with accuracy: 0.8243243243243243
Parameters for USDSGD=X saved to ../models/best-hyperparameters/xgboost/xbclassifier/USDSGD=X.json
Feature importances for USDSGD=X saved.


[I 2024-09-09 13:03:25,337] A new study created in memory with name: no-name-755a0e7e-4900-49bc-81d7-d0e1ad08681e
[I 2024-09-09 13:03:25,530] Trial 0 finished with value: 0.00389405257680628 and parameters: {'n_estimators': 134, 'max_depth': 3, 'learning_rate': 0.192387125455862, 'subsample': 0.5769717237556362, 'colsample_bytree': 0.6148705388502465, 'gamma': 4.768731811427371, 'lambda': 0.0004174287674463215, 'alpha': 6.281567755518959e-07}. Best is trial 0 with value: 0.00389405257680628.


Validation accuracy for USDSGD=X: 0.8243
Model for USDSGD=X saved with accuracy: 0.8243
Feature importances for USDSGD=X saved.
No existing model found for USDSGD=X. Initiating hyperparameter search.


[I 2024-09-09 13:03:25,723] Trial 1 finished with value: 0.0038940529043550763 and parameters: {'n_estimators': 245, 'max_depth': 10, 'learning_rate': 0.06748914877544822, 'subsample': 0.5444815270862554, 'colsample_bytree': 0.5505054027688447, 'gamma': 0.6826762508099071, 'lambda': 0.7758018351196777, 'alpha': 9.042984695284536e-06}. Best is trial 0 with value: 0.00389405257680628.
[I 2024-09-09 13:03:25,861] Trial 2 finished with value: 0.0038940524744129064 and parameters: {'n_estimators': 680, 'max_depth': 9, 'learning_rate': 0.06770908494120617, 'subsample': 0.6259450391217521, 'colsample_bytree': 0.7065316555505374, 'gamma': 2.723708583546147, 'lambda': 0.0005039397427678913, 'alpha': 0.011069008104539937}. Best is trial 2 with value: 0.0038940524744129064.
[I 2024-09-09 13:03:26,013] Trial 3 finished with value: 0.0038940541669087096 and parameters: {'n_estimators': 805, 'max_depth': 6, 'learning_rate': 0.074131609728821, 'subsample': 0.9682868682834317, 'colsample_bytree': 0.52

Best model for USDSGD=X saved with RMSE: 0.0038940524672725144
parameters for USDSGD=X saved to ../models/best-hyperparameters/xgboost/xbregressor/USDSGD=X.json
Feature importances for USDSGD=X saved.
Validation RSME for USDSGD=X: 0.0039
Model for USDSGD=X saved with RSME: 0.0039
Feature importances for USDSGD=X saved.
