In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tabulate import tabulate
import pandas_ta as ta
from sklearn.preprocessing import StandardScaler,MinMaxScaler,Normalizer

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers.legacy import Adam as LegacyAdam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from skopt import gp_minimize
from skopt.space import Real, Integer
from sklearn.metrics import mean_squared_error
from bayes_opt import BayesianOptimization

import tensorflow as tf
from tensorflow.keras.optimizers.legacy import Adam
import time

from pandas.tseries.offsets import MonthEnd

from pandas.tseries.offsets import BDay

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows', None)

In [2]:
#tickers = ['SPY', 'QQQ', 'VTI', 'IWM', 'EFA', 'EEM', 'GLD', 'SLV', 'USO', 'XLF']
#tickers = ['SPY', 'QQQ', 'VTI']
#tickers = ['SPY']

#start_date, end_date = '2000-01-01', '2024-05-01'
#train_start_date, train_end_date, test_start_date, test_end_date, prediction_start_date = start_date,'2014-01-01', '2014-01-01', '2024-01-01', '2024-01-01'
#prediction_dates=['2024-01-01','2024-02-01','2024-03-01', '2024-04-01']

In [3]:
def data_loading(ticker_symbol, start_date, end_date):
    # Fetch the ETF data from Yahoo Finance for the period from 2010-01-01 to 2024-01-01
    etf_data = yf.Ticker(ticker_symbol)
    etf_history = etf_data.history(start=start_date, end=end_date)
    etf_history.index = etf_history.index.tz_localize(None)

    return etf_history, etf_data
    
#etf_history,etf_data = data_loading('SPY', start_date, end_date)
#etf_history.head(10)

In [4]:
#etf_history.tail(10)

In [5]:
#Function to derive the predictor columns
def etf_predictors(etf_history,  start_date, end_date, etf_data, benchmark_ticker = '^GSPC' ):
    
    # Calculate Daily Returns
    etf_history['Daily Return'] = etf_history['Close'].pct_change()

    # Calculate 21-Day Volatility (standard deviation of daily returns, annualized)
    etf_history['Volatility'] = etf_history['Daily Return'].rolling(window=21).std() * np.sqrt(252)
    #etf_history['Volatility_ta'] = ta.volatility(etf_history['Close'], window=21, annualize=True)
   
    # Get the ETF info
    etf_info = etf_data.info

    # Retrieve the net asset value price (NAV) and total net assets
    nav_price = etf_info['navPrice']
    total_assets = etf_info['totalAssets']

    # Calculate the number of shares outstanding
    shares_outstanding = total_assets / nav_price

    # Calculate Market Capitalization for each day
    # Market Capitalization = Closing Price * Total Number of Shares Outstanding
    etf_history['Market Cap'] = etf_history['Close'] * shares_outstanding

    total_assets = etf_info['totalAssets']
    total_liabilities = etf_info.get('totalLiabilities', 0)  # Handle the case where total liabilities might not be present
    
    # Calculate Book Value per Share
    book_value_per_share = (total_assets - total_liabilities) / shares_outstanding
    
    # Calculate Price to Book (P/B) Ratio
    etf_history['P/B Ratio'] = etf_history['Close'] / book_value_per_share
    
    # Calculate 1-Month Momentum (21 trading days)
    
    etf_history['Momentum'] = ta.mom(etf_history['Close'], length=21)
    
    benchmark_data = yf.download(benchmark_ticker, start=start_date, end=end_date)
    benchmark_data['Daily Return'] = benchmark_data['Close'].pct_change()
    #benchmark_data_history = benchmark_data.dropna()
    benchmark_data.head(10)
    
    # Ensure the indices are time zone-naive
    benchmark_data.index = benchmark_data.index.tz_localize(None)
    combined_data = etf_history[['Close']].join(benchmark_data[['Close']], lsuffix='_ETF', rsuffix='_Benchmark')
    combined_data = combined_data.dropna()
    
    # Calculate rolling beta with a 30-day window
    rolling_beta_21 = rolling_beta(combined_data, window=21)

    # Add the rolling beta to the dataframe
    combined_data.loc[:, 'Rolling Beta 21-day'] = rolling_beta_21
    etf_history['Rolling Beta']=combined_data['Rolling Beta 21-day']
    
    # Calculate daily profitability
    daily_profitabilities = []
    previous_nav = None
    for index, row in etf_history.iterrows():
        current_nav = row['Close']  # Current day's NAV
        #print(current_nav)
        if previous_nav is not None:
            daily_profitability = (current_nav - previous_nav) / previous_nav * 100
            daily_profitabilities.append(daily_profitability)
        else:
            daily_profitabilities.append(None)
        previous_nav = current_nav  # Update previous_nav for the next iteration

    # Add daily profitabilities to ETF dataset
    etf_history['Daily Profitability (%)'] = daily_profitabilities
    
    # Calculate the dividend yield for each day
    dividend_yields = []

    for index, row in etf_history.iterrows():
        # Get the dividend payment for the day
        dividend_payment = row['Dividends']

        # Get the current market price of the ETF for the day
        current_price = row['Close']

        # Calculate the dividend yield for the day
        dividend_yield = (dividend_payment / current_price) * 100

        # Append the dividend yield to the list
        dividend_yields.append(dividend_yield)
      
    etf_history['Div yield'] = dividend_yields

    #Volatility
    etf_history['ATR'] = ta.atr(etf_history['High'], etf_history['Low'], etf_history['Close'], length=21)    
    
    # Compute the Relative Volatility Index (RVI)
    rvi = ta.rvi(etf_history['Close'],length=21)
    etf_history['RVI'] = rvi
    
    #Momentum
    rsi_window = 14  # Window size for RSI calculation
    roc_window = 12  # Window size for ROC calculation
    #rsi_window = 21  
    #roc_window = 21  

    

    etf_history['RSI'] = ta.rsi(etf_history['Close'],length=rsi_window)
    # Calculate Rate of Change (ROC)
    etf_history['ROC'] = ta.roc(etf_history['Close'], length=roc_window)
        
    etf_history['log_returns'] = np.log(etf_history['Close'] / etf_history['Close'].shift(1))

    return etf_history

# Function to calculate rolling beta
def rolling_beta(df, window):
    rolling_cov = df['Close_Benchmark'].rolling(window=window).cov(df['Close_ETF'])
    rolling_var = df['Close_Benchmark'].rolling(window=window).var()
    rolling_beta = rolling_cov / rolling_var
    return rolling_beta



In [6]:
#etf_history = etf_predictors(etf_history,  start_date, end_date, etf_data,benchmark_ticker = '^GSPC' )
#etf_history.head(10)

In [7]:

def pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates, feature_columns=None, scaling_strategy='StandardScaler', final_end_date='2024-06-01'):
    # Handle missing values and inf replacements
    etf_history.fillna(etf_history.median(), inplace=True)
    etf_history = etf_history.replace(-np.inf, 0)

    # Default feature columns if none are provided
    if feature_columns is None:
        feature_columns = ['Volatility', 'Volume', 'Daily Return', 'Market Cap', 'P/B Ratio', 'Momentum', 
                           'Rolling Beta', 'Daily Profitability (%)', 'ATR', 'RVI', 'RSI', 'ROC']

    # Selecting the features (X) and the target (y)
    X = etf_history.loc[:, feature_columns]
    y = etf_history['log_returns']

    # Split train and test data
    train_data = X.loc[train_start_date:train_end_date]
    test_data = X.loc[test_start_date:test_end_date]
    y_train = y.loc[train_start_date:train_end_date]
    y_test = y.loc[test_start_date:test_end_date]

    # Scaling strategy based on input parameter
    if scaling_strategy == 'StandardScaler':
        scaler = StandardScaler()
    elif scaling_strategy == 'Normalizer':
        scaler = Normalizer()
    else:
        raise ValueError(f"Unsupported scaling strategy: {scaling_strategy}")
    
    # Scaling the train and test data
    train_data_scaled = scaler.fit_transform(train_data)
    test_data_scaled = scaler.transform(test_data)

    # Create a dictionary to store forecast data for each prediction period
    forecast_data = {}
    for i, start_date in enumerate(prediction_dates):
        # Determine the end date for each forecast period
        if i < len(prediction_dates) - 1:
            end_date = (pd.to_datetime(start_date) + MonthEnd(0)).strftime('%Y-%m-%d')
        else:
            end_date = final_end_date  # The final end date provided or default

        # Store the forecast data for each period
        forecast_data[f'forecast_data_{i+1}m'] = X.loc[start_date:end_date]

    # Scale the forecast data dynamically
    forecast_data_scaled = {}
    for period_key, period_data in forecast_data.items():
        if not period_data.empty:
            forecast_data_scaled[period_key] = sm.add_constant(scaler.transform(period_data))

    # Add constant to scaled train and test data
    train_data_scaled = sm.add_constant(train_data_scaled)
    test_data_scaled = sm.add_constant(test_data_scaled)

    # Dynamically return the scaled forecast data along with train and test data
    return {
        'train_data_scaled': train_data_scaled,
        'test_data_scaled': test_data_scaled,
        'y_train': y_train,
        'y_test': y_test,
        'scaler': scaler,
        'X': X,
        'etf_history': etf_history,
        'forecast_data_scaled': forecast_data_scaled,
        'forecast_data': forecast_data
    }


In [8]:
#train_data_scaled, test_data_scaled, y_train, y_test, scaler, X, etf_history, forecast_data_scaled_1m, forecast_data_scaled_2m, forecast_data_scaled_3m, forecast_data_scaled_4m, forecast_data_1m, forecast_data_2m, forecast_data_3m, forecast_data_4m = pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_start_date, prediction_dates)

In [9]:
"""new_list =[train_data_scaled, test_data_scaled, y_train, y_test, X, etf_history, forecast_data_scaled_1m, forecast_data_scaled_2m, forecast_data_scaled_3m, forecast_data_scaled_4m, forecast_data_1m, forecast_data_2m, forecast_data_3m, forecast_data_4m]


for i in new_list:
    if hasattr(i, 'shape'):
        print(i.shape)
len(new_list)"""

"new_list =[train_data_scaled, test_data_scaled, y_train, y_test, X, etf_history, forecast_data_scaled_1m, forecast_data_scaled_2m, forecast_data_scaled_3m, forecast_data_scaled_4m, forecast_data_1m, forecast_data_2m, forecast_data_3m, forecast_data_4m]\n\n\nfor i in new_list:\n    if hasattr(i, 'shape'):\n        print(i.shape)\nlen(new_list)"

In [10]:
"""def build_lstm_model(units, dropout_rate, input_shape):
    model = Sequential()

    # First LSTM layer
    model.add(LSTM(units=int(units), return_sequences=True, input_shape=input_shape))  # Pass `input_shape` here
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())  # Added Batch Normalization
    
    # Second LSTM layer
    model.add(LSTM(units=int(units), return_sequences=True))
    model.add(Dropout(dropout_rate))
    
    # Third LSTM layer (newly added)
    model.add(LSTM(units=int(units), return_sequences=False))
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())  # Added Batch Normalization
    
    # Dense layer
    model.add(Dense(units=int(units//2), activation='relu'))  # Added dense layer with ReLU activation
    model.add(Dropout(dropout_rate))
    
    # Output layer
    model.add(Dense(1))
    
    return model


def train_lstm_model(train_data_scaled, y_train, epochs=50, batch_size=32):
    # Reshape data for LSTM [samples, time steps, features]
    train_data_scaled = np.reshape(train_data_scaled, (train_data_scaled.shape[0], 1, train_data_scaled.shape[1]))
    
    # Initialize the model
    model = build_lstm_model(input_shape=(train_data_scaled.shape[1], train_data_scaled.shape[2]))
    
    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train the model
    model.fit(train_data_scaled, y_train, epochs=epochs, batch_size=batch_size)
    
    return model
"""

"def build_lstm_model(units, dropout_rate, input_shape):\n    model = Sequential()\n\n    # First LSTM layer\n    model.add(LSTM(units=int(units), return_sequences=True, input_shape=input_shape))  # Pass `input_shape` here\n    model.add(Dropout(dropout_rate))\n    model.add(BatchNormalization())  # Added Batch Normalization\n    \n    # Second LSTM layer\n    model.add(LSTM(units=int(units), return_sequences=True))\n    model.add(Dropout(dropout_rate))\n    \n    # Third LSTM layer (newly added)\n    model.add(LSTM(units=int(units), return_sequences=False))\n    model.add(Dropout(dropout_rate))\n    model.add(BatchNormalization())  # Added Batch Normalization\n    \n    # Dense layer\n    model.add(Dense(units=int(units//2), activation='relu'))  # Added dense layer with ReLU activation\n    model.add(Dropout(dropout_rate))\n    \n    # Output layer\n    model.add(Dense(1))\n    \n    return model\n\n\ndef train_lstm_model(train_data_scaled, y_train, epochs=50, batch_size=32):\n    # R

In [11]:
def build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train, validation_split=0.2):
    train_data_reshaped = reshape_for_lstm(train_data_scaled)
    
    model = Sequential()
    
    # First LSTM layer
    model.add(LSTM(units=int(units), return_sequences=True, input_shape=(train_data_reshaped.shape[1], train_data_reshaped.shape[2])))
    model.add(Dropout(dropout_rate))
    
    # Second LSTM layer with return_sequences=False
    model.add(LSTM(units=int(units), return_sequences=False))
    model.add(Dropout(dropout_rate))
    
    # Adding a Dense hidden layer with ReLU activation
    model.add(Dense(units=int(units/2), activation='relu'))
    model.add(Dropout(dropout_rate))
    
    # Output layer
    model.add(Dense(1))
    
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    
    # Early stopping to prevent overfitting
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    
     # Track the start time
    #start_time = time.time()
    
    model.fit(train_data_reshaped, y_train, 
              epochs=int(epochs), 
              batch_size=int(batch_size), 
              validation_split=validation_split, 
              callbacks=[early_stopping],
              verbose=0)
    
    # Calculate time consumed
    #time_consumed = time.time() - start_time
    
    return model



def optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test):
    def objective_function(epochs, batch_size, units, dropout_rate, learning_rate):
        model = build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train)
        test_data_reshaped = reshape_for_lstm(test_data_scaled)
        test_predictions = make_predictions(model, test_data_reshaped)
        mse = mean_squared_error(y_test, test_predictions)
        return -mse  # We return the negative MSE because Bayesian Optimization tries to maximize the function

    # Reduced parameter ranges for faster search
    param_bounds = {
        'epochs': (10, 50),  # Lowered the number of epochs
        'batch_size': (32, 64),  # Limited batch size range
        'units': (50, 100),  # Narrowed the range of units
        'dropout_rate': (0.1, 0.4),  # Lowered the dropout rate range
        'learning_rate': (0.0001, 0.001)  # Narrowed learning rate range
    }
    
    optimizer = BayesianOptimization(
        f=objective_function,
        pbounds=param_bounds,
        verbose=2,
        random_state=42,
    )

    # Reduced the number of iterations for faster optimization
    optimizer.maximize(init_points=5, n_iter=50)  # Fewer initial points and iterations

    best_params = optimizer.max['params']
    return best_params


In [12]:
def make_predictions(model, data_scaled):
    if len(data_scaled.shape) == 2:
        data_scaled = np.reshape(data_scaled, (data_scaled.shape[0], 1, data_scaled.shape[1]))
    elif len(data_scaled.shape) != 3:
        raise ValueError(f"Unexpected shape for input data: {data_scaled.shape}")
    
    predictions = model.predict(data_scaled)
    return predictions

def eval_model(best_model, test_data_scaled, y_test, y_train=None):
    # Reshape test data to match LSTM input requirements
    test_data_reshaped = test_data_scaled.reshape((test_data_scaled.shape[0], 1, test_data_scaled.shape[1]))
    
    # Make predictions
    test_predictions = best_model.predict(test_data_reshaped)
    
    # Calculate Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_test, test_predictions)
    
    # Calculate Mean Absolute Scaled Error (MASE) - Example calculation assuming seasonal period m=1
    naive_forecast = y_test.shift(1).fillna(method='bfill')
    mase = mae / mean_absolute_error(y_test, naive_forecast)
    print("mae: ",mae)
    print("mase: ",mase)
    return mae, mase


In [13]:
# Ensured consistent feature handling and forecasting in predictions
def predictions(model, forecast_data_scaled, forecast_data):
    # Make predictions using the model
    predictions_forecast = make_predictions(model, forecast_data_scaled)
    
    # Create a DataFrame for the predictions with the same index as the forecast data
    forecast_predictions_df = pd.DataFrame(predictions_forecast, columns=["log_returns"], index=forecast_data.index)
    
    # Concatenate the original forecast data with the predictions
    forecast_data_with_predictions = pd.concat([forecast_data, forecast_predictions_df], axis=1)
    
    return forecast_predictions_df, forecast_data_with_predictions

In [14]:
"""def make_test_predictions(model, data_scaled):
    # Check if the data is already in the correct shape
    if len(data_scaled.shape) == 2:  # Check if it's (samples, features)
        data_scaled = np.reshape(data_scaled, (data_scaled.shape[0], 1, data_scaled.shape[1]))
    elif len(data_scaled.shape) != 3:  # If it's not already (samples, time_steps, features)
        raise ValueError(f"Unexpected shape for input data: {data_scaled.shape}")
    
    # Make predictions
    predictions = model.predict(data_scaled)
    
    return predictions


def evaluate_model(predictions, actual):
    # Calculate evaluation metrics
    mse = mean_squared_error(actual, predictions)
    mae = mean_absolute_error(actual, predictions)
    
    return mse, mae"""

'def make_test_predictions(model, data_scaled):\n    # Check if the data is already in the correct shape\n    if len(data_scaled.shape) == 2:  # Check if it\'s (samples, features)\n        data_scaled = np.reshape(data_scaled, (data_scaled.shape[0], 1, data_scaled.shape[1]))\n    elif len(data_scaled.shape) != 3:  # If it\'s not already (samples, time_steps, features)\n        raise ValueError(f"Unexpected shape for input data: {data_scaled.shape}")\n    \n    # Make predictions\n    predictions = model.predict(data_scaled)\n    \n    return predictions\n\n\ndef evaluate_model(predictions, actual):\n    # Calculate evaluation metrics\n    mse = mean_squared_error(actual, predictions)\n    mae = mean_absolute_error(actual, predictions)\n    \n    return mse, mae'

In [15]:
"""
# Train the LSTM model
lstm_model = train_lstm_model(train_data_scaled, y_train, epochs=50, batch_size=32)

# Make predictions on test data
test_predictions = make_predictions(lstm_model, test_data_scaled)

# Evaluate model on test data
mse, mae = evaluate_model(test_predictions, y_test)
print(f"Test MSE: {mse}, Test MAE: {mae}")"""

'\n# Train the LSTM model\nlstm_model = train_lstm_model(train_data_scaled, y_train, epochs=50, batch_size=32)\n\n# Make predictions on test data\ntest_predictions = make_predictions(lstm_model, test_data_scaled)\n\n# Evaluate model on test data\nmse, mae = evaluate_model(test_predictions, y_test)\nprint(f"Test MSE: {mse}, Test MAE: {mae}")'

In [16]:
def calculate_mase(y_true, y_pred, naive_forecast):
    # Calculate the MAE of the model's predictions
    mae_model = mean_absolute_error(y_true, y_pred)
    
    # Calculate the MAE of the naive forecast
    mae_naive = mean_absolute_error(y_true, naive_forecast)
    
    # Calculate MASE
    mase = mae_model / mae_naive
    return mase

In [17]:
def reshape_for_lstm(data):
    return np.reshape(data, (data.shape[0], 1, data.shape[1]))  # Reshape into (samples, time_steps, features)


In [18]:
#etf_pred_dict, scores_1m, scores_2m, scores_3m, scores_4m = main(tickers, etf_dict)

In [19]:
tickers = ['SPY', 'QQQ', 'VTI', 'IWM', 'EFA', 'EEM', 'GLD', 'SLV', 'USO', 'XLF']
#tickers = ['SPY', 'QQQ', 'VTI']
#tickers = ['SPY']
                                                           
start_date, end_date = '2000-01-01', '2024-06-01'
train_start_date, train_end_date, test_start_date, test_end_date, prediction_start_date = start_date,'2014-01-01', '2014-01-01', '2024-01-01', '2024-01-01'
prediction_dates=['2024-01-01','2024-02-01','2024-03-01', '2024-04-01','2024-05-01']

In [None]:
dict_data = {}
etf_dict = {}
data_with_predictors = []
for i in tickers:
    etf_history,etf_data = data_loading(i, start_date, end_date)
    
    print(etf_data.info.get('longName'), ":",i)
    
    #test_stationarity(etf_history)
    print(" ETF:",i)
    #print(etf_history.head(10))
    #print("################")
    #hist_data.append(etf_history)
    
    etf_history = etf_predictors(etf_history,  start_date, end_date, etf_data,benchmark_ticker = '^GSPC' )
    
    # Call the function
    preprocessed_data = pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates)

    # Extract individual components from the returned dictionary
    train_data_scaled = preprocessed_data['train_data_scaled']
    test_data_scaled = preprocessed_data['test_data_scaled']
    y_train = preprocessed_data['y_train']
    y_test = preprocessed_data['y_test']
    scaler = preprocessed_data['scaler']
    X = preprocessed_data['X']
    etf_history = preprocessed_data['etf_history']

    # Optionally, access forecast data for each period
    forecast_data_scaled_1m = preprocessed_data['forecast_data_scaled'].get('forecast_data_1m')
    forecast_data_scaled_2m = preprocessed_data['forecast_data_scaled'].get('forecast_data_2m')
    forecast_data_scaled_3m = preprocessed_data['forecast_data_scaled'].get('forecast_data_3m')
    forecast_data_scaled_4m = preprocessed_data['forecast_data_scaled'].get('forecast_data_4m')
    forecast_data_scaled_5m = preprocessed_data['forecast_data_scaled'].get('forecast_data_5m')
    
    forecast_data_1m = preprocessed_data['forecast_data'].get('forecast_data_1m')
    forecast_data_2m = preprocessed_data['forecast_data'].get('forecast_data_2m')
    forecast_data_3m = preprocessed_data['forecast_data'].get('forecast_data_3m')
    forecast_data_4m = preprocessed_data['forecast_data'].get('forecast_data_4m')
    forecast_data_5m = preprocessed_data['forecast_data'].get('forecast_data_5m')

    
    # Track the start time
    start_time = time.time()
    
    
    # Optimize hyperparameters
    # Optimize hyperparameters
    best_params = optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test)
    
    # Extract the best hyperparameters
    best_epochs = int(best_params['epochs'])
    best_batch_size = int(best_params['batch_size'])
    best_units = int(best_params['units'])
    best_dropout_rate = best_params['dropout_rate']
    best_learning_rate = best_params['learning_rate']

    # Train the final model using the best hyperparameters
    best_model = build_and_train_lstm_model(
        epochs=best_epochs, 
        batch_size=best_batch_size, 
        units=best_units, 
        dropout_rate=best_dropout_rate, 
        learning_rate=best_learning_rate, 
        train_data_scaled=train_data_scaled, 
        y_train=y_train
    )
    
     
    
    # Calculate time consumed
    time_consumed = time.time() - start_time
    
    # Print the time consumed
    print(f"Time consumed for training: {time_consumed:.2f} seconds")

    # Evaluate the model
    mae, mase = eval_model(best_model, test_data_scaled, y_test)
    
    print(len(forecast_data_scaled_1m))
    print(len(forecast_data_scaled_2m))
    print(len(forecast_data_scaled_3m))
    print(len(forecast_data_scaled_4m))
        
    forecast_predictions_df_1m, forecast_data_with_predictions_1m = predictions(best_model, forecast_data_scaled_1m, forecast_data_1m)
    forecast_predictions_df_2m, forecast_data_with_predictions_2m = predictions(best_model, forecast_data_scaled_2m, forecast_data_2m)
    forecast_predictions_df_3m, forecast_data_with_predictions_3m = predictions(best_model, forecast_data_scaled_3m, forecast_data_3m)
    forecast_predictions_df_4m, forecast_data_with_predictions_4m = predictions(best_model, forecast_data_scaled_4m, forecast_data_4m)
    forecast_predictions_df_5m, forecast_data_with_predictions_5m = predictions(best_model, forecast_data_scaled_5m, forecast_data_5m)

        
    # Add data to dictionary
    # Store the relevant data in a dictionary

    print(forecast_predictions_df_5m)
    dict_data = {
        "etf_history": etf_history,
        "forecast_data_1m": forecast_data_1m,
        "forecast_data_2m": forecast_data_2m,
        "forecast_data_3m": forecast_data_3m,
        "forecast_data_4m": forecast_data_4m,
        "forecast_data_5m": forecast_data_5m,
        "X": X,
        "y_train_values": y_train , # Assuming this is relevant to the ETF data
        "model results": {"mae": mae,
                          "mase": mase,
                          #"mape": mape
                         },
                         
        "forecast_predictions_df_1m" : forecast_predictions_df_1m['log_returns'].values,
        "forecast_predictions_df_2m" : forecast_predictions_df_2m['log_returns'].values,
        "forecast_predictions_df_3m" : forecast_predictions_df_3m['log_returns'].values,
        "forecast_predictions_df_4m" : forecast_predictions_df_4m['log_returns'].values,
        "forecast_predictions_df_5m" : forecast_predictions_df_5m['log_returns'].values
    }
    
    # Associate the dictionary with the ETF ticker symbol
    etf_dict[i] = dict_data
     

[*********************100%%**********************]  1 of 1 completed

SPDR S&P 500 ETF Trust : SPY
 ETF: SPY





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-5.045e-0 | [30m43.99     | [30m0.3852    | [30m39.28     | [30m0.0006388 | [30m57.8      |
| [30m2         | [30m-6.414e-0 | [30m36.99     | [30m0.1174    | [30m44.65     | [30m0.000641  | [30m85.4      |
| [30m3         | [30m-0.00029  | [30m32.66     | [30m0.391     | [30m43.3      | [30m0.0002911 | [30m59.09     |
| [30m4         | [30m-0.000456 | [30m37.87     | [30m0.1913    | [30m30.99     | [30m0.0004888 | [30m64.56     |
| [30m5         | [30m-0.000155 | [30m51.58     | [30m0.1418    | [30m21.69     | [30m0.0004297 | [30m72.8      |
| [30m6         | [30m-6.701e-0 | [30m36.46     | [30m0.2708    | [30m44.72     | [30m0.0006248 | [30m85.9      |
| [30m7         | [30m-0.000118 | [30m50.7      | [30m0.1454    | [30m42.16     | [30m0.0005783 

| [30m47        | [30m-0.000898 | [30m57.36     | [30m0.1861    | [30m45.31     | [30m0.0007933 | [30m62.34     |
| [30m48        | [30m-0.000181 | [30m36.67     | [30m0.1037    | [30m39.9      | [30m0.0005786 | [30m80.68     |
| [30m49        | [30m-6.422e-0 | [30m53.48     | [30m0.1025    | [30m44.59     | [30m0.000942  | [30m64.12     |
| [30m50        | [30m-0.000127 | [30m34.61     | [30m0.2153    | [30m42.19     | [30m0.0002925 | [30m80.44     |
| [30m51        | [30m-9.41e-05 | [30m51.83     | [30m0.1322    | [30m44.7      | [30m0.0001386 | [30m62.69     |
| [30m52        | [30m-6.205e-0 | [30m61.6      | [30m0.2612    | [30m46.75     | [30m0.0006161 | [30m64.35     |
| [30m53        | [30m-0.000131 | [30m43.42     | [30m0.3127    | [30m35.07     | [30m0.0005155 | [30m51.09     |
| [30m54        | [30m-5.967e-0 | [30m37.32     | [30m0.3758    | [30m42.95     | [30m0.0009765 | [30m83.51     |
| [30m55        | [30m-4.895e-

  naive_forecast = y_test.shift(1).fillna(method='bfill')
[*********************100%%**********************]  1 of 1 completed

Invesco QQQ Trust : QQQ
 ETF: QQQ
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------





| [30m1         | [30m-6.75e-05 | [30m43.99     | [30m0.3852    | [30m39.28     | [30m0.0006388 | [30m57.8      |
| [30m2         | [30m-9.074e-0 | [30m36.99     | [30m0.1174    | [30m44.65     | [30m0.000641  | [30m85.4      |
| [35m3         | [35m-2.951e-0 | [35m32.66     | [35m0.391     | [35m43.3      | [35m0.0002911 | [35m59.09     |
| [30m4         | [30m-3.944e-0 | [30m37.87     | [30m0.1913    | [30m30.99     | [30m0.0004888 | [30m64.56     |
| [30m5         | [30m-9.634e-0 | [30m51.58     | [30m0.1418    | [30m21.69     | [30m0.0004297 | [30m72.8      |
| [30m6         | [30m-8.668e-0 | [30m32.32     | [30m0.296     | [30m36.09     | [30m0.0004441 | [30m60.97     |
| [30m7         | [30m-4.947e-0 | [30m32.66     | [30m0.3927    | [30m43.3      | [30m0.001     | [30m59.09     |
| [30m8         | [30m-9.555e-0 | [30m32.66     | [30m0.3883    | [30m43.3      | [30m0.001     | [30m59.09     |
| [30m9         | [30m-5.076e-

| [30m48        | [30m-0.000106 | [30m44.44     | [30m0.3077    | [30m33.96     | [30m0.0008363 | [30m82.14     |
| [30m49        | [30m-3.169e-0 | [30m44.42     | [30m0.2923    | [30m33.94     | [30m0.000999  | [30m82.13     |
| [30m50        | [30m-7.669e-0 | [30m39.45     | [30m0.2214    | [30m30.27     | [30m0.0002488 | [30m94.13     |
| [30m51        | [30m-3.934e-0 | [30m44.41     | [30m0.2842    | [30m33.93     | [30m0.0008908 | [30m82.12     |
| [30m52        | [30m-3.634e-0 | [30m34.67     | [30m0.3417    | [30m34.17     | [30m0.0009487 | [30m66.15     |
| [30m53        | [30m-3.609e-0 | [30m44.15     | [30m0.2836    | [30m46.45     | [30m0.0008462 | [30m55.53     |
| [30m54        | [30m-0.000124 | [30m39.73     | [30m0.3518    | [30m20.26     | [30m0.0003054 | [30m56.57     |
| [30m55        | [30m-0.000781 | [30m55.52     | [30m0.365     | [30m16.02     | [30m0.0002122 | [30m80.25     |
Time consumed for training: 220.

  naive_forecast = y_test.shift(1).fillna(method='bfill')
[*********************100%%**********************]  1 of 1 completed

Vanguard Total Stock Market Index Fund ETF Shares : VTI
 ETF: VTI
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------





| [30m1         | [30m-5.475e-0 | [30m43.99     | [30m0.3852    | [30m39.28     | [30m0.0006388 | [30m57.8      |
| [30m2         | [30m-0.000291 | [30m36.99     | [30m0.1174    | [30m44.65     | [30m0.000641  | [30m85.4      |
| [35m3         | [35m-4.724e-0 | [35m32.66     | [35m0.391     | [35m43.3      | [35m0.0002911 | [35m59.09     |
| [30m4         | [30m-0.000263 | [30m37.87     | [30m0.1913    | [30m30.99     | [30m0.0004888 | [30m64.56     |
| [30m5         | [30m-0.000113 | [30m51.58     | [30m0.1418    | [30m21.69     | [30m0.0004297 | [30m72.8      |
| [30m6         | [30m-0.000171 | [30m32.98     | [30m0.1112    | [30m42.9      | [30m0.0004242 | [30m56.35     |
| [30m7         | [30m-0.000630 | [30m48.65     | [30m0.1605    | [30m26.68     | [30m0.0006596 | [30m94.09     |
| [30m8         | [30m-9.205e-0 | [30m39.73     | [30m0.1973    | [30m44.25     | [30m0.000235  | [30m61.84     |
| [30m9         | [30m-5.855e-

| [30m48        | [30m-0.000137 | [30m44.46     | [30m0.2828    | [30m21.83     | [30m0.0002035 | [30m52.68     |
| [30m49        | [30m-9.912e-0 | [30m48.51     | [30m0.3082    | [30m19.2      | [30m0.0006049 | [30m70.61     |
| [30m50        | [30m-0.000310 | [30m39.45     | [30m0.2214    | [30m30.27     | [30m0.0002488 | [30m94.13     |
| [30m51        | [30m-6.229e-0 | [30m51.4      | [30m0.2479    | [30m40.9      | [30m0.0007134 | [30m57.55     |
| [30m52        | [30m-9.028e-0 | [30m34.67     | [30m0.3417    | [30m34.17     | [30m0.0009487 | [30m66.15     |
| [30m53        | [30m-5.959e-0 | [30m44.15     | [30m0.2836    | [30m46.45     | [30m0.0008462 | [30m55.53     |
| [30m54        | [30m-5.546e-0 | [30m39.73     | [30m0.3518    | [30m20.26     | [30m0.0003054 | [30m56.57     |
| [30m55        | [30m-3.28e-05 | [30m55.52     | [30m0.365     | [30m16.02     | [30m0.0002122 | [30m80.25     |
Time consumed for training: 278.

  naive_forecast = y_test.shift(1).fillna(method='bfill')


            log_returns
Date                   
2024-05-01     0.026265
2024-05-02     0.028083
2024-05-03     0.023640
2024-05-06     0.024089
2024-05-07     0.019809
2024-05-08     0.019486
2024-05-09     0.016493
2024-05-10     0.015840
2024-05-13     0.012013
2024-05-14     0.009083
2024-05-15     0.007568
2024-05-16     0.001036
2024-05-17     0.002005
2024-05-20    -0.001364
2024-05-21     0.001236
2024-05-22     0.003355
2024-05-23     0.006386
2024-05-24     0.007921
2024-05-28     0.007485
2024-05-29     0.006614
2024-05-30     0.005250
2024-05-31     0.008284


[*********************100%%**********************]  1 of 1 completed

iShares Russell 2000 ETF : IWM
 ETF: IWM





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000162 | [30m43.99     | [30m0.3852    | [30m39.28     | [30m0.0006388 | [30m57.8      |
| [35m2         | [35m-2.778e-0 | [35m36.99     | [35m0.1174    | [35m44.65     | [35m0.000641  | [35m85.4      |
| [30m3         | [30m-9.371e-0 | [30m32.66     | [30m0.391     | [30m43.3      | [30m0.0002911 | [30m59.09     |
| [30m4         | [30m-7.703e-0 | [30m37.87     | [30m0.1913    | [30m30.99     | [30m0.0004888 | [30m64.56     |
| [30m5         | [30m-6.347e-0 | [30m51.58     | [30m0.1418    | [30m21.69     | [30m0.0004297 | [30m72.8      |
| [30m6         | [30m-6.484e-0 | [30m34.38     | [30m0.1311    | [30m42.52     | [30m0.000135  | [30m82.95     |
| [30m7         | [30m-4.558e-0 | [30m38.64     | [30m0.1728    | [30m49.45     | [30m0.000432  

| [30m47        | [30m-6.111e-0 | [30m62.87     | [30m0.136     | [30m28.71     | [30m0.0007867 | [30m93.99     |
| [30m48        | [30m-4.676e-0 | [30m44.46     | [30m0.2828    | [30m21.83     | [30m0.0002035 | [30m52.68     |
| [30m49        | [30m-0.000106 | [30m48.51     | [30m0.3082    | [30m19.2      | [30m0.0006049 | [30m70.61     |
| [30m50        | [30m-4.921e-0 | [30m39.45     | [30m0.2214    | [30m30.27     | [30m0.0002488 | [30m94.13     |
| [30m51        | [30m-5.546e-0 | [30m51.4      | [30m0.2479    | [30m40.9      | [30m0.0007134 | [30m57.55     |
| [30m52        | [30m-8.735e-0 | [30m34.67     | [30m0.3417    | [30m34.17     | [30m0.0009487 | [30m66.15     |
| [30m53        | [30m-8.221e-0 | [30m44.15     | [30m0.2836    | [30m46.45     | [30m0.0008462 | [30m55.53     |
| [30m54        | [30m-6.698e-0 | [30m39.73     | [30m0.3518    | [30m20.26     | [30m0.0003054 | [30m56.57     |
| [30m55        | [30m-0.00062

  naive_forecast = y_test.shift(1).fillna(method='bfill')
[*********************100%%**********************]  1 of 1 completed

iShares MSCI EAFE ETF : EFA
 ETF: EFA
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------





| [30m1         | [30m-8.383e-0 | [30m43.99     | [30m0.3852    | [30m39.28     | [30m0.0006388 | [30m57.8      |
| [35m2         | [35m-3.944e-0 | [35m36.99     | [35m0.1174    | [35m44.65     | [35m0.000641  | [35m85.4      |
| [30m3         | [30m-1.704e-0 | [30m32.66     | [30m0.391     | [30m43.3      | [30m0.0002911 | [30m59.09     |
| [30m4         | [30m-1.086e-0 | [30m37.87     | [30m0.1913    | [30m30.99     | [30m0.0004888 | [30m64.56     |
| [30m5         | [30m-5.82e-06 | [30m51.58     | [30m0.1418    | [30m21.69     | [30m0.0004297 | [30m72.8      |
| [35m6         | [35m-3.04e-06 | [35m36.46     | [35m0.2708    | [35m44.72     | [35m0.0006248 | [35m85.9      |
| [30m7         | [30m-4.121e-0 | [30m33.25     | [30m0.1587    | [30m42.36     | [30m0.0008437 | [30m90.94     |
| [35m8         | [35m-1.61e-06 | [35m32.54     | [35m0.2372    | [35m49.82     | [35m0.0009763 | [35m87.85     |
| [30m9         | [30m-5.619e-

| [30m48        | [30m-1.533e-0 | [30m32.4      | [30m0.1123    | [30m31.14     | [30m0.000519  | [30m91.57     |
| [30m49        | [30m-6.061e-0 | [30m42.95     | [30m0.2944    | [30m20.29     | [30m0.0007146 | [30m70.94     |
| [30m50        | [30m-1.02e-05 | [30m32.19     | [30m0.3165    | [30m37.01     | [30m0.0002362 | [30m83.93     |
| [30m51        | [30m-7.204e-0 | [30m52.56     | [30m0.295     | [30m27.29     | [30m0.0002942 | [30m90.63     |
| [30m52        | [30m-7.193e-0 | [30m32.24     | [30m0.2683    | [30m38.35     | [30m0.0003252 | [30m99.76     |
| [30m53        | [30m-1.476e-0 | [30m63.77     | [30m0.1612    | [30m10.14     | [30m0.0004561 | [30m50.81     |
| [30m54        | [30m-1.061e-0 | [30m37.16     | [30m0.1214    | [30m49.88     | [30m0.0002486 | [30m50.62     |
| [30m55        | [30m-1.018e-0 | [30m32.03     | [30m0.1649    | [30m11.35     | [30m0.0001057 | [30m58.85     |
Time consumed for training: 412.

  naive_forecast = y_test.shift(1).fillna(method='bfill')


            log_returns
Date                   
2024-05-01    -0.002986
2024-05-02     0.011051
2024-05-03     0.009122
2024-05-06     0.007445
2024-05-07     0.004279
2024-05-08     0.000868
2024-05-09     0.009376
2024-05-10     0.005726
2024-05-13     0.004604
2024-05-14     0.011257
2024-05-15     0.015322
2024-05-16    -0.001684
2024-05-17     0.008322
2024-05-20     0.007025
2024-05-21     0.002976
2024-05-22    -0.007390
2024-05-23    -0.003077
2024-05-24     0.010566
2024-05-28     0.002091
2024-05-29    -0.013483
2024-05-30     0.009764
2024-05-31     0.011570


[*********************100%%**********************]  1 of 1 completed

iShares MSCI Emerging Markets ETF : EEM
 ETF: EEM
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------





| [30m1         | [30m-2.359e-0 | [30m43.99     | [30m0.3852    | [30m39.28     | [30m0.0006388 | [30m57.8      |
| [35m2         | [35m-3.026e-0 | [35m36.99     | [35m0.1174    | [35m44.65     | [35m0.000641  | [35m85.4      |
| [30m3         | [30m-9.932e-0 | [30m32.66     | [30m0.391     | [30m43.3      | [30m0.0002911 | [30m59.09     |
| [35m4         | [35m-1.179e-0 | [35m37.87     | [35m0.1913    | [35m30.99     | [35m0.0004888 | [35m64.56     |
| [30m5         | [30m-4.57e-06 | [30m51.58     | [30m0.1418    | [30m21.69     | [30m0.0004297 | [30m72.8      |
| [30m6         | [30m-1.345e-0 | [30m37.67     | [30m0.3047    | [30m31.22     | [30m0.0001749 | [30m64.26     |
| [30m7         | [30m-2.349e-0 | [30m37.87     | [30m0.1916    | [30m30.99     | [30m0.0008448 | [30m64.56     |
| [30m8         | [30m-9.852e-0 | [30m37.86     | [30m0.1869    | [30m30.99     | [30m0.0001    | [30m64.56     |
| [30m9         | [30m-1.088e-

| [30m48        | [30m-1.177e-0 | [30m44.46     | [30m0.2828    | [30m21.83     | [30m0.0002035 | [30m52.68     |
| [30m49        | [30m-1.153e-0 | [30m48.51     | [30m0.3082    | [30m19.2      | [30m0.0006049 | [30m70.61     |
| [30m50        | [30m-3.935e-0 | [30m39.45     | [30m0.2214    | [30m30.27     | [30m0.0002488 | [30m94.13     |
| [30m51        | [30m-3.878e-0 | [30m51.4      | [30m0.2479    | [30m40.9      | [30m0.0007134 | [30m57.55     |
| [30m52        | [30m-2.629e-0 | [30m34.67     | [30m0.3417    | [30m34.17     | [30m0.0009487 | [30m66.15     |
| [30m53        | [30m-4.583e-0 | [30m44.15     | [30m0.2836    | [30m46.45     | [30m0.0008462 | [30m55.53     |
| [30m54        | [30m-1.618e-0 | [30m39.73     | [30m0.3518    | [30m20.26     | [30m0.0003054 | [30m56.57     |
| [30m55        | [30m-1.711e-0 | [30m55.52     | [30m0.365     | [30m16.02     | [30m0.0002122 | [30m80.25     |
Time consumed for training: 293.

  naive_forecast = y_test.shift(1).fillna(method='bfill')
[*********************100%%**********************]  1 of 1 completed

SPDR Gold Shares : GLD
 ETF: GLD
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------





| [30m1         | [30m-1.678e-0 | [30m43.99     | [30m0.3852    | [30m39.28     | [30m0.0006388 | [30m57.8      |
| [35m2         | [35m-1.402e-0 | [35m36.99     | [35m0.1174    | [35m44.65     | [35m0.000641  | [35m85.4      |
| [30m3         | [30m-8.59e-06 | [30m32.66     | [30m0.391     | [30m43.3      | [30m0.0002911 | [30m59.09     |
| [30m4         | [30m-3.93e-06 | [30m37.87     | [30m0.1913    | [30m30.99     | [30m0.0004888 | [30m64.56     |
| [30m5         | [30m-2.6e-06  | [30m51.58     | [30m0.1418    | [30m21.69     | [30m0.0004297 | [30m72.8      |
| [30m6         | [30m-1.461e-0 | [30m36.46     | [30m0.2708    | [30m44.72     | [30m0.0006248 | [30m85.9      |
| [30m7         | [30m-1.848e-0 | [30m52.34     | [30m0.1809    | [30m36.07     | [30m0.0005113 | [30m55.76     |
| [30m8         | [30m-1.042e-0 | [30m52.47     | [30m0.3878    | [30m44.98     | [30m0.0001695 | [30m63.89     |
| [30m9         | [30m-7.423e-

| [30m48        | [30m-2.192e-0 | [30m63.88     | [30m0.3167    | [30m49.45     | [30m0.0005446 | [30m96.11     |
| [30m49        | [30m-3.738e-0 | [30m45.95     | [30m0.3105    | [30m25.11     | [30m0.0009289 | [30m85.15     |
| [30m50        | [30m-9.023e-0 | [30m32.73     | [30m0.3334    | [30m12.11     | [30m0.0006436 | [30m50.62     |
| [30m51        | [30m-1.098e-0 | [30m39.14     | [30m0.2758    | [30m49.69     | [30m0.0001815 | [30m99.91     |
| [30m52        | [30m-9.782e-0 | [30m62.46     | [30m0.3516    | [30m11.41     | [30m0.0005922 | [30m51.3      |
| [30m53        | [30m-5.607e-0 | [30m56.92     | [30m0.2554    | [30m49.98     | [30m0.000391  | [30m85.67     |
| [30m54        | [30m-2.812e-0 | [30m33.1      | [30m0.2106    | [30m49.71     | [30m0.0004288 | [30m73.62     |
| [30m55        | [30m-1.582e-0 | [30m32.14     | [30m0.3442    | [30m26.86     | [30m0.0009891 | [30m82.72     |
Time consumed for training: 218.

  naive_forecast = y_test.shift(1).fillna(method='bfill')
[*********************100%%**********************]  1 of 1 completed

iShares Silver Trust : SLV
 ETF: SLV
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------





| [30m1         | [30m-6.933e-0 | [30m43.99     | [30m0.3852    | [30m39.28     | [30m0.0006388 | [30m57.8      |
| [35m2         | [35m-1.949e-0 | [35m36.99     | [35m0.1174    | [35m44.65     | [35m0.000641  | [35m85.4      |
| [30m3         | [30m-3.599e-0 | [30m32.66     | [30m0.391     | [30m43.3      | [30m0.0002911 | [30m59.09     |
| [30m4         | [30m-3.351e-0 | [30m37.87     | [30m0.1913    | [30m30.99     | [30m0.0004888 | [30m64.56     |
| [35m5         | [35m-1.786e-0 | [35m51.58     | [35m0.1418    | [35m21.69     | [35m0.0004297 | [35m72.8      |
| [30m6         | [30m-2.439e-0 | [30m37.67     | [30m0.3047    | [30m31.22     | [30m0.0001749 | [30m64.26     |
| [30m7         | [30m-2.866e-0 | [30m51.58     | [30m0.1423    | [30m21.69     | [30m0.000814  | [30m72.8      |
| [30m8         | [30m-5.054e-0 | [30m51.57     | [30m0.1317    | [30m21.68     | [30m0.0001    | [30m72.79     |
| [30m9         | [30m-3.371e-

| [30m48        | [30m-1.232e-0 | [30m44.46     | [30m0.2828    | [30m21.83     | [30m0.0002035 | [30m52.68     |
| [30m49        | [30m-5.963e-0 | [30m48.51     | [30m0.3082    | [30m19.2      | [30m0.0006049 | [30m70.61     |
| [30m50        | [30m-8.051e-0 | [30m39.45     | [30m0.2214    | [30m30.27     | [30m0.0002488 | [30m94.13     |
| [30m51        | [30m-4.952e-0 | [30m51.4      | [30m0.2479    | [30m40.9      | [30m0.0007134 | [30m57.55     |
| [30m52        | [30m-1.1e-05  | [30m34.67     | [30m0.3417    | [30m34.17     | [30m0.0009487 | [30m66.15     |
| [30m53        | [30m-5.722e-0 | [30m44.15     | [30m0.2836    | [30m46.45     | [30m0.0008462 | [30m55.53     |
| [30m54        | [30m-3.834e-0 | [30m39.73     | [30m0.3518    | [30m20.26     | [30m0.0003054 | [30m56.57     |
| [30m55        | [30m-4.056e-0 | [30m55.52     | [30m0.365     | [30m16.02     | [30m0.0002122 | [30m80.25     |
Time consumed for training: 254.

  naive_forecast = y_test.shift(1).fillna(method='bfill')





[*********************100%%**********************]  1 of 1 completed

United States Oil Fund, LP : USO
 ETF: USO





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000124 | [30m43.99     | [30m0.3852    | [30m39.28     | [30m0.0006388 | [30m57.8      |


In [None]:
def calculate_sharpe_ratio(returns, annual_risk_free_rate=0.1,period='daily'):
    #excess_returns = rate_of_return(returns) - risk_free_rate
    
    # Convert annual risk-free rate to daily rate
    daily_risk_free_rate = (1 + annual_risk_free_rate) ** (1/252) - 1
    
    # Calculate mean daily log return
    mean_return = np.mean(returns)
    
    # Calculate excess daily log return
    excess_return = mean_return - daily_risk_free_rate
    
    # Calculate standard deviation of daily log returns
    std_return = np.std(returns)
    
    # Print diagnostic information
    #print(f"Mean Daily Log Return: {mean_return}")
    #print(f"Excess Daily Log Return: {excess_return}")
    #print(f"Standard Deviation of Daily Log Returns: {std_return}")
    
    # Check for zero standard deviation to avoid division by zero
    if std_return == 0:
        return 0
    
    # Calculate Sharpe ratio
    sharpe_ratio = (excess_return / std_return) * np.sqrt(252)  # Annualize the Sharpe ratio
    return sharpe_ratio
    #return excess_returns / np.std(returns)


def calculate_rachev_ratio(returns, lower_percentile=5, upper_percentile=95):
    # Step 1: Sort the returns
    sorted_returns = np.sort(returns)
    
    # Step 2: Determine the percentiles
    lower_threshold = np.percentile(sorted_returns, lower_percentile)
    upper_threshold = np.percentile(sorted_returns, upper_percentile)
    
    # Step 3: Calculate Expected Shortfall (ES)
    es = np.mean(sorted_returns[sorted_returns <= lower_threshold])
    
    # Step 4: Calculate Expected Gain (EG)
    eg = np.mean(sorted_returns[sorted_returns >= upper_threshold])
    
    # Step 5: Compute the Rachev Ratio
    rachev_ratio = eg / -es
    return rachev_ratio

def rate_of_return(returns):
    return ((returns[-1]-returns[0])/returns[0])*100



def calculate_excess_annualized_return(returns, risk_free_rate=0.1):
    compounded_growth = (1 + returns).prod()
    n_periods = returns.shape[0] / 252  # Assuming daily returns, convert to years
    annualized_return = compounded_growth ** (1 / n_periods) - 1
    excess_annualized_return = annualized_return - risk_free_rate
    return excess_annualized_return

def calculate_max_drawdown(returns):
    cumulative_returns = (1 + returns).cumprod()
    peak = pd.Series(cumulative_returns).cummax()
    drawdown = (cumulative_returns - peak) / peak
    max_drawdown = drawdown.min()
    return max_drawdown

def calculate_calmar_ratio(returns, risk_free_rate=0.1):
    excess_annualized_return = calculate_excess_annualized_return(returns, risk_free_rate)
    max_drawdown = calculate_max_drawdown(returns)
    
    # Ensure max_drawdown is positive for ratio calculation
    if max_drawdown == 0:
        return np.inf
    
    calmar_ratio = excess_annualized_return / abs(max_drawdown)
    return calmar_ratio

def calculate_annualized_return(returns, periods_per_year=252):
    compounded_growth = (1 + returns).prod()
    n_periods = len(returns)
    annualized_return = compounded_growth ** (periods_per_year / n_periods) - 1
    return annualized_return

def calculate_max_drawdown(returns):
    cumulative_returns = (1 + returns).cumprod()
    peak = pd.Series(cumulative_returns).cummax()
    drawdown = (cumulative_returns - peak) / peak
    max_drawdown = drawdown.min()
    return max_drawdown

def calculate_average_max_drawdown(returns, periods_per_year=252):
    # Check if the data is less than the period per year
    if len(returns) < periods_per_year:
        return calculate_max_drawdown(returns)
    
    # Split returns into years
    n_years = len(returns) // periods_per_year
    returns_per_year = np.array_split(returns, n_years)
    drawdowns = [calculate_max_drawdown(yearly_returns) for yearly_returns in returns_per_year]
    
    # Exclude the worst 10% of drawdowns
    threshold = np.percentile(drawdowns, 10)
    filtered_drawdowns = [d for d in drawdowns if d > threshold]
    
    average_max_drawdown = np.mean(filtered_drawdowns)
    return average_max_drawdown

def calculate_sterling_ratio(returns, periods_per_year=252):
    annualized_return = calculate_annualized_return(returns, periods_per_year)
    average_max_drawdown = calculate_average_max_drawdown(returns, periods_per_year)
    
    # Ensure average_max_drawdown is positive for ratio calculation
    penalty_constant=0.1
    if average_max_drawdown == 0:
        average_max_drawdown = 0.000001
    
    sterling_ratio = annualized_return / (abs(average_max_drawdown) + penalty_constant)
    return sterling_ratio

def calculate_volatility_clustering(returns):
    squared_returns = returns ** 2
    n = len(squared_returns)
    
    # Mean of squared returns
    mean_squared_returns = np.mean(squared_returns)
    
    # Calculate the numerator and denominator for autocorrelation at lag 1
    numerator = np.sum((squared_returns[:-1] - mean_squared_returns) * (squared_returns[1:] - mean_squared_returns))
    denominator = np.sum((squared_returns - mean_squared_returns) ** 2)
    
    if denominator == 0:
        return 0  # Avoid division by zero
    
    rho_1 = numerator / denominator
    return rho_1


In [None]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, calmar_ratio, volatility_clustering, 
    mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_calmar, std_calmar, 
    mean_volatility_clustering, std_volatility_clustering
):
    forecasted_mean = np.mean(forecasted_values)
    forecasted_mean_normalized = (forecasted_mean - mean_forecast) / std_forecast
    rachev_normalized = (rachev_ratio - mean_rachev) / std_rachev
    sharpe_normalized = (sharpe_ratio - mean_sharpe) / std_sharpe
    calmar_normalized = (calmar_ratio - mean_calmar) / std_calmar
    volatility_clustering_normalized = (volatility_clustering - mean_volatility_clustering) / std_volatility_clustering
    
    score = forecasted_mean_normalized - (risk_percentage * rachev_normalized) + sharpe_normalized + calmar_normalized - volatility_clustering_normalized
    return score


In [None]:
def process_etf_data(tickers, etf_dict):
    etf_pred_dict = {}

    # Determine the forecast periods dynamically from the etf_dict
    sample_etf = next(iter(etf_dict.values()))
    forecast_periods = [key.split('_')[-1] for key in sample_etf.keys() if key.startswith('forecast_predictions_df')]

    for etf_name in tickers:
        etf_pred_dict[etf_name] = {f"returns_{period}": etf_dict[etf_name][f"forecast_predictions_df_{period}"] for period in forecast_periods}

        for period in forecast_periods:
            returns = etf_pred_dict[etf_name][f"returns_{period}"]
            etf_pred_dict[etf_name][f"rachev_ratio_{period}"] = calculate_rachev_ratio(returns)
            etf_pred_dict[etf_name][f"sharpe_ratio_{period}"] = calculate_sharpe_ratio(returns)
            etf_pred_dict[etf_name][f"calmar_ratio_{period}"] = calculate_calmar_ratio(returns)
            etf_pred_dict[etf_name][f"sterling_ratio_{period}"] = calculate_sterling_ratio(returns)
            etf_pred_dict[etf_name][f"volatility_clustering_{period}"] = calculate_volatility_clustering(returns)
    
    return etf_pred_dict


In [None]:
def calculate_means_and_stds(etf_pred_dict, forecast_period, is_sterling=False):
    mean_forecast = np.mean([np.mean(etf_pred_dict[etf][f'returns_{forecast_period}']) for etf in etf_pred_dict])
    std_forecast = np.std([np.mean(etf_pred_dict[etf][f'returns_{forecast_period}']) for etf in etf_pred_dict])

    rachev_ratios = np.array([etf_pred_dict[etf][f'rachev_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_rachev = np.mean(rachev_ratios)
    std_rachev = np.std(rachev_ratios)

    sharpe_ratios = np.array([etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_sharpe = np.mean(sharpe_ratios)
    std_sharpe = np.std(sharpe_ratios)
    
    volatility_clustering = np.array([etf_pred_dict[etf][f'volatility_clustering_{forecast_period}'] for etf in etf_pred_dict])
    mean_volatility_clustering = np.mean(volatility_clustering)
    std_volatility_clustering = np.std(volatility_clustering)

    if is_sterling:
        print('Fetching Sterling ratios')
        streling_ratios = np.array([etf_pred_dict[etf][f'sterling_ratio_{forecast_period}'] for etf in etf_pred_dict])
        print(' Sterling ratio: ',streling_ratios)
        mean_sterling = np.mean(streling_ratios)
        std_sterling = np.std(streling_ratios)
        
        return (mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sterling, std_sterling, mean_volatility_clustering, std_volatility_clustering)


    print('Fetching Calmar ratios')

    calmar_ratios = np.array([etf_pred_dict[etf][f'calmar_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_calmar = np.mean(calmar_ratios)
    std_calmar = np.std(calmar_ratios)

    

    return (mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_calmar, std_calmar, mean_volatility_clustering, std_volatility_clustering)


In [None]:
def calculate_scores_for_etfs(etf_pred_dict, forecast_period, risk_percentage):
    
    is_sterling = True
    
    if is_sterling:
        #calculating for sterling
        (mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sterling, std_sterling, mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds(etf_pred_dict, forecast_period, is_sterling)


    #calculating for calmar
    else:
       (mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_calmar, std_calmar, mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds(etf_pred_dict, forecast_period, is_sterling)

    
    scores = []
    for etf in etf_pred_dict:
        forecasted_values = etf_pred_dict[etf][f'returns_{forecast_period}']
        rachev_ratio = etf_pred_dict[etf][f'rachev_ratio_{forecast_period}']
        sharpe_ratio = etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}']
        volatility_clustering = etf_pred_dict[etf][f'volatility_clustering_{forecast_period}']

        if is_sterling:
            sterling_ratio = etf_pred_dict[etf][f'sterling_ratio_{forecast_period}']
            print('Scores with Sterling Ratios')
            score = calculate_composite_score(forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sterling_ratio, volatility_clustering, mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sterling, std_sterling, mean_volatility_clustering, std_volatility_clustering)

        else:
            calmar_ratio = etf_pred_dict[etf][f'calmar_ratio_{forecast_period}']
            print('Scores with Calmar Ratios')
            score = calculate_composite_score(forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, calmar_ratio, volatility_clustering, mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_calmar, std_calmar, mean_volatility_clustering, std_volatility_clustering)

            

        scores.append({
            'ETF': etf,
            'Month': forecast_period,
            'RiskPercentage': risk_percentage,
            'Score': score
        })
    
    return scores


In [None]:
def main(tickers, etf_dict):
    etf_pred_dict = process_etf_data(tickers, etf_dict)
    
    risk_percentage = 0.10

    scores_1m = calculate_scores_for_etfs(etf_pred_dict, '1m', risk_percentage)
    scores_2m = calculate_scores_for_etfs(etf_pred_dict, '2m', risk_percentage)
    scores_3m = calculate_scores_for_etfs(etf_pred_dict, '3m', risk_percentage)
    scores_4m = calculate_scores_for_etfs(etf_pred_dict, '4m', risk_percentage)
    scores_5m = calculate_scores_for_etfs(etf_pred_dict, '5m', risk_percentage)
    
    for scores in [scores_1m, scores_2m, scores_3m, scores_4m, scores_5m]:
        for score in scores:
            print(score)

    return etf_pred_dict, scores_1m, scores_2m, scores_3m, scores_4m, scores_5m

# Example usage
# etf_pred_dict, scores_1m, scores_2m, scores_3m = main(tickers, etf_dict)


In [None]:
etf_pred_dict, scores_1m, scores_2m, scores_3m, scores_4m, scores_5m = main(tickers, etf_dict)

In [None]:
df_scores_1m = pd.DataFrame(scores_1m)
df_scores_2m = pd.DataFrame(scores_2m)
df_scores_3m = pd.DataFrame(scores_3m)
df_scores_4m = pd.DataFrame(scores_4m)
df_scores_5m = pd.DataFrame(scores_5m)

In [None]:
# Select the top 2 ETFs based on scores for 1-month forecast
top_etfs_1m = df_scores_1m.nlargest(2, 'Score')
print("Top 2 ETFs for 1-month forecast:")
print(top_etfs_1m)

# Select the top 2 ETFs based on scores for 2-month forecast
top_etfs_2m = df_scores_2m.nlargest(2, 'Score')
print("\nTop 2 ETFs for 2-month forecast:")
print(top_etfs_2m)

# Select the top 2 ETFs based on scores for 3-month forecast
top_etfs_3m = df_scores_3m.nlargest(2, 'Score')
print("\nTop 2 ETFs for 3-month forecast:")
print(top_etfs_3m)

# Select the top 2 ETFs based on scores for 4-month forecast
top_etfs_4m = df_scores_4m.nlargest(2, 'Score')
print("\nTop 2 ETFs for 4-month forecast:")
print(top_etfs_4m)

# Select the top 2 ETFs based on scores for 5-month forecast
top_etfs_5m = df_scores_5m.nlargest(2, 'Score')
print("\nTop 2 ETFs for 5-month forecast:")
print(top_etfs_5m)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Enhanced plot function
def plot_scores_line(df):
    plt.figure(figsize=(14, 8))
    sns.set(style="whitegrid")  # Set the style to white grid for better readability

    # Use a more appealing color palette
    palette = sns.color_palette("husl", len(df['ETF'].unique()))

    # Create the line plot
    sns.lineplot(data=df, x='Month', y='Score', hue='ETF', marker='o', palette=palette, linewidth=2.5)

    # Add a horizontal line at y=0
    plt.axhline(0, color='red', linestyle='--')

    # Enhance the title and labels
    plt.title('ETF Scores Over Different Months', fontsize=16, weight='bold')
    plt.xlabel('Month', fontsize=14)
    plt.ylabel('Score', fontsize=14)

    # Adjust the legend
    plt.legend(title='ETF', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=12)
    
    # Highlight points and add annotations for key scores
    for etf in df['ETF'].unique():
        subset = df[df['ETF'] == etf]
        for x, y in zip(subset['Month'], subset['Score']):
            plt.text(x, y, f'{y:.2f}', fontsize=9, ha='center')

    plt.tight_layout()
    plt.show

In [None]:
df_scores_all = pd.concat([df_scores_1m, df_scores_2m, df_scores_3m, df_scores_4m, df_scores_5m])
month_order = ['1m', '2m', '3m', '4m', '5m']
df_scores_all['Month'] = pd.Categorical(df_scores_all['Month'], categories=month_order, ordered=True)

plot_scores_line(df_scores_all)


In [None]:
def select_top_etfs(df_scores, forecast_period):
    top_etfs = df_scores.nlargest(2, 'Score')
    print(f"Top 2 ETFs for {forecast_period}-month forecast:")
    print(top_etfs)
    return top_etfs['ETF'].tolist()


In [None]:
def gather_etf_data_for_months(tickers, month_ranges):
    etf_histories = {}
    for start_date, end_date in month_ranges:
        month = pd.to_datetime(start_date).strftime('%Y-%m')
        etf_histories[month] = {}
        for ticker in tickers:
            etf_data = yf.download(ticker, start=start_date, end=end_date)
            if etf_data.empty:
                print(f"No data found for {ticker} in {month}")
                continue
            etf_data.index = pd.to_datetime(etf_data.index)
            etf_histories[month][ticker] = etf_data
            #print(f"Data for {ticker} in {month} gathered.")
    return etf_histories


In [None]:
# Initialize ticker_shares dictionary at the start of the process
ticker_shares = {}

def initialize_shares_for_first_month(top_etfs_1m, etf_histories, month, investment_amount=50000):
    ticker_shares = {}
    first_trading_day_start = f'{month}-01'
    
    # Loop through each ETF in the top 2
    for ticker in top_etfs_1m:
        etf_history = etf_histories.get(ticker)
        
        if etf_history is not None:
            # Get the first business day of the month
            first_trading_day = pd.to_datetime(first_trading_day_start) + BDay(1)
            if first_trading_day not in etf_history.index:
                first_trading_day = etf_history.index[etf_history.index.searchsorted(first_trading_day)]
            
            # Extract the closing price on the first trading day
            price_on_first_trading_day = etf_history.loc[first_trading_day, 'Close']
            
            # Calculate the number of shares with the provided investment amount
            num_shares = (investment_amount * 0.975)/ price_on_first_trading_day
            print("Shares 1st month: (",investment_amount, " * 0.975)/ (", price_on_first_trading_day ,")")
            # Store the number of shares in the dictionary
            ticker_shares[ticker] = num_shares
            
            # Print the result for each top ETF
            print(f"Ticker: {ticker}, First trading day: {first_trading_day.date()}, Price: {price_on_first_trading_day}, Shares: {num_shares:.2f}")
        else:
            print(f"No data found for {ticker} in {month}")

    # Return the ticker shares dictionary for this month
    return ticker_shares



In [None]:
# Updated portfolio management function to fix the month format issue
def manage_etf_portfolio(top_etfs_previous, top_etfs_current, previous_month, current_month, ticker_shares, gathered_data_per_month):
    # Use the current month directly as it's already a string like '2024-02'
    etf_histories_for_current_month = gathered_data_per_month.get(current_month, {})

    # Extract ETFs as lists to preserve order
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"Top 2 ETFs for {previous_month}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_month}: {top2etfs_current}")

    # Step 1: Identify common ETFs (present in both months)
    etfs_common = [etf for etf in top2etfs_previous if etf in top2etfs_current]

    # Step 2: Identify ETFs to sell (present in the previous month but not in the current month)
    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]

    # Step 3: Identify ETFs to buy (present in the current month but not in the previous month)
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Step 4: Sell ETFs that are no longer in the current top ETFs
    total_selling_value = 0
    for etf in etfs_to_sell:
        no_of_shares = ticker_shares.get(etf, 0)
        if no_of_shares > 0:
            # Check if data for the ETF is available in the current month
            if etf in etf_histories_for_current_month:
                first_trading_day_current_month = etf_histories_for_current_month[etf].loc[etf_histories_for_current_month[etf].index[0], 'Close']
                
                selling_value = no_of_shares * first_trading_day_current_month * 0.975
                print("Formula: ",no_of_shares," * ", first_trading_day_current_month,"* 0.975")
                total_selling_value += selling_value  # Sum total selling value
                print(f"Sell {etf}: {no_of_shares:.2f} shares at {first_trading_day_current_month:.2f}. Total value: {selling_value:.2f}  \n")
                del ticker_shares[etf]
            else:
                print(f"Data for {etf} is missing for {current_month}. Skipping sale.")
        else:
            print(f"No shares found for {etf} to sell.")

    # Step 5: Buy new ETFs that were not in the previous month's top ETFs
    for etf in etfs_to_buy:
        if total_selling_value > 0 and etf in etf_histories_for_current_month:
            first_trading_day_new_etf = etf_histories_for_current_month[etf].loc[etf_histories_for_current_month[etf].index[0], 'Close']
            new_shares = (total_selling_value * 0.975) / first_trading_day_new_etf
            print("Formula: ()",total_selling_value," * 0.975)/", first_trading_day_current_month)
            
            print(f"Buy {etf}: {new_shares:.2f} shares at {first_trading_day_new_etf:.2f}. \n")
            ticker_shares[etf] = new_shares
        else:
            print(f"Data for {etf} is missing or no selling value available. Skipping purchase of {etf}.")

    # Step 6: Maintain the order of ETFs in the ticker_shares dictionary based on the current top 2 ETFs
    ordered_ticker_shares = {etf: ticker_shares[etf] for etf in top2etfs_current if etf in ticker_shares}

    print(f"Updated ticker shares after {current_month}: {ordered_ticker_shares}")
    print("")
    return ordered_ticker_shares



In [None]:
# Define month ranges for data gathering
month_ranges = [
    ('2024-01-01', '2024-01-31'),
    ('2024-02-01', '2024-02-29'),
    ('2024-03-01', '2024-03-31'),
    ('2024-04-01', '2024-04-30'),
    ('2024-05-01', '2024-05-31')
]

# Step 1: Select Top ETFs for each forecast period
top_etfs_1m = select_top_etfs(df_scores_1m, 1)
top_etfs_2m = select_top_etfs(df_scores_2m, 2)
top_etfs_3m = select_top_etfs(df_scores_3m, 3)
top_etfs_4m = select_top_etfs(df_scores_4m, 4)
top_etfs_5m = select_top_etfs(df_scores_5m, 5)

# Step 2: Gather historical data for all months
#tickers = list(set(top_etfs_1m + top_etfs_2m + top_etfs_3m + top_etfs_4m))
tickers = list(set(top_etfs_1m + top_etfs_2m + top_etfs_3m + top_etfs_4m + top_etfs_5m))

etf_histories = gather_etf_data_for_months(tickers, month_ranges)

# Verify gathered data
#for month, data in etf_histories.items():
#    print(f"Data for {month}: {data.keys()}")  # Should print tickers for each month

# Step 3: Perform portfolio optimization for each month
# Start with the first month's portfolio
ticker_shares = {}
ticker_shares_per_month = {}

# For January
ticker_shares_1m = initialize_shares_for_first_month(top_etfs_1m, etf_histories['2024-01'], '2024-01', investment_amount=50000)
ticker_shares_per_month['2024-01'] = ticker_shares_1m.copy()

# For February
ticker_shares_2m = manage_etf_portfolio(top_etfs_1m, top_etfs_2m, '2024-01', '2024-02', ticker_shares_1m, etf_histories)
ticker_shares_per_month['2024-02'] = ticker_shares_2m.copy()

# For March
ticker_shares_3m = manage_etf_portfolio(top_etfs_2m, top_etfs_3m, '2024-02', '2024-03', ticker_shares_2m, etf_histories)
ticker_shares_per_month['2024-03'] = ticker_shares_3m.copy()

# For April
ticker_shares_4m = manage_etf_portfolio(top_etfs_3m, top_etfs_4m, '2024-03', '2024-04', ticker_shares_3m, etf_histories)
ticker_shares_per_month['2024-04'] = ticker_shares_4m.copy()

# For May
ticker_shares_5m = manage_etf_portfolio(top_etfs_4m, top_etfs_5m, '2024-04', '2024-05', ticker_shares_4m, etf_histories)
ticker_shares_per_month['2024-05'] = ticker_shares_5m.copy()

# Debug output for each month's portfolio
for month, shares in ticker_shares_per_month.items():
    print(f"Ticker shares after {month}: {shares}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.animation as animation
from IPython.display import HTML

# Enhanced plot function with animation and dynamic y-axis
def animate_scores(df):
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.set(style="whitegrid")
    palette = sns.color_palette("husl", len(df['ETF'].unique()))

    # Prepare an empty lineplot for each ETF
    lines = {etf: ax.plot([], [], marker='o', label=etf, color=color, lw=2.5)[0] 
             for etf, color in zip(df['ETF'].unique(), palette)}
    
    # Set up the plot title, labels, and legend
    ax.set_title('ETF Scores Over Different Months', fontsize=16, weight='bold')
    ax.set_xlabel('Month', fontsize=14)
    ax.set_ylabel('Score', fontsize=14)
    ax.axhline(0, color='red', linestyle='--')
    ax.legend(title='ETF', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=12)

    # Dynamically calculate the y-axis limits
    y_min = df['Score'].min() * 1.1
    y_max = df['Score'].max() * 1.1

    # Set dynamic y-axis limits
    ax.set_xlim(1, 5)
    ax.set_xticks([1, 2, 3, 4, 5])

    # Set the x-axis tick labels to three-letter month abbreviations
    month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May']  # Customize this list based on your data
    ax.set_xticklabels(month_labels)

    ax.set_ylim(y_min, y_max)  # Dynamically set y-limits based on the data

    # Function to update the plot frame by frame
    def update(frame):
        if 1 <= frame + 1 <= 5:
            current_month = month_order[frame]
            print(f"Processing frame: {frame+1}, month: {current_month}")
            subset = df[df['Month'] == current_month]
            
            for etf in subset['ETF'].unique():
                etf_data = subset[subset['ETF'] == etf]
                # Update x and y data for each ETF
                x_data = [month_order.index(month) + 1 for month in df[df['ETF'] == etf]['Month'][:frame+1]]
                y_data = df[df['ETF'] == etf]['Score'][:frame+1]
                lines[etf].set_data(x_data, y_data)

            return list(lines.values())
        else:
            print(f"Frame {frame+1} exceeds the available months.")
            return []

    ani = animation.FuncAnimation(fig, update, frames=len(month_order), interval=1000, blit=False)

    plt.tight_layout()  # Make sure all elements fit without overlapping
    
    return ani

# Prepare the data and order the months
df_scores_all = pd.concat([df_scores_1m, df_scores_2m, df_scores_3m, df_scores_4m, df_scores_5m])
month_order = ['1m', '2m', '3m', '4m', '5m']
df_scores_all['Month'] = pd.Categorical(df_scores_all['Month'], categories=month_order, ordered=True)

# Call the animate function
anim = animate_scores(df_scores_all)

# Display the animation in the notebook
HTML(anim.to_jshtml())
