In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tabulate import tabulate
import pandas_ta as ta
from sklearn.preprocessing import StandardScaler,MinMaxScaler,Normalizer

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers.legacy import Adam as LegacyAdam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from skopt import gp_minimize
from skopt.space import Real, Integer
from sklearn.metrics import mean_squared_error
from bayes_opt import BayesianOptimization

import tensorflow as tf
from tensorflow.keras.optimizers.legacy import Adam
import time

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input, Bidirectional

import tensorflow as tf
from keras.layers import Attention


from pandas.tseries.offsets import MonthEnd

from pandas.tseries.offsets import BDay, Week

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows', None)

from datetime import datetime, timedelta

In [2]:
#tickers = ['SPY', 'QQQ', 'VTI', 'IWM', 'EFA', 'EEM', 'GLD', 'SLV', 'USO', 'XLF']
#tickers = ['SPY', 'QQQ', 'VTI']
#tickers = ['SPY']

#start_date, end_date = '2000-01-01', '2024-05-01'
#train_start_date, train_end_date, test_start_date, test_end_date, prediction_start_date = start_date,'2014-01-01', '2014-01-01', '2024-01-01', '2024-01-01'
#prediction_dates=['2024-01-01','2024-02-01','2024-03-01', '2024-04-01']

In [3]:
def data_loading(ticker_symbol, start_date, end_date):
    """Download the price history of one ticker from Yahoo Finance.

    Args:
        ticker_symbol: Symbol handed to ``yf.Ticker``.
        start_date: Forwarded as ``start=`` to ``Ticker.history``.
        end_date: Forwarded as ``end=`` to ``Ticker.history``.

    Returns:
        A tuple ``(history, ticker)``: the history frame with the time zone
        removed from its index, and the ``yf.Ticker`` object it came from.
    """
    ticker = yf.Ticker(ticker_symbol)
    history = ticker.history(start=start_date, end=end_date)

    # Strip the time zone so the index can be matched against tz-naive data.
    naive_index = history.index.tz_localize(None)
    history.index = naive_index

    return history, ticker
    
#etf_history,etf_data = data_loading('SPY', start_date, end_date)
#etf_history.head(10)

In [4]:
#etf_history.tail(10)

In [5]:
#Function to derive the predictor columns
def etf_predictors(etf_history,  start_date, end_date, etf_data, benchmark_ticker = '^GSPC' ):
    """Derive the predictor columns and the ``log_returns`` column for one ETF.

    Args:
        etf_history: Price history frame; the columns ``Close``, ``High``,
            ``Low`` and ``Dividends`` are read.
        start_date: Start of the benchmark download window.
        end_date: End of the benchmark download window.
        etf_data: Ticker object whose ``.info`` mapping provides ``navPrice``,
            ``totalAssets`` and (optionally) ``totalLiabilities``.
        benchmark_ticker: Symbol downloaded with ``yf.download`` for the
            rolling beta.

    Returns:
        A copy of ``etf_history`` with these columns added: ``Daily Return``,
        ``Volatility``, ``Market Cap``, ``P/B Ratio``, ``Momentum``,
        ``Rolling Beta``, ``Daily Profitability (%)``, ``Div yield``, ``ATR``,
        ``RVI``, ``RSI``, ``ROC`` and ``log_returns``.

    Raises:
        KeyError: If ``navPrice`` or ``totalAssets`` is missing from
            ``etf_data.info``.
    """
    # Work on a copy so the frame passed in by the caller is left untouched.
    etf_history = etf_history.copy()
    close = etf_history['Close']
    previous_close = close.shift(1)

    # Daily simple returns and their 21-day standard deviation, annualized.
    etf_history['Daily Return'] = close.pct_change()
    etf_history['Volatility'] = etf_history['Daily Return'].rolling(window=21).std() * np.sqrt(252)

    # Fund-level figures come from a single ``info`` snapshot, so the derived
    # share count and book value are constants applied to every historical row.
    etf_info = etf_data.info
    nav_price = etf_info['navPrice']
    total_assets = etf_info['totalAssets']
    total_liabilities = etf_info.get('totalLiabilities', 0)  # may be absent

    shares_outstanding = total_assets / nav_price
    book_value_per_share = (total_assets - total_liabilities) / shares_outstanding

    etf_history['Market Cap'] = close * shares_outstanding
    etf_history['P/B Ratio'] = close / book_value_per_share

    # 1-month momentum (21 trading days).
    etf_history['Momentum'] = ta.mom(close, length=21)

    # Benchmark closes for the rolling beta.
    benchmark_data = yf.download(benchmark_ticker, start=start_date, end=end_date)
    if isinstance(benchmark_data.columns, pd.MultiIndex):
        # NOTE(review): some yfinance releases return two-level columns even for
        # a single ticker; this assumes the field names sit on the first level.
        benchmark_data.columns = benchmark_data.columns.get_level_values(0)
    benchmark_data.index = benchmark_data.index.tz_localize(None)

    combined_data = etf_history[['Close']].join(
        benchmark_data[['Close']], lsuffix='_ETF', rsuffix='_Benchmark'
    ).dropna()

    # 21-day rolling beta; the assignment aligns on the date index, so dates
    # without a benchmark close end up as NaN.
    # NOTE(review): the inputs are closing price levels, not returns - confirm
    # this is the intended definition of beta.
    etf_history['Rolling Beta'] = rolling_beta(combined_data, window=21)

    # Day-over-day percentage change of the close (first row is NaN).
    etf_history['Daily Profitability (%)'] = close.diff() / previous_close * 100

    # Dividend paid on the day as a percentage of that day's close.
    etf_history['Div yield'] = (etf_history['Dividends'] / close) * 100

    # Volatility indicators.
    etf_history['ATR'] = ta.atr(etf_history['High'], etf_history['Low'], close, length=21)
    etf_history['RVI'] = ta.rvi(close, length=21)

    # Momentum indicators.
    rsi_window = 14  # Window size for RSI calculation
    roc_window = 12  # Window size for ROC calculation
    etf_history['RSI'] = ta.rsi(close, length=rsi_window)
    etf_history['ROC'] = ta.roc(close, length=roc_window)

    etf_history['log_returns'] = np.log(close / previous_close)

    return etf_history

# Function to calculate rolling beta
def rolling_beta(df, window, use_returns=False):
    """Rolling covariance of ETF vs. benchmark divided by benchmark variance.

    Args:
        df: Frame with the columns ``Close_Benchmark`` and ``Close_ETF``.
        window: Number of rows in each rolling window.
        use_returns: When False (the default, and the original behaviour) the
            statistic is computed on the two columns as given. When True both
            columns are first converted to simple period-over-period returns,
            which is the usual input for a market beta.

    Returns:
        A Series aligned with ``df.index``; rows without a full window are NaN.
    """
    benchmark = df['Close_Benchmark']
    etf = df['Close_ETF']

    if use_returns:
        benchmark = benchmark / benchmark.shift(1) - 1
        etf = etf / etf.shift(1) - 1

    benchmark_window = benchmark.rolling(window=window)
    covariance = benchmark_window.cov(etf)
    variance = benchmark_window.var()
    return covariance / variance



In [6]:
#etf_history = etf_predictors(etf_history,  start_date, end_date, etf_data,benchmark_ticker = '^GSPC' )
#etf_history.head(10)

In [7]:
def pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates, feature_columns=None, scaling_strategy='StandardScaler', final_end_date='2024-12-01'):
    """Clean, split and scale the feature matrix, and cut weekly forecast windows.

    Args:
        etf_history: Frame holding the feature columns and ``log_returns``.
            It is not modified; a cleaned copy is returned instead.
        train_start_date, train_end_date: Label bounds of the training slice.
        test_start_date, test_end_date: Label bounds of the test slice.
        prediction_dates: Start dates of the forecast windows. Every window
            except the last covers seven calendar days; the last one runs up
            to ``final_end_date``.
        feature_columns: Columns used as predictors; a built-in list is used
            when None.
        scaling_strategy: ``'StandardScaler'``, ``'MinMaxScaler'`` or
            ``'Normalizer'``.
        final_end_date: End label of the last forecast window.

    Returns:
        Dict with the keys ``train_data_scaled``, ``test_data_scaled``,
        ``y_train``, ``y_test``, ``scaler``, ``X``, ``etf_history``,
        ``forecast_data_scaled`` and ``forecast_data``. The scaled arrays carry
        a leading column of ones. ``forecast_data_scaled`` has no entry for a
        window that contains no rows.

    Raises:
        ValueError: If ``scaling_strategy`` is not one of the supported names.
    """
    # Fill gaps with each column's median, then neutralise infinities of either
    # sign (the scaler cannot digest them). New frames are created so the
    # caller's data stays as it was.
    # NOTE(review): the medians are taken over the whole frame, i.e. including
    # the test and forecast periods.
    etf_history = etf_history.fillna(etf_history.median(numeric_only=True))
    etf_history = etf_history.replace([np.inf, -np.inf], 0)

    # Default feature columns if none are provided
    if feature_columns is None:
        feature_columns = ['Volatility', 'Volume', 'Daily Return', 'Market Cap', 'P/B Ratio', 'Momentum',
                           'Rolling Beta', 'Daily Profitability (%)', 'ATR', 'RVI', 'RSI', 'ROC']

    # NOTE(review): if 'Daily Return' and 'log_returns' are both derived from
    # the same day's close, the target is a function of one of the features -
    # confirm this is intended.
    X = etf_history.loc[:, feature_columns]
    y = etf_history['log_returns']

    # Label slices include both end points, so a row dated exactly on a shared
    # boundary would appear in both splits.
    train_data = X.loc[train_start_date:train_end_date]
    test_data = X.loc[test_start_date:test_end_date]
    y_train = y.loc[train_start_date:train_end_date]
    y_test = y.loc[test_start_date:test_end_date]

    if scaling_strategy == 'StandardScaler':
        scaler = StandardScaler()
    elif scaling_strategy == 'MinMaxScaler':
        scaler = MinMaxScaler()
    elif scaling_strategy == 'Normalizer':
        scaler = Normalizer()
    else:
        raise ValueError(f"Unsupported scaling strategy: {scaling_strategy}")

    # Fit on the training slice only; everything else is transformed with it.
    train_data_scaled = scaler.fit_transform(train_data)
    test_data_scaled = scaler.transform(test_data)

    # One forecast window per prediction date.
    forecast_data = {}
    last_position = len(prediction_dates) - 1
    for position, period_start in enumerate(prediction_dates):
        if position < last_position:
            period_end = (pd.to_datetime(period_start) + Week(1) - pd.Timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            period_end = final_end_date  # The final end date provided or default
        forecast_data[f'forecast_data_{position + 1}w'] = X.loc[period_start:period_end]

    # has_constant='add' forces the column of ones even when the data already
    # looks constant (e.g. a single-row window); the default 'skip' would
    # return such an array one column short of the training matrix.
    forecast_data_scaled = {
        period_key: sm.add_constant(scaler.transform(period_data), has_constant='add')
        for period_key, period_data in forecast_data.items()
        if not period_data.empty
    }

    train_data_scaled = sm.add_constant(train_data_scaled, has_constant='add')
    test_data_scaled = sm.add_constant(test_data_scaled, has_constant='add')

    return {
        'train_data_scaled': train_data_scaled,
        'test_data_scaled': test_data_scaled,
        'y_train': y_train,
        'y_test': y_test,
        'scaler': scaler,
        'X': X,
        'etf_history': etf_history,
        'forecast_data_scaled': forecast_data_scaled,
        'forecast_data': forecast_data
    }

In [8]:
#train_data_scaled, test_data_scaled, y_train, y_test, scaler, X, etf_history, forecast_data_scaled_1m, forecast_data_scaled_2m, forecast_data_scaled_3m, forecast_data_scaled_4m, forecast_data_1m, forecast_data_2m, forecast_data_3m, forecast_data_4m = pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_start_date, prediction_dates)

In [9]:
def build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train, validation_split=0.2):
    """Assemble a two-layer LSTM regressor, fit it, and return the fitted model.

    Args:
        epochs: Upper bound on training epochs (cast to int).
        batch_size: Mini-batch size (cast to int).
        units: Width of both LSTM layers (cast to int); the dense hidden layer
            uses ``int(units/2)`` units.
        dropout_rate: Rate of the Dropout layer placed after each hidden layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        train_data_scaled: 2-D training matrix; reshaped by ``reshape_for_lstm``.
        y_train: Training targets.
        validation_split: Fraction of the training data Keras holds out for
            the early-stopping monitor.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    lstm_input = reshape_for_lstm(train_data_scaled)
    hidden_units = int(units)
    input_shape = (lstm_input.shape[1], lstm_input.shape[2])

    network = Sequential()
    layer_stack = [
        # Two stacked LSTM layers; only the first one emits a full sequence.
        LSTM(units=hidden_units, return_sequences=True, input_shape=input_shape),
        Dropout(dropout_rate),
        LSTM(units=hidden_units, return_sequences=False),
        Dropout(dropout_rate),
        # Dense hidden layer at half the LSTM width.
        Dense(units=int(units/2), activation='relu'),
        Dropout(dropout_rate),
        # Single linear output.
        Dense(1),
    ]
    for layer in layer_stack:
        network.add(layer)

    network.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')

    # Stop once validation loss has not improved for 5 epochs and roll back to
    # the best weights seen.
    stop_when_stalled = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=5, restore_best_weights=True
    )

    network.fit(
        lstm_input,
        y_train,
        epochs=int(epochs),
        batch_size=int(batch_size),
        validation_split=validation_split,
        callbacks=[stop_when_stalled],
        verbose=0,
    )

    return network


"""
def optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test):
    def objective_function(epochs, batch_size, units, dropout_rate, learning_rate):
        model = build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train)
        test_data_reshaped = reshape_for_lstm(test_data_scaled)
        test_predictions = make_predictions(model, test_data_reshaped)
        mse = mean_squared_error(y_test, test_predictions)
        return -mse  # We return the negative MSE because Bayesian Optimization tries to maximize the function

    # Reduced parameter ranges for faster search
    param_bounds = {
        'epochs': (10, 30),  # Reduce max epochs
        'batch_size': (16, 64),  # Widen batch size range for exploration
        'units': (32, 128),  # Expand LSTM unit range
        'dropout_rate': (0.2, 0.5),  # Adjust dropout for robustness
        'learning_rate': (1e-4, 5e-3)  # Widen learning rate range for exploration
    }
    
    optimizer = BayesianOptimization(
        f=objective_function,
        pbounds=param_bounds,
        verbose=2,
        random_state=42,
    )

    # Reduced the number of iterations for faster optimization
    optimizer.maximize(init_points=15, n_iter=50)  # Fewer initial points and iterations

    best_params = optimizer.max['params']
    return best_params
"""

"\ndef optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test):\n    def objective_function(epochs, batch_size, units, dropout_rate, learning_rate):\n        model = build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train)\n        test_data_reshaped = reshape_for_lstm(test_data_scaled)\n        test_predictions = make_predictions(model, test_data_reshaped)\n        mse = mean_squared_error(y_test, test_predictions)\n        return -mse  # We return the negative MSE because Bayesian Optimization tries to maximize the function\n\n    # Reduced parameter ranges for faster search\n    param_bounds = {\n        'epochs': (10, 30),  # Reduce max epochs\n        'batch_size': (16, 64),  # Widen batch size range for exploration\n        'units': (32, 128),  # Expand LSTM unit range\n        'dropout_rate': (0.2, 0.5),  # Adjust dropout for robustness\n        'learning_rate': (1e-4, 5e-3)  # Widen learning rate range

In [10]:
def optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test):
    """Search LSTM hyperparameters with Bayesian optimization.

    Each candidate is scored by training a model with
    ``build_and_train_lstm_model`` and taking the negative mean squared error
    of its predictions on ``test_data_scaled`` / ``y_test``.

    NOTE(review): candidates are scored on the data passed in as the test set;
    if that same data is used for the final evaluation, the reported error will
    be optimistic. Consider passing a separate validation slice.

    Args:
        train_data_scaled: 2-D training matrix.
        y_train: Training targets.
        test_data_scaled: 2-D matrix the candidates are scored on.
        y_test: Targets matching ``test_data_scaled``.

    Returns:
        Dict of the best parameters found (``batch_size``, ``dropout_rate``,
        ``epochs``, ``learning_rate``, ``units``), as raw floats.
    """
    def objective_function(epochs, batch_size, units, dropout_rate, learning_rate):
        # Build and train the LSTM model
        model = build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train)
        test_data_reshaped = reshape_for_lstm(test_data_scaled)
        test_predictions = make_predictions(model, test_data_reshaped)
        mse = mean_squared_error(y_test, test_predictions)
        return -mse  # Negative MSE because we maximize in Bayesian Optimization

    # Search space
    param_bounds = {
        'batch_size': (16, 64),
        'dropout_rate': (0.1, 0.5),
        'epochs': (10, 50),
        'learning_rate': (1e-4, 5e-3),
        'units': (32, 128),
    }

    # Initialize Bayesian optimizer
    optimizer = BayesianOptimization(
        f=objective_function,
        pbounds=param_bounds,
        verbose=2,
        random_state=42,
    )

    # Hand-picked starting points.
    manual_init_points = [
        {"batch_size":36.99,"dropout_rate":0.1174,"epochs":44.65,"learning_rate":0.000641,"units":85.4},
        {"batch_size":54.8,"dropout_rate":0.2914,"epochs":11.95,"learning_rate":0.003453,"units":74.25},
        {"batch_size":44.44,"dropout_rate":0.1814,"epochs":43.15,"learning_rate":0.0004211,"units":64.05},
        {"batch_size":63.03,"dropout_rate":0.3325,"epochs":47.58,"learning_rate":0.0009053,"units":79.89},
        {"batch_size":51.58,"dropout_rate":0.1418,"epochs":21.69,"learning_rate":0.0004297,"units":72.8},
        {"batch_size":51.44,"dropout_rate":0.1512,"epochs":12.6,"learning_rate":0.000954,"units":98.28},
        {"batch_size":49.37,"dropout_rate":0.1423,"epochs":42.09,"learning_rate":0.0001671,"units":99.34},
        {"batch_size":37.87,"dropout_rate":0.1913,"epochs":30.99,"learning_rate":0.0004888,"units":64.56},
        {"batch_size":23.49,"dropout_rate":0.2174,"epochs":27.32,"learning_rate":0.003045,"units":99.97},
        {"batch_size":62.54,"dropout_rate":0.4325,"epochs":28.79,"learning_rate":0.004485,"units":89.4},
        {"batch_size":16.99,"dropout_rate":0.491,"epochs":26.65,"learning_rate":0.00114,"units":49.46},
    ]

    # Drop repeated points (order preserved). Depending on the bayes_opt
    # version a repeated point is rejected, answered from a cache, or trained
    # again; none of these is useful, and the first two make the number of
    # recorded results differ from the number of points listed.
    seen_points = set()
    unique_init_points = []
    for point in manual_init_points:
        fingerprint = tuple(sorted(point.items()))
        if fingerprint not in seen_points:
            seen_points.add(fingerprint)
            unique_init_points.append(point)

    # Evaluate the starting points right away.
    for point in unique_init_points:
        optimizer.probe(params=point, lazy=False)

    # Snapshot what the starting points produced before the optimizer appends
    # its own iterations to ``optimizer.res``.
    init_results = list(optimizer.res)

    # Run the optimization process
    n_iter = 50  # Number of additional iterations
    optimizer.maximize(init_points=0, n_iter=n_iter)  # init_points=0 since the manual points were already probed

    # Display initialization points results
    print("\nInitialization Points Results:")
    for i, result in enumerate(init_results):
        print(f"Point {i + 1}: {result}")

    # Return the best parameters found
    best_params = optimizer.max['params']
    return best_params


In [11]:
def make_predictions(model, data_scaled):
    """Run ``model.predict`` on 2-D or 3-D input.

    Args:
        model: Object exposing ``predict``.
        data_scaled: Array of shape (samples, features) or
            (samples, time_steps, features). 2-D input is given a single
            time step before being passed to the model.

    Returns:
        Whatever ``model.predict`` returns.

    Raises:
        ValueError: If the input is neither 2-D nor 3-D.
    """
    rank = len(data_scaled.shape)

    if rank not in (2, 3):
        raise ValueError(f"Unexpected shape for input data: {data_scaled.shape}")

    if rank == 2:
        n_samples, n_features = data_scaled.shape[0], data_scaled.shape[1]
        data_scaled = np.reshape(data_scaled, (n_samples, 1, n_features))

    return model.predict(data_scaled)

def eval_model(best_model, test_data_scaled, y_test, y_train=None):
    """Score a fitted model on the test data with MAE and MASE.

    The MASE denominator is the MAE of a one-step naive forecast (the previous
    observation) built from ``y_test`` itself. The first naive value has no
    predecessor and is back-filled, so it equals the first observation.

    Args:
        best_model: Object exposing ``predict``.
        test_data_scaled: 2-D array (samples, features); a single time step is
            inserted before prediction.
        y_test: pandas Series of true targets.
        y_train: Accepted for call compatibility; not used.

    Returns:
        Tuple ``(mae, mase)``. Both values are also printed.
    """
    # Reshape to (samples, 1, features) for the LSTM input.
    test_data_reshaped = np.reshape(
        test_data_scaled, (test_data_scaled.shape[0], 1, test_data_scaled.shape[1])
    )

    test_predictions = best_model.predict(test_data_reshaped)

    # Mean Absolute Error of the model
    mae = mean_absolute_error(y_test, test_predictions)

    # Naive forecast with seasonal period m=1. ``.bfill()`` replaces the
    # deprecated ``fillna(method='bfill')`` spelling.
    naive_forecast = y_test.shift(1).bfill()
    mase = mae / mean_absolute_error(y_test, naive_forecast)

    print("mae: ",mae)
    print("mase: ",mase)
    return mae, mase


In [12]:
# Ensured consistent feature handling and forecasting in predictions
def predictions(model, forecast_data_scaled, forecast_data):
    """Predict log returns for one forecast window.

    Args:
        model: Fitted model passed on to ``make_predictions``.
        forecast_data_scaled: Scaled feature array for the window.
        forecast_data: Unscaled feature frame for the same rows; its index is
            reused for the predictions.

    Returns:
        Tuple ``(forecast_predictions_df, forecast_data_with_predictions)``:
        a one-column frame named ``log_returns``, and ``forecast_data`` with
        that column appended.
    """
    raw_output = make_predictions(model, forecast_data_scaled)

    # Label the model output with the dates of the forecast rows.
    predicted_frame = pd.DataFrame(
        data=raw_output,
        index=forecast_data.index,
        columns=["log_returns"],
    )

    # Features and prediction side by side.
    pieces = [forecast_data, predicted_frame]
    combined_frame = pd.concat(pieces, axis=1)

    return predicted_frame, combined_frame

In [13]:
def calculate_mase(y_true, y_pred, naive_forecast):
    """Mean Absolute Scaled Error of a forecast against a naive baseline.

    Args:
        y_true: Observed values.
        y_pred: Model predictions for the same observations.
        naive_forecast: Baseline predictions for the same observations.

    Returns:
        MAE of ``y_pred`` divided by MAE of ``naive_forecast``.
    """
    model_error = mean_absolute_error(y_true, y_pred)
    baseline_error = mean_absolute_error(y_true, naive_forecast)
    return model_error / baseline_error

In [14]:
def reshape_for_lstm(data):
    """Insert a single time-step axis: (samples, features) -> (samples, 1, features)."""
    target_shape = (data.shape[0], 1, data.shape[1])
    return np.reshape(data, target_shape)


In [15]:
#etf_pred_dict, scores_1m, scores_2m, scores_3m, scores_4m = main(tickers, etf_dict)

In [16]:
#tickers = ['SPY', 'QQQ', 'VTI', 'IWM', 'EFA', 'EEM', 'GLD', 'SLV', 'USO', 'XLF']
#tickers = ['SMH', 'SOXX', 'PSI']

# ETFs to model.
# Not included ('FNGU','USD', 'FNGO', 'GBTC', 'ETHE', 'TECL', 'FNGS', 'TQQQ', 'ROM', 'QLD'):
# No data available from 2000: 2019
tickers = [
    'SMH', 'SOXX', 'PSI', 'XSD', 'IYW',
    'XLK', 'VGT', 'FTEC', 'IGM', 'IXN',
]

#tickers = ['SPY', 'QQQ', 'VTI']
#tickers = ['SPY']

# Full download window.
start_date = '2000-01-01'
end_date = '2024-12-01'

# Training starts with the download window; the test window begins on the date
# the training window ends, and the forecast period begins on the date the
# test window ends.
train_start_date = start_date
train_end_date = '2014-01-01'
test_start_date = '2014-01-01'
test_end_date = '2024-01-01'
prediction_start_date = '2024-01-01'
#prediction_dates=['2024-01-01','2024-02-01','2024-03-01', '2024-04-01','2024-05-01','2024-06-01','2024-07-01','2024-08-01','2024-09-01','2024-10-01','2024-11-01']

In [17]:
#currently timeperiod is set to 48 weeks
# Start date (a Monday, formatted YYYY-MM-DD) of each weekly forecast window.
prediction_dates = [
    monday.strftime('%Y-%m-%d')
    for monday in pd.date_range(start='2024-01-01', periods=48, freq='W-MON')
]


In [18]:
# Train one tuned LSTM per ticker and store its weekly forecasts in `etf_dict`.
dict_data = {}
etf_dict = {}
data_with_predictors = []
for ticker in tickers:
    etf_history, etf_data = data_loading(ticker, start_date, end_date)

    print(etf_data.info.get('longName'), ":", ticker)
    print(" ETF:", ticker)

    # Step 1: Feature engineering
    etf_history = etf_predictors(etf_history, start_date, end_date, etf_data, benchmark_ticker='^GSPC')

    # Step 2: Clean, split and scale
    preprocessed_data = pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates)

    train_data_scaled = preprocessed_data['train_data_scaled']
    test_data_scaled = preprocessed_data['test_data_scaled']
    y_train = preprocessed_data['y_train']
    y_test = preprocessed_data['y_test']
    scaler = preprocessed_data['scaler']
    X = preprocessed_data['X']
    etf_history = preprocessed_data['etf_history']

    # Step 3: Hyperparameter search plus final fit (timed together)
    start_time = time.time()

    best_params = optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test)

    # The optimizer returns floats; the integer-valued settings are truncated.
    best_epochs = int(best_params['epochs'])
    best_batch_size = int(best_params['batch_size'])
    best_units = int(best_params['units'])
    best_dropout_rate = best_params['dropout_rate']
    best_learning_rate = best_params['learning_rate']

    best_model = build_and_train_lstm_model(
        epochs=best_epochs,
        batch_size=best_batch_size,
        units=best_units,
        dropout_rate=best_dropout_rate,
        learning_rate=best_learning_rate,
        train_data_scaled=train_data_scaled,
        y_train=y_train
    )

    time_consumed = time.time() - start_time
    print(f"Time consumed for training: {time_consumed:.2f} seconds")

    # Step 4: Evaluate the model
    mae, mase = eval_model(best_model, test_data_scaled, y_test)

    # Step 5: Initialize forecast data storage
    forecast_predictions = {}
    forecast_data_dict = {}
    forecast_data_scaled_dict = {}

    # Step 6: Generate one forecast per prediction date. The range follows
    # `prediction_dates` so it cannot drift from the configured horizon.
    for week in range(1, len(prediction_dates) + 1):
        week_key = f'forecast_data_{week}w'
        # Scaled arrays get their own key. Storing them under `week_key` as
        # well would let the scaled array overwrite the raw frame when the
        # dictionaries are merged below.
        scaled_key = f'forecast_data_scaled_{week}w'

        if week_key not in preprocessed_data['forecast_data']:
            print(f"Warning: Missing forecast key {week_key} for ETF {ticker}")
            continue

        forecast_data = preprocessed_data['forecast_data'][week_key]
        forecast_data_scaled = preprocessed_data['forecast_data_scaled'].get(week_key)

        if forecast_data is None or forecast_data_scaled is None:
            print(f"Warning: Missing scaled data for {week_key} for ETF {ticker}")
            continue

        try:
            forecast_predictions_df, _ = predictions(
                best_model, forecast_data_scaled, forecast_data
            )
            # Store predictions and forecast data
            forecast_predictions[f'forecast_predictions_df_{week}w'] = forecast_predictions_df['log_returns'].values
            forecast_data_dict[week_key] = forecast_data
            forecast_data_scaled_dict[scaled_key] = forecast_data_scaled
        except Exception as e:
            # Best effort: report the failed week and carry on with the rest.
            print(f"Error generating predictions for {week_key} for ETF {ticker}: {e}")

    # Step 7: Construct `dict_data` for the current ETF
    dict_data = {
        "etf_history": etf_history,
        "X": X,
        "y_train_values": y_train,
        "model results": {"mae": mae, "mase": mase},
        **forecast_data_dict,  # raw forecast data: forecast_data_{n}w
        **forecast_data_scaled_dict,  # scaled forecast data: forecast_data_scaled_{n}w
        **forecast_predictions  # predictions: forecast_predictions_df_{n}w
    }

    # Step 8: Store in the main `etf_dict`
    etf_dict[ticker] = dict_data

# Print completion message
print("Weekly forecast data and predictions stored successfully.")

VanEck Semiconductor ETF : SMH
 ETF: SMH


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-4.628e-0 | [30m37.98     | [30m0.1307    | [30m41.6      | [30m0.002968  | [30m78.98     |
| [30m2         | [30m-8.562e-0 | [30m37.87     | [30m0.4197    | [30m44.3      | [30m0.004186  | [30m84.55     |
| [30m3         | [30m-0.000115 | [30m36.04     | [30m0.1243    | [30m43.86     | [30m0.0003247 | [30m85.98     |
| [30m4         | [30m-6.636e-0 | [30m23.32     | [30m0.193     | [30m27.54     | [30m0.0014    | [30m100.3     |
| [30m5         | [30m-7.857e-0 | [30m38.91     | [30m0.344     | [30m41.98     | [30m0.004887  | [30m78.59     |
| [30m6         | [30m-8.57e-05 | [30m37.86     | [30m0.3234    | [30m40.07     | [30m0.001296  | [30m79.05     |
| [30m7         | [30m-4.198e-0 | [30m61.8      | [30m0.4591    | [30m28.95     | [30m0.001206  

| [30m43        | [30m-0.000463 | [30m34.69     | [30m0.3437    | [30m21.83     | [30m0.0006633 | [30m37.14     |
| [30m44        | [30m-0.000107 | [30m40.77     | [30m0.3775    | [30m19.2      | [30m0.002849  | [30m71.58     |
| [35m45        | [35m-2.966e-0 | [35m27.17     | [35m0.2619    | [35m30.27     | [35m0.0009099 | [35m116.7     |
| [30m46        | [30m-6.799e-0 | [30m45.1      | [30m0.2971    | [30m40.9      | [30m0.00344   | [30m46.49     |
| [30m47        | [30m-4.917e-0 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-6.969e-0 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-9.185e-0 | [30m27.59     | [30m0.4357    | [30m20.26     | [30m0.001218  | [30m44.62     |
| [30m50        | [30m-9.65e-05 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

iShares Semiconductor ETF : SOXX
 ETF: SOXX





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000156 | [30m63.76     | [30m0.2267    | [30m30.35     | [30m0.002833  | [30m87.62     |
| [30m2         | [30m-0.000837 | [30m63.18     | [30m0.4338    | [30m13.14     | [30m0.003767  | [30m86.14     |
| [30m3         | [30m-0.000299 | [30m34.18     | [30m0.1605    | [30m37.12     | [30m0.001698  | [30m96.88     |
| [30m4         | [30m-0.000235 | [30m48.15     | [30m0.3957    | [30m28.3      | [30m0.0002638 | [30m91.53     |
| [30m5         | [30m-0.000181 | [30m28.82     | [30m0.1367    | [30m18.99     | [30m0.004305  | [30m63.52     |
| [30m6         | [30m-0.000139 | [30m30.04     | [30m0.2678    | [30m26.15     | [30m0.002319  | [30m81.87     |
| [30m7         | [30m-0.000187 | [30m38.56     | [30m0.1293    | [30m11.33     | [30m0.001516  

| [30m43        | [30m-0.001168 | [30m24.59     | [30m0.2161    | [30m10.83     | [30m0.003882  | [30m32.86     |
| [30m44        | [30m-0.000452 | [30m30.5      | [30m0.2713    | [30m26.2      | [30m0.003119  | [30m54.49     |
| [30m45        | [30m-0.000118 | [30m34.65     | [30m0.3822    | [30m20.26     | [30m0.002143  | [30m72.87     |
| [30m46        | [30m-0.000209 | [30m16.37     | [30m0.2564    | [30m49.93     | [30m0.003697  | [30m126.6     |
| [30m47        | [30m-0.001115 | [30m17.09     | [30m0.2682    | [30m34.67     | [30m0.002153  | [30m127.1     |
| [30m48        | [30m-0.000313 | [30m33.14     | [30m0.361     | [30m27.34     | [30m0.0004306 | [30m109.1     |
| [30m49        | [30m-0.000258 | [30m55.05     | [30m0.2058    | [30m48.93     | [30m0.00257   | [30m89.51     |
| [30m50        | [30m-0.000519 | [30m35.69     | [30m0.4407    | [30m49.58     | [30m0.001714  | [30m108.6     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

Invesco Semiconductors ETF : PSI
 ETF: PSI





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000319 | [30m35.55     | [30m0.2741    | [30m43.96     | [30m0.0006275 | [30m86.23     |
| [30m2         | [30m-0.000509 | [30m41.62     | [30m0.1232    | [30m49.84     | [30m0.004908  | [30m82.34     |
| [30m3         | [30m-0.000912 | [30m41.69     | [30m0.3431    | [30m40.99     | [30m0.003899  | [30m84.81     |
| [30m4         | [30m-0.000275 | [30m36.62     | [30m0.398     | [30m48.37     | [30m0.0009226 | [30m85.93     |
| [30m5         | [30m-0.001151 | [30m33.01     | [30m0.1329    | [30m46.06     | [30m0.003114  | [30m78.83     |
| [30m6         | [30m-0.003722 | [30m39.26     | [30m0.1       | [30m47.57     | [30m0.0001    | [30m89.77     |
| [30m7         | [30m-0.000506 | [30m35.15     | [30m0.1574    | [30m46.52     | [30m0.002182  

| [30m43        | [30m-0.000383 | [30m60.47     | [30m0.1       | [30m43.12     | [30m0.002125  | [30m75.9      |
| [30m44        | [30m-0.000283 | [30m43.53     | [30m0.1       | [30m23.94     | [30m0.005     | [30m95.8      |
| [30m45        | [30m-0.000147 | [30m41.79     | [30m0.4601    | [30m23.47     | [30m0.005     | [30m91.56     |
| [30m46        | [30m-0.000501 | [30m58.87     | [30m0.1548    | [30m43.26     | [30m0.002292  | [30m71.53     |
| [30m47        | [30m-0.000530 | [30m17.55     | [30m0.3366    | [30m27.28     | [30m0.0002938 | [30m45.42     |
| [30m48        | [30m-0.000687 | [30m43.07     | [30m0.2181    | [30m26.61     | [30m0.0003603 | [30m89.64     |
| [30m49        | [30m-0.000340 | [30m63.5      | [30m0.4242    | [30m38.38     | [30m0.004561  | [30m77.14     |
| [30m50        | [30m-0.000462 | [30m63.47     | [30m0.3559    | [30m36.72     | [30m0.002295  | [30m73.47     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

SPDR S&P Semiconductor ETF : XSD
 ETF: XSD





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000395 | [30m62.34     | [30m0.1413    | [30m28.41     | [30m0.002153  | [30m89.62     |
| [30m2         | [30m-0.000703 | [30m63.27     | [30m0.3114    | [30m30.18     | [30m0.003656  | [30m88.6      |
| [30m3         | [30m-0.000477 | [30m55.74     | [30m0.2756    | [30m11.27     | [30m0.001693  | [30m73.7      |
| [30m4         | [30m-0.000570 | [30m17.04     | [30m0.4674    | [30m26.24     | [30m0.003718  | [30m51.1      |
| [30m5         | [30m-0.000303 | [30m54.51     | [30m0.3475    | [30m12.66     | [30m0.003838  | [30m75.55     |
| [30m6         | [30m-0.000239 | [30m52.85     | [30m0.2576    | [30m10.74     | [30m0.002398  | [30m74.25     |
| [30m7         | [30m-0.000378 | [30m55.55     | [30m0.318     | [30m13.47     | [30m0.002208  

| [30m43        | [30m-0.000541 | [30m34.69     | [30m0.3437    | [30m21.83     | [30m0.0006633 | [30m37.14     |
| [30m44        | [30m-0.000212 | [30m40.77     | [30m0.3775    | [30m19.2      | [30m0.002849  | [30m71.58     |
| [30m45        | [30m-0.001133 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-0.000509 | [30m45.1      | [30m0.2971    | [30m40.9      | [30m0.00344   | [30m46.49     |
| [30m47        | [30m-0.000343 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-0.000323 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-0.000326 | [30m27.59     | [30m0.4357    | [30m20.26     | [30m0.001218  | [30m44.62     |
| [30m50        | [30m-0.000256 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

iShares U.S. Technology ETF : IYW
 ETF: IYW





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-5.221e-0 | [30m58.81     | [30m0.2473    | [30m38.97     | [30m0.0005055 | [30m88.75     |
| [35m2         | [35m-4.009e-0 | [35m62.47     | [35m0.4649    | [35m28.55     | [35m0.00179   | [35m90.81     |
| [35m3         | [35m-3.48e-05 | [35m55.64     | [35m0.3857    | [35m24.21     | [35m0.002168  | [35m110.1     |
| [30m4         | [30m-3.675e-0 | [30m62.71     | [30m0.3126    | [30m10.48     | [30m0.001741  | [30m111.5     |
| [30m5         | [30m-5.522e-0 | [30m46.64     | [30m0.1258    | [30m10.35     | [30m0.004146  | [30m120.6     |
| [30m6         | [30m-3.963e-0 | [30m62.23     | [30m0.2977    | [30m20.89     | [30m0.003018  | [30m127.1     |
| [30m7         | [30m-8.413e-0 | [30m63.77     | [30m0.3528    | [30m39.71     | [30m0.002096  

| [30m43        | [30m-4.753e-0 | [30m43.41     | [30m0.1332    | [30m35.38     | [30m0.002313  | [30m108.2     |
| [30m44        | [30m-0.000111 | [30m44.28     | [30m0.1918    | [30m49.6      | [30m0.00431   | [30m111.1     |
| [30m45        | [30m-8.633e-0 | [30m39.18     | [30m0.3966    | [30m36.99     | [30m0.002006  | [30m96.17     |
| [30m46        | [30m-0.000101 | [30m55.25     | [30m0.3585    | [30m14.94     | [30m0.004179  | [30m120.8     |
| [30m47        | [30m-4.271e-0 | [30m60.37     | [30m0.2553    | [30m32.04     | [30m0.001637  | [30m104.2     |
| [30m48        | [30m-3.698e-0 | [30m25.58     | [30m0.31      | [30m40.73     | [30m0.003529  | [30m49.96     |
| [30m49        | [30m-6.818e-0 | [30m29.92     | [30m0.2075    | [30m49.48     | [30m0.004269  | [30m32.1      |
| [30m50        | [30m-4.881e-0 | [30m43.61     | [30m0.1917    | [30m23.35     | [30m0.002475  | [30m106.6     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

The Technology Select Sector SPDR Fund : XLK
 ETF: XLK





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-4.136e-0 | [30m63.57     | [30m0.4832    | [30m42.01     | [30m0.002719  | [30m96.78     |
| [30m2         | [30m-0.000174 | [30m16.99     | [30m0.4944    | [30m26.65     | [30m0.00452   | [30m49.46     |
| [30m3         | [30m-0.000104 | [30m23.7      | [30m0.4625    | [30m48.23     | [30m0.001397  | [30m91.2      |
| [30m4         | [30m-0.000178 | [30m23.58     | [30m0.1499    | [30m41.97     | [30m0.000571  | [30m80.55     |
| [30m5         | [30m-4.872e-0 | [30m56.64     | [30m0.2846    | [30m12.46     | [30m0.001727  | [30m126.2     |
| [30m6         | [30m-4.966e-0 | [30m34.64     | [30m0.3653    | [30m33.95     | [30m0.00402   | [30m93.7      |
| [30m7         | [30m-5.435e-0 | [30m22.14     | [30m0.2963    | [30m27.68     | [30m0.00356   

| [30m43        | [30m-7.653e-0 | [30m34.69     | [30m0.3437    | [30m21.83     | [30m0.0006633 | [30m37.14     |
| [30m44        | [30m-5.078e-0 | [30m40.77     | [30m0.3775    | [30m19.2      | [30m0.002849  | [30m71.58     |
| [30m45        | [30m-2.503e-0 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-3.506e-0 | [30m45.1      | [30m0.2971    | [30m40.9      | [30m0.00344   | [30m46.49     |
| [30m47        | [30m-0.000203 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-0.000174 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-5.887e-0 | [30m27.59     | [30m0.4357    | [30m20.26     | [30m0.001218  | [30m44.62     |
| [30m50        | [30m-5.418e-0 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

Vanguard Information Technology Index Fund ETF Shares : VGT
 ETF: VGT





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000642 | [30m60.77     | [30m0.1081    | [30m16.15     | [30m0.004442  | [30m76.02     |
| [30m2         | [30m-0.000202 | [30m40.98     | [30m0.1807    | [30m26.68     | [30m0.003147  | [30m116.6     |
| [30m3         | [30m-0.000189 | [30m23.7      | [30m0.4625    | [30m48.23     | [30m0.001397  | [30m91.2      |
| [30m4         | [30m-0.000403 | [30m54.84     | [30m0.3323    | [30m11.99     | [30m0.005     | [30m74.29     |
| [30m5         | [30m-0.000418 | [30m54.73     | [30m0.2258    | [30m11.88     | [30m0.001053  | [30m74.18     |
| [30m6         | [30m-0.000218 | [30m17.71     | [30m0.118     | [30m26.7      | [30m0.00179   | [30m49.3      |
| [30m7         | [30m-0.000889 | [30m39.74     | [30m0.3106    | [30m26.76     | [30m0.003321  

| [30m43        | [30m-0.000618 | [30m54.69     | [30m0.1433    | [30m11.85     | [30m0.005     | [30m74.44     |
| [30m44        | [30m-0.000846 | [30m54.85     | [30m0.2661    | [30m11.89     | [30m0.005     | [30m74.28     |
| [30m45        | [30m-0.000183 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-0.000132 | [30m54.74     | [30m0.3548    | [30m11.95     | [30m0.005     | [30m74.26     |
| [30m47        | [30m-0.000204 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-0.000364 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-0.000154 | [30m27.59     | [30m0.4357    | [30m20.26     | [30m0.001218  | [30m44.62     |
| [30m50        | [30m-0.000236 | [30m54.75     | [30m0.2931    | [30m11.98     | [30m0.005     | [30m74.31     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

Fidelity MSCI Information Technology Index ETF : FTEC
 ETF: FTEC





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000409 | [30m61.85     | [30m0.2568    | [30m31.05     | [30m0.001841  | [30m89.23     |
| [30m2         | [30m-0.000741 | [30m47.37     | [30m0.3478    | [30m28.34     | [30m0.001271  | [30m91.8      |
| [30m3         | [30m-0.000590 | [30m28.48     | [30m0.3296    | [30m21.63     | [30m0.001473  | [30m63.4      |
| [30m4         | [30m-0.000581 | [30m33.01     | [30m0.271     | [30m23.9      | [30m0.004865  | [30m77.77     |
| [30m5         | [30m-0.004139 | [30m29.88     | [30m0.4362    | [30m44.51     | [30m0.003441  | [30m98.94     |
| [30m6         | [30m-0.001591 | [30m57.36     | [30m0.1671    | [30m26.35     | [30m0.0006876 | [30m103.3     |
| [30m7         | [30m-0.001342 | [30m41.1      | [30m0.2153    | [30m14.91     | [30m0.0004179 

| [30m43        | [30m-0.002292 | [30m51.16     | [30m0.1       | [30m10.0      | [30m0.005     | [30m111.7     |
| [30m44        | [30m-0.000691 | [30m64.0      | [30m0.5       | [30m50.0      | [30m0.0001    | [30m45.18     |
| [30m45        | [30m-0.001808 | [30m64.0      | [30m0.1       | [30m38.1      | [30m0.005     | [30m38.46     |
| [30m46        | [30m-0.000383 | [30m16.0      | [30m0.2704    | [30m10.0      | [30m0.005     | [30m32.0      |
| [30m47        | [30m-0.001077 | [30m48.89     | [30m0.4641    | [30m37.45     | [30m0.003429  | [30m113.5     |
| [30m48        | [30m-0.001241 | [30m16.0      | [30m0.4397    | [30m50.0      | [30m0.0001    | [30m32.0      |
| [30m49        | [30m-0.000508 | [30m59.19     | [30m0.379     | [30m50.0      | [30m0.005     | [30m96.77     |
| [30m50        | [30m-0.000267 | [30m48.76     | [30m0.5       | [30m50.0      | [30m0.0001    | [30m109.4     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

iShares Expanded Tech Sector ETF : IGM
 ETF: IGM





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000188 | [30m52.01     | [30m0.3525    | [30m22.65     | [30m0.0008887 | [30m71.3      |
| [30m2         | [30m-0.000949 | [30m39.76     | [30m0.1566    | [30m22.42     | [30m0.0002094 | [30m71.0      |
| [30m3         | [30m-0.000143 | [30m56.84     | [30m0.1851    | [30m19.59     | [30m0.004469  | [30m76.94     |
| [30m4         | [30m-0.000361 | [30m62.7      | [30m0.3654    | [30m39.47     | [30m0.002643  | [30m84.2      |
| [30m5         | [30m-0.000223 | [30m50.54     | [30m0.2597    | [30m14.42     | [30m0.001885  | [30m79.93     |
| [30m6         | [30m-0.000207 | [30m31.91     | [30m0.1837    | [30m29.78     | [30m0.0007985 | [30m58.44     |
| [30m7         | [30m-0.000205 | [30m60.51     | [30m0.2085    | [30m15.59     | [30m0.0016    

| [30m43        | [30m-0.000192 | [30m20.52     | [30m0.3559    | [30m14.43     | [30m0.003461  | [30m127.2     |
| [30m44        | [30m-0.000123 | [30m32.07     | [30m0.1577    | [30m11.75     | [30m0.001129  | [30m125.6     |
| [30m45        | [30m-0.004515 | [30m28.58     | [30m0.146     | [30m20.22     | [30m0.0002942 | [30m124.3     |
| [30m46        | [30m-0.000228 | [30m24.18     | [30m0.1133    | [30m28.43     | [30m0.001324  | [30m59.99     |
| [30m47        | [30m-0.000115 | [30m21.71     | [30m0.1721    | [30m33.33     | [30m0.00443   | [30m47.44     |
| [30m48        | [30m-0.000181 | [30m16.14     | [30m0.1385    | [30m11.0      | [30m0.003743  | [30m123.7     |
| [30m49        | [30m-0.001342 | [30m56.36     | [30m0.2416    | [30m49.46     | [30m0.004948  | [30m76.56     |
| [35m50        | [35m-5.872e-0 | [35m22.0      | [35m0.3416    | [35m36.5      | [35m0.00238   | [35m59.02     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

iShares Global Tech ETF : IXN
 ETF: IXN





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-7.905e-0 | [30m38.31     | [30m0.1332    | [30m30.49     | [30m0.001705  | [30m64.22     |
| [30m2         | [30m-0.000526 | [30m41.38     | [30m0.1692    | [30m27.59     | [30m0.000965  | [30m61.49     |
| [35m3         | [35m-7.192e-0 | [35m38.86     | [35m0.3306    | [35m46.27     | [35m0.001204  | [35m86.98     |
| [30m4         | [30m-0.000206 | [30m36.62     | [30m0.398     | [30m48.37     | [30m0.0009226 | [30m85.93     |
| [30m5         | [30m-0.000703 | [30m40.12     | [30m0.2486    | [30m43.27     | [30m0.000225  | [30m86.33     |
| [30m6         | [30m-0.000111 | [30m36.97     | [30m0.2829    | [30m46.19     | [30m0.001917  | [30m86.73     |
| [30m7         | [30m-0.000241 | [30m38.68     | [30m0.1437    | [30m48.31     | [30m0.002192  

| [30m43        | [30m-0.000494 | [30m34.69     | [30m0.3437    | [30m21.83     | [30m0.0006633 | [30m37.14     |
| [30m44        | [30m-0.000265 | [30m40.77     | [30m0.3775    | [30m19.2      | [30m0.002849  | [30m71.58     |
| [30m45        | [30m-0.000351 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-0.000157 | [30m45.1      | [30m0.2971    | [30m40.9      | [30m0.00344   | [30m46.49     |
| [30m47        | [30m-0.000178 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-0.000208 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-0.000234 | [30m27.59     | [30m0.4357    | [30m20.26     | [30m0.001218  | [30m44.62     |
| [30m50        | [30m-0.000590 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:


  naive_forecast = y_test.shift(1).fillna(method='bfill')


Weekly forecast data and predictions stored successfully.


In [19]:
def calculate_sharpe_ratio(returns, annual_risk_free_rate=0.1, period='daily'):
    """
    Calculate the annualized Sharpe ratio of a series of periodic returns.

    Parameters:
    - returns (array-like): Per-period returns (anything np.mean/np.std accept).
    - annual_risk_free_rate (float): Annual risk-free rate; it is converted to a
      per-period rate by geometric de-compounding.
    - period (str): Sampling frequency of `returns`. One of 'daily' (252 periods
      per year), 'weekly' (52) or 'monthly' (12); case-insensitive.
      The default 'daily' reproduces the previous hard-coded behaviour.

    Returns:
    - float: (mean(returns) - per-period risk-free rate) / std(returns),
      scaled by sqrt(periods per year). Returns 0 when the standard deviation
      is exactly zero.

    Raises:
    - ValueError: If `period` is not one of the supported frequencies.
    """
    periods_per_year_by_name = {'daily': 252, 'weekly': 52, 'monthly': 12}
    try:
        periods_per_year = periods_per_year_by_name[str(period).lower()]
    except KeyError:
        raise ValueError(
            f"Unsupported period {period!r}; expected one of {sorted(periods_per_year_by_name)}"
        ) from None

    # Convert the annual risk-free rate to the sampling frequency of `returns`
    period_risk_free_rate = (1 + annual_risk_free_rate) ** (1 / periods_per_year) - 1

    # Mean return in excess of the per-period risk-free rate
    excess_return = np.mean(returns) - period_risk_free_rate

    # Population standard deviation (np.std default, ddof=0)
    std_return = np.std(returns)

    # A constant series has no dispersion; report 0 instead of dividing by zero
    if std_return == 0:
        return 0

    # Annualize with the square root of the number of periods per year
    return (excess_return / std_return) * np.sqrt(periods_per_year)


def calculate_rachev_ratio(returns, lower_percentile=5, upper_percentile=95):
    """
    Calculate the Rachev ratio: mean of the upper tail over minus the mean of the lower tail.

    Parameters:
    - returns (array-like): Observed returns.
    - lower_percentile (float): Percentile defining the lower tail (values <= this threshold).
    - upper_percentile (float): Percentile defining the upper tail (values >= this threshold).

    Returns:
    - float: mean(upper tail) / -mean(lower tail). The sign is not forced: if the
      lower-tail mean is positive (or the upper-tail mean negative) the ratio is
      negative. Returns 0.0 when the lower-tail mean is exactly zero, because the
      ratio is undefined there and an inf/NaN would corrupt any mean or standard
      deviation later taken across ETFs.
    """
    # np.percentile does not need sorted input, so only coerce to an array
    values = np.asarray(returns)

    # Tail thresholds
    lower_threshold = np.percentile(values, lower_percentile)
    upper_threshold = np.percentile(values, upper_percentile)

    # Expected Shortfall (mean of the lower tail) and Expected Gain (mean of the upper tail)
    es = np.mean(values[values <= lower_threshold])
    eg = np.mean(values[values >= upper_threshold])

    # Guard the division: a zero lower-tail mean would otherwise yield +/-inf or NaN
    if es == 0:
        return 0.0

    return eg / -es


def calculate_volatility_clustering(returns):
    """
    Lag-1 autocorrelation of squared returns, used here as a volatility-clustering measure.

    Parameters:
    - returns (array-like): Returns in time order. Lists, NumPy arrays and pandas
      Series are all accepted; the input is converted to a plain float ndarray so
      that the lagged product is positional (pandas would otherwise align the two
      slices on their index labels and multiply each value with itself).

    Returns:
    - float: sum((s[t] - m) * (s[t+1] - m)) / sum((s[t] - m) ** 2), where s is the
      squared returns and m their mean. Returns 0 when the denominator is zero.
    """
    squared_returns = np.asarray(returns, dtype=float) ** 2

    # Deviations of squared returns from their mean
    deviations = squared_returns - np.mean(squared_returns)

    # Numerator pairs each observation with its successor; denominator is the total variation
    numerator = np.sum(deviations[:-1] * deviations[1:])
    denominator = np.sum(deviations ** 2)

    if denominator == 0:
        return 0  # Avoid division by zero

    return numerator / denominator

def calculate_sortino_ratio(log_returns, target_log_return=0.0):
    """
    Sortino ratio of a series of log returns against a target return.

    Parameters:
    - log_returns (array-like): Log returns for the period.
    - target_log_return (float): Benchmark return; only shortfalls below it count as risk.

    Returns:
    - float: (mean(log_returns) - target) / (downside deviation + 1e-8).
      The small constant keeps the division defined when no return falls below
      the target; in that case the result is very large rather than infinite.
    """
    observed = np.array(log_returns)

    # Shortfall below the target, floored at zero for returns that meet it
    shortfall = np.maximum(0, target_log_return - observed)

    # Target downside deviation: root mean square of the shortfalls
    downside_deviation = np.sqrt(np.mean(np.square(shortfall)))

    # Average return in excess of the target
    mean_excess = np.mean(observed) - target_log_return

    division_guard = 1e-8
    return mean_excess / (downside_deviation + division_guard)


In [20]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering, 
    mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
    mean_volatility_clustering, std_volatility_clustering
):
    """
    Combine standardized forecast and risk metrics into one composite score.

    Each component is standardized as (value - mean) / (std + 1e-8) using the
    supplied means and standard deviations, then combined as:

        forecast - risk_percentage * rachev + sharpe + sortino - volatility_clustering

    Parameters:
    - forecasted_values (array-like): Forecasted returns; only their mean is used.
    - risk_percentage (float): Weight applied to the standardized Rachev term.
    - rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering (float):
      Metrics for the instrument being scored.
    - mean_* / std_* (float): Mean and standard deviation used to standardize
      the corresponding component.

    Returns:
    - float: The composite score. Intermediate values are printed for debugging.
    """
    division_guard = 1e-8  # keeps the division defined when a std is zero

    def _standardize(value, center, spread):
        # z-score with the guard added to the spread
        return (value - center) / (spread + division_guard)

    forecasted_mean = np.mean(forecasted_values)

    # Raw inputs
    print(f"\nDebug: Composite Score Calculation")
    print(f"Forecasted Mean: {forecasted_mean}, Risk Percentage: {risk_percentage}")
    print(f"Rachev Ratio: {rachev_ratio}, Sharpe Ratio: {sharpe_ratio}")
    print(f"Sortino Ratio: {sortino_ratio}, Volatility Clustering: {volatility_clustering}")

    forecasted_mean_normalized = _standardize(forecasted_mean, mean_forecast, std_forecast)
    rachev_normalized = _standardize(rachev_ratio, mean_rachev, std_rachev)
    sharpe_normalized = _standardize(sharpe_ratio, mean_sharpe, std_sharpe)
    sortino_normalized = _standardize(sortino_ratio, mean_sortino, std_sortino)
    volatility_clustering_normalized = _standardize(
        volatility_clustering, mean_volatility_clustering, std_volatility_clustering
    )

    # Standardized components
    print(f"Normalized Values -> Forecasted Mean: {forecasted_mean_normalized}, Rachev: {rachev_normalized}")
    print(f"Sharpe: {sharpe_normalized}, Sortino: {sortino_normalized}, Volatility Clustering: {volatility_clustering_normalized}")

    weighted_rachev = risk_percentage * rachev_normalized
    score = forecasted_mean_normalized - weighted_rachev + sharpe_normalized + sortino_normalized - volatility_clustering_normalized

    print(f"Final Composite Score: {score}")

    return score


def process_etf_data_weekly(tickers, etf_dict):
    """
    Collect per-period forecast returns for each ticker and compute their risk metrics.

    The forecast periods are taken from the first entry of `etf_dict`: every key
    starting with 'forecast_predictions_df' contributes its last '_'-separated
    token as a period label. Every ticker is then expected to provide a
    'forecast_predictions_df_<period>' entry for each of those labels.

    Parameters:
    - tickers (iterable): ETF names to process; each must be a key of `etf_dict`.
    - etf_dict (dict): Mapping of ETF name -> dict holding the forecast entries.

    Returns:
    - dict: ETF name -> dict with, for every period, the keys 'returns_<period>',
      'rachev_ratio_<period>', 'sharpe_ratio_<period>', 'sortino_ratio_<period>'
      and 'volatility_clustering_<period>'.
    """
    forecast_prefix = 'forecast_predictions_df'

    # Discover the period labels from whichever ETF comes first in etf_dict
    first_entry = next(iter(etf_dict.values()))
    forecast_periods = []
    for key in first_entry.keys():
        if key.startswith(forecast_prefix):
            forecast_periods.append(key.split('_')[-1])

    etf_pred_dict = {}
    for etf_name in tickers:
        source = etf_dict[etf_name]

        # First gather all returns for this ETF, then derive the metrics
        collected = {}
        for period in forecast_periods:
            collected[f"returns_{period}"] = source[f"forecast_predictions_df_{period}"]
        etf_pred_dict[etf_name] = collected

        for period in forecast_periods:
            returns = collected[f"returns_{period}"]

            print(f"\nDebug: Returns for {etf_name}, Forecast Period {period}: {returns}")

            collected[f"rachev_ratio_{period}"] = calculate_rachev_ratio(returns)
            collected[f"sharpe_ratio_{period}"] = calculate_sharpe_ratio(returns)
            collected[f"sortino_ratio_{period}"] = calculate_sortino_ratio(returns)
            collected[f"volatility_clustering_{period}"] = calculate_volatility_clustering(returns)

    return etf_pred_dict



def calculate_means_and_stds_weekly(etf_pred_dict, forecast_period):
    """
    Cross-ETF mean and standard deviation of each scoring component for one period.

    Parameters:
    - etf_pred_dict (dict): ETF name -> dict holding 'returns_<period>',
      'rachev_ratio_<period>', 'sharpe_ratio_<period>', 'sortino_ratio_<period>'
      and 'volatility_clustering_<period>' entries.
    - forecast_period (str): Period label used as the key suffix (e.g. '1w').

    Returns:
    - tuple: (mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe,
      std_sharpe, mean_sortino, std_sortino, mean_volatility_clustering,
      std_volatility_clustering). The forecast statistics are taken over the
      per-ETF mean returns. All values are also printed for debugging.
    """
    def _metric_values(key_prefix, label):
        # Gather one metric across all ETFs and echo it for debugging
        values = np.array([etf_pred_dict[etf][f'{key_prefix}_{forecast_period}'] for etf in etf_pred_dict])
        print(f"All {label}: {values}")
        return values

    # Mean forecast return of each ETF, then mean/std across ETFs
    returns_list = [etf_pred_dict[etf][f'returns_{forecast_period}'] for etf in etf_pred_dict]
    per_etf_means = [np.mean(returns) for returns in returns_list]
    mean_forecast = np.mean(per_etf_means)
    std_forecast = np.std(per_etf_means)

    print(f"\nDebug: Forecast Period = {forecast_period}")
    print(f"All Returns Means: {per_etf_means}")
    print(f"Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")

    rachev_ratios = _metric_values('rachev_ratio', 'Rachev Ratios')
    mean_rachev, std_rachev = np.mean(rachev_ratios), np.std(rachev_ratios)

    sharpe_ratios = _metric_values('sharpe_ratio', 'Sharpe Ratios')
    mean_sharpe, std_sharpe = np.mean(sharpe_ratios), np.std(sharpe_ratios)

    sortino_ratios = _metric_values('sortino_ratio', 'Sortino Ratios')
    mean_sortino, std_sortino = np.mean(sortino_ratios), np.std(sortino_ratios)

    volatility_clustering = _metric_values('volatility_clustering', 'Volatility Clustering')
    mean_volatility_clustering = np.mean(volatility_clustering)
    std_volatility_clustering = np.std(volatility_clustering)

    print(f"Mean Rachev = {mean_rachev}, Std Rachev = {std_rachev}")
    print(f"Mean Sharpe = {mean_sharpe}, Std Sharpe = {std_sharpe}")
    print(f"Mean Sortino = {mean_sortino}, Std Sortino = {std_sortino}")
    print(f"Mean Volatility Clustering = {mean_volatility_clustering}, Std Volatility Clustering = {std_volatility_clustering}")

    return (
        mean_forecast, std_forecast,
        mean_rachev, std_rachev,
        mean_sharpe, std_sharpe,
        mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering,
    )



def calculate_scores_for_etfs_weekly(etf_pred_dict, forecast_period, risk_percentage):
    """
    Score every ETF for one forecast period.

    The cross-ETF means and standard deviations come from
    calculate_means_and_stds_weekly and each ETF's composite score from
    calculate_composite_score.

    Parameters:
    - etf_pred_dict (dict): ETF name -> dict of returns and metrics keyed by period.
    - forecast_period (str): Period label used as the key suffix (e.g. '1w').
    - risk_percentage (float): Weight passed through to the composite score.

    Returns:
    - list[dict]: One record per ETF with keys 'ETF', 'Week', 'RiskPercentage'
      and 'Score', in the iteration order of `etf_pred_dict`.
    """
    (
        mean_forecast, std_forecast,
        mean_rachev, std_rachev,
        mean_sharpe, std_sharpe,
        mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering,
    ) = calculate_means_and_stds_weekly(etf_pred_dict, forecast_period)

    scores = []
    for etf, entry in etf_pred_dict.items():
        forecasted_values = entry[f'returns_{forecast_period}']
        rachev_ratio = entry[f'rachev_ratio_{forecast_period}']
        sharpe_ratio = entry[f'sharpe_ratio_{forecast_period}']
        volatility_clustering = entry[f'volatility_clustering_{forecast_period}']
        sortino_ratio = entry[f'sortino_ratio_{forecast_period}']

        # Echo the inputs that feed the composite score
        print(f"\nDebug: ETF = {etf}, Forecast Period = {forecast_period}")
        print(f"Forecasted Values Mean: {np.mean(forecasted_values)}")
        print(f"Rachev Ratio: {rachev_ratio}, Sharpe Ratio: {sharpe_ratio}")
        print(f"Sortino Ratio: {sortino_ratio}, Volatility Clustering: {volatility_clustering}")
        print(f"Means and Stds: Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")

        score = calculate_composite_score(
            forecasted_values=forecasted_values,
            risk_percentage=risk_percentage,
            rachev_ratio=rachev_ratio,
            sharpe_ratio=sharpe_ratio,
            sortino_ratio=sortino_ratio,
            volatility_clustering=volatility_clustering,
            mean_forecast=mean_forecast,
            std_forecast=std_forecast,
            mean_rachev=mean_rachev,
            std_rachev=std_rachev,
            mean_sharpe=mean_sharpe,
            std_sharpe=std_sharpe,
            mean_sortino=mean_sortino,
            std_sortino=std_sortino,
            mean_volatility_clustering=mean_volatility_clustering,
            std_volatility_clustering=std_volatility_clustering,
        )

        record = {
            'ETF': etf,
            'Week': forecast_period,
            'RiskPercentage': risk_percentage,
            'Score': score,
        }
        scores.append(record)

        print(f"Calculated Score for {etf} ({forecast_period}): {score}")

    return scores



def main_weekly(tickers, etf_dict, risk_percentage=0.10, n_weeks=48):
    """
    Run the weekly scoring pipeline for all tickers.

    Parameters:
    - tickers (iterable): ETF names to process.
    - etf_dict (dict): ETF name -> forecast data, as consumed by process_etf_data_weekly.
    - risk_percentage (float): Weight of the Rachev term in the composite score.
      Defaults to 0.10, the value that was previously hard-coded.
    - n_weeks (int): Number of weekly horizons to score; week keys '1w' .. '<n_weeks>w'
      are tried in order. Defaults to 48, the value that was previously hard-coded.

    Returns:
    - tuple: (etf_pred_dict, weekly_scores) where `weekly_scores` maps each week
      key that had data to the list of score records for that week. Weeks for
      which no ETF has a 'returns_<week>' entry are skipped with a message.
    """
    etf_pred_dict = process_etf_data_weekly(tickers, etf_dict)

    weekly_scores = {}

    for week in range(1, n_weeks + 1):
        week_key = f"{week}w"
        if any(f"returns_{week_key}" in etf_pred_dict[etf] for etf in etf_pred_dict):
            scores = calculate_scores_for_etfs_weekly(etf_pred_dict, week_key, risk_percentage)
            weekly_scores[week_key] = scores
            print(f"Scores calculated for {week_key}:")
            for score in scores:
                print(score)
        else:
            print(f"Skipping score calculation for {week_key}: No ETFs have data for this week.")

    return etf_pred_dict, weekly_scores


# Run the weekly scoring pipeline.
# `tickers` and `etf_dict` are not defined in this cell — presumably they come
# from earlier cells; run those first on a fresh kernel.
# Produces `etf_pred_dict` (per-ETF returns and metrics) and `weekly_scores`
# (week key -> list of score records).
etf_pred_dict, weekly_scores = main_weekly(tickers, etf_dict)



Debug: Returns for SMH, Forecast Period 1w: [-0.02551755 -0.01617571 -0.00907259 -0.00350609]

Debug: Returns for SMH, Forecast Period 2w: [ 0.00605947 -0.00710686 -0.0088584  -0.00643576 -0.01012213]

Debug: Returns for SMH, Forecast Period 3w: [-0.00050983 -0.01072452  0.00731899  0.00772382]

Debug: Returns for SMH, Forecast Period 4w: [-0.0043764  -0.00491627 -0.00021382 -0.00756859 -0.01588712]

Debug: Returns for SMH, Forecast Period 5w: [-0.00264732 -0.01014778 -0.01046608 -0.00337104 -0.00102833]

Debug: Returns for SMH, Forecast Period 6w: [-0.00193795 -0.01144825 -0.0020594  -0.00452459 -0.00290433]

Debug: Returns for SMH, Forecast Period 7w: [-0.0095696  -0.013747   -0.0020018  -0.00856634 -0.00852687]

Debug: Returns for SMH, Forecast Period 8w: [-0.01362961 -0.00732099  0.01520726 -0.00528363]

Debug: Returns for SMH, Forecast Period 9w: [-0.00140327 -0.00473357 -0.00748112  0.00243737  0.00656862]

Debug: Returns for SMH, Forecast Period 10w: [-0.00032866 -0.00798244  0


Debug: Returns for PSI, Forecast Period 1w: [-0.02388461 -0.02586045 -0.0250103  -0.02190272]

Debug: Returns for PSI, Forecast Period 2w: [-0.01402145 -0.01994892 -0.02265381 -0.02606633 -0.02825789]

Debug: Returns for PSI, Forecast Period 3w: [-0.02657233 -0.03323479 -0.02415611 -0.01978445]

Debug: Returns for PSI, Forecast Period 4w: [-0.02493641 -0.0214619  -0.02330772 -0.02577257 -0.0359792 ]

Debug: Returns for PSI, Forecast Period 5w: [-0.02706739 -0.03280549 -0.03413899 -0.02634821 -0.02185866]

Debug: Returns for PSI, Forecast Period 6w: [-0.02002248 -0.02432306 -0.02442781 -0.02167061 -0.01788826]

Debug: Returns for PSI, Forecast Period 7w: [-0.02306332 -0.02913066 -0.01963971 -0.02413667 -0.02692643]

Debug: Returns for PSI, Forecast Period 8w: [-0.03523822 -0.03297806 -0.02007651 -0.0310144 ]

Debug: Returns for PSI, Forecast Period 9w: [-0.02542913 -0.02497653 -0.025023   -0.01942478 -0.01135975]

Debug: Returns for PSI, Forecast Period 10w: [-0.01566759 -0.02221881 -0


Debug: Returns for IYW, Forecast Period 1w: [-0.01735807 -0.01362093 -0.01237651 -0.01160714]

Debug: Returns for IYW, Forecast Period 2w: [ 0.00479883 -0.00657368 -0.00450547 -0.00622581 -0.00626643]

Debug: Returns for IYW, Forecast Period 3w: [-0.00774806 -0.00969271  0.00049541  0.00792933]

Debug: Returns for IYW, Forecast Period 4w: [ 0.00132849  0.00490992  0.00471468  0.00220208 -0.0029244 ]

Debug: Returns for IYW, Forecast Period 5w: [ 0.00495362 -0.00205164 -0.01176403  0.00716348  0.01516771]

Debug: Returns for IYW, Forecast Period 6w: [0.00700441 0.00453305 0.01127057 0.0059926  0.01127089]

Debug: Returns for IYW, Forecast Period 7w: [ 0.00056848 -0.00560701  0.00856562  0.00410045 -0.00389844]

Debug: Returns for IYW, Forecast Period 8w: [-0.00985162 -0.01251858  0.00970459 -0.00302184]

Debug: Returns for IYW, Forecast Period 9w: [-0.00541888 -0.00276394 -0.00986795 -0.0001989   0.01269171]

Debug: Returns for IYW, Forecast Period 10w: [ 0.00250678 -0.01244344 -0.0011


Debug: Returns for FTEC, Forecast Period 37w: [-0.03742586 -0.0356731  -0.01790461 -0.03605938 -0.03517463]

Debug: Returns for FTEC, Forecast Period 38w: [-0.03507721 -0.0366847  -0.02810562 -0.03258677 -0.03406581]

Debug: Returns for FTEC, Forecast Period 39w: [-0.03621977 -0.03418422 -0.03457055 -0.03240787 -0.0368311 ]

Debug: Returns for FTEC, Forecast Period 40w: [-0.03399359 -0.0386107  -0.0223335  -0.01827514 -0.00633028]

Debug: Returns for FTEC, Forecast Period 41w: [ 0.00666101  0.01829083  0.01676129 -0.01270401 -0.01541199]

Debug: Returns for FTEC, Forecast Period 42w: [-0.0067015  -0.02420258 -0.01756967 -0.01519346 -0.02227019]

Debug: Returns for FTEC, Forecast Period 43w: [-0.02044262 -0.02337211 -0.03758471 -0.03223914 -0.03503246]

Debug: Returns for FTEC, Forecast Period 44w: [-0.03334871 -0.02448756 -0.02563875 -0.04244239 -0.03686223]

Debug: Returns for FTEC, Forecast Period 45w: [-0.03809975 -0.0341512  -0.02428209 -0.02199574 -0.02996006]

Debug: Returns for

All Rachev Ratios: [ 1.0974811   2.476587   -0.7045676  -0.91171885  1.6954494  -0.42423213
 -0.60503954 -0.47343877 -2.478065   -1.4878818 ]
All Sharpe Ratios: [  -1.32006852    2.7735974  -115.72564252 -515.37303884    1.79039934
  -55.10384498  -95.3063566   -51.62463572   43.27830073  112.52066843]
All Sortino Ratios: [ 1.69150823e-01  4.78879666e-01 -9.90419630e-01 -9.99518465e-01
  4.01406414e-01 -9.58391731e-01 -9.85670147e-01 -9.54753773e-01
  1.58030745e+06  2.21180040e+06]
All Volatility Clustering: [ 0.01827349 -0.17801014 -0.1988591  -0.04267319 -0.13155536  0.03833963
 -0.02083681  0.09379944 -0.3465422   0.00589066]
Mean Rachev = -0.18154264986515045, Std Rachev = 1.426020860671997
Mean Sharpe = -67.40906213015383, Std Sharpe = 162.1398681299338
Mean Sortino = 379210.40109963424, Std Sortino = 771454.9480595783
Mean Volatility Clustering = -0.07621736079454422, Std Volatility Clustering = 0.1278405487537384

Debug: ETF = SMH, Forecast Period = 12w
Forecasted Values Mean: 

In [21]:
# One DataFrame of score records per forecast week.
# `weekly_scores` maps week keys ('1w', '2w', ...) to lists of score records.
weekly_scores_dfs = {
    week_key: pd.DataFrame(scores)
    for week_key, scores in weekly_scores.items()
}

# Also expose each week's frame as a module-level variable df_scores_<N>w for
# N = 1..48; a week without scores gets an empty DataFrame.
globals().update({
    f'df_scores_{week}w': weekly_scores_dfs.get(f'{week}w', pd.DataFrame())
    for week in range(1, 49)
})




In [22]:
# Initialize a dictionary to hold the top 2 ETFs for each week
top_etfs_weekly = {}

# Loop through the weekly DataFrames and select the top 2 ETFs for each week
for week_key, df_scores in weekly_scores_dfs.items():
    # Select the top 2 ETFs based on the 'Score' column
    top_etfs_weekly[week_key] = df_scores.nlargest(2, 'Score')
    
    # Print the results for the current week
    print(f"\nTop 2 ETFs for {week_key} forecast:")
    print(top_etfs_weekly[week_key])



Top 2 ETFs for 1w forecast:
    ETF Week  RiskPercentage     Score
9   IXN   1w             0.1  4.718463
1  SOXX   1w             0.1  3.328601

Top 2 ETFs for 2w forecast:
   ETF Week  RiskPercentage     Score
8  IGM   2w             0.1  5.147601
9  IXN   2w             0.1  3.823881

Top 2 ETFs for 3w forecast:
    ETF Week  RiskPercentage     Score
8   IGM   3w             0.1  3.650598
1  SOXX   3w             0.1  1.925781

Top 2 ETFs for 4w forecast:
    ETF Week  RiskPercentage     Score
8   IGM   4w             0.1  5.309079
1  SOXX   4w             0.1  2.632051

Top 2 ETFs for 5w forecast:
    ETF Week  RiskPercentage     Score
9   IXN   5w             0.1  3.199654
1  SOXX   5w             0.1  2.170468

Top 2 ETFs for 6w forecast:
   ETF Week  RiskPercentage     Score
9  IXN   6w             0.1  5.129035
8  IGM   6w             0.1  2.913298

Top 2 ETFs for 7w forecast:
    ETF Week  RiskPercentage     Score
9   IXN   7w             0.1  2.948587
1  SOXX   7w           

In [23]:
def select_top_etfs_weekly(df_scores, forecast_period):
    """Return the tickers of the two highest-'Score' rows of ``df_scores``.

    Args:
        df_scores: DataFrame with at least the columns 'ETF' and 'Score'.
        forecast_period: label used only in the printed diagnostics.

    Returns:
        List of values from the 'ETF' column, best score first; an empty list
        when ``df_scores`` is empty.
    """
    if not df_scores.empty:
        print(f"Processing scores for {forecast_period}:")
        print(df_scores.head())  # preview of the first rows
        best_tickers = df_scores.nlargest(2, 'Score')['ETF'].tolist()
        print(f"Top ETFs for {forecast_period}: {best_tickers}")
        return best_tickers

    print(f"No scores available for {forecast_period}. Skipping.")
    return []



In [24]:
# Function to generate week ranges
def generate_week_ranges(start_date, end_date):
    """Split the span from ``start_date`` to ``end_date`` into 7-day windows.

    Args:
        start_date: first day, formatted 'YYYY-MM-DD'.
        end_date: last day, formatted 'YYYY-MM-DD'.

    Returns:
        List of (window_start, window_end) tuples of 'YYYY-MM-DD' strings.
        Each window ends six days after it starts, clipped to ``end_date``;
        the list is empty when ``start_date`` is not before ``end_date``.
    """
    date_format = '%Y-%m-%d'
    cursor = datetime.strptime(start_date, date_format)
    limit = datetime.strptime(end_date, date_format)
    step = timedelta(days=7)

    windows = []
    while cursor < limit:
        window_end = min(cursor + timedelta(days=6), limit)
        windows.append((cursor.strftime(date_format), window_end.strftime(date_format)))
        cursor = cursor + step
    return windows

# Function to gather ETF data for weeks
def gather_etf_data_for_weeks(tickers, week_ranges):
    """Download price history for every ticker in every week window.

    Args:
        tickers: iterable of ticker symbols passed to ``yf.download``.
        week_ranges: iterable of (start_date, end_date) string pairs.

    Returns:
        ``{"<start> to <end>": {ticker: DataFrame}}``. A ticker whose download
        comes back empty is reported and left out of that week's dict.
    """
    histories_by_week = {}
    for week_start, week_end in week_ranges:
        label = f"{week_start} to {week_end}"
        per_ticker = {}
        histories_by_week[label] = per_ticker

        for ticker in tickers:
            frame = yf.download(ticker, start=week_start, end=week_end)
            if not frame.empty:
                frame.index = pd.to_datetime(frame.index)
                per_ticker[ticker] = frame
            else:
                print(f"No data found for {ticker} in {label}")
    return histories_by_week

# Function to initialize shares for the first week
def initialize_shares_for_first_week(top_etfs_1w, etf_histories, week, investment_amount=50000):
    """Open the initial positions at each ETF's first close of the given week.

    Args:
        top_etfs_1w: iterable of tickers to buy.
        etf_histories: ``{week_label: {ticker: DataFrame}}`` where each frame
            has a 'Close' column and a sorted datetime index.
        week: week label of the form "<start> to <end>"; the part before
            " to " is parsed as the week's start date.
        investment_amount: cash used for EACH ticker (it is not split between
            tickers).

    Returns:
        ``{ticker: number_of_shares}`` for every ticker that had price data on
        or after the week's start date.
    """
    ticker_shares = {}
    week_start = pd.to_datetime(week.split(" to ")[0])

    for ticker in top_etfs_1w:
        etf_history = etf_histories.get(week, {}).get(ticker)
        if etf_history is None or etf_history.empty:
            print(f"No data found for {ticker} in {week}")
            continue

        # First available session on or after the week's start. The previous
        # `week_start + BDay(1)` skipped the opening session whenever the start
        # date was itself a trading day, and disagreed with the `index[0]`
        # convention used by manage_etf_portfolio_weekly.
        position = etf_history.index.searchsorted(week_start)
        if position >= len(etf_history.index):
            # Every row is dated before the week's start: nothing to price with.
            print(f"No data found for {ticker} in {week}")
            continue

        first_trading_day = etf_history.index[position]
        price_on_first_trading_day = etf_history['Close'].iloc[position]

        # 0.975 scales the cash before buying (presumably a 2.5% cost — confirm).
        num_shares = (investment_amount * 0.975) / price_on_first_trading_day
        print(f"Shares 1st week: ({investment_amount} * 0.975) / {price_on_first_trading_day}")
        ticker_shares[ticker] = num_shares
        print(f"Ticker: {ticker}, First trading day: {first_trading_day.date()}, Price: {price_on_first_trading_day}, Shares: {num_shares:.2f}")
    return ticker_shares

# Function to manage ETF portfolio weekly
def manage_etf_portfolio_weekly(
    top_etfs_previous, top_etfs_current, previous_week, current_week, ticker_shares, gathered_data_per_week
):
    """Rotate holdings from last week's top ETFs into this week's top ETFs.

    ETFs that dropped out of the top list are paired, in order, with the ETFs
    that entered it. Each pair is swapped at the first 'Close' of the current
    week, with a 0.975 factor applied on the sale and again on the purchase.

    Args:
        top_etfs_previous: tickers selected for the previous week.
        top_etfs_current: tickers selected for the current week.
        previous_week: label of the previous week (used in log output only).
        current_week: label of the current week; key into
            ``gathered_data_per_week``.
        ticker_shares: ``{ticker: shares}`` currently held. Updated in place.
        gathered_data_per_week: ``{week_label: {ticker: DataFrame}}`` with a
            'Close' column per frame.

    Returns:
        The same ``ticker_shares`` dict after any swaps.
    """
    etf_histories_for_current_week = gathered_data_per_week.get(current_week, {})
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"Top 2 ETFs for {previous_week}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_week}: {top2etfs_current}")

    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Swaps are one-to-one; with unequal counts the portfolio is left untouched.
    if len(etfs_to_sell) != len(etfs_to_buy):
        print("Mismatch between ETFs to sell and buy. Adjusting allocation...")
        return ticker_shares

    for etf_sell, etf_buy in zip(etfs_to_sell, etfs_to_buy):
        no_of_shares = ticker_shares.get(etf_sell, 0)
        if no_of_shares <= 0:
            print(f"No shares found for {etf_sell} to sell.")
            continue
        if etf_sell not in etf_histories_for_current_week:
            print(f"Data for {etf_sell} is missing for {current_week}. Skipping sale.")
            continue
        if etf_buy not in etf_histories_for_current_week:
            # Check the buy side BEFORE selling: previously the position was
            # sold and removed first, so the proceeds were lost when the
            # replacement had no price data.
            print(f"Data for {etf_buy} is missing for {current_week}. Skipping purchase and keeping {etf_sell}.")
            continue

        # Sell the outgoing ETF at the week's first close.
        first_trading_day_sell_price = etf_histories_for_current_week[etf_sell]['Close'].iloc[0]
        selling_value = no_of_shares * first_trading_day_sell_price * 0.975
        print(f"Sell {etf_sell}: {no_of_shares:.2f} shares at {first_trading_day_sell_price:.2f}. Total value: {selling_value:.2f}")
        del ticker_shares[etf_sell]

        # Buy the incoming ETF with the proceeds at the week's first close.
        first_trading_day_buy_price = etf_histories_for_current_week[etf_buy]['Close'].iloc[0]
        new_shares = (selling_value * 0.975) / first_trading_day_buy_price
        print(f"Buy {etf_buy}: {new_shares:.2f} shares at {first_trading_day_buy_price:.2f}.")
        # Add to any position already held rather than overwriting it.
        ticker_shares[etf_buy] = ticker_shares.get(etf_buy, 0) + new_shares

    print(f"Updated ticker shares after {current_week}: {ticker_shares}")
    return ticker_shares


In [25]:
# Split the backtest period into consecutive 7-day windows.
week_ranges = generate_week_ranges('2024-01-01', '2024-12-01')
#print("Generated Week Ranges:", week_ranges)

# Download price history for every ticker in every window.
# `tickers` must already be defined by an earlier cell.
etf_histories = gather_etf_data_for_weeks(tickers, week_ranges)
#print(f"ETF Histories Collected for {len(etf_histories)} weeks")

# Week labels in insertion (chronological) order; built once and reused below
# instead of re-listing etf_histories on every loop iteration.
week_labels = list(etf_histories.keys())

# Map score keys ('1w', '2w', ...) onto week labels by position.
week_key_mapping = {f"{i+1}w": week_range for i, week_range in enumerate(week_labels)}

# Debug: Print the week key mapping
print("Week Key Mapping:", week_key_mapping)

# Top ETFs per week, keyed by week label. Score keys that have no matching
# week label are ignored.
aligned_top_etfs_weekly = {}

for week_key, df_scores in weekly_scores_dfs.items():
    forecast_period = week_key_mapping.get(week_key, None)
    if forecast_period:
        aligned_top_etfs_weekly[forecast_period] = select_top_etfs_weekly(df_scores, forecast_period)

# Portfolio initialization and management
ticker_shares = {}
ticker_shares_per_week = {}

for i, week_range in enumerate(week_labels):
    current_week_key = week_range
    # A week can be missing from aligned_top_etfs_weekly when no scores were
    # produced for it; treat that as an empty selection instead of raising
    # KeyError, so the existing holdings are carried forward.
    current_top_etfs = aligned_top_etfs_weekly.get(current_week_key, [])
    if i == 0:
        # First week initialization
        ticker_shares = initialize_shares_for_first_week(
            current_top_etfs,
            etf_histories,
            current_week_key
        )
    else:
        # Subsequent weeks' portfolio management
        prev_week_key = week_labels[i - 1]
        ticker_shares = manage_etf_portfolio_weekly(
            aligned_top_etfs_weekly.get(prev_week_key, []),
            current_top_etfs,
            prev_week_key,
            current_week_key,
            ticker_shares,
            etf_histories
        )
    # Snapshot the holdings so later in-place updates do not alter history.
    ticker_shares_per_week[current_week_key] = ticker_shares.copy()

print("\nFinal Ticker Shares per Week:")
for week, shares in ticker_shares_per_week.items():
    print(f"Week {week}: {shares}")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Week Key Mapping: {'1w': '2024-01-01 to 2024-01-07', '2w': '2024-01-08 to 2024-01-14', '3w': '2024-01-15 to 2024-01-21', '4w': '2024-01-22 to 2024-01-28', '5w': '2024-01-29 to 2024-02-04', '6w': '2024-02-05 to 2024-02-11', '7w': '2024-02-12 to 2024-02-18', '8w': '2024-02-19 to 2024-02-25', '9w': '2024-02-26 to 2024-03-03', '10w': '2024-03-04 to 2024-03-10', '11w': '2024-03-11 to 2024-03-17', '12w': '2024-03-18 to 2024-03-24', '13w': '2024-03-25 to 2024-03-31', '14w': '2024-04-01 to 2024-04-07', '15w': '2024-04-08 to 2024-04-14', '16w': '2024-04-15 to 2024-04-21', '17w': '2024-04-22 to 2024-04-28', '18w': '2024-04-29 to 2024-05-05', '19w': '2024-05-06 to 2024-05-12', '20w': '2024-05-13 to 2024-05-19', '21w': '2024-05-20 to 2024-05-26', '22w': '2024-05-27 to 2024-06-02', '23w': '2024-06-03 to 2024-06-09', '24w': '2024-06-10 to 2024-06-16', '25w': '2024-06-17 to 2024-06-23', '26w': '2024-06-24 to 2024-06-30', '27w': '2024-07-01 to 2024-07-07', '28w': '2024-07-08 to 2024-07-14', '29w': '20




In [26]:
# Valuation date for the final holdings: the download below starts at this date
# and the first returned row is used for pricing.
first_trading_day_49w = '2024-12-01'  # Adjust this to the start of the week after the backtest

# The last recorded week holds the final holdings. Use None when nothing was
# recorded: indexing [-1] on an empty list raised IndexError before the guard
# further down could ever report the problem.
recorded_weeks = list(ticker_shares_per_week.keys())
week_48_range = recorded_weeks[-1] if recorded_weeks else None
week_49_start = first_trading_day_49w
print(f"Using data for the 48th week: {week_48_range}")

print(f"Fetching data starting from the first trading day of the 49th week: {week_49_start}")

week_49_end = '2024-12-06'
# Market value of each held ETF on the valuation date, keyed by ticker
etf_values_49w = {}

if week_48_range in ticker_shares_per_week:
    # Holdings at the end of the last recorded week
    ticker_shares_48w = ticker_shares_per_week[week_48_range]

    for ticker, shares in ticker_shares_48w.items():
        print(f"Fetching data for ticker {ticker} starting from {week_49_start}...")
        # Download prices for the valuation window
        data = yf.download(ticker, start=week_49_start, end=week_49_end)

        if not data.empty:
            # Price the position at the first close in the window
            closing_price_49w = data['Close'].iloc[0]
            total_value = shares * closing_price_49w
            etf_values_49w[ticker] = total_value
            print(f"{ticker}: {shares:.2f} shares at ${closing_price_49w:.2f} each, total value: ${total_value:.2f}")
        else:
            print(f"{ticker}: No data available for the 49th week's first trading day.")
else:
    print(f"No data available in ticker_shares_per_week for the 48th week: {week_48_range}")



[*********************100%%**********************]  1 of 1 completed


Using data for the 48th week: 2024-11-25 to 2024-12-01
Fetching data starting from the first trading day of the 49th week: 2024-12-01
Fetching data for ticker IGM starting from 2024-12-01...
IGM: 533.00 shares at $102.51 each, total value: $54637.76
Fetching data for ticker IXN starting from 2024-12-01...


[*********************100%%**********************]  1 of 1 completed

IXN: 176.85 shares at $84.80 each, total value: $14997.30





In [27]:
# Report the valuation: grand total first, then one line per ETF
if not etf_values_49w:
    print("No values could be calculated for the 49th week's first trading day.")
else:
    print("\nETF values on the 49th week's first trading day:")
    total_value = sum(etf_values_49w.values())  # whole-portfolio value
    print(f"Total portfolio value: {total_value:.2f}")
    for ticker, value in etf_values_49w.items():
        print(f"{ticker}: {value:.2f}")



ETF values on the 49th week's first trading day:
Total portfolio value: 69635.06
IGM: 54637.76
IXN: 14997.30


### Values for 7th month

### Smoothing

In [28]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio,
    volatility_clustering, mean_forecast, std_forecast, mean_rachev, std_rachev,
    mean_sharpe, std_sharpe, mean_sortino, std_sortino, mean_volatility_clustering,
    std_volatility_clustering
):
    """Combine standardized metrics of one ETF into a single score.

    Each metric is standardized as ``(value - mean) / (std + 1e-8)`` using the
    supplied mean/std pair; the forecast metric is the mean of
    ``forecasted_values``.

    Returns:
        forecast_z - risk_percentage * rachev_z + sharpe_z + sortino_z
        - volatility_clustering_z
    """
    epsilon = 1e-8  # keeps the division defined when a std is zero

    def _standardize(value, mean, std):
        return (value - mean) / (std + epsilon)

    forecast_z = _standardize(np.mean(forecasted_values), mean_forecast, std_forecast)
    rachev_z = _standardize(rachev_ratio, mean_rachev, std_rachev)
    sharpe_z = _standardize(sharpe_ratio, mean_sharpe, std_sharpe)
    sortino_z = _standardize(sortino_ratio, mean_sortino, std_sortino)
    clustering_z = _standardize(
        volatility_clustering, mean_volatility_clustering, std_volatility_clustering
    )

    # Accumulate the terms in the same left-to-right order as the formula above.
    score = forecast_z
    score = score - (risk_percentage * rachev_z)
    score = score + sharpe_z
    score = score + sortino_z
    score = score - clustering_z
    return score

def smooth_scores(scores, alpha=0.2):
    """
    Smooth scores using Exponential Moving Average (EMA).

    The first output equals the first input; every later output is
    ``alpha * scores[t] + (1 - alpha) * smoothed[t - 1]``.

    Args:
        scores: list or numpy array of numerical values.
        alpha: weight of the current value in the EMA.

    Returns:
        List of smoothed values (floats); an empty list for empty input.

    Raises:
        ValueError: if ``scores`` is neither a list nor a numpy array.
    """
    if not isinstance(scores, (list, np.ndarray)):
        raise ValueError("Scores must be a list or numpy array of numerical values.")

    # Force a float array: with integer input the output buffer used to inherit
    # the integer dtype and silently truncated every smoothed value.
    values = np.asarray(scores, dtype=float)
    if values.size == 0:
        # Nothing to smooth; indexing element 0 would raise IndexError.
        return []

    smoothed_scores = np.empty_like(values)
    smoothed_scores[0] = values[0]  # Initialize EMA

    for t in range(1, len(values)):
        smoothed_scores[t] = alpha * values[t] + (1 - alpha) * smoothed_scores[t - 1]

    return smoothed_scores.tolist()

def calculate_means_and_stds_weekly(etf_pred_dict, forecast_period):
    """Cross-sectional mean and std of every scoring metric for one period.

    Args:
        etf_pred_dict: ``{etf: metrics}`` where ``metrics`` contains the keys
            ``returns_<period>``, ``rachev_ratio_<period>``,
            ``sharpe_ratio_<period>``, ``sortino_ratio_<period>`` and
            ``volatility_clustering_<period>``.
        forecast_period: period suffix used in those keys (e.g. '1w').

    Returns:
        10-tuple ``(mean, std)`` pairs in the order: forecast (mean of each
        ETF's returns), rachev, sharpe, sortino, volatility clustering.
    """
    per_etf_metrics = list(etf_pred_dict.values())

    # Forecast statistic: average each ETF's returns first, then aggregate.
    mean_returns = [np.mean(entry[f'returns_{forecast_period}']) for entry in per_etf_metrics]
    stats = [np.mean(mean_returns), np.std(mean_returns)]

    for metric in ('rachev_ratio', 'sharpe_ratio', 'sortino_ratio', 'volatility_clustering'):
        values = np.array([entry[f'{metric}_{forecast_period}'] for entry in per_etf_metrics])
        stats.extend((np.mean(values), np.std(values)))

    return tuple(stats)

def calculate_scores_for_etfs_weekly(etf_pred_dict, forecast_period, risk_percentage, alpha=0.2):
    """Score every ETF for one forecast period and attach a smoothed score.

    Args:
        etf_pred_dict: ``{etf: metrics}`` with the per-period metric keys read
            by calculate_means_and_stds_weekly.
        forecast_period: period suffix of the metric keys (e.g. '1w').
        risk_percentage: weight applied to the standardized Rachev ratio.
        alpha: EMA weight forwarded to smooth_scores.

    Returns:
        One dict per ETF with the keys 'ETF', 'Week', 'RiskPercentage',
        'RawScore' and 'SmoothedScore', in ``etf_pred_dict`` iteration order.

    NOTE(review): the EMA runs over the raw scores in ``etf_pred_dict``
    iteration order, i.e. across different ETFs within the same period, so each
    SmoothedScore depends on the ETFs listed before it. Confirm this is
    intended rather than smoothing each ETF's score across weeks.
    """
    cross_section_stats = calculate_means_and_stds_weekly(etf_pred_dict, forecast_period)

    def _metric(etf, name):
        return etf_pred_dict[etf][f'{name}_{forecast_period}']

    raw_scores = [
        calculate_composite_score(
            _metric(etf, 'returns'),
            risk_percentage,
            _metric(etf, 'rachev_ratio'),
            _metric(etf, 'sharpe_ratio'),
            _metric(etf, 'sortino_ratio'),
            _metric(etf, 'volatility_clustering'),
            *cross_section_stats,
        )
        for etf in etf_pred_dict
    ]

    # Apply smoothing to the raw scores
    smoothed_scores = smooth_scores(raw_scores, alpha=alpha)

    return [
        {
            'ETF': etf,
            'Week': forecast_period,
            'RiskPercentage': risk_percentage,
            'RawScore': raw_scores[position],
            'SmoothedScore': smoothed_scores[position],
        }
        for position, etf in enumerate(etf_pred_dict)
    ]

def main_weekly(tickers, etf_dict, risk_percentage=0.10, n_weeks=48):
    """Produce weekly predictions and composite scores for all ETFs.

    Args:
        tickers: passed through to process_etf_data_weekly.
        etf_dict: passed through to process_etf_data_weekly.
        risk_percentage: weight of the Rachev term in the composite score.
        n_weeks: number of weekly horizons to score ('1w' ... '<n_weeks>w').

    Returns:
        ``(etf_pred_dict, weekly_scores)`` where ``weekly_scores`` maps each
        scored week key to the list of per-ETF score records. Weeks for which
        no ETF has a ``returns_<week>`` entry are skipped.
    """
    etf_pred_dict = process_etf_data_weekly(tickers, etf_dict)
    weekly_scores = {}

    for week in range(1, n_weeks + 1):
        week_key = f"{week}w"
        returns_key = f"returns_{week_key}"

        # Score only the ETFs that actually have a forecast for this week.
        # Passing the full dict whenever ANY ETF had data made the scoring
        # functions raise KeyError for the ETFs that had none.
        etfs_with_data = {
            etf: predictions
            for etf, predictions in etf_pred_dict.items()
            if returns_key in predictions
        }
        if not etfs_with_data:
            print(f"Skipping score calculation for {week_key}: No ETFs have data for this week.")
            continue

        excluded = [etf for etf in etf_pred_dict if etf not in etfs_with_data]
        if excluded:
            print(f"Scoring {week_key} without ETFs that have no data for this week: {excluded}")

        weekly_scores[week_key] = calculate_scores_for_etfs_weekly(
            etfs_with_data, week_key, risk_percentage
        )

    return etf_pred_dict, weekly_scores

# Select top ETFs for weekly portfolio management
def select_top_etfs_weekly(df_scores, forecast_period):
    """Return the tickers of the two highest-'SmoothedScore' rows.

    NOTE(review): this redefines select_top_etfs_weekly from an earlier cell,
    which ranked by 'Score' and printed diagnostics; after this cell runs, this
    version is the one in effect.

    Args:
        df_scores: DataFrame with at least 'ETF' and 'SmoothedScore' columns.
        forecast_period: label used only in the "no scores" message.

    Returns:
        List of 'ETF' values, best first; an empty list when ``df_scores`` is
        empty.
    """
    if not df_scores.empty:
        return df_scores.nlargest(2, 'SmoothedScore')['ETF'].tolist()

    print(f"No scores available for {forecast_period}. Skipping.")
    return []


In [29]:
# Run the weekly pipeline: main_weekly returns the per-ETF prediction dict and
# the per-week score records keyed by week ('1w', '2w', ...).
etf_pred_dict, weekly_scores = main_weekly(tickers, etf_dict)


Debug: Returns for SMH, Forecast Period 1w: [-0.02551755 -0.01617571 -0.00907259 -0.00350609]

Debug: Returns for SMH, Forecast Period 2w: [ 0.00605947 -0.00710686 -0.0088584  -0.00643576 -0.01012213]

Debug: Returns for SMH, Forecast Period 3w: [-0.00050983 -0.01072452  0.00731899  0.00772382]

Debug: Returns for SMH, Forecast Period 4w: [-0.0043764  -0.00491627 -0.00021382 -0.00756859 -0.01588712]

Debug: Returns for SMH, Forecast Period 5w: [-0.00264732 -0.01014778 -0.01046608 -0.00337104 -0.00102833]

Debug: Returns for SMH, Forecast Period 6w: [-0.00193795 -0.01144825 -0.0020594  -0.00452459 -0.00290433]

Debug: Returns for SMH, Forecast Period 7w: [-0.0095696  -0.013747   -0.0020018  -0.00856634 -0.00852687]

Debug: Returns for SMH, Forecast Period 8w: [-0.01362961 -0.00732099  0.01520726 -0.00528363]

Debug: Returns for SMH, Forecast Period 9w: [-0.00140327 -0.00473357 -0.00748112  0.00243737  0.00656862]

Debug: Returns for SMH, Forecast Period 10w: [-0.00032866 -0.00798244  0

In [30]:
# Build one score table per forecast horizon; keys mirror weekly_scores ('1w', '2w', ...).
weekly_scores_dfs = dict()
for week_key, scores in weekly_scores.items():
    weekly_scores_dfs.update({week_key: pd.DataFrame(scores)})

# Publish df_scores_1w ... df_scores_48w as notebook-level variables; horizons
# that have no scores get an empty DataFrame instead.
for week in range(1, 49):
    week_key = f'{week}w'
    if week_key in weekly_scores_dfs:
        globals()[f'df_scores_{week_key}'] = weekly_scores_dfs[week_key]
    else:
        globals()[f'df_scores_{week_key}'] = pd.DataFrame()




In [31]:
# For every forecast week keep the two rows with the highest 'SmoothedScore'
top_etfs_weekly = {}

for week_key, df_scores in weekly_scores_dfs.items():
    top_etfs_weekly[week_key] = df_scores.nlargest(n=2, columns='SmoothedScore')

    # Show the selection for this week (header line, then the two rows)
    print(f"\nTop 2 ETFs for {week_key} forecast:", top_etfs_weekly[week_key], sep="\n")



Top 2 ETFs for 1w forecast:
    ETF Week  RiskPercentage  RawScore  SmoothedScore
9   IXN   1w             0.1  4.718463       0.530358
1  SOXX   1w             0.1  3.328601       0.528928

Top 2 ETFs for 2w forecast:
   ETF Week  RiskPercentage  RawScore  SmoothedScore
9  IXN   2w             0.1  3.823881       1.098659
0  SMH   2w             0.1  0.661286       0.661286

Top 2 ETFs for 3w forecast:
    ETF Week  RiskPercentage  RawScore  SmoothedScore
1  SOXX   3w             0.1  1.925781       1.636467
0   SMH   3w             0.1  1.564139       1.564139

Top 2 ETFs for 4w forecast:
   ETF Week  RiskPercentage  RawScore  SmoothedScore
9  IXN   4w             0.1  2.539478       0.766002
8  IGM   4w             0.1  5.309079       0.322633

Top 2 ETFs for 5w forecast:
    ETF Week  RiskPercentage  RawScore  SmoothedScore
9   IXN   5w             0.1  3.199654       0.503393
1  SOXX   5w             0.1  2.170468       0.435927

Top 2 ETFs for 6w forecast:
    ETF Week  RiskPerc

In [32]:
# Function to generate week ranges
def generate_week_ranges(start_date, end_date):
    """Split the span from ``start_date`` to ``end_date`` into 7-day windows.

    NOTE(review): an identical generate_week_ranges is defined in an earlier
    cell; this later definition replaces it once the cell runs.

    Args:
        start_date: first day, formatted 'YYYY-MM-DD'.
        end_date: last day, formatted 'YYYY-MM-DD'.

    Returns:
        List of (window_start, window_end) tuples of 'YYYY-MM-DD' strings.
        Each window ends six days after it starts, clipped to ``end_date``;
        the list is empty when ``start_date`` is not before ``end_date``.
    """
    date_format = '%Y-%m-%d'
    cursor = datetime.strptime(start_date, date_format)
    limit = datetime.strptime(end_date, date_format)
    step = timedelta(days=7)

    windows = []
    while cursor < limit:
        window_end = min(cursor + timedelta(days=6), limit)
        windows.append((cursor.strftime(date_format), window_end.strftime(date_format)))
        cursor = cursor + step
    return windows

# Function to gather ETF data for weeks
def gather_etf_data_for_weeks(tickers, week_ranges):
    """Download price history for every ticker in every week window.

    NOTE(review): an identical gather_etf_data_for_weeks is defined in an
    earlier cell; this later definition replaces it once the cell runs.

    Args:
        tickers: iterable of ticker symbols passed to ``yf.download``.
        week_ranges: iterable of (start_date, end_date) string pairs.

    Returns:
        ``{"<start> to <end>": {ticker: DataFrame}}``. A ticker whose download
        comes back empty is reported and left out of that week's dict.
    """
    histories_by_week = {}
    for week_start, week_end in week_ranges:
        label = f"{week_start} to {week_end}"
        per_ticker = {}
        histories_by_week[label] = per_ticker

        for ticker in tickers:
            frame = yf.download(ticker, start=week_start, end=week_end)
            if not frame.empty:
                frame.index = pd.to_datetime(frame.index)
                per_ticker[ticker] = frame
            else:
                print(f"No data found for {ticker} in {label}")
    return histories_by_week

# Function to initialize shares for the first week
def initialize_shares_for_first_week(top_etfs_1w, etf_histories, week, investment_amount=50000):
    """Open the initial positions at each ETF's first close of the given week.

    Args:
        top_etfs_1w: iterable of tickers to buy.
        etf_histories: ``{week_label: {ticker: DataFrame}}`` where each frame
            has a 'Close' column and a sorted datetime index.
        week: week label of the form "<start> to <end>"; the part before
            " to " is parsed as the week's start date.
        investment_amount: cash used for EACH ticker (it is not split between
            tickers).

    Returns:
        ``{ticker: number_of_shares}`` for every ticker that had price data on
        or after the week's start date.
    """
    ticker_shares = {}
    week_start = pd.to_datetime(week.split(" to ")[0])

    for ticker in top_etfs_1w:
        etf_history = etf_histories.get(week, {}).get(ticker)
        if etf_history is None or etf_history.empty:
            print(f"No data found for {ticker} in {week}")
            continue

        # First available session on or after the week's start. The previous
        # `week_start + BDay(1)` skipped the opening session whenever the start
        # date was itself a trading day, and disagreed with the `index[0]`
        # convention used by manage_etf_portfolio_weekly.
        position = etf_history.index.searchsorted(week_start)
        if position >= len(etf_history.index):
            # Every row is dated before the week's start: nothing to price with.
            print(f"No data found for {ticker} in {week}")
            continue

        first_trading_day = etf_history.index[position]
        price_on_first_trading_day = etf_history['Close'].iloc[position]

        # 0.975 scales the cash before buying (presumably a 2.5% cost — confirm).
        num_shares = (investment_amount * 0.975) / price_on_first_trading_day
        print(f"Shares 1st week: ({investment_amount} * 0.975) / {price_on_first_trading_day}")
        ticker_shares[ticker] = num_shares
        print(f"Ticker: {ticker}, First trading day: {first_trading_day.date()}, Price: {price_on_first_trading_day}, Shares: {num_shares:.2f}")
    return ticker_shares

# Function to manage ETF portfolio weekly
def manage_etf_portfolio_weekly(
    top_etfs_previous, top_etfs_current, previous_week, current_week, ticker_shares, gathered_data_per_week
):
    """Rotate holdings from last week's top ETFs into this week's top ETFs.

    ETFs that dropped out of the top list are paired, in order, with the ETFs
    that entered it. Each pair is swapped at the first 'Close' of the current
    week, with a 0.975 factor applied on the sale and again on the purchase.

    Args:
        top_etfs_previous: tickers selected for the previous week.
        top_etfs_current: tickers selected for the current week.
        previous_week: label of the previous week (used in log output only).
        current_week: label of the current week; key into
            ``gathered_data_per_week``.
        ticker_shares: ``{ticker: shares}`` currently held. Updated in place.
        gathered_data_per_week: ``{week_label: {ticker: DataFrame}}`` with a
            'Close' column per frame.

    Returns:
        The same ``ticker_shares`` dict after any swaps.
    """
    etf_histories_for_current_week = gathered_data_per_week.get(current_week, {})
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"Top 2 ETFs for {previous_week}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_week}: {top2etfs_current}")

    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Swaps are one-to-one; with unequal counts the portfolio is left untouched.
    if len(etfs_to_sell) != len(etfs_to_buy):
        print("Mismatch between ETFs to sell and buy. Adjusting allocation...")
        return ticker_shares

    for etf_sell, etf_buy in zip(etfs_to_sell, etfs_to_buy):
        no_of_shares = ticker_shares.get(etf_sell, 0)
        if no_of_shares <= 0:
            print(f"No shares found for {etf_sell} to sell.")
            continue
        if etf_sell not in etf_histories_for_current_week:
            print(f"Data for {etf_sell} is missing for {current_week}. Skipping sale.")
            continue
        if etf_buy not in etf_histories_for_current_week:
            # Check the buy side BEFORE selling: previously the position was
            # sold and removed first, so the proceeds were lost when the
            # replacement had no price data.
            print(f"Data for {etf_buy} is missing for {current_week}. Skipping purchase and keeping {etf_sell}.")
            continue

        # Sell the outgoing ETF at the week's first close.
        first_trading_day_sell_price = etf_histories_for_current_week[etf_sell]['Close'].iloc[0]
        selling_value = no_of_shares * first_trading_day_sell_price * 0.975
        print(f"Sell {etf_sell}: {no_of_shares:.2f} shares at {first_trading_day_sell_price:.2f}. Total value: {selling_value:.2f}")
        del ticker_shares[etf_sell]

        # Buy the incoming ETF with the proceeds at the week's first close.
        first_trading_day_buy_price = etf_histories_for_current_week[etf_buy]['Close'].iloc[0]
        new_shares = (selling_value * 0.975) / first_trading_day_buy_price
        print(f"Buy {etf_buy}: {new_shares:.2f} shares at {first_trading_day_buy_price:.2f}.")
        # Add to any position already held rather than overwriting it.
        ticker_shares[etf_buy] = ticker_shares.get(etf_buy, 0) + new_shares

    print(f"Updated ticker shares after {current_week}: {ticker_shares}")
    return ticker_shares


In [33]:
# Split the backtest period into consecutive 7-day windows.
week_ranges = generate_week_ranges('2024-01-01', '2024-12-01')
#print("Generated Week Ranges:", week_ranges)

# Download price history for every ticker in every window.
# `tickers` must already be defined by an earlier cell.
etf_histories = gather_etf_data_for_weeks(tickers, week_ranges)
#print(f"ETF Histories Collected for {len(etf_histories)} weeks")

# Week labels in insertion (chronological) order; built once and reused below
# instead of re-listing etf_histories on every loop iteration.
week_labels = list(etf_histories.keys())

# Map score keys ('1w', '2w', ...) onto week labels by position.
week_key_mapping = {f"{i+1}w": week_range for i, week_range in enumerate(week_labels)}

# Debug: Print the week key mapping
print("Week Key Mapping:", week_key_mapping)

# Top ETFs per week, keyed by week label. Score keys that have no matching
# week label are ignored.
aligned_top_etfs_weekly = {}

for week_key, df_scores in weekly_scores_dfs.items():
    forecast_period = week_key_mapping.get(week_key, None)
    if forecast_period:
        aligned_top_etfs_weekly[forecast_period] = select_top_etfs_weekly(df_scores, forecast_period)

# Portfolio initialization and management
ticker_shares = {}
ticker_shares_per_week = {}

for i, week_range in enumerate(week_labels):
    current_week_key = week_range
    # A week can be missing from aligned_top_etfs_weekly when no scores were
    # produced for it; treat that as an empty selection instead of raising
    # KeyError, so the existing holdings are carried forward.
    current_top_etfs = aligned_top_etfs_weekly.get(current_week_key, [])
    if i == 0:
        # First week initialization
        ticker_shares = initialize_shares_for_first_week(
            current_top_etfs,
            etf_histories,
            current_week_key
        )
    else:
        # Subsequent weeks' portfolio management
        prev_week_key = week_labels[i - 1]
        ticker_shares = manage_etf_portfolio_weekly(
            aligned_top_etfs_weekly.get(prev_week_key, []),
            current_top_etfs,
            prev_week_key,
            current_week_key,
            ticker_shares,
            etf_histories
        )
    # Snapshot the holdings so later in-place updates do not alter history.
    ticker_shares_per_week[current_week_key] = ticker_shares.copy()

print("\nFinal Ticker Shares per Week:")
for week, shares in ticker_shares_per_week.items():
    print(f"Week {week}: {shares}")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Week Key Mapping: {'1w': '2024-01-01 to 2024-01-07', '2w': '2024-01-08 to 2024-01-14', '3w': '2024-01-15 to 2024-01-21', '4w': '2024-01-22 to 2024-01-28', '5w': '2024-01-29 to 2024-02-04', '6w': '2024-02-05 to 2024-02-11', '7w': '2024-02-12 to 2024-02-18', '8w': '2024-02-19 to 2024-02-25', '9w': '2024-02-26 to 2024-03-03', '10w': '2024-03-04 to 2024-03-10', '11w': '2024-03-11 to 2024-03-17', '12w': '2024-03-18 to 2024-03-24', '13w': '2024-03-25 to 2024-03-31', '14w': '2024-04-01 to 2024-04-07', '15w': '2024-04-08 to 2024-04-14', '16w': '2024-04-15 to 2024-04-21', '17w': '2024-04-22 to 2024-04-28', '18w': '2024-04-29 to 2024-05-05', '19w': '2024-05-06 to 2024-05-12', '20w': '2024-05-13 to 2024-05-19', '21w': '2024-05-20 to 2024-05-26', '22w': '2024-05-27 to 2024-06-02', '23w': '2024-06-03 to 2024-06-09', '24w': '2024-06-10 to 2024-06-16', '25w': '2024-06-17 to 2024-06-23', '26w': '2024-06-24 to 2024-06-30', '27w': '2024-07-01 to 2024-07-07', '28w': '2024-07-08 to 2024-07-14', '29w': '20




In [34]:
# Start of the download window for the 49th week. The first row returned by
# yf.download from this date on is treated as the week's first trading day, so
# adjust the date if the intended week starts elsewhere.
first_trading_day_49w = '2024-12-01'

# The most recently stored entry of `ticker_shares_per_week` is taken as the
# 48th week. With nothing stored the key is None, which sends execution to the
# "No data available" branch below instead of raising IndexError.
stored_week_keys = list(ticker_shares_per_week.keys())
week_48_range = stored_week_keys[-1] if stored_week_keys else None
week_49_start = first_trading_day_49w
print(f"Using data for the 48th week: {week_48_range}")

print(f"Fetching data starting from the first trading day of the 49th week: {week_49_start}")

# End of the download window passed to yf.download
week_49_end = '2024-12-06'
# Maps each held ticker to the value of its position at the 49th week's first close
etf_values_49w = {}

# Ensure 48th week data exists
if week_48_range in ticker_shares_per_week:
    # Holdings carried out of the 48th week
    ticker_shares_48w = ticker_shares_per_week[week_48_range]

    # Price every held ETF at the first close available in the 49th-week window
    for ticker, shares in ticker_shares_48w.items():
        print(f"Fetching data for ticker {ticker} starting from {week_49_start}...")
        # Download historical data for the 49th week
        data = yf.download(ticker, start=week_49_start, end=week_49_end)

        if not data.empty:
            close_prices = data['Close']
            # yf.download may return MultiIndex columns even for one ticker, in
            # which case data['Close'] is a one-column DataFrame; reduce it to a
            # Series so the price below is a scalar that `:.2f` can format.
            if isinstance(close_prices, pd.DataFrame):
                close_prices = close_prices.iloc[:, 0]
            # Closing price on the first row of the downloaded window
            closing_price_49w = close_prices.iloc[0]
            # Calculate the value of the shares
            total_value = shares * closing_price_49w
            etf_values_49w[ticker] = total_value
            print(f"{ticker}: {shares:.2f} shares at {closing_price_49w:.2f} each, total value: {total_value:.2f}")
        else:
            print(f"{ticker}: No data available for the 49th week's first trading day.")
else:
    print(f"No data available in ticker_shares_per_week for the 48th week: {week_48_range}")



[*********************100%%**********************]  1 of 1 completed


Using data for the 48th week: 2024-11-25 to 2024-12-01
Fetching data starting from the first trading day of the 49th week: 2024-12-01
Fetching data for ticker IXN starting from 2024-12-01...
IXN: 227.99 shares at 84.80 each, total value: 19333.55
Fetching data for ticker IGM starting from 2024-12-01...


[*********************100%%**********************]  1 of 1 completed

IGM: 265.67 shares at 102.51 each, total value: 27234.16





In [35]:
# Summarise the 49th-week valuation: bail out with a notice when nothing was
# priced, otherwise print the portfolio total followed by each ETF's value.
if not etf_values_49w:
    print("No values could be calculated for the 49th week's first trading day.")
else:
    print("\nETF values on the 49th week's first trading day:")
    # Portfolio total is the sum of the individual position values
    total_value = sum(etf_values_49w.values())
    print(f"Total portfolio value: {total_value:.2f}")
    # Per-ETF breakdown, in the order the positions were valued
    for ticker, value in etf_values_49w.items():
        print(f"{ticker}: {value:.2f}")



ETF values on the 49th week's first trading day:
Total portfolio value: 46567.71
IXN: 19333.55
IGM: 27234.16


In [36]:
# Re-prints the 49th-week portfolio valuation summary held in `etf_values_49w`.
if etf_values_49w:
    print("\nETF values on the 49th week's first trading day:")
    # Add up every position to get the whole-portfolio figure
    total_value = sum(etf_values_49w.values())
    print(f"Total portfolio value: {total_value:.2f}")
    # One line per ETF, looked up by ticker in stored order
    for ticker in etf_values_49w:
        value = etf_values_49w[ticker]
        print(f"{ticker}: {value:.2f}")
else:
    # Nothing was priced, so there is no total to report
    print("No values could be calculated for the 49th week's first trading day.")



ETF values on the 49th week's first trading day:
Total portfolio value: 46567.71
IXN: 19333.55
IGM: 27234.16



ETF values on the 49th week's first trading day:
Total portfolio value: 76969.68
IYW: 65677.09
IXN: 11292.60


ETF values on the 49th week's first trading day:
Total portfolio value: 70753.61
IYW: 39222.20
XLK: 31531.41