In [1]:
#import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
#import os
#os.environ["TF_USE_LEGACY_KERAS"] = "True"


In [2]:
#!pip uninstall tensorflow tensorflow-macos tensorflow-metal -y
#!pip uninstall tensorflow -y
#!pip install --upgrade tensorflow


In [3]:
#!pip install tensorflow==2.13.0
#!pip install tensorflow
#!pip install tf_keras


In [4]:
#pip show tensorflow
#!pip show tensorflow

In [5]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler, Normalizer
import pandas_ta as ta
from pandas.tseries.offsets import MonthEnd, BDay, Week
from itertools import product
from sklearn.metrics import mean_absolute_error

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from datetime import datetime, timedelta
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor
#from skopt import BayesSearchCV

from skopt.space import Real, Integer, Categorical


from tensorflow.keras.models import Sequential

#from tensorflow.keras.optimizers.legacy import Adam as LegacyAdam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from skopt import gp_minimize

from sklearn.metrics import mean_squared_error
from bayes_opt import BayesianOptimization

import tensorflow as tf
#from tensorflow.keras.optimizers.legacy import Adam
import time
#from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.optimizers.legacy import RMSprop

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input, Bidirectional

In [6]:
# Function for loading ETF data
def data_loading(ticker_symbol, start_date, end_date):
    etf_data = yf.Ticker(ticker_symbol)
    etf_history = etf_data.history(start=start_date, end=end_date)
    etf_history.index = etf_history.index.tz_localize(None)
    return etf_history, etf_data

#Function to derive the predictor columns
def etf_predictors(etf_history,  start_date, end_date, etf_data, benchmark_ticker = '^GSPC' ):
    
    # Calculate Daily Returns
    etf_history['Daily Return'] = etf_history['Close'].pct_change()

    # Calculate 21-Day Volatility (standard deviation of daily returns, annualized)
    etf_history['Volatility'] = etf_history['Daily Return'].rolling(window=21).std() * np.sqrt(252)
    #etf_history['Volatility_ta'] = ta.volatility(etf_history['Close'], window=21, annualize=True)
   
    # Get the ETF info
    etf_info = etf_data.info

    # Retrieve the net asset value price (NAV) and total net assets
    nav_price = etf_info['navPrice']
    total_assets = etf_info['totalAssets']

    # Calculate the number of shares outstanding
    shares_outstanding = total_assets / nav_price

    # Calculate Market Capitalization for each day
    # Market Capitalization = Closing Price * Total Number of Shares Outstanding
    etf_history['Market Cap'] = etf_history['Close'] * shares_outstanding

    total_assets = etf_info['totalAssets']
    total_liabilities = etf_info.get('totalLiabilities', 0)  # Handle the case where total liabilities might not be present
    
    # Calculate Book Value per Share
    book_value_per_share = (total_assets - total_liabilities) / shares_outstanding
    
    # Calculate Price to Book (P/B) Ratio
    etf_history['P/B Ratio'] = etf_history['Close'] / book_value_per_share
    
    # Calculate 1-Month Momentum (21 trading days)
    
    etf_history['Momentum'] = ta.mom(etf_history['Close'], length=21)
    
    benchmark_data = yf.download(benchmark_ticker, start=start_date, end=end_date)
    benchmark_data['Daily Return'] = benchmark_data['Close'].pct_change()
    #benchmark_data_history = benchmark_data.dropna()
    benchmark_data.head(10)
    
    # Ensure the indices are time zone-naive
    benchmark_data.index = benchmark_data.index.tz_localize(None)
    combined_data = etf_history[['Close']].join(benchmark_data[['Close']], lsuffix='_ETF', rsuffix='_Benchmark')
    combined_data = combined_data.dropna()
    
    # Calculate rolling beta with a 30-day window
    rolling_beta_21 = rolling_beta(combined_data, window=21)

    # Add the rolling beta to the dataframe
    combined_data.loc[:, 'Rolling Beta 21-day'] = rolling_beta_21
    etf_history['Rolling Beta']=combined_data['Rolling Beta 21-day']
    
    # Calculate daily profitability
    daily_profitabilities = []
    previous_nav = None
    for index, row in etf_history.iterrows():
        current_nav = row['Close']  # Current day's NAV
        #print(current_nav)
        if previous_nav is not None:
            daily_profitability = (current_nav - previous_nav) / previous_nav * 100
            daily_profitabilities.append(daily_profitability)
        else:
            daily_profitabilities.append(None)
        previous_nav = current_nav  # Update previous_nav for the next iteration

    # Add daily profitabilities to ETF dataset
    etf_history['Daily Profitability (%)'] = daily_profitabilities
    
    # Calculate the dividend yield for each day
    dividend_yields = []

    for index, row in etf_history.iterrows():
        # Get the dividend payment for the day
        dividend_payment = row['Dividends']

        # Get the current market price of the ETF for the day
        current_price = row['Close']

        # Calculate the dividend yield for the day
        dividend_yield = (dividend_payment / current_price) * 100

        # Append the dividend yield to the list
        dividend_yields.append(dividend_yield)
      
    etf_history['Div yield'] = dividend_yields

    #Volatility
    etf_history['ATR'] = ta.atr(etf_history['High'], etf_history['Low'], etf_history['Close'], length=21)    
    
    # Compute the Relative Volatility Index (RVI)
    rvi = ta.rvi(etf_history['Close'],length=21)
    etf_history['RVI'] = rvi
    
    #Momentum
    rsi_window = 14  # Window size for RSI calculation
    roc_window = 12  # Window size for ROC calculation
    #rsi_window = 21  
    #roc_window = 21  

    

    etf_history['RSI'] = ta.rsi(etf_history['Close'],length=rsi_window)
    # Calculate Rate of Change (ROC)
    etf_history['ROC'] = ta.roc(etf_history['Close'], length=roc_window)
        
    etf_history['log_returns'] = np.log(etf_history['Close'] / etf_history['Close'].shift(1))

    return etf_history

# Function to calculate rolling beta
def rolling_beta(df, window):
    rolling_cov = df['Close_Benchmark'].rolling(window=window).cov(df['Close_ETF'])
    rolling_var = df['Close_Benchmark'].rolling(window=window).var()
    rolling_beta = rolling_cov / rolling_var
    return rolling_beta



In [7]:
def pre_processing(
    etf_history, 
    train_start_date,
    train_end_date,
    test_start_date,
    test_end_date,
    prediction_dates_monthly,
    prediction_dates_weekly,
    feature_columns=None,
    scaling_strategy='StandardScaler', 
    final_end_date='2024-12-01'
):
    """
    Preprocess ETF history data and prepare train/test splits along with 
    dynamically scaled forecast data for both weekly & monthly horizons,
    stored in separate dictionaries.

    Parameters
    ----------
    etf_history : pd.DataFrame
        Full historical data with at least 'log_returns' column plus other features.
    train_start_date : str
        Start date (YYYY-MM-DD) for training data.
    train_end_date : str
        End date (YYYY-MM-DD) for training data.
    test_start_date : str
        Start date (YYYY-MM-DD) for test data.
    test_end_date : str
        End date (YYYY-MM-DD) for test data.
    prediction_dates_monthly : list of str
        List of start dates for each monthly forecast window.
    prediction_dates_weekly : list of str
        List of start dates for each weekly forecast window.
    feature_columns : list of str, optional
        Columns to be used as features. If None, defaults are used.
    scaling_strategy : str, optional
        'StandardScaler' or 'Normalizer'. Defaults to 'StandardScaler'.
    final_end_date : str, optional
        The final end date (YYYY-MM-DD) for the last forecast window.
        Defaults to '2024-12-01'.

    Returns
    -------
    dict
        A dictionary containing:
        - 'train_data_scaled': np.ndarray
        - 'test_data_scaled': np.ndarray
        - 'y_train': pd.Series
        - 'y_test': pd.Series
        - 'scaler': scaler object
        - 'X': pd.DataFrame (all feature columns before splitting)
        - 'etf_history': pd.DataFrame (original data with missing values handled)
        - 'forecast_data_weekly': dict of weekly forecast DataFrame slices
        - 'forecast_data_monthly': dict of monthly forecast DataFrame slices
        - 'forecast_data_weekly_scaled': dict of scaled weekly forecast slices
        - 'forecast_data_monthly_scaled': dict of scaled monthly forecast slices
    """

    # 1. Handle missing values and infinite values
    etf_history.fillna(etf_history.median(), inplace=True)
    etf_history.replace([np.inf, -np.inf], 0, inplace=True)

    # 2. Default feature columns if none are provided
    if feature_columns is None:
        feature_columns = [
            'Volatility', 'Volume', 'Daily Return', 'Market Cap', 
            'P/B Ratio', 'Momentum', 'Rolling Beta', 
            'Daily Profitability (%)', 'ATR', 'RVI', 'RSI', 'ROC'
        ]

    # 3. Select features (X) and target (y)
    X = etf_history.loc[:, feature_columns]
    y = etf_history['log_returns']

    # 4. Split train and test data
    train_data = X.loc[train_start_date : train_end_date]
    test_data  = X.loc[test_start_date  : test_end_date]
    y_train = y.loc[train_start_date : train_end_date]
    y_test  = y.loc[test_start_date  : test_end_date]

    # 5. Initialize and fit scaler
    if scaling_strategy == 'StandardScaler':
        scaler = StandardScaler()
    elif scaling_strategy == 'Normalizer':
        scaler = Normalizer()
    else:
        raise ValueError(f"Unsupported scaling strategy: {scaling_strategy}")

    train_data_scaled = scaler.fit_transform(train_data)
    test_data_scaled  = scaler.transform(test_data)

    # 6. Create separate dictionaries for weekly and monthly
    forecast_data_weekly = {}
    forecast_data_monthly = {}

    # ---- Weekly Forecast Slices ----
    for i, start_date in enumerate(prediction_dates_weekly):
        if i < len(prediction_dates_weekly) - 1:
            weekly_end = (pd.to_datetime(start_date) + Week(1) - pd.Timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            weekly_end = final_end_date
        
        w_key = f'forecast_data_{i+1}w'
        forecast_data_weekly[w_key] = X.loc[start_date : weekly_end]

    # ---- Monthly Forecast Slices ----
    for i, start_date in enumerate(prediction_dates_monthly):
        if i < len(prediction_dates_monthly) - 1:
            monthly_end = (pd.to_datetime(start_date) + MonthEnd(0)).strftime('%Y-%m-%d')
        else:
            monthly_end = final_end_date
        
        m_key = f'forecast_data_{i+1}m'
        forecast_data_monthly[m_key] = X.loc[start_date : monthly_end]

    # 7. Scale forecast data (weekly & monthly) and add intercept
    forecast_data_weekly_scaled = {}
    for period_key, period_data in forecast_data_weekly.items():
        if not period_data.empty:
            data_scaled = scaler.transform(period_data)
            forecast_data_weekly_scaled[period_key] = sm.add_constant(data_scaled)

    forecast_data_monthly_scaled = {}
    for period_key, period_data in forecast_data_monthly.items():
        if not period_data.empty:
            data_scaled = scaler.transform(period_data)
            forecast_data_monthly_scaled[period_key] = sm.add_constant(data_scaled)

    # 8. Add a constant to train and test data
    train_data_scaled = sm.add_constant(train_data_scaled)
    test_data_scaled  = sm.add_constant(test_data_scaled)

    # 9. Return all objects in a dictionary
    return {
        'train_data_scaled': train_data_scaled,
        'test_data_scaled': test_data_scaled,
        'y_train': y_train,
        'y_test': y_test,
        'scaler': scaler,
        'X': X,
        'etf_history': etf_history,
        'forecast_data_weekly': forecast_data_weekly,
        'forecast_data_monthly': forecast_data_monthly,
        'forecast_data_weekly_scaled': forecast_data_weekly_scaled,
        'forecast_data_monthly_scaled': forecast_data_monthly_scaled
    }


In [8]:
def build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train, validation_split=0.2):
    train_data_reshaped = reshape_for_lstm(train_data_scaled)
    
    model = Sequential()
    
    # First LSTM layer
    model.add(LSTM(units=int(units), return_sequences=True, input_shape=(train_data_reshaped.shape[1], train_data_reshaped.shape[2])))
    model.add(Dropout(dropout_rate))
    
    # Second LSTM layer with return_sequences=False
    model.add(LSTM(units=int(units), return_sequences=False))
    model.add(Dropout(dropout_rate))
    
    # Adding a Dense hidden layer with ReLU activation
    model.add(Dense(units=int(units/2), activation='relu'))
    model.add(Dropout(dropout_rate))
    
    # Output layer
    model.add(Dense(1))
    
    optimizer = RMSprop(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    
    # Early stopping to prevent overfitting
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    
     # Track the start time
    #start_time = time.time()
    
    model.fit(train_data_reshaped, y_train, 
              epochs=int(epochs), 
              batch_size=int(batch_size), 
              validation_split=validation_split, 
              callbacks=[early_stopping],
              verbose=0)
    
    # Calculate time consumed
    #time_consumed = time.time() - start_time
    
    return model


"""
def optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test):
    def objective_function(epochs, batch_size, units, dropout_rate, learning_rate):
        model = build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train)
        test_data_reshaped = reshape_for_lstm(test_data_scaled)
        test_predictions = make_predictions(model, test_data_reshaped)
        mse = mean_squared_error(y_test, test_predictions)
        return -mse  # We return the negative MSE because Bayesian Optimization tries to maximize the function

    # Reduced parameter ranges for faster search
    param_bounds = {
        'epochs': (10, 30),  # Reduce max epochs
        'batch_size': (16, 64),  # Widen batch size range for exploration
        'units': (32, 128),  # Expand LSTM unit range
        'dropout_rate': (0.2, 0.5),  # Adjust dropout for robustness
        'learning_rate': (1e-4, 5e-3)  # Widen learning rate range for exploration
    }
    
    optimizer = BayesianOptimization(
        f=objective_function,
        pbounds=param_bounds,
        verbose=2,
        random_state=42,
    )

    # Reduced the number of iterations for faster optimization
    optimizer.maximize(init_points=15, n_iter=50)  # Fewer initial points and iterations

    best_params = optimizer.max['params']
    return best_params
"""
def optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test):
    def objective_function(epochs, batch_size, units, dropout_rate, learning_rate):
        # Build and train the LSTM model
        model = build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train)
        test_data_reshaped = reshape_for_lstm(test_data_scaled)
        test_predictions = make_predictions(model, test_data_reshaped)
        mse = mean_squared_error(y_test, test_predictions)
        return -mse  # Negative MSE because we maximize in Bayesian Optimization

    # Define parameter bounds
    """param_bounds = {
        
        'batch_size': (16, 64),
        'dropout_rate': (0.2, 0.5),
        'epochs': (10, 30),
        'learning_rate': (1e-4, 5e-3),
        'units': (32, 128),
    }"""
    param_bounds = {
        
        'batch_size': (16, 64),
        'dropout_rate': (0.1, 0.5),
        'epochs': (10, 50),
        'learning_rate': (1e-4, 5e-3),
        'units': (32, 128),
    }#change in bounds

    # Initialize Bayesian optimizer
    optimizer = BayesianOptimization(
        f=objective_function,
        pbounds=param_bounds,
        verbose=2,
        random_state=42,
    )

    # Manual initialization points
    manual_init_points = [
        {"batch_size":36.99,"dropout_rate":0.1174,"epochs":44.65,"learning_rate":0.000641,"units":85.4},
        {"batch_size":54.8,"dropout_rate":0.2914,"epochs":11.95,"learning_rate":0.003453,"units":74.25},
        {"batch_size":44.44,"dropout_rate":0.1814,"epochs":43.15,"learning_rate":0.0004211,"units":64.05},
        {"batch_size":63.03,"dropout_rate":0.3325,"epochs":47.58,"learning_rate":0.0009053,"units":79.89},
        {"batch_size":51.58,"dropout_rate":0.1418,"epochs":21.69,"learning_rate":0.0004297,"units":72.8},
        {"batch_size":51.44,"dropout_rate":0.1512,"epochs":12.6,"learning_rate":0.000954,"units":98.28},
        {"batch_size":49.37,"dropout_rate":0.1423,"epochs":42.09,"learning_rate":0.0001671,"units":99.34},
        {"batch_size":36.99,"dropout_rate":0.1174,"epochs":44.65,"learning_rate":0.000641,"units":85.4},
        {"batch_size":37.87,"dropout_rate":0.1913,"epochs":30.99,"learning_rate":0.0004888,"units":64.56},
        {"batch_size":51.58,"dropout_rate":0.1418,"epochs":21.69,"learning_rate":0.0004297,"units":72.8},
        {"batch_size":51.44,"dropout_rate":0.1512,"epochs":12.6,"learning_rate":0.000954,"units":98.28},
        {"batch_size":56.71,"dropout_rate":0.1596,"epochs":10.22,"learning_rate":0.0008339,"units":85.34},
        {"batch_size":63.03,"dropout_rate":0.3325,"epochs":47.58,"learning_rate":0.0009053,"units":79.89},
        {"batch_size":51.58,"dropout_rate":0.1418,"epochs":21.69,"learning_rate":0.0004297,"units":72.8},
        {"batch_size":36.99,"dropout_rate":0.1174,"epochs":44.65,"learning_rate":0.000641,"units":85.4}
    ]

    # Add manual initialization points
    for point in manual_init_points:
        optimizer.probe(params=point, lazy=False)

    # Run the optimization process
    n_iter = 50  # Number of additional iterations
    optimizer.maximize(init_points=0, n_iter=n_iter)  # Set init_points=0 since we added manual points

    # Display initialization points results
    print("\nInitialization Points Results:")
    for i, result in enumerate(optimizer.res[:len(manual_init_points)]):
        print(f"Point {i + 1}: {result}")

    # Return the best parameters found
    best_params = optimizer.max['params']
    return best_params
def make_predictions(model, data_scaled):
    if len(data_scaled.shape) == 2:
        data_scaled = np.reshape(data_scaled, (data_scaled.shape[0], 1, data_scaled.shape[1]))
    elif len(data_scaled.shape) != 3:
        raise ValueError(f"Unexpected shape for input data: {data_scaled.shape}")
    
    predictions = model.predict(data_scaled)
    return predictions

"""def eval_model(best_model,test_data_scaled,y_test,y_train):
    
    test_predictions = best_model.predict((test_data_scaled))
    mae = mean_absolute_error(y_test, test_predictions)
    # Mean Absolute Scaled Error (MASE) - Example calculation assuming seasonal period m=1
    seasonal_naive = np.roll(y_test, 1)  # Shift y_test by 1 for seasonal naive forecast
    #seasonal_mae = np.mean(np.abs(y_test - seasonal_naive))
    seasonal_mae = np.mean(np.abs(y_test[1:] - seasonal_naive[1:]))
    mase = mae / seasonal_mae

    # Mean Absolute Percentage Error (MAPE)
    #mape = np.mean(np.abs((y_test - test_predictions) / y_test)) * 100

    #n = y_test.shape[0]
    #d = np.abs(  np.diff( y_test) ).sum()/(n-1)
    
    #errors = np.abs(y_test - test_predictions )
    #mase_value = errors.mean()/d

    print("Mean Absolute Error (MAE):", mae)
    print("Mean Absolute Scaled Error (MASE):", mase)
    #print("Mean Absolute Scaled Error-2 (MASE-2):", mase_value)
    #print("Mean Absolute Percentage Error (MAPE):", mape)
    
    return mae, mase"""

def eval_model(best_model, test_data_scaled, y_test, y_train=None):
    # Reshape test data to match LSTM input requirements
    test_data_reshaped = test_data_scaled.reshape((test_data_scaled.shape[0], 1, test_data_scaled.shape[1]))
    
    # Make predictions
    test_predictions = best_model.predict(test_data_reshaped)
    
    # Calculate Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_test, test_predictions)
    
    # Calculate Mean Absolute Scaled Error (MASE) - Example calculation assuming seasonal period m=1
    naive_forecast = y_test.shift(1).fillna(method='bfill')
    mase = mae / mean_absolute_error(y_test, naive_forecast)
    print("mae: ",mae)
    print("mase: ",mase)
    return mae, mase

# Function to make predictions
def predictions(model, forecast_data_scaled, original_forecast_data):
    forecast_results = {}
    for key, data in forecast_data_scaled.items():
        # Reshape data for compatibility with the LSTM model
        if len(data.shape) == 2:
            data = data.reshape((data.shape[0], 1, data.shape[1]))
        
        # Make predictions
        predictions = model.predict(data)
        
        # Use the index from the corresponding original forecast DataFrame
        original_index = original_forecast_data[key].index
        
        # Create a DataFrame for the predictions
        forecast_results[key] = pd.DataFrame(
            predictions, 
            columns=["log_returns"], 
            index=original_index
        )
    return forecast_results



def calculate_mase(y_true, y_pred, naive_forecast):
    # Calculate the MAE of the model's predictions
    mae_model = mean_absolute_error(y_true, y_pred)
    
    # Calculate the MAE of the naive forecast
    mae_naive = mean_absolute_error(y_true, naive_forecast)
    
    # Calculate MASE
    mase = mae_model / mae_naive
    return mase

def reshape_for_lstm(data):
    return np.reshape(data, (data.shape[0], 1, data.shape[1]))  # Reshape into (samples, time_steps, features)


In [9]:
def combined_workflow(
    tickers,
    start_date, 
    end_date, 
    train_start_date, 
    train_end_date, 
    test_start_date, 
    test_end_date,
    prediction_dates_monthly, 
    prediction_dates_weekly
):
    etf_results = {}
    
    for ticker in tickers:
        
        print(ticker)
        # 1. Load and preprocess data
        etf_history, etf_data = data_loading(ticker, start_date, end_date)
        etf_history = etf_predictors(etf_history, start_date, end_date, etf_data)

        # 2. Single call to pre_processing for training/testing & both forecast horizons
        preprocessed = pre_processing(
            etf_history=etf_history, 
            train_start_date=train_start_date, 
            train_end_date=train_end_date,
            test_start_date=test_start_date, 
            test_end_date=test_end_date,
            prediction_dates_monthly=prediction_dates_monthly,
            prediction_dates_weekly=prediction_dates_weekly
        )
        
        # 3. Train the model
        #model = model_training(preprocessed['train_data_scaled'], preprocessed['y_train'])
        
        # Track the start time
        start_time = time.time()


        # Optimize hyperparameters
        best_params = optimize_hyperparameters(preprocessed['train_data_scaled'], preprocessed['y_train'], preprocessed['test_data_scaled'], preprocessed['y_test'])

        # Extract the best hyperparameters
        best_epochs = int(best_params['epochs'])
        best_batch_size = int(best_params['batch_size'])
        best_units = int(best_params['units'])
        best_dropout_rate = best_params['dropout_rate']
        best_learning_rate = best_params['learning_rate']

        # Train the final model using the best hyperparameters
        model = build_and_train_lstm_model(
            epochs=best_epochs, 
            batch_size=best_batch_size, 
            units=best_units, 
            dropout_rate=best_dropout_rate, 
            learning_rate=best_learning_rate, 
            train_data_scaled=preprocessed['train_data_scaled'], 
            y_train=preprocessed['y_train']
        )


        # Calculate time consumed
        time_consumed = time.time() - start_time

        # Print the time consumed
        print(f"Time consumed for training: {time_consumed:.2f} seconds")
        
        # 4. Evaluate the model
        mae, mase = eval_model(
            model, 
            preprocessed['test_data_scaled'], 
            preprocessed['y_test'], 
            preprocessed['y_train']
        )
        
        # 5. Generate predictions for monthly forecasts
        monthly_forecast_results = predictions(
            model,
            preprocessed['forecast_data_monthly_scaled'],
            preprocessed['forecast_data_monthly']  # Pass unscaled forecast data for index preservation
        )
        # 6. Generate predictions for weekly forecasts

        weekly_forecast_results = predictions(
            model,
            preprocessed['forecast_data_weekly_scaled'],
            preprocessed['forecast_data_weekly']  # Pass unscaled forecast data for index preservation
        )


        # ---------------------------------------------------------------------
        # 7. Build your final dictionary in the desired format
        # ---------------------------------------------------------------------

        # A. Basic structure
        dict_data = {
            "etf_history": etf_history,              # The full ETF dataframe (with predictors, etc.)
            "X": preprocessed["X"],                 # All feature columns used
            "y_train_values": preprocessed["y_train"],  # Training target values

            # Store model performance
            "model results": {
                "mae": mae,
                "mase": mase
            },
        }

        # B. Store monthly forecast predictions in separate keys
        #    Suppose you want up to 12 monthly periods; adjust as needed
        for i_m in range(1, 13):
            # The keys produced by predictions(...) might look like 'forecast_data_1m', 'forecast_data_2m', etc.
            forecast_key = f"forecast_data_{i_m}m"

            if forecast_key in monthly_forecast_results:
                # monthly_forecast_results[forecast_key] might be a DataFrame with 'log_returns'
                df_forecast = monthly_forecast_results[forecast_key]
                
                # e.g. store array of predicted 'log_returns' (or your final target variable)
                dict_data[f"forecast_predictions_df_{i_m}m"] = df_forecast["log_returns"].values
            
            # If a given month's key doesn't exist, you might skip it or handle differently

        # C. Store weekly forecast predictions similarly, if desired
        #    For example, suppose you do up to 48 weekly windows
        for i_w in range(1, 49):
            forecast_key = f"forecast_data_{i_w}w"
            if forecast_key in weekly_forecast_results:
                df_forecast = weekly_forecast_results[forecast_key]
                dict_data[f"forecast_predictions_df_{i_w}w"] = df_forecast["log_returns"].values

        # Finally, store this dictionary per ticker
        etf_results[ticker] = dict_data
    
    return etf_results


In [10]:
import pandas as pd

# Example usage
tickers = ['SMH', 'SOXX', 'PSI', 'XSD', 'IYW', 'XLK', 'VGT', 'FTEC', 'IGM', 'IXN']
start_date, end_date = '2000-01-01', '2024-12-01'
train_start_date, train_end_date, test_start_date, test_end_date = (
    start_date,         # train_start_date
    '2014-01-01',       # train_end_date
    '2014-01-01',       # test_start_date
    '2024-01-01'        # test_end_date
)

# Generate prediction dates for monthly & weekly
prediction_dates_monthly = pd.date_range(
    start='2024-01-01', 
    periods=12, 
    freq='MS'
).strftime('%Y-%m-%d').tolist()

prediction_dates_weekly = pd.date_range(
    start='2024-01-01', 
    periods=48, 
    freq='W-MON'
).strftime('%Y-%m-%d').tolist()

# Run the workflow
results = combined_workflow(
    tickers=tickers,
    start_date=start_date,
    end_date=end_date,
    train_start_date=train_start_date,
    train_end_date=train_end_date,
    test_start_date=test_start_date,
    test_end_date=test_end_date,
    prediction_dates_monthly=prediction_dates_monthly,
    prediction_dates_weekly=prediction_dates_weekly
)

# The 'results' object is a dictionary keyed by ticker.
# For each ticker (e.g. 'SMH' or 'SOXX'), to inspect:
#   results[ticker]["etf_history"]
#   results[ticker]["X"]
#   results[ticker]["y_train_values"]
#   results[ticker]["model results"]
#   results[ticker]["forecast_predictions_df_1m"], etc.
#
# Example:
# Print the first monthly forecast predictions for 'SMH'
#print(results['SMH']['forecast_predictions_df_1m'])


SMH


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000101 | [30m54.74     | [30m0.4141    | [30m10.64     | [30m0.0003334 | [30m84.78     |
| [30m2         | [30m-0.000513 | [30m59.6      | [30m0.1471    | [30m10.27     | [30m0.004682  | [30m82.89     |
| [30m3         | [30m-6.486e-0 | [30m55.33     | [30m0.499     | [30m10.78     | [30m0.001679  | [30m86.72     |
| [30m4         | [30m-4.928e-0 | [30m57.98     | [30m0.1981    | [30m10.99     | [30m0.002125  | [30m87.5      |
| [30m5         | [30m-0.000266 | [30m56.99     | [30m0.2681    | [30m10.44     | [30m0.002751  | [30m89.58     |
| [30m6         | [30m-0.000322 | [30m58.24     | [30m0.1549    | [30m12.96     | [30m0.003516  | [30m85.93     |
| [30m7         | [30m-0.000305 | [30m53.46     | [30m0.2401    | [30m10.04     | [30m0.003765  

| [30m44        | [30m-7.489e-0 | [30m56.12     | [30m0.4654    | [30m10.05     | [30m0.0009772 | [30m84.56     |
| [30m45        | [30m-0.000424 | [30m49.94     | [30m0.26      | [30m21.29     | [30m0.0001964 | [30m73.79     |
| [30m46        | [30m-7.424e-0 | [30m51.85     | [30m0.333     | [30m23.6      | [30m0.001265  | [30m73.96     |
| [30m47        | [30m-0.000102 | [30m55.19     | [30m0.3305    | [30m10.02     | [30m0.001703  | [30m85.92     |
| [30m48        | [30m-0.000215 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-0.000233 | [30m51.95     | [30m0.1       | [30m22.61     | [30m0.001389  | [30m74.09     |
| [30m50        | [30m-4.93e-05 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:
Point 1: {'target': -9.496796208007666e-05, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'lea

  naive_forecast = y_test.shift(1).fillna(method='bfill')


SOXX


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000605 | [30m38.94     | [30m0.2266    | [30m30.68     | [30m0.001844  | [30m64.95     |
| [35m2         | [35m-0.000200 | [35m40.98     | [35m0.1807    | [35m26.68     | [35m0.003147  | [35m116.6     |
| [30m3         | [30m-0.000415 | [30m23.7      | [30m0.4625    | [30m48.23     | [30m0.001397  | [30m91.2      |
| [30m4         | [30m-0.000621 | [30m23.58     | [30m0.1499    | [30m41.97     | [30m0.000571  | [30m80.55     |
| [30m5         | [30m-0.000220 | [30m56.64     | [30m0.2846    | [30m12.46     | [30m0.001727  | [30m126.2     |
| [30m6         | [30m-0.000221 | [30m34.64     | [30m0.3653    | [30m33.95     | [30m0.00402   | [30m93.7      |
| [30m7         | [30m-0.000368 | [30m22.14     | [30m0.2963    | [30m27.68     | [30m0.00356   

| [30m44        | [30m-0.000457 | [30m40.77     | [30m0.3775    | [30m19.2      | [30m0.002849  | [30m71.58     |
| [30m45        | [30m-0.000118 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-0.000454 | [30m45.1      | [30m0.2971    | [30m40.9      | [30m0.00344   | [30m46.49     |
| [30m47        | [30m-0.000116 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-0.000235 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-0.000212 | [30m27.59     | [30m0.4357    | [30m20.26     | [30m0.001218  | [30m44.62     |
| [30m50        | [30m-0.000552 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:
Point 1: {'target': -0.00022485359070667383, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'le

  naive_forecast = y_test.shift(1).fillna(method='bfill')


PSI


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000151 | [30m58.15     | [30m0.3825    | [30m12.93     | [30m0.004485  | [30m78.78     |
| [30m2         | [30m-0.000743 | [30m37.08     | [30m0.2882    | [30m47.65     | [30m0.0007655 | [30m102.6     |
| [30m3         | [30m-0.00033  | [30m52.07     | [30m0.1297    | [30m43.8      | [30m0.0007689 | [30m88.3      |
| [30m4         | [30m-0.000364 | [30m60.84     | [30m0.2522    | [30m43.55     | [30m0.001662  | [30m99.86     |
| [30m5         | [30m-0.000522 | [30m63.74     | [30m0.4188    | [30m11.99     | [30m0.004233  | [30m65.53     |
| [30m6         | [30m-0.000292 | [30m42.14     | [30m0.1105    | [30m10.32     | [30m0.0003345 | [30m75.27     |
| [30m7         | [30m-0.000207 | [30m31.33     | [30m0.1936    | [30m15.08     | [30m0.004132  

| [30m44        | [30m-0.000567 | [30m39.01     | [30m0.2917    | [30m10.09     | [30m0.002104  | [30m43.44     |
| [30m45        | [30m-0.000397 | [30m43.36     | [30m0.1769    | [30m31.99     | [30m0.001902  | [30m33.63     |
| [30m46        | [30m-0.000260 | [30m39.6      | [30m0.4138    | [30m47.38     | [30m0.0009092 | [30m32.61     |
| [30m47        | [30m-0.000250 | [30m50.99     | [30m0.4128    | [30m47.75     | [30m0.003853  | [30m32.03     |
| [30m48        | [30m-0.000638 | [30m44.79     | [30m0.2671    | [30m47.8      | [30m0.0006784 | [30m42.49     |
| [30m49        | [30m-0.000236 | [30m62.66     | [30m0.3348    | [30m43.74     | [30m0.004335  | [30m32.71     |
| [30m50        | [30m-0.000342 | [30m62.83     | [30m0.4926    | [30m49.8      | [30m0.003034  | [30m40.77     |

Initialization Points Results:
Point 1: {'target': -0.0003054510098255802, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'lea

  naive_forecast = y_test.shift(1).fillna(method='bfill')


XSD


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000348 | [30m61.16     | [30m0.4002    | [30m10.05     | [30m0.003852  | [30m103.0     |
| [35m2         | [35m-0.000344 | [35m61.21     | [35m0.1693    | [35m10.79     | [35m0.00217   | [35m103.8     |
| [35m3         | [35m-0.000166 | [35m48.26     | [35m0.3816    | [35m11.22     | [35m0.001466  | [35m122.1     |
| [30m4         | [30m-0.000323 | [30m26.53     | [30m0.2748    | [30m11.53     | [30m0.004113  | [30m127.4     |
| [30m5         | [30m-0.000381 | [30m62.96     | [30m0.2742    | [30m12.97     | [30m0.001609  | [30m127.9     |
| [30m6         | [30m-0.000183 | [30m42.43     | [30m0.2213    | [30m23.37     | [30m0.001207  | [30m126.6     |
| [30m7         | [30m-0.000245 | [30m62.56     | [30m0.3417    | [30m49.44     | [30m0.002901  

| [30m44        | [30m-0.000353 | [30m40.77     | [30m0.3775    | [30m19.2      | [30m0.002849  | [30m71.58     |
| [30m45        | [30m-0.001501 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-0.000443 | [30m45.1      | [30m0.2971    | [30m40.9      | [30m0.00344   | [30m46.49     |
| [30m47        | [30m-0.000632 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-0.000447 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [35m49        | [35m-0.000152 | [35m27.59     | [35m0.4357    | [35m20.26     | [35m0.001218  | [35m44.62     |
| [30m50        | [30m-0.000221 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:
Point 1: {'target': -0.0006241846388997768, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'lea

  naive_forecast = y_test.shift(1).fillna(method='bfill')


IYW


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000167 | [30m30.76     | [30m0.4323    | [30m47.39     | [30m0.004381  | [30m82.15     |
| [30m2         | [30m-5.959e-0 | [30m36.99     | [30m0.121     | [30m44.65     | [30m0.004199  | [30m85.4      |
| [30m3         | [30m-7.631e-0 | [30m36.98     | [30m0.1098    | [30m44.64     | [30m0.0001    | [30m85.39     |
| [30m4         | [30m-0.000163 | [30m23.58     | [30m0.1499    | [30m41.97     | [30m0.000571  | [30m80.55     |
| [30m5         | [30m-7.604e-0 | [30m56.64     | [30m0.2846    | [30m12.46     | [30m0.001727  | [30m126.2     |
| [30m6         | [30m-3.263e-0 | [30m34.64     | [30m0.3653    | [30m33.95     | [30m0.00402   | [30m93.7      |
| [30m7         | [30m-0.000469 | [30m36.99     | [30m0.1204    | [30m44.65     | [30m0.003594  

| [30m44        | [30m-3.422e-0 | [30m40.77     | [30m0.3775    | [30m19.2      | [30m0.002849  | [30m71.58     |
| [30m45        | [30m-7.217e-0 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-0.000310 | [30m45.1      | [30m0.2971    | [30m40.9      | [30m0.00344   | [30m46.49     |
| [30m47        | [30m-5.338e-0 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-0.000129 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-5.67e-05 | [30m27.59     | [30m0.4357    | [30m20.26     | [30m0.001218  | [30m44.62     |
| [30m50        | [30m-3.954e-0 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:
Point 1: {'target': -2.4005318180870047e-05, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'le

  naive_forecast = y_test.shift(1).fillna(method='bfill')


XLK


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-2.176e-0 | [30m58.66     | [30m0.4823    | [30m44.49     | [30m0.004067  | [30m94.9      |
| [30m2         | [30m-5.604e-0 | [30m58.19     | [30m0.4266    | [30m45.0      | [30m0.0002918 | [30m94.64     |
| [30m3         | [30m-3.6e-05  | [30m58.9      | [30m0.1638    | [30m44.32     | [30m0.001446  | [30m94.73     |
| [30m4         | [30m-3.083e-0 | [30m48.95     | [30m0.2825    | [30m42.55     | [30m0.003507  | [30m99.44     |
| [30m5         | [30m-5.23e-05 | [30m58.59     | [30m0.3793    | [30m42.74     | [30m0.001458  | [30m95.57     |
| [35m6         | [35m-1.373e-0 | [35m50.24     | [35m0.2304    | [35m42.81     | [35m0.003866  | [35m100.3     |
| [30m7         | [30m-2.625e-0 | [30m50.47     | [30m0.4034    | [30m43.06     | [30m0.00222   

| [30m44        | [30m-4.049e-0 | [30m59.37     | [30m0.4468    | [30m41.7      | [30m0.003509  | [30m95.06     |
| [30m45        | [30m-2.588e-0 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-0.000374 | [30m56.83     | [30m0.2275    | [30m32.82     | [30m0.003372  | [30m77.8      |
| [30m47        | [30m-5.528e-0 | [30m48.9      | [30m0.1834    | [30m40.92     | [30m0.00383   | [30m98.71     |
| [30m48        | [30m-4.776e-0 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-0.000138 | [30m44.6      | [30m0.1265    | [30m37.38     | [30m0.000951  | [30m94.58     |
| [30m50        | [30m-6.524e-0 | [30m19.47     | [30m0.2757    | [30m11.31     | [30m0.001599  | [30m106.3     |

Initialization Points Results:
Point 1: {'target': -7.433462311690967e-05, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'lea

  naive_forecast = y_test.shift(1).fillna(method='bfill')


VGT


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000324 | [30m53.89     | [30m0.3218    | [30m42.53     | [30m0.003149  | [30m94.72     |
| [30m2         | [30m-0.000137 | [30m40.98     | [30m0.1807    | [30m26.68     | [30m0.003147  | [30m116.6     |
| [30m3         | [30m-0.000120 | [30m23.7      | [30m0.4625    | [30m48.23     | [30m0.001397  | [30m91.2      |
| [30m4         | [30m-0.000135 | [30m49.38     | [30m0.1497    | [30m42.1      | [30m0.004643  | [30m99.35     |
| [30m5         | [30m-0.000136 | [30m49.34     | [30m0.1101    | [30m42.06     | [30m0.005     | [30m99.31     |
| [30m6         | [30m-0.000107 | [30m51.14     | [30m0.1362    | [30m13.03     | [30m0.00328   | [30m98.3      |
| [30m7         | [30m-0.000291 | [30m22.14     | [30m0.2963    | [30m27.68     | [30m0.00356   

| [30m44        | [30m-0.000250 | [30m40.77     | [30m0.3775    | [30m19.2      | [30m0.002849  | [30m71.58     |
| [30m45        | [30m-0.000308 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-0.000278 | [30m45.1      | [30m0.2971    | [30m40.9      | [30m0.00344   | [30m46.49     |
| [30m47        | [30m-0.000170 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-0.000147 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-0.000148 | [30m27.59     | [30m0.4357    | [30m20.26     | [30m0.001218  | [30m44.62     |
| [30m50        | [30m-0.000805 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:
Point 1: {'target': -0.0003774673561766754, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'lea

  naive_forecast = y_test.shift(1).fillna(method='bfill')


FTEC


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.00183  | [30m29.18     | [30m0.1579    | [30m39.47     | [30m0.0001    | [30m71.4      |
| [30m2         | [30m-0.000478 | [30m37.18     | [30m0.1489    | [30m32.25     | [30m0.001985  | [30m65.22     |
| [30m3         | [30m-0.000466 | [30m31.94     | [30m0.3356    | [30m31.57     | [30m0.0001    | [30m51.3      |
| [35m4         | [35m-0.000283 | [35m32.09     | [35m0.2946    | [35m47.58     | [35m0.002968  | [35m104.5     |
| [30m5         | [30m-0.000391 | [30m36.44     | [30m0.1336    | [30m31.02     | [30m0.00105   | [30m106.2     |
| [30m6         | [30m-0.006454 | [30m42.97     | [30m0.4565    | [30m44.26     | [30m0.003924  | [30m118.1     |
| [30m7         | [30m-0.000735 | [30m26.11     | [30m0.3674    | [30m36.42     | [30m0.004756  

| [30m44        | [30m-0.000445 | [30m16.0      | [30m0.5       | [30m10.0      | [30m0.0001    | [30m109.1     |
| [30m45        | [30m-0.000403 | [30m49.9      | [30m0.5       | [30m45.29     | [30m0.0001    | [30m32.0      |
| [30m46        | [30m-0.00179  | [30m64.0      | [30m0.1       | [30m50.0      | [30m0.0001    | [30m114.2     |
| [30m47        | [30m-0.000973 | [30m32.73     | [30m0.1688    | [30m10.0      | [30m0.002501  | [30m57.45     |
| [30m48        | [30m-0.000590 | [30m31.03     | [30m0.483     | [30m22.6      | [30m0.003311  | [30m128.0     |
| [30m49        | [30m-0.000441 | [30m30.01     | [30m0.1       | [30m22.37     | [30m0.0001    | [30m115.0     |
| [30m50        | [30m-0.00149  | [30m64.0      | [30m0.5       | [30m17.74     | [30m0.0001    | [30m90.64     |

Initialization Points Results:
Point 1: {'target': -0.0007384341743009072, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'lea

  naive_forecast = y_test.shift(1).fillna(method='bfill')


IGM


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000339 | [30m38.31     | [30m0.1332    | [30m30.49     | [30m0.001705  | [30m64.22     |
| [30m2         | [30m-0.000317 | [30m40.98     | [30m0.1807    | [30m26.68     | [30m0.003147  | [30m116.6     |
| [30m3         | [30m-0.000262 | [30m23.7      | [30m0.4625    | [30m48.23     | [30m0.001397  | [30m91.2      |
| [35m4         | [35m-9.295e-0 | [35m37.88     | [35m0.1991    | [35m31.0      | [35m0.004694  | [35m64.57     |
| [30m5         | [30m-0.002016 | [30m37.68     | [30m0.3396    | [30m31.44     | [30m0.005     | [30m64.9      |
| [30m6         | [30m-0.000198 | [30m38.21     | [30m0.2837    | [30m30.87     | [30m0.005     | [30m64.52     |
| [35m7         | [35m-7.369e-0 | [35m37.84     | [35m0.5       | [35m30.76     | [35m0.005     

| [30m44        | [30m-0.000592 | [30m40.77     | [30m0.3775    | [30m19.2      | [30m0.002849  | [30m71.58     |
| [30m45        | [30m-0.000372 | [30m27.17     | [30m0.2619    | [30m30.27     | [30m0.0009099 | [30m116.7     |
| [30m46        | [30m-0.000100 | [30m45.1      | [30m0.2971    | [30m40.9      | [30m0.00344   | [30m46.49     |
| [30m47        | [30m-0.001034 | [30m20.0      | [30m0.4223    | [30m34.17     | [30m0.004721  | [30m63.01     |
| [30m48        | [30m-0.000114 | [30m34.23     | [30m0.3448    | [30m46.45     | [30m0.004163  | [30m42.62     |
| [30m49        | [30m-0.000137 | [30m27.59     | [30m0.4357    | [30m20.26     | [30m0.001218  | [30m44.62     |
| [30m50        | [30m-0.000160 | [30m51.28     | [30m0.4533    | [30m16.02     | [30m0.0007108 | [30m90.08     |

Initialization Points Results:
Point 1: {'target': -0.00024230196404296568, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'le

  naive_forecast = y_test.shift(1).fillna(method='bfill')


IXN


[*********************100%%**********************]  1 of 1 completed


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000195 | [30m60.71     | [30m0.3678    | [30m46.48     | [30m0.004864  | [30m79.72     |
| [30m2         | [30m-0.000249 | [30m39.31     | [30m0.4501    | [30m19.88     | [30m0.002715  | [30m67.43     |
| [30m3         | [30m-7.312e-0 | [30m26.76     | [30m0.2896    | [30m26.5      | [30m0.002101  | [30m64.07     |
| [30m4         | [30m-0.000282 | [30m33.14     | [30m0.3914    | [30m28.1      | [30m0.003965  | [30m79.36     |
| [30m5         | [30m-0.000133 | [30m34.98     | [30m0.1464    | [30m21.75     | [30m0.002387  | [30m51.62     |
| [30m6         | [30m-8.656e-0 | [30m63.13     | [30m0.4748    | [30m48.49     | [30m0.004092  | [30m94.15     |
| [30m7         | [30m-0.000536 | [30m64.0      | [30m0.3174    | [30m34.5      | [30m0.004322  

| [30m44        | [30m-0.000205 | [30m42.89     | [30m0.5       | [30m10.0      | [30m0.0001    | [30m113.3     |
| [30m45        | [30m-0.000188 | [30m16.0      | [30m0.1       | [30m17.81     | [30m0.0001    | [30m110.4     |
| [30m46        | [30m-0.000185 | [30m33.19     | [30m0.1       | [30m23.13     | [30m0.0001    | [30m111.9     |
| [30m47        | [30m-9.254e-0 | [30m16.0      | [30m0.5       | [30m50.0      | [30m0.005     | [30m56.13     |
| [30m48        | [30m-0.000116 | [30m16.0      | [30m0.5       | [30m10.0      | [30m0.005     | [30m58.25     |
| [30m49        | [30m-0.000390 | [30m16.0      | [30m0.1       | [30m50.0      | [30m0.005     | [30m32.0      |
| [30m50        | [30m-7.98e-05 | [30m27.82     | [30m0.1527    | [30m49.79     | [30m0.004289  | [30m43.79     |

Initialization Points Results:
Point 1: {'target': -0.00017294245353848881, 'params': {'batch_size': 36.99, 'dropout_rate': 0.1174, 'epochs': 44.65, 'le

  naive_forecast = y_test.shift(1).fillna(method='bfill')




In [11]:
def print_results_with_metrics(results):
    """
    Print out results (model summary, evaluation metrics, and some forecast predictions)
    from the results dictionary returned by combined_workflow.
    """
    for ticker, data in results.items():
        print(f"Ticker: {ticker}")
        print("=" * 50)
        
        # 1. Print model summary (if the model is a statsmodels object)
        if "model" in data and hasattr(data["model"], "summary"):
            print("Model Summary:")
            print(data["model"].summary())
            print("\n")
        else:
            print("No statsmodels summary available for this model.\n")
        
        # 2. Print evaluation metrics (stored in "model results")
        if "model results" in data:
            metrics_dict = data["model results"]
            mae = metrics_dict.get("mae", None)
            mase = metrics_dict.get("mase", None)
            
            print("Evaluation Metrics:")
            print(f"Mean Absolute Error (MAE): {mae}")
            print(f"Mean Absolute Scaled Error (MASE): {mase}")
            print("\n")
        else:
            print("No evaluation metrics found.\n")

        # 3. Print monthly forecast predictions
        print("Monthly Forecast Predictions (first 5 values for each period):")
        # Let's assume you have up to 12 monthly periods
        for i_m in range(1, 13):
            key_m = f"forecast_predictions_df_{i_m}m"
            if key_m in data:
                # Each is a NumPy array of predicted values
                preds_array = data[key_m]
                print(f"{key_m}: {preds_array[:5]}")  # Print first 5 predictions
        print("\n")

        # 4. Print weekly forecast predictions (first 5 values)
        print("Weekly Forecast Predictions (first 5 values for each period):")
        # Assume up to 48 weekly periods
        for i_w in range(1, 49):
            key_w = f"forecast_predictions_df_{i_w}w"
            if key_w in data:
                preds_array = data[key_w]
                print(f"{key_w}: {preds_array[:5]}")
        print("\n")
        
        print("=" * 50)
        print("\n")
print_results_with_metrics(results)


Ticker: SMH
No statsmodels summary available for this model.

Evaluation Metrics:
Mean Absolute Error (MAE): 0.007166894238211225
Mean Absolute Scaled Error (MASE): 0.3718947391409639


Monthly Forecast Predictions (first 5 values for each period):
forecast_predictions_df_1m: [-0.03696918 -0.03105305 -0.0283091  -0.02354526 -0.01716474]
forecast_predictions_df_2m: [-0.02870317 -0.02992181 -0.03244815 -0.03529588 -0.02776192]
forecast_predictions_df_3m: [-0.03513599 -0.0385417  -0.03916519 -0.03551377 -0.04055403]
forecast_predictions_df_4m: [-0.02299188 -0.02814872 -0.01630918 -0.03199092 -0.00453155]
forecast_predictions_df_5m: [-0.00586955  0.01355419  0.00825235  0.00540129 -0.01041923]
forecast_predictions_df_6m: [-0.03742424 -0.03682509 -0.03428222 -0.04160621 -0.04155367]
forecast_predictions_df_7m: [-0.03117906 -0.03052212 -0.02860746 -0.03158294 -0.02488342]
forecast_predictions_df_8m: [-0.01928279 -0.0262831  -0.01036478  0.01395231 -0.0085343 ]
forecast_predictions_df_9m: [-0

forecast_predictions_df_6w: [-0.00201075 -0.00347551 -0.00028239 -0.00046555 -0.00150116]
forecast_predictions_df_7w: [-0.00526796 -0.00561264  0.0008334  -0.0045671  -0.00307035]
forecast_predictions_df_8w: [0.00224398 0.00559445 0.00914397 0.00555852]
forecast_predictions_df_9w: [0.00666175 0.00275059 0.0033182  0.00196127 0.00338653]
forecast_predictions_df_10w: [-0.0016841  -0.00230012  0.00387421  0.00347921 -0.00311177]
forecast_predictions_df_11w: [0.00319934 0.00847726 0.00642915 0.00644171 0.00964378]
forecast_predictions_df_12w: [0.01050734 0.01010765 0.00944273 0.00720762 0.00609036]
forecast_predictions_df_13w: [0.00481869 0.00600333 0.00565652 0.00347506]
forecast_predictions_df_14w: [0.00482042 0.00721642 0.0077028  0.00770518 0.01003607]
forecast_predictions_df_15w: [0.00860547 0.00822856 0.00684199 0.01052679 0.00714611]
forecast_predictions_df_16w: [0.00916735 0.01007762 0.00885051 0.0126418  0.01081375]
forecast_predictions_df_17w: [0.02026062 0.02164491 0.01929655 0.

### Ratios


In [12]:
def calculate_sharpe_ratio(returns, annual_risk_free_rate=0.1,period='daily'):
    #excess_returns = rate_of_return(returns) - risk_free_rate
    
    # Convert annual risk-free rate to daily rate
    daily_risk_free_rate = (1 + annual_risk_free_rate) ** (1/252) - 1
    
    # Calculate mean daily log return
    mean_return = np.mean(returns)
    
    # Calculate excess daily log return
    excess_return = mean_return - daily_risk_free_rate
    
    # Calculate standard deviation of daily log returns
    std_return = np.std(returns)
    
    # Print diagnostic information
    #print(f"Mean Daily Log Return: {mean_return}")
    #print(f"Excess Daily Log Return: {excess_return}")
    #print(f"Standard Deviation of Daily Log Returns: {std_return}")
    
    # Check for zero standard deviation to avoid division by zero
    if std_return == 0:
        return 0
    
    # Calculate Sharpe ratio
    sharpe_ratio = (excess_return / std_return) * np.sqrt(252)  # Annualize the Sharpe ratio
    return sharpe_ratio
    #return excess_returns / np.std(returns)


def calculate_rachev_ratio(returns, lower_percentile=5, upper_percentile=95):
    # Step 1: Sort the returns
    sorted_returns = np.sort(returns)
    
    # Step 2: Determine the percentiles
    lower_threshold = np.percentile(sorted_returns, lower_percentile)
    upper_threshold = np.percentile(sorted_returns, upper_percentile)
    
    # Step 3: Calculate Expected Shortfall (ES)
    es = np.mean(sorted_returns[sorted_returns <= lower_threshold])
    
    # Step 4: Calculate Expected Gain (EG)
    eg = np.mean(sorted_returns[sorted_returns >= upper_threshold])
    
    # Step 5: Compute the Rachev Ratio
    rachev_ratio = eg / -es
    return rachev_ratio


"""def calculate_volatility_clustering(returns):
    squared_returns = returns ** 2
    n = len(squared_returns)
    
    # Mean of squared returns
    mean_squared_returns = np.mean(squared_returns)
    
    # Calculate the numerator and denominator for autocorrelation at lag 1
    numerator = np.sum((squared_returns[:-1] - mean_squared_returns) * (squared_returns[1:] - mean_squared_returns))
    denominator = np.sum((squared_returns - mean_squared_returns) ** 2)
    
    if denominator == 0:
        return 0  # Avoid division by zero
    
    rho_1 = numerator / denominator
    return rho_1"""
def calculate_volatility_clustering(returns):
    # Ensure returns is a NumPy array
    returns = np.array(returns)
    squared_returns = returns ** 2
    n = len(squared_returns)

    # Mean of squared returns
    mean_squared = np.mean(squared_returns)
    clustering = np.sum((squared_returns - mean_squared) ** 2) / (n - 1 if n > 1 else 1)
    return clustering

def calculate_sortino_ratio(log_returns, target_log_return=0.0):
    """
    Calculate the Sortino Ratio using log returns.
    
    Parameters:
    - log_returns (array-like): Array or list of log returns for the period.
    - target_log_return (float): The target log return. Default is 0, which is often used as a benchmark.
    
    Returns:
    - float: The Sortino Ratio.
    """
    # Convert input to a NumPy array for easier calculations
    log_returns = np.array(log_returns)
    
    # Calculate the average period log return (R)
    avg_log_return = np.mean(log_returns)
    
    # Calculate the Target Downside Deviation (TDD)
    downside_deviation = np.sqrt(
        np.mean(np.square(np.maximum(0, target_log_return - log_returns)))
    )
    
    # Calculate Sortino Ratio
    #sortino_ratio_value = (avg_log_return - target_log_return) / downside_deviation
    epsilon = 1e-8
    
    # Add epsilon to downside_deviation to prevent division by zero
    #if downside_deviation == 0:
     #   return np.nan  # Return NaN if downside deviation is zero
    
    # Calculate Sortino Ratio
    sortino_ratio_value = (avg_log_return - target_log_return) / (downside_deviation + epsilon)
    
    return sortino_ratio_value


In [13]:

def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering, 
    mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
    mean_volatility_clustering, std_volatility_clustering
):
    epsilon = 1e-8  # To prevent division by zero
    forecasted_mean = np.mean(forecasted_values)

    # Normalize the components with epsilon
    forecasted_mean_normalized = (forecasted_mean - mean_forecast) / (std_forecast + epsilon)
    rachev_normalized = (rachev_ratio - mean_rachev) / (std_rachev + epsilon)
    sharpe_normalized = (sharpe_ratio - mean_sharpe) / (std_sharpe + epsilon)
    sortino_normalized = (sortino_ratio - mean_sortino) / (std_sortino + epsilon)
    volatility_clustering_normalized = (volatility_clustering - mean_volatility_clustering) / (std_volatility_clustering + epsilon)

    # Composite score calculation
    score = (
        forecasted_mean_normalized
        - (risk_percentage * rachev_normalized)
        + sharpe_normalized
        + sortino_normalized
        - volatility_clustering_normalized
    )

    return score

def calculate_smoothing(data, alpha=0.1):
    if len(data) == 0:  # Explicitly check if the data is empty
        return []
    smoothed_data = []
    previous_value = data[0]
    for value in data:
        smoothed_value = alpha * value + (1 - alpha) * previous_value
        smoothed_data.append(smoothed_value)
        previous_value = smoothed_value
    return smoothed_data



def process_etf_data_weekly(tickers, etf_dict, smoothing=True):
    etf_pred_dict = {}

    # Determine the forecast periods dynamically from the etf_dict
    sample_etf = next(iter(etf_dict.values()))
    forecast_periods = [key.split('_')[-1] for key in sample_etf.keys() if key.startswith('forecast_predictions_df')]

    for etf_name in tickers:
        etf_pred_dict[etf_name] = {
            f"returns_{period}": etf_dict[etf_name][f"forecast_predictions_df_{period}"]
            for period in forecast_periods
        }

        for period in forecast_periods:
            returns = etf_pred_dict[etf_name][f"returns_{period}"]

            if smoothing:
                # Apply smoothing to returns
                smoothed_returns = calculate_smoothing(returns)
                etf_pred_dict[etf_name][f"smoothed_returns_{period}"] = smoothed_returns

                # Calculate metrics using smoothed returns
                etf_pred_dict[etf_name][f"rachev_ratio_{period}"] = calculate_rachev_ratio(smoothed_returns)
                etf_pred_dict[etf_name][f"sharpe_ratio_{period}"] = calculate_sharpe_ratio(smoothed_returns)
                etf_pred_dict[etf_name][f"sortino_ratio_{period}"] = calculate_sortino_ratio(smoothed_returns)
                etf_pred_dict[etf_name][f"volatility_clustering_{period}"] = calculate_volatility_clustering(smoothed_returns)
            else:
                # Calculate metrics using raw returns
                etf_pred_dict[etf_name][f"rachev_ratio_{period}"] = calculate_rachev_ratio(returns)
                etf_pred_dict[etf_name][f"sharpe_ratio_{period}"] = calculate_sharpe_ratio(returns)
                etf_pred_dict[etf_name][f"sortino_ratio_{period}"] = calculate_sortino_ratio(returns)
                etf_pred_dict[etf_name][f"volatility_clustering_{period}"] = calculate_volatility_clustering(returns)

    return etf_pred_dict


def calculate_means_and_stds_weekly(etf_pred_dict, forecast_period):
    returns_list = [etf_pred_dict[etf][f'returns_{forecast_period}'] for etf in etf_pred_dict]

    # Compute global means and standard deviations
    mean_forecast = np.mean([np.mean(returns) for returns in returns_list])
    std_forecast = np.std([np.mean(returns) for returns in returns_list])

    rachev_ratios = np.array([etf_pred_dict[etf][f'rachev_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_rachev = np.mean(rachev_ratios)
    std_rachev = np.std(rachev_ratios)

    sharpe_ratios = np.array([etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_sharpe = np.mean(sharpe_ratios)
    std_sharpe = np.std(sharpe_ratios)

    sortino_ratios = np.array([etf_pred_dict[etf][f'sortino_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_sortino = np.mean(sortino_ratios)
    std_sortino = np.std(sortino_ratios)

    volatility_clustering = np.array([etf_pred_dict[etf][f'volatility_clustering_{forecast_period}'] for etf in etf_pred_dict])
    mean_volatility_clustering = np.mean(volatility_clustering)
    std_volatility_clustering = np.std(volatility_clustering)

    return (
        mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering
    )


def calculate_scores_for_etfs_weekly(etf_pred_dict, forecast_period, risk_percentage):
    (mean_forecast, std_forecast, mean_rachev, std_rachev, 
     mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
     mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds_weekly(etf_pred_dict, forecast_period)

    scores = []
    for etf in etf_pred_dict:
        forecasted_values = etf_pred_dict[etf][f'returns_{forecast_period}']
        rachev_ratio = etf_pred_dict[etf][f'rachev_ratio_{forecast_period}']
        sharpe_ratio = etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}']
        volatility_clustering = etf_pred_dict[etf][f'volatility_clustering_{forecast_period}']
        sortino_ratio = etf_pred_dict[etf][f'sortino_ratio_{forecast_period}']

        # Calculate the composite score
        score = calculate_composite_score(
            forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, 
            sortino_ratio, volatility_clustering, mean_forecast, std_forecast, 
            mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, 
            std_sortino, mean_volatility_clustering, std_volatility_clustering
        )

        scores.append({
            'ETF': etf,
            'Week': forecast_period,
            'RiskPercentage': risk_percentage,
            'Score': score
        })

    return scores


def select_top_etfs_weekly(df_scores, forecast_period):
    if df_scores.empty:
        print(f"No scores available for {forecast_period}. Skipping.")
        return []
    print(f"Processing scores for {forecast_period}:")
    print(df_scores.head())  # Check the top rows of the DataFrame
    top_etfs = df_scores.nlargest(2, 'Score')
    print(f"Top ETFs for {forecast_period}: {top_etfs['ETF'].tolist()}")
    return top_etfs['ETF'].tolist()


def generate_week_ranges(start_date, end_date):
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    week_ranges = []

    while start < end:
        week_start = start
        week_end = start + timedelta(days=6)
        if week_end > end:
            week_end = end
        week_ranges.append((week_start.strftime('%Y-%m-%d'), week_end.strftime('%Y-%m-%d')))
        start += timedelta(days=7)

    return week_ranges



def gather_etf_data_for_weeks(selected_tickers, week_ranges):
    etf_histories = {}
    for start_date, end_date in week_ranges:
        week = f"{start_date} to {end_date}"
        etf_histories[week] = {}
        for ticker in selected_tickers:
            print(f"Fetching data for {ticker} from {start_date} to {end_date}...")
            etf_data = yf.download(ticker, start=start_date, end=end_date, progress=False)
            if not etf_data.empty:
                etf_histories[week][ticker] = etf_data
            else:
                print(f"No data found for {ticker} in {week}")
    return etf_histories



def initialize_shares_for_first_week(top_etfs_1w, etf_histories, week, investment_amount=50000):
    ticker_shares = {}
    first_trading_day_start = week.split(" to ")[0]

    for ticker in top_etfs_1w:
        etf_history = etf_histories.get(week, {}).get(ticker)

        if etf_history is not None:
            first_trading_day = pd.to_datetime(first_trading_day_start) + BDay(1)
            if first_trading_day not in etf_history.index:
                first_trading_day = etf_history.index[etf_history.index.searchsorted(first_trading_day)]

            price_on_first_trading_day = etf_history.loc[first_trading_day, 'Close']
            num_shares = (investment_amount * 0.975) / price_on_first_trading_day
            ticker_shares[ticker] = num_shares
        else:
            print(f"No data found for {ticker} in {week}")
    return ticker_shares


def manage_etf_portfolio_weekly(
    top_etfs_previous, top_etfs_current, previous_week, current_week, ticker_shares, gathered_data_per_week
):
    etf_histories_for_current_week = gathered_data_per_week.get(current_week, {})
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"\nTop 2 ETFs for {previous_week}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_week}: {top2etfs_current}")

    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Ensure one-to-one mapping between sell and buy ETFs
    if len(etfs_to_sell) != len(etfs_to_buy):
        print("Mismatch between ETFs to sell and buy. Adjusting allocation...")
        return ticker_shares  # Abort if mismatched for now, you can implement custom logic

    # Allocate funds ETF-by-ETF
    for etf_sell, etf_buy in zip(etfs_to_sell, etfs_to_buy):
        no_of_shares = ticker_shares.get(etf_sell, 0)
        if no_of_shares > 0:
            # Selling old ETF
            if etf_sell in etf_histories_for_current_week:
                first_trading_day_sell_price = etf_histories_for_current_week[etf_sell].loc[
                    etf_histories_for_current_week[etf_sell].index[0], 'Close'
                ]
                selling_value = no_of_shares * first_trading_day_sell_price * 0.975
                print(f"Sell {etf_sell}: {no_of_shares:.2f} shares at {first_trading_day_sell_price:.2f}. Total value: {selling_value:.2f}")

                # Remove sold ETF from portfolio
                del ticker_shares[etf_sell]

                # Buying new ETF
                if etf_buy in etf_histories_for_current_week:
                    first_trading_day_buy_price = etf_histories_for_current_week[etf_buy].loc[
                        etf_histories_for_current_week[etf_buy].index[0], 'Close'
                    ]
                    new_shares = (selling_value * 0.975) / first_trading_day_buy_price
                    print(f"Buy {etf_buy}: {new_shares:.2f} shares at {first_trading_day_buy_price:.2f}.")
                    ticker_shares[etf_buy] = new_shares
                else:
                    print(f"Data for {etf_buy} is missing for {current_week}. Skipping purchase.")
            else:
                print(f"Data for {etf_sell} is missing for {current_week}. Skipping sale.")
        else:
            print(f"No shares found for {etf_sell} to sell.")

    print(f"Updated ticker shares after {current_week}: {ticker_shares}")
    return ticker_shares


def main_weekly(tickers, etf_dict, smoothing=True):
    etf_pred_dict = process_etf_data_weekly(tickers, etf_dict, smoothing=smoothing)

    risk_percentage = 0.10
    weekly_scores = {}

    # Calculate scores for all weeks
    for week in range(1, 49):
        week_key = f"{week}w"
        if any(f"returns_{week_key}" in etf_pred_dict[etf] for etf in etf_pred_dict):
            scores = calculate_scores_for_etfs_weekly(etf_pred_dict, week_key, risk_percentage)
            weekly_scores[week_key] = scores

    # Convert scores to DataFrames
    weekly_scores_dfs = {}
    for week_key, scores in weekly_scores.items():
        weekly_scores_dfs[week_key] = pd.DataFrame(scores)

    # Select top ETFs for each week
    top_etfs_weekly = {}
    for week_key, df_scores in weekly_scores_dfs.items():
        top_etfs_weekly[week_key] = df_scores.nlargest(2, 'Score')

    # Generate week ranges for ETF data gathering
    week_ranges = generate_week_ranges('2024-01-01', '2024-12-01')

    # Gather ETF historical data
    etf_histories = gather_etf_data_for_weeks(tickers, week_ranges)

    # Map week keys to date ranges
    week_key_mapping = {f"{i+1}w": week_range for i, week_range in enumerate(etf_histories.keys())}

    # Align top ETFs with historical data
    aligned_top_etfs_weekly = {}
    for week_key, df_scores in weekly_scores_dfs.items():
        forecast_period = week_key_mapping.get(week_key, None)
        if forecast_period:
            aligned_top_etfs_weekly[forecast_period] = select_top_etfs_weekly(df_scores, forecast_period)

    # Initialize and manage portfolio
    ticker_shares = {}
    ticker_shares_per_week = {}
    for i, week_range in enumerate(etf_histories.keys()):
        current_week_key = week_range
        if i == 0:
            # Initialize shares for the first week
            ticker_shares = initialize_shares_for_first_week(
                aligned_top_etfs_weekly[current_week_key],
                etf_histories,
                current_week_key
            )
            ticker_shares_per_week[current_week_key] = ticker_shares.copy()
        else:
            # Manage portfolio for subsequent weeks
            prev_week_key = list(etf_histories.keys())[i - 1]
            ticker_shares = manage_etf_portfolio_weekly(
                aligned_top_etfs_weekly[prev_week_key],
                aligned_top_etfs_weekly[current_week_key],
                prev_week_key,
                current_week_key,
                ticker_shares,
                etf_histories
            )
            ticker_shares_per_week[current_week_key] = ticker_shares.copy()

    # Define the first trading day of the 49th week
    first_trading_day_49w = '2024-12-01'

    # Identify the 48th and 49th week date ranges
    week_48_range = list(ticker_shares_per_week.keys())[-1]
    week_49_start = first_trading_day_49w
    print(f"Using data for the 48th week: {week_48_range}")
    print(f"Fetching data starting from the first trading day of the 49th week: {week_49_start}")

    week_49_end = '2024-12-06'
    etf_values_49w = {}

    # Ensure 48th week data exists
    if week_48_range in ticker_shares_per_week:
        ticker_shares_48w = ticker_shares_per_week[week_48_range]

        # Fetch the first trading day price of the 49th week for each ETF
        for ticker, shares in ticker_shares_48w.items():
            print(f"Fetching data for ticker {ticker} starting from {week_49_start}...")
            data = yf.download(ticker, start=week_49_start, end=week_49_end)

            if not data.empty:
                closing_price_49w = data['Close'].iloc[0]
                total_value = shares * closing_price_49w
                etf_values_49w[ticker] = total_value
                print(f"{ticker}: {shares:.2f} shares at ${closing_price_49w:.2f} each, total value: ${total_value:.2f}")
            else:
                print(f"{ticker}: No data available for the 49th week's first trading day.")
    else:
        print(f"No data available in ticker_shares_per_week for the 48th week: {week_48_range}")

    # Display total portfolio value for the 49th week's first trading day
    if etf_values_49w:
        print("\nETF values on the 49th week's first trading day:")
        total_value = sum(etf_values_49w.values())
        print(f"Total portfolio value: {total_value:.2f}")
        for ticker, value in etf_values_49w.items():
            print(f"{ticker}: {value:.2f}")
    else:
        print("No values could be calculated for the 49th week's first trading day.")

    return ticker_shares_per_week


In [14]:
# Now call the weekly workflow using the 'results' as etf_dict
ticker_shares_per_week_wo_smoothing = main_weekly(tickers, results, smoothing=False)


Fetching data for SMH from 2024-01-01 to 2024-01-07...
Fetching data for SOXX from 2024-01-01 to 2024-01-07...
Fetching data for PSI from 2024-01-01 to 2024-01-07...
Fetching data for XSD from 2024-01-01 to 2024-01-07...
Fetching data for IYW from 2024-01-01 to 2024-01-07...
Fetching data for XLK from 2024-01-01 to 2024-01-07...
Fetching data for VGT from 2024-01-01 to 2024-01-07...
Fetching data for FTEC from 2024-01-01 to 2024-01-07...
Fetching data for IGM from 2024-01-01 to 2024-01-07...
Fetching data for IXN from 2024-01-01 to 2024-01-07...
Fetching data for SMH from 2024-01-08 to 2024-01-14...
Fetching data for SOXX from 2024-01-08 to 2024-01-14...
Fetching data for PSI from 2024-01-08 to 2024-01-14...
Fetching data for XSD from 2024-01-08 to 2024-01-14...
Fetching data for IYW from 2024-01-08 to 2024-01-14...
Fetching data for XLK from 2024-01-08 to 2024-01-14...
Fetching data for VGT from 2024-01-08 to 2024-01-14...
Fetching data for FTEC from 2024-01-08 to 2024-01-14...
Fetchi

Fetching data for IXN from 2024-04-08 to 2024-04-14...
Fetching data for SMH from 2024-04-15 to 2024-04-21...
Fetching data for SOXX from 2024-04-15 to 2024-04-21...
Fetching data for PSI from 2024-04-15 to 2024-04-21...
Fetching data for XSD from 2024-04-15 to 2024-04-21...
Fetching data for IYW from 2024-04-15 to 2024-04-21...
Fetching data for XLK from 2024-04-15 to 2024-04-21...
Fetching data for VGT from 2024-04-15 to 2024-04-21...
Fetching data for FTEC from 2024-04-15 to 2024-04-21...
Fetching data for IGM from 2024-04-15 to 2024-04-21...
Fetching data for IXN from 2024-04-15 to 2024-04-21...
Fetching data for SMH from 2024-04-22 to 2024-04-28...
Fetching data for SOXX from 2024-04-22 to 2024-04-28...
Fetching data for PSI from 2024-04-22 to 2024-04-28...
Fetching data for XSD from 2024-04-22 to 2024-04-28...
Fetching data for IYW from 2024-04-22 to 2024-04-28...
Fetching data for XLK from 2024-04-22 to 2024-04-28...
Fetching data for VGT from 2024-04-22 to 2024-04-28...
Fetchin

Fetching data for IGM from 2024-07-22 to 2024-07-28...
Fetching data for IXN from 2024-07-22 to 2024-07-28...
Fetching data for SMH from 2024-07-29 to 2024-08-04...
Fetching data for SOXX from 2024-07-29 to 2024-08-04...
Fetching data for PSI from 2024-07-29 to 2024-08-04...
Fetching data for XSD from 2024-07-29 to 2024-08-04...
Fetching data for IYW from 2024-07-29 to 2024-08-04...
Fetching data for XLK from 2024-07-29 to 2024-08-04...
Fetching data for VGT from 2024-07-29 to 2024-08-04...
Fetching data for FTEC from 2024-07-29 to 2024-08-04...
Fetching data for IGM from 2024-07-29 to 2024-08-04...
Fetching data for IXN from 2024-07-29 to 2024-08-04...
Fetching data for SMH from 2024-08-05 to 2024-08-11...
Fetching data for SOXX from 2024-08-05 to 2024-08-11...
Fetching data for PSI from 2024-08-05 to 2024-08-11...
Fetching data for XSD from 2024-08-05 to 2024-08-11...
Fetching data for IYW from 2024-08-05 to 2024-08-11...
Fetching data for XLK from 2024-08-05 to 2024-08-11...
Fetchin

Fetching data for IGM from 2024-11-04 to 2024-11-10...
Fetching data for IXN from 2024-11-04 to 2024-11-10...
Fetching data for SMH from 2024-11-11 to 2024-11-17...
Fetching data for SOXX from 2024-11-11 to 2024-11-17...
Fetching data for PSI from 2024-11-11 to 2024-11-17...
Fetching data for XSD from 2024-11-11 to 2024-11-17...
Fetching data for IYW from 2024-11-11 to 2024-11-17...
Fetching data for XLK from 2024-11-11 to 2024-11-17...
Fetching data for VGT from 2024-11-11 to 2024-11-17...
Fetching data for FTEC from 2024-11-11 to 2024-11-17...
Fetching data for IGM from 2024-11-11 to 2024-11-17...
Fetching data for IXN from 2024-11-11 to 2024-11-17...
Fetching data for SMH from 2024-11-18 to 2024-11-24...
Fetching data for SOXX from 2024-11-18 to 2024-11-24...
Fetching data for PSI from 2024-11-18 to 2024-11-24...
Fetching data for XSD from 2024-11-18 to 2024-11-24...
Fetching data for IYW from 2024-11-18 to 2024-11-24...
Fetching data for XLK from 2024-11-18 to 2024-11-24...
Fetchin

[*********************100%%**********************]  1 of 1 completed

Processing scores for 2024-01-01 to 2024-01-07:
    ETF Week  RiskPercentage     Score
0   SMH   1w             0.1 -4.066723
1  SOXX   1w             0.1 -1.259218
2   PSI   1w             0.1 -0.512049
3   XSD   1w             0.1  4.777811
4   IYW   1w             0.1 -1.234315
Top ETFs for 2024-01-01 to 2024-01-07: ['FTEC', 'XSD']
Processing scores for 2024-01-08 to 2024-01-14:
    ETF Week  RiskPercentage     Score
0   SMH   2w             0.1 -3.188424
1  SOXX   2w             0.1 -2.356731
2   PSI   2w             0.1 -1.129467
3   XSD   2w             0.1  6.574063
4   IYW   2w             0.1 -1.275364
Top ETFs for 2024-01-08 to 2024-01-14: ['XSD', 'FTEC']
Processing scores for 2024-01-15 to 2024-01-21:
    ETF Week  RiskPercentage     Score
0   SMH   3w             0.1 -2.513196
1  SOXX   3w             0.1 -1.495201
2   PSI   3w             0.1 -0.441563
3   XSD   3w             0.1  5.111990
4   IYW   3w             0.1 -1.618030
Top ETFs for 2024-01-15 to 2024-01-21: ['FTE


[*********************100%%**********************]  1 of 1 completed

XSD: 182.52 shares at $252.68 each, total value: $46119.38

ETF values on the 49th week's first trading day:
Total portfolio value: 104921.81
FTEC: 58802.43
XSD: 46119.38





In [15]:
ticker_shares_per_week_with_smoothing = main_weekly(tickers, results, smoothing=True)


Fetching data for SMH from 2024-01-01 to 2024-01-07...
Fetching data for SOXX from 2024-01-01 to 2024-01-07...
Fetching data for PSI from 2024-01-01 to 2024-01-07...
Fetching data for XSD from 2024-01-01 to 2024-01-07...
Fetching data for IYW from 2024-01-01 to 2024-01-07...
Fetching data for XLK from 2024-01-01 to 2024-01-07...
Fetching data for VGT from 2024-01-01 to 2024-01-07...
Fetching data for FTEC from 2024-01-01 to 2024-01-07...
Fetching data for IGM from 2024-01-01 to 2024-01-07...
Fetching data for IXN from 2024-01-01 to 2024-01-07...
Fetching data for SMH from 2024-01-08 to 2024-01-14...
Fetching data for SOXX from 2024-01-08 to 2024-01-14...
Fetching data for PSI from 2024-01-08 to 2024-01-14...
Fetching data for XSD from 2024-01-08 to 2024-01-14...
Fetching data for IYW from 2024-01-08 to 2024-01-14...
Fetching data for XLK from 2024-01-08 to 2024-01-14...
Fetching data for VGT from 2024-01-08 to 2024-01-14...
Fetching data for FTEC from 2024-01-08 to 2024-01-14...
Fetchi

Fetching data for IXN from 2024-04-08 to 2024-04-14...
Fetching data for SMH from 2024-04-15 to 2024-04-21...
Fetching data for SOXX from 2024-04-15 to 2024-04-21...
Fetching data for PSI from 2024-04-15 to 2024-04-21...
Fetching data for XSD from 2024-04-15 to 2024-04-21...
Fetching data for IYW from 2024-04-15 to 2024-04-21...
Fetching data for XLK from 2024-04-15 to 2024-04-21...
Fetching data for VGT from 2024-04-15 to 2024-04-21...
Fetching data for FTEC from 2024-04-15 to 2024-04-21...
Fetching data for IGM from 2024-04-15 to 2024-04-21...
Fetching data for IXN from 2024-04-15 to 2024-04-21...
Fetching data for SMH from 2024-04-22 to 2024-04-28...
Fetching data for SOXX from 2024-04-22 to 2024-04-28...
Fetching data for PSI from 2024-04-22 to 2024-04-28...
Fetching data for XSD from 2024-04-22 to 2024-04-28...
Fetching data for IYW from 2024-04-22 to 2024-04-28...
Fetching data for XLK from 2024-04-22 to 2024-04-28...
Fetching data for VGT from 2024-04-22 to 2024-04-28...
Fetchin

Fetching data for IGM from 2024-07-22 to 2024-07-28...
Fetching data for IXN from 2024-07-22 to 2024-07-28...
Fetching data for SMH from 2024-07-29 to 2024-08-04...
Fetching data for SOXX from 2024-07-29 to 2024-08-04...
Fetching data for PSI from 2024-07-29 to 2024-08-04...
Fetching data for XSD from 2024-07-29 to 2024-08-04...
Fetching data for IYW from 2024-07-29 to 2024-08-04...
Fetching data for XLK from 2024-07-29 to 2024-08-04...
Fetching data for VGT from 2024-07-29 to 2024-08-04...
Fetching data for FTEC from 2024-07-29 to 2024-08-04...
Fetching data for IGM from 2024-07-29 to 2024-08-04...
Fetching data for IXN from 2024-07-29 to 2024-08-04...
Fetching data for SMH from 2024-08-05 to 2024-08-11...
Fetching data for SOXX from 2024-08-05 to 2024-08-11...
Fetching data for PSI from 2024-08-05 to 2024-08-11...
Fetching data for XSD from 2024-08-05 to 2024-08-11...
Fetching data for IYW from 2024-08-05 to 2024-08-11...
Fetching data for XLK from 2024-08-05 to 2024-08-11...
Fetchin

Fetching data for IXN from 2024-11-04 to 2024-11-10...
Fetching data for SMH from 2024-11-11 to 2024-11-17...
Fetching data for SOXX from 2024-11-11 to 2024-11-17...
Fetching data for PSI from 2024-11-11 to 2024-11-17...
Fetching data for XSD from 2024-11-11 to 2024-11-17...
Fetching data for IYW from 2024-11-11 to 2024-11-17...
Fetching data for XLK from 2024-11-11 to 2024-11-17...
Fetching data for VGT from 2024-11-11 to 2024-11-17...
Fetching data for FTEC from 2024-11-11 to 2024-11-17...
Fetching data for IGM from 2024-11-11 to 2024-11-17...
Fetching data for IXN from 2024-11-11 to 2024-11-17...
Fetching data for SMH from 2024-11-18 to 2024-11-24...
Fetching data for SOXX from 2024-11-18 to 2024-11-24...
Fetching data for PSI from 2024-11-18 to 2024-11-24...
Fetching data for XSD from 2024-11-18 to 2024-11-24...
Fetching data for IYW from 2024-11-18 to 2024-11-24...
Fetching data for XLK from 2024-11-18 to 2024-11-24...
Fetching data for VGT from 2024-11-18 to 2024-11-24...
Fetchin

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

FTEC: 314.47 shares at $186.99 each, total value: $58802.43
Fetching data for ticker XSD starting from 2024-12-01...
XSD: 174.20 shares at $252.68 each, total value: $44017.44

ETF values on the 49th week's first trading day:
Total portfolio value: 102819.86
FTEC: 58802.43
XSD: 44017.44





In [16]:
def process_etf_data_monthly(tickers, etf_dict, smoothing=True):
    etf_pred_dict = {}

    # Determine forecast periods dynamically from the etf_dict
    sample_etf = next(iter(etf_dict.values()))
    forecast_periods = [key.split('_')[-1] for key in sample_etf.keys() if key.startswith('forecast_predictions_df')]

    for etf_name in tickers:
        etf_pred_dict[etf_name] = {
            f"returns_{period}": etf_dict[etf_name][f"forecast_predictions_df_{period}"]
            for period in forecast_periods
        }

        for period in forecast_periods:
            returns = etf_pred_dict[etf_name][f"returns_{period}"]

            if smoothing:
                # Apply smoothing
                smoothed_returns = calculate_smoothing(returns)
                etf_pred_dict[etf_name][f"smoothed_returns_{period}"] = smoothed_returns

                # Calculate metrics using smoothed returns
                etf_pred_dict[etf_name][f"rachev_ratio_{period}"] = calculate_rachev_ratio(smoothed_returns)
                etf_pred_dict[etf_name][f"sharpe_ratio_{period}"] = calculate_sharpe_ratio(smoothed_returns)
                etf_pred_dict[etf_name][f"sortino_ratio_{period}"] = calculate_sortino_ratio(smoothed_returns)
                etf_pred_dict[etf_name][f"volatility_clustering_{period}"] = calculate_volatility_clustering(smoothed_returns)
            else:
                # Calculate metrics using raw returns
                etf_pred_dict[etf_name][f"rachev_ratio_{period}"] = calculate_rachev_ratio(returns)
                etf_pred_dict[etf_name][f"sharpe_ratio_{period}"] = calculate_sharpe_ratio(returns)
                etf_pred_dict[etf_name][f"sortino_ratio_{period}"] = calculate_sortino_ratio(returns)
                etf_pred_dict[etf_name][f"volatility_clustering_{period}"] = calculate_volatility_clustering(returns)

    return etf_pred_dict


def calculate_means_and_stds_monthly(etf_pred_dict, forecast_period):
    returns_list = [etf_pred_dict[etf][f'returns_{forecast_period}'] for etf in etf_pred_dict]

    # Compute global means and standard deviations
    mean_forecast = np.mean([np.mean(returns) for returns in returns_list])
    std_forecast = np.std([np.mean(returns) for returns in returns_list])

    print(f"\nDebug: Forecast Period = {forecast_period}")
    print(f"All Returns Means: {[np.mean(returns) for returns in returns_list]}")
    print(f"Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")

    rachev_ratios = np.array([etf_pred_dict[etf][f'rachev_ratio_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Rachev Ratios: {rachev_ratios}")
    mean_rachev = np.mean(rachev_ratios)
    std_rachev = np.std(rachev_ratios)

    sharpe_ratios = np.array([etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Sharpe Ratios: {sharpe_ratios}")
    mean_sharpe = np.mean(sharpe_ratios)
    std_sharpe = np.std(sharpe_ratios)

    sortino_ratios = np.array([etf_pred_dict[etf][f'sortino_ratio_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Sortino Ratios: {sortino_ratios}")
    mean_sortino = np.mean(sortino_ratios)
    std_sortino = np.std(sortino_ratios)

    volatility_clustering = np.array([etf_pred_dict[etf][f'volatility_clustering_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Volatility Clustering: {volatility_clustering}")
    mean_volatility_clustering = np.mean(volatility_clustering)
    std_volatility_clustering = np.std(volatility_clustering)

    print(f"Mean Rachev = {mean_rachev}, Std Rachev = {std_rachev}")
    print(f"Mean Sharpe = {mean_sharpe}, Std Sharpe = {std_sharpe}")
    print(f"Mean Sortino = {mean_sortino}, Std Sortino = {std_sortino}")
    print(f"Mean Volatility Clustering = {mean_volatility_clustering}, Std Volatility Clustering = {std_volatility_clustering}")

    return (
        mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering
    )

def calculate_scores_for_etfs_monthly(etf_pred_dict, forecast_period, risk_percentage, smoothing = True):
    (mean_forecast, std_forecast, mean_rachev, std_rachev, 
     mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
     mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds_monthly(etf_pred_dict, forecast_period)

    scores = []
    for etf in etf_pred_dict:
        forecasted_values = etf_pred_dict[etf][f'returns_{forecast_period}']
        rachev_ratio = etf_pred_dict[etf][f'rachev_ratio_{forecast_period}']
        sharpe_ratio = etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}']
        volatility_clustering = etf_pred_dict[etf][f'volatility_clustering_{forecast_period}']
        sortino_ratio = etf_pred_dict[etf][f'sortino_ratio_{forecast_period}']

        # Debugging: Log inputs to composite score calculation
        print(f"\nDebug: ETF = {etf}, Forecast Period = {forecast_period}")
        print(f"Forecasted Values Mean: {np.mean(forecasted_values)}")
        print(f"Rachev Ratio: {rachev_ratio}, Sharpe Ratio: {sharpe_ratio}")
        print(f"Sortino Ratio: {sortino_ratio}, Volatility Clustering: {volatility_clustering}")
        print(f"Means and Stds: Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")
        
        # Calculate the composite score
        score = calculate_composite_score(
            forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, 
            sortino_ratio, volatility_clustering, mean_forecast, std_forecast, 
            mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, 
            std_sortino, mean_volatility_clustering, std_volatility_clustering
        )

        scores.append({
            'ETF': etf,
            'Month': forecast_period,
            'RiskPercentage': risk_percentage,
            'Score': score
        })

        # Debugging: Log the calculated score
        print(f"Calculated Score for {etf} ({forecast_period}): {score}")

    return scores

def select_top_etfs_monthly(df_scores, forecast_period):
    if df_scores.empty:
        print(f"No scores available for {forecast_period}. Skipping.")
        return []
    print(f"Processing scores for {forecast_period}:")
    print(df_scores.head())  # Check the top rows of the DataFrame
    top_etfs = df_scores.nlargest(2, 'Score')
    print(f"Top ETFs for {forecast_period}: {top_etfs['ETF'].tolist()}")
    return top_etfs['ETF'].tolist()


def generate_month_ranges(start_date, end_date):
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    month_ranges = []

    while start < end:
        month_start = start
        month_end = (start + timedelta(days=31)).replace(day=1) - timedelta(days=1)
        if month_end > end:
            month_end = end
        month_ranges.append((month_start.strftime('%Y-%m-%d'), month_end.strftime('%Y-%m-%d')))
        start = month_end + timedelta(days=1)

    return month_ranges

def gather_etf_data_for_months(tickers, month_ranges):
    etf_histories = {}
    for start_date, end_date in month_ranges:
        month = f"{start_date} to {end_date}"
        etf_histories[month] = {}
        for ticker in tickers:
            etf_data = yf.download(ticker, start=start_date, end=end_date, progress=False)
            if etf_data.empty:
                print(f"No data found for {ticker} in {month}")
                continue
            etf_data.index = pd.to_datetime(etf_data.index)
            etf_histories[month][ticker] = etf_data
    return etf_histories

# Function to initialize shares for the first month
def initialize_shares_for_first_month(top_etfs_1m, etf_histories, month, investment_amount=50000):
    ticker_shares = {}
    first_trading_day_start = month.split(" to ")[0]

    for ticker in top_etfs_1m:
        etf_history = etf_histories.get(month, {}).get(ticker)

        if etf_history is not None:
            first_trading_day = pd.to_datetime(first_trading_day_start) + BDay(1)
            if first_trading_day not in etf_history.index:
                first_trading_day = etf_history.index[etf_history.index.searchsorted(first_trading_day)]

            price_on_first_trading_day = etf_history.loc[first_trading_day, 'Close']
            num_shares = (investment_amount * 0.975) / price_on_first_trading_day
            print(f"Shares 1st month: ({investment_amount} * 0.975) / {price_on_first_trading_day}")
            ticker_shares[ticker] = num_shares
            print(f"Ticker: {ticker}, First trading day: {first_trading_day.date()}, Price: {price_on_first_trading_day}, Shares: {num_shares:.2f}")
        else:
            print(f"No data found for {ticker} in {month}")
    return ticker_shares


def manage_etf_portfolio_monthly(
    top_etfs_previous, top_etfs_current, previous_month, current_month, ticker_shares, gathered_data_per_month
):
    etf_histories_for_current_month = gathered_data_per_month.get(current_month, {})

    # Extract ETFs as lists to preserve order
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"Top 2 ETFs for {previous_month}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_month}: {top2etfs_current}")

    # Step 1: Identify common ETFs (present in both months)
    etfs_common = [etf for etf in top2etfs_previous if etf in top2etfs_current]

    # Step 2: Identify ETFs to sell (present in the previous month but not in the current month)
    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]

    # Step 3: Identify ETFs to buy (present in the current month but not in the previous month)
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Step 4: Sell ETFs that are no longer in the current top ETFs
    selling_values = {}  # Track selling values for each ETF being sold
    for etf in etfs_to_sell:
        no_of_shares = ticker_shares.get(etf, 0)
        if no_of_shares > 0:
            # Check if data for the ETF is available in the current month
            if etf in etf_histories_for_current_month:
                first_trading_day_current_month = etf_histories_for_current_month[etf].loc[
                    etf_histories_for_current_month[etf].index[0], 'Close'
                ]
                
                selling_value = no_of_shares * first_trading_day_current_month * 0.975
                selling_values[etf] = selling_value  # Store selling value for this ETF
                print(f"Formula: {no_of_shares} * {first_trading_day_current_month} * 0.975")
                print(f"Sell {etf}: {no_of_shares:.2f} shares at {first_trading_day_current_month:.2f}. Total value: {selling_value:.2f}\n")
                del ticker_shares[etf]
            else:
                print(f"Data for {etf} is missing for {current_month}. Skipping sale.")
        else:
            print(f"No shares found for {etf} to sell.")

    # Step 5: Buy new ETFs that were not in the previous month's top ETFs
    for etf_to_buy, etf_to_sell in zip(etfs_to_buy, etfs_to_sell):
        selling_value = selling_values.get(etf_to_sell, 0)
        if selling_value > 0 and etf_to_buy in etf_histories_for_current_month:
            first_trading_day_new_etf = etf_histories_for_current_month[etf_to_buy].loc[
                etf_histories_for_current_month[etf_to_buy].index[0], 'Close'
            ]
            new_shares = (selling_value * 0.975) / first_trading_day_new_etf
            print(f"Formula: ({selling_value} * 0.975) / {first_trading_day_new_etf}")
            print(f"Buy {etf_to_buy}: {new_shares:.2f} shares at {first_trading_day_new_etf:.2f}.\n")
            ticker_shares[etf_to_buy] = new_shares
        else:
            print(f"Data for {etf_to_buy} is missing or no selling value available. Skipping purchase of {etf_to_buy}.")

    # Step 6: Maintain the order of ETFs in the ticker_shares dictionary based on the current top 2 ETFs
    ordered_ticker_shares = {etf: ticker_shares[etf] for etf in top2etfs_current if etf in ticker_shares}

    print(f"Updated ticker shares after {current_month}: {ordered_ticker_shares}")
    print("")
    return ordered_ticker_shares



def main_monthly(tickers, etf_dict, smoothing=True):
    etf_pred_dict = process_etf_data_monthly(tickers, etf_dict, smoothing=smoothing)

    risk_percentage = 0.10
    monthly_scores = {}

    # Calculate scores for all months
    for month in range(1, 13):
        month_key = f"{month}m"
        if any(f"returns_{month_key}" in etf_pred_dict[etf] for etf in etf_pred_dict):
            scores = calculate_scores_for_etfs_weekly(etf_pred_dict, month_key, risk_percentage)  # Reusing weekly function
            monthly_scores[month_key] = scores

    # Convert scores to DataFrames
    monthly_scores_dfs = {}
    for month_key, scores in monthly_scores.items():
        monthly_scores_dfs[month_key] = pd.DataFrame(scores)

    # Select top ETFs for each month
    top_etfs_monthly = {}
    for month_key, df_scores in monthly_scores_dfs.items():
        top_etfs_monthly[month_key] = df_scores.nlargest(2, 'Score')

    # Generate month ranges for ETF data gathering
    month_ranges = generate_month_ranges('2024-01-01', '2024-12-01')

    # Gather ETF historical data
    etf_histories = gather_etf_data_for_weeks(tickers, month_ranges)

    # Map month keys to date ranges
    month_key_mapping = {f"{i+1}m": month_range for i, month_range in enumerate(etf_histories.keys())}

    # Align top ETFs with historical data
    aligned_top_etfs_monthly = {}
    for month_key, df_scores in monthly_scores_dfs.items():
        forecast_period = month_key_mapping.get(month_key, None)
        if forecast_period:
            aligned_top_etfs_monthly[forecast_period] = select_top_etfs_weekly(df_scores, forecast_period)

    # Initialize and manage portfolio
    ticker_shares = {}
    ticker_shares_per_month = {}
    for i, month_range in enumerate(etf_histories.keys()):
        current_month_key = month_range
        if i == 0:
            # Initialize shares for the first month
            ticker_shares = initialize_shares_for_first_week(
                aligned_top_etfs_monthly[current_month_key],
                etf_histories,
                current_month_key
            )
            ticker_shares_per_month[current_month_key] = ticker_shares.copy()
        else:
            # Manage portfolio for subsequent months
            prev_month_key = list(etf_histories.keys())[i - 1]
            ticker_shares = manage_etf_portfolio_monthly(
                aligned_top_etfs_monthly[prev_month_key],
                aligned_top_etfs_monthly[current_month_key],
                prev_month_key,
                current_month_key,
                ticker_shares,
                etf_histories
            )
            ticker_shares_per_month[current_month_key] = ticker_shares.copy()

    # Define the first trading day of the 13th month
    first_trading_day_13m = '2024-12-01'

    # Identify the 12th and 13th month date ranges
    month_12_range = list(ticker_shares_per_month.keys())[-1]
    month_13_start = first_trading_day_13m
    print(f"Using data for the 12th month: {month_12_range}")
    print(f"Fetching data starting from the first trading day of the 13th month: {month_13_start}")

    month_13_end = '2024-12-31'
    etf_values_13m = {}

    # Ensure 12th month data exists
    if month_12_range in ticker_shares_per_month:
        ticker_shares_12m = ticker_shares_per_month[month_12_range]

        # Fetch the first trading day price of the 13th month for each ETF
        for ticker, shares in ticker_shares_12m.items():
            print(f"Fetching data for ticker {ticker} starting from {month_13_start}...")
            data = yf.download(ticker, start=month_13_start, end=month_13_end)

            if not data.empty:
                closing_price_13m = data['Close'].iloc[0]
                total_value = shares * closing_price_13m
                etf_values_13m[ticker] = total_value
                print(f"{ticker}: {shares:.2f} shares at ${closing_price_13m:.2f} each, total value: ${total_value:.2f}")
            else:
                print(f"{ticker}: No data available for the 13th month's first trading day.")
    else:
        print(f"No data available in ticker_shares_per_month for the 12th month: {month_12_range}")

    # Display total portfolio value for the 13th month's first trading day
    if etf_values_13m:
        print("\nETF values on the 13th month's first trading day:")
        total_value = sum(etf_values_13m.values())
        print(f"Total portfolio value: {total_value:.2f}")
        for ticker, value in etf_values_13m.items():
            print(f"{ticker}: {value:.2f}")
    else:
        print("No values could be calculated for the 13th month's first trading day.")

    return ticker_shares_per_month



In [17]:
# Now call the weekly workflow using the 'results' as etf_dict
ticker_shares_per_month_wo_smoothing = main_monthly(tickers, results,False)


Fetching data for SMH from 2024-01-01 to 2024-01-31...
Fetching data for SOXX from 2024-01-01 to 2024-01-31...
Fetching data for PSI from 2024-01-01 to 2024-01-31...
Fetching data for XSD from 2024-01-01 to 2024-01-31...
Fetching data for IYW from 2024-01-01 to 2024-01-31...
Fetching data for XLK from 2024-01-01 to 2024-01-31...
Fetching data for VGT from 2024-01-01 to 2024-01-31...
Fetching data for FTEC from 2024-01-01 to 2024-01-31...
Fetching data for IGM from 2024-01-01 to 2024-01-31...
Fetching data for IXN from 2024-01-01 to 2024-01-31...
Fetching data for SMH from 2024-02-01 to 2024-02-29...
Fetching data for SOXX from 2024-02-01 to 2024-02-29...
Fetching data for PSI from 2024-02-01 to 2024-02-29...
Fetching data for XSD from 2024-02-01 to 2024-02-29...
Fetching data for IYW from 2024-02-01 to 2024-02-29...
Fetching data for XLK from 2024-02-01 to 2024-02-29...
Fetching data for VGT from 2024-02-01 to 2024-02-29...
Fetching data for FTEC from 2024-02-01 to 2024-02-29...
Fetchi

[*********************100%%**********************]  1 of 1 completed

Fetching data for IXN from 2024-11-01 to 2024-11-30...
Processing scores for 2024-01-01 to 2024-01-31:
    ETF Week  RiskPercentage     Score
0   SMH   1m             0.1 -4.018728
1  SOXX   1m             0.1 -1.300675
2   PSI   1m             0.1 -0.613770
3   XSD   1m             0.1  4.275864
4   IYW   1m             0.1 -1.253701
Top ETFs for 2024-01-01 to 2024-01-31: ['FTEC', 'XSD']
Processing scores for 2024-02-01 to 2024-02-29:
    ETF Week  RiskPercentage     Score
0   SMH   2m             0.1 -4.166207
1  SOXX   2m             0.1 -1.451726
2   PSI   2m             0.1 -0.165711
3   XSD   2m             0.1  5.733292
4   IYW   2m             0.1 -0.703638
Top ETFs for 2024-02-01 to 2024-02-29: ['XSD', 'FTEC']
Processing scores for 2024-03-01 to 2024-03-31:
    ETF Week  RiskPercentage     Score
0   SMH   3m             0.1 -5.455172
1  SOXX   3m             0.1 -1.078050
2   PSI   3m             0.1 -0.362367
3   XSD   3m             0.1  6.133548
4   IYW   3m             0.1


[*********************100%%**********************]  1 of 1 completed

XSD: 206.22 shares at $252.68 each, total value: $52108.88

ETF values on the 13th month's first trading day:
Total portfolio value: 117319.29
FTEC: 65210.41
XSD: 52108.88





In [18]:
# Now call the weekly workflow using the 'results' as etf_dict
ticker_shares_per_month_with_smoothing = main_monthly(tickers, results,True)


Fetching data for SMH from 2024-01-01 to 2024-01-31...
Fetching data for SOXX from 2024-01-01 to 2024-01-31...
Fetching data for PSI from 2024-01-01 to 2024-01-31...
Fetching data for XSD from 2024-01-01 to 2024-01-31...
Fetching data for IYW from 2024-01-01 to 2024-01-31...
Fetching data for XLK from 2024-01-01 to 2024-01-31...
Fetching data for VGT from 2024-01-01 to 2024-01-31...
Fetching data for FTEC from 2024-01-01 to 2024-01-31...
Fetching data for IGM from 2024-01-01 to 2024-01-31...
Fetching data for IXN from 2024-01-01 to 2024-01-31...
Fetching data for SMH from 2024-02-01 to 2024-02-29...
Fetching data for SOXX from 2024-02-01 to 2024-02-29...
Fetching data for PSI from 2024-02-01 to 2024-02-29...
Fetching data for XSD from 2024-02-01 to 2024-02-29...
Fetching data for IYW from 2024-02-01 to 2024-02-29...
Fetching data for XLK from 2024-02-01 to 2024-02-29...
Fetching data for VGT from 2024-02-01 to 2024-02-29...
Fetching data for FTEC from 2024-02-01 to 2024-02-29...
Fetchi

[*********************100%%**********************]  1 of 1 completed

Processing scores for 2024-01-01 to 2024-01-31:
    ETF Week  RiskPercentage     Score
0   SMH   1m             0.1 -3.969459
1  SOXX   1m             0.1 -1.400784
2   PSI   1m             0.1 -0.795767
3   XSD   1m             0.1  4.393351
4   IYW   1m             0.1 -1.355251
Top ETFs for 2024-01-01 to 2024-01-31: ['FTEC', 'XSD']
Processing scores for 2024-02-01 to 2024-02-29:
    ETF Week  RiskPercentage     Score
0   SMH   2m             0.1 -3.630102
1  SOXX   2m             0.1 -1.850701
2   PSI   2m             0.1 -1.293327
3   XSD   2m             0.1  5.723648
4   IYW   2m             0.1 -1.414212
Top ETFs for 2024-02-01 to 2024-02-29: ['XSD', 'FTEC']
Processing scores for 2024-03-01 to 2024-03-31:
    ETF Week  RiskPercentage     Score
0   SMH   3m             0.1 -4.365114
1  SOXX   3m             0.1 -1.890721
2   PSI   3m             0.1  0.062556
3   XSD   3m             0.1  5.054087
4   IYW   3m             0.1 -1.731995
Top ETFs for 2024-03-01 to 2024-03-31: ['XSD


[*********************100%%**********************]  1 of 1 completed

XSD: 206.22 shares at $252.68 each, total value: $52108.88

ETF values on the 13th month's first trading day:
Total portfolio value: 117319.29
FTEC: 65210.41
XSD: 52108.88



