In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tabulate import tabulate
import pandas_ta as ta
from sklearn.preprocessing import StandardScaler,MinMaxScaler,Normalizer

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers.legacy import Adam as LegacyAdam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from skopt import gp_minimize
from skopt.space import Real, Integer
from sklearn.metrics import mean_squared_error
from bayes_opt import BayesianOptimization

import tensorflow as tf
from tensorflow.keras.optimizers.legacy import Adam
import time

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input, Bidirectional

import tensorflow as tf
from keras.layers import Attention


from pandas.tseries.offsets import MonthEnd

from pandas.tseries.offsets import BDay, Week

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows', None)

from datetime import datetime, timedelta

In [2]:
#tickers = ['SPY', 'QQQ', 'VTI', 'IWM', 'EFA', 'EEM', 'GLD', 'SLV', 'USO', 'XLF']
#tickers = ['SPY', 'QQQ', 'VTI']
#tickers = ['SPY']

#start_date, end_date = '2000-01-01', '2024-05-01'
#train_start_date, train_end_date, test_start_date, test_end_date, prediction_start_date = start_date,'2014-01-01', '2014-01-01', '2024-01-01', '2024-01-01'
#prediction_dates=['2024-01-01','2024-02-01','2024-03-01', '2024-04-01']

In [3]:
def data_loading(ticker_symbol, start_date, end_date):
    # Fetch the ETF data from Yahoo Finance for the period from 2010-01-01 to 2024-01-01
    etf_data = yf.Ticker(ticker_symbol)
    etf_history = etf_data.history(start=start_date, end=end_date)
    etf_history.index = etf_history.index.tz_localize(None)
    return etf_history, etf_data
    
#etf_history,etf_data = data_loading('SPY', start_date, end_date)
#etf_history.head(10)

In [4]:
#etf_history.tail(10)

In [5]:
#Function to derive the predictor columns
def etf_predictors(etf_history,  start_date, end_date, etf_data, benchmark_ticker = '^GSPC' ):
    
    # Calculate Daily Returns
    etf_history['Daily Return'] = etf_history['Close'].pct_change()

    # Calculate 21-Day Volatility (standard deviation of daily returns, annualized)
    etf_history['Volatility'] = etf_history['Daily Return'].rolling(window=21).std() * np.sqrt(252)
    #etf_history['Volatility_ta'] = ta.volatility(etf_history['Close'], window=21, annualize=True)
   
    # Get the ETF info
    etf_info = etf_data.info

    # Retrieve the net asset value price (NAV) and total net assets
    nav_price = etf_info['navPrice']
    total_assets = etf_info['totalAssets']

    # Calculate the number of shares outstanding
    shares_outstanding = total_assets / nav_price

    # Calculate Market Capitalization for each day
    # Market Capitalization = Closing Price * Total Number of Shares Outstanding
    etf_history['Market Cap'] = etf_history['Close'] * shares_outstanding

    total_assets = etf_info['totalAssets']
    total_liabilities = etf_info.get('totalLiabilities', 0)  # Handle the case where total liabilities might not be present
    
    # Calculate Book Value per Share
    book_value_per_share = (total_assets - total_liabilities) / shares_outstanding
    
    # Calculate Price to Book (P/B) Ratio
    etf_history['P/B Ratio'] = etf_history['Close'] / book_value_per_share
    
    # Calculate 1-Month Momentum (21 trading days)
    
    etf_history['Momentum'] = ta.mom(etf_history['Close'], length=21)
    
    benchmark_data = yf.download(benchmark_ticker, start=start_date, end=end_date)
    benchmark_data['Daily Return'] = benchmark_data['Close'].pct_change()
    #benchmark_data_history = benchmark_data.dropna()
    benchmark_data.head(10)
    
    # Ensure the indices are time zone-naive
    benchmark_data.index = benchmark_data.index.tz_localize(None)
    combined_data = etf_history[['Close']].join(benchmark_data[['Close']], lsuffix='_ETF', rsuffix='_Benchmark')
    combined_data = combined_data.dropna()
    
    # Calculate rolling beta with a 30-day window
    rolling_beta_21 = rolling_beta(combined_data, window=21)

    # Add the rolling beta to the dataframe
    combined_data.loc[:, 'Rolling Beta 21-day'] = rolling_beta_21
    etf_history['Rolling Beta']=combined_data['Rolling Beta 21-day']
    
    # Calculate daily profitability
    daily_profitabilities = []
    previous_nav = None
    for index, row in etf_history.iterrows():
        current_nav = row['Close']  # Current day's NAV
        #print(current_nav)
        if previous_nav is not None:
            daily_profitability = (current_nav - previous_nav) / previous_nav * 100
            daily_profitabilities.append(daily_profitability)
        else:
            daily_profitabilities.append(None)
        previous_nav = current_nav  # Update previous_nav for the next iteration

    # Add daily profitabilities to ETF dataset
    etf_history['Daily Profitability (%)'] = daily_profitabilities
    
    # Calculate the dividend yield for each day
    dividend_yields = []

    for index, row in etf_history.iterrows():
        # Get the dividend payment for the day
        dividend_payment = row['Dividends']

        # Get the current market price of the ETF for the day
        current_price = row['Close']

        # Calculate the dividend yield for the day
        dividend_yield = (dividend_payment / current_price) * 100

        # Append the dividend yield to the list
        dividend_yields.append(dividend_yield)
      
    etf_history['Div yield'] = dividend_yields

    #Volatility
    etf_history['ATR'] = ta.atr(etf_history['High'], etf_history['Low'], etf_history['Close'], length=21)    
    
    # Compute the Relative Volatility Index (RVI)
    rvi = ta.rvi(etf_history['Close'],length=21)
    etf_history['RVI'] = rvi
    
    #Momentum
    rsi_window = 14  # Window size for RSI calculation
    roc_window = 12  # Window size for ROC calculation
    #rsi_window = 21  
    #roc_window = 21  

    

    etf_history['RSI'] = ta.rsi(etf_history['Close'],length=rsi_window)
    # Calculate Rate of Change (ROC)
    etf_history['ROC'] = ta.roc(etf_history['Close'], length=roc_window)
        
    etf_history['log_returns'] = np.log(etf_history['Close'] / etf_history['Close'].shift(1))

    return etf_history

# Function to calculate rolling beta
def rolling_beta(df, window):
    rolling_cov = df['Close_Benchmark'].rolling(window=window).cov(df['Close_ETF'])
    rolling_var = df['Close_Benchmark'].rolling(window=window).var()
    rolling_beta = rolling_cov / rolling_var
    return rolling_beta



In [6]:
#etf_history = etf_predictors(etf_history,  start_date, end_date, etf_data,benchmark_ticker = '^GSPC' )
#etf_history.head(10)

In [7]:
def pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates, feature_columns=None, scaling_strategy='StandardScaler', final_end_date='2024-12-01'):
    # Handle missing values and inf replacements
    etf_history.fillna(etf_history.median(), inplace=True)
    etf_history = etf_history.replace(-np.inf, 0)

    # Default feature columns if none are provided
    if feature_columns is None:
        feature_columns = ['Volatility', 'Volume', 'Daily Return', 'Market Cap', 'P/B Ratio', 'Momentum', 
                           'Rolling Beta', 'Daily Profitability (%)', 'ATR', 'RVI', 'RSI', 'ROC']

    # Selecting the features (X) and the target (y)
    X = etf_history.loc[:, feature_columns]
    y = etf_history['log_returns']

    # Split train and test data
    train_data = X.loc[train_start_date:train_end_date]
    test_data = X.loc[test_start_date:test_end_date]
    y_train = y.loc[train_start_date:train_end_date]
    y_test = y.loc[test_start_date:test_end_date]

    # Scaling strategy based on input parameter
    if scaling_strategy == 'StandardScaler':
        scaler = StandardScaler()
    elif scaling_strategy == 'Normalizer':
        scaler = Normalizer()
    else:
        raise ValueError(f"Unsupported scaling strategy: {scaling_strategy}")
    
    # Scaling the train and test data
    train_data_scaled = scaler.fit_transform(train_data)
    test_data_scaled = scaler.transform(test_data)

    # Create a dictionary to store forecast data for each weekly prediction period
    forecast_data = {}
    for i, start_date in enumerate(prediction_dates):
        # Determine the end date for each forecast week
        if i < len(prediction_dates) - 1:
            end_date = (pd.to_datetime(start_date) + Week(1) - pd.Timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            end_date = final_end_date  # The final end date provided or default

        # Store the forecast data for each week
        forecast_data[f'forecast_data_{i+1}w'] = X.loc[start_date:end_date]

    # Scale the forecast data dynamically
    forecast_data_scaled = {}
    for period_key, period_data in forecast_data.items():
        if not period_data.empty:
            forecast_data_scaled[period_key] = sm.add_constant(scaler.transform(period_data))

    # Add constant to scaled train and test data
    train_data_scaled = sm.add_constant(train_data_scaled)
    test_data_scaled = sm.add_constant(test_data_scaled)

    #print(f"Weekly forecast data keys: {forecast_data.keys()}")

    # Dynamically return the scaled forecast data along with train and test data
    return {
        'train_data_scaled': train_data_scaled,
        'test_data_scaled': test_data_scaled,
        'y_train': y_train,
        'y_test': y_test,
        'scaler': scaler,
        'X': X,
        'etf_history': etf_history,
        'forecast_data_scaled': forecast_data_scaled,
        'forecast_data': forecast_data
    }

In [8]:
#train_data_scaled, test_data_scaled, y_train, y_test, scaler, X, etf_history, forecast_data_scaled_1m, forecast_data_scaled_2m, forecast_data_scaled_3m, forecast_data_scaled_4m, forecast_data_1m, forecast_data_2m, forecast_data_3m, forecast_data_4m = pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_start_date, prediction_dates)

In [9]:
def build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train, validation_split=0.2):
    train_data_reshaped = reshape_for_lstm(train_data_scaled)
    
    model = Sequential()
    
    # First LSTM layer
    model.add(LSTM(units=int(units), return_sequences=True, input_shape=(train_data_reshaped.shape[1], train_data_reshaped.shape[2])))
    model.add(Dropout(dropout_rate))
    
    # Second LSTM layer with return_sequences=False
    model.add(LSTM(units=int(units), return_sequences=False))
    model.add(Dropout(dropout_rate))
    
    # Adding a Dense hidden layer with ReLU activation
    model.add(Dense(units=int(units/2), activation='relu'))
    model.add(Dropout(dropout_rate))
    
    # Output layer
    model.add(Dense(1))
    
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    
    # Early stopping to prevent overfitting
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    
     # Track the start time
    #start_time = time.time()
    
    model.fit(train_data_reshaped, y_train, 
              epochs=int(epochs), 
              batch_size=int(batch_size), 
              validation_split=validation_split, 
              callbacks=[early_stopping],
              verbose=0)
    
    # Calculate time consumed
    #time_consumed = time.time() - start_time
    
    return model



def optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test):
    def objective_function(epochs, batch_size, units, dropout_rate, learning_rate):
        model = build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train)
        test_data_reshaped = reshape_for_lstm(test_data_scaled)
        test_predictions = make_predictions(model, test_data_reshaped)
        mse = mean_squared_error(y_test, test_predictions)
        return -mse  # We return the negative MSE because Bayesian Optimization tries to maximize the function

    # Reduced parameter ranges for faster search
    param_bounds = {
        'epochs': (10, 30),  # Reduce max epochs
        'batch_size': (16, 64),  # Widen batch size range for exploration
        'units': (32, 128),  # Expand LSTM unit range
        'dropout_rate': (0.2, 0.5),  # Adjust dropout for robustness
        'learning_rate': (1e-4, 5e-3)  # Widen learning rate range for exploration
    }
    
    optimizer = BayesianOptimization(
        f=objective_function,
        pbounds=param_bounds,
        verbose=2,
        random_state=42,
    )

    # Reduced the number of iterations for faster optimization
    optimizer.maximize(init_points=15, n_iter=50)  # Fewer initial points and iterations

    best_params = optimizer.max['params']
    return best_params


"def build_and_train_lstm_model(epochs, batch_size, units, dropout_rate, learning_rate, train_data_scaled, y_train, validation_split=0.2):\n    train_data_reshaped = reshape_for_lstm(train_data_scaled)\n    \n    model = Sequential()\n    \n    # Input layer\n    model.add(Input(shape=(train_data_reshaped.shape[1], train_data_reshaped.shape[2])))\n    \n    # First Bidirectional LSTM layer with Batch Normalization\n    lstm_output = Bidirectional(LSTM(units=int(units), return_sequences=True))(model.input)\n    lstm_output = Dropout(dropout_rate)(lstm_output)\n    lstm_output = BatchNormalization()(lstm_output)\n    \n    # Attention Layer\n    query = lstm_output\n    value = lstm_output\n    attention_output = Attention()([query, value])\n    \n    # Second LSTM layer\n    lstm_output_2 = LSTM(units=int(units), return_sequences=True)(attention_output)\n    lstm_output_2 = Dropout(dropout_rate)(lstm_output_2)\n    lstm_output_2 = BatchNormalization()(lstm_output_2)\n    \n    # Third L

In [11]:
def make_predictions(model, data_scaled):
    if len(data_scaled.shape) == 2:
        data_scaled = np.reshape(data_scaled, (data_scaled.shape[0], 1, data_scaled.shape[1]))
    elif len(data_scaled.shape) != 3:
        raise ValueError(f"Unexpected shape for input data: {data_scaled.shape}")
    
    predictions = model.predict(data_scaled)
    return predictions

def eval_model(best_model, test_data_scaled, y_test, y_train=None):
    # Reshape test data to match LSTM input requirements
    test_data_reshaped = test_data_scaled.reshape((test_data_scaled.shape[0], 1, test_data_scaled.shape[1]))
    
    # Make predictions
    test_predictions = best_model.predict(test_data_reshaped)
    
    # Calculate Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_test, test_predictions)
    
    # Calculate Mean Absolute Scaled Error (MASE) - Example calculation assuming seasonal period m=1
    naive_forecast = y_test.shift(1).fillna(method='bfill')
    mase = mae / mean_absolute_error(y_test, naive_forecast)
    print("mae: ",mae)
    print("mase: ",mase)
    return mae, mase


In [12]:
# Ensured consistent feature handling and forecasting in predictions
def predictions(model, forecast_data_scaled, forecast_data):
    # Make predictions using the model
    predictions_forecast = make_predictions(model, forecast_data_scaled)
    
    # Create a DataFrame for the predictions with the same index as the forecast data
    forecast_predictions_df = pd.DataFrame(predictions_forecast, columns=["log_returns"], index=forecast_data.index)
    
    # Concatenate the original forecast data with the predictions
    forecast_data_with_predictions = pd.concat([forecast_data, forecast_predictions_df], axis=1)
    
    return forecast_predictions_df, forecast_data_with_predictions

In [13]:
def calculate_mase(y_true, y_pred, naive_forecast):
    # Calculate the MAE of the model's predictions
    mae_model = mean_absolute_error(y_true, y_pred)
    
    # Calculate the MAE of the naive forecast
    mae_naive = mean_absolute_error(y_true, naive_forecast)
    
    # Calculate MASE
    mase = mae_model / mae_naive
    return mase

In [14]:
def reshape_for_lstm(data):
    return np.reshape(data, (data.shape[0], 1, data.shape[1]))  # Reshape into (samples, time_steps, features)


In [15]:
#etf_pred_dict, scores_1m, scores_2m, scores_3m, scores_4m = main(tickers, etf_dict)

In [16]:
#tickers = ['SPY', 'QQQ', 'VTI', 'IWM', 'EFA', 'EEM', 'GLD', 'SLV', 'USO', 'XLF']
#tickers = ['SMH', 'SOXX', 'PSI']

tickers = ['SMH', 'SOXX', 'PSI', 'XSD', 'IYW', 'XLK', 'VGT', 'FTEC', 'IGM', 'IXN', 
           #'FNGU','USD', 'FNGO', 'GBTC', 'ETHE', 'TECL', 'FNGS', 'TQQQ', 'ROM', 'QLD' No data available from 2000: 2019
          ]

#tickers = ['SPY', 'QQQ', 'VTI']
#tickers = ['SPY']
                                                           
start_date, end_date = '2000-01-01', '2024-12-01'
train_start_date, train_end_date, test_start_date, test_end_date, prediction_start_date = start_date,'2014-01-01', '2014-01-01', '2024-01-01', '2024-01-01'
#prediction_dates=['2024-01-01','2024-02-01','2024-03-01', '2024-04-01','2024-05-01','2024-06-01','2024-07-01','2024-08-01','2024-09-01','2024-10-01','2024-11-01']

In [17]:
#currently timeperiod is set to 48 weeks
prediction_dates = pd.date_range(start='2024-01-01', 
                                 periods=48, 
                                 freq='W-MON').strftime('%Y-%m-%d').tolist()


In [18]:
dict_data = {}
etf_dict = {}
data_with_predictors = []
for i in tickers:
    etf_history,etf_data = data_loading(i, start_date, end_date)
    
    print(etf_data.info.get('longName'), ":",i)
    
    #test_stationarity(etf_history)
    print(" ETF:",i)
    #print(etf_history.head(10))
    #print("################")
    #hist_data.append(etf_history)
    
    etf_history = etf_predictors(etf_history,  start_date, end_date, etf_data,benchmark_ticker = '^GSPC' )
    
    # Call the function
    preprocessed_data = pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates)

    # Extract individual components from the returned dictionary
    train_data_scaled = preprocessed_data['train_data_scaled']
    test_data_scaled = preprocessed_data['test_data_scaled']
    y_train = preprocessed_data['y_train']
    y_test = preprocessed_data['y_test']
    scaler = preprocessed_data['scaler']
    X = preprocessed_data['X']
    etf_history = preprocessed_data['etf_history']

    
    
    # Track the start time
    start_time = time.time()
    
    
    # Optimize hyperparameters
    # Optimize hyperparameters
    best_params = optimize_hyperparameters(train_data_scaled, y_train, test_data_scaled, y_test)
    
    # Extract the best hyperparameters
    best_epochs = int(best_params['epochs'])
    best_batch_size = int(best_params['batch_size'])
    best_units = int(best_params['units'])
    best_dropout_rate = best_params['dropout_rate']
    best_learning_rate = best_params['learning_rate']

    # Train the final model using the best hyperparameters
    best_model = build_and_train_lstm_model(
        epochs=best_epochs, 
        batch_size=best_batch_size, 
        units=best_units, 
        dropout_rate=best_dropout_rate, 
        learning_rate=best_learning_rate, 
        train_data_scaled=train_data_scaled, 
        y_train=y_train
    )
    
     
    
    # Calculate time consumed
    time_consumed = time.time() - start_time
    
    # Print the time consumed
    print(f"Time consumed for training: {time_consumed:.2f} seconds")

    # Evaluate the model
    mae, mase = eval_model(best_model, test_data_scaled, y_test)
    
    # Initialize dictionaries for forecast data and predictions
    # Step 5: Initialize forecast data storage
    forecast_predictions = {}
    forecast_data_dict = {}
    forecast_data_scaled_dict = {}

    # Step 6: Fetch weekly forecast data and generate predictions
    for week in range(1, 49):  # Generate weekly forecasts for up to 52 weeks
        week_key = f'forecast_data_{week}w'
        if week_key in preprocessed_data['forecast_data']:
            forecast_data = preprocessed_data['forecast_data'][week_key]
            forecast_data_scaled = preprocessed_data['forecast_data_scaled'].get(week_key)

            if forecast_data is not None and forecast_data_scaled is not None:
                try:
                    forecast_predictions_df, _ = predictions(
                        best_model, forecast_data_scaled, forecast_data
                    )
                    # Store predictions and forecast data
                    forecast_predictions[f'forecast_predictions_df_{week}w'] = forecast_predictions_df['log_returns'].values
                    forecast_data_dict[week_key] = forecast_data
                    forecast_data_scaled_dict[week_key] = forecast_data_scaled
                except Exception as e:
                    print(f"Error generating predictions for {week_key} for ETF {i}: {e}")
            else:
                print(f"Warning: Missing scaled data for {week_key} for ETF {i}")
        else:
            print(f"Warning: Missing forecast key {week_key} for ETF {i}")

    # Step 7: Construct `dict_data` for the current ETF
    dict_data = {
        "etf_history": etf_history,
        "X": X,
        "y_train_values": y_train,
        "model results": {"mae": mae, "mase": mase},
        **forecast_data_dict,  # Add raw forecast data
        **forecast_data_scaled_dict,  # Add scaled forecast data
        **forecast_predictions  # Add predictions
    }

    # Step 8: Store in the main `etf_dict`
    etf_dict[i] = dict_data

# Print completion message
print("Weekly forecast data and predictions stored successfully.")

[*********************100%%**********************]  1 of 1 completed

VanEck Semiconductor ETF : SMH
 ETF: SMH





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-8.9e-05  | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [35m2         | [35m-5.626e-0 | [35m23.49     | [35m0.2174    | [35m27.32     | [35m0.003045  | [35m99.97     |
| [35m3         | [35m-5.109e-0 | [35m16.99     | [35m0.491     | [35m26.65     | [35m0.00114   | [35m49.46     |
| [30m4         | [30m-6.797e-0 | [30m24.8      | [30m0.2913    | [30m20.5      | [30m0.002217  | [30m59.96     |
| [30m5         | [30m-0.000132 | [30m45.37     | [30m0.2418    | [30m15.84     | [30m0.001895  | [30m75.78     |
| [30m6         | [30m-0.000183 | [30m53.69     | [30m0.2599    | [30m20.28     | [30m0.003003  | [30m36.46     |
| [30m7         | [30m-7.131e-0 | [30m45.16     | [30m0.2512    | [30m11.3      | [30m0.00475   

| [30m47        | [30m-0.000213 | [30m17.97     | [30m0.2341    | [30m15.27     | [30m0.0001    | [30m51.55     |
| [30m48        | [30m-0.000164 | [30m40.69     | [30m0.3313    | [30m10.97     | [30m0.002147  | [30m32.07     |
| [30m49        | [30m-8.668e-0 | [30m33.71     | [30m0.2384    | [30m17.67     | [30m0.003452  | [30m61.71     |
| [30m50        | [30m-6.992e-0 | [30m17.96     | [30m0.2316    | [30m15.26     | [30m0.004274  | [30m51.56     |
| [30m51        | [30m-0.000178 | [30m17.96     | [30m0.2259    | [30m15.25     | [30m0.005     | [30m51.55     |
| [30m52        | [30m-0.000102 | [30m57.01     | [30m0.4602    | [30m21.78     | [30m0.003401  | [30m77.78     |
| [30m53        | [30m-5.078e-0 | [30m53.68     | [30m0.2166    | [30m11.78     | [30m0.004297  | [30m58.2      |
| [30m54        | [30m-7.308e-0 | [30m19.29     | [30m0.3525    | [30m10.69     | [30m0.0007013 | [30m106.0     |
| [30m55        | [30m-6.538e-

  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

iShares Semiconductor ETF : SOXX
 ETF: SOXX





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000182 | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [30m2         | [30m-0.000552 | [30m23.49     | [30m0.2174    | [30m27.32     | [30m0.003045  | [30m99.97     |
| [30m3         | [30m-0.000184 | [30m16.99     | [30m0.491     | [30m26.65     | [30m0.00114   | [30m49.46     |
| [30m4         | [30m-0.000206 | [30m24.8      | [30m0.2913    | [30m20.5      | [30m0.002217  | [30m59.96     |
| [30m5         | [30m-0.000200 | [30m45.37     | [30m0.2418    | [30m15.84     | [30m0.001895  | [30m75.78     |
| [30m6         | [30m-0.000287 | [30m53.69     | [30m0.2599    | [30m20.28     | [30m0.003003  | [30m36.46     |
| [30m7         | [30m-0.000353 | [30m45.16     | [30m0.2512    | [30m11.3      | [30m0.00475   

| [30m47        | [30m-0.000202 | [30m23.08     | [30m0.3772    | [30m15.15     | [30m0.003318  | [30m118.2     |
| [30m48        | [30m-0.000706 | [30m40.69     | [30m0.3313    | [30m10.97     | [30m0.002147  | [30m32.07     |
| [30m49        | [30m-0.000235 | [30m33.71     | [30m0.2384    | [30m17.67     | [30m0.003452  | [30m61.71     |
| [30m50        | [30m-0.000428 | [30m56.53     | [30m0.2972    | [30m11.35     | [30m0.0009344 | [30m44.76     |
| [30m51        | [30m-0.000117 | [30m44.2      | [30m0.3868    | [30m24.67     | [30m0.003539  | [30m57.72     |
| [30m52        | [30m-0.000278 | [30m57.01     | [30m0.4602    | [30m21.78     | [30m0.003401  | [30m77.78     |
| [30m53        | [30m-0.000686 | [30m53.68     | [30m0.2166    | [30m11.78     | [30m0.004297  | [30m58.2      |
| [30m54        | [30m-0.000720 | [30m19.29     | [30m0.3525    | [30m10.69     | [30m0.0007013 | [30m106.0     |
| [30m55        | [30m-0.00081

  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

Invesco Semiconductors ETF : PSI
 ETF: PSI





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000197 | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [35m2         | [35m-0.000177 | [35m23.49     | [35m0.2174    | [35m27.32     | [35m0.003045  | [35m99.97     |
| [30m3         | [30m-0.000423 | [30m16.99     | [30m0.491     | [30m26.65     | [30m0.00114   | [30m49.46     |
| [30m4         | [30m-0.000485 | [30m24.8      | [30m0.2913    | [30m20.5      | [30m0.002217  | [30m59.96     |
| [30m5         | [30m-0.000196 | [30m45.37     | [30m0.2418    | [30m15.84     | [30m0.001895  | [30m75.78     |
| [30m6         | [30m-0.000327 | [30m53.69     | [30m0.2599    | [30m20.28     | [30m0.003003  | [30m36.46     |
| [35m7         | [35m-0.000157 | [35m45.16     | [35m0.2512    | [35m11.3      | [35m0.00475   

| [30m47        | [30m-0.000314 | [30m44.99     | [30m0.4116    | [30m11.27     | [30m0.004906  | [30m124.7     |
| [30m48        | [30m-0.000393 | [30m44.8      | [30m0.2       | [30m11.07     | [30m0.005     | [30m124.6     |
| [30m49        | [30m-0.000408 | [30m50.0      | [30m0.2247    | [30m12.67     | [30m0.002974  | [30m123.8     |
| [30m50        | [30m-0.000235 | [30m47.79     | [30m0.2048    | [30m11.47     | [30m0.004314  | [30m127.8     |
| [30m51        | [30m-0.000242 | [30m47.73     | [30m0.3935    | [30m11.31     | [30m0.001947  | [30m127.7     |
| [30m52        | [30m-0.000255 | [30m45.04     | [30m0.2       | [30m11.36     | [30m0.003011  | [30m125.0     |
| [30m53        | [30m-0.000576 | [30m53.68     | [30m0.2166    | [30m11.78     | [30m0.004297  | [30m58.2      |
| [30m54        | [30m-0.000230 | [30m19.29     | [30m0.3525    | [30m10.69     | [30m0.0007013 | [30m106.0     |
| [30m55        | [30m-0.00019

  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

SPDR S&P Semiconductor ETF : XSD
 ETF: XSD





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000382 | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [35m2         | [35m-0.000269 | [35m23.49     | [35m0.2174    | [35m27.32     | [35m0.003045  | [35m99.97     |
| [30m3         | [30m-0.000654 | [30m16.99     | [30m0.491     | [30m26.65     | [30m0.00114   | [30m49.46     |
| [30m4         | [30m-0.000336 | [30m24.8      | [30m0.2913    | [30m20.5      | [30m0.002217  | [30m59.96     |
| [30m5         | [30m-0.000420 | [30m45.37     | [30m0.2418    | [30m15.84     | [30m0.001895  | [30m75.78     |
| [30m6         | [30m-0.003094 | [30m53.69     | [30m0.2599    | [30m20.28     | [30m0.003003  | [30m36.46     |
| [30m7         | [30m-0.002312 | [30m45.16     | [30m0.2512    | [30m11.3      | [30m0.00475   

| [30m47        | [30m-0.000320 | [30m20.03     | [30m0.4354    | [30m17.62     | [30m0.00189   | [30m95.5      |
| [30m48        | [30m-0.000388 | [30m29.82     | [30m0.4693    | [30m23.26     | [30m0.004066  | [30m94.48     |
| [30m49        | [30m-0.000393 | [30m43.87     | [30m0.2184    | [30m10.68     | [30m0.003775  | [30m103.0     |
| [30m50        | [30m-0.000456 | [30m25.01     | [30m0.2071    | [30m29.88     | [30m0.0006598 | [30m68.01     |
| [30m51        | [30m-0.000298 | [30m30.82     | [30m0.3807    | [30m10.09     | [30m0.004393  | [30m97.11     |
| [30m52        | [30m-0.000248 | [30m16.2      | [30m0.2709    | [30m28.57     | [30m0.001319  | [30m106.6     |
| [30m53        | [30m-0.000655 | [30m52.77     | [30m0.2636    | [30m29.64     | [30m0.002877  | [30m53.57     |
| [30m54        | [30m-0.000614 | [30m18.8      | [30m0.366     | [30m10.0      | [30m0.0001    | [30m108.3     |
| [30m55        | [30m-0.00043

  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

iShares U.S. Technology ETF : IYW
 ETF: IYW





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000105 | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [35m2         | [35m-9.619e-0 | [35m23.49     | [35m0.2174    | [35m27.32     | [35m0.003045  | [35m99.97     |
| [35m3         | [35m-4.324e-0 | [35m16.99     | [35m0.491     | [35m26.65     | [35m0.00114   | [35m49.46     |
| [30m4         | [30m-5.118e-0 | [30m24.8      | [30m0.2913    | [30m20.5      | [30m0.002217  | [30m59.96     |
| [30m5         | [30m-7.223e-0 | [30m45.37     | [30m0.2418    | [30m15.84     | [30m0.001895  | [30m75.78     |
| [30m6         | [30m-4.369e-0 | [30m53.69     | [30m0.2599    | [30m20.28     | [30m0.003003  | [30m36.46     |
| [30m7         | [30m-4.658e-0 | [30m45.16     | [30m0.2512    | [30m11.3      | [30m0.00475   

| [30m47        | [30m-0.000187 | [30m23.08     | [30m0.3772    | [30m15.15     | [30m0.003318  | [30m118.2     |
| [30m48        | [30m-0.000122 | [30m40.69     | [30m0.3313    | [30m10.97     | [30m0.002147  | [30m32.07     |
| [30m49        | [30m-7.397e-0 | [30m33.71     | [30m0.2384    | [30m17.67     | [30m0.003452  | [30m61.71     |
| [30m50        | [30m-7.078e-0 | [30m56.53     | [30m0.2972    | [30m11.35     | [30m0.0009344 | [30m44.76     |
| [30m51        | [30m-0.000213 | [30m44.2      | [30m0.3868    | [30m24.67     | [30m0.003539  | [30m57.72     |
| [30m52        | [30m-0.000105 | [30m57.01     | [30m0.4602    | [30m21.78     | [30m0.003401  | [30m77.78     |
| [30m53        | [30m-9.656e-0 | [30m53.68     | [30m0.2166    | [30m11.78     | [30m0.004297  | [30m58.2      |
| [30m54        | [30m-0.000146 | [30m19.29     | [30m0.3525    | [30m10.69     | [30m0.0007013 | [30m106.0     |
| [30m55        | [30m-5.984e-

  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

The Technology Select Sector SPDR Fund : XLK
 ETF: XLK





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-5.721e-0 | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [35m2         | [35m-2.155e-0 | [35m23.49     | [35m0.2174    | [35m27.32     | [35m0.003045  | [35m99.97     |
| [30m3         | [30m-4.684e-0 | [30m16.99     | [30m0.491     | [30m26.65     | [30m0.00114   | [30m49.46     |
| [30m4         | [30m-4.808e-0 | [30m24.8      | [30m0.2913    | [30m20.5      | [30m0.002217  | [30m59.96     |
| [30m5         | [30m-0.000242 | [30m45.37     | [30m0.2418    | [30m15.84     | [30m0.001895  | [30m75.78     |
| [30m6         | [30m-6.609e-0 | [30m53.69     | [30m0.2599    | [30m20.28     | [30m0.003003  | [30m36.46     |
| [30m7         | [30m-0.000170 | [30m45.16     | [30m0.2512    | [30m11.3      | [30m0.00475   

| [30m47        | [30m-5.393e-0 | [30m36.52     | [30m0.2792    | [30m24.52     | [30m0.004515  | [30m59.77     |
| [30m48        | [30m-4.722e-0 | [30m21.34     | [30m0.2629    | [30m21.71     | [30m0.003826  | [30m62.33     |
| [30m49        | [30m-5.981e-0 | [30m40.69     | [30m0.241     | [30m25.67     | [30m0.000183  | [30m57.46     |
| [30m50        | [30m-3.562e-0 | [30m27.98     | [30m0.3152    | [30m19.67     | [30m0.004706  | [30m59.43     |
| [30m51        | [30m-6.788e-0 | [30m28.28     | [30m0.478     | [30m16.72     | [30m0.003192  | [30m60.08     |
| [30m52        | [30m-5.348e-0 | [30m47.87     | [30m0.2126    | [30m25.42     | [30m0.001008  | [30m127.9     |
| [30m53        | [30m-2.889e-0 | [30m42.4      | [30m0.2764    | [30m23.55     | [30m0.0005676 | [30m127.7     |
| [30m54        | [30m-0.000104 | [30m55.3      | [30m0.3252    | [30m16.47     | [30m0.004443  | [30m70.38     |
| [30m55        | [30m-7.628e-

  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

Vanguard Information Technology Index Fund ETF Shares : VGT
 ETF: VGT





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000137 | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [30m2         | [30m-0.000210 | [30m23.49     | [30m0.2174    | [30m27.32     | [30m0.003045  | [30m99.97     |
| [30m3         | [30m-0.000365 | [30m16.99     | [30m0.491     | [30m26.65     | [30m0.00114   | [30m49.46     |
| [35m4         | [35m-8.642e-0 | [35m24.8      | [35m0.2913    | [35m20.5      | [35m0.002217  | [35m59.96     |
| [30m5         | [30m-0.000197 | [30m45.37     | [30m0.2418    | [30m15.84     | [30m0.001895  | [30m75.78     |
| [30m6         | [30m-0.000286 | [30m53.69     | [30m0.2599    | [30m20.28     | [30m0.003003  | [30m36.46     |
| [30m7         | [30m-0.000174 | [30m45.16     | [30m0.2512    | [30m11.3      | [30m0.00475   

| [30m47        | [30m-0.000673 | [30m21.99     | [30m0.2005    | [30m19.9      | [30m0.0003648 | [30m57.27     |
| [30m48        | [30m-0.000775 | [30m40.69     | [30m0.3313    | [30m10.97     | [30m0.002147  | [30m32.07     |
| [30m49        | [30m-0.000139 | [30m33.71     | [30m0.2384    | [30m17.67     | [30m0.003452  | [30m61.71     |
| [30m50        | [30m-0.000196 | [30m20.97     | [30m0.3255    | [30m21.0      | [30m0.00169   | [30m61.21     |
| [30m51        | [30m-0.000197 | [30m22.04     | [30m0.3549    | [30m19.88     | [30m0.002333  | [30m60.49     |
| [30m52        | [30m-0.000107 | [30m23.98     | [30m0.2918    | [30m20.27     | [30m0.003239  | [30m61.09     |
| [30m53        | [30m-0.000674 | [30m23.22     | [30m0.3406    | [30m19.88     | [30m0.00387   | [30m62.51     |
| [30m54        | [30m-0.000224 | [30m19.06     | [30m0.4636    | [30m23.44     | [30m0.0005479 | [30m59.82     |
| [30m55        | [30m-0.00194

  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

Fidelity MSCI Information Technology Index ETF : FTEC
 ETF: FTEC
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------





| [30m1         | [30m-0.000753 | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [35m2         | [35m-0.000466 | [35m23.49     | [35m0.2174    | [35m27.32     | [35m0.003045  | [35m99.97     |
| [35m3         | [35m-0.000338 | [35m16.99     | [35m0.491     | [35m26.65     | [35m0.00114   | [35m49.46     |
| [30m4         | [30m-0.002667 | [30m24.8      | [30m0.2913    | [30m20.5      | [30m0.002217  | [30m59.96     |
| [30m5         | [30m-0.000963 | [30m45.37     | [30m0.2418    | [30m15.84     | [30m0.001895  | [30m75.78     |
| [30m6         | [30m-0.001228 | [30m53.69     | [30m0.2599    | [30m20.28     | [30m0.003003  | [30m36.46     |
| [30m7         | [30m-0.001045 | [30m45.16     | [30m0.2512    | [30m11.3      | [30m0.00475   | [30m124.7     |
| [30m8         | [30m-0.000962 | [30m54.8      | [30m0.2914    | [30m11.95     | [30m0.003453  | [30m74.25     |
| [30m9         | [30m-0.00858

| [30m48        | [30m-0.002069 | [30m40.69     | [30m0.3313    | [30m10.97     | [30m0.002147  | [30m32.07     |
| [35m49        | [35m-0.000326 | [35m33.71     | [35m0.2384    | [35m17.67     | [35m0.003452  | [35m61.71     |
| [30m50        | [30m-0.000624 | [30m56.53     | [30m0.2972    | [30m11.35     | [30m0.0009344 | [30m44.76     |
| [30m51        | [30m-0.001229 | [30m44.2      | [30m0.3868    | [30m24.67     | [30m0.003539  | [30m57.72     |
| [30m52        | [30m-0.000734 | [30m57.01     | [30m0.4602    | [30m21.78     | [30m0.003401  | [30m77.78     |
| [30m53        | [30m-0.001204 | [30m53.68     | [30m0.2166    | [30m11.78     | [30m0.004297  | [30m58.2      |
| [30m54        | [30m-0.000485 | [30m19.29     | [30m0.3525    | [30m10.69     | [30m0.0007013 | [30m106.0     |
| [30m55        | [30m-0.004081 | [30m35.52     | [30m0.334     | [30m13.68     | [30m0.004601  | [30m109.3     |
| [35m56        | [35m-0.00029

  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

iShares Expanded Tech Sector ETF : IGM
 ETF: IGM





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000188 | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [30m2         | [30m-0.000212 | [30m23.49     | [30m0.2174    | [30m27.32     | [30m0.003045  | [30m99.97     |
| [35m3         | [35m-0.000174 | [35m16.99     | [35m0.491     | [35m26.65     | [35m0.00114   | [35m49.46     |
| [30m4         | [30m-0.000544 | [30m24.8      | [30m0.2913    | [30m20.5      | [30m0.002217  | [30m59.96     |
| [35m5         | [35m-0.000110 | [35m45.37     | [35m0.2418    | [35m15.84     | [35m0.001895  | [35m75.78     |
| [30m6         | [30m-0.000122 | [30m53.69     | [30m0.2599    | [30m20.28     | [30m0.003003  | [30m36.46     |
| [30m7         | [30m-0.000158 | [30m45.16     | [30m0.2512    | [30m11.3      | [30m0.00475   

| [30m47        | [30m-0.000167 | [30m23.08     | [30m0.3772    | [30m15.15     | [30m0.003318  | [30m118.2     |
| [30m48        | [30m-0.000241 | [30m40.69     | [30m0.3313    | [30m10.97     | [30m0.002147  | [30m32.07     |
| [30m49        | [30m-0.000189 | [30m33.71     | [30m0.2384    | [30m17.67     | [30m0.003452  | [30m61.71     |
| [30m50        | [30m-0.000100 | [30m56.53     | [30m0.2972    | [30m11.35     | [30m0.0009344 | [30m44.76     |
| [30m51        | [30m-0.000127 | [30m44.2      | [30m0.3868    | [30m24.67     | [30m0.003539  | [30m57.72     |
| [30m52        | [30m-0.000102 | [30m57.01     | [30m0.4602    | [30m21.78     | [30m0.003401  | [30m77.78     |
| [30m53        | [30m-0.000139 | [30m53.68     | [30m0.2166    | [30m11.78     | [30m0.004297  | [30m58.2      |
| [30m54        | [30m-0.000106 | [30m19.29     | [30m0.3525    | [30m10.69     | [30m0.0007013 | [30m106.0     |
| [30m55        | [30m-0.00031

  naive_forecast = y_test.shift(1).fillna(method='bfill')




[*********************100%%**********************]  1 of 1 completed

iShares Global Tech ETF : IXN
 ETF: IXN





|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   units   |
-------------------------------------------------------------------------------------
| [30m1         | [30m-0.000180 | [30m33.98     | [30m0.4852    | [30m24.64     | [30m0.003033  | [30m46.98     |
| [30m2         | [30m-0.000669 | [30m23.49     | [30m0.2174    | [30m27.32     | [30m0.003045  | [30m99.97     |
| [35m3         | [35m-8.898e-0 | [35m16.99     | [35m0.491     | [35m26.65     | [35m0.00114   | [35m49.46     |
| [30m4         | [30m-0.000330 | [30m24.8      | [30m0.2913    | [30m20.5      | [30m0.002217  | [30m59.96     |
| [35m5         | [35m-8.705e-0 | [35m45.37     | [35m0.2418    | [35m15.84     | [35m0.001895  | [35m75.78     |
| [35m6         | [35m-8.033e-0 | [35m53.69     | [35m0.2599    | [35m20.28     | [35m0.003003  | [35m36.46     |
| [30m7         | [30m-0.000549 | [30m45.16     | [30m0.2512    | [30m11.3      | [30m0.00475   

| [30m47        | [30m-0.000374 | [30m23.08     | [30m0.3772    | [30m15.15     | [30m0.003318  | [30m118.2     |
| [30m48        | [30m-0.000177 | [30m40.69     | [30m0.3313    | [30m10.97     | [30m0.002147  | [30m32.07     |
| [30m49        | [30m-0.000136 | [30m33.71     | [30m0.2384    | [30m17.67     | [30m0.003452  | [30m61.71     |
| [30m50        | [30m-0.000264 | [30m56.53     | [30m0.2972    | [30m11.35     | [30m0.0009344 | [30m44.76     |
| [30m51        | [30m-7.088e-0 | [30m44.2      | [30m0.3868    | [30m24.67     | [30m0.003539  | [30m57.72     |
| [30m52        | [30m-9.076e-0 | [30m57.01     | [30m0.4602    | [30m21.78     | [30m0.003401  | [30m77.78     |
| [30m53        | [30m-0.000316 | [30m53.68     | [30m0.2166    | [30m11.78     | [30m0.004297  | [30m58.2      |
| [30m54        | [30m-9.566e-0 | [30m19.29     | [30m0.3525    | [30m10.69     | [30m0.0007013 | [30m106.0     |
| [30m55        | [30m-0.00101

  naive_forecast = y_test.shift(1).fillna(method='bfill')


Weekly forecast data and predictions stored successfully.


In [19]:
def calculate_sharpe_ratio(returns, annual_risk_free_rate=0.1,period='daily'):
    #excess_returns = rate_of_return(returns) - risk_free_rate
    
    # Convert annual risk-free rate to daily rate
    daily_risk_free_rate = (1 + annual_risk_free_rate) ** (1/252) - 1
    
    # Calculate mean daily log return
    mean_return = np.mean(returns)
    
    # Calculate excess daily log return
    excess_return = mean_return - daily_risk_free_rate
    
    # Calculate standard deviation of daily log returns
    std_return = np.std(returns)
    
    # Print diagnostic information
    #print(f"Mean Daily Log Return: {mean_return}")
    #print(f"Excess Daily Log Return: {excess_return}")
    #print(f"Standard Deviation of Daily Log Returns: {std_return}")
    
    # Check for zero standard deviation to avoid division by zero
    if std_return == 0:
        return 0
    
    # Calculate Sharpe ratio
    sharpe_ratio = (excess_return / std_return) * np.sqrt(252)  # Annualize the Sharpe ratio
    return sharpe_ratio
    #return excess_returns / np.std(returns)


def calculate_rachev_ratio(returns, lower_percentile=5, upper_percentile=95):
    # Step 1: Sort the returns
    sorted_returns = np.sort(returns)
    
    # Step 2: Determine the percentiles
    lower_threshold = np.percentile(sorted_returns, lower_percentile)
    upper_threshold = np.percentile(sorted_returns, upper_percentile)
    
    # Step 3: Calculate Expected Shortfall (ES)
    es = np.mean(sorted_returns[sorted_returns <= lower_threshold])
    
    # Step 4: Calculate Expected Gain (EG)
    eg = np.mean(sorted_returns[sorted_returns >= upper_threshold])
    
    # Step 5: Compute the Rachev Ratio
    rachev_ratio = eg / -es
    return rachev_ratio


def calculate_volatility_clustering(returns):
    squared_returns = returns ** 2
    n = len(squared_returns)
    
    # Mean of squared returns
    mean_squared_returns = np.mean(squared_returns)
    
    # Calculate the numerator and denominator for autocorrelation at lag 1
    numerator = np.sum((squared_returns[:-1] - mean_squared_returns) * (squared_returns[1:] - mean_squared_returns))
    denominator = np.sum((squared_returns - mean_squared_returns) ** 2)
    
    if denominator == 0:
        return 0  # Avoid division by zero
    
    rho_1 = numerator / denominator
    return rho_1

def calculate_sortino_ratio(log_returns, target_log_return=0.0):
    """
    Calculate the Sortino Ratio using log returns.
    
    Parameters:
    - log_returns (array-like): Array or list of log returns for the period.
    - target_log_return (float): The target log return. Default is 0, which is often used as a benchmark.
    
    Returns:
    - float: The Sortino Ratio.
    """
    # Convert input to a NumPy array for easier calculations
    log_returns = np.array(log_returns)
    
    # Calculate the average period log return (R)
    avg_log_return = np.mean(log_returns)
    
    # Calculate the Target Downside Deviation (TDD)
    downside_deviation = np.sqrt(
        np.mean(np.square(np.maximum(0, target_log_return - log_returns)))
    )
    
    # Calculate Sortino Ratio
    #sortino_ratio_value = (avg_log_return - target_log_return) / downside_deviation
    epsilon = 1e-8
    
    # Add epsilon to downside_deviation to prevent division by zero
    #if downside_deviation == 0:
     #   return np.nan  # Return NaN if downside deviation is zero
    
    # Calculate Sortino Ratio
    sortino_ratio_value = (avg_log_return - target_log_return) / (downside_deviation + epsilon)
    
    return sortino_ratio_value


In [20]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering, 
    mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
    mean_volatility_clustering, std_volatility_clustering
):
    epsilon = 1e-8  # To prevent division by zero
    forecasted_mean = np.mean(forecasted_values)

    # Debugging: Log raw input values
    print(f"\nDebug: Composite Score Calculation")
    print(f"Forecasted Mean: {forecasted_mean}, Risk Percentage: {risk_percentage}")
    print(f"Rachev Ratio: {rachev_ratio}, Sharpe Ratio: {sharpe_ratio}")
    print(f"Sortino Ratio: {sortino_ratio}, Volatility Clustering: {volatility_clustering}")

    # Normalize the components with epsilon
    forecasted_mean_normalized = (forecasted_mean - mean_forecast) / (std_forecast + epsilon)
    rachev_normalized = (rachev_ratio - mean_rachev) / (std_rachev + epsilon)
    sharpe_normalized = (sharpe_ratio - mean_sharpe) / (std_sharpe + epsilon)
    sortino_normalized = (sortino_ratio - mean_sortino) / (std_sortino + epsilon)
    volatility_clustering_normalized = (volatility_clustering - mean_volatility_clustering) / (std_volatility_clustering + epsilon)

    # Debugging: Log normalized values
    print(f"Normalized Values -> Forecasted Mean: {forecasted_mean_normalized}, Rachev: {rachev_normalized}")
    print(f"Sharpe: {sharpe_normalized}, Sortino: {sortino_normalized}, Volatility Clustering: {volatility_clustering_normalized}")

    # Composite score calculation
    score = (
        forecasted_mean_normalized
        - (risk_percentage * rachev_normalized)
        + sharpe_normalized
        + sortino_normalized
        - volatility_clustering_normalized
    )

    # Debugging: Log final score
    print(f"Final Composite Score: {score}")

    return score


def process_etf_data_weekly(tickers, etf_dict):
    etf_pred_dict = {}

    # Determine the forecast periods dynamically from the etf_dict
    sample_etf = next(iter(etf_dict.values()))
    forecast_periods = [key.split('_')[-1] for key in sample_etf.keys() if key.startswith('forecast_predictions_df')]

    for etf_name in tickers:
        etf_pred_dict[etf_name] = {
            f"returns_{period}": etf_dict[etf_name][f"forecast_predictions_df_{period}"]
            for period in forecast_periods
        }

        for period in forecast_periods:
            returns = etf_pred_dict[etf_name][f"returns_{period}"]

            # Log returns for debugging
            print(f"\nDebug: Returns for {etf_name}, Forecast Period {period}: {returns}")

            # Calculate metrics
            etf_pred_dict[etf_name][f"rachev_ratio_{period}"] = calculate_rachev_ratio(returns)
            etf_pred_dict[etf_name][f"sharpe_ratio_{period}"] = calculate_sharpe_ratio(returns)
            etf_pred_dict[etf_name][f"sortino_ratio_{period}"] = calculate_sortino_ratio(returns)
            etf_pred_dict[etf_name][f"volatility_clustering_{period}"] = calculate_volatility_clustering(returns)

    return etf_pred_dict



def calculate_means_and_stds_weekly(etf_pred_dict, forecast_period):
    returns_list = [etf_pred_dict[etf][f'returns_{forecast_period}'] for etf in etf_pred_dict]

    # Compute global means and standard deviations
    mean_forecast = np.mean([np.mean(returns) for returns in returns_list])
    std_forecast = np.std([np.mean(returns) for returns in returns_list])

    print(f"\nDebug: Forecast Period = {forecast_period}")
    print(f"All Returns Means: {[np.mean(returns) for returns in returns_list]}")
    print(f"Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")

    rachev_ratios = np.array([etf_pred_dict[etf][f'rachev_ratio_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Rachev Ratios: {rachev_ratios}")
    mean_rachev = np.mean(rachev_ratios)
    std_rachev = np.std(rachev_ratios)

    sharpe_ratios = np.array([etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Sharpe Ratios: {sharpe_ratios}")
    mean_sharpe = np.mean(sharpe_ratios)
    std_sharpe = np.std(sharpe_ratios)

    sortino_ratios = np.array([etf_pred_dict[etf][f'sortino_ratio_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Sortino Ratios: {sortino_ratios}")
    mean_sortino = np.mean(sortino_ratios)
    std_sortino = np.std(sortino_ratios)

    volatility_clustering = np.array([etf_pred_dict[etf][f'volatility_clustering_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Volatility Clustering: {volatility_clustering}")
    mean_volatility_clustering = np.mean(volatility_clustering)
    std_volatility_clustering = np.std(volatility_clustering)

    print(f"Mean Rachev = {mean_rachev}, Std Rachev = {std_rachev}")
    print(f"Mean Sharpe = {mean_sharpe}, Std Sharpe = {std_sharpe}")
    print(f"Mean Sortino = {mean_sortino}, Std Sortino = {std_sortino}")
    print(f"Mean Volatility Clustering = {mean_volatility_clustering}, Std Volatility Clustering = {std_volatility_clustering}")

    return (
        mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering
    )



def calculate_scores_for_etfs_weekly(etf_pred_dict, forecast_period, risk_percentage):
    (mean_forecast, std_forecast, mean_rachev, std_rachev, 
     mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
     mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds_weekly(etf_pred_dict, forecast_period)

    scores = []
    for etf in etf_pred_dict:
        forecasted_values = etf_pred_dict[etf][f'returns_{forecast_period}']
        rachev_ratio = etf_pred_dict[etf][f'rachev_ratio_{forecast_period}']
        sharpe_ratio = etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}']
        volatility_clustering = etf_pred_dict[etf][f'volatility_clustering_{forecast_period}']
        sortino_ratio = etf_pred_dict[etf][f'sortino_ratio_{forecast_period}']

        # Debugging: Log inputs to composite score calculation
        print(f"\nDebug: ETF = {etf}, Forecast Period = {forecast_period}")
        print(f"Forecasted Values Mean: {np.mean(forecasted_values)}")
        print(f"Rachev Ratio: {rachev_ratio}, Sharpe Ratio: {sharpe_ratio}")
        print(f"Sortino Ratio: {sortino_ratio}, Volatility Clustering: {volatility_clustering}")
        print(f"Means and Stds: Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")
        
        # Calculate the composite score
        score = calculate_composite_score(
            forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, 
            sortino_ratio, volatility_clustering, mean_forecast, std_forecast, 
            mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, 
            std_sortino, mean_volatility_clustering, std_volatility_clustering
        )

        scores.append({
            'ETF': etf,
            'Week': forecast_period,
            'RiskPercentage': risk_percentage,
            'Score': score
        })

        # Debugging: Log the calculated score
        print(f"Calculated Score for {etf} ({forecast_period}): {score}")

    return scores



def main_weekly(tickers, etf_dict):
    etf_pred_dict = process_etf_data_weekly(tickers, etf_dict)
    
    risk_percentage = 0.10
    weekly_scores = {}

    # Iterate over 48 weeks
    for week in range(1, 49):
        week_key = f"{week}w"
        if any(f"returns_{week_key}" in etf_pred_dict[etf] for etf in etf_pred_dict):
            scores = calculate_scores_for_etfs_weekly(etf_pred_dict, week_key, risk_percentage)
            weekly_scores[week_key] = scores
            print(f"Scores calculated for {week_key}:")
            for score in scores:
                print(score)
        else:
            print(f"Skipping score calculation for {week_key}: No ETFs have data for this week.")

    return etf_pred_dict, weekly_scores


# Example usage:
# Execute weekly scoring
etf_pred_dict, weekly_scores = main_weekly(tickers, etf_dict)



Debug: Returns for SMH, Forecast Period 1w: [-0.02114971 -0.01641913 -0.01328753 -0.010488  ]

Debug: Returns for SMH, Forecast Period 2w: [-0.00236152 -0.01116573 -0.01175923 -0.01105097 -0.01297825]

Debug: Returns for SMH, Forecast Period 3w: [-0.00797619 -0.01325713 -0.00273405 -0.00242556]

Debug: Returns for SMH, Forecast Period 4w: [-0.01171342 -0.01074597 -0.00792683 -0.01255071 -0.01969242]

Debug: Returns for SMH, Forecast Period 5w: [-0.0102156  -0.01522117 -0.01648088 -0.00956425 -0.00606219]

Debug: Returns for SMH, Forecast Period 6w: [-0.00558619 -0.01330606 -0.00610416 -0.00728405 -0.00485444]

Debug: Returns for SMH, Forecast Period 7w: [-0.01057591 -0.01520299 -0.0058159  -0.01044066 -0.011452  ]

Debug: Returns for SMH, Forecast Period 8w: [-0.01689423 -0.0141256   0.00472629 -0.01330351]

Debug: Returns for SMH, Forecast Period 9w: [-0.01023087 -0.01121312 -0.01387803 -0.006257   -0.0012057 ]

Debug: Returns for SMH, Forecast Period 10w: [-0.00638326 -0.01454497 -0


Debug: Returns for IYW, Forecast Period 31w: [-0.01118734 -0.01481485  0.00836619 -0.01633306 -0.00303925]

Debug: Returns for IYW, Forecast Period 32w: [0.00334995 0.0297714  0.01607421 0.04427855 0.01694501]

Debug: Returns for IYW, Forecast Period 33w: [ 0.01530505  0.02506191  0.00312704 -0.0033914  -0.01657383]

Debug: Returns for IYW, Forecast Period 34w: [-0.01608879 -0.01713597 -0.01456202 -0.03007118 -0.01851346]

Debug: Returns for IYW, Forecast Period 35w: [-0.02561922 -0.0216019  -0.02902836 -0.0206775  -0.02043002]

Debug: Returns for IYW, Forecast Period 36w: [-0.04488691 -0.0307057  -0.02698626 -0.03628361]

Debug: Returns for IYW, Forecast Period 37w: [-0.0177433  -0.01826184 -0.01279288 -0.01912971 -0.0201684 ]

Debug: Returns for IYW, Forecast Period 38w: [-0.01974697 -0.01660769 -0.0148714  -0.0036963  -0.01805418]

Debug: Returns for IYW, Forecast Period 39w: [-0.02022734 -0.01556312 -0.01869611 -0.01512938 -0.0239931 ]

Debug: Returns for IYW, Forecast Period 40w:


Debug: Returns for IXN, Forecast Period 35w: [-0.04249205 -0.03403961 -0.0377393  -0.04444391 -0.03269501]

Debug: Returns for IXN, Forecast Period 36w: [-0.04532517 -0.02945026 -0.03002169 -0.03628815]

Debug: Returns for IXN, Forecast Period 37w: [-0.02647718 -0.02657902 -0.02392951 -0.02248999 -0.02235912]

Debug: Returns for IXN, Forecast Period 38w: [-0.03094886 -0.03458959 -0.04012578 -0.03227904 -0.05262852]

Debug: Returns for IXN, Forecast Period 39w: [-0.04860606 -0.05271756 -0.05372999 -0.04513129 -0.05509873]

Debug: Returns for IXN, Forecast Period 40w: [-0.0495817  -0.05976531 -0.04122253 -0.0425685  -0.03897509]

Debug: Returns for IXN, Forecast Period 41w: [-0.04000404 -0.03532034 -0.03717837 -0.05129903 -0.05134911]

Debug: Returns for IXN, Forecast Period 42w: [-0.04680733 -0.05873854 -0.05486901 -0.05161192 -0.06000765]

Debug: Returns for IXN, Forecast Period 43w: [-0.05600062 -0.05731987 -0.0692068  -0.06106299 -0.06194661]

Debug: Returns for IXN, Forecast Period

In [21]:
# Initialize an empty dictionary to hold DataFrames for each week
weekly_scores_dfs = {}

# Assuming weekly_scores is a dictionary with keys as week identifiers ('1w', '2w', ..., '48w') 
# and values as the respective scores
for week_key, scores in weekly_scores.items():
    # Convert scores for the current week into a DataFrame
    weekly_scores_dfs[week_key] = pd.DataFrame(scores)

    
# Create DataFrame variables dynamically for 48 weeks
for week in range(1, 49):
    week_key = f'{week}w'
    globals()[f'df_scores_{week_key}'] = weekly_scores_dfs.get(week_key, pd.DataFrame())




In [22]:
# Initialize a dictionary to hold the top 2 ETFs for each week
top_etfs_weekly = {}

# Loop through the weekly DataFrames and select the top 2 ETFs for each week
for week_key, df_scores in weekly_scores_dfs.items():
    # Select the top 2 ETFs based on the 'Score' column
    top_etfs_weekly[week_key] = df_scores.nlargest(2, 'Score')
    
    # Print the results for the current week
    print(f"\nTop 2 ETFs for {week_key} forecast:")
    print(top_etfs_weekly[week_key])



Top 2 ETFs for 1w forecast:
    ETF Week  RiskPercentage     Score
8   IGM   1w             0.1  7.991462
7  FTEC   1w             0.1  4.181336

Top 2 ETFs for 2w forecast:
    ETF Week  RiskPercentage     Score
8   IGM   2w             0.1  5.403989
7  FTEC   2w             0.1  4.648313

Top 2 ETFs for 3w forecast:
    ETF Week  RiskPercentage     Score
8   IGM   3w             0.1  4.739230
7  FTEC   3w             0.1  4.048963

Top 2 ETFs for 4w forecast:
    ETF Week  RiskPercentage     Score
8   IGM   4w             0.1  5.752882
7  FTEC   4w             0.1  4.712484

Top 2 ETFs for 5w forecast:
    ETF Week  RiskPercentage     Score
8   IGM   5w             0.1  4.634972
7  FTEC   5w             0.1  4.255123

Top 2 ETFs for 6w forecast:
    ETF Week  RiskPercentage     Score
7  FTEC   6w             0.1  5.852318
8   IGM   6w             0.1  3.260157

Top 2 ETFs for 7w forecast:
    ETF Week  RiskPercentage     Score
7  FTEC   7w             0.1  6.467252
1  SOXX   7w     

In [23]:
def select_top_etfs_weekly(df_scores, forecast_period):
    if df_scores.empty:
        print(f"No scores available for {forecast_period}. Skipping.")
        return []
    print(f"Processing scores for {forecast_period}:")
    print(df_scores.head())  # Check the top rows of the DataFrame
    top_etfs = df_scores.nlargest(2, 'Score')
    print(f"Top ETFs for {forecast_period}: {top_etfs['ETF'].tolist()}")
    return top_etfs['ETF'].tolist()



In [24]:
# Function to generate week ranges
def generate_week_ranges(start_date, end_date):
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    week_ranges = []
    
    while start < end:
        week_start = start
        week_end = start + timedelta(days=6)
        if week_end > end:
            week_end = end
        week_ranges.append((week_start.strftime('%Y-%m-%d'), week_end.strftime('%Y-%m-%d')))
        start += timedelta(days=7)
    
    return week_ranges

# Function to gather ETF data for weeks
def gather_etf_data_for_weeks(tickers, week_ranges):
    etf_histories = {}
    for start_date, end_date in week_ranges:
        week = f"{start_date} to {end_date}"
        etf_histories[week] = {}
        for ticker in tickers:
            etf_data = yf.download(ticker, start=start_date, end=end_date)
            if etf_data.empty:
                print(f"No data found for {ticker} in {week}")
                continue
            etf_data.index = pd.to_datetime(etf_data.index)
            etf_histories[week][ticker] = etf_data
            #print(f"Data for {ticker} in {week} gathered.")
    return etf_histories

# Function to initialize shares for the first week
def initialize_shares_for_first_week(top_etfs_1w, etf_histories, week, investment_amount=50000):
    ticker_shares = {}
    first_trading_day_start = week.split(" to ")[0]
    
    for ticker in top_etfs_1w:
        etf_history = etf_histories.get(week, {}).get(ticker)
        
        if etf_history is not None:
            first_trading_day = pd.to_datetime(first_trading_day_start) + BDay(1)
            if first_trading_day not in etf_history.index:
                first_trading_day = etf_history.index[etf_history.index.searchsorted(first_trading_day)]
            
            price_on_first_trading_day = etf_history.loc[first_trading_day, 'Close']
            num_shares = (investment_amount * 0.975) / price_on_first_trading_day
            print(f"Shares 1st week: ({investment_amount} * 0.975) / {price_on_first_trading_day}")
            ticker_shares[ticker] = num_shares
            print(f"Ticker: {ticker}, First trading day: {first_trading_day.date()}, Price: {price_on_first_trading_day}, Shares: {num_shares:.2f}")
        else:
            print(f"No data found for {ticker} in {week}")
    return ticker_shares

# Function to manage ETF portfolio weekly
def manage_etf_portfolio_weekly(
    top_etfs_previous, top_etfs_current, previous_week, current_week, ticker_shares, gathered_data_per_week
):
    etf_histories_for_current_week = gathered_data_per_week.get(current_week, {})
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"Top 2 ETFs for {previous_week}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_week}: {top2etfs_current}")

    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Ensure one-to-one mapping between sell and buy ETFs
    if len(etfs_to_sell) != len(etfs_to_buy):
        print("Mismatch between ETFs to sell and buy. Adjusting allocation...")
        return ticker_shares  # Abort if mismatched for now, you can implement custom logic

    # Allocate funds ETF-by-ETF
    for etf_sell, etf_buy in zip(etfs_to_sell, etfs_to_buy):
        no_of_shares = ticker_shares.get(etf_sell, 0)
        if no_of_shares > 0:
            # Selling old ETF
            if etf_sell in etf_histories_for_current_week:
                first_trading_day_sell_price = etf_histories_for_current_week[etf_sell].loc[
                    etf_histories_for_current_week[etf_sell].index[0], 'Close'
                ]
                selling_value = no_of_shares * first_trading_day_sell_price * 0.975
                print(f"Sell {etf_sell}: {no_of_shares:.2f} shares at {first_trading_day_sell_price:.2f}. Total value: {selling_value:.2f}")

                # Remove sold ETF from portfolio
                del ticker_shares[etf_sell]

                # Buying new ETF
                if etf_buy in etf_histories_for_current_week:
                    first_trading_day_buy_price = etf_histories_for_current_week[etf_buy].loc[
                        etf_histories_for_current_week[etf_buy].index[0], 'Close'
                    ]
                    new_shares = (selling_value * 0.975) / first_trading_day_buy_price
                    print(f"Buy {etf_buy}: {new_shares:.2f} shares at {first_trading_day_buy_price:.2f}.")
                    ticker_shares[etf_buy] = new_shares
                else:
                    print(f"Data for {etf_buy} is missing for {current_week}. Skipping purchase.")
            else:
                print(f"Data for {etf_sell} is missing for {current_week}. Skipping sale.")
        else:
            print(f"No shares found for {etf_sell} to sell.")

    print(f"Updated ticker shares after {current_week}: {ticker_shares}")
    return ticker_shares


In [25]:
# Generate Week Ranges
week_ranges = generate_week_ranges('2024-01-01', '2024-12-01')
#print("Generated Week Ranges:", week_ranges)

# Assuming `tickers` is a list of ETF tickers
tickers = tickers

# Gather ETF Data
etf_histories = gather_etf_data_for_weeks(tickers, week_ranges)
#print(f"ETF Histories Collected for {len(etf_histories)} weeks")

# Create a mapping between week numbers and date ranges
week_key_mapping = {f"{i+1}w": week_range for i, week_range in enumerate(etf_histories.keys())}

# Debug: Print the week key mapping
print("Week Key Mapping:", week_key_mapping)

# Align `top_etfs_weekly` Keys with `etf_histories`
aligned_top_etfs_weekly = {}

for week_key, df_scores in weekly_scores_dfs.items():
    # Call the `select_top_etfs_weekly` function here
    forecast_period = week_key_mapping.get(week_key, None)
    if forecast_period:
        aligned_top_etfs_weekly[forecast_period] = select_top_etfs_weekly(df_scores, forecast_period)

# Portfolio initialization and management
ticker_shares = {}
ticker_shares_per_week = {}

for i, week_range in enumerate(etf_histories.keys()):
    current_week_key = week_range
    if i == 0:
        # First week initialization
        ticker_shares = initialize_shares_for_first_week(
            aligned_top_etfs_weekly[current_week_key],
            etf_histories,
            current_week_key
        )
        ticker_shares_per_week[current_week_key] = ticker_shares.copy()
    else:
        # Subsequent weeks' portfolio management
        prev_week_key = list(etf_histories.keys())[i - 1]
        ticker_shares = manage_etf_portfolio_weekly(
            aligned_top_etfs_weekly[prev_week_key],
            aligned_top_etfs_weekly[current_week_key],
            prev_week_key,
            current_week_key,
            ticker_shares,
            etf_histories
        )
        ticker_shares_per_week[current_week_key] = ticker_shares.copy()

print("\nFinal Ticker Shares per Week:")
for week, shares in ticker_shares_per_week.items():
    print(f"Week {week}: {shares}")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Week Key Mapping: {'1w': '2024-01-01 to 2024-01-07', '2w': '2024-01-08 to 2024-01-14', '3w': '2024-01-15 to 2024-01-21', '4w': '2024-01-22 to 2024-01-28', '5w': '2024-01-29 to 2024-02-04', '6w': '2024-02-05 to 2024-02-11', '7w': '2024-02-12 to 2024-02-18', '8w': '2024-02-19 to 2024-02-25', '9w': '2024-02-26 to 2024-03-03', '10w': '2024-03-04 to 2024-03-10', '11w': '2024-03-11 to 2024-03-17', '12w': '2024-03-18 to 2024-03-24', '13w': '2024-03-25 to 2024-03-31', '14w': '2024-04-01 to 2024-04-07', '15w': '2024-04-08 to 2024-04-14', '16w': '2024-04-15 to 2024-04-21', '17w': '2024-04-22 to 2024-04-28', '18w': '2024-04-29 to 2024-05-05', '19w': '2024-05-06 to 2024-05-12', '20w': '2024-05-13 to 2024-05-19', '21w': '2024-05-20 to 2024-05-26', '22w': '2024-05-27 to 2024-06-02', '23w': '2024-06-03 to 2024-06-09', '24w': '2024-06-10 to 2024-06-16', '25w': '2024-06-17 to 2024-06-23', '26w': '2024-06-24 to 2024-06-30', '27w': '2024-07-01 to 2024-07-07', '28w': '2024-07-08 to 2024-07-14', '29w': '20

In [26]:
# Define the first trading day of the 48th week
first_trading_day_49w = '2024-12-01'  # Adjust this to match the actual start date of the 48th week

# Identify the 48th and 49th week date range keys
week_48_range = list(ticker_shares_per_week.keys())[-1]  # Last key corresponds to the 48th week
week_49_start = first_trading_day_49w  # Replace with the actual start of the 49th week
print(f"Using data for the 48th week: {week_48_range}")

print(f"Fetching data starting from the first trading day of the 49th week: {week_49_start}")

week_49_end = '2024-12-06'
# Initialize a dictionary to store the values of shares
etf_values_49w = {}

# Ensure 48th week data exists
if week_48_range in ticker_shares_per_week:
    # Fetch ETF shares from the 48th week
    ticker_shares_48w = ticker_shares_per_week[week_48_range]
    
    # Fetch the first trading day price of the 49th week for each ETF
    for ticker, shares in ticker_shares_48w.items():
        print(f"Fetching data for ticker {ticker} starting from {week_49_start}...")
        # Download historical data for the 49th week
        data = yf.download(ticker, start=week_49_start, end=week_49_end)
        
        if not data.empty:
            # Get the closing price of the first trading day of the 49th week
            closing_price_49w = data['Close'].iloc[0]
            # Calculate the value of the shares
            total_value = shares * closing_price_49w
            etf_values_49w[ticker] = total_value
            print(f"{ticker}: {shares:.2f} shares at ${closing_price_49w:.2f} each, total value: ${total_value:.2f}")
        else:
            print(f"{ticker}: No data available for the 49th week's first trading day.")
else:
    print(f"No data available in ticker_shares_per_week for the 48th week: {week_48_range}")



Using data for the 48th week: 2024-11-25 to 2024-12-01
Fetching data starting from the first trading day of the 49th week: 2024-12-01
Fetching data for ticker IGM starting from 2024-12-01...


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

IGM: 487.72 shares at $102.51 each, total value: $49996.61
Fetching data for ticker FTEC starting from 2024-12-01...
FTEC: 190.26 shares at $186.99 each, total value: $35577.59





In [27]:
# Check if there are any values in the dictionary
if etf_values_49w:
    print("\nETF values on the 49th week's first trading day:")
    # Sum all the values (total portfolio value)
    total_value = sum(etf_values_49w.values())
    print(f"Total portfolio value: {total_value:.2f}")
    # Optionally display individual ETF values
    for ticker, value in etf_values_49w.items():
        print(f"{ticker}: {value:.2f}")
else:
    print("No values could be calculated for the 49th week's first trading day.")



ETF values on the 49th week's first trading day:
Total portfolio value: 85574.20
IGM: 49996.61
FTEC: 35577.59


### Values for 7th month

### Smoothing

In [28]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, 
    volatility_clustering, mean_forecast, std_forecast, mean_rachev, std_rachev, 
    mean_sharpe, std_sharpe, mean_sortino, std_sortino, mean_volatility_clustering, 
    std_volatility_clustering
):
    epsilon = 1e-8  # To prevent division by zero
    forecasted_mean = np.mean(forecasted_values)

    forecasted_mean_normalized = (forecasted_mean - mean_forecast) / (std_forecast + epsilon)
    rachev_normalized = (rachev_ratio - mean_rachev) / (std_rachev + epsilon)
    sharpe_normalized = (sharpe_ratio - mean_sharpe) / (std_sharpe + epsilon)
    sortino_normalized = (sortino_ratio - mean_sortino) / (std_sortino + epsilon)
    volatility_clustering_normalized = (volatility_clustering - mean_volatility_clustering) / (std_volatility_clustering + epsilon)

    # Composite score calculation
    score = (
        forecasted_mean_normalized
        - (risk_percentage * rachev_normalized)
        + sharpe_normalized
        + sortino_normalized
        - volatility_clustering_normalized
    )

    return score

def smooth_scores(scores, alpha=0.2):
    """
    Smooth scores using Exponential Moving Average (EMA).
    """
    if not isinstance(scores, (list, np.ndarray)):
        raise ValueError("Scores must be a list or numpy array of numerical values.")
    
    scores = np.array(scores)
    smoothed_scores = np.zeros_like(scores)
    smoothed_scores[0] = scores[0]  # Initialize EMA
    
    for t in range(1, len(scores)):
        smoothed_scores[t] = alpha * scores[t] + (1 - alpha) * smoothed_scores[t - 1]
    
    return smoothed_scores.tolist()

def calculate_means_and_stds_weekly(etf_pred_dict, forecast_period):
    returns_list = [etf_pred_dict[etf][f'returns_{forecast_period}'] for etf in etf_pred_dict]
    mean_forecast = np.mean([np.mean(returns) for returns in returns_list])
    std_forecast = np.std([np.mean(returns) for returns in returns_list])

    rachev_ratios = np.array([etf_pred_dict[etf][f'rachev_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_rachev = np.mean(rachev_ratios)
    std_rachev = np.std(rachev_ratios)

    sharpe_ratios = np.array([etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_sharpe = np.mean(sharpe_ratios)
    std_sharpe = np.std(sharpe_ratios)

    sortino_ratios = np.array([etf_pred_dict[etf][f'sortino_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_sortino = np.mean(sortino_ratios)
    std_sortino = np.std(sortino_ratios)

    volatility_clustering = np.array([etf_pred_dict[etf][f'volatility_clustering_{forecast_period}'] for etf in etf_pred_dict])
    mean_volatility_clustering = np.mean(volatility_clustering)
    std_volatility_clustering = np.std(volatility_clustering)

    return (
        mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering
    )

def calculate_scores_for_etfs_weekly(etf_pred_dict, forecast_period, risk_percentage, alpha=0.2):
    (mean_forecast, std_forecast, mean_rachev, std_rachev, 
     mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
     mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds_weekly(etf_pred_dict, forecast_period)

    raw_scores = []
    for etf in etf_pred_dict:
        forecasted_values = etf_pred_dict[etf][f'returns_{forecast_period}']
        rachev_ratio = etf_pred_dict[etf][f'rachev_ratio_{forecast_period}']
        sharpe_ratio = etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}']
        volatility_clustering = etf_pred_dict[etf][f'volatility_clustering_{forecast_period}']
        sortino_ratio = etf_pred_dict[etf][f'sortino_ratio_{forecast_period}']

        score = calculate_composite_score(
            forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, 
            sortino_ratio, volatility_clustering, mean_forecast, std_forecast, 
            mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, 
            std_sortino, mean_volatility_clustering, std_volatility_clustering
        )

        raw_scores.append(score)

    # Apply smoothing to the raw scores
    smoothed_scores = smooth_scores(raw_scores, alpha=alpha)

    results = []
    for idx, etf in enumerate(etf_pred_dict):
        results.append({
            'ETF': etf,
            'Week': forecast_period,
            'RiskPercentage': risk_percentage,
            'RawScore': raw_scores[idx],
            'SmoothedScore': smoothed_scores[idx]
        })

    return results

def main_weekly(tickers, etf_dict):
    etf_pred_dict = process_etf_data_weekly(tickers, etf_dict)
    risk_percentage = 0.10
    weekly_scores = {}

    for week in range(1, 49):
        week_key = f"{week}w"
        if any(f"returns_{week_key}" in etf_pred_dict[etf] for etf in etf_pred_dict):
            scores = calculate_scores_for_etfs_weekly(etf_pred_dict, week_key, risk_percentage)
            weekly_scores[week_key] = scores
        else:
            print(f"Skipping score calculation for {week_key}: No ETFs have data for this week.")

    return etf_pred_dict, weekly_scores

# Select top ETFs for weekly portfolio management
def select_top_etfs_weekly(df_scores, forecast_period):
    if df_scores.empty:
        print(f"No scores available for {forecast_period}. Skipping.")
        return []
    top_etfs = df_scores.nlargest(2, 'SmoothedScore')
    return top_etfs['ETF'].tolist()


In [29]:
etf_pred_dict, weekly_scores = main_weekly(tickers, etf_dict)


Debug: Returns for SMH, Forecast Period 1w: [-0.02114971 -0.01641913 -0.01328753 -0.010488  ]

Debug: Returns for SMH, Forecast Period 2w: [-0.00236152 -0.01116573 -0.01175923 -0.01105097 -0.01297825]

Debug: Returns for SMH, Forecast Period 3w: [-0.00797619 -0.01325713 -0.00273405 -0.00242556]

Debug: Returns for SMH, Forecast Period 4w: [-0.01171342 -0.01074597 -0.00792683 -0.01255071 -0.01969242]

Debug: Returns for SMH, Forecast Period 5w: [-0.0102156  -0.01522117 -0.01648088 -0.00956425 -0.00606219]

Debug: Returns for SMH, Forecast Period 6w: [-0.00558619 -0.01330606 -0.00610416 -0.00728405 -0.00485444]

Debug: Returns for SMH, Forecast Period 7w: [-0.01057591 -0.01520299 -0.0058159  -0.01044066 -0.011452  ]

Debug: Returns for SMH, Forecast Period 8w: [-0.01689423 -0.0141256   0.00472629 -0.01330351]

Debug: Returns for SMH, Forecast Period 9w: [-0.01023087 -0.01121312 -0.01387803 -0.006257   -0.0012057 ]

Debug: Returns for SMH, Forecast Period 10w: [-0.00638326 -0.01454497 -0


Debug: Returns for XSD, Forecast Period 14w: [-0.00244686 -0.00973153 -0.00323254 -0.00902619 -0.00590699]

Debug: Returns for XSD, Forecast Period 15w: [-0.00682055 -0.00305399 -0.0185381  -0.0053959  -0.01748949]

Debug: Returns for XSD, Forecast Period 16w: [-0.01533725 -0.01070783 -0.01556608 -0.01597719 -0.02381318]

Debug: Returns for XSD, Forecast Period 17w: [-0.01631819 -0.01207331 -0.00933799 -0.00802024 -0.00879104]

Debug: Returns for XSD, Forecast Period 18w: [-0.00794612 -0.01870687 -0.02189957 -0.01066282 -0.00809114]

Debug: Returns for XSD, Forecast Period 19w: [-0.0082732  -0.01331414 -0.01226776 -0.00917027 -0.01207251]

Debug: Returns for XSD, Forecast Period 20w: [-0.00757253 -0.00557658 -0.00516787 -0.00913219 -0.00878456]

Debug: Returns for XSD, Forecast Period 21w: [-0.00969743 -0.01018249 -0.00735598 -0.0114689  -0.00513432]

Debug: Returns for XSD, Forecast Period 22w: [-0.00560648 -0.00992238 -0.00643708 -0.00965803]

Debug: Returns for XSD, Forecast Period


Debug: Returns for VGT, Forecast Period 4w: [-0.01626586 -0.01721056 -0.01737224 -0.01707104 -0.01868544]

Debug: Returns for VGT, Forecast Period 5w: [-0.01343267 -0.01914961 -0.02263393 -0.01214707 -0.01007409]

Debug: Returns for VGT, Forecast Period 6w: [-0.01086628 -0.01148363 -0.01007716 -0.01147219 -0.00945687]

Debug: Returns for VGT, Forecast Period 7w: [-0.01424802 -0.01738851 -0.01044105 -0.01236383 -0.01732765]

Debug: Returns for VGT, Forecast Period 8w: [-0.02057586 -0.0216851  -0.00923422 -0.01909657]

Debug: Returns for VGT, Forecast Period 9w: [-0.01836484 -0.01769918 -0.01998598 -0.01562534 -0.00947603]

Debug: Returns for VGT, Forecast Period 10w: [-0.01464659 -0.02094447 -0.01470986 -0.01121026 -0.01933784]

Debug: Returns for VGT, Forecast Period 11w: [-0.01878343 -0.01438778 -0.01995129 -0.01689406 -0.02141055]

Debug: Returns for VGT, Forecast Period 12w: [-0.01737088 -0.01568122 -0.01081161 -0.0114262  -0.01672766]

Debug: Returns for VGT, Forecast Period 13w: 

In [30]:
# Initialize an empty dictionary to hold DataFrames for each week
weekly_scores_dfs = {}

# Assuming weekly_scores is a dictionary with keys as week identifiers ('1w', '2w', ..., '48w') 
# and values as the respective scores
for week_key, scores in weekly_scores.items():
    # Convert scores for the current week into a DataFrame
    weekly_scores_dfs[week_key] = pd.DataFrame(scores)

    
# Create DataFrame variables dynamically for 48 weeks
for week in range(1, 49):
    week_key = f'{week}w'
    globals()[f'df_scores_{week_key}'] = weekly_scores_dfs.get(week_key, pd.DataFrame())




In [31]:
# Initialize a dictionary to hold the top 2 ETFs for each week
top_etfs_weekly = {}

# Loop through the weekly DataFrames and select the top 2 ETFs for each week
for week_key, df_scores in weekly_scores_dfs.items():
    # Select the top 2 ETFs based on the 'Score' column
    top_etfs_weekly[week_key] = df_scores.nlargest(2, 'SmoothedScore')
    
    # Print the results for the current week
    print(f"\nTop 2 ETFs for {week_key} forecast:")
    print(top_etfs_weekly[week_key])



Top 2 ETFs for 1w forecast:
   ETF Week  RiskPercentage  RawScore  SmoothedScore
8  IGM   1w             0.1  7.991462       1.296970
9  IXN   1w             0.1 -4.509046       0.135767

Top 2 ETFs for 2w forecast:
    ETF Week  RiskPercentage  RawScore  SmoothedScore
8   IGM   2w             0.1  5.403989       1.018020
7  FTEC   2w             0.1  4.648313      -0.078472

Top 2 ETFs for 3w forecast:
    ETF Week  RiskPercentage  RawScore  SmoothedScore
8   IGM   3w             0.1   4.73923       0.864945
1  SOXX   3w             0.1   3.79871       0.310958

Top 2 ETFs for 4w forecast:
   ETF Week  RiskPercentage  RawScore  SmoothedScore
8  IGM   4w             0.1  5.752882       0.700868
9  IXN   4w             0.1 -0.912540       0.378186

Top 2 ETFs for 5w forecast:
   ETF Week  RiskPercentage  RawScore  SmoothedScore
8  IGM   5w             0.1  4.634972       0.720633
9  IXN   5w             0.1 -3.277212      -0.078936

Top 2 ETFs for 6w forecast:
    ETF Week  RiskPercent

In [32]:
# Function to generate week ranges
def generate_week_ranges(start_date, end_date):
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    week_ranges = []
    
    while start < end:
        week_start = start
        week_end = start + timedelta(days=6)
        if week_end > end:
            week_end = end
        week_ranges.append((week_start.strftime('%Y-%m-%d'), week_end.strftime('%Y-%m-%d')))
        start += timedelta(days=7)
    
    return week_ranges

# Function to gather ETF data for weeks
def gather_etf_data_for_weeks(tickers, week_ranges):
    etf_histories = {}
    for start_date, end_date in week_ranges:
        week = f"{start_date} to {end_date}"
        etf_histories[week] = {}
        for ticker in tickers:
            etf_data = yf.download(ticker, start=start_date, end=end_date)
            if etf_data.empty:
                print(f"No data found for {ticker} in {week}")
                continue
            etf_data.index = pd.to_datetime(etf_data.index)
            etf_histories[week][ticker] = etf_data
            #print(f"Data for {ticker} in {week} gathered.")
    return etf_histories

# Function to initialize shares for the first week
def initialize_shares_for_first_week(top_etfs_1w, etf_histories, week, investment_amount=50000):
    ticker_shares = {}
    first_trading_day_start = week.split(" to ")[0]
    
    for ticker in top_etfs_1w:
        etf_history = etf_histories.get(week, {}).get(ticker)
        
        if etf_history is not None:
            first_trading_day = pd.to_datetime(first_trading_day_start) + BDay(1)
            if first_trading_day not in etf_history.index:
                first_trading_day = etf_history.index[etf_history.index.searchsorted(first_trading_day)]
            
            price_on_first_trading_day = etf_history.loc[first_trading_day, 'Close']
            num_shares = (investment_amount * 0.975) / price_on_first_trading_day
            print(f"Shares 1st week: ({investment_amount} * 0.975) / {price_on_first_trading_day}")
            ticker_shares[ticker] = num_shares
            print(f"Ticker: {ticker}, First trading day: {first_trading_day.date()}, Price: {price_on_first_trading_day}, Shares: {num_shares:.2f}")
        else:
            print(f"No data found for {ticker} in {week}")
    return ticker_shares

# Function to manage ETF portfolio weekly
def manage_etf_portfolio_weekly(
    top_etfs_previous, top_etfs_current, previous_week, current_week, ticker_shares, gathered_data_per_week
):
    etf_histories_for_current_week = gathered_data_per_week.get(current_week, {})
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"Top 2 ETFs for {previous_week}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_week}: {top2etfs_current}")

    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Ensure one-to-one mapping between sell and buy ETFs
    if len(etfs_to_sell) != len(etfs_to_buy):
        print("Mismatch between ETFs to sell and buy. Adjusting allocation...")
        return ticker_shares  # Abort if mismatched for now, you can implement custom logic

    # Allocate funds ETF-by-ETF
    for etf_sell, etf_buy in zip(etfs_to_sell, etfs_to_buy):
        no_of_shares = ticker_shares.get(etf_sell, 0)
        if no_of_shares > 0:
            # Selling old ETF
            if etf_sell in etf_histories_for_current_week:
                first_trading_day_sell_price = etf_histories_for_current_week[etf_sell].loc[
                    etf_histories_for_current_week[etf_sell].index[0], 'Close'
                ]
                selling_value = no_of_shares * first_trading_day_sell_price * 0.975
                print(f"Sell {etf_sell}: {no_of_shares:.2f} shares at {first_trading_day_sell_price:.2f}. Total value: {selling_value:.2f}")

                # Remove sold ETF from portfolio
                del ticker_shares[etf_sell]

                # Buying new ETF
                if etf_buy in etf_histories_for_current_week:
                    first_trading_day_buy_price = etf_histories_for_current_week[etf_buy].loc[
                        etf_histories_for_current_week[etf_buy].index[0], 'Close'
                    ]
                    new_shares = (selling_value * 0.975) / first_trading_day_buy_price
                    print(f"Buy {etf_buy}: {new_shares:.2f} shares at {first_trading_day_buy_price:.2f}.")
                    ticker_shares[etf_buy] = new_shares
                else:
                    print(f"Data for {etf_buy} is missing for {current_week}. Skipping purchase.")
            else:
                print(f"Data for {etf_sell} is missing for {current_week}. Skipping sale.")
        else:
            print(f"No shares found for {etf_sell} to sell.")

    print(f"Updated ticker shares after {current_week}: {ticker_shares}")
    return ticker_shares


In [33]:
# Generate Week Ranges
week_ranges = generate_week_ranges('2024-01-01', '2024-12-01')
#print("Generated Week Ranges:", week_ranges)

# Assuming `tickers` is a list of ETF tickers
tickers = tickers

# Gather ETF Data
etf_histories = gather_etf_data_for_weeks(tickers, week_ranges)
#print(f"ETF Histories Collected for {len(etf_histories)} weeks")

# Create a mapping between week numbers and date ranges
week_key_mapping = {f"{i+1}w": week_range for i, week_range in enumerate(etf_histories.keys())}

# Debug: Print the week key mapping
print("Week Key Mapping:", week_key_mapping)

# Align `top_etfs_weekly` Keys with `etf_histories`
aligned_top_etfs_weekly = {}

for week_key, df_scores in weekly_scores_dfs.items():
    # Call the `select_top_etfs_weekly` function here
    forecast_period = week_key_mapping.get(week_key, None)
    if forecast_period:
        aligned_top_etfs_weekly[forecast_period] = select_top_etfs_weekly(df_scores, forecast_period)

# Portfolio initialization and management
ticker_shares = {}
ticker_shares_per_week = {}

for i, week_range in enumerate(etf_histories.keys()):
    current_week_key = week_range
    if i == 0:
        # First week initialization
        ticker_shares = initialize_shares_for_first_week(
            aligned_top_etfs_weekly[current_week_key],
            etf_histories,
            current_week_key
        )
        ticker_shares_per_week[current_week_key] = ticker_shares.copy()
    else:
        # Subsequent weeks' portfolio management
        prev_week_key = list(etf_histories.keys())[i - 1]
        ticker_shares = manage_etf_portfolio_weekly(
            aligned_top_etfs_weekly[prev_week_key],
            aligned_top_etfs_weekly[current_week_key],
            prev_week_key,
            current_week_key,
            ticker_shares,
            etf_histories
        )
        ticker_shares_per_week[current_week_key] = ticker_shares.copy()

print("\nFinal Ticker Shares per Week:")
for week, shares in ticker_shares_per_week.items():
    print(f"Week {week}: {shares}")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Week Key Mapping: {'1w': '2024-01-01 to 2024-01-07', '2w': '2024-01-08 to 2024-01-14', '3w': '2024-01-15 to 2024-01-21', '4w': '2024-01-22 to 2024-01-28', '5w': '2024-01-29 to 2024-02-04', '6w': '2024-02-05 to 2024-02-11', '7w': '2024-02-12 to 2024-02-18', '8w': '2024-02-19 to 2024-02-25', '9w': '2024-02-26 to 2024-03-03', '10w': '2024-03-04 to 2024-03-10', '11w': '2024-03-11 to 2024-03-17', '12w': '2024-03-18 to 2024-03-24', '13w': '2024-03-25 to 2024-03-31', '14w': '2024-04-01 to 2024-04-07', '15w': '2024-04-08 to 2024-04-14', '16w': '2024-04-15 to 2024-04-21', '17w': '2024-04-22 to 2024-04-28', '18w': '2024-04-29 to 2024-05-05', '19w': '2024-05-06 to 2024-05-12', '20w': '2024-05-13 to 2024-05-19', '21w': '2024-05-20 to 2024-05-26', '22w': '2024-05-27 to 2024-06-02', '23w': '2024-06-03 to 2024-06-09', '24w': '2024-06-10 to 2024-06-16', '25w': '2024-06-17 to 2024-06-23', '26w': '2024-06-24 to 2024-06-30', '27w': '2024-07-01 to 2024-07-07', '28w': '2024-07-08 to 2024-07-14', '29w': '20

In [34]:
# Define the first trading day of the 48th week
first_trading_day_49w = '2024-12-01'  # Adjust this to match the actual start date of the 48th week

# Identify the 48th and 49th week date range keys
week_48_range = list(ticker_shares_per_week.keys())[-1]  # Last key corresponds to the 48th week
week_49_start = first_trading_day_49w  # Replace with the actual start of the 49th week
print(f"Using data for the 48th week: {week_48_range}")

print(f"Fetching data starting from the first trading day of the 49th week: {week_49_start}")

week_49_end = '2024-12-06'
# Initialize a dictionary to store the values of shares
etf_values_49w = {}

# Ensure 48th week data exists
if week_48_range in ticker_shares_per_week:
    # Fetch ETF shares from the 48th week
    ticker_shares_48w = ticker_shares_per_week[week_48_range]
    
    # Fetch the first trading day price of the 49th week for each ETF
    for ticker, shares in ticker_shares_48w.items():
        print(f"Fetching data for ticker {ticker} starting from {week_49_start}...")
        # Download historical data for the 49th week
        data = yf.download(ticker, start=week_49_start, end=week_49_end)
        
        if not data.empty:
            # Get the closing price of the first trading day of the 49th week
            closing_price_49w = data['Close'].iloc[0]
            # Calculate the value of the shares
            total_value = shares * closing_price_49w
            etf_values_49w[ticker] = total_value
            print(f"{ticker}: {shares:.2f} shares at {closing_price_49w:.2f} each, total value: {total_value:.2f}")
        else:
            print(f"{ticker}: No data available for the 49th week's first trading day.")
else:
    print(f"No data available in ticker_shares_per_week for the 48th week: {week_48_range}")



[*********************100%%**********************]  1 of 1 completed

Using data for the 48th week: 2024-11-25 to 2024-12-01
Fetching data starting from the first trading day of the 49th week: 2024-12-01
Fetching data for ticker IGM starting from 2024-12-01...
IGM: 547.72 shares at 102.51 each, total value: 56146.94
Fetching data for ticker IXN starting from 2024-12-01...



[*********************100%%**********************]  1 of 1 completed

IXN: 184.19 shares at 84.80 each, total value: 15619.23





In [35]:
# Check if there are any values in the dictionary
if etf_values_49w:
    print("\nETF values on the 49th week's first trading day:")
    # Sum all the values (total portfolio value)
    total_value = sum(etf_values_49w.values())
    print(f"Total portfolio value: {total_value:.2f}")
    # Optionally display individual ETF values
    for ticker, value in etf_values_49w.items():
        print(f"{ticker}: {value:.2f}")
else:
    print("No values could be calculated for the 49th week's first trading day.")



ETF values on the 49th week's first trading day:
Total portfolio value: 71766.17
IGM: 56146.94
IXN: 15619.23


In [36]:
# Check if there are any values in the dictionary
if etf_values_49w:
    print("\nETF values on the 49th week's first trading day:")
    # Sum all the values (total portfolio value)
    total_value = sum(etf_values_49w.values())
    print(f"Total portfolio value: {total_value:.2f}")
    # Optionally display individual ETF values
    for ticker, value in etf_values_49w.items():
        print(f"{ticker}: {value:.2f}")
else:
    print("No values could be calculated for the 49th week's first trading day.")



ETF values on the 49th week's first trading day:
Total portfolio value: 71766.17
IGM: 56146.94
IXN: 15619.23
