In [1]:
#!pip install altair


In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split,GridSearchCV
from skopt import BayesSearchCV

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tabulate import tabulate
import pandas_ta as ta
from sklearn.preprocessing import StandardScaler,MinMaxScaler,Normalizer
import statsmodels.api as sm
import itertools

from pandas.tseries.offsets import MonthEnd
from pandas.tseries.offsets import BDay,Week

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows', None)

from datetime import datetime, timedelta

In [3]:
def data_loading(ticker_symbol, start_date, end_date):
    """Download the price history of one ticker from Yahoo Finance.

    Parameters
    ----------
    ticker_symbol : str
        Symbol handed to ``yf.Ticker``.
    start_date, end_date : str
        Bounds forwarded unchanged to ``Ticker.history(start=..., end=...)``.

    Returns
    -------
    tuple
        ``(etf_history, etf_data)``: the history frame with a time-zone-naive
        index, and the ``yf.Ticker`` handle it was fetched from.
    """
    ticker = yf.Ticker(ticker_symbol)
    history = ticker.history(start=start_date, end=end_date)
    # Drop the time zone from the index so it can be aligned with tz-naive frames.
    history.index = history.index.tz_localize(None)
    return history, ticker

In [4]:
# Function to derive the predictor columns
def etf_predictors(etf_history, start_date, end_date, etf_data, benchmark_ticker='^GSPC'):
    """Add the predictor columns and the ``log_returns`` target to ``etf_history``.

    The frame is modified in place and is also returned.

    Parameters
    ----------
    etf_history : pandas.DataFrame
        Price history; the columns ``Close``, ``High``, ``Low`` and
        ``Dividends`` are read.
    start_date, end_date : str
        Date range used to download the benchmark series.
    etf_data : object
        Ticker handle whose ``.info`` mapping supplies ``navPrice`` and
        ``totalAssets`` (``totalLiabilities`` is optional, default 0).
    benchmark_ticker : str, default '^GSPC'
        Symbol downloaded with ``yf.download`` for the rolling beta.

    Returns
    -------
    pandas.DataFrame
        ``etf_history`` with these columns added: 'Daily Return',
        'Volatility', 'Market Cap', 'P/B Ratio', 'Momentum', 'Rolling Beta',
        'Daily Profitability (%)', 'Div yield', 'ATR', 'RVI', 'RSI', 'ROC',
        'log_returns'.

    Raises
    ------
    KeyError
        If ``navPrice`` or ``totalAssets`` is missing from ``etf_data.info``.
    """
    close = etf_history['Close']

    # Daily simple returns.
    etf_history['Daily Return'] = close.pct_change()

    # 21-day volatility: standard deviation of daily returns, annualised with sqrt(252).
    etf_history['Volatility'] = etf_history['Daily Return'].rolling(window=21).std() * np.sqrt(252)

    # Fund-level figures from the ticker's info mapping.
    # NOTE(review): these look like single current values rather than a time
    # series, which would make 'Market Cap' and 'P/B Ratio' rescaled copies of
    # 'Close' for every historical row - TODO confirm this is intended.
    etf_info = etf_data.info
    nav_price = etf_info['navPrice']
    total_assets = etf_info['totalAssets']
    total_liabilities = etf_info.get('totalLiabilities', 0)  # may be absent

    # Shares outstanding implied by total assets and NAV.
    shares_outstanding = total_assets / nav_price

    # Market capitalisation = closing price * shares outstanding.
    etf_history['Market Cap'] = close * shares_outstanding

    # Price-to-book = closing price / book value per share.
    book_value_per_share = (total_assets - total_liabilities) / shares_outstanding
    etf_history['P/B Ratio'] = close / book_value_per_share

    # 1-month momentum (21 trading days).
    etf_history['Momentum'] = ta.mom(close, length=21)

    # Benchmark closes for the rolling beta.
    benchmark_data = yf.download(benchmark_ticker, start=start_date, end=end_date)
    if isinstance(benchmark_data.columns, pd.MultiIndex):
        # Some yfinance releases return (field, ticker) MultiIndex columns even
        # for a single symbol; keep only the field level so 'Close' is a plain column.
        benchmark_data.columns = benchmark_data.columns.get_level_values(0)

    # Ensure the index is time-zone naive before joining.
    benchmark_data.index = benchmark_data.index.tz_localize(None)
    combined_data = etf_history[['Close']].join(
        benchmark_data[['Close']], lsuffix='_ETF', rsuffix='_Benchmark'
    ).dropna()

    # Rolling beta over a 21-day window. Assignment aligns on the index, so
    # dates dropped from combined_data end up as NaN in etf_history.
    # NOTE(review): rolling_beta is fed closing *prices*, not returns; beta is
    # conventionally computed on returns - TODO confirm which is intended.
    etf_history['Rolling Beta'] = rolling_beta(combined_data, window=21)

    # Day-over-day percentage change of the close: (today - yesterday) / yesterday * 100.
    # The first row has no predecessor and is NaN.
    etf_history['Daily Profitability (%)'] = close.diff() / close.shift(1) * 100

    # Dividend paid on the day as a percentage of that day's close.
    etf_history['Div yield'] = etf_history['Dividends'] / close * 100

    # Volatility indicators.
    etf_history['ATR'] = ta.atr(etf_history['High'], etf_history['Low'], close, length=21)
    etf_history['RVI'] = ta.rvi(close, length=21)

    # Momentum indicators.
    rsi_window = 14  # Window size for RSI calculation
    roc_window = 12  # Window size for ROC calculation
    etf_history['RSI'] = ta.rsi(close, length=rsi_window)
    etf_history['ROC'] = ta.roc(close, length=roc_window)

    # Target: daily log return of the close.
    etf_history['log_returns'] = np.log(close / close.shift(1))

    return etf_history

# Function to calculate rolling beta
def rolling_beta(df, window):
    """Rolling covariance of the two close columns divided by the benchmark's rolling variance.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Close_Benchmark' and 'Close_ETF'.
    window : int
        Number of rows in each rolling window.

    Returns
    -------
    pandas.Series
        ``cov(Close_Benchmark, Close_ETF) / var(Close_Benchmark)`` per window,
        indexed like ``df``.
    """
    benchmark_windows = df['Close_Benchmark'].rolling(window=window)
    covariance = benchmark_windows.cov(df['Close_ETF'])
    variance = benchmark_windows.var()
    return covariance / variance



In [5]:
def pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates, feature_columns=None, scaling_strategy='StandardScaler', final_end_date='2024-12-01'):
    """Clean the data, split it by date, scale the features and build the forecast windows.

    Parameters
    ----------
    etf_history : pandas.DataFrame
        Date-indexed frame containing the feature columns and 'log_returns'.
        It is not modified; a cleaned copy is returned under 'etf_history'.
    train_start_date, train_end_date, test_start_date, test_end_date : str
        Label bounds for the train and test slices. ``.loc`` label slices
        include both end points, so a boundary date shared by two ranges that
        is present in the index lands in both slices.
    prediction_dates : list of str
        Start date of each forecast window. Every window but the last covers
        seven calendar days; the last one runs to ``final_end_date``.
    feature_columns : list of str, optional
        Feature columns; defaults to the twelve predictors listed below.
    scaling_strategy : {'StandardScaler', 'MinMaxScaler', 'Normalizer'}
        Scaler fitted on the training slice only.
    final_end_date : str
        End of the last forecast window.

    Returns
    -------
    dict
        Keys: 'train_data_scaled', 'test_data_scaled' (arrays with a leading
        constant column), 'y_train', 'y_test', 'scaler', 'X', 'etf_history',
        'forecast_data' (raw frame per window, keyed 'forecast_data_<k>w') and
        'forecast_data_scaled' (scaled array with constant, non-empty windows only).

    Raises
    ------
    ValueError
        If ``scaling_strategy`` is not supported.
    """
    # Fill gaps with the column medians and neutralise infinities. New frames are
    # built instead of using inplace=True so the caller's frame is left untouched.
    # +inf is replaced along with -inf; either one would otherwise reach the scaler.
    # NOTE(review): the medians are computed over the whole history, including
    # the test and forecast rows - TODO confirm this is acceptable.
    etf_history = etf_history.fillna(etf_history.median(numeric_only=True))
    etf_history = etf_history.replace([np.inf, -np.inf], 0)

    # Default feature columns if none are provided
    if feature_columns is None:
        feature_columns = ['Volatility', 'Volume', 'Daily Return', 'Market Cap', 'P/B Ratio', 'Momentum',
                           'Rolling Beta', 'Daily Profitability (%)', 'ATR', 'RVI', 'RSI', 'ROC']

    # Features (X) and target (y) are taken from the same rows.
    # NOTE(review): nothing is lagged, so same-day columns such as
    # 'Daily Return' sit next to the same-day 'log_returns' target - TODO
    # confirm the features should not be shifted.
    X = etf_history.loc[:, feature_columns]
    y = etf_history['log_returns']

    # Date-based train / test split
    train_data = X.loc[train_start_date:train_end_date]
    test_data = X.loc[test_start_date:test_end_date]
    y_train = y.loc[train_start_date:train_end_date]
    y_test = y.loc[test_start_date:test_end_date]

    # Pick the scaler requested by the caller
    supported_scalers = {
        'StandardScaler': StandardScaler,
        'MinMaxScaler': MinMaxScaler,
        'Normalizer': Normalizer,
    }
    if scaling_strategy not in supported_scalers:
        raise ValueError(f"Unsupported scaling strategy: {scaling_strategy}")
    scaler = supported_scalers[scaling_strategy]()

    # Fit on the training slice only, then apply the same transform elsewhere
    train_data_scaled = scaler.fit_transform(train_data)
    test_data_scaled = scaler.transform(test_data)

    # Raw feature rows for each forecast window
    forecast_data = {}
    last_position = len(prediction_dates) - 1
    for i, start_date in enumerate(prediction_dates):
        if i < last_position:
            # Seven calendar days starting at start_date (both ends inclusive)
            end_date = (pd.to_datetime(start_date) + Week(1) - pd.Timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            end_date = final_end_date  # The final end date provided or default
        forecast_data[f'forecast_data_{i+1}w'] = X.loc[start_date:end_date]

    # Scale each non-empty window. has_constant='add' forces the intercept
    # column: the default ('skip') omits it whenever an input column is already
    # constant and non-zero, which is always the case for a one-row window and
    # would leave the array one column short of what the model was trained on.
    forecast_data_scaled = {}
    for period_key, period_data in forecast_data.items():
        if not period_data.empty:
            forecast_data_scaled[period_key] = sm.add_constant(
                scaler.transform(period_data), has_constant='add'
            )

    # Intercept column for the train and test design matrices
    train_data_scaled = sm.add_constant(train_data_scaled, has_constant='add')
    test_data_scaled = sm.add_constant(test_data_scaled, has_constant='add')

    return {
        'train_data_scaled': train_data_scaled,
        'test_data_scaled': test_data_scaled,
        'y_train': y_train,
        'y_test': y_test,
        'scaler': scaler,
        'X': X,
        'etf_history': etf_history,
        'forecast_data_scaled': forecast_data_scaled,
        'forecast_data': forecast_data
    }

In [6]:
def model_training(train_data_scaled, y_train):
    """Fit OLS over a grid of fit options and return the fit with the best adjusted R^2.

    Parameters
    ----------
    train_data_scaled : array-like
        Design matrix, used as given (the caller is expected to have added the
        constant column already).
    y_train : array-like
        Target values aligned with the rows of ``train_data_scaled``.

    Returns
    -------
    The fitted statsmodels results object with the highest in-sample
    ``rsquared_adj``. The winning option set is printed.

    Raises
    ------
    RuntimeError
        If no option combination produced a usable fit.
    """
    # Option grid.
    # NOTE(review): the recorded runs always pick the first cov_type / use_t /
    # tol / maxiter values, which suggests these options do not change the
    # score being compared - TODO confirm the grid is worth its 144 fits.
    cov_types = ['HC0', 'HC1', 'HC2', 'HC3']
    use_ts = [True, False]
    methods = ['pinv', 'qr']
    tol_values = [1e-8, 1e-6, 1e-4]
    maxiter_values = [500, 1000, 5000]

    best_score = float('-inf')
    best_model = None
    best_params = None
    last_error = None

    # Loop through all combinations of options
    for cov_type, use_t, method, tol, maxiter in itertools.product(cov_types, use_ts, methods, tol_values, maxiter_values):
        try:
            model = sm.OLS(endog=y_train, exog=train_data_scaled)
            fitted_model = model.fit(cov_type=cov_type, use_t=use_t, method=method, tol=tol, maxiter=maxiter)
            # Selection metric: adjusted R^2 on the training data (no test data is involved).
            score = fitted_model.rsquared_adj
        except Exception as exc:
            # Skip combinations that cannot be fitted, but remember why; unlike a
            # bare except this lets KeyboardInterrupt / SystemExit through.
            last_error = exc
            continue

        # Strict '>' keeps the earliest combination among ties.
        if score > best_score:
            best_score = score
            best_model = fitted_model
            best_params = {
                'cov_type': cov_type,
                'use_t': use_t,
                'method': method,
                'tol': tol,
                'maxiter': maxiter
            }

    if best_model is None:
        # Previously this case surfaced as a NameError on best_params.
        raise RuntimeError("OLS could not be fitted with any parameter combination") from last_error

    # Display the best model parameters
    print("Best Model Parameters:")
    print(best_params)
    return best_model

In [7]:
#!pip install scikit-optimize


In [8]:
def eval_model(best_model,test_data_scaled,y_test,y_train):
    """Print and return the test-set MAE and a naive-scaled MAE (MASE).

    Parameters
    ----------
    best_model : object
        Fitted model exposing ``predict``.
    test_data_scaled : array-like
        Design matrix passed to ``best_model.predict``.
    y_test : pandas.Series or array-like
        Observed targets for the test period.
    y_train : any
        Not used; accepted for interface compatibility.

    Returns
    -------
    tuple
        ``(mae, mase)``. The MASE denominator is the mean absolute one-step
        difference of ``y_test`` itself (lag-1 naive forecast on the test series).
    """
    predicted = best_model.predict((test_data_scaled))
    mae = mean_absolute_error(y_test, predicted)

    # Lag-1 naive forecast: every observation is predicted by the one before it.
    # np.roll wraps the last value round to position 0, so that element is
    # excluded from the error below.
    previous_values = np.roll(y_test, 1)
    naive_mae = np.mean(np.abs(y_test[1:] - previous_values[1:]))
    mase = mae / naive_mae

    print("Mean Absolute Error (MAE):", mae)
    print("Mean Absolute Scaled Error (MASE):", mase)

    return mae, mase

In [9]:
# Ensured consistent feature handling and forecasting in predictions
def predictions(model, forecast_data_scaled, forecast_data):
    """Predict log returns for one forecast window.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict``.
    forecast_data_scaled : array-like
        Scaled design matrix handed to ``model.predict``.
    forecast_data : pandas.DataFrame
        Unscaled feature rows of the same window; supplies the index.

    Returns
    -------
    tuple
        ``(forecast_predictions_df, forecast_data_with_predictions)``: a
        one-column frame named 'log_returns', and ``forecast_data`` with that
        column appended.
    """
    predicted_values = model.predict(forecast_data_scaled)
    prediction_frame = pd.DataFrame(
        predicted_values,
        columns=["log_returns"],
        index=forecast_data.index,
    )
    features_and_predictions = pd.concat([forecast_data, prediction_frame], axis=1)
    return prediction_frame, features_and_predictions

In [10]:
"""
# Ensured consistent feature handling and forecasting in predictions
def predictions(model, forecast_data_scaled, forecast_data):
    predictions_forecast = model.predict(forecast_data_scaled)
    forecast_predictions_df = pd.DataFrame(predictions_forecast, columns=["log_returns"], index=forecast_data.index)
    forecast_data_with_predictions = pd.concat([forecast_data, forecast_predictions_df], axis=1)
    return forecast_predictions_df, forecast_data_with_predictions
    """

'\n# Ensured consistent feature handling and forecasting in predictions\ndef predictions(model, forecast_data_scaled, forecast_data):\n    predictions_forecast = model.predict(forecast_data_scaled)\n    forecast_predictions_df = pd.DataFrame(predictions_forecast, columns=["log_returns"], index=forecast_data.index)\n    forecast_data_with_predictions = pd.concat([forecast_data, forecast_predictions_df], axis=1)\n    return forecast_predictions_df, forecast_data_with_predictions\n    '

In [11]:
# Universes tried earlier, kept for reference:
#tickers = ['SPY', 'QQQ', 'VTI', 'IWM', 'EFA', 'EEM', 'GLD', 'SLV', 'USO', 'XLF']
#tickers = ['SMH', 'SOXX', 'PSI']
#tickers = ['SPY', 'QQQ', 'VTI']
#tickers = ['SPY']

# ETFs processed in this run.
tickers = [
    'SMH', 'SOXX', 'PSI', 'XSD', 'IYW',
    'XLK', 'VGT', 'FTEC', 'IGM', 'IXN',
    # Left out (original note: "No data available from 2000: 2019"):
    #'FNGU','USD', 'FNGO', 'GBTC', 'ETHE', 'TECL', 'FNGS', 'TQQQ', 'ROM', 'QLD'
]

# Download window.
start_date = '2000-01-01'
end_date = '2024-12-01'

# Train / test / forecast boundaries. Training starts with the download window.
train_start_date = start_date
train_end_date = '2014-01-01'
test_start_date = '2014-01-01'
test_end_date = '2024-01-01'
prediction_start_date = '2024-01-01'
#prediction_dates=['2024-01-01','2024-02-01','2024-03-01', '2024-04-01','2024-05-01','2024-06-01','2024-07-01','2024-08-01','2024-09-01','2024-10-01','2024-11-01']

In [12]:
# Forecast window start dates: 48 consecutive Mondays beginning 2024-01-01,
# formatted as 'YYYY-MM-DD' strings.
prediction_dates = [
    monday.strftime('%Y-%m-%d')
    for monday in pd.date_range(start='2024-01-01', periods=48, freq='W-MON')
]


In [13]:
dict_data = {}
etf_dict = {}
data_with_predictors = []

# pre_processing() builds one forecast window per entry of prediction_dates
# (keys 'forecast_data_1w' ... 'forecast_data_<N>w'), so the number of weeks is
# taken from that list instead of being repeated as a literal.
n_forecast_weeks = len(prediction_dates)

# Loop through tickers: download, build features, train, evaluate, forecast.
for i in tickers:
    etf_history, etf_data = data_loading(i, start_date, end_date)
    print(etf_data.info.get('longName'), ":", i)

    # Build predictors, then clean / split / scale
    etf_history = etf_predictors(etf_history, start_date, end_date, etf_data, benchmark_ticker='^GSPC')
    preprocessed_data = pre_processing(
        etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates
    )

    # Extract components
    train_data_scaled = preprocessed_data['train_data_scaled']
    test_data_scaled = preprocessed_data['test_data_scaled']
    y_train = preprocessed_data['y_train']
    y_test = preprocessed_data['y_test']
    scaler = preprocessed_data['scaler']
    X = preprocessed_data['X']
    etf_history = preprocessed_data['etf_history']

    # Train the model
    best_model = model_training(train_data_scaled, y_train)

    # Evaluate the model
    mae, mase = eval_model(best_model, test_data_scaled, y_test, y_train)
    print(f"Evaluation results for {i}: MAE = {mae}, MASE = {mase}")

    # Per-ETF containers for forecast inputs and predictions
    forecast_predictions = {}
    forecast_data_dict = {}
    forecast_data_scaled_dict = {}

    # Generate predictions for every forecast week
    for week in range(1, n_forecast_weeks + 1):
        week_key = f'forecast_data_{week}w'
        if week_key in preprocessed_data['forecast_data']:
            forecast_data = preprocessed_data['forecast_data'][week_key]
            # Empty windows have no scaled entry, hence .get()
            forecast_data_scaled = preprocessed_data['forecast_data_scaled'].get(week_key)

            if forecast_data is not None and forecast_data_scaled is not None:
                try:
                    forecast_predictions_df, _ = predictions(
                        best_model, forecast_data_scaled, forecast_data
                    )
                    # Store predictions and forecast data
                    forecast_predictions[f'forecast_predictions_df_{week}w'] = forecast_predictions_df['log_returns'].values
                    forecast_data_dict[week_key] = forecast_data
                    forecast_data_scaled_dict[week_key] = forecast_data_scaled
                except Exception as e:
                    print(f"Error generating predictions for {week_key} for ETF {i}: {e}")
            else:
                print(f"Warning: Missing scaled data for {week_key} for ETF {i}")
        else:
            print(f"Warning: Missing forecast key {week_key} for ETF {i}")

    # Assemble everything for the current ETF.
    # NOTE: forecast_data_dict and forecast_data_scaled_dict use the same
    # 'forecast_data_<k>w' keys, so the scaled arrays (unpacked second)
    # replace the raw frames in dict_data.
    dict_data = {
        "etf_history": etf_history,
        "X": X,
        "y_train_values": y_train,
        "model results": {"mae": mae, "mase": mase},
        **forecast_data_dict,  # Add raw forecast data
        **forecast_data_scaled_dict,  # Add scaled forecast data
        **forecast_predictions  # Add predictions
    }

    # Store in the main `etf_dict`
    etf_dict[i] = dict_data

# Print completion message
print("Weekly forecast data and predictions stored successfully.")

[*********************100%%**********************]  1 of 1 completed

VanEck Semiconductor ETF : SMH





Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'qr', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.0004958027292883956
Mean Absolute Scaled Error (MASE): 0.025717292384648413
Evaluation results for SMH: MAE = 0.0004958027292883956, MASE = 0.025717292384648413


[*********************100%%**********************]  1 of 1 completed

iShares Semiconductor ETF : SOXX





Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'pinv', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.00043861149568094664
Mean Absolute Scaled Error (MASE): 0.02248201783048627
Evaluation results for SOXX: MAE = 0.00043861149568094664, MASE = 0.02248201783048627


[*********************100%%**********************]  1 of 1 completed

Invesco Semiconductors ETF : PSI





Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'qr', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.00027221648202699914
Mean Absolute Scaled Error (MASE): 0.01332974143771139
Evaluation results for PSI: MAE = 0.00027221648202699914, MASE = 0.01332974143771139


[*********************100%%**********************]  1 of 1 completed

SPDR S&P Semiconductor ETF : XSD





Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'qr', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.0002343537637621655
Mean Absolute Scaled Error (MASE): 0.011221480284356395
Evaluation results for XSD: MAE = 0.0002343537637621655, MASE = 0.011221480284356395


[*********************100%%**********************]  1 of 1 completed

iShares U.S. Technology ETF : IYW





Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'qr', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.00023121462549368981
Mean Absolute Scaled Error (MASE): 0.015512563329453498
Evaluation results for IYW: MAE = 0.00023121462549368981, MASE = 0.015512563329453498


[*********************100%%**********************]  1 of 1 completed

The Technology Select Sector SPDR Fund : XLK





Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'qr', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.00018973567534454123
Mean Absolute Scaled Error (MASE): 0.013372176896293293
Evaluation results for XLK: MAE = 0.00018973567534454123, MASE = 0.013372176896293293


[*********************100%%**********************]  1 of 1 completed

Vanguard Information Technology Index Fund ETF Shares : VGT





Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'qr', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.0001878578114437768
Mean Absolute Scaled Error (MASE): 0.012997347885433698
Evaluation results for VGT: MAE = 0.0001878578114437768, MASE = 0.012997347885433698


[*********************100%%**********************]  1 of 1 completed

Fidelity MSCI Information Technology Index ETF : FTEC
Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'qr', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.0007518364167056398
Mean Absolute Scaled Error (MASE): 0.05199246800644897
Evaluation results for FTEC: MAE = 0.0007518364167056398, MASE = 0.05199246800644897



[*********************100%%**********************]  1 of 1 completed

iShares Expanded Tech Sector ETF : IGM





Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'qr', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.00023297892638845106
Mean Absolute Scaled Error (MASE): 0.015974942917354157
Evaluation results for IGM: MAE = 0.00023297892638845106, MASE = 0.015974942917354157


[*********************100%%**********************]  1 of 1 completed

iShares Global Tech ETF : IXN





Best Model Parameters:
{'cov_type': 'HC0', 'use_t': True, 'method': 'qr', 'tol': 1e-08, 'maxiter': 500}
Mean Absolute Error (MAE): 0.00015533922183411802
Mean Absolute Scaled Error (MASE): 0.01095997929752199
Evaluation results for IXN: MAE = 0.00015533922183411802, MASE = 0.01095997929752199
Weekly forecast data and predictions stored successfully.


In [14]:
def calculate_sharpe_ratio(returns, annual_risk_free_rate=0.1,period='daily'):
    """Annualised Sharpe ratio of a series of daily returns.

    Parameters
    ----------
    returns : array-like
        Daily returns.
    annual_risk_free_rate : float, default 0.1
        Annual risk-free rate; converted to a daily rate by compounding over
        252 trading days.
    period : str
        Not used by the calculation.

    Returns
    -------
    float
        ``(mean(returns) - daily_rf) / std(returns) * sqrt(252)`` using the
        population standard deviation, or 0 when that deviation is exactly zero.
    """
    trading_days = 252

    # Daily equivalent of the annual risk-free rate
    per_day_rf = (1 + annual_risk_free_rate) ** (1 / trading_days) - 1

    mean_excess = np.mean(returns) - per_day_rf
    dispersion = np.std(returns)

    # A flat series has no risk to scale by; report 0 instead of dividing by zero
    if dispersion == 0:
        return 0

    return (mean_excess / dispersion) * np.sqrt(trading_days)


def calculate_rachev_ratio(returns, lower_percentile=5, upper_percentile=95):
    """Ratio of the mean upper-tail return to the negated mean lower-tail return.

    Parameters
    ----------
    returns : array-like
        Return observations.
    lower_percentile, upper_percentile : float
        Percentiles that delimit the two tails (defaults 5 and 95).

    Returns
    -------
    float
        ``mean(returns >= upper cut) / -mean(returns <= lower cut)``. When the
        lower-tail mean is zero or positive the result is infinite or
        non-positive; no guard is applied.
    """
    ordered = np.sort(returns)

    # Tail boundaries
    lower_cut = np.percentile(ordered, lower_percentile)
    upper_cut = np.percentile(ordered, upper_percentile)

    # Average of each tail, boundary value included
    lower_tail_mean = ordered[ordered <= lower_cut].mean()
    upper_tail_mean = ordered[ordered >= upper_cut].mean()

    return upper_tail_mean / -lower_tail_mean


def calculate_volatility_clustering(returns):
    """Lag-1 autocorrelation of squared returns.

    Parameters
    ----------
    returns : array-like
        Return observations in time order (list, ndarray or pandas Series).

    Returns
    -------
    float
        ``sum((s[t] - m) * (s[t+1] - m)) / sum((s - m) ** 2)`` where ``s`` is
        the squared returns and ``m`` their mean; 0 when the denominator is zero.
    """
    # Coerce to a plain float array. A list cannot be squared with **, and a
    # pandas Series would align the two lagged slices below by index label
    # instead of by position, silently yielding the wrong sum.
    squared_returns = np.asarray(returns, dtype=float) ** 2

    # Mean of squared returns
    mean_squared_returns = np.mean(squared_returns)

    # Deviations from the mean, paired with their successor for the numerator
    deviations = squared_returns - mean_squared_returns
    numerator = np.sum(deviations[:-1] * deviations[1:])
    denominator = np.sum(deviations ** 2)

    if denominator == 0:
        return 0  # Avoid division by zero

    return numerator / denominator

def calculate_sortino_ratio(log_returns, target_log_return=0.0):
    """
    Sortino ratio computed from log returns.

    Parameters:
    - log_returns (array-like): Log returns for the period.
    - target_log_return (float): Target log return; defaults to 0.

    Returns:
    - float: (mean return - target) divided by (downside deviation + 1e-8).
      The small constant keeps the division defined when no return falls
      below the target.
    """
    observed = np.array(log_returns)

    # Shortfall below the target; returns at or above the target contribute zero
    shortfall = np.maximum(0, target_log_return - observed)

    # Target downside deviation: root mean square of the shortfalls
    downside_deviation = np.sqrt(np.mean(np.square(shortfall)))

    epsilon = 1e-8
    excess = np.mean(observed) - target_log_return
    return excess / (downside_deviation + epsilon)


In [15]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering, 
    mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
    mean_volatility_clustering, std_volatility_clustering
):
    """Combine standardised forecast and risk metrics into one score.

    Each metric is standardised as ``(value - mean) / (std + 1e-8)`` with the
    supplied mean / std pair. The score is::

        z(forecast mean) - risk_percentage * z(rachev) + z(sharpe)
            + z(sortino) - z(volatility clustering)

    Inputs, standardised values and the final score are printed for debugging.

    Returns
    -------
    float
        The composite score.
    """
    epsilon = 1e-8  # keeps the division defined when a std is zero

    def _standardise(value, centre, spread):
        return (value - centre) / (spread + epsilon)

    forecasted_mean = np.mean(forecasted_values)

    # Debugging: raw inputs
    print("\nDebug: Composite Score Calculation")
    print(f"Forecasted Mean: {forecasted_mean}, Risk Percentage: {risk_percentage}")
    print(f"Rachev Ratio: {rachev_ratio}, Sharpe Ratio: {sharpe_ratio}")
    print(f"Sortino Ratio: {sortino_ratio}, Volatility Clustering: {volatility_clustering}")

    z_forecast = _standardise(forecasted_mean, mean_forecast, std_forecast)
    z_rachev = _standardise(rachev_ratio, mean_rachev, std_rachev)
    z_sharpe = _standardise(sharpe_ratio, mean_sharpe, std_sharpe)
    z_sortino = _standardise(sortino_ratio, mean_sortino, std_sortino)
    z_clustering = _standardise(volatility_clustering, mean_volatility_clustering, std_volatility_clustering)

    # Debugging: standardised values
    print(f"Normalized Values -> Forecasted Mean: {z_forecast}, Rachev: {z_rachev}")
    print(f"Sharpe: {z_sharpe}, Sortino: {z_sortino}, Volatility Clustering: {z_clustering}")

    score = z_forecast - (risk_percentage * z_rachev) + z_sharpe + z_sortino - z_clustering

    # Debugging: final score
    print(f"Final Composite Score: {score}")

    return score


def process_etf_data_weekly(tickers, etf_dict):
    """Collect the forecast returns of every ETF and compute its per-period risk metrics.

    The list of forecast periods is read from the first entry of ``etf_dict``
    (keys starting with 'forecast_predictions_df'; the period is the text
    after the last underscore) and is then applied to every ticker.

    Parameters
    ----------
    tickers : iterable of str
        ETF names to process; each must be a key of ``etf_dict``.
    etf_dict : dict
        Per-ETF dictionaries holding 'forecast_predictions_df_<period>' arrays.

    Returns
    -------
    dict
        ``{etf: {...}}`` with, for every period, the keys 'returns_<period>',
        'rachev_ratio_<period>', 'sharpe_ratio_<period>',
        'sortino_ratio_<period>' and 'volatility_clustering_<period>'.
    """
    prediction_prefix = 'forecast_predictions_df'

    # Periods available in the first ETF's entry, e.g. '1w', '2w', ...
    first_entry = next(iter(etf_dict.values()))
    forecast_periods = []
    for key in first_entry.keys():
        if key.startswith(prediction_prefix):
            forecast_periods.append(key.split('_')[-1])

    etf_pred_dict = {}
    for etf_name in tickers:
        # First pass: copy the predicted returns of every period
        period_returns = {}
        for period in forecast_periods:
            period_returns[f"returns_{period}"] = etf_dict[etf_name][f"forecast_predictions_df_{period}"]
        etf_pred_dict[etf_name] = period_returns

        # Second pass: derive the metrics from those returns
        etf_metrics = etf_pred_dict[etf_name]
        for period in forecast_periods:
            returns = etf_metrics[f"returns_{period}"]

            # Log returns for debugging
            print(f"\nDebug: Returns for {etf_name}, Forecast Period {period}: {returns}")

            etf_metrics[f"rachev_ratio_{period}"] = calculate_rachev_ratio(returns)
            etf_metrics[f"sharpe_ratio_{period}"] = calculate_sharpe_ratio(returns)
            etf_metrics[f"sortino_ratio_{period}"] = calculate_sortino_ratio(returns)
            etf_metrics[f"volatility_clustering_{period}"] = calculate_volatility_clustering(returns)

    return etf_pred_dict



def calculate_means_and_stds_weekly(etf_pred_dict, forecast_period):
    """Cross-ETF mean and standard deviation of every metric for one forecast period.

    Parameters
    ----------
    etf_pred_dict : dict
        Per-ETF metric dictionaries keyed '<metric>_<forecast_period>'.
    forecast_period : str
        Period suffix, e.g. '1w'.

    Returns
    -------
    tuple
        ``(mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe,
        std_sharpe, mean_sortino, std_sortino, mean_volatility_clustering,
        std_volatility_clustering)``. The forecast statistics are taken over
        each ETF's mean predicted return. Intermediate values are printed.
    """
    # Mean predicted return of each ETF for this period
    per_etf_means = [
        np.mean(etf_pred_dict[etf][f'returns_{forecast_period}']) for etf in etf_pred_dict
    ]
    mean_forecast = np.mean(per_etf_means)
    std_forecast = np.std(per_etf_means)

    print(f"\nDebug: Forecast Period = {forecast_period}")
    print(f"All Returns Means: {per_etf_means}")
    print(f"Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")

    # (heading printed with the raw values, label used in the summary line, key prefix)
    metric_specs = (
        ("All Rachev Ratios", "Rachev", "rachev_ratio"),
        ("All Sharpe Ratios", "Sharpe", "sharpe_ratio"),
        ("All Sortino Ratios", "Sortino", "sortino_ratio"),
        ("All Volatility Clustering", "Volatility Clustering", "volatility_clustering"),
    )

    summary = []
    for heading, label, key_prefix in metric_specs:
        values = np.array([etf_pred_dict[etf][f'{key_prefix}_{forecast_period}'] for etf in etf_pred_dict])
        print(f"{heading}: {values}")
        summary.append((label, np.mean(values), np.std(values)))

    for label, metric_mean, metric_std in summary:
        print(f"Mean {label} = {metric_mean}, Std {label} = {metric_std}")

    (_, mean_rachev, std_rachev), (_, mean_sharpe, std_sharpe), \
        (_, mean_sortino, std_sortino), \
        (_, mean_volatility_clustering, std_volatility_clustering) = summary

    return (
        mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering
    )



def calculate_scores_for_etfs_weekly(etf_pred_dict, forecast_period, risk_percentage):
    """Composite score of every ETF for one forecast period.

    Parameters
    ----------
    etf_pred_dict : dict
        Per-ETF metric dictionaries keyed '<metric>_<forecast_period>'.
    forecast_period : str
        Period suffix, e.g. '1w'.
    risk_percentage : float
        Weight passed through to ``calculate_composite_score``.

    Returns
    -------
    list of dict
        One record per ETF with the keys 'ETF', 'Week', 'RiskPercentage' and
        'Score', in the iteration order of ``etf_pred_dict``.
    """
    # Cross-ETF statistics used to standardise each metric
    (mean_forecast, std_forecast, mean_rachev, std_rachev,
     mean_sharpe, std_sharpe, mean_sortino, std_sortino,
     mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds_weekly(etf_pred_dict, forecast_period)

    scores = []
    for etf, metrics in etf_pred_dict.items():
        forecasted_values = metrics[f'returns_{forecast_period}']
        rachev_ratio = metrics[f'rachev_ratio_{forecast_period}']
        sharpe_ratio = metrics[f'sharpe_ratio_{forecast_period}']
        volatility_clustering = metrics[f'volatility_clustering_{forecast_period}']
        sortino_ratio = metrics[f'sortino_ratio_{forecast_period}']

        # Debugging: inputs to the composite score
        print(f"\nDebug: ETF = {etf}, Forecast Period = {forecast_period}")
        print(f"Forecasted Values Mean: {np.mean(forecasted_values)}")
        print(f"Rachev Ratio: {rachev_ratio}, Sharpe Ratio: {sharpe_ratio}")
        print(f"Sortino Ratio: {sortino_ratio}, Volatility Clustering: {volatility_clustering}")
        print(f"Means and Stds: Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")

        score = calculate_composite_score(
            forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio,
            sortino_ratio, volatility_clustering, mean_forecast, std_forecast,
            mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino,
            std_sortino, mean_volatility_clustering, std_volatility_clustering
        )

        record = {
            'ETF': etf,
            'Week': forecast_period,
            'RiskPercentage': risk_percentage,
            'Score': score
        }
        scores.append(record)

        # Debugging: the calculated score
        print(f"Calculated Score for {etf} ({forecast_period}): {score}")

    return scores



def main_weekly(tickers, etf_dict, n_weeks=48, risk_percentage=0.10):
    """Run the weekly forecasting pipeline and score the ETFs week by week.

    Args:
        tickers: Passed through to ``process_etf_data_weekly``.
        etf_dict: Passed through to ``process_etf_data_weekly``.
        n_weeks: Number of weekly horizons to score; horizons are labelled
            ``"1w"`` .. ``"<n_weeks>w"``. Defaults to 48.
        risk_percentage: Risk weight forwarded to the scoring function.
            Defaults to 0.10.

    Returns:
        Tuple ``(etf_pred_dict, weekly_scores)`` where ``weekly_scores`` maps
        each week label that had data to the list of per-ETF score dicts.
    """
    etf_pred_dict = process_etf_data_weekly(tickers, etf_dict)
    weekly_scores = {}

    for week in range(1, n_weeks + 1):
        week_key = f"{week}w"
        returns_key = f"returns_{week_key}"

        # Only ETFs that actually carry this week's forecast can be scored.
        # Scoring the full dict would raise KeyError as soon as a single ETF
        # is missing the week while another one has it.
        etfs_with_data = {
            etf: metrics for etf, metrics in etf_pred_dict.items() if returns_key in metrics
        }
        if not etfs_with_data:
            print(f"Skipping score calculation for {week_key}: No ETFs have data for this week.")
            continue

        if len(etfs_with_data) < len(etf_pred_dict):
            missing = [etf for etf in etf_pred_dict if etf not in etfs_with_data]
            print(f"{week_key}: no forecast data for {missing}; scoring the remaining ETFs only.")

        scores = calculate_scores_for_etfs_weekly(etfs_with_data, week_key, risk_percentage)
        weekly_scores[week_key] = scores
        print(f"Scores calculated for {week_key}:")
        for score in scores:
            print(score)

    return etf_pred_dict, weekly_scores


# Run the weekly scoring pipeline on the notebook-level `tickers` and `etf_dict`.
# `weekly_scores` is keyed by week label ("1w", "2w", ...) as built in main_weekly.
etf_pred_dict, weekly_scores = main_weekly(tickers, etf_dict)



Debug: Returns for SMH, Forecast Period 1w: [-0.03475353 -0.0190482  -0.0071898   0.00528752]

Debug: Returns for SMH, Forecast Period 2w: [ 0.03416537  0.00151172 -0.00033629  0.00427034 -0.00496809]

Debug: Returns for SMH, Forecast Period 3w: [ 0.01433271 -0.00766895  0.03076492  0.03667688]

Debug: Returns for SMH, Forecast Period 4w: [ 0.00348621  0.00429403  0.01685095 -0.00086758 -0.02236661]

Debug: Returns for SMH, Forecast Period 5w: [ 0.01060041 -0.01112131 -0.01565295  0.00825322  0.0188383 ]

Debug: Returns for SMH, Forecast Period 6w: [ 0.01905304 -0.01181881  0.01939125  0.01357168  0.02159133]

Debug: Returns for SMH, Forecast Period 7w: [-0.00440405 -0.01907772  0.01932874 -0.0039811  -0.00517776]

Debug: Returns for SMH, Forecast Period 8w: [-0.02093725 -0.00787031  0.06679589 -0.00756042]

Debug: Returns for SMH, Forecast Period 9w: [ 0.00700177 -0.00401006 -0.01240139  0.02075349  0.03982313]

Debug: Returns for SMH, Forecast Period 10w: [ 0.01556183 -0.01599483  0

In [16]:
# One DataFrame per week, keyed by the same week label as weekly_scores
weekly_scores_dfs = {}

# weekly_scores maps a week label ('1w', '2w', ...) to the list of per-ETF
# score dicts returned by main_weekly
for week_key, scores in weekly_scores.items():
    # A list of dicts becomes a frame with one row per ETF
    weekly_scores_dfs[week_key] = pd.DataFrame(scores)

    
# Additionally expose every week's frame as a module-level variable
# df_scores_1w ... df_scores_48w; weeks without scores get an empty DataFrame.
# NOTE(review): weekly_scores_dfs already holds the same frames, so these
# dynamically created globals only matter if other cells use them by name.
for week in range(1, 49):
    week_key = f'{week}w'
    globals()[f'df_scores_{week_key}'] = weekly_scores_dfs.get(week_key, pd.DataFrame())




In [17]:
# Week label -> DataFrame holding the two highest-scoring ETFs of that week
top_etfs_weekly = {}

for week_key in weekly_scores_dfs:
    df_scores = weekly_scores_dfs[week_key]

    # Keep the two rows with the largest 'Score'
    top_etfs_weekly[week_key] = df_scores.nlargest(2, 'Score')

    # Show the selection for this week (header line, then the frame)
    print(f"\nTop 2 ETFs for {week_key} forecast:", top_etfs_weekly[week_key], sep="\n")



Top 2 ETFs for 1w forecast:
    ETF Week  RiskPercentage     Score
7  FTEC   1w             0.1  4.493329
8   IGM   1w             0.1  3.396828

Top 2 ETFs for 2w forecast:
    ETF Week  RiskPercentage     Score
7  FTEC   2w             0.1  5.327491
8   IGM   2w             0.1  3.465493

Top 2 ETFs for 3w forecast:
    ETF Week  RiskPercentage     Score
0   SMH   3w             0.1  5.433687
7  FTEC   3w             0.1  2.606728

Top 2 ETFs for 4w forecast:
    ETF Week  RiskPercentage     Score
7  FTEC   4w             0.1  5.189820
8   IGM   4w             0.1  4.603237

Top 2 ETFs for 5w forecast:
    ETF Week  RiskPercentage     Score
8   IGM   5w             0.1  4.461813
7  FTEC   5w             0.1  3.290580

Top 2 ETFs for 6w forecast:
    ETF Week  RiskPercentage     Score
7  FTEC   6w             0.1  3.428748
0   SMH   6w             0.1  2.617966

Top 2 ETFs for 7w forecast:
   ETF Week  RiskPercentage     Score
2  PSI   7w             0.1  5.849144
3  XSD   7w        

In [18]:
def select_top_etfs_weekly(df_scores, forecast_period):
    """Return the tickers of the two best-scoring ETFs for one period.

    Args:
        df_scores: DataFrame with at least the columns ``ETF`` and ``Score``.
        forecast_period: Label used only in the progress messages.

    Returns:
        List of up to two ETF tickers ordered by descending ``Score``; an
        empty list when ``df_scores`` has no rows.
    """
    if not df_scores.empty:
        print(f"Processing scores for {forecast_period}:")
        print(df_scores.head())  # Peek at the first rows of the input
        best_tickers = df_scores.nlargest(2, 'Score')['ETF'].tolist()
        print(f"Top ETFs for {forecast_period}: {best_tickers}")
        return best_tickers

    print(f"No scores available for {forecast_period}. Skipping.")
    return []



In [19]:
# Function to generate week ranges
def generate_week_ranges(start_date, end_date):
    """Split ``[start_date, end_date]`` into consecutive 7-day windows.

    Args:
        start_date: First day as a ``'YYYY-MM-DD'`` string.
        end_date: Last day as a ``'YYYY-MM-DD'`` string.

    Returns:
        List of ``(week_start, week_end)`` string tuples. Each window starts
        7 days after the previous one and ends 6 days after its own start,
        except that the last window is clipped to ``end_date``. The list is
        empty when ``start_date`` is not before ``end_date``.
    """
    date_format = '%Y-%m-%d'
    cursor = datetime.strptime(start_date, date_format)
    final_day = datetime.strptime(end_date, date_format)

    windows = []
    while cursor < final_day:
        # A window never extends past the overall end date
        window_close = min(cursor + timedelta(days=6), final_day)
        windows.append((cursor.strftime(date_format), window_close.strftime(date_format)))
        cursor = cursor + timedelta(days=7)

    return windows

# Function to gather ETF data for weeks
def gather_etf_data_for_weeks(tickers, week_ranges):
    """Download price history for every ticker in every week window.

    Args:
        tickers: Iterable of ticker symbols.
        week_ranges: Iterable of ``(start_date, end_date)`` string pairs.

    Returns:
        Dict keyed by ``"<start> to <end>"``; each value is a dict mapping
        ticker to the DataFrame returned by ``yf.download`` for that window.
        Tickers whose download comes back empty are left out of that week.

    Note:
        One download is issued per (week, ticker) pair.
        NOTE(review): yfinance is believed to treat ``end`` as exclusive, so
        the window's last calendar day is probably not fetched - confirm.
    """
    histories_by_week = {}

    for window_start, window_end in week_ranges:
        label = f"{window_start} to {window_end}"
        frames_for_week = {}
        histories_by_week[label] = frames_for_week

        for symbol in tickers:
            frame = yf.download(symbol, start=window_start, end=window_end)
            if not frame.empty:
                # Normalise the index to datetimes before storing
                frame.index = pd.to_datetime(frame.index)
                frames_for_week[symbol] = frame
            else:
                print(f"No data found for {symbol} in {label}")

    return histories_by_week

# Function to initialize shares for the first week
def initialize_shares_for_first_week(top_etfs_1w, etf_histories, week, investment_amount=50000):
    """Buy the initial positions on the first trading day of ``week``.

    The first trading day is the first row of the ETF's history dated on or
    after the week's start date. (Previously one business day was added to
    the start date first, which skipped the start date itself whenever it was
    a trading day, and an out-of-range lookup raised ``IndexError``.)

    Args:
        top_etfs_1w: Tickers to buy.
        etf_histories: Dict ``week label -> {ticker -> price DataFrame}``;
            each frame needs a sorted datetime index and a ``Close`` column.
        week: Week label of the form ``"<start> to <end>"``.
        investment_amount: Cash committed to EACH ticker (not split between
            them). A factor of 0.975 is applied before buying - presumably a
            2.5% transaction cost; confirm.

    Returns:
        Dict ``ticker -> number of shares``. Tickers without usable data for
        the week are reported and left out.
    """
    ticker_shares = {}
    week_start = pd.to_datetime(week.split(" to ")[0])
    histories_for_week = etf_histories.get(week, {})

    for ticker in top_etfs_1w:
        etf_history = histories_for_week.get(ticker)
        if etf_history is None:
            print(f"No data found for {ticker} in {week}")
            continue

        # Position of the first row dated on/after the week start
        position = etf_history.index.searchsorted(week_start)
        if position >= len(etf_history.index):
            print(f"No trading day on or after {week_start.date()} for {ticker} in {week}")
            continue

        first_trading_day = etf_history.index[position]
        price_on_first_trading_day = etf_history.loc[first_trading_day, 'Close']
        num_shares = (investment_amount * 0.975) / price_on_first_trading_day
        print(f"Shares 1st week: ({investment_amount} * 0.975) / {price_on_first_trading_day}")
        ticker_shares[ticker] = num_shares
        print(f"Ticker: {ticker}, First trading day: {first_trading_day.date()}, Price: {price_on_first_trading_day}, Shares: {num_shares:.2f}")

    return ticker_shares

# Function to manage ETF portfolio weekly
def _first_close_of_week(history):
    """Return the ``Close`` value of the first row of a weekly price frame."""
    return history.loc[history.index[0], 'Close']


def manage_etf_portfolio_weekly(
    top_etfs_previous, top_etfs_current, previous_week, current_week, ticker_shares, gathered_data_per_week
):
    """Rotate the portfolio from last week's top ETFs into this week's.

    ETFs that dropped out of the top list are sold at the first ``Close`` of
    the current week and the proceeds are moved, pairwise, into the ETFs that
    newly entered the list. A factor of 0.975 is applied to the sale proceeds
    and again to the purchase - presumably a 2.5% cost per leg; confirm.

    A swap is now executed only when BOTH legs have price data for the
    current week. Previously a missing buy-side price left the old position
    deleted with nothing bought, so the proceeds silently vanished.

    Args:
        top_etfs_previous: Top tickers of the previous week.
        top_etfs_current: Top tickers of the current week.
        previous_week: Label of the previous week (used in messages only).
        current_week: Label of the current week; key into
            ``gathered_data_per_week``.
        ticker_shares: Dict ``ticker -> shares``; updated IN PLACE.
        gathered_data_per_week: Dict ``week label -> {ticker -> DataFrame}``.

    Returns:
        The same ``ticker_shares`` dict after the swaps. It is returned
        unchanged when the number of ETFs to sell and to buy differ.
    """
    etf_histories_for_current_week = gathered_data_per_week.get(current_week, {})
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"Top 2 ETFs for {previous_week}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_week}: {top2etfs_current}")

    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Swaps are pairwise, so both sides must have the same number of ETFs
    if len(etfs_to_sell) != len(etfs_to_buy):
        print("Mismatch between ETFs to sell and buy. Adjusting allocation...")
        return ticker_shares  # Nothing is traded in this case

    for etf_sell, etf_buy in zip(etfs_to_sell, etfs_to_buy):
        no_of_shares = ticker_shares.get(etf_sell, 0)
        if no_of_shares <= 0:
            print(f"No shares found for {etf_sell} to sell.")
            continue

        if etf_sell not in etf_histories_for_current_week:
            print(f"Data for {etf_sell} is missing for {current_week}. Skipping sale.")
            continue

        # Check the buy leg BEFORE selling so the position is never dropped
        # without a replacement.
        if etf_buy not in etf_histories_for_current_week:
            print(f"Data for {etf_buy} is missing for {current_week}. Skipping purchase and keeping {etf_sell}.")
            continue

        first_trading_day_sell_price = _first_close_of_week(etf_histories_for_current_week[etf_sell])
        selling_value = no_of_shares * first_trading_day_sell_price * 0.975
        print(f"Sell {etf_sell}: {no_of_shares:.2f} shares at {first_trading_day_sell_price:.2f}. Total value: {selling_value:.2f}")
        del ticker_shares[etf_sell]

        first_trading_day_buy_price = _first_close_of_week(etf_histories_for_current_week[etf_buy])
        new_shares = (selling_value * 0.975) / first_trading_day_buy_price
        print(f"Buy {etf_buy}: {new_shares:.2f} shares at {first_trading_day_buy_price:.2f}.")
        ticker_shares[etf_buy] = new_shares

    print(f"Updated ticker shares after {current_week}: {ticker_shares}")
    return ticker_shares


In [20]:
# Build the weekly windows covering the back-test period
week_ranges = generate_week_ranges('2024-01-01', '2024-12-01')

# Download price history for the notebook-level `tickers` in every window
etf_histories = gather_etf_data_for_weeks(tickers, week_ranges)

# Week labels ("<start> to <end>") in chronological order; computed once so the
# loop below does not rebuild the key list on every iteration.
week_labels = list(etf_histories.keys())

# Map forecast labels ("1w", "2w", ...) onto the date-range labels by position
week_key_mapping = {f"{i+1}w": week_range for i, week_range in enumerate(week_labels)}

# Debug: Print the week key mapping
print("Week Key Mapping:", week_key_mapping)

# Re-key the weekly top-ETF selection by date-range label
aligned_top_etfs_weekly = {}

for week_key, df_scores in weekly_scores_dfs.items():
    forecast_period = week_key_mapping.get(week_key, None)
    if forecast_period:
        aligned_top_etfs_weekly[forecast_period] = select_top_etfs_weekly(df_scores, forecast_period)

# Portfolio initialization and management
ticker_shares = {}
ticker_shares_per_week = {}

for i, week_range in enumerate(week_labels):
    current_week_key = week_range
    # Weeks without scores have no entry in aligned_top_etfs_weekly. Fall back
    # to an empty selection instead of raising KeyError; the portfolio helpers
    # then leave the holdings unchanged for that week.
    current_top_etfs = aligned_top_etfs_weekly.get(current_week_key, [])
    if i == 0:
        # First week initialization
        ticker_shares = initialize_shares_for_first_week(
            current_top_etfs,
            etf_histories,
            current_week_key
        )
    else:
        # Subsequent weeks' portfolio management
        prev_week_key = week_labels[i - 1]
        ticker_shares = manage_etf_portfolio_weekly(
            aligned_top_etfs_weekly.get(prev_week_key, []),
            current_top_etfs,
            prev_week_key,
            current_week_key,
            ticker_shares,
            etf_histories
        )
    # Snapshot the holdings, since ticker_shares keeps being mutated
    ticker_shares_per_week[current_week_key] = ticker_shares.copy()

print("\nFinal Ticker Shares per Week:")
for week, shares in ticker_shares_per_week.items():
    print(f"Week {week}: {shares}")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Week Key Mapping: {'1w': '2024-01-01 to 2024-01-07', '2w': '2024-01-08 to 2024-01-14', '3w': '2024-01-15 to 2024-01-21', '4w': '2024-01-22 to 2024-01-28', '5w': '2024-01-29 to 2024-02-04', '6w': '2024-02-05 to 2024-02-11', '7w': '2024-02-12 to 2024-02-18', '8w': '2024-02-19 to 2024-02-25', '9w': '2024-02-26 to 2024-03-03', '10w': '2024-03-04 to 2024-03-10', '11w': '2024-03-11 to 2024-03-17', '12w': '2024-03-18 to 2024-03-24', '13w': '2024-03-25 to 2024-03-31', '14w': '2024-04-01 to 2024-04-07', '15w': '2024-04-08 to 2024-04-14', '16w': '2024-04-15 to 2024-04-21', '17w': '2024-04-22 to 2024-04-28', '18w': '2024-04-29 to 2024-05-05', '19w': '2024-05-06 to 2024-05-12', '20w': '2024-05-13 to 2024-05-19', '21w': '2024-05-20 to 2024-05-26', '22w': '2024-05-27 to 2024-06-02', '23w': '2024-06-03 to 2024-06-09', '24w': '2024-06-10 to 2024-06-16', '25w': '2024-06-17 to 2024-06-23', '26w': '2024-06-24 to 2024-06-30', '27w': '2024-07-01 to 2024-07-07', '28w': '2024-07-08 to 2024-07-14', '29w': '20




In [21]:
# Start date used to fetch prices for the 49th week (the week after the last
# managed one). It is a calendar date, not necessarily a trading day; the
# first row returned by the download below is the one that gets used.
first_trading_day_49w = '2024-12-01'  # Adjust to the day after the last managed week ends

# The holdings come from the last managed week; the prices from the 49th week
week_48_range = list(ticker_shares_per_week.keys())[-1]  # Last key = last managed week (raises IndexError if the dict is empty)
week_49_start = first_trading_day_49w  # Download start for the 49th week
print(f"Using data for the 48th week: {week_48_range}")

print(f"Fetching data starting from the first trading day of the 49th week: {week_49_start}")

# Download end date for the 49th week
week_49_end = '2024-12-06'
# Ticker -> market value of the held shares at the 49th week's first close
etf_values_49w = {}

# NOTE(review): week_48_range was just taken from this dict's keys, so this
# check looks always true and the else branch unreachable - confirm.
if week_48_range in ticker_shares_per_week:
    # Holdings at the end of the last managed week
    ticker_shares_48w = ticker_shares_per_week[week_48_range]
    
    # Value every position at the first available close of the 49th week
    for ticker, shares in ticker_shares_48w.items():
        print(f"Fetching data for ticker {ticker} starting from {week_49_start}...")
        # Download historical data for the 49th week
        data = yf.download(ticker, start=week_49_start, end=week_49_end)
        
        if not data.empty:
            # First row of the download = first trading day in the window
            closing_price_49w = data['Close'].iloc[0]
            # Market value of the position
            total_value = shares * closing_price_49w
            etf_values_49w[ticker] = total_value
            print(f"{ticker}: {shares:.2f} shares at ${closing_price_49w:.2f} each, total value: ${total_value:.2f}")
        else:
            print(f"{ticker}: No data available for the 49th week's first trading day.")
else:
    print(f"No data available in ticker_shares_per_week for the 48th week: {week_48_range}")



[*********************100%%**********************]  1 of 1 completed


Using data for the 48th week: 2024-11-25 to 2024-12-01
Fetching data starting from the first trading day of the 49th week: 2024-12-01
Fetching data for ticker FTEC starting from 2024-12-01...
FTEC: 214.73 shares at $186.99 each, total value: $40152.16
Fetching data for ticker XSD starting from 2024-12-01...


[*********************100%%**********************]  1 of 1 completed

XSD: 51.61 shares at $252.68 each, total value: $13040.85





In [22]:
# Report the portfolio value, or say so when nothing could be valued
if not etf_values_49w:
    print("No values could be calculated for the 49th week's first trading day.")
else:
    print("\nETF values on the 49th week's first trading day:")
    # Portfolio value = sum of the individual position values
    total_value = sum(etf_values_49w.values())
    print(f"Total portfolio value: {total_value:.2f}")
    # Per-ETF breakdown
    for ticker in etf_values_49w:
        value = etf_values_49w[ticker]
        print(f"{ticker}: {value:.2f}")



ETF values on the 49th week's first trading day:
Total portfolio value: 53193.01
FTEC: 40152.16
XSD: 13040.85


### With Smoothing

In [23]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, 
    volatility_clustering, mean_forecast, std_forecast, mean_rachev, std_rachev, 
    mean_sharpe, std_sharpe, mean_sortino, std_sortino, mean_volatility_clustering, 
    std_volatility_clustering
):
    """Combine standardised forecast and risk metrics into a single score.

    Every metric is standardised as ``(value - mean) / (std + 1e-8)`` with the
    mean/std supplied by the caller; the small constant avoids division by
    zero. The score is::

        z(mean forecast) - risk_percentage * z(rachev) + z(sharpe)
            + z(sortino) - z(volatility clustering)

    Args:
        forecasted_values: Forecasted returns; only their mean is used.
        risk_percentage: Weight of the standardised Rachev ratio.
        rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering:
            The ETF's own metric values.
        mean_*, std_*: Mean and standard deviation used to standardise the
            matching metric.

    Returns:
        The composite score as a float.
    """
    epsilon = 1e-8  # Guards against a zero standard deviation

    def standardise(value, mean, std):
        return (value - mean) / (std + epsilon)

    z_forecast = standardise(np.mean(forecasted_values), mean_forecast, std_forecast)
    z_rachev = standardise(rachev_ratio, mean_rachev, std_rachev)
    z_sharpe = standardise(sharpe_ratio, mean_sharpe, std_sharpe)
    z_sortino = standardise(sortino_ratio, mean_sortino, std_sortino)
    z_vol_clustering = standardise(
        volatility_clustering, mean_volatility_clustering, std_volatility_clustering
    )

    # Rachev and volatility clustering are subtracted, the others added
    return z_forecast - (risk_percentage * z_rachev) + z_sharpe + z_sortino - z_vol_clustering

def smooth_scores(scores, alpha=0.2):
    """Smooth a sequence with an Exponential Moving Average (EMA).

    ``smoothed[0] = scores[0]`` and, for ``t >= 1``,
    ``smoothed[t] = alpha * scores[t] + (1 - alpha) * smoothed[t - 1]``.

    Args:
        scores: List or numpy array of numbers.
        alpha: Weight of the current value; defaults to 0.2.

    Returns:
        The smoothed values as a list of floats, same length as ``scores``.
        An empty input yields an empty list.

    Raises:
        ValueError: If ``scores`` is neither a list nor a numpy array.
    """
    if not isinstance(scores, (list, np.ndarray)):
        raise ValueError("Scores must be a list or numpy array of numerical values.")

    # Force a float buffer: with integer input the result array used to
    # inherit the integer dtype and every EMA value was silently truncated.
    values = np.asarray(scores, dtype=float)
    if values.size == 0:
        return []  # Nothing to smooth; indexing element 0 would raise

    smoothed_scores = np.empty_like(values)
    smoothed_scores[0] = values[0]  # Seed the EMA with the first observation

    for t in range(1, len(values)):
        smoothed_scores[t] = alpha * values[t] + (1 - alpha) * smoothed_scores[t - 1]

    return smoothed_scores.tolist()

def calculate_means_and_stds_weekly(etf_pred_dict, forecast_period):
    """Compute the across-ETF mean and standard deviation of each metric.

    Args:
        etf_pred_dict: Mapping of ETF ticker to a dict holding the keys
            ``returns_<period>``, ``rachev_ratio_<period>``,
            ``sharpe_ratio_<period>``, ``sortino_ratio_<period>`` and
            ``volatility_clustering_<period>``.
        forecast_period: Period suffix used in those keys (e.g. ``"1w"``).

    Returns:
        A 10-tuple ``(mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering)``. The forecast
        statistics are taken over each ETF's mean return; ``np.std`` is used
        with its default settings.

    Raises:
        KeyError: If an ETF lacks one of the per-period keys.
    """
    def across_etfs(metric_name):
        # Mean/std of one scalar metric over all ETFs
        column = np.array(
            [etf_pred_dict[etf][f'{metric_name}_{forecast_period}'] for etf in etf_pred_dict]
        )
        return np.mean(column), np.std(column)

    # Returns are sequences: reduce each ETF to its mean return first
    mean_return_per_etf = [
        np.mean(etf_pred_dict[etf][f'returns_{forecast_period}']) for etf in etf_pred_dict
    ]
    mean_forecast = np.mean(mean_return_per_etf)
    std_forecast = np.std(mean_return_per_etf)

    mean_rachev, std_rachev = across_etfs('rachev_ratio')
    mean_sharpe, std_sharpe = across_etfs('sharpe_ratio')
    mean_sortino, std_sortino = across_etfs('sortino_ratio')
    mean_volatility_clustering, std_volatility_clustering = across_etfs('volatility_clustering')

    return (
        mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering
    )

def calculate_scores_for_etfs_weekly(etf_pred_dict, forecast_period, risk_percentage, alpha=0.2):
    """Compute raw and EMA-smoothed composite scores for one forecast period.

    Args:
        etf_pred_dict: Mapping of ETF ticker to a dict holding the keys
            ``returns_<period>``, ``rachev_ratio_<period>``,
            ``sharpe_ratio_<period>``, ``volatility_clustering_<period>`` and
            ``sortino_ratio_<period>``.
        forecast_period: Period suffix used in those keys (e.g. ``"1w"``).
        risk_percentage: Forwarded to ``calculate_composite_score``.
        alpha: EMA weight forwarded to ``smooth_scores``.

    Returns:
        A list with one dict per ETF, in ``etf_pred_dict`` iteration order,
        with the keys ``ETF``, ``Week``, ``RiskPercentage``, ``RawScore`` and
        ``SmoothedScore``.

    NOTE(review): the EMA runs over the list of ETFs of a single period, so an
    ETF's smoothed score depends on the raw scores of the ETFs that precede it
    in dict order, and the first ETF's smoothed score always equals its raw
    score. Smoothing each ETF across weeks looks like the intent - confirm.
    """
    (mean_forecast, std_forecast, mean_rachev, std_rachev,
     mean_sharpe, std_sharpe, mean_sortino, std_sortino,
     mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds_weekly(
        etf_pred_dict, forecast_period
    )
    shared_stats = (
        mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering,
    )

    raw_scores = []
    for etf in etf_pred_dict:
        etf_metrics = etf_pred_dict[etf]
        forecasted_values = etf_metrics[f'returns_{forecast_period}']
        rachev_ratio = etf_metrics[f'rachev_ratio_{forecast_period}']
        sharpe_ratio = etf_metrics[f'sharpe_ratio_{forecast_period}']
        volatility_clustering = etf_metrics[f'volatility_clustering_{forecast_period}']
        sortino_ratio = etf_metrics[f'sortino_ratio_{forecast_period}']

        raw_scores.append(
            calculate_composite_score(
                forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio,
                sortino_ratio, volatility_clustering, *shared_stats
            )
        )

    # EMA over the raw scores, in ETF order
    smoothed_scores = smooth_scores(raw_scores, alpha=alpha)

    return [
        {
            'ETF': etf,
            'Week': forecast_period,
            'RiskPercentage': risk_percentage,
            'RawScore': raw_scores[position],
            'SmoothedScore': smoothed_scores[position],
        }
        for position, etf in enumerate(etf_pred_dict)
    ]

def main_weekly(tickers, etf_dict, n_weeks=48, risk_percentage=0.10):
    """Run the weekly forecasting pipeline and score the ETFs week by week.

    Args:
        tickers: Passed through to ``process_etf_data_weekly``.
        etf_dict: Passed through to ``process_etf_data_weekly``.
        n_weeks: Number of weekly horizons to score; horizons are labelled
            ``"1w"`` .. ``"<n_weeks>w"``. Defaults to 48.
        risk_percentage: Risk weight forwarded to the scoring function.
            Defaults to 0.10.

    Returns:
        Tuple ``(etf_pred_dict, weekly_scores)`` where ``weekly_scores`` maps
        each week label that had data to the list of per-ETF score dicts.
    """
    etf_pred_dict = process_etf_data_weekly(tickers, etf_dict)
    weekly_scores = {}

    for week in range(1, n_weeks + 1):
        week_key = f"{week}w"
        returns_key = f"returns_{week_key}"

        # Only ETFs that actually carry this week's forecast can be scored.
        # Scoring the full dict would raise KeyError as soon as a single ETF
        # is missing the week while another one has it.
        etfs_with_data = {
            etf: metrics for etf, metrics in etf_pred_dict.items() if returns_key in metrics
        }
        if not etfs_with_data:
            print(f"Skipping score calculation for {week_key}: No ETFs have data for this week.")
            continue

        if len(etfs_with_data) < len(etf_pred_dict):
            missing = [etf for etf in etf_pred_dict if etf not in etfs_with_data]
            print(f"{week_key}: no forecast data for {missing}; scoring the remaining ETFs only.")

        weekly_scores[week_key] = calculate_scores_for_etfs_weekly(
            etfs_with_data, week_key, risk_percentage
        )

    return etf_pred_dict, weekly_scores

# Select top ETFs for weekly portfolio management
def select_top_etfs_weekly(df_scores, forecast_period):
    """Return the tickers of the two ETFs with the highest smoothed score.

    Args:
        df_scores: DataFrame with at least the columns ``ETF`` and
            ``SmoothedScore``.
        forecast_period: Label used only in the skip message.

    Returns:
        List of up to two ETF tickers ordered by descending
        ``SmoothedScore``; an empty list when ``df_scores`` has no rows.
    """
    if not df_scores.empty:
        best_rows = df_scores.nlargest(2, 'SmoothedScore')
        return best_rows['ETF'].tolist()

    print(f"No scores available for {forecast_period}. Skipping.")
    return []


In [24]:
# Re-run the weekly scoring with the functions redefined in the previous cell;
# this overwrites etf_pred_dict and weekly_scores from the earlier run.
etf_pred_dict, weekly_scores = main_weekly(tickers, etf_dict)


Debug: Returns for SMH, Forecast Period 1w: [-0.03475353 -0.0190482  -0.0071898   0.00528752]

Debug: Returns for SMH, Forecast Period 2w: [ 0.03416537  0.00151172 -0.00033629  0.00427034 -0.00496809]

Debug: Returns for SMH, Forecast Period 3w: [ 0.01433271 -0.00766895  0.03076492  0.03667688]

Debug: Returns for SMH, Forecast Period 4w: [ 0.00348621  0.00429403  0.01685095 -0.00086758 -0.02236661]

Debug: Returns for SMH, Forecast Period 5w: [ 0.01060041 -0.01112131 -0.01565295  0.00825322  0.0188383 ]

Debug: Returns for SMH, Forecast Period 6w: [ 0.01905304 -0.01181881  0.01939125  0.01357168  0.02159133]

Debug: Returns for SMH, Forecast Period 7w: [-0.00440405 -0.01907772  0.01932874 -0.0039811  -0.00517776]

Debug: Returns for SMH, Forecast Period 8w: [-0.02093725 -0.00787031  0.06679589 -0.00756042]

Debug: Returns for SMH, Forecast Period 9w: [ 0.00700177 -0.00401006 -0.01240139  0.02075349  0.03982313]

Debug: Returns for SMH, Forecast Period 10w: [ 0.01556183 -0.01599483  0

In [25]:
# One DataFrame per week, keyed by the same week label as weekly_scores
# (rebinds weekly_scores_dfs built earlier in the notebook)
weekly_scores_dfs = {}

# weekly_scores maps a week label ('1w', '2w', ...) to the list of per-ETF
# score dicts returned by main_weekly
for week_key, scores in weekly_scores.items():
    # A list of dicts becomes a frame with one row per ETF
    weekly_scores_dfs[week_key] = pd.DataFrame(scores)

    
# Additionally expose every week's frame as a module-level variable
# df_scores_1w ... df_scores_48w; weeks without scores get an empty DataFrame.
# NOTE(review): weekly_scores_dfs already holds the same frames, so these
# dynamically created globals only matter if other cells use them by name.
for week in range(1, 49):
    week_key = f'{week}w'
    globals()[f'df_scores_{week_key}'] = weekly_scores_dfs.get(week_key, pd.DataFrame())




In [26]:
# Week label -> DataFrame holding the two ETFs with the highest smoothed score
top_etfs_weekly = {}

for week_key in weekly_scores_dfs:
    df_scores = weekly_scores_dfs[week_key]

    # Rank on 'SmoothedScore' (not the raw score)
    top_etfs_weekly[week_key] = df_scores.nlargest(2, 'SmoothedScore')

    # Show the selection for this week (header line, then the frame)
    print(f"\nTop 2 ETFs for {week_key} forecast:", top_etfs_weekly[week_key], sep="\n")



Top 2 ETFs for 1w forecast:
   ETF Week  RiskPercentage  RawScore  SmoothedScore
0  SMH   1w             0.1  1.646795       1.646795
9  IXN   1w             0.1  1.394608       1.175217

Top 2 ETFs for 2w forecast:
   ETF Week  RiskPercentage  RawScore  SmoothedScore
8  IGM   2w             0.1  3.465493       1.051561
9  IXN   2w             0.1  0.479823       0.937214

Top 2 ETFs for 3w forecast:
    ETF Week  RiskPercentage  RawScore  SmoothedScore
0   SMH   3w             0.1  5.433687       5.433687
1  SOXX   3w             0.1  1.873097       4.721569

Top 2 ETFs for 4w forecast:
   ETF Week  RiskPercentage  RawScore  SmoothedScore
9  IXN   4w             0.1  2.694837       1.451195
8  IGM   4w             0.1  4.603237       1.140285

Top 2 ETFs for 5w forecast:
    ETF Week  RiskPercentage  RawScore  SmoothedScore
0   SMH   5w             0.1  3.185559       3.185559
1  SOXX   5w             0.1 -3.127433       1.922961

Top 2 ETFs for 6w forecast:
    ETF Week  RiskPercent

In [27]:
# Function to generate week ranges
def generate_week_ranges(start_date, end_date):
    """Split ``[start_date, end_date]`` into consecutive 7-day windows.

    Args:
        start_date: First day as a ``'YYYY-MM-DD'`` string.
        end_date: Last day as a ``'YYYY-MM-DD'`` string.

    Returns:
        List of ``(week_start, week_end)`` string tuples. Each window starts
        7 days after the previous one and ends 6 days after its own start,
        except that the last window is clipped to ``end_date``. The list is
        empty when ``start_date`` is not before ``end_date``.
    """
    date_format = '%Y-%m-%d'
    cursor = datetime.strptime(start_date, date_format)
    final_day = datetime.strptime(end_date, date_format)

    windows = []
    while cursor < final_day:
        # A window never extends past the overall end date
        window_close = min(cursor + timedelta(days=6), final_day)
        windows.append((cursor.strftime(date_format), window_close.strftime(date_format)))
        cursor = cursor + timedelta(days=7)

    return windows

# Function to gather ETF data for weeks
def gather_etf_data_for_weeks(tickers, week_ranges):
    """Download price history for every ticker in every week window.

    Args:
        tickers: Iterable of ticker symbols.
        week_ranges: Iterable of ``(start_date, end_date)`` string pairs.

    Returns:
        Dict keyed by ``"<start> to <end>"``; each value is a dict mapping
        ticker to the DataFrame returned by ``yf.download`` for that window.
        Tickers whose download comes back empty are left out of that week.

    Note:
        One download is issued per (week, ticker) pair.
        NOTE(review): yfinance is believed to treat ``end`` as exclusive, so
        the window's last calendar day is probably not fetched - confirm.
    """
    histories_by_week = {}

    for window_start, window_end in week_ranges:
        label = f"{window_start} to {window_end}"
        frames_for_week = {}
        histories_by_week[label] = frames_for_week

        for symbol in tickers:
            frame = yf.download(symbol, start=window_start, end=window_end)
            if not frame.empty:
                # Normalise the index to datetimes before storing
                frame.index = pd.to_datetime(frame.index)
                frames_for_week[symbol] = frame
            else:
                print(f"No data found for {symbol} in {label}")

    return histories_by_week

# Function to initialize shares for the first week
def initialize_shares_for_first_week(top_etfs_1w, etf_histories, week, investment_amount=50000):
    """Buy the initial positions on the first trading day of ``week``.

    The first trading day is the first row of the ETF's history dated on or
    after the week's start date. (Previously one business day was added to
    the start date first, which skipped the start date itself whenever it was
    a trading day, and an out-of-range lookup raised ``IndexError``.)

    Args:
        top_etfs_1w: Tickers to buy.
        etf_histories: Dict ``week label -> {ticker -> price DataFrame}``;
            each frame needs a sorted datetime index and a ``Close`` column.
        week: Week label of the form ``"<start> to <end>"``.
        investment_amount: Cash committed to EACH ticker (not split between
            them). A factor of 0.975 is applied before buying - presumably a
            2.5% transaction cost; confirm.

    Returns:
        Dict ``ticker -> number of shares``. Tickers without usable data for
        the week are reported and left out.
    """
    ticker_shares = {}
    week_start = pd.to_datetime(week.split(" to ")[0])
    histories_for_week = etf_histories.get(week, {})

    for ticker in top_etfs_1w:
        etf_history = histories_for_week.get(ticker)
        if etf_history is None:
            print(f"No data found for {ticker} in {week}")
            continue

        # Position of the first row dated on/after the week start
        position = etf_history.index.searchsorted(week_start)
        if position >= len(etf_history.index):
            print(f"No trading day on or after {week_start.date()} for {ticker} in {week}")
            continue

        first_trading_day = etf_history.index[position]
        price_on_first_trading_day = etf_history.loc[first_trading_day, 'Close']
        num_shares = (investment_amount * 0.975) / price_on_first_trading_day
        print(f"Shares 1st week: ({investment_amount} * 0.975) / {price_on_first_trading_day}")
        ticker_shares[ticker] = num_shares
        print(f"Ticker: {ticker}, First trading day: {first_trading_day.date()}, Price: {price_on_first_trading_day}, Shares: {num_shares:.2f}")

    return ticker_shares

# Function to manage ETF portfolio weekly
def manage_etf_portfolio_weekly(
    top_etfs_previous, top_etfs_current, previous_week, current_week, ticker_shares, gathered_data_per_week
):
    """Rotate holdings from last week's selection into this week's selection.

    ETFs that dropped out of the selection are paired, in order, with ETFs
    that newly entered it. For each pair the old position is sold at the
    ``'Close'`` of the first row of the current week's history and the
    proceeds buy the new ETF at its first-row ``'Close'``. The 0.975
    multiplier is applied once to the sale proceeds and once more to the
    purchase.

    Parameters
    ----------
    top_etfs_previous, top_etfs_current : iterable of str
        Selected tickers for the previous and the current week.
    previous_week, current_week : str
        Week keys; ``previous_week`` is only used in log output,
        ``current_week`` is also the lookup key into
        ``gathered_data_per_week``.
    ticker_shares : dict
        ``{ticker: shares}`` currently held. Updated in place and returned.
    gathered_data_per_week : dict
        ``{week: {ticker: DataFrame with a 'Close' column}}``.

    Returns
    -------
    dict
        The same ``ticker_shares`` object after any swaps. It is returned
        unchanged when the number of ETFs to sell and to buy differ.
    """
    etf_histories_for_current_week = gathered_data_per_week.get(current_week, {})
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"Top 2 ETFs for {previous_week}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_week}: {top2etfs_current}")

    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Swaps are done pairwise, so both lists must have the same length.
    if len(etfs_to_sell) != len(etfs_to_buy):
        print("Mismatch between ETFs to sell and buy. Adjusting allocation...")
        return ticker_shares  # No rebalancing is attempted for a mismatch.

    for etf_sell, etf_buy in zip(etfs_to_sell, etfs_to_buy):
        no_of_shares = ticker_shares.get(etf_sell, 0)
        if not no_of_shares > 0:
            print(f"No shares found for {etf_sell} to sell.")
            continue

        if etf_sell not in etf_histories_for_current_week:
            print(f"Data for {etf_sell} is missing for {current_week}. Skipping sale.")
            continue

        # Check the purchase side BEFORE touching the portfolio: selling first
        # and then discovering there is no price for the replacement would
        # drop the proceeds from the portfolio altogether. Keep the existing
        # position instead.
        if etf_buy not in etf_histories_for_current_week:
            print(f"Data for {etf_buy} is missing for {current_week}. Skipping purchase.")
            continue

        # Sell the old ETF at the first available close of the current week.
        first_trading_day_sell_price = etf_histories_for_current_week[etf_sell]['Close'].iloc[0]
        selling_value = no_of_shares * first_trading_day_sell_price * 0.975
        print(f"Sell {etf_sell}: {no_of_shares:.2f} shares at {first_trading_day_sell_price:.2f}. Total value: {selling_value:.2f}")
        del ticker_shares[etf_sell]

        # Reinvest the proceeds in the new ETF at its first available close.
        first_trading_day_buy_price = etf_histories_for_current_week[etf_buy]['Close'].iloc[0]
        new_shares = (selling_value * 0.975) / first_trading_day_buy_price
        print(f"Buy {etf_buy}: {new_shares:.2f} shares at {first_trading_day_buy_price:.2f}.")
        ticker_shares[etf_buy] = new_shares

    print(f"Updated ticker shares after {current_week}: {ticker_shares}")
    return ticker_shares


In [28]:
# Build the list of weekly (start, end) windows covered by the back-test.
week_ranges = generate_week_ranges('2024-01-01', '2024-12-01')

# Download price history for every ticker in every window.
# `tickers` must already have been defined by an earlier cell.
etf_histories = gather_etf_data_for_weeks(tickers, week_ranges)
week_keys = list(etf_histories)

# Positional labels ("1w", "2w", ...) -> date-range keys used by etf_histories.
week_key_mapping = {f"{n}w": key for n, key in enumerate(week_keys, start=1)}
print("Week Key Mapping:", week_key_mapping)

# Re-key the weekly score tables by date range and select each week's top ETFs.
# Score tables whose label has no matching window are ignored.
aligned_top_etfs_weekly = {}
for week_key, df_scores in weekly_scores_dfs.items():
    forecast_period = week_key_mapping.get(week_key)
    if not forecast_period:
        continue
    aligned_top_etfs_weekly[forecast_period] = select_top_etfs_weekly(df_scores, forecast_period)

# Walk the weeks in order: open positions in the first week, rotate afterwards,
# and keep a snapshot of the holdings after every week.
ticker_shares = {}
ticker_shares_per_week = {}

for i, current_week_key in enumerate(week_keys):
    if i > 0:
        prev_week_key = week_keys[i - 1]
        ticker_shares = manage_etf_portfolio_weekly(
            aligned_top_etfs_weekly[prev_week_key],
            aligned_top_etfs_weekly[current_week_key],
            prev_week_key,
            current_week_key,
            ticker_shares,
            etf_histories
        )
    else:
        ticker_shares = initialize_shares_for_first_week(
            aligned_top_etfs_weekly[current_week_key],
            etf_histories,
            current_week_key
        )
    # Copy so that later in-place updates do not rewrite earlier snapshots.
    ticker_shares_per_week[current_week_key] = ticker_shares.copy()

print("\nFinal Ticker Shares per Week:")
for week, shares in ticker_shares_per_week.items():
    print(f"Week {week}: {shares}")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Week Key Mapping: {'1w': '2024-01-01 to 2024-01-07', '2w': '2024-01-08 to 2024-01-14', '3w': '2024-01-15 to 2024-01-21', '4w': '2024-01-22 to 2024-01-28', '5w': '2024-01-29 to 2024-02-04', '6w': '2024-02-05 to 2024-02-11', '7w': '2024-02-12 to 2024-02-18', '8w': '2024-02-19 to 2024-02-25', '9w': '2024-02-26 to 2024-03-03', '10w': '2024-03-04 to 2024-03-10', '11w': '2024-03-11 to 2024-03-17', '12w': '2024-03-18 to 2024-03-24', '13w': '2024-03-25 to 2024-03-31', '14w': '2024-04-01 to 2024-04-07', '15w': '2024-04-08 to 2024-04-14', '16w': '2024-04-15 to 2024-04-21', '17w': '2024-04-22 to 2024-04-28', '18w': '2024-04-29 to 2024-05-05', '19w': '2024-05-06 to 2024-05-12', '20w': '2024-05-13 to 2024-05-19', '21w': '2024-05-20 to 2024-05-26', '22w': '2024-05-27 to 2024-06-02', '23w': '2024-06-03 to 2024-06-09', '24w': '2024-06-10 to 2024-06-16', '25w': '2024-06-17 to 2024-06-23', '26w': '2024-06-24 to 2024-06-30', '27w': '2024-07-01 to 2024-07-07', '28w': '2024-07-08 to 2024-07-14', '29w': '20

In [29]:
# Value the final holdings at the start of the 49th week.
# '2024-12-01' is the end date of the last (48th) weekly window; the first row
# returned by a download starting there is used as the 49th week's first
# trading day.
first_trading_day_49w = '2024-12-01'
week_49_start = first_trading_day_49w
week_49_end = '2024-12-06'

# The most recently recorded week holds the final share counts. Guard against
# an empty history so the cell reports the problem instead of raising
# IndexError on `[-1]`.
weeks_with_shares = list(ticker_shares_per_week)
week_48_range = weeks_with_shares[-1] if weeks_with_shares else None

# {ticker: market value of the held shares at the 49th week's first close}
etf_values_49w = {}

if week_48_range is not None:
    print(f"Using data for the 48th week: {week_48_range}")
    print(f"Fetching data starting from the first trading day of the 49th week: {week_49_start}")

    # Share counts carried over from the 48th week.
    ticker_shares_48w = ticker_shares_per_week[week_48_range]

    for ticker, shares in ticker_shares_48w.items():
        print(f"Fetching data for ticker {ticker} starting from {week_49_start}...")
        # Download historical data for the 49th week.
        data = yf.download(ticker, start=week_49_start, end=week_49_end)

        if not data.empty:
            # First available close in the downloaded window.
            closing_price_49w = data['Close'].iloc[0]
            total_value = shares * closing_price_49w
            etf_values_49w[ticker] = total_value
            print(f"{ticker}: {shares:.2f} shares at {closing_price_49w:.2f} each, total value: {total_value:.2f}")
        else:
            print(f"{ticker}: No data available for the 49th week's first trading day.")
else:
    print(f"No data available in ticker_shares_per_week for the 48th week: {week_48_range}")



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Using data for the 48th week: 2024-11-25 to 2024-12-01
Fetching data starting from the first trading day of the 49th week: 2024-12-01
Fetching data for ticker IGM starting from 2024-12-01...
IGM: 162.91 shares at 102.51 each, total value: 16699.68
Fetching data for ticker IXN starting from 2024-12-01...
IXN: 232.15 shares at 84.80 each, total value: 19685.98





In [30]:
# Summarise the 49th-week valuation: portfolio total first, then each ETF.
if not etf_values_49w:
    print("No values could be calculated for the 49th week's first trading day.")
else:
    print("\nETF values on the 49th week's first trading day:")
    # Total portfolio value is the sum over all valued ETFs.
    total_value = sum(etf_values_49w.values())
    print(f"Total portfolio value: {total_value:.2f}")
    # Per-ETF breakdown.
    for ticker, value in etf_values_49w.items():
        print(f"{ticker}: {value:.2f}")



ETF values on the 49th week's first trading day:
Total portfolio value: 36385.67
IGM: 16699.68
IXN: 19685.98
