In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split,GridSearchCV
from skopt import BayesSearchCV

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tabulate import tabulate
import pandas_ta as ta
from sklearn.preprocessing import StandardScaler,MinMaxScaler,Normalizer
import statsmodels.api as sm
import itertools
from sklearn.ensemble import RandomForestRegressor
from skopt import BayesSearchCV


from pandas.tseries.offsets import MonthEnd
from pandas.tseries.offsets import BDay


pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows', None)

from datetime import datetime, timedelta

In [2]:
def data_loading(ticker_symbol, start_date, end_date):
    # Fetch the ETF data from Yahoo Finance for the period from 2010-01-01 to 2024-01-01
    etf_data = yf.Ticker(ticker_symbol)
    etf_history = etf_data.history(start=start_date, end=end_date)
    etf_history.index = etf_history.index.tz_localize(None)
    return etf_history, etf_data

In [3]:
#Function to derive the predictor columns
def etf_predictors(etf_history,  start_date, end_date, etf_data, benchmark_ticker = '^GSPC' ):
    
    # Calculate Daily Returns
    etf_history['Daily Return'] = etf_history['Close'].pct_change()

    # Calculate 21-Day Volatility (standard deviation of daily returns, annualized)
    etf_history['Volatility'] = etf_history['Daily Return'].rolling(window=21).std() * np.sqrt(252)
    #etf_history['Volatility_ta'] = ta.volatility(etf_history['Close'], window=21, annualize=True)
   
    # Get the ETF info
    etf_info = etf_data.info

    # Retrieve the net asset value price (NAV) and total net assets
    nav_price = etf_info['navPrice']
    total_assets = etf_info['totalAssets']

    # Calculate the number of shares outstanding
    shares_outstanding = total_assets / nav_price

    # Calculate Market Capitalization for each day
    # Market Capitalization = Closing Price * Total Number of Shares Outstanding
    etf_history['Market Cap'] = etf_history['Close'] * shares_outstanding

    total_assets = etf_info['totalAssets']
    total_liabilities = etf_info.get('totalLiabilities', 0)  # Handle the case where total liabilities might not be present
    
    # Calculate Book Value per Share
    book_value_per_share = (total_assets - total_liabilities) / shares_outstanding
    
    # Calculate Price to Book (P/B) Ratio
    etf_history['P/B Ratio'] = etf_history['Close'] / book_value_per_share
    
    # Calculate 1-Month Momentum (21 trading days)
    
    etf_history['Momentum'] = ta.mom(etf_history['Close'], length=21)
    
    benchmark_data = yf.download(benchmark_ticker, start=start_date, end=end_date)
    benchmark_data['Daily Return'] = benchmark_data['Close'].pct_change()
    #benchmark_data_history = benchmark_data.dropna()
    benchmark_data.head(10)
    
    # Ensure the indices are time zone-naive
    benchmark_data.index = benchmark_data.index.tz_localize(None)
    combined_data = etf_history[['Close']].join(benchmark_data[['Close']], lsuffix='_ETF', rsuffix='_Benchmark')
    combined_data = combined_data.dropna()
    
    # Calculate rolling beta with a 30-day window
    rolling_beta_21 = rolling_beta(combined_data, window=21)

    # Add the rolling beta to the dataframe
    combined_data.loc[:, 'Rolling Beta 21-day'] = rolling_beta_21
    etf_history['Rolling Beta']=combined_data['Rolling Beta 21-day']
    
    # Calculate daily profitability
    daily_profitabilities = []
    previous_nav = None
    for index, row in etf_history.iterrows():
        current_nav = row['Close']  # Current day's NAV
        #print(current_nav)
        if previous_nav is not None:
            daily_profitability = (current_nav - previous_nav) / previous_nav * 100
            daily_profitabilities.append(daily_profitability)
        else:
            daily_profitabilities.append(None)
        previous_nav = current_nav  # Update previous_nav for the next iteration

    # Add daily profitabilities to ETF dataset
    etf_history['Daily Profitability (%)'] = daily_profitabilities
    
    # Calculate the dividend yield for each day
    dividend_yields = []

    for index, row in etf_history.iterrows():
        # Get the dividend payment for the day
        dividend_payment = row['Dividends']

        # Get the current market price of the ETF for the day
        current_price = row['Close']

        # Calculate the dividend yield for the day
        dividend_yield = (dividend_payment / current_price) * 100

        # Append the dividend yield to the list
        dividend_yields.append(dividend_yield)
      
    etf_history['Div yield'] = dividend_yields

    #Volatility
    etf_history['ATR'] = ta.atr(etf_history['High'], etf_history['Low'], etf_history['Close'], length=21)    
    
    # Compute the Relative Volatility Index (RVI)
    rvi = ta.rvi(etf_history['Close'],length=21)
    etf_history['RVI'] = rvi
    
    #Momentum
    rsi_window = 14  # Window size for RSI calculation
    roc_window = 12  # Window size for ROC calculation
    #rsi_window = 21  
    #roc_window = 21  

    

    etf_history['RSI'] = ta.rsi(etf_history['Close'],length=rsi_window)
    # Calculate Rate of Change (ROC)
    etf_history['ROC'] = ta.roc(etf_history['Close'], length=roc_window)
        
    etf_history['log_returns'] = np.log(etf_history['Close'] / etf_history['Close'].shift(1))

    return etf_history

# Function to calculate rolling beta
def rolling_beta(df, window):
    rolling_cov = df['Close_Benchmark'].rolling(window=window).cov(df['Close_ETF'])
    rolling_var = df['Close_Benchmark'].rolling(window=window).var()
    rolling_beta = rolling_cov / rolling_var
    return rolling_beta



In [4]:

def pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates, feature_columns=None, scaling_strategy='StandardScaler', final_end_date='2024-12-01'):
    # Handle missing values and inf replacements
    etf_history.fillna(etf_history.median(), inplace=True)
    etf_history = etf_history.replace(-np.inf, 0)

    # Default feature columns if none are provided
    if feature_columns is None:
        feature_columns = ['Volatility', 'Volume', 'Daily Return', 'Market Cap', 'P/B Ratio', 'Momentum', 
                           'Rolling Beta', 'Daily Profitability (%)', 'ATR', 'RVI', 'RSI', 'ROC']

    # Selecting the features (X) and the target (y)
    X = etf_history.loc[:, feature_columns]
    y = etf_history['log_returns']

    # Split train and test data
    train_data = X.loc[train_start_date:train_end_date]
    test_data = X.loc[test_start_date:test_end_date]
    y_train = y.loc[train_start_date:train_end_date]
    y_test = y.loc[test_start_date:test_end_date]

    # Scaling strategy based on input parameter
    if scaling_strategy == 'StandardScaler':
        scaler = StandardScaler()
    elif scaling_strategy == 'Normalizer':
        scaler = Normalizer()
    else:
        raise ValueError(f"Unsupported scaling strategy: {scaling_strategy}")
    
    # Scaling the train and test data
    train_data_scaled = scaler.fit_transform(train_data)
    test_data_scaled = scaler.transform(test_data)

    # Create a dictionary to store forecast data for each prediction period
    forecast_data = {}
    for i, start_date in enumerate(prediction_dates):
        # Determine the end date for each forecast period
        if i < len(prediction_dates) - 1:
            end_date = (pd.to_datetime(start_date) + MonthEnd(0)).strftime('%Y-%m-%d')
        else:
            end_date = final_end_date  # The final end date provided or default

        # Store the forecast data for each period
        forecast_data[f'forecast_data_{i+1}m'] = X.loc[start_date:end_date]

    # Scale the forecast data dynamically
    forecast_data_scaled = {}
    for period_key, period_data in forecast_data.items():
        if not period_data.empty:
            forecast_data_scaled[period_key] = sm.add_constant(scaler.transform(period_data))

    # Add constant to scaled train and test data
    train_data_scaled = sm.add_constant(train_data_scaled)
    test_data_scaled = sm.add_constant(test_data_scaled)

    # Dynamically return the scaled forecast data along with train and test data
    return {
        'train_data_scaled': train_data_scaled,
        'test_data_scaled': test_data_scaled,
        'y_train': y_train,
        'y_test': y_test,
        'scaler': scaler,
        'X': X,
        'etf_history': etf_history,
        'forecast_data_scaled': forecast_data_scaled,
        'forecast_data': forecast_data
    }


In [5]:
def model_training(train_data_scaled, y_train):
    # Define the hyperparameters space
    param_space = {
        'n_estimators': (100, 300),  # integer values from 100 to 300
        'max_features': ['sqrt', 'log2'],  # categorical values
        'max_depth': (10, 50),  # integer values from 10 to 50
        'min_samples_split': (2, 10),  # integer values from 2 to 10
        'min_samples_leaf': (1, 4),  # integer values from 1 to 4
        'bootstrap': [True, False],  # categorical values
        'criterion': ['absolute_error', 'squared_error', 'friedman_mse'],  # categorical values for split criterion
        'max_leaf_nodes': (10, 100),  # integer values for maximum leaf nodes
        'min_impurity_decrease': (0.0, 0.2)  # float values for minimum impurity decrease
    }

    # Initialize the Random Forest Regressor
    rf = RandomForestRegressor(random_state=42)

    # Set up the Bayesian Optimization to search for the best hyperparameters
    bayes_search = BayesSearchCV(
        estimator=rf,
        search_spaces=param_space,
        n_iter=32,  # number of iterations for the search
        cv=5,  # cross-validation folds
        n_jobs=-1,  # use all available CPU cores
        verbose=0,
        scoring='neg_mean_squared_error',
        random_state=42
    )

    # Fit the model using the best hyperparameters
    bayes_search.fit(train_data_scaled, y_train)

    # Get the best model
    best_model = bayes_search.best_estimator_

    # Display the best model parameters
    print("Best Model Parameters:")
    print(bayes_search.best_params_)

    return best_model

In [6]:
#!pip install scikit-optimize


In [7]:
def model_training2(train_data_scaled, y_train):
    # Initialize LassoLarsIC model
    # Initialize LassoLarsIC model
    lasso_lars_ic = LassoLarsIC()

    # Define the hyperparameters and their values for tuning
    param_space = {
        'criterion': Categorical(['aic', 'bic']),  # Criterion to be used for model selection
        'max_iter': Integer(100, 1000),  # Maximum number of iterations
        'fit_intercept': Categorical([True, False]),  # Whether to calculate the intercept
        #'normalize': Categorical([True, False])  # Whether to normalize the predictors
    }

    # Use BayesSearchCV to find the best combination of hyperparameters
    bayes_search = BayesSearchCV(estimator=lasso_lars_ic, search_spaces=param_space, n_iter=50, cv=5, scoring='neg_mean_squared_error', random_state=42)

    # Fit the model
    bayes_search.fit(train_data_scaled, y_train)

    # Get the best model and parameters
    best_model = bayes_search.best_estimator_
    best_params = bayes_search.best_params_

    # Display the best model parameters
    print("Best Model Parameters:")
    print(best_params)

    return best_model

In [8]:
def eval_model(best_model,test_data_scaled,y_test,y_train):
    
    test_predictions = best_model.predict((test_data_scaled))
    mae = mean_absolute_error(y_test, test_predictions)
    # Mean Absolute Scaled Error (MASE) - Example calculation assuming seasonal period m=1
    seasonal_naive = np.roll(y_test, 1)  # Shift y_test by 1 for seasonal naive forecast
    #seasonal_mae = np.mean(np.abs(y_test - seasonal_naive))
    seasonal_mae = np.mean(np.abs(y_test[1:] - seasonal_naive[1:]))
    mase = mae / seasonal_mae

    # Mean Absolute Percentage Error (MAPE)
    #mape = np.mean(np.abs((y_test - test_predictions) / y_test)) * 100

    #n = y_test.shape[0]
    #d = np.abs(  np.diff( y_test) ).sum()/(n-1)
    
    #errors = np.abs(y_test - test_predictions )
    #mase_value = errors.mean()/d

    print("Mean Absolute Error (MAE):", mae)
    print("Mean Absolute Scaled Error (MASE):", mase)
    #print("Mean Absolute Scaled Error-2 (MASE-2):", mase_value)
    #print("Mean Absolute Percentage Error (MAPE):", mape)
    
    return mae, mase

In [9]:
# Ensured consistent feature handling and forecasting in predictions
def predictions(model, forecast_data_scaled, forecast_data):
    predictions_forecast = model.predict(forecast_data_scaled)
    forecast_predictions_df = pd.DataFrame(predictions_forecast, columns=["log_returns"], index=forecast_data.index)
    forecast_data_with_predictions = pd.concat([forecast_data, forecast_predictions_df], axis=1)
    return forecast_predictions_df, forecast_data_with_predictions

In [10]:
#tickers = ['SPY', 'QQQ', 'VTI', 'IWM', 'EFA', 'EEM', 'GLD', 'SLV', 'USO', 'XLF']
tickers = ['SMH', 'SOXX', 'PSI', 'XSD', 'IYW', 'XLK', 'VGT', 'FTEC', 'IGM', 'IXN', 
           #'FNGU','USD', 'FNGO', 'GBTC', 'ETHE', 'TECL', 'FNGS', 'TQQQ', 'ROM', 'QLD' No data available from 2000: 2019
           ]

#tickers = ['SPY', 'QQQ', 'VTI']
#tickers = ['SPY']
                                                           
start_date, end_date = '2000-01-01', '2024-12-01'
train_start_date, train_end_date, test_start_date, test_end_date, prediction_start_date = start_date,'2014-01-01', '2014-01-01', '2024-01-01', '2024-01-01'
prediction_dates=['2024-01-01','2024-02-01','2024-03-01', '2024-04-01','2024-05-01','2024-06-01','2024-07-01','2024-08-01','2024-09-01','2024-10-01','2024-11-01']

In [11]:
dict_data = {}
etf_dict = {}
data_with_predictors = []
for i in tickers:
    etf_history,etf_data = data_loading(i, start_date, end_date)
    
    print(etf_data.info.get('longName'), ":",i)
    
    #test_stationarity(etf_history)
    print(" ETF:",i)
    #print(etf_history.head(10))
    #print("################")
    #hist_data.append(etf_history)
    
    etf_history = etf_predictors(etf_history,  start_date, end_date, etf_data,benchmark_ticker = '^GSPC' )
    #print(etf_history.head(10))
    #rolling_beta(etf_history, window)
    
    # Call the function
    preprocessed_data = pre_processing(etf_history, train_start_date, train_end_date, test_start_date, test_end_date, prediction_dates)

    # Extract individual components from the returned dictionary
    train_data_scaled = preprocessed_data['train_data_scaled']
    test_data_scaled = preprocessed_data['test_data_scaled']
    y_train = preprocessed_data['y_train']
    y_test = preprocessed_data['y_test']
    scaler = preprocessed_data['scaler']
    X = preprocessed_data['X']
    etf_history = preprocessed_data['etf_history']

    # Optionally, access forecast data for each period
    forecast_data_scaled_1m = preprocessed_data['forecast_data_scaled'].get('forecast_data_1m')
    forecast_data_scaled_2m = preprocessed_data['forecast_data_scaled'].get('forecast_data_2m')
    forecast_data_scaled_3m = preprocessed_data['forecast_data_scaled'].get('forecast_data_3m')
    forecast_data_scaled_4m = preprocessed_data['forecast_data_scaled'].get('forecast_data_4m')
    forecast_data_scaled_5m = preprocessed_data['forecast_data_scaled'].get('forecast_data_5m')
    forecast_data_scaled_6m = preprocessed_data['forecast_data_scaled'].get('forecast_data_6m')
    forecast_data_scaled_7m = preprocessed_data['forecast_data_scaled'].get('forecast_data_7m')
    
    forecast_data_scaled_8m = preprocessed_data['forecast_data_scaled'].get('forecast_data_8m')
    forecast_data_scaled_9m = preprocessed_data['forecast_data_scaled'].get('forecast_data_9m')
    forecast_data_scaled_10m = preprocessed_data['forecast_data_scaled'].get('forecast_data_10m')
    forecast_data_scaled_11m = preprocessed_data['forecast_data_scaled'].get('forecast_data_11m')
    
    
    forecast_data_1m = preprocessed_data['forecast_data'].get('forecast_data_1m')
    forecast_data_2m = preprocessed_data['forecast_data'].get('forecast_data_2m')
    forecast_data_3m = preprocessed_data['forecast_data'].get('forecast_data_3m')
    forecast_data_4m = preprocessed_data['forecast_data'].get('forecast_data_4m')
    forecast_data_5m = preprocessed_data['forecast_data'].get('forecast_data_5m')
    forecast_data_6m = preprocessed_data['forecast_data'].get('forecast_data_6m')
    forecast_data_7m = preprocessed_data['forecast_data'].get('forecast_data_7m')
    
    forecast_data_8m = preprocessed_data['forecast_data'].get('forecast_data_8m')
    forecast_data_9m = preprocessed_data['forecast_data'].get('forecast_data_9m')
    forecast_data_10m = preprocessed_data['forecast_data'].get('forecast_data_10m')
    forecast_data_11m = preprocessed_data['forecast_data'].get('forecast_data_11m')
    

    #data_with_predictors.append(etf_history)
    #print(forecast_data)
    
    best_model = model_training(train_data_scaled, y_train)
    #print(model.summary())
    
    mae, mase = eval_model(best_model, test_data_scaled, y_test, y_train)
    print(len(forecast_data_scaled_1m))
    print(len(forecast_data_scaled_2m))
    print(len(forecast_data_scaled_3m))
    print(len(forecast_data_scaled_4m))
    print(len(forecast_data_scaled_5m))
    print(len(forecast_data_scaled_6m))
    print(len(forecast_data_scaled_7m))
    
    print(len(forecast_data_scaled_8m))
    print(len(forecast_data_scaled_9m))
    print(len(forecast_data_scaled_10m))
    print(len(forecast_data_scaled_11m))
    
    
    forecast_predictions_df_1m, forecast_data_with_predictions_1m = predictions(best_model, forecast_data_scaled_1m, forecast_data_1m)
    forecast_predictions_df_2m, forecast_data_with_predictions_2m = predictions(best_model, forecast_data_scaled_2m, forecast_data_2m)
    forecast_predictions_df_3m, forecast_data_with_predictions_3m = predictions(best_model, forecast_data_scaled_3m, forecast_data_3m)
    forecast_predictions_df_4m, forecast_data_with_predictions_4m = predictions(best_model, forecast_data_scaled_4m, forecast_data_4m)
    forecast_predictions_df_5m, forecast_data_with_predictions_5m = predictions(best_model, forecast_data_scaled_5m, forecast_data_5m)
    forecast_predictions_df_6m, forecast_data_with_predictions_6m = predictions(best_model, forecast_data_scaled_6m, forecast_data_6m)
    forecast_predictions_df_7m, forecast_data_with_predictions_7m = predictions(best_model, forecast_data_scaled_7m, forecast_data_7m)
    
    forecast_predictions_df_8m, forecast_data_with_predictions_8m = predictions(best_model, forecast_data_scaled_8m, forecast_data_8m)
    forecast_predictions_df_9m, forecast_data_with_predictions_9m = predictions(best_model, forecast_data_scaled_9m, forecast_data_9m)
    forecast_predictions_df_10m, forecast_data_with_predictions_10m = predictions(best_model, forecast_data_scaled_10m, forecast_data_10m)
    forecast_predictions_df_11m, forecast_data_with_predictions_11m = predictions(best_model, forecast_data_scaled_11m, forecast_data_11m)
    
    
    # Add data to dictionary
    # Store the relevant data in a dictionary

    #print(forecast_predictions_df_6m)
    #print(forecast_predictions_df_7m)
    
    dict_data = {
        "etf_history": etf_history,
        "forecast_data_1m": forecast_data_1m,
        "forecast_data_2m": forecast_data_2m,
        "forecast_data_3m": forecast_data_3m,
        "forecast_data_4m": forecast_data_4m,
        "forecast_data_5m": forecast_data_5m,
        "forecast_data_6m": forecast_data_6m,
        "forecast_data_7m": forecast_data_7m,
        
        "forecast_data_8m": forecast_data_8m,
        "forecast_data_9m": forecast_data_9m,
        "forecast_data_10m": forecast_data_10m,
        "forecast_data_11m": forecast_data_11m,
        
        "X": X,
        "y_train_values": y_train , # Assuming this is relevant to the ETF data
        "model results": {"mae": mae,
                          "mase": mase,
                          #"mape": mape
                         },
                         
        "forecast_predictions_df_1m" : forecast_predictions_df_1m['log_returns'].values,
        "forecast_predictions_df_2m" : forecast_predictions_df_2m['log_returns'].values,
        "forecast_predictions_df_3m" : forecast_predictions_df_3m['log_returns'].values,
        "forecast_predictions_df_4m" : forecast_predictions_df_4m['log_returns'].values,
        "forecast_predictions_df_5m" : forecast_predictions_df_5m['log_returns'].values,
        "forecast_predictions_df_6m" : forecast_predictions_df_6m['log_returns'].values,
        "forecast_predictions_df_7m" : forecast_predictions_df_7m['log_returns'].values,
        
        "forecast_predictions_df_8m" : forecast_predictions_df_8m['log_returns'].values,
        "forecast_predictions_df_9m" : forecast_predictions_df_9m['log_returns'].values,
        "forecast_predictions_df_10m" : forecast_predictions_df_10m['log_returns'].values,
        "forecast_predictions_df_11m" : forecast_predictions_df_11m['log_returns'].values
    }
    
    # Associate the dictionary with the ETF ticker symbol
    etf_dict[i] = dict_data
     

[*********************100%%**********************]  1 of 1 completed

VanEck Semiconductor ETF : SMH
 ETF: SMH





Best Model Parameters:
OrderedDict([('bootstrap', True), ('criterion', 'friedman_mse'), ('max_depth', 50), ('max_features', 'sqrt'), ('max_leaf_nodes', 100), ('min_impurity_decrease', 0.0), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 100)])
Mean Absolute Error (MAE): 0.002725930028260518
Mean Absolute Scaled Error (MASE): 0.1413940369080162
21
20
20
22
22
19
22
22
20
23
20


[*********************100%%**********************]  1 of 1 completed

iShares Semiconductor ETF : SOXX
 ETF: SOXX





Best Model Parameters:
OrderedDict([('bootstrap', True), ('criterion', 'friedman_mse'), ('max_depth', 30), ('max_features', 'sqrt'), ('max_leaf_nodes', 89), ('min_impurity_decrease', 0.0), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 214)])
Mean Absolute Error (MAE): 0.0023000347628884463
Mean Absolute Scaled Error (MASE): 0.11789334834839843
21
20
20
22
22
19
22
22
20
23
20


[*********************100%%**********************]  1 of 1 completed

Invesco Semiconductors ETF : PSI
 ETF: PSI





Best Model Parameters:
OrderedDict([('bootstrap', False), ('criterion', 'friedman_mse'), ('max_depth', 33), ('max_features', 'sqrt'), ('max_leaf_nodes', 78), ('min_impurity_decrease', 5.281657808306003e-21), ('min_samples_leaf', 4), ('min_samples_split', 9), ('n_estimators', 252)])
Mean Absolute Error (MAE): 0.002467632363688787
Mean Absolute Scaled Error (MASE): 0.1208335886293755
21
20
20
22
22
19
22
22
20
23
20


[*********************100%%**********************]  1 of 1 completed

SPDR S&P Semiconductor ETF : XSD
 ETF: XSD





Best Model Parameters:
OrderedDict([('bootstrap', True), ('criterion', 'friedman_mse'), ('max_depth', 16), ('max_features', 'sqrt'), ('max_leaf_nodes', 82), ('min_impurity_decrease', 1.1843188051446196e-19), ('min_samples_leaf', 4), ('min_samples_split', 10), ('n_estimators', 117)])
Mean Absolute Error (MAE): 0.0014950202113515807
Mean Absolute Scaled Error (MASE): 0.07158551146717708
21
20
20
22
22
19
22
22
20
23
20


[*********************100%%**********************]  1 of 1 completed

iShares U.S. Technology ETF : IYW
 ETF: IYW





Best Model Parameters:
OrderedDict([('bootstrap', True), ('criterion', 'friedman_mse'), ('max_depth', 10), ('max_features', 'log2'), ('max_leaf_nodes', 76), ('min_impurity_decrease', 0.0), ('min_samples_leaf', 4), ('min_samples_split', 10), ('n_estimators', 300)])
Mean Absolute Error (MAE): 0.0019942984885662563
Mean Absolute Scaled Error (MASE): 0.13379127419763567
21
20
20
22
22
19
22
22
20
23
20


[*********************100%%**********************]  1 of 1 completed

The Technology Select Sector SPDR Fund : XLK
 ETF: XLK





Best Model Parameters:
OrderedDict([('bootstrap', False), ('criterion', 'friedman_mse'), ('max_depth', 13), ('max_features', 'log2'), ('max_leaf_nodes', 45), ('min_impurity_decrease', 6.866031712929126e-20), ('min_samples_leaf', 4), ('min_samples_split', 10), ('n_estimators', 180)])
Mean Absolute Error (MAE): 0.0013973278102772297
Mean Absolute Scaled Error (MASE): 0.09848074340350242
21
20
20
22
22
19
22
22
20
23
20


[*********************100%%**********************]  1 of 1 completed

Vanguard Information Technology Index Fund ETF Shares : VGT
 ETF: VGT





Best Model Parameters:
OrderedDict([('bootstrap', True), ('criterion', 'friedman_mse'), ('max_depth', 49), ('max_features', 'log2'), ('max_leaf_nodes', 30), ('min_impurity_decrease', 0.0), ('min_samples_leaf', 3), ('min_samples_split', 2), ('n_estimators', 101)])
Mean Absolute Error (MAE): 0.0022690069332535925
Mean Absolute Scaled Error (MASE): 0.15698612471824253
21
20
20
22
22
19
22
22
20
23
20


[*********************100%%**********************]  1 of 1 completed

Fidelity MSCI Information Technology Index ETF : FTEC
 ETF: FTEC





Best Model Parameters:
OrderedDict([('bootstrap', False), ('criterion', 'absolute_error'), ('max_depth', 44), ('max_features', 'log2'), ('max_leaf_nodes', 96), ('min_impurity_decrease', 0.12198774331506781), ('min_samples_leaf', 3), ('min_samples_split', 8), ('n_estimators', 166)])
Mean Absolute Error (MAE): 0.00983196123926118
Mean Absolute Scaled Error (MASE): 0.6799192726361755
21
20
20
22
22
19
22
22
20
23
20


[*********************100%%**********************]  1 of 1 completed

iShares Expanded Tech Sector ETF : IGM
 ETF: IGM





Best Model Parameters:
OrderedDict([('bootstrap', True), ('criterion', 'friedman_mse'), ('max_depth', 30), ('max_features', 'sqrt'), ('max_leaf_nodes', 89), ('min_impurity_decrease', 0.0), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 214)])
Mean Absolute Error (MAE): 0.001833461873548311
Mean Absolute Scaled Error (MASE): 0.1257170952276096
21
20
20
22
22
19
22
22
20
23
20


[*********************100%%**********************]  1 of 1 completed

iShares Global Tech ETF : IXN
 ETF: IXN





Best Model Parameters:
OrderedDict([('bootstrap', True), ('criterion', 'friedman_mse'), ('max_depth', 30), ('max_features', 'sqrt'), ('max_leaf_nodes', 89), ('min_impurity_decrease', 0.0), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 214)])
Mean Absolute Error (MAE): 0.001219763298827492
Mean Absolute Scaled Error (MASE): 0.0860605343989813
21
20
20
22
22
19
22
22
20
23
20


In [12]:
etf_history.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,Daily Return,Volatility,Market Cap,P/B Ratio,Momentum,Rolling Beta,Daily Profitability (%),Div yield,ATR,RVI,RSI,ROC,log_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-11-15,82.520737,82.670395,81.453186,81.652725,154200,0.0,0.0,0.0,-0.020701,0.191732,5020205000.0,0.953693,-1.556442,0.010481,-2.070133,0.0,1.270292,43.29588,44.556557,-1.468818,-0.020919
2024-11-18,81.622795,82.401009,81.483116,82.111679,116600,0.0,0.0,0.0,0.005621,0.192413,5048423000.0,0.959053,-1.406776,0.010264,0.56208,0.0,1.253511,48.563763,47.021849,2.020583,0.005605
2024-11-19,81.582885,82.720284,81.582885,82.540695,114300,0.0,0.0,0.0,0.005225,0.192961,5074800000.0,0.964064,-1.277077,0.010068,0.522479,0.0,1.247982,53.273315,49.291646,2.211515,0.005211
2024-11-20,82.430946,82.630486,81.413276,82.381058,115700,0.0,0.0,0.0,-0.001934,0.192999,5064985000.0,0.9622,-1.426735,0.009916,-0.193405,0.0,1.246517,48.464352,48.459659,2.20324,-0.001936
2024-11-21,83.079453,83.328881,81.93209,82.959732,232100,0.0,0.0,0.0,0.007024,0.187008,5100563000.0,0.968959,0.448975,0.009878,0.702436,0.0,1.253673,53.117898,51.645775,1.489077,0.007
2024-11-22,82.94976,83.348847,82.770172,83.189209,161500,0.0,0.0,0.0,0.002766,0.187025,5114672000.0,0.971639,0.458954,0.00988,0.276612,0.0,1.22153,57.364583,52.889492,0.011988,0.002762
2024-11-25,83.757904,84.057221,82.959739,83.408707,206900,0.0,0.0,0.0,0.002639,0.186516,5128167000.0,0.974203,0.279358,0.010143,0.263854,0.0,1.215623,61.24344,54.105465,-1.670192,0.002635
2024-11-26,83.548386,83.857681,83.428665,83.668114,202400,0.0,0.0,0.0,0.00311,0.186743,5144116000.0,0.977232,0.598625,0.010235,0.311007,0.0,1.179116,64.804287,55.565173,-0.96835,0.003105
2024-11-27,83.23909,83.378769,82.211448,82.750214,91400,0.0,0.0,0.0,-0.010971,0.185403,5087681000.0,0.966511,-1.346916,0.010485,-1.097073,0.0,1.192332,59.006995,49.558654,-1.250152,-0.011031
2024-11-29,82.830032,83.708023,82.690353,83.608253,106300,0.0,0.0,0.0,0.010369,0.181974,5140436000.0,0.976533,0.738319,0.010793,1.036903,0.0,1.184015,62.717974,54.50907,-0.261835,0.010316


In [13]:
def calculate_sharpe_ratio(returns, annual_risk_free_rate=0.1,period='daily'):
    #excess_returns = rate_of_return(returns) - risk_free_rate
    
    # Convert annual risk-free rate to daily rate
    daily_risk_free_rate = (1 + annual_risk_free_rate) ** (1/252) - 1
    
    # Calculate mean daily log return
    mean_return = np.mean(returns)
    
    # Calculate excess daily log return
    excess_return = mean_return - daily_risk_free_rate
    
    # Calculate standard deviation of daily log returns
    std_return = np.std(returns)
    
    # Print diagnostic information
    #print(f"Mean Daily Log Return: {mean_return}")
    #print(f"Excess Daily Log Return: {excess_return}")
    #print(f"Standard Deviation of Daily Log Returns: {std_return}")
    
    # Check for zero standard deviation to avoid division by zero
    if std_return == 0:
        return 0
    
    # Calculate Sharpe ratio
    sharpe_ratio = (excess_return / std_return) * np.sqrt(252)  # Annualize the Sharpe ratio
    return sharpe_ratio
    #return excess_returns / np.std(returns)


def calculate_rachev_ratio(returns, lower_percentile=5, upper_percentile=95):
    # Step 1: Sort the returns
    sorted_returns = np.sort(returns)
    
    # Step 2: Determine the percentiles
    lower_threshold = np.percentile(sorted_returns, lower_percentile)
    upper_threshold = np.percentile(sorted_returns, upper_percentile)
    
    # Step 3: Calculate Expected Shortfall (ES)
    es = np.mean(sorted_returns[sorted_returns <= lower_threshold])
    
    # Step 4: Calculate Expected Gain (EG)
    eg = np.mean(sorted_returns[sorted_returns >= upper_threshold])
    
    # Step 5: Compute the Rachev Ratio
    rachev_ratio = eg / -es
    return rachev_ratio


def calculate_volatility_clustering(returns):
    squared_returns = returns ** 2
    n = len(squared_returns)
    
    # Mean of squared returns
    mean_squared_returns = np.mean(squared_returns)
    
    # Calculate the numerator and denominator for autocorrelation at lag 1
    numerator = np.sum((squared_returns[:-1] - mean_squared_returns) * (squared_returns[1:] - mean_squared_returns))
    denominator = np.sum((squared_returns - mean_squared_returns) ** 2)
    
    if denominator == 0:
        return 0  # Avoid division by zero
    
    rho_1 = numerator / denominator
    return rho_1

def calculate_sortino_ratio(log_returns, target_log_return=0.0):
    """
    Calculate the Sortino Ratio using log returns.
    
    Parameters:
    - log_returns (array-like): Array or list of log returns for the period.
    - target_log_return (float): The target log return. Default is 0, which is often used as a benchmark.
    
    Returns:
    - float: The Sortino Ratio.
    """
    # Convert input to a NumPy array for easier calculations
    log_returns = np.array(log_returns)
    
    # Calculate the average period log return (R)
    avg_log_return = np.mean(log_returns)
    
    # Calculate the Target Downside Deviation (TDD)
    downside_deviation = np.sqrt(
        np.mean(np.square(np.maximum(0, target_log_return - log_returns)))
    )
    
    # Calculate Sortino Ratio
    #sortino_ratio_value = (avg_log_return - target_log_return) / downside_deviation
    epsilon = 1e-8
    
    # Add epsilon to downside_deviation to prevent division by zero
    #if downside_deviation == 0:
     #   return np.nan  # Return NaN if downside deviation is zero
    
    # Calculate Sortino Ratio
    sortino_ratio_value = (avg_log_return - target_log_return) / (downside_deviation + epsilon)
    
    return sortino_ratio_value


In [14]:
"""def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering, 
    mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
    mean_volatility_clustering, std_volatility_clustering
):
    forecasted_mean = np.mean(forecasted_values)
    forecasted_mean_normalized = (forecasted_mean - mean_forecast) / std_forecast
    rachev_normalized = (rachev_ratio - mean_rachev) / std_rachev
    sharpe_normalized = (sharpe_ratio - mean_sharpe) / std_sharpe
    
    sortino_normalized = (sortino_ratio - mean_sortino) / std_sortino
    
    volatility_clustering_normalized = (volatility_clustering - mean_volatility_clustering) / std_volatility_clustering
    
    score = forecasted_mean_normalized - (risk_percentage * rachev_normalized) + sharpe_normalized + sortino_normalized - volatility_clustering_normalized
    return score
"""

'def calculate_composite_score(\n    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering, \n    mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, std_sortino, \n    mean_volatility_clustering, std_volatility_clustering\n):\n    forecasted_mean = np.mean(forecasted_values)\n    forecasted_mean_normalized = (forecasted_mean - mean_forecast) / std_forecast\n    rachev_normalized = (rachev_ratio - mean_rachev) / std_rachev\n    sharpe_normalized = (sharpe_ratio - mean_sharpe) / std_sharpe\n    \n    sortino_normalized = (sortino_ratio - mean_sortino) / std_sortino\n    \n    volatility_clustering_normalized = (volatility_clustering - mean_volatility_clustering) / std_volatility_clustering\n    \n    score = forecasted_mean_normalized - (risk_percentage * rachev_normalized) + sharpe_normalized + sortino_normalized - volatility_clustering_normalized\n    return score\n'

In [39]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering, 
    mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
    mean_volatility_clustering, std_volatility_clustering
):
    epsilon = 1e-8  # To prevent division by zero
    forecasted_mean = np.mean(forecasted_values)

    # Debugging: Log raw input values
    print(f"\nDebug: Composite Score Calculation")
    print(f"Forecasted Mean: {forecasted_mean}, Risk Percentage: {risk_percentage}")
    print(f"Rachev Ratio: {rachev_ratio}, Sharpe Ratio: {sharpe_ratio}")
    print(f"Sortino Ratio: {sortino_ratio}, Volatility Clustering: {volatility_clustering}")

    # Normalize the components with epsilon
    forecasted_mean_normalized = (forecasted_mean - mean_forecast) / (std_forecast + epsilon)
    rachev_normalized = (rachev_ratio - mean_rachev) / (std_rachev + epsilon)
    sharpe_normalized = (sharpe_ratio - mean_sharpe) / (std_sharpe + epsilon)
    sortino_normalized = (sortino_ratio - mean_sortino) / (std_sortino + epsilon)
    volatility_clustering_normalized = (volatility_clustering - mean_volatility_clustering) / (std_volatility_clustering + epsilon)

    # Debugging: Log normalized values
    print(f"Normalized Values -> Forecasted Mean: {forecasted_mean_normalized}, Rachev: {rachev_normalized}")
    print(f"Sharpe: {sharpe_normalized}, Sortino: {sortino_normalized}, Volatility Clustering: {volatility_clustering_normalized}")

    # Composite score calculation
    score = (
        forecasted_mean_normalized
        - (risk_percentage * rachev_normalized)
        + sharpe_normalized
        + sortino_normalized
        - volatility_clustering_normalized
    )

    # Debugging: Log final score
    print(f"Final Composite Score: {score}")

    return score


def process_etf_data_monthly(tickers, etf_dict):
    etf_pred_dict = {}

    # Determine the forecast periods dynamically from the etf_dict
    sample_etf = next(iter(etf_dict.values()))
    forecast_periods = [key.split('_')[-1] for key in sample_etf.keys() if key.startswith('forecast_predictions_df')]

    for etf_name in tickers:
        etf_pred_dict[etf_name] = {
            f"returns_{period}": etf_dict[etf_name][f"forecast_predictions_df_{period}"]
            for period in forecast_periods
        }

        for period in forecast_periods:
            returns = etf_pred_dict[etf_name][f"returns_{period}"]

            # Log returns for debugging
            print(f"\nDebug: Returns for {etf_name}, Forecast Period {period}: {returns}")

            # Calculate metrics
            etf_pred_dict[etf_name][f"rachev_ratio_{period}"] = calculate_rachev_ratio(returns)
            etf_pred_dict[etf_name][f"sharpe_ratio_{period}"] = calculate_sharpe_ratio(returns)
            etf_pred_dict[etf_name][f"sortino_ratio_{period}"] = calculate_sortino_ratio(returns)
            etf_pred_dict[etf_name][f"volatility_clustering_{period}"] = calculate_volatility_clustering(returns)

    return etf_pred_dict


def calculate_means_and_stds_monthly(etf_pred_dict, forecast_period):
    returns_list = [etf_pred_dict[etf][f'returns_{forecast_period}'] for etf in etf_pred_dict]

    # Compute global means and standard deviations
    mean_forecast = np.mean([np.mean(returns) for returns in returns_list])
    std_forecast = np.std([np.mean(returns) for returns in returns_list])

    print(f"\nDebug: Forecast Period = {forecast_period}")
    print(f"All Returns Means: {[np.mean(returns) for returns in returns_list]}")
    print(f"Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")

    rachev_ratios = np.array([etf_pred_dict[etf][f'rachev_ratio_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Rachev Ratios: {rachev_ratios}")
    mean_rachev = np.mean(rachev_ratios)
    std_rachev = np.std(rachev_ratios)

    sharpe_ratios = np.array([etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Sharpe Ratios: {sharpe_ratios}")
    mean_sharpe = np.mean(sharpe_ratios)
    std_sharpe = np.std(sharpe_ratios)

    sortino_ratios = np.array([etf_pred_dict[etf][f'sortino_ratio_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Sortino Ratios: {sortino_ratios}")
    mean_sortino = np.mean(sortino_ratios)
    std_sortino = np.std(sortino_ratios)

    volatility_clustering = np.array([etf_pred_dict[etf][f'volatility_clustering_{forecast_period}'] for etf in etf_pred_dict])
    print(f"All Volatility Clustering: {volatility_clustering}")
    mean_volatility_clustering = np.mean(volatility_clustering)
    std_volatility_clustering = np.std(volatility_clustering)

    print(f"Mean Rachev = {mean_rachev}, Std Rachev = {std_rachev}")
    print(f"Mean Sharpe = {mean_sharpe}, Std Sharpe = {std_sharpe}")
    print(f"Mean Sortino = {mean_sortino}, Std Sortino = {std_sortino}")
    print(f"Mean Volatility Clustering = {mean_volatility_clustering}, Std Volatility Clustering = {std_volatility_clustering}")

    return (
        mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering
    )


def calculate_scores_for_etfs_monthly(etf_pred_dict, forecast_period, risk_percentage):
    (mean_forecast, std_forecast, mean_rachev, std_rachev, 
     mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
     mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds_monthly(etf_pred_dict, forecast_period)

    scores = []
    for etf in etf_pred_dict:
        forecasted_values = etf_pred_dict[etf][f'returns_{forecast_period}']
        rachev_ratio = etf_pred_dict[etf][f'rachev_ratio_{forecast_period}']
        sharpe_ratio = etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}']
        volatility_clustering = etf_pred_dict[etf][f'volatility_clustering_{forecast_period}']
        sortino_ratio = etf_pred_dict[etf][f'sortino_ratio_{forecast_period}']

        # Debugging: Log inputs to composite score calculation
        print(f"\nDebug: ETF = {etf}, Forecast Period = {forecast_period}")
        print(f"Forecasted Values Mean: {np.mean(forecasted_values)}")
        print(f"Rachev Ratio: {rachev_ratio}, Sharpe Ratio: {sharpe_ratio}")
        print(f"Sortino Ratio: {sortino_ratio}, Volatility Clustering: {volatility_clustering}")
        print(f"Means and Stds: Mean Forecast = {mean_forecast}, Std Forecast = {std_forecast}")
        
        # Calculate the composite score
        score = calculate_composite_score(
            forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, 
            sortino_ratio, volatility_clustering, mean_forecast, std_forecast, 
            mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, 
            std_sortino, mean_volatility_clustering, std_volatility_clustering
        )

        scores.append({
            'ETF': etf,
            'Month': forecast_period,
            'RiskPercentage': risk_percentage,
            'Score': score
        })

        # Debugging: Log the calculated score
        print(f"Calculated Score for {etf} ({forecast_period}): {score}")

    return scores


def main_monthly(tickers, etf_dict):
    etf_pred_dict = process_etf_data_monthly(tickers, etf_dict)
    
    risk_percentage = 0.10
    monthly_scores = {}

    # Iterate over 12 months
    for month in range(1, 13):
        month_key = f"{month}m"
        if any(f"returns_{month_key}" in etf_pred_dict[etf] for etf in etf_pred_dict):
            scores = calculate_scores_for_etfs_monthly(etf_pred_dict, month_key, risk_percentage)
            monthly_scores[month_key] = scores
            print(f"Scores calculated for {month_key}:")
            for score in scores:
                print(score)
        else:
            print(f"Skipping score calculation for {month_key}: No ETFs have data for this month.")

    return etf_pred_dict, monthly_scores


# Example usage:
# Execute monthly scoring
etf_pred_dict, monthly_scores = main_monthly(tickers, etf_dict)





Debug: Returns for SMH, Forecast Period 1m: [-0.02420223 -0.01118687 -0.00072344  0.01117988  0.03903612  0.0088854
  0.00711443  0.01020354  0.00163733  0.02274735 -0.00013798  0.03540212
  0.04134176  0.01086362  0.01104122  0.02458771  0.00646798 -0.0125126
  0.01835858 -0.00209159 -0.00807263]

Debug: Returns for SMH, Forecast Period 2m: [ 1.36961676e-02  2.56218169e-02  2.56857549e-02 -5.37493999e-03
  2.59382112e-02  2.17980307e-02  2.74153728e-02  1.07546086e-03
 -1.07480590e-02  2.59954525e-02  6.43691896e-04 -7.62985253e-05
 -1.30566447e-02 -1.81764904e-03  5.96523477e-02 -2.79890223e-04
  1.32975139e-02  1.86070132e-03 -4.58534163e-03  2.60741226e-02]

Debug: Returns for SMH, Forecast Period 3m: [ 0.04172052  0.02309381 -0.00884701  0.02987111  0.03776553 -0.02723932
 -0.01049677  0.03425536 -0.01225463 -0.01079653 -0.00204281  0.00800667
  0.00071561  0.02201177  0.02696527  0.01018372  0.00164934 -0.00313888
  0.00485789  0.00597236]

Debug: Returns for SMH, Forecast Perio

In [40]:
# Initialize an empty dictionary to hold DataFrames for each month
monthly_scores_dfs = {}

# Assuming monthly_scores is a dictionary with keys as month identifiers ('1m', '2m', ..., '12m') 
# and values as the respective scores
for month_key, scores in monthly_scores.items():
    # Convert scores for the current month into a DataFrame
    monthly_scores_dfs[month_key] = pd.DataFrame(scores)

    
# Create DataFrame variables dynamically for 12 months
for month in range(1, 13):
    month_key = f'{month}m'
    globals()[f'df_scores_{month_key}'] = monthly_scores_dfs.get(month_key, pd.DataFrame())




In [41]:
# Initialize a dictionary to hold the top 2 ETFs for each month
top_etfs_monthly = {}

# Loop through the monthly DataFrames and select the top 2 ETFs for each month
for month_key, df_scores in monthly_scores_dfs.items():
    # Select the top 2 ETFs based on the 'Score' column
    top_etfs_monthly[month_key] = df_scores.nlargest(2, 'Score')
    
    # Print the results for the current month
    print(f"\nTop 2 ETFs for {month_key} forecast:")
    print(top_etfs_monthly[month_key])


Top 2 ETFs for 1m forecast:
    ETF Month  RiskPercentage     Score
7  FTEC    1m             0.1  2.939085
0   SMH    1m             0.1  1.656696

Top 2 ETFs for 2m forecast:
    ETF Month  RiskPercentage     Score
7  FTEC    2m             0.1  1.992716
0   SMH    2m             0.1  1.919753

Top 2 ETFs for 3m forecast:
    ETF Month  RiskPercentage     Score
7  FTEC    3m             0.1  2.668419
0   SMH    3m             0.1  1.398387

Top 2 ETFs for 4m forecast:
    ETF Month  RiskPercentage     Score
7  FTEC    4m             0.1  4.234614
0   SMH    4m             0.1  1.512169

Top 2 ETFs for 5m forecast:
    ETF Month  RiskPercentage     Score
0   SMH    5m             0.1  1.985824
7  FTEC    5m             0.1  1.727723

Top 2 ETFs for 6m forecast:
    ETF Month  RiskPercentage     Score
7  FTEC    6m             0.1  1.927017
0   SMH    6m             0.1  1.789899

Top 2 ETFs for 7m forecast:
    ETF Month  RiskPercentage     Score
7  FTEC    7m             0.1  3.5498

In [42]:
def select_top_etfs_monthly(df_scores, forecast_period):
    if df_scores.empty:
        print(f"No scores available for {forecast_period}. Skipping.")
        return []
    print(f"Processing scores for {forecast_period}:")
    print(df_scores.head())  # Check the top rows of the DataFrame
    top_etfs = df_scores.nlargest(2, 'Score')
    print(f"Top ETFs for {forecast_period}: {top_etfs['ETF'].tolist()}")
    return top_etfs['ETF'].tolist()

# Function to generate month ranges
def generate_month_ranges(start_date, end_date):
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    month_ranges = []

    while start < end:
        # Start of the current month
        month_start = start
        # End of the current month
        next_month_start = (start + timedelta(days=32)).replace(day=1)
        month_end = next_month_start - timedelta(days=1)

        # Ensure the end date doesn't go beyond the specified end_date
        if month_end > end:
            month_end = end

        # Add the range to the list
        month_ranges.append((month_start.strftime('%Y-%m-%d'), month_end.strftime('%Y-%m-%d')))
        # Move to the next month's start
        start = next_month_start

    return month_ranges

# Function to gather ETF data for months
def gather_etf_data_for_months(tickers, month_ranges):
    etf_histories = {}
    for start_date, end_date in month_ranges:
        month = f"{start_date} to {end_date}"
        etf_histories[month] = {}
        for ticker in tickers:
            etf_data = yf.download(ticker, start=start_date, end=end_date)
            if etf_data.empty:
                print(f"No data found for {ticker} in {month}")
                continue
            etf_data.index = pd.to_datetime(etf_data.index)
            etf_histories[month][ticker] = etf_data
    return etf_histories

# Function to initialize shares for the first month
def initialize_shares_for_first_month(top_etfs_1m, etf_histories, month, investment_amount=50000):
    ticker_shares = {}
    first_trading_day_start = month.split(" to ")[0]

    for ticker in top_etfs_1m:
        etf_history = etf_histories.get(month, {}).get(ticker)

        if etf_history is not None:
            first_trading_day = pd.to_datetime(first_trading_day_start) + BDay(1)
            if first_trading_day not in etf_history.index:
                first_trading_day = etf_history.index[etf_history.index.searchsorted(first_trading_day)]

            price_on_first_trading_day = etf_history.loc[first_trading_day, 'Close']
            num_shares = (investment_amount * 0.975) / price_on_first_trading_day
            print(f"Shares 1st month: ({investment_amount} * 0.975) / {price_on_first_trading_day}")
            ticker_shares[ticker] = num_shares
            print(f"Ticker: {ticker}, First trading day: {first_trading_day.date()}, Price: {price_on_first_trading_day}, Shares: {num_shares:.2f}")
        else:
            print(f"No data found for {ticker} in {month}")
    return ticker_shares

# Function to manage ETF portfolio monthly
def manage_etf_portfolio_monthly(
    top_etfs_previous, top_etfs_current, previous_month, current_month, ticker_shares, gathered_data_per_month
):
    etf_histories_for_current_month = gathered_data_per_month.get(current_month, {})

    # Extract ETFs as lists to preserve order
    top2etfs_previous = list(top_etfs_previous)
    top2etfs_current = list(top_etfs_current)

    print(f"Top 2 ETFs for {previous_month}: {top2etfs_previous}")
    print(f"Top 2 ETFs for {current_month}: {top2etfs_current}")

    # Step 1: Identify common ETFs (present in both months)
    etfs_common = [etf for etf in top2etfs_previous if etf in top2etfs_current]

    # Step 2: Identify ETFs to sell (present in the previous month but not in the current month)
    etfs_to_sell = [etf for etf in top2etfs_previous if etf not in top2etfs_current]

    # Step 3: Identify ETFs to buy (present in the current month but not in the previous month)
    etfs_to_buy = [etf for etf in top2etfs_current if etf not in top2etfs_previous]

    # Step 4: Sell ETFs that are no longer in the current top ETFs
    total_selling_value = 0
    for etf in etfs_to_sell:
        no_of_shares = ticker_shares.get(etf, 0)
        if no_of_shares > 0:
            # Check if data for the ETF is available in the current month
            if etf in etf_histories_for_current_month:
                first_trading_day_current_month = etf_histories_for_current_month[etf].loc[etf_histories_for_current_month[etf].index[0], 'Close']
                
                selling_value = no_of_shares * first_trading_day_current_month * 0.975
                print("Formula: ",no_of_shares," * ", first_trading_day_current_month,"* 0.975")
                total_selling_value += selling_value  # Sum total selling value
                print(f"Sell {etf}: {no_of_shares:.2f} shares at {first_trading_day_current_month:.2f}. Total value: {selling_value:.2f}  \n")
                del ticker_shares[etf]
            else:
                print(f"Data for {etf} is missing for {current_month}. Skipping sale.")
        else:
            print(f"No shares found for {etf} to sell.")

    # Step 5: Buy new ETFs that were not in the previous month's top ETFs
    for etf in etfs_to_buy:
        if total_selling_value > 0 and etf in etf_histories_for_current_month:
            first_trading_day_new_etf = etf_histories_for_current_month[etf].loc[etf_histories_for_current_month[etf].index[0], 'Close']
            new_shares = (total_selling_value * 0.975) / first_trading_day_new_etf
            print("Formula: ()",total_selling_value," * 0.975)/", first_trading_day_current_month)
            
            print(f"Buy {etf}: {new_shares:.2f} shares at {first_trading_day_new_etf:.2f}. \n")
            ticker_shares[etf] = new_shares
        else:
            print(f"Data for {etf} is missing or no selling value available. Skipping purchase of {etf}.")

    # Step 6: Maintain the order of ETFs in the ticker_shares dictionary based on the current top 2 ETFs
    ordered_ticker_shares = {etf: ticker_shares[etf] for etf in top2etfs_current if etf in ticker_shares}

    print(f"Updated ticker shares after {current_month}: {ordered_ticker_shares}")
    print("")
    return ordered_ticker_shares



In [43]:
# Generate Month Ranges
month_ranges = generate_month_ranges('2024-01-01', '2024-12-01')

# Assuming `tickers` is a list of ETF tickers
tickers = tickers

# Gather ETF Data
etf_histories = gather_etf_data_for_months(tickers, month_ranges)

# Create a mapping between month numbers and date ranges
month_key_mapping = {f"{i+1}m": month_range for i, month_range in enumerate(etf_histories.keys())}

# Debug: Print the month key mapping
print("Month Key Mapping:", month_key_mapping)

# Align `top_etfs_monthly` Keys with `etf_histories`
aligned_top_etfs_monthly = {}

for month_key, df_scores in monthly_scores_dfs.items():
    # Call the `select_top_etfs_monthly` function here
    forecast_period = month_key_mapping.get(month_key, None)
    if forecast_period:
        aligned_top_etfs_monthly[forecast_period] = select_top_etfs_monthly(df_scores, forecast_period)
        


# Portfolio initialization and management
ticker_shares = {}
ticker_shares_per_month = {}

for i, month_range in enumerate(etf_histories.keys()):
    current_month_key = month_range
    if i == 0:
        # First month initialization
        ticker_shares = initialize_shares_for_first_month(
            aligned_top_etfs_monthly[current_month_key],
            etf_histories,
            current_month_key
        )
        ticker_shares_per_month[current_month_key] = ticker_shares.copy()
    else:
        # Subsequent months' portfolio management
        prev_month_key = list(etf_histories.keys())[i - 1]
        ticker_shares = manage_etf_portfolio_monthly(
            aligned_top_etfs_monthly[prev_month_key],
            aligned_top_etfs_monthly[current_month_key],
            prev_month_key,
            current_month_key,
            ticker_shares,
            etf_histories
        )
        ticker_shares_per_month[current_month_key] = ticker_shares.copy()

print("\nFinal Ticker Shares per Month:")
for month, shares in ticker_shares_per_month.items():
    print(f"Month {month}: {shares}")


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Month Key Mapping: {'1m': '2024-01-01 to 2024-01-31', '2m': '2024-02-01 to 2024-02-29', '3m': '2024-03-01 to 2024-03-31', '4m': '2024-04-01 to 2024-04-30', '5m': '2024-05-01 to 2024-05-31', '6m': '2024-06-01 to 2024-06-30', '7m': '2024-07-01 to 2024-07-31', '8m': '2024-08-01 to 2024-08-31', '9m': '2024-09-01 to 2024-09-30', '10m': '2024-10-01 to 2024-10-31', '11m': '2024-11-01 to 2024-11-30'}
Processing scores for 2024-01-01 to 2024-01-31:
    ETF Month  RiskPercentage     Score
0   SMH    1m             0.1  1.656696
1  SOXX    1m             0.1 -0.728974
2   PSI    1m             0.1 -1.168920
3   XSD    1m             0.1 -2.815225
4   IYW    1m             0.1  0.700924
Top ETFs for 2024-01-01 to 2024-01-31: ['FTEC', 'SMH']
Processing scores for 2024-02-01 to 2024-02-29:
    ETF Month  RiskPercentage     Score
0   SMH    2m             0.1  1.919753
1  SOXX    2m             0.1  0.218460
2   PSI    2m             0.1  0.634205
3   XSD    2m             0.1 -1.781446
4   IYW    2m




In [44]:
# Define the first trading day of December 2024
first_trading_day_dec = '2024-12-01'  # Adjust this to match the actual start date of December

# Identify the November and December month date range keys
november_range = list(ticker_shares_per_month.keys())[-1]  # Last key corresponds to November
december_start = first_trading_day_dec  # Replace with the actual start of December
print(f"Using data for November: {november_range}")

print(f"Fetching data starting from the first trading day of December: {december_start}")

december_end = '2024-12-31'
# Initialize a dictionary to store the values of shares
etf_values_dec = {}

# Ensure November data exists
if november_range in ticker_shares_per_month:
    # Fetch ETF shares from November
    ticker_shares_nov = ticker_shares_per_month[november_range]
    
    # Fetch the first trading day price of December for each ETF
    for ticker, shares in ticker_shares_nov.items():
        print(f"Fetching data for ticker {ticker} starting from {december_start}...")
        # Download historical data for December
        data = yf.download(ticker, start=december_start, end=december_end)
        
        if not data.empty:
            # Get the closing price of the first trading day of December
            closing_price_dec = data['Close'].iloc[0]
            # Calculate the value of the shares
            total_value = shares * closing_price_dec
            etf_values_dec[ticker] = total_value
            print(f"{ticker}: {shares:.2f} shares at ${closing_price_dec:.2f} each, total value: ${total_value:.2f}")
        else:
            print(f"{ticker}: No data available for December's first trading day.")
else:
    print(f"No data available in ticker_shares_per_month for November: {november_range}")



[*********************100%%**********************]  1 of 1 completed


Using data for November: 2024-11-01 to 2024-11-30
Fetching data starting from the first trading day of December: 2024-12-01
Fetching data for ticker FTEC starting from 2024-12-01...
FTEC: 348.74 shares at $186.99 each, total value: $65210.41
Fetching data for ticker XSD starting from 2024-12-01...


[*********************100%%**********************]  1 of 1 completed

XSD: 286.35 shares at $252.68 each, total value: $72355.77





### Value at the end of 11th month

In [45]:
# Check if there are any values in the dictionary
if etf_values_dec:
    print("\nETF values on December's first trading day:")
    # Sum all the values (total portfolio value)
    total_value = sum(etf_values_dec.values())
    print(f"Total portfolio value: {total_value:.2f}")
    # Optionally display individual ETF values
    for ticker, value in etf_values_dec.items():
        print(f"{ticker}: {value:.2f}")
else:
    print("No values could be calculated for December's first trading day.")



ETF values on December's first trading day:
Total portfolio value: 137566.18
FTEC: 65210.41
XSD: 72355.77


### With Smoothing

In [46]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, 
    volatility_clustering, mean_forecast, std_forecast, mean_rachev, std_rachev, 
    mean_sharpe, std_sharpe, mean_sortino, std_sortino, mean_volatility_clustering, 
    std_volatility_clustering
):
    epsilon = 1e-8  # To prevent division by zero
    forecasted_mean = np.mean(forecasted_values)

    forecasted_mean_normalized = (forecasted_mean - mean_forecast) / (std_forecast + epsilon)
    rachev_normalized = (rachev_ratio - mean_rachev) / (std_rachev + epsilon)
    sharpe_normalized = (sharpe_ratio - mean_sharpe) / (std_sharpe + epsilon)
    sortino_normalized = (sortino_ratio - mean_sortino) / (std_sortino + epsilon)
    volatility_clustering_normalized = (volatility_clustering - mean_volatility_clustering) / (std_volatility_clustering + epsilon)

    # Composite score calculation
    score = (
        forecasted_mean_normalized
        - (risk_percentage * rachev_normalized)
        + sharpe_normalized
        + sortino_normalized
        - volatility_clustering_normalized
    )

    return score

def smooth_scores(scores, alpha=0.2):
    """
    Smooth scores using Exponential Moving Average (EMA).
    """
    if not isinstance(scores, (list, np.ndarray)):
        raise ValueError("Scores must be a list or numpy array of numerical values.")
    
    scores = np.array(scores)
    smoothed_scores = np.zeros_like(scores)
    smoothed_scores[0] = scores[0]  # Initialize EMA
    
    for t in range(1, len(scores)):
        smoothed_scores[t] = alpha * scores[t] + (1 - alpha) * smoothed_scores[t - 1]
    
    return smoothed_scores.tolist()

def calculate_means_and_stds_monthly(etf_pred_dict, forecast_period):
    returns_list = [etf_pred_dict[etf][f'returns_{forecast_period}'] for etf in etf_pred_dict]
    mean_forecast = np.mean([np.mean(returns) for returns in returns_list])
    std_forecast = np.std([np.mean(returns) for returns in returns_list])

    rachev_ratios = np.array([etf_pred_dict[etf][f'rachev_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_rachev = np.mean(rachev_ratios)
    std_rachev = np.std(rachev_ratios)

    sharpe_ratios = np.array([etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_sharpe = np.mean(sharpe_ratios)
    std_sharpe = np.std(sharpe_ratios)

    sortino_ratios = np.array([etf_pred_dict[etf][f'sortino_ratio_{forecast_period}'] for etf in etf_pred_dict])
    mean_sortino = np.mean(sortino_ratios)
    std_sortino = np.std(sortino_ratios)

    volatility_clustering = np.array([etf_pred_dict[etf][f'volatility_clustering_{forecast_period}'] for etf in etf_pred_dict])
    mean_volatility_clustering = np.mean(volatility_clustering)
    std_volatility_clustering = np.std(volatility_clustering)

    return (
        mean_forecast, std_forecast, mean_rachev, std_rachev,
        mean_sharpe, std_sharpe, mean_sortino, std_sortino,
        mean_volatility_clustering, std_volatility_clustering
    )

def calculate_scores_for_etfs_monthly(etf_pred_dict, forecast_period, risk_percentage, alpha=0.2):
    (mean_forecast, std_forecast, mean_rachev, std_rachev, 
     mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
     mean_volatility_clustering, std_volatility_clustering) = calculate_means_and_stds_monthly(etf_pred_dict, forecast_period)

    raw_scores = []
    for etf in etf_pred_dict:
        forecasted_values = etf_pred_dict[etf][f'returns_{forecast_period}']
        rachev_ratio = etf_pred_dict[etf][f'rachev_ratio_{forecast_period}']
        sharpe_ratio = etf_pred_dict[etf][f'sharpe_ratio_{forecast_period}']
        volatility_clustering = etf_pred_dict[etf][f'volatility_clustering_{forecast_period}']
        sortino_ratio = etf_pred_dict[etf][f'sortino_ratio_{forecast_period}']

        score = calculate_composite_score(
            forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, 
            sortino_ratio, volatility_clustering, mean_forecast, std_forecast, 
            mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, 
            std_sortino, mean_volatility_clustering, std_volatility_clustering
        )

        raw_scores.append(score)

    # Apply smoothing to the raw scores
    smoothed_scores = smooth_scores(raw_scores, alpha=alpha)

    results = []
    for idx, etf in enumerate(etf_pred_dict):
        results.append({
            'ETF': etf,
            'Month': forecast_period,
            'RiskPercentage': risk_percentage,
            'RawScore': raw_scores[idx],
            'SmoothedScore': smoothed_scores[idx]
        })

    return results

def main_monthly(tickers, etf_dict):
    etf_pred_dict = process_etf_data_monthly(tickers, etf_dict)
    risk_percentage = 0.10
    monthly_scores = {}

    for month in range(1, 13):
        month_key = f"{month}m"
        if any(f"returns_{month_key}" in etf_pred_dict[etf] for etf in etf_pred_dict):
            scores = calculate_scores_for_etfs_monthly(etf_pred_dict, month_key, risk_percentage)
            monthly_scores[month_key] = scores
        else:
            print(f"Skipping score calculation for {month_key}: No ETFs have data for this month.")

    return etf_pred_dict, monthly_scores

# Select top ETFs for monthly portfolio management
def select_top_etfs_monthly(df_scores, forecast_period):
    if df_scores.empty:
        print(f"No scores available for {forecast_period}. Skipping.")
        return []
    top_etfs = df_scores.nlargest(2, 'SmoothedScore')
    return top_etfs['ETF'].tolist()

etf_pred_dict, monthly_scores = main_monthly(tickers, etf_dict)


# Initialize an empty dictionary to hold DataFrames for each month
monthly_scores_dfs = {}

# Assuming monthly_scores is a dictionary with keys as month identifiers ('1m', '2m', ..., '12m') 
# and values as the respective scores
for month_key, scores in monthly_scores.items():
    # Convert scores for the current month into a DataFrame
    monthly_scores_dfs[month_key] = pd.DataFrame(scores)

    
# Create DataFrame variables dynamically for 12 months
for month in range(1, 13):
    month_key = f'{month}m'
    globals()[f'df_scores_{month_key}'] = monthly_scores_dfs.get(month_key, pd.DataFrame())


# Initialize a dictionary to hold the top 2 ETFs for each month
top_etfs_monthly = {}

# Loop through the monthly DataFrames and select the top 2 ETFs for each month
for month_key, df_scores in monthly_scores_dfs.items():
    # Select the top 2 ETFs based on the 'Score' column
    top_etfs_monthly[month_key] = df_scores.nlargest(2, 'SmoothedScore')
    
    # Print the results for the current month
    print(f"\nTop 2 ETFs for {month_key} forecast:")
    print(top_etfs_monthly[month_key])



Debug: Returns for SMH, Forecast Period 1m: [-0.02420223 -0.01118687 -0.00072344  0.01117988  0.03903612  0.0088854
  0.00711443  0.01020354  0.00163733  0.02274735 -0.00013798  0.03540212
  0.04134176  0.01086362  0.01104122  0.02458771  0.00646798 -0.0125126
  0.01835858 -0.00209159 -0.00807263]

Debug: Returns for SMH, Forecast Period 2m: [ 1.36961676e-02  2.56218169e-02  2.56857549e-02 -5.37493999e-03
  2.59382112e-02  2.17980307e-02  2.74153728e-02  1.07546086e-03
 -1.07480590e-02  2.59954525e-02  6.43691896e-04 -7.62985253e-05
 -1.30566447e-02 -1.81764904e-03  5.96523477e-02 -2.79890223e-04
  1.32975139e-02  1.86070132e-03 -4.58534163e-03  2.60741226e-02]

Debug: Returns for SMH, Forecast Period 3m: [ 0.04172052  0.02309381 -0.00884701  0.02987111  0.03776553 -0.02723932
 -0.01049677  0.03425536 -0.01225463 -0.01079653 -0.00204281  0.00800667
  0.00071561  0.02201177  0.02696527  0.01018372  0.00164934 -0.00313888
  0.00485789  0.00597236]

Debug: Returns for SMH, Forecast Perio

In [47]:
# Generate Month Ranges
month_ranges = generate_month_ranges('2024-01-01', '2024-12-01')

# Assuming `tickers` is a list of ETF tickers
tickers = tickers

# Gather ETF Data
etf_histories = gather_etf_data_for_months(tickers, month_ranges)

# Create a mapping between month numbers and date ranges
month_key_mapping = {f"{i+1}m": month_range for i, month_range in enumerate(etf_histories.keys())}

# Debug: Print the month key mapping
print("Month Key Mapping:", month_key_mapping)

# Align `top_etfs_monthly` Keys with `etf_histories`
aligned_top_etfs_monthly = {}

for month_key, df_scores in monthly_scores_dfs.items():
    # Call the `select_top_etfs_monthly` function here
    forecast_period = month_key_mapping.get(month_key, None)
    if forecast_period:
        aligned_top_etfs_monthly[forecast_period] = select_top_etfs_monthly(df_scores, forecast_period)

# Portfolio initialization and management
ticker_shares = {}
ticker_shares_per_month = {}

for i, month_range in enumerate(etf_histories.keys()):
    current_month_key = month_range
    if i == 0:
        # First month initialization
        ticker_shares = initialize_shares_for_first_month(
            aligned_top_etfs_monthly[current_month_key],
            etf_histories,
            current_month_key
        )
        ticker_shares_per_month[current_month_key] = ticker_shares.copy()
    else:
        # Subsequent months' portfolio management
        prev_month_key = list(etf_histories.keys())[i - 1]
        ticker_shares = manage_etf_portfolio_monthly(
            aligned_top_etfs_monthly[prev_month_key],
            aligned_top_etfs_monthly[current_month_key],
            prev_month_key,
            current_month_key,
            ticker_shares,
            etf_histories
        )
        ticker_shares_per_month[current_month_key] = ticker_shares.copy()

print("\nFinal Ticker Shares per Month:")
for month, shares in ticker_shares_per_month.items():
    print(f"Month {month}: {shares}")


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Month Key Mapping: {'1m': '2024-01-01 to 2024-01-31', '2m': '2024-02-01 to 2024-02-29', '3m': '2024-03-01 to 2024-03-31', '4m': '2024-04-01 to 2024-04-30', '5m': '2024-05-01 to 2024-05-31', '6m': '2024-06-01 to 2024-06-30', '7m': '2024-07-01 to 2024-07-31', '8m': '2024-08-01 to 2024-08-31', '9m': '2024-09-01 to 2024-09-30', '10m': '2024-10-01 to 2024-10-31', '11m': '2024-11-01 to 2024-11-30'}
Shares 1st month: (50000 * 0.975) / 168.97000122070312
Ticker: SMH, First trading day: 2024-01-02, Price: 168.97000122070312, Shares: 288.51
Shares 1st month: (50000 * 0.975) / 185.11666870117188
Ticker: SOXX, First trading day: 2024-01-02, Price: 185.11666870117188, Shares: 263.35
Top 2 ETFs for 2024-01-01 to 2024-01-31: ['SMH', 'SOXX']
Top 2 ETFs for 2024-02-01 to 2024-02-29: ['SMH', 'SOXX']
Updated ticker shares after 2024-02-01 to 2024-02-29: {'SMH': 288.51275165894293, 'SOXX': 263.3474356579721}

Top 2 ETFs for 2024-02-01 to 2024-02-29: ['SMH', 'SOXX']
Top 2 ETFs for 2024-03-01 to 2024-03-31:




In [48]:
# Define the first trading day of December 2024
first_trading_day_dec = '2024-12-01'  # Adjust this to match the actual start date of December

# Identify the November and December month date range keys
november_range = list(ticker_shares_per_month.keys())[-1]  # Last key corresponds to November
december_start = first_trading_day_dec  # Replace with the actual start of December
print(f"Using data for November: {november_range}")

print(f"Fetching data starting from the first trading day of December: {december_start}")

december_end = '2024-12-31'
# Initialize a dictionary to store the values of shares
etf_values_dec = {}

# Ensure November data exists
if november_range in ticker_shares_per_month:
    # Fetch ETF shares from November
    ticker_shares_nov = ticker_shares_per_month[november_range]
    
    # Fetch the first trading day price of December for each ETF
    for ticker, shares in ticker_shares_nov.items():
        print(f"Fetching data for ticker {ticker} starting from {december_start}...")
        # Download historical data for December
        data = yf.download(ticker, start=december_start, end=december_end)
        
        if not data.empty:
            # Get the closing price of the first trading day of December
            closing_price_dec = data['Close'].iloc[0]
            # Calculate the value of the shares
            total_value = shares * closing_price_dec
            etf_values_dec[ticker] = total_value
            print(f"{ticker}: {shares:.2f} shares at ${closing_price_dec:.2f} each, total value: ${total_value:.2f}")
        else:
            print(f"{ticker}: No data available for December's first trading day.")
else:
    print(f"No data available in ticker_shares_per_month for November: {november_range}")



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Using data for November: 2024-11-01 to 2024-11-30
Fetching data starting from the first trading day of December: 2024-12-01
Fetching data for ticker SMH starting from 2024-12-01...
SMH: 288.51 shares at $247.87 each, total value: $71513.65
Fetching data for ticker FTEC starting from 2024-12-01...
FTEC: 316.73 shares at $186.99 each, total value: $59224.43





In [49]:
# Check if there are any values in the dictionary
if etf_values_dec:
    print("\nETF values on December's first trading day:")
    # Sum all the values (total portfolio value)
    total_value = sum(etf_values_dec.values())
    print(f"Total portfolio value: {total_value:.2f}")
    # Optionally display individual ETF values
    for ticker, value in etf_values_dec.items():
        print(f"{ticker}: {value:.2f}")
else:
    print("No values could be calculated for December's first trading day.")



ETF values on December's first trading day:
Total portfolio value: 130738.08
SMH: 71513.65
FTEC: 59224.43
