In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error, r2_score
from datetime import datetime, timedelta
from pandas.tseries.offsets import MonthEnd, BDay, Week


In [2]:
import matplotlib.pyplot as plt

import numpy as np
import matplotlib.pyplot as plt

def plot_predictions(data, train_predictions, test_predictions, scaler):
    """
    Plot actual data along with train and test predictions, using dates on the x-axis.

    Parameters:
    - data: The original DataFrame containing the actual target values.
    - train_predictions: Predicted values for the train dataset (scaled).
    - test_predictions: Predicted values for the test dataset (scaled).
    - scaler: Fitted scaler object used to inverse transform the scaled predictions.
    """
    # Ensure the index is used as the x-axis (datetime index)
    dates = data.index

    # Inverse transform the actual data to the original scale
    actual_data2 = data['Log_Return'].values.reshape(-1, 1)
    actual_data = scaler.inverse_transform(data['Log_Return'].values.reshape(-1, 1))

    # Prepare placeholders for plotting train and test predictions
    train_predict_plot = np.empty_like(actual_data)
    train_predict_plot[:] = np.nan
    train_predict_plot[:len(train_predictions)] = scaler.inverse_transform(train_predictions.reshape(-1, 1))

    test_predict_plot = np.empty_like(actual_data)
    test_predict_plot[:] = np.nan
    test_predict_plot[len(train_predictions):len(train_predictions) + len(test_predictions)] = scaler.inverse_transform(test_predictions.reshape(-1, 1))

    train_dates = data.index[:len(train_predictions)]
    test_dates = data.index[len(train_predictions):len(train_predictions) + len(test_predictions)]

    plt.figure(figsize=(12, 6))
    plt.plot(data.index, data['Log_Return'], label="Actual Data", color="blue")
    plt.plot(train_dates, scaler.inverse_transform(train_predictions.reshape(-1, 1)), label="Train Predictions", color="orange")
    plt.plot(test_dates, scaler.inverse_transform(test_predictions.reshape(-1, 1)), label="Test Predictions", color="green")
    plt.title("Train and Test Predictions")
    plt.xlabel("Date")
    plt.ylabel("Value (Original Scale)")
    plt.legend()
    plt.grid()
    plt.show()

def calculate_metrics(y_true, y_pred, scaler):
    """
    Calculates RMSE and MAE evaluation metrics before and after inverse transformation.
    
    Parameters:
    - y_true: True values.
    - y_pred: Predicted values.
    - scaler: The target scaler used for inverse transformation.
    
    Returns:
    - RMSE and MAE values before and after inverse transformation.
    """
    # Compute metrics before inverse transformation
    rmse_scaled = np.sqrt(mean_squared_error(y_true, y_pred))
    mae_scaled = mean_absolute_error(y_true, y_pred)
    
    print(f"Scaled RMSE: {rmse_scaled}, Scaled MAE: {mae_scaled}")
    
    # Compute metrics after inverse transformation
    y_true_original = scaler.inverse_transform(y_true.reshape(-1, 1)).flatten()
    y_pred_original = scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()
    
    rmse_original = np.sqrt(mean_squared_error(y_true_original, y_pred_original))
    mae_original = mean_absolute_error(y_true_original, y_pred_original)
    
    print(f"Original RMSE: {rmse_original}, Original MAE: {mae_original}")
    
    return rmse_original, mae_original

In [3]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from itertools import product

# Step 1: Split the data
def split_data(data, max_lag, sma_windows, train_start, train_end, test_start, test_end):
    """
    Splits the dataset into training and testing sets with lagged log returns and SMA features.
    
    Parameters:
    - data: DataFrame containing stock log returns.
    - max_lag: Maximum number of lagged days for log return features.
    - sma_windows: List of window sizes for SMA features (e.g., [10, 20]).
    - train_start, train_end: Date range for the training dataset.
    - test_start, test_end: Date range for the test dataset.

    Returns:
    - X_train, X_test, y_train, y_test: Scaled train-test feature sets and target values.
    """
    
    # Generate lagged log return features
    for lag in range(1, max_lag + 1):
        data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)

    # Generate SMA features
    for window in sma_windows:
        data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()

    # Drop rows with NaN values due to lagging and SMA calculation
    data = data.dropna()

    # Split data into train and test sets based on provided dates
    train_data = data.loc[train_start:train_end]
    test_data = data.loc[test_start:test_end]

    # Extract features (X) and target (y)
    feature_columns = [f'Lagged_Log_Return_{lag}' for lag in range(1, max_lag + 1)] + \
                      [f'SMA_{window}' for window in sma_windows]
    
    X_train = train_data[feature_columns]
    y_train = train_data['Log_Return']
    X_test = test_data[feature_columns]
    y_test = test_data['Log_Return']

    return X_train, X_test, y_train, y_test


# Step 2: Scale the data

def scale_data(X_train, X_test, y_train, y_test):
    # Create scalers for features and target
    feature_scaler = StandardScaler()
    target_scaler = StandardScaler()
    
    # Scale features
    X_train_scaled = feature_scaler.fit_transform(X_train)
    X_test_scaled = feature_scaler.transform(X_test)
    
    # Scale target variable
    y_train_scaled = target_scaler.fit_transform(y_train.values.reshape(-1, 1))
    y_test_scaled = target_scaler.transform(y_test.values.reshape(-1, 1))
    
    return X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, feature_scaler, target_scaler

# Step 3: Model training with hyperparameter tuning
def model_training(train_data_scaled, y_train):
    best_model = None
    best_score = float('-inf')
    best_params = None  # Store the best parameters

    hyperparameters = {
        'cov_type': ['HC0', 'HC1'],
        'use_t': [True, False],
        'method': ['pinv', 'qr']
    }

    for params in product(*hyperparameters.values()):
        try:
            model = sm.OLS(y_train, train_data_scaled)
            fitted_model = model.fit(cov_type=params[0], use_t=params[1], method=params[2])
            if fitted_model.rsquared_adj > best_score:
                best_model = fitted_model
                best_score = fitted_model.rsquared_adj
                best_params = params  # Save the best hyperparameters
        except Exception as e:
            print(f"Error with params {params}: {e}")

    return best_model, best_params


# Step 4: Main workflow


def train_final_model(data, max_lag, sma_windows,feature_scaler, target_scaler, best_params):
    
    X_full, _, y_full, _ = split_data(data, max_lag,sma_windows, "2000-01-01", "2023-12-31", None, None)
    X_full_scaled = feature_scaler.transform(X_full)
    y_full_scaled = target_scaler.transform(y_full.values.reshape(-1, 1))

    final_model = sm.OLS(y_full_scaled, X_full_scaled).fit(
        cov_type=best_params[0],
        use_t=best_params[1],
        method=best_params[2]
    )

    return final_model

In [4]:
def forecast_future(data, model, scaler, max_lag, sma_windows, start_date="2024-01-01", end_date="2024-12-31"):
    """Forecast future log returns using the trained model with lagged returns and SMA features."""
    if model is None:
        raise ValueError("Cannot forecast because the model is not trained.")

    # Get the last available features (Lagged Log Returns + SMA)
    lagged_features = data.iloc[-1, :][[f'Lagged_Log_Return_{i}' for i in range(1, max_lag + 1)]].values
    sma_features = data.iloc[-1, :][[f'SMA_{w}' for w in sma_windows]].values

    future_dates = pd.date_range(start=start_date, end=end_date, freq="B")  
    future_forecasts = pd.DataFrame(index=future_dates)
    future_forecasts['Forecasted_Log_Return'] = np.nan

    for date in future_forecasts.index:
        # Combine lagged features and SMA features
        input_features = np.hstack((lagged_features, sma_features))

        # Predict scaled forecasted return
        forecasted_scaled = model.predict(input_features.reshape(1, -1))[0]

        # Inverse transform to original scale
        forecasted_original = scaler.inverse_transform([[forecasted_scaled]])[0][0]
        future_forecasts.at[date, 'Forecasted_Log_Return'] = forecasted_original

        # Update lagged features (shift left and insert new forecast)
        lagged_features = np.roll(lagged_features, -1)
        lagged_features[-1] = forecasted_scaled  # Use predicted value for next forecast

        # Update SMA values dynamically (simulate moving average)
        new_sma_values = []
        for window in sma_windows:
            recent_returns = np.concatenate(([forecasted_original], data['Log_Return'].values[-(window - 1):]))  # Add new forecast
            new_sma_values.append(np.mean(recent_returns))  # Compute new SMA
        
        sma_features = np.array(new_sma_values)  # Update SMA features for next prediction

    return future_forecasts


In [6]:
def group_forecasts(forecast_df, year):
    """
    Groups the forecasted log returns month-wise and week-wise based on the given year.
    
    Parameters:
    - forecast_df: DataFrame with forecasted log returns indexed by date.
    - year: Year for which weeks should be grouped.
    
    Returns:
    - Dictionary with forecasted log returns per month and per week as lists.
    """
    forecast_df['Month'] = forecast_df.index.to_period('M')
    forecast_df['Week'] = forecast_df.index.to_period('W-SUN')  # Ensure weeks align with trading days
    
    grouped_month_forecast = {f"forecast_predictions_df_{i+1}m": values for i, (key, values) in enumerate(forecast_df.groupby('Month')['Forecasted_Log_Return'].apply(list).items())}
    grouped_week_forecast = {f"forecast_predictions_df_{i+1}w": values for i, (key, values) in enumerate(forecast_df[forecast_df.index.year == year].groupby(pd.Grouper(freq='W-FRI'))['Forecasted_Log_Return'].apply(list).items())}
    
    return grouped_month_forecast, grouped_week_forecast


In [44]:
# ðŸ”¹ Step 7: Main Function
def main(data):
    max_lag = 80
    train_start, train_end = "2000-01-01", "2014-12-31"
    test_start, test_end = "2015-01-01", "2024-01-01"
    sma_windows = [5,10,15]
    # Split Data for Evaluation
    X_train, X_test, y_train, y_test = split_data(data, max_lag,sma_windows, train_start, train_end, test_start, test_end)

    # Scale Data
    X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, feature_scaler, target_scaler = scale_data(X_train, X_test, y_train, y_test)

    # Train Model on Train Set (`2000-2014`)
    best_model, best_params = model_training(X_train_scaled, y_train)

    # Generate Predictions
    train_predictions_scaled = best_model.predict(X_train_scaled)
    test_predictions_scaled = best_model.predict(X_test_scaled)

    
    # Evaluate Train-Test Performance
    train_rmse, train_mae = calculate_metrics(y_train_scaled, train_predictions_scaled, target_scaler)
    test_rmse, test_mae = calculate_metrics(y_test_scaled, test_predictions_scaled, target_scaler)

    print(f"Train RMSE: {train_rmse:.4f}, Train MAE: {train_mae:.4f}")
    print(f"Test RMSE: {test_rmse:.4f}, Test MAE: {test_mae:.4f}")
    # Step 6: Compute evaluation metrics
    
    

    #plot_predictions(data, y_train_scaled, y_test_scaled, target_scaler)
    
    # Train Final Model on Full Dataset (`2000-2023`)
    final_model = train_final_model(data, max_lag,sma_windows, feature_scaler, target_scaler, best_params)

    # Forecast Future (`2024`)
    future_predictions = forecast_future(data, final_model, target_scaler, max_lag, sma_windows,start_date="2024-01-01", end_date="2024-12-31" )

    print("Future Predictions:")
    print(future_predictions)

    return final_model, future_predictions

In [45]:
def run_workflow(tickers):
    """
    Runs the workflow for multiple stock tickers.
    """
    results = {}
    
    for ticker in tickers:
        print(f"Processing {ticker}...")
        
        # Step 1: Download data
        data = yf.download(ticker, start="2000-01-01", end="2023-12-31")
        
        # Step 2: Compute log returns
        data['Log_Return'] = np.log(data['Close'] / data['Close'].shift(1))
        
        # Step 3: Drop NaN values
        data = data.dropna()
        
        # Step 4: Ensure index is datetime
        data.index = pd.to_datetime(data.index)
        
        # Step 5: Run the main function
        final_model, future_predictions = main(data)
        
        # Step 6: Group forecasted returns month-wise and week-wise for 2024
        monthly_forecasts, weekly_forecasts = group_forecasts(future_predictions, year=2024)
        
        # Step 7: Store results in a dictionary
        results[ticker] = {
            #"final_model": final_model,
            #"future_predictions": future_predictions,
            "monthly_forecast_results": monthly_forecasts,
            "weekly_forecast_results": weekly_forecasts
        }
    
    return results

# List of tickers to process
tickers = ['SMH', 'SOXX', 'PSI', 'XSD', 'IYW', 'XLK', 'VGT', 'FTEC', 'IGM', 'IXN']
#tickers = ['SMH', 'SOXX', 'PSI','XSD', 'IYW', 'XLK', 'VGT',]

# Execute the workflow for multiple tickers
results = run_workflow(tickers)


Processing SMH...


[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f

Scaled RMSE: 0.9769018816494484, Scaled MAE: 0.6794307916930196
Original RMSE: 0.022565040764965825, Original MAE: 0.01569388267083665
Scaled RMSE: 0.8027050531547877, Scaled MAE: 0.5783103019262543
Original RMSE: 0.01854134236705416, Original MAE: 0.013358143517680667
Train RMSE: 0.0226, Train MAE: 0.0157
Test RMSE: 0.0185, Test MAE: 0.0134
Future Predictions:
            Forecasted_Log_Return
2024-01-01              -0.000804
2024-01-02              -0.001394
2024-01-03              -0.000792
2024-01-04              -0.000505
2024-01-05              -0.000251
2024-01-08               0.000220
2024-01-09              -0.000603
2024-01-10              -0.000828
2024-01-11              -0.001061
2024-01-12              -0.001392
2024-01-15              -0.001091
2024-01-16              -0.001214
2024-01-17              -0.000688
2024-01-18              -0.000266
2024-01-19               0.000148
2024-01-22               0.000568
2024-01-23               0.000558
2024-01-24              

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Scaled RMSE: 0.9793312547198789, Scaled MAE: 0.7092313695800971
Original RMSE: 0.020241548248666526, Original MAE: 0.014658922522522424
Scaled RMSE: 0.9176997776499474, Scaled MAE: 0.6572545301243051
Original RMSE: 0.01896770294787049, Original MAE: 0.013584626467344915
Train RMSE: 0.0202, Train MAE: 0.0147
Test RMSE: 0.0190, Test MAE: 0.0136
Future Predictions:
            Forecasted_Log_Return
2024-01-01              -0.000379
2024-01-02              -0.001012
2024-01-03              -0.000551
2024-01-04              -0.000280
2024-01-05              -0.000287
2024-01-08               0.000181
2024-01-09              -0.000969
2024-01-10              -0.001086
2024-01-11              -0.001212
2024-01-12              -0.001571
2024-01-15              -0.001144
2024-01-16              -0.001185
2024-01-17              -0.000953
2024-01-18              -0.000181
2024-01-19               0.000142
2024-01-22               0.000620
2024-01-23               0.000569
2024-01-24             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Scaled RMSE: 0.9817514181048784, Scaled MAE: 0.731242814153099
Original RMSE: 0.017915571153938634, Original MAE: 0.013344144379292008
Scaled RMSE: 1.0773334066889577, Scaled MAE: 0.7783536177729236
Original RMSE: 0.019659806900313784, Original MAE: 0.014203849737293366
Train RMSE: 0.0179, Train MAE: 0.0133
Test RMSE: 0.0197, Test MAE: 0.0142
Future Predictions:
            Forecasted_Log_Return
2024-01-01              -0.000227
2024-01-02              -0.000885
2024-01-03              -0.000579
2024-01-04              -0.000373
2024-01-05              -0.000344
2024-01-08               0.000223
2024-01-09              -0.000833
2024-01-10              -0.000958
2024-01-11              -0.001045
2024-01-12              -0.001439
2024-01-15              -0.000906
2024-01-16              -0.000769
2024-01-17              -0.000599
2024-01-18               0.000090
2024-01-19               0.000348
2024-01-22               0.000710
2024-01-23               0.000693
2024-01-24             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Scaled RMSE: 0.9807631242984145, Scaled MAE: 0.7234654961452751
Original RMSE: 0.018866818314827227, Original MAE: 0.013917215823732525
Scaled RMSE: 1.0265075738606702, Scaled MAE: 0.7497217944506709
Original RMSE: 0.019746798605093787, Original MAE: 0.014422304970617172
Train RMSE: 0.0189, Train MAE: 0.0139
Test RMSE: 0.0197, Test MAE: 0.0144
Future Predictions:
            Forecasted_Log_Return
2024-01-01              -0.000258
2024-01-02              -0.001004
2024-01-03              -0.000667
2024-01-04              -0.000403
2024-01-05              -0.000323
2024-01-08               0.000199
2024-01-09              -0.001066
2024-01-10              -0.001286
2024-01-11              -0.001279
2024-01-12              -0.001549
2024-01-15              -0.001025
2024-01-16              -0.000884
2024-01-17              -0.000873
2024-01-18              -0.000274
2024-01-19               0.000030
2024-01-22               0.000341
2024-01-23               0.000289
2024-01-24            

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Scaled RMSE: 0.9818271525713537, Scaled MAE: 0.6608654280475535
Original RMSE: 0.017842862778125065, Original MAE: 0.01200998680529201
Scaled RMSE: 0.8318950687355998, Scaled MAE: 0.5736268569136611
Original RMSE: 0.015118129009137882, Original MAE: 0.010424589773212454
Train RMSE: 0.0178, Train MAE: 0.0120
Test RMSE: 0.0151, Test MAE: 0.0104
Future Predictions:
            Forecasted_Log_Return
2024-01-01              -0.000251
2024-01-02              -0.000523
2024-01-03              -0.000227
2024-01-04              -0.000209
2024-01-05              -0.000235
2024-01-08              -0.000091
2024-01-09              -0.000300
2024-01-10              -0.000398
2024-01-11              -0.000375
2024-01-12              -0.000398
2024-01-15              -0.000524
2024-01-16              -0.000655
2024-01-17              -0.000366
2024-01-18              -0.000399
2024-01-19              -0.000057
2024-01-22               0.000167
2024-01-23               0.000032
2024-01-24             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Scaled RMSE: 0.98311037742287, Scaled MAE: 0.6633901056094982
Original RMSE: 0.016604363226332096, Original MAE: 0.011204408505146833
Scaled RMSE: 0.8626543761809186, Scaled MAE: 0.5876789888407177
Original RMSE: 0.014569906828205234, Original MAE: 0.009925676318029115
Train RMSE: 0.0166, Train MAE: 0.0112
Test RMSE: 0.0146, Test MAE: 0.0099
Future Predictions:
            Forecasted_Log_Return
2024-01-01              -0.000223
2024-01-02              -0.000363
2024-01-03              -0.000198
2024-01-04              -0.000204
2024-01-05              -0.000201
2024-01-08              -0.000105
2024-01-09              -0.000375
2024-01-10              -0.000390
2024-01-11              -0.000479
2024-01-12              -0.000457
2024-01-15              -0.000605
2024-01-16              -0.000614
2024-01-17              -0.000390
2024-01-18              -0.000343
2024-01-19              -0.000048
2024-01-22               0.000129
2024-01-23              -0.000048
2024-01-24              

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Scaled RMSE: 0.9866011042632021, Scaled MAE: 0.684264120838828
Original RMSE: 0.013219316427557366, Original MAE: 0.009168349695035096
Scaled RMSE: 1.0992001387525376, Scaled MAE: 0.7532964251440782
Original RMSE: 0.014728013569614163, Original MAE: 0.010093302921208542
Train RMSE: 0.0132, Train MAE: 0.0092
Test RMSE: 0.0147, Test MAE: 0.0101
Future Predictions:
            Forecasted_Log_Return
2024-01-01               0.000177
2024-01-02               0.000039
2024-01-03               0.000190
2024-01-04               0.000202
2024-01-05               0.000188
2024-01-08               0.000277
2024-01-09               0.000016
2024-01-10              -0.000034
2024-01-11              -0.000077
2024-01-12              -0.000053
2024-01-15              -0.000126
2024-01-16              -0.000081
2024-01-17               0.000084
2024-01-18               0.000103
2024-01-19               0.000323
2024-01-22               0.000413
2024-01-23               0.000289
2024-01-24             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Scaled RMSE: 0.9913756236940753, Scaled MAE: 0.7434109421216362
Original RMSE: 0.008549996439258573, Original MAE: 0.006411455714799048
Scaled RMSE: 1.7236332757612174, Scaled MAE: 1.1769731253182545
Original RMSE: 0.01486526198357854, Original MAE: 0.010150659134704977
Train RMSE: 0.0085, Train MAE: 0.0064
Test RMSE: 0.0149, Test MAE: 0.0102
Future Predictions:
            Forecasted_Log_Return
2024-01-01               0.000547
2024-01-02               0.000461
2024-01-03               0.000553
2024-01-04               0.000550
2024-01-05               0.000543
2024-01-08               0.000616
2024-01-09               0.000453
2024-01-10               0.000436
2024-01-11               0.000411
2024-01-12               0.000406
2024-01-15               0.000349
2024-01-16               0.000378
2024-01-17               0.000486
2024-01-18               0.000500
2024-01-19               0.000638
2024-01-22               0.000697
2024-01-23               0.000616
2024-01-24             

[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f

Scaled RMSE: 0.9843333158082646, Scaled MAE: 0.6834114393233593
Original RMSE: 0.015421239198171588, Original MAE: 0.010706791192898244
Scaled RMSE: 0.9403022296499537, Scaled MAE: 0.647670810752355
Original RMSE: 0.014731418076710252, Original MAE: 0.010146854052262214
Train RMSE: 0.0154, Train MAE: 0.0107
Test RMSE: 0.0147, Test MAE: 0.0101
Future Predictions:
            Forecasted_Log_Return
2024-01-01              -0.000047
2024-01-02              -0.000250
2024-01-03              -0.000084
2024-01-04              -0.000063
2024-01-05              -0.000133
2024-01-08              -0.000002
2024-01-09              -0.000331
2024-01-10              -0.000336
2024-01-11              -0.000293
2024-01-12              -0.000255
2024-01-15              -0.000319
2024-01-16              -0.000353
2024-01-17              -0.000136
2024-01-18              -0.000053
2024-01-19               0.000218
2024-01-22               0.000436
2024-01-23               0.000322
2024-01-24             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Scaled RMSE: 0.9852129437579286, Scaled MAE: 0.6693278369256384
Original RMSE: 0.014568930902775988, Original MAE: 0.009897749587291357
Scaled RMSE: 0.9791958065031525, Scaled MAE: 0.6712369424857917
Original RMSE: 0.014479951908485705, Original MAE: 0.009925980668874483
Train RMSE: 0.0146, Train MAE: 0.0099
Test RMSE: 0.0145, Test MAE: 0.0099
Future Predictions:
            Forecasted_Log_Return
2024-01-01               0.000062
2024-01-02              -0.000089
2024-01-03               0.000145
2024-01-04               0.000164
2024-01-05               0.000163
2024-01-08               0.000275
2024-01-09               0.000010
2024-01-10              -0.000049
2024-01-11              -0.000124
2024-01-12              -0.000134
2024-01-15              -0.000239
2024-01-16              -0.000248
2024-01-17              -0.000059
2024-01-18              -0.000017
2024-01-19               0.000252
2024-01-22               0.000388
2024-01-23               0.000266
2024-01-24            

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'Lagged_Log_Return_{lag}'] = data['Log_Return'].shift(lag)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'SMA_{window}'] = data['Log_Return'].rolling(window=window).mean()


In [46]:
#print("Results Dictionary:")
#for key, value in results.items():
#    print(f"{key}: {value}")

In [47]:
def calculate_sharpe_ratio(returns, annual_risk_free_rate=0.1,period='daily'):
    #excess_returns = rate_of_return(returns) - risk_free_rate
    
    # Convert annual risk-free rate to daily rate
    daily_risk_free_rate = (1 + annual_risk_free_rate) ** (1/252) - 1
    
    # Calculate mean daily log return
    mean_return = np.mean(returns)
    
    # Calculate excess daily log return
    excess_return = mean_return - daily_risk_free_rate
    
    # Calculate standard deviation of daily log returns
    std_return = np.std(returns)
    
    # Print diagnostic information
    #print(f"Mean Daily Log Return: {mean_return}")
    #print(f"Excess Daily Log Return: {excess_return}")
    #print(f"Standard Deviation of Daily Log Returns: {std_return}")
    
    # Check for zero standard deviation to avoid division by zero
    if std_return == 0:
        return 0
    
    # Calculate Sharpe ratio
    sharpe_ratio = (excess_return / std_return) * np.sqrt(252)  # Annualize the Sharpe ratio
    return sharpe_ratio
    #return excess_returns / np.std(returns)


def calculate_rachev_ratio(returns, lower_percentile=5, upper_percentile=95):
    # Step 1: Sort the returns
    sorted_returns = np.sort(returns)
    
    # Step 2: Determine the percentiles
    lower_threshold = np.percentile(sorted_returns, lower_percentile)
    upper_threshold = np.percentile(sorted_returns, upper_percentile)
    
    # Step 3: Calculate Expected Shortfall (ES)
    es = np.mean(sorted_returns[sorted_returns <= lower_threshold])
    
    # Step 4: Calculate Expected Gain (EG)
    eg = np.mean(sorted_returns[sorted_returns >= upper_threshold])
    
    # Step 5: Compute the Rachev Ratio
    rachev_ratio = eg / -es
    return rachev_ratio


"""def calculate_volatility_clustering(returns):
    squared_returns = returns ** 2
    n = len(squared_returns)
    
    # Mean of squared returns
    mean_squared_returns = np.mean(squared_returns)
    
    # Calculate the numerator and denominator for autocorrelation at lag 1
    numerator = np.sum((squared_returns[:-1] - mean_squared_returns) * (squared_returns[1:] - mean_squared_returns))
    denominator = np.sum((squared_returns - mean_squared_returns) ** 2)
    
    if denominator == 0:
        return 0  # Avoid division by zero
    
    rho_1 = numerator / denominator
    return rho_1"""
def calculate_volatility_clustering(returns):
    # Ensure returns is a NumPy array
    returns = np.array(returns)
    squared_returns = returns ** 2
    n = len(squared_returns)

    # Mean of squared returns
    mean_squared = np.mean(squared_returns)
    clustering = np.sum((squared_returns - mean_squared) ** 2) / (n - 1 if n > 1 else 1)
    return clustering

def calculate_sortino_ratio(log_returns, target_log_return=0.0):
    """
    Calculate the Sortino Ratio using log returns.
    
    Parameters:
    - log_returns (array-like): Array or list of log returns for the period.
    - target_log_return (float): The target log return. Default is 0, which is often used as a benchmark.
    
    Returns:
    - float: The Sortino Ratio.
    """
    # Convert input to a NumPy array for easier calculations
    log_returns = np.array(log_returns)
    
    # Calculate the average period log return (R)
    avg_log_return = np.mean(log_returns)
    
    # Calculate the Target Downside Deviation (TDD)
    downside_deviation = np.sqrt(
        np.mean(np.square(np.maximum(0, target_log_return - log_returns)))
    )
    
    # Calculate Sortino Ratio
    #sortino_ratio_value = (avg_log_return - target_log_return) / downside_deviation
    epsilon = 1e-8
    
    # Add epsilon to downside_deviation to prevent division by zero
    #if downside_deviation == 0:
     #   return np.nan  # Return NaN if downside deviation is zero
    
    # Calculate Sortino Ratio
    sortino_ratio_value = (avg_log_return - target_log_return) / (downside_deviation + epsilon)
    
    return sortino_ratio_value


def compute_risk_metrics(results):
    risk_metrics_results = {}
    for ticker, data in results.items():
        risk_metrics_results[ticker] = {}
        for month in range(1, 13):
            future_predictions = data['monthly_forecast_results'][f'forecast_predictions_df_{month}m']
            
            sharpe = calculate_sharpe_ratio(future_predictions)
            rachev = calculate_rachev_ratio(future_predictions)
            volatility_clustering = calculate_volatility_clustering(future_predictions)
            sortino = calculate_sortino_ratio(future_predictions)
            
            risk_metrics_results[ticker][f'risk_metrics_{month}m'] = {
                'Sharpe Ratio': sharpe,
                'Rachev Ratio': rachev,
                'Volatility Clustering': volatility_clustering,
                'Sortino Ratio': sortino
            }
    return risk_metrics_results


In [48]:
def calculate_composite_score(
    forecasted_values, risk_percentage, rachev_ratio, sharpe_ratio, sortino_ratio, volatility_clustering, 
    mean_forecast, std_forecast, mean_rachev, std_rachev, mean_sharpe, std_sharpe, mean_sortino, std_sortino, 
    mean_volatility_clustering, std_volatility_clustering
):
    epsilon = 1e-8  # To prevent division by zero
    forecasted_mean = np.mean(forecasted_values)

    # Normalize the components with epsilon
    forecasted_mean_normalized = (forecasted_mean - mean_forecast) / (std_forecast + epsilon)
    rachev_normalized = (rachev_ratio - mean_rachev) / (std_rachev + epsilon)
    sharpe_normalized = (sharpe_ratio - mean_sharpe) / (std_sharpe + epsilon)
    sortino_normalized = (sortino_ratio - mean_sortino) / (std_sortino + epsilon)
    volatility_clustering_normalized = (volatility_clustering - mean_volatility_clustering) / (std_volatility_clustering + epsilon)

    # Composite score calculation
    score = (
        forecasted_mean_normalized
        - (risk_percentage * rachev_normalized)
        + sharpe_normalized
        + sortino_normalized
        - volatility_clustering_normalized
    )

    return score

def compute_composite_scores(results, risk_metrics_results, risk_percentage):
    composite_scores = {}
    
    for month in range(1, 13):
        forecasted_values = {
            ticker: data['monthly_forecast_results'][f'forecast_predictions_df_{month}m']
            for ticker, data in results.items()
        }
        
        mean_forecast = np.mean([np.mean(values) for values in forecasted_values.values()])
        std_forecast = np.std([np.mean(values) for values in forecasted_values.values()])
        
        for ticker, metrics in risk_metrics_results.items():
            risk_metrics = metrics[f'risk_metrics_{month}m']
            composite_scores.setdefault(ticker, {})[f'composite_score_{month}m'] = calculate_composite_score(
                forecasted_values[ticker], risk_percentage, risk_metrics['Rachev Ratio'], risk_metrics['Sharpe Ratio'],
                risk_metrics['Sortino Ratio'], risk_metrics['Volatility Clustering'], mean_forecast, std_forecast,
                np.mean([m[f'risk_metrics_{month}m']['Rachev Ratio'] for m in risk_metrics_results.values()]),
                np.std([m[f'risk_metrics_{month}m']['Rachev Ratio'] for m in risk_metrics_results.values()]),
                np.mean([m[f'risk_metrics_{month}m']['Sharpe Ratio'] for m in risk_metrics_results.values()]),
                np.std([m[f'risk_metrics_{month}m']['Sharpe Ratio'] for m in risk_metrics_results.values()]),
                np.mean([m[f'risk_metrics_{month}m']['Sortino Ratio'] for m in risk_metrics_results.values()]),
                np.std([m[f'risk_metrics_{month}m']['Sortino Ratio'] for m in risk_metrics_results.values()]),
                np.mean([m[f'risk_metrics_{month}m']['Volatility Clustering'] for m in risk_metrics_results.values()]),
                np.std([m[f'risk_metrics_{month}m']['Volatility Clustering'] for m in risk_metrics_results.values()])
            )
    
    return composite_scores


In [49]:
#tickers = ['SMH', 'SOXX', 'PSI']
#results = run_workflow(tickers)
#risk_metrics_results = compute_risk_metrics(results)
#composite_scores = compute_composite_scores(results, risk_metrics_results, risk_percentage=0.1)
#composite_scores

In [50]:
#portfolios

In [51]:
def select_top_etfs_monthly(df, month_label, return_scores=False):
    df_sorted = df.sort_values(by='Score', ascending=False).head(2)
    if return_scores:
        return list(df_sorted.itertuples(index=False, name=None))  # Returns tuples (ETF, Score)
    return list(df_sorted['ETF'])  # Returns only ETF names

def generate_month_ranges(start_date, end_date):
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    month_ranges = []
    while start < end:
        month_start = start
        month_end = (start + timedelta(days=31)).replace(day=1) - timedelta(days=1)
        if month_end > end:
            month_end = end
        month_ranges.append((month_start.strftime('%Y-%m-%d'), month_end.strftime('%Y-%m-%d')))
        start = month_end + timedelta(days=1)
    return month_ranges

def gather_etf_data(tickers, month_ranges):
    etf_histories = {}
    for start_date, end_date in month_ranges:
        month = f"{start_date} to {end_date}"
        etf_histories[month] = {}
        for ticker in tickers:
            etf_data = yf.download(ticker, start=start_date, end=end_date, progress=False)
            if etf_data.empty:
                print(f"No data found for {ticker} in {month}")
                continue
            etf_data.index = pd.to_datetime(etf_data.index)
            etf_histories[month][ticker] = etf_data
    return etf_histories

def initialize_shares(top_etfs, etf_histories, month, investment_amount=50000):
    ticker_shares = {}
    first_trading_day_start = month.split(" to ")[0]
    for ticker in top_etfs:
        etf_history = etf_histories.get(month, {}).get(ticker)
        if etf_history is not None:
            first_trading_day = pd.to_datetime(first_trading_day_start) + BDay(1)
            if first_trading_day not in etf_history.index:
                first_trading_day = etf_history.index[etf_history.index.searchsorted(first_trading_day)]
            price = etf_history.loc[first_trading_day, 'Close']
            num_shares = (investment_amount * 0.9975) / price
            print(f"Buy {ticker}: {num_shares:.2f} shares at {price:.2f}.")
            ticker_shares[ticker] = num_shares
        else:
            print(f"No data found for {ticker} in {month}")
    return ticker_shares

def manage_portfolio(prev_top_etfs, curr_top_etfs, prev_month, curr_month, ticker_shares, etf_histories):
    print(f"Top 2 ETFs for {prev_month}: {prev_top_etfs}")
    print(f"Top 2 ETFs for {curr_month}: {curr_top_etfs}")
    etf_histories_curr = etf_histories.get(curr_month, {})
    common_etfs = set(prev_top_etfs) & set(curr_top_etfs)
    etfs_to_sell = set(prev_top_etfs) - common_etfs
    etfs_to_buy = set(curr_top_etfs) - common_etfs
    selling_values = {}
    for etf in etfs_to_sell:
        shares = ticker_shares.pop(etf, 0)
        if shares > 0 and etf in etf_histories_curr:
            price = etf_histories_curr[etf]['Close'].iloc[0]
            selling_values[etf] = shares * price * 0.9975
            print(f"Sell {etf}: {shares:.2f} shares at {price:.2f}. Total value: {selling_values[etf]:.2f}")
    for etf_to_buy, etf_to_sell in zip(etfs_to_buy, etfs_to_sell):
        if etf_to_buy in etf_histories_curr and etf_to_sell in selling_values:
            price = etf_histories_curr[etf_to_buy]['Close'].iloc[0]
            ticker_shares[etf_to_buy] = (selling_values[etf_to_sell] * 0.9975) / price
            print(f"Buy {etf_to_buy}: {ticker_shares[etf_to_buy]:.2f} shares at {price:.2f}.")
    print(f"Updated ticker shares after {curr_month}: {ticker_shares}")
    return {etf: ticker_shares[etf] for etf in curr_top_etfs if etf in ticker_shares}

'''def portfolio(results, risk_percentage=0.2):
    risk_metrics_results = compute_risk_metrics(results)
    composite_scores = compute_composite_scores(results, risk_metrics_results, risk_percentage)
    portfolios = {
        str(month): select_top_etfs_monthly(
            pd.DataFrame({'ETF': list(composite_scores.keys()), 'Score': [composite_scores[t][f'composite_score_{month}m'] for t in composite_scores]}),
            f'{month}m'
        ) for month in range(1, 13)
    }
    return portfolios'''

'''def portfolio(results, risk_percentage=0.1, return_scores=False):
    risk_metrics_results = compute_risk_metrics(results)
    composite_scores = compute_composite_scores(results, risk_metrics_results, risk_percentage)
    
    portfolios = {
        str(month): select_top_etfs_monthly(
            pd.DataFrame({'ETF': list(composite_scores.keys()), 'Score': [composite_scores[t][f'composite_score_{month}m'] for t in composite_scores]}),
            f'{month}m',
            return_scores=return_scores
        ) for month in range(1, 13)
    }
    return portfolios
'''



"def portfolio(results, risk_percentage=0.1, return_scores=False):\n    risk_metrics_results = compute_risk_metrics(results)\n    composite_scores = compute_composite_scores(results, risk_metrics_results, risk_percentage)\n    \n    portfolios = {\n        str(month): select_top_etfs_monthly(\n            pd.DataFrame({'ETF': list(composite_scores.keys()), 'Score': [composite_scores[t][f'composite_score_{month}m'] for t in composite_scores]}),\n            f'{month}m',\n            return_scores=return_scores\n        ) for month in range(1, 13)\n    }\n    return portfolios\n"

"\ndef portfolio_weekly(results, risk_percentage=0.1, return_scores=False):\n    # Compute risk metrics (already updated to weekly in your prior code)\n    risk_metrics_results = compute_risk_metrics(results)  \n    composite_scores = compute_composite_scores(results, risk_metrics_results, risk_percentage)\n    \n    # For weeks, we typically do range(1, 53) to represent ~1 year of weeks\n    portfolios = {\n        str(week): select_top_etfs_weekly(\n            pd.DataFrame({\n                'ETF': list(composite_scores.keys()), \n                'Score': [composite_scores[t][f'composite_score_{week}w'] for t in composite_scores]\n            }),\n            f'{week}w',\n            return_scores=return_scores\n        )\n        for week in range(1, 53)\n    }\n    return portfolios\n"

In [52]:

def calculate_smoothing(scores_dict, alpha=0.1):
    smoothed_scores = {}
    previous_values = {}
    sorted_months = sorted(scores_dict.keys(), key=lambda m: int(m))
    for month in sorted_months:
        smoothed_scores[month] = {}
        for etf, raw_score in scores_dict[month].items():
            if etf not in previous_values:
                smoothed_value = float(raw_score)
            else:
                smoothed_value = alpha * float(raw_score) + (1 - alpha) * previous_values[etf]
            smoothed_scores[month][etf] = smoothed_value
            previous_values[etf] = smoothed_value
    return smoothed_scores

def portfolio(results, risk_percentage=0.2, return_scores=False):
    risk_metrics_results = compute_risk_metrics(results)
    composite_scores = compute_composite_scores(results, risk_metrics_results, risk_percentage)
    
    # Store all ETF scores per month instead of only top 2 ETFs
    all_scores_per_month = {
        str(month): {
            etf: composite_scores[etf][f'composite_score_{month}m']
            for etf in composite_scores
        }
        for month in range(1, 13)
    }
    
    return all_scores_per_month

def portfolio_optimization(results, risk_percentage=0.2, smoothing=False):
    all_scores = portfolio(results, risk_percentage, return_scores=True)
    print("\n--- Raw Scores Before Smoothing ---")
    for month, scores in sorted(all_scores.items(), key=lambda x: int(x[0])):
        print(f"Month {month}: {scores}")
    
    if smoothing:
        smoothed_scores = calculate_smoothing(all_scores)
        print("\n--- Smoothed Scores After Smoothing ---")
        for month, scores in sorted(smoothed_scores.items(), key=lambda x: int(x[0])):
            print(f"Month {month}: {scores}")
        
        # Use smoothed scores to select top 2 ETFs per month
        portfolios = {
            month: select_top_etfs_monthly(
                pd.DataFrame({'ETF': list(scores.keys()), 'Score': list(scores.values())}),
                f'{month}m',
                return_scores=False
            )
            for month, scores in smoothed_scores.items()
        }
    else:
        # Select top 2 ETFs based on raw scores if no smoothing
        portfolios = {
            month: select_top_etfs_monthly(
                pd.DataFrame({'ETF': list(scores.keys()), 'Score': list(scores.values())}),
                f'{month}m',
                return_scores=False
            )
            for month, scores in all_scores.items()
        }
    
    print("\n--- Final Portfolio Selection ---")
    for month, etfs in sorted(portfolios.items(), key=lambda x: int(x[0])):
        print(f"Month {month}: {etfs}")
    
    month_ranges = generate_month_ranges('2024-01-01', '2025-01-01')
    etf_histories = gather_etf_data(results.keys(), month_ranges)
    ticker_shares = {}
    ticker_shares_per_month = {}
    
    for i, month_range in enumerate(etf_histories.keys()):
        month_index = str(i + 1)
        top_etfs = portfolios.get(month_index, [])
        if not top_etfs:
            print(f"Warning: No ETFs selected for {month_range}. Skipping trading.")
            continue
        if i == 0:
            ticker_shares = initialize_shares(top_etfs, etf_histories, month_range)
        else:
            prev_month_index = str(i)
            prev_top_etfs = portfolios.get(prev_month_index, [])
            ticker_shares = manage_portfolio(
                prev_top_etfs,
                top_etfs,
                list(etf_histories.keys())[i - 1],
                month_range,
                ticker_shares,
                etf_histories
            )
        ticker_shares_per_month[month_range] = ticker_shares.copy()
    
    first_trading_day_13m = '2025-01-01'
    month_12_range = list(ticker_shares_per_month.keys())[-1]
    month_13_start = first_trading_day_13m
    month_13_end = '2025-01-06'
    etf_values_13m = {}
    
    if month_12_range in ticker_shares_per_month:
        ticker_shares_12m = ticker_shares_per_month[month_12_range]
        for ticker, shares in ticker_shares_12m.items():
            data = yf.download(ticker, start=month_13_start, end=month_13_end)
            if not data.empty:
                closing_price_13m = data['Close'].iloc[0]
                total_value = shares * closing_price_13m
                etf_values_13m[ticker] = total_value
    
    total_portfolio_value = sum(etf_values_13m.values()) if etf_values_13m else 0
    return ticker_shares_per_month, total_portfolio_value


In [53]:

portfolios, final_value = portfolio_optimization(results, smoothing=True)
print(f"Final Portfolio Value: {final_value:.2f}")


--- Raw Scores Before Smoothing ---
Month 1: {'SMH': -1.2809366991818403, 'SOXX': -0.8292789100110324, 'PSI': -0.24129163769531675, 'XSD': -1.4224364068337079, 'IYW': -1.9899310860776078, 'XLK': -2.100550677247565, 'VGT': 0.05764015918106233, 'FTEC': 8.039413707762796, 'IGM': -0.31424384320242676, 'IXN': 0.08161539330563669}
Month 2: {'SMH': -1.8528768945898266, 'SOXX': -0.662240343377156, 'PSI': 1.022165163673279, 'XSD': 0.33276579713762716, 'IYW': -2.2003510329943974, 'XLK': -3.1269522383236557, 'VGT': 0.1572061353388222, 'FTEC': 7.799478072871466, 'IGM': -0.44327288735288756, 'IXN': -1.0259217723832708}
Month 3: {'SMH': -1.4174628739067237, 'SOXX': 0.025508494948031295, 'PSI': 0.8789392283716239, 'XSD': 1.6056858450256355, 'IYW': -2.8232647228599292, 'XLK': -3.0432089378753253, 'VGT': -0.02149823697725658, 'FTEC': 6.958894171964336, 'IGM': -0.9788403072469474, 'IXN': -1.1847526614434432}
Month 4: {'SMH': 2.339035131962466, 'SOXX': 1.8842119149606702, 'PSI': 2.434369363436558, 'XSD'

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Buy FTEC: 356.79 shares at 139.79.
Buy IXN: 752.15 shares at 66.31.
Top 2 ETFs for 2024-01-01 to 2024-01-31: ['FTEC', 'IXN']
Top 2 ETFs for 2024-02-01 to 2024-02-29: ['FTEC', 'VGT']
Sell IXN: 752.15 shares at 70.79. Total value: 53111.52
Buy VGT: 105.90 shares at 500.28.
Updated ticker shares after 2024-02-01 to 2024-02-29: {'FTEC': 356.7851949024137, 'VGT': 105.89817684810276}
Top 2 ETFs for 2024-02-01 to 2024-02-29: ['FTEC', 'VGT']
Top 2 ETFs for 2024-03-01 to 2024-03-31: ['FTEC', 'VGT']
Updated ticker shares after 2024-03-01 to 2024-03-31: {'FTEC': 356.7851949024137, 'VGT': 105.89817684810276}
Top 2 ETFs for 2024-03-01 to 2024-03-31: ['FTEC', 'VGT']
Top 2 ETFs for 2024-04-01 to 2024-04-30: ['FTEC', 'PSI']
Sell VGT: 105.90 shares at 525.42. Total value: 55501.92
Buy PSI: 971.11 shares at 57.01.
Updated ticker shares after 2024-04-01 to 2024-04-30: {'FTEC': 356.7851949024137, 'PSI': 971.1131830335651}
Top 2 ETFs for 2024-04-01 to 2024-04-30: ['FTEC', 'PSI']
Top 2 ETFs for 2024-05-01 t




In [54]:

portfolios, final_value = portfolio_optimization(results, smoothing=False)
print(f"Final Portfolio Value: {final_value:.2f}")


--- Raw Scores Before Smoothing ---
Month 1: {'SMH': -1.2809366991818403, 'SOXX': -0.8292789100110324, 'PSI': -0.24129163769531675, 'XSD': -1.4224364068337079, 'IYW': -1.9899310860776078, 'XLK': -2.100550677247565, 'VGT': 0.05764015918106233, 'FTEC': 8.039413707762796, 'IGM': -0.31424384320242676, 'IXN': 0.08161539330563669}
Month 2: {'SMH': -1.8528768945898266, 'SOXX': -0.662240343377156, 'PSI': 1.022165163673279, 'XSD': 0.33276579713762716, 'IYW': -2.2003510329943974, 'XLK': -3.1269522383236557, 'VGT': 0.1572061353388222, 'FTEC': 7.799478072871466, 'IGM': -0.44327288735288756, 'IXN': -1.0259217723832708}
Month 3: {'SMH': -1.4174628739067237, 'SOXX': 0.025508494948031295, 'PSI': 0.8789392283716239, 'XSD': 1.6056858450256355, 'IYW': -2.8232647228599292, 'XLK': -3.0432089378753253, 'VGT': -0.02149823697725658, 'FTEC': 6.958894171964336, 'IGM': -0.9788403072469474, 'IXN': -1.1847526614434432}
Month 4: {'SMH': 2.339035131962466, 'SOXX': 1.8842119149606702, 'PSI': 2.434369363436558, 'XSD'

[*********************100%%**********************]  1 of 1 completed

Buy FTEC: 356.79 shares at 139.79.
Buy IXN: 752.15 shares at 66.31.
Top 2 ETFs for 2024-01-01 to 2024-01-31: ['FTEC', 'IXN']
Top 2 ETFs for 2024-02-01 to 2024-02-29: ['FTEC', 'PSI']
Sell IXN: 752.15 shares at 70.79. Total value: 53111.52
Buy PSI: 1071.36 shares at 49.45.
Updated ticker shares after 2024-02-01 to 2024-02-29: {'FTEC': 356.7851949024137, 'PSI': 1071.3597364391508}
Top 2 ETFs for 2024-02-01 to 2024-02-29: ['FTEC', 'PSI']
Top 2 ETFs for 2024-03-01 to 2024-03-31: ['FTEC', 'XSD']
Sell PSI: 1071.36 shares at 56.60. Total value: 60487.36
Buy XSD: 260.01 shares at 232.05.
Updated ticker shares after 2024-03-01 to 2024-03-31: {'FTEC': 356.7851949024137, 'XSD': 260.01354385024973}
Top 2 ETFs for 2024-03-01 to 2024-03-31: ['FTEC', 'XSD']
Top 2 ETFs for 2024-04-01 to 2024-04-30: ['XSD', 'PSI']
Sell FTEC: 356.79 shares at 155.92. Total value: 55490.87
Buy PSI: 970.92 shares at 57.01.
Updated ticker shares after 2024-04-01 to 2024-04-30: {'XSD': 260.01354385024973, 'PSI': 970.91995295


[*********************100%%**********************]  1 of 1 completed

Final Portfolio Value: 131115.47





In [68]:
import numpy as np

def compute_risk_metrics(results):
    risk_metrics_results = {}
    for ticker, data in results.items():
        risk_metrics_results[ticker] = {}
        # Change range(1, 13) to range(1, 53) and month -> week
        for week in range(1, 54):
            # Change 'monthly_forecast_results' to 'weekly_forecast_results' 
            # and '{month}m' to '{week}w'
            future_predictions = data['weekly_forecast_results'][f'forecast_predictions_df_{week}w']
            
            sharpe = calculate_sharpe_ratio(future_predictions)
            rachev = calculate_rachev_ratio(future_predictions)
            volatility_clustering = calculate_volatility_clustering(future_predictions)
            sortino = calculate_sortino_ratio(future_predictions)
            
            # Rename 'risk_metrics_{month}m' to 'risk_metrics_{week}w'
            risk_metrics_results[ticker][f'risk_metrics_{week}w'] = {
                'Sharpe Ratio': sharpe,
                'Rachev Ratio': rachev,
                'Volatility Clustering': volatility_clustering,
                'Sortino Ratio': sortino
            }
    return risk_metrics_results

def compute_composite_scores(results, risk_metrics_results, risk_percentage):
    composite_scores = {}
    
    # Change range(1, 13) to range(1, 53) and month -> week
    for week in range(1, 54):
        forecasted_values = {
            ticker: data['weekly_forecast_results'][f'forecast_predictions_df_{week}w']
            for ticker, data in results.items()
        }
        
        mean_forecast = np.mean([np.mean(values) for values in forecasted_values.values()])
        std_forecast = np.std([np.mean(values) for values in forecasted_values.values()])
        
        for ticker, metrics in risk_metrics_results.items():
            # Rename risk_metrics_{month}m to risk_metrics_{week}w
            risk_metrics = metrics[f'risk_metrics_{week}w']
            
            # Rename composite_score_{month}m to composite_score_{week}w
            composite_scores.setdefault(ticker, {})[f'composite_score_{week}w'] = calculate_composite_score(
                forecasted_values[ticker],
                risk_percentage,
                risk_metrics['Rachev Ratio'],
                risk_metrics['Sharpe Ratio'],
                risk_metrics['Sortino Ratio'],
                risk_metrics['Volatility Clustering'],
                mean_forecast,
                std_forecast,
                np.mean([m[f'risk_metrics_{week}w']['Rachev Ratio'] for m in risk_metrics_results.values()]),
                np.std([m[f'risk_metrics_{week}w']['Rachev Ratio'] for m in risk_metrics_results.values()]),
                np.mean([m[f'risk_metrics_{week}w']['Sharpe Ratio'] for m in risk_metrics_results.values()]),
                np.std([m[f'risk_metrics_{week}w']['Sharpe Ratio'] for m in risk_metrics_results.values()]),
                np.mean([m[f'risk_metrics_{week}w']['Sortino Ratio'] for m in risk_metrics_results.values()]),
                np.std([m[f'risk_metrics_{week}w']['Sortino Ratio'] for m in risk_metrics_results.values()]),
                np.mean([m[f'risk_metrics_{week}w']['Volatility Clustering'] for m in risk_metrics_results.values()]),
                np.std([m[f'risk_metrics_{week}w']['Volatility Clustering'] for m in risk_metrics_results.values()])
            )
    
    return composite_scores


In [69]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
from pandas.tseries.offsets import BDay

def select_top_etfs_weekly(df, week_label, return_scores=False):
    """
    Select the top 2 ETFs based on 'Score' for the given week_label.
    """
    df_sorted = df.sort_values(by='Score', ascending=False).head(2)
    if return_scores:
        # Returns list of tuples (ETF, Score)
        return list(df_sorted.itertuples(index=False, name=None))
    # Returns only ETF names
    return list(df_sorted['ETF'])

def generate_week_ranges(start_date, end_date):
    """
    Generate a list of weekly date ranges from start_date to end_date.
    Each range is 7 days long (e.g., Monday to Sunday).
    """
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    week_ranges = []
    
    while start < end:
        week_start = start
        # Each period is one week long (7 days)
        week_end = start + timedelta(days=6)
        # If the next 7-day block goes beyond end, truncate to end
        if week_end > end:
            week_end = end
        week_ranges.append((week_start.strftime('%Y-%m-%d'), week_end.strftime('%Y-%m-%d')))
        
        # Move to the day after week_end to start the next week
        start = week_end + timedelta(days=1)
    
    return week_ranges

def gather_etf_data_weekly(tickers, week_ranges):
    """
    Download weekly ETF data for the specified tickers over the given weekly ranges.
    Returns a dictionary keyed by 'start_date to end_date', each containing data per ticker.
    """
    etf_histories = {}
    for start_date, end_date in week_ranges:
        week = f"{start_date} to {end_date}"
        etf_histories[week] = {}
        for ticker in tickers:
            etf_data = yf.download(ticker, start=start_date, end=end_date, progress=False)
            if etf_data.empty:
                print(f"No data found for {ticker} in {week}")
                continue
            etf_data.index = pd.to_datetime(etf_data.index)
            etf_histories[week][ticker] = etf_data
    return etf_histories

def initialize_shares(top_etfs, etf_histories, week, investment_amount=50000):
    """
    Initialize shares for the top ETFs at the beginning of a given 'week'.
    """
    ticker_shares = {}
    # The 'week' string is in the format: "yyyy-mm-dd to yyyy-mm-dd"
    first_trading_day_start = week.split(" to ")[0]
    
    for ticker in top_etfs:
        etf_history = etf_histories.get(week, {}).get(ticker)
        if etf_history is not None and not etf_history.empty:
            # Move one business day forward from the week start
            first_trading_day = pd.to_datetime(first_trading_day_start) + BDay(1)
            # If this exact date isn't in the index, find the nearest future trading day
            if first_trading_day not in etf_history.index:
                idx_pos = etf_history.index.searchsorted(first_trading_day)
                if idx_pos < len(etf_history.index):
                    first_trading_day = etf_history.index[idx_pos]
                else:
                    print(f"No valid trading day found for {ticker} in {week}")
                    continue
            
            price = etf_history.loc[first_trading_day, 'Close']
            num_shares = (investment_amount * 0.9975) / price  # small friction cost (0.25%)
            print(f"Buy {ticker}: {num_shares:.2f} shares at {price:.2f}.")
            ticker_shares[ticker] = num_shares
        else:
            print(f"No data found for {ticker} in {week}")
    return ticker_shares

def manage_portfolio(prev_top_etfs, curr_top_etfs, prev_week, curr_week, ticker_shares, etf_histories):
    """
    Manage the transition from prev_week's top ETFs to curr_week's top ETFs,
    selling ETFs no longer in the top 2, and buying newly selected ETFs.
    """
    print(f"Top 2 ETFs for {prev_week}: {prev_top_etfs}")
    print(f"Top 2 ETFs for {curr_week}: {curr_top_etfs}")
    
    etf_histories_curr = etf_histories.get(curr_week, {})
    common_etfs = set(prev_top_etfs) & set(curr_top_etfs)
    etfs_to_sell = set(prev_top_etfs) - common_etfs
    etfs_to_buy = set(curr_top_etfs) - common_etfs

    selling_values = {}
    for etf in etfs_to_sell:
        shares = ticker_shares.pop(etf, 0)
        if shares > 0 and etf in etf_histories_curr and not etf_histories_curr[etf].empty:
            # Use the first available Close price in the new week to sell
            price = etf_histories_curr[etf]['Close'].iloc[0]
            selling_values[etf] = shares * price * 0.9975
            print(f"Sell {etf}: {shares:.2f} shares at {price:.2f}. Total value: {selling_values[etf]:.2f}")

    # Use zipped pairs to buy new ETFs using the proceeds from sold ETFs
    for etf_to_buy, etf_to_sell in zip(etfs_to_buy, etfs_to_sell):
        if etf_to_buy in etf_histories_curr and etf_to_sell in selling_values:
            price = etf_histories_curr[etf_to_buy]['Close'].iloc[0]
            ticker_shares[etf_to_buy] = (selling_values[etf_to_sell] * 0.9975) / price
            print(f"Buy {etf_to_buy}: {ticker_shares[etf_to_buy]:.2f} shares at {price:.2f}.")

    print(f"Updated ticker shares after {curr_week}: {ticker_shares}")
    # Return only shares for the newly selected top ETFs
    return {etf: ticker_shares[etf] for etf in curr_top_etfs if etf in ticker_shares}




In [70]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
from pandas.tseries.offsets import BDay

##############################################
# Assume you already have these functions:
#   - compute_risk_metrics(results)
#   - compute_composite_scores(results, risk_metrics_results, risk_percentage)
#   - select_top_etfs_weekly()  (similar to select_top_etfs_monthly but for weeks)
#   - generate_week_ranges()
#   - gather_etf_data_weekly()
#   - initialize_shares()
#   - manage_portfolio()
##############################################

def calculate_smoothing(scores_dict, alpha=0.1):
    """
    Apply exponential smoothing to a dictionary of dictionaries containing scores.
    Keys at the top level are weeks (as strings), and values are {etf: raw_score}.
    """
    smoothed_scores = {}
    previous_values = {}
    # Sort by the numeric value of the week string (e.g., '1', '2', ..., '52')
    sorted_weeks = sorted(scores_dict.keys(), key=lambda w: int(w))
    
    for week in sorted_weeks:
        smoothed_scores[week] = {}
        for etf, raw_score in scores_dict[week].items():
            if etf not in previous_values:
                # First time seeing this ETF, just take the raw score
                smoothed_value = float(raw_score)
            else:
                # Exponential smoothing
                smoothed_value = alpha * float(raw_score) + (1 - alpha) * previous_values[etf]
            smoothed_scores[week][etf] = smoothed_value
            previous_values[etf] = smoothed_value
    
    return smoothed_scores

def portfolio_weekly(results, risk_percentage=0.1, return_scores=False):
    """
    Compute risk metrics and composite scores on a weekly basis.
    Return a dictionary of all ETF scores per week (rather than just the top 2).
    """
    risk_metrics_results = compute_risk_metrics(results)
    composite_scores = compute_composite_scores(results, risk_metrics_results, risk_percentage)
    
    # Collect all ETF scores per week
    # Instead of composite_score_{month}m, we use composite_score_{week}w
    all_scores_per_week = {
        str(week): {
            etf: composite_scores[etf][f'composite_score_{week}w']
            for etf in composite_scores
        }
        for week in range(1, 53)  # 1 to 52 for weeks in a year, adjust as needed
    }
    
    return all_scores_per_week

def portfolio_optimization_weekly(results, risk_percentage=0.1, smoothing=False):
    """
    High-level function to:
      - Get all raw weekly scores
      - Optionally apply smoothing
      - Select top ETFs each week
      - Manage portfolio transitions through the weeks
      - Calculate final portfolio value after a certain 'week 53' period or next horizon
    """
    # 1) Get all raw weekly scores
    all_scores = portfolio_weekly(results, risk_percentage, return_scores=True)
    
    print("\n--- Raw Scores Before Smoothing ---")
    for week, scores in sorted(all_scores.items(), key=lambda x: int(x[0])):
        print(f"Week {week}: {scores}")
    
    # 2) Optionally apply smoothing
    if smoothing:
        smoothed_scores = calculate_smoothing(all_scores)
        print("\n--- Smoothed Scores After Smoothing ---")
        for week, scores in sorted(smoothed_scores.items(), key=lambda x: int(x[0])):
            print(f"Week {week}: {scores}")
        
        # Select top 2 ETFs based on smoothed scores
        portfolios = {
            week: select_top_etfs_weekly(
                pd.DataFrame({'ETF': list(scores.keys()), 'Score': list(scores.values())}),
                f'{week}w',
                return_scores=False
            )
            for week, scores in smoothed_scores.items()
        }
    else:
        # Select top 2 ETFs based on raw scores
        portfolios = {
            week: select_top_etfs_weekly(
                pd.DataFrame({'ETF': list(scores.keys()), 'Score': list(scores.values())}),
                f'{week}w',
                return_scores=False
            )
            for week, scores in all_scores.items()
        }
    
    print("\n--- Final Portfolio Selection ---")
    for week, etfs in sorted(portfolios.items(), key=lambda x: int(x[0])):
        print(f"Week {week}: {etfs}")
    
    # 3) Gather weekly data for the actual buy/sell simulation
    #    Replace this range with your desired start/end for weekly intervals
    week_ranges = generate_week_ranges('2024-01-01', '2025-01-01')
    etf_histories = gather_etf_data_weekly(results.keys(), week_ranges)
    
    # Keep track of shares after each week
    ticker_shares = {}
    ticker_shares_per_week = {}
    
    # 4) Loop through the weeks in etf_histories
    for i, week_range in enumerate(etf_histories.keys()):
        week_index = str(i + 1)  # '1', '2', '3', ...
        top_etfs = portfolios.get(week_index, [])
        
        if not top_etfs:
            print(f"Warning: No ETFs selected for {week_range}. Skipping trading.")
            continue
        
        if i == 0:
            # First week: initialize shares
            ticker_shares = initialize_shares(top_etfs, etf_histories, week_range)
        else:
            # Manage portfolio transitions from previous week
            prev_week_index = str(i)
            prev_top_etfs = portfolios.get(prev_week_index, [])
            ticker_shares = manage_portfolio(
                prev_top_etfs,
                top_etfs,
                list(etf_histories.keys())[i - 1],
                week_range,
                ticker_shares,
                etf_histories
            )
        
        ticker_shares_per_week[week_range] = ticker_shares.copy()
    
    # 5) Simulate final valuation after "Week 53" or any next horizon
    #    For illustration, let's say it's a few days into 2025.
    first_trading_day_53w = '2025-01-01'
    week_52_range = list(ticker_shares_per_week.keys())[-1] if ticker_shares_per_week else None
    
    # We can define a short window for the next week:
    week_53_start = first_trading_day_53w
    week_53_end = '2025-01-06'
    etf_values_53w = {}
    
    if week_52_range and week_52_range in ticker_shares_per_week:
        ticker_shares_52w = ticker_shares_per_week[week_52_range]
        for ticker, shares in ticker_shares_52w.items():
            data = yf.download(ticker, start=week_53_start, end=week_53_end, progress=False)
            if not data.empty:
                closing_price_53w = data['Close'].iloc[0]
                total_value = shares * closing_price_53w
                etf_values_53w[ticker] = total_value
    
    total_portfolio_value = sum(etf_values_53w.values()) if etf_values_53w else 0
    
    return ticker_shares_per_week, total_portfolio_value


In [71]:

portfolios, final_value = portfolio_optimization_weekly(results, smoothing=True)
print(f"Final Portfolio Value: {final_value:.2f}")


--- Raw Scores Before Smoothing ---
Week 1: {'SMH': -1.6846978772104477, 'SOXX': -1.0827606991121823, 'PSI': -1.173045453578598, 'XSD': -1.1892999478124255, 'IYW': -1.213816250897503, 'XLK': -2.2600634876011894, 'VGT': 1.8867442780808321, 'FTEC': 7.3113648216155, 'IGM': -1.0651296263259877, 'IXN': 0.4707042428420017}
Week 2: {'SMH': -0.625010364646769, 'SOXX': -1.1485335259279768, 'PSI': -0.8467677661593932, 'XSD': -1.300867154239207, 'IYW': -1.6393559089939744, 'XLK': -1.532843812851385, 'VGT': 0.45395541741260254, 'FTEC': 7.083671560908055, 'IGM': -1.0278493387218512, 'IXN': 0.5836008932199014}
Week 3: {'SMH': -1.2081139813093442, 'SOXX': -1.2968175594772897, 'PSI': -0.11521497047336747, 'XSD': -1.4952684857732343, 'IYW': -2.0777580311721957, 'XLK': -1.9623949893070067, 'VGT': 0.5391866961349321, 'FTEC': 7.816774473387807, 'IGM': -0.196815398794498, 'IXN': -0.003577753215801246}
Week 4: {'SMH': -1.2689766961883533, 'SOXX': 1.6298070610117867, 'PSI': 3.1993568879684133, 'XSD': -2.565

Buy FTEC: 356.79 shares at 139.79.
Buy VGT: 105.83 shares at 471.29.
Top 2 ETFs for 2024-01-01 to 2024-01-07: ['FTEC', 'VGT']
Top 2 ETFs for 2024-01-08 to 2024-01-14: ['FTEC', 'VGT']
Updated ticker shares after 2024-01-08 to 2024-01-14: {'FTEC': 356.7851949024137, 'VGT': 105.82655922196592}
Top 2 ETFs for 2024-01-08 to 2024-01-14: ['FTEC', 'VGT']
Top 2 ETFs for 2024-01-15 to 2024-01-21: ['FTEC', 'VGT']
Updated ticker shares after 2024-01-15 to 2024-01-21: {'FTEC': 356.7851949024137, 'VGT': 105.82655922196592}
Top 2 ETFs for 2024-01-15 to 2024-01-21: ['FTEC', 'VGT']
Top 2 ETFs for 2024-01-22 to 2024-01-28: ['FTEC', 'VGT']
Updated ticker shares after 2024-01-22 to 2024-01-28: {'FTEC': 356.7851949024137, 'VGT': 105.82655922196592}
Top 2 ETFs for 2024-01-22 to 2024-01-28: ['FTEC', 'VGT']
Top 2 ETFs for 2024-01-29 to 2024-02-04: ['FTEC', 'VGT']
Updated ticker shares after 2024-01-29 to 2024-02-04: {'FTEC': 356.7851949024137, 'VGT': 105.82655922196592}
Top 2 ETFs for 2024-01-29 to 2024-02-04

In [64]:

portfolios, final_value = portfolio_optimization_weekly(results, smoothing=False)
print(f"Final Portfolio Value: {final_value:.2f}")


--- Raw Scores Before Smoothing ---
Week 1: {'SMH': -1.7135590181979583, 'SOXX': -1.1058609733638407, 'PSI': -1.1973584626400289, 'XSD': -1.2135921730603039, 'IYW': -1.2295851593234595, 'XLK': -2.267158237438438, 'VGT': 2.156676739289993, 'FTEC': 7.343188163492915, 'IGM': -1.093559630412998, 'IXN': 0.3208087516541203}
Week 2: {'SMH': -0.6050476300955865, 'SOXX': -1.1253439419212383, 'PSI': -0.8265637886624074, 'XSD': -1.2786532430426163, 'IYW': -1.5905051851899858, 'XLK': -1.4847653770854743, 'VGT': 0.21597196810165015, 'FTEC': 7.228610279831715, 'IGM': -0.9957010340367076, 'IXN': 0.4619979521006535}
Week 3: {'SMH': -1.1922721092332542, 'SOXX': -1.2807918473830906, 'PSI': -0.12474895935545986, 'XSD': -1.470484359228183, 'IYW': -2.0417536463448633, 'XLK': -1.92715438025902, 'VGT': 0.3193167185935991, 'FTEC': 8.021342208540167, 'IGM': -0.22901498997212527, 'IXN': -0.07443863535776586}
Week 4: {'SMH': -1.4066636615419703, 'SOXX': 1.7524800470975788, 'PSI': 3.2220632444894894, 'XSD': -2.6

Buy FTEC: 356.79 shares at 139.79.
Buy VGT: 105.83 shares at 471.29.
Top 2 ETFs for 2024-01-01 to 2024-01-07: ['FTEC', 'VGT']
Top 2 ETFs for 2024-01-08 to 2024-01-14: ['FTEC', 'IXN']
Sell VGT: 105.83 shares at 474.06. Total value: 50042.72
Buy IXN: 747.61 shares at 66.77.
Updated ticker shares after 2024-01-08 to 2024-01-14: {'FTEC': 356.7851949024137, 'IXN': 747.605418225166}
Top 2 ETFs for 2024-01-08 to 2024-01-14: ['FTEC', 'IXN']
Top 2 ETFs for 2024-01-15 to 2024-01-21: ['FTEC', 'VGT']
Sell IXN: 747.61 shares at 67.81. Total value: 50568.38
Buy VGT: 104.57 shares at 482.39.
Updated ticker shares after 2024-01-15 to 2024-01-21: {'FTEC': 356.7851949024137, 'VGT': 104.56676400731145}
Top 2 ETFs for 2024-01-15 to 2024-01-21: ['FTEC', 'VGT']
Top 2 ETFs for 2024-01-22 to 2024-01-28: ['FTEC', 'PSI']
Sell VGT: 104.57 shares at 502.96. Total value: 52461.42
Buy PSI: 1025.68 shares at 51.02.
Updated ticker shares after 2024-01-22 to 2024-01-28: {'FTEC': 356.7851949024137, 'PSI': 1025.68134994

Final Portfolio Value: 111675.31
